mirror of
https://github.com/postgres/pgweb.git
synced 2025-08-01 15:54:53 +00:00
Explicitly exclude URLs containing ".." from search crawling
Per-site exclusion rules were already configured for this, but their regexp was slightly incorrect. In any case, we should simply never crawl URLs containing ".." unless they have been normalized, so for now just add them to the hardcoded exclusion rules.
This commit is contained in:
@ -31,6 +31,8 @@ class GenericSiteCrawler(BaseSiteCrawler):
|
||||
self.queue.put((x, 0.5, False))
|
||||
|
||||
def exclude_url(self, url):
|
||||
if ".." in url:
|
||||
return True
|
||||
if self.robots and self.robots.block_url(url):
|
||||
return True
|
||||
for r in self.extra_excludes:
|
||||
|
Reference in New Issue
Block a user