support negation patterns by checking both re.search and re.match
This commit is contained in:
parent
65d452fe7b
commit
e4974d3536
1 changed file with 4 additions and 1 deletion
|
@@ -141,7 +141,10 @@ def archivable_links(links: Iterable[Link]) -> Iterable[Link]:
|
||||||
continue
|
continue
|
||||||
if scheme(link.url) not in ('http', 'https', 'ftp'):
|
if scheme(link.url) not in ('http', 'https', 'ftp'):
|
||||||
continue
|
continue
|
||||||
if URL_BLACKLIST_PTN and URL_BLACKLIST_PTN.search(link.url):
|
if URL_BLACKLIST_PTN and (URL_BLACKLIST_PTN.match(link.url) or URL_BLACKLIST_PTN.search(link.url)):
|
||||||
|
# https://stackoverflow.com/questions/180986/what-is-the-difference-between-re-search-and-re-match
|
||||||
|
# we want both behaviors in order to support multiple patterns in the regex,
|
||||||
|
# and negation regexes like (?!someptnhere) to allow for whitelisting
|
||||||
continue
|
continue
|
||||||
|
|
||||||
yield link
|
yield link
|
||||||
|
|
Loading…
Reference in a new issue