1
0
Fork 0

Merge pull request #448 from pirate/skip-invalid-urls

Skip invalid URLs when archiving
This commit is contained in:
Nick Sweeting 2020-08-18 00:53:31 -04:00 committed by GitHub
commit 09ad3a5303
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -9,6 +9,7 @@ from itertools import chain
from typing import List, Tuple, Dict, Optional, Iterable
from collections import OrderedDict
from contextlib import contextmanager
from urllib.parse import urlparse
from ..system import atomic_write
from ..util import (
@@ -139,6 +140,10 @@ def validate_links(links: Iterable[Link]) -> List[Link]:
def archivable_links(links: Iterable[Link]) -> Iterable[Link]:
"""remove chrome://, about:// or other schemed links that cant be archived"""
for link in links:
try:
urlparse(link.url)
except ValueError:
continue
scheme_is_valid = scheme(link.url) in ('http', 'https', 'ftp')
not_blacklisted = (not URL_BLACKLIST_PTN.match(link.url)) if URL_BLACKLIST_PTN else True
if scheme_is_valid and not_blacklisted: