1
0
Fork 0

Rename URL_(WHITE|BLACK)LIST to URL_(ALLOW|DENY)LIST

Retain aliases for old configuration files
This commit is contained in:
Ross Williams 2023-07-30 23:43:04 -04:00
parent b773041952
commit 46e80dd509
4 changed files with 10 additions and 10 deletions

View file

@ -82,8 +82,8 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'MEDIA_TIMEOUT': {'type': int, 'default': 3600}, 'MEDIA_TIMEOUT': {'type': int, 'default': 3600},
'OUTPUT_PERMISSIONS': {'type': str, 'default': '644'}, 'OUTPUT_PERMISSIONS': {'type': str, 'default': '644'},
'RESTRICT_FILE_NAMES': {'type': str, 'default': 'windows'}, 'RESTRICT_FILE_NAMES': {'type': str, 'default': 'windows'},
'URL_BLACKLIST': {'type': str, 'default': r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$'}, # to avoid downloading code assets as their own pages 'URL_DENYLIST': {'type': str, 'default': r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$', 'aliases': ('URL_BLACKLIST',)}, # to avoid downloading code assets as their own pages
'URL_WHITELIST': {'type': str, 'default': None}, 'URL_ALLOWLIST': {'type': str, 'default': None, 'aliases': ('URL_WHITELIST',)},
'ENFORCE_ATOMIC_WRITES': {'type': bool, 'default': True}, 'ENFORCE_ATOMIC_WRITES': {'type': bool, 'default': True},
'TAG_SEPARATOR_PATTERN': {'type': str, 'default': r'[,]'}, 'TAG_SEPARATOR_PATTERN': {'type': str, 'default': r'[,]'},
}, },
@ -371,8 +371,8 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
'CONFIG_FILE': {'default': lambda c: Path(c['CONFIG_FILE']).resolve() if c['CONFIG_FILE'] else c['OUTPUT_DIR'] / CONFIG_FILENAME}, 'CONFIG_FILE': {'default': lambda c: Path(c['CONFIG_FILE']).resolve() if c['CONFIG_FILE'] else c['OUTPUT_DIR'] / CONFIG_FILENAME},
'COOKIES_FILE': {'default': lambda c: c['COOKIES_FILE'] and Path(c['COOKIES_FILE']).resolve()}, 'COOKIES_FILE': {'default': lambda c: c['COOKIES_FILE'] and Path(c['COOKIES_FILE']).resolve()},
'CHROME_USER_DATA_DIR': {'default': lambda c: find_chrome_data_dir() if c['CHROME_USER_DATA_DIR'] is None else (Path(c['CHROME_USER_DATA_DIR']).resolve() if c['CHROME_USER_DATA_DIR'] else None)}, # None means unset, so we autodetect it with find_chrome_data_dir(), but an empty string '' means the user manually set it to '', and we should store it as None 'CHROME_USER_DATA_DIR': {'default': lambda c: find_chrome_data_dir() if c['CHROME_USER_DATA_DIR'] is None else (Path(c['CHROME_USER_DATA_DIR']).resolve() if c['CHROME_USER_DATA_DIR'] else None)}, # None means unset, so we autodetect it with find_chrome_data_dir(), but an empty string '' means the user manually set it to '', and we should store it as None
'URL_BLACKLIST_PTN': {'default': lambda c: c['URL_BLACKLIST'] and re.compile(c['URL_BLACKLIST'] or '', re.IGNORECASE | re.UNICODE | re.MULTILINE)}, 'URL_DENYLIST_PTN': {'default': lambda c: c['URL_DENYLIST'] and re.compile(c['URL_DENYLIST'] or '', re.IGNORECASE | re.UNICODE | re.MULTILINE)},
'URL_WHITELIST_PTN': {'default': lambda c: c['URL_WHITELIST'] and re.compile(c['URL_WHITELIST'] or '', re.IGNORECASE | re.UNICODE | re.MULTILINE)}, 'URL_ALLOWLIST_PTN': {'default': lambda c: c['URL_ALLOWLIST'] and re.compile(c['URL_ALLOWLIST'] or '', re.IGNORECASE | re.UNICODE | re.MULTILINE)},
'DIR_OUTPUT_PERMISSIONS': {'default': lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')}, 'DIR_OUTPUT_PERMISSIONS': {'default': lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')},
'ARCHIVEBOX_BINARY': {'default': lambda c: sys.argv[0] or bin_path('archivebox')}, 'ARCHIVEBOX_BINARY': {'default': lambda c: sys.argv[0] or bin_path('archivebox')},

View file

@ -41,7 +41,7 @@ class ConfigDict(BaseConfig, total=False):
MEDIA_TIMEOUT: int MEDIA_TIMEOUT: int
OUTPUT_PERMISSIONS: str OUTPUT_PERMISSIONS: str
RESTRICT_FILE_NAMES: str RESTRICT_FILE_NAMES: str
URL_BLACKLIST: str URL_DENYLIST: str
SECRET_KEY: Optional[str] SECRET_KEY: Optional[str]
BIND_ADDR: str BIND_ADDR: str

View file

@ -41,7 +41,7 @@ class AddLinkForm(forms.Form):
# label="Exclude patterns", # label="Exclude patterns",
# min_length='1', # min_length='1',
# required=False, # required=False,
# initial=URL_BLACKLIST, # initial=URL_DENYLIST,
# ) # )
# timeout = forms.IntegerField( # timeout = forms.IntegerField(
# initial=TIMEOUT, # initial=TIMEOUT,

View file

@ -22,8 +22,8 @@ from ..config import (
JSON_INDEX_FILENAME, JSON_INDEX_FILENAME,
OUTPUT_DIR, OUTPUT_DIR,
TIMEOUT, TIMEOUT,
URL_BLACKLIST_PTN, URL_DENYLIST_PTN,
URL_WHITELIST_PTN, URL_ALLOWLIST_PTN,
stderr, stderr,
OUTPUT_PERMISSIONS OUTPUT_PERMISSIONS
) )
@ -142,9 +142,9 @@ def archivable_links(links: Iterable[Link]) -> Iterable[Link]:
continue continue
if scheme(link.url) not in ('http', 'https', 'ftp'): if scheme(link.url) not in ('http', 'https', 'ftp'):
continue continue
if URL_BLACKLIST_PTN and URL_BLACKLIST_PTN.search(link.url): if URL_DENYLIST_PTN and URL_DENYLIST_PTN.search(link.url):
continue continue
if URL_WHITELIST_PTN and (not URL_WHITELIST_PTN.search(link.url)): if URL_ALLOWLIST_PTN and (not URL_ALLOWLIST_PTN.search(link.url)):
continue continue
yield link yield link