add archivebox config command and move config into sections
This commit is contained in:
parent
583d77bc31
commit
4d6ad7a65d
4 changed files with 394 additions and 122 deletions
117
archivebox/cli/archivebox_config.py
Normal file
117
archivebox/cli/archivebox_config.py
Normal file
|
@ -0,0 +1,117 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
__package__ = 'archivebox.cli'
|
||||||
|
__command__ = 'archivebox config'
|
||||||
|
__description__ = 'Get and set your ArchiveBox project configuration values'
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
from typing import Optional, List
|
||||||
|
|
||||||
|
from ..legacy.util import SmartFormatter
|
||||||
|
from ..legacy.config import (
|
||||||
|
check_data_folder,
|
||||||
|
OUTPUT_DIR,
|
||||||
|
write_config_file,
|
||||||
|
CONFIG,
|
||||||
|
ConfigDict,
|
||||||
|
stderr,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def main(args: Optional[List[str]]=None, stdin: Optional[str]=None) -> None:
    """Run the `archivebox config` command: print or update config values.

    Args:
        args: command-line arguments (without the program name); defaults to
            ``sys.argv[1:]`` when None.
        stdin: optional text to use instead of reading ``sys.stdin``; one
            KEY or KEY=VALUE entry per line.

    Raises:
        SystemExit: always. Exit code 0 on success, 1 when a key could not be
            read/written (or when options were given both as arguments and on
            stdin), 2 on usage errors such as a --set entry without ``=``.
    """
    check_data_folder()

    args = sys.argv[1:] if args is None else args

    parser = argparse.ArgumentParser(
        prog=__command__,
        description=__description__,
        add_help=True,
        formatter_class=SmartFormatter,
    )
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        '--get', #'-g',
        action='store_true',
        help="Get the value for the given config KEYs",
    )
    group.add_argument(
        '--set', #'-s',
        action='store_true',
        help="Set the given KEY=VALUE config values",
    )
    parser.add_argument(
        'config_options',
        nargs='*',
        type=str,
        help='KEY or KEY=VALUE formatted config values to get or set',
    )
    command = parser.parse_args(args)

    stdin_options: List[str] = []
    if stdin or not sys.stdin.isatty():
        stdin_raw_text = stdin or sys.stdin.read()
        # Drop blank lines so a trailing newline or an empty pipe is not
        # mistaken for a (nonexistent) config key named ''.
        stdin_options = [
            line.strip()
            for line in stdin_raw_text.split('\n')
            if line.strip()
        ]

    if stdin_options and command.config_options:
        stderr(
            '[X] You should either pass config values as an arguments '
            'or via stdin, but not both.\n',
            color='red',
        )
        raise SystemExit(1)

    # An empty stdin (e.g. running without a TTY) must not discard the
    # options that were passed as arguments.
    config_options = stdin_options or command.config_options

    no_args = not (command.get or command.set or command.config_options)

    matching_config: ConfigDict = {}
    if command.get or no_args:
        if config_options:
            matching_config = {key: CONFIG[key] for key in config_options if key in CONFIG}
            failed_config = [key for key in config_options if key not in CONFIG]
            if failed_config:
                stderr()
                stderr('[X] These options failed to get', color='red')
                # report only the keys that were not found, not every requested key
                stderr(' {}'.format('\n '.join(failed_config)))
                raise SystemExit(1)
        else:
            matching_config = CONFIG

        print('\n'.join(f'{key}={val}' for key, val in matching_config.items()))
        # exit code 1 when nothing matched, 0 otherwise
        raise SystemExit(not matching_config)
    elif command.set:
        new_config = {}
        failed_options = []
        for line in config_options:
            if line.startswith('#') or not line.strip():
                continue
            if '=' not in line:
                stderr('[X] Config KEY=VALUE must have an = sign in it', color='red')
                stderr(f' {line}')
                raise SystemExit(2)

            # split on the first '=' only: values (regexes, user agents, ...)
            # may themselves contain '='
            key, val = line.split('=', 1)
            if key.upper().strip() in CONFIG:
                new_config[key.upper().strip()] = val.strip()
            else:
                failed_options.append(line)

        if new_config:
            matching_config = write_config_file(new_config, out_dir=OUTPUT_DIR)
            print('\n'.join(f'{key}={val}' for key, val in matching_config.items()))
        if failed_options:
            stderr()
            stderr('[X] These options failed to set:', color='red')
            stderr(' {}'.format('\n '.join(failed_options)))
        raise SystemExit(bool(failed_options))
    else:
        stderr('[X] You must pass either --get or --set, or no arguments to get the whole config.', color='red')
        stderr(' archivebox config')
        stderr(' archivebox config --get SOME_KEY')
        stderr(' archivebox config --set SOME_KEY=SOME_VALUE')
        raise SystemExit(2)


if __name__ == '__main__':
    main()
|
58
archivebox/legacy/ArchiveBox.conf
Normal file
58
archivebox/legacy/ArchiveBox.conf
Normal file
|
@ -0,0 +1,58 @@
|
||||||
|
# This is the example default configuration file for ArchiveBox.
|
||||||
|
#
|
||||||
|
# Copy example config from here into your project's ArchiveBox.conf file,
|
||||||
|
# DO NOT EDIT THIS FILE DIRECTLY!
|
||||||
|
#
|
||||||
|
# See the list of all the possible options, documentation, and examples here:
|
||||||
|
# https://github.com/pirate/ArchiveBox/wiki/Configuration
|
||||||
|
|
||||||
|
[GENERAL_CONFIG]
|
||||||
|
OUTPUT_PERMISSIONS = 755
|
||||||
|
ONLY_NEW = False
|
||||||
|
TIMEOUT = 60
|
||||||
|
MEDIA_TIMEOUT = 3600
|
||||||
|
ACTIVE_THEME = default
|
||||||
|
FOOTER_INFO = Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.
|
||||||
|
URL_BLACKLIST = (://(.*\.)?facebook\.com)|(://(.*\.)?ebay\.com)|(.*\.exe$)
|
||||||
|
|
||||||
|
[ARCHIVE_METHOD_TOGGLES]
|
||||||
|
SAVE_TITLE = True
|
||||||
|
SAVE_FAVICON = True
|
||||||
|
SAVE_WGET = True
|
||||||
|
SAVE_WGET_REQUISITES = True
|
||||||
|
SAVE_WARC = True
|
||||||
|
SAVE_PDF = True
|
||||||
|
SAVE_SCREENSHOT = True
|
||||||
|
SAVE_DOM = True
|
||||||
|
SAVE_GIT = True
|
||||||
|
SAVE_MEDIA = False
|
||||||
|
SAVE_ARCHIVE_DOT_ORG = True
|
||||||
|
|
||||||
|
|
||||||
|
[ARCHIVE_METHOD_OPTIONS]
|
||||||
|
CHECK_SSL_VALIDITY = True
|
||||||
|
RESOLUTION = 1440,900
|
||||||
|
GIT_DOMAINS = github.com,bitbucket.org,gitlab.com
|
||||||
|
|
||||||
|
CHROME_HEADLESS = True
|
||||||
|
CHROME_SANDBOX = True
|
||||||
|
|
||||||
|
COOKIES_FILE = path/to/cookies.txt
|
||||||
|
CHROME_USER_DATA_DIR = ~/.config/google-chrome/Default
|
||||||
|
|
||||||
|
WGET_USER_AGENT = Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36
|
||||||
|
CHROME_USER_AGENT = Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36
|
||||||
|
|
||||||
|
|
||||||
|
[DEPENDENCY_CONFIG]
|
||||||
|
USE_CURL = True
|
||||||
|
USE_WGET = True
|
||||||
|
USE_CHROME = True
|
||||||
|
USE_YOUTUBEDL = True
|
||||||
|
USE_GIT = True
|
||||||
|
|
||||||
|
CURL_BINARY = curl
|
||||||
|
GIT_BINARY = git
|
||||||
|
WGET_BINARY = wget
|
||||||
|
YOUTUBEDL_BINARY = youtube-dl
|
||||||
|
CHROME_BINARY = chromium
|
|
@ -9,8 +9,9 @@ import getpass
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
from hashlib import md5
|
from hashlib import md5
|
||||||
from typing import Optional, Type, Tuple
|
from typing import Optional, Type, Tuple, Dict
|
||||||
from subprocess import run, PIPE, DEVNULL
|
from subprocess import run, PIPE, DEVNULL
|
||||||
|
from configparser import ConfigParser
|
||||||
|
|
||||||
from .config_stubs import (
|
from .config_stubs import (
|
||||||
SimpleConfigValueDict,
|
SimpleConfigValueDict,
|
||||||
|
@ -29,63 +30,66 @@ from .config_stubs import (
|
||||||
|
|
||||||
################################# User Config ##################################
|
################################# User Config ##################################
|
||||||
|
|
||||||
SHELL_CONFIG_DEFAULTS: ConfigDefaultDict = {
|
CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
|
||||||
'IS_TTY': {'type': bool, 'default': lambda _: sys.stdout.isatty()},
|
'SHELL_CONFIG': {
|
||||||
'USE_COLOR': {'type': bool, 'default': lambda c: c['IS_TTY']},
|
'IS_TTY': {'type': bool, 'default': lambda _: sys.stdout.isatty()},
|
||||||
'SHOW_PROGRESS': {'type': bool, 'default': lambda c: c['IS_TTY']},
|
'USE_COLOR': {'type': bool, 'default': lambda c: c['IS_TTY']},
|
||||||
}
|
'SHOW_PROGRESS': {'type': bool, 'default': lambda c: c['IS_TTY']},
|
||||||
|
},
|
||||||
|
|
||||||
ARCHIVE_CONFIG_DEFAULTS: ConfigDefaultDict = {
|
'GENERAL_CONFIG': {
|
||||||
'OUTPUT_DIR': {'type': str, 'default': None},
|
'OUTPUT_DIR': {'type': str, 'default': None},
|
||||||
'ONLY_NEW': {'type': bool, 'default': False},
|
'CONFIG_FILE': {'type': str, 'default': None},
|
||||||
'TIMEOUT': {'type': int, 'default': 60},
|
'ONLY_NEW': {'type': bool, 'default': False},
|
||||||
'MEDIA_TIMEOUT': {'type': int, 'default': 3600},
|
'TIMEOUT': {'type': int, 'default': 60},
|
||||||
'OUTPUT_PERMISSIONS': {'type': str, 'default': '755'},
|
'MEDIA_TIMEOUT': {'type': int, 'default': 3600},
|
||||||
'FOOTER_INFO': {'type': str, 'default': 'Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.'},
|
'OUTPUT_PERMISSIONS': {'type': str, 'default': '755'},
|
||||||
'URL_BLACKLIST': {'type': str, 'default': None},
|
'FOOTER_INFO': {'type': str, 'default': 'Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.'},
|
||||||
}
|
'URL_BLACKLIST': {'type': str, 'default': None},
|
||||||
|
},
|
||||||
|
|
||||||
ARCHIVE_METHOD_TOGGLES_DEFAULTS: ConfigDefaultDict = {
|
'ARCHIVE_METHOD_TOGGLES': {
|
||||||
'SAVE_TITLE': {'type': bool, 'default': True, 'aliases': ('FETCH_TITLE',)},
|
'SAVE_TITLE': {'type': bool, 'default': True, 'aliases': ('FETCH_TITLE',)},
|
||||||
'SAVE_FAVICON': {'type': bool, 'default': True, 'aliases': ('FETCH_FAVICON',)},
|
'SAVE_FAVICON': {'type': bool, 'default': True, 'aliases': ('FETCH_FAVICON',)},
|
||||||
'SAVE_WGET': {'type': bool, 'default': True, 'aliases': ('FETCH_WGET',)},
|
'SAVE_WGET': {'type': bool, 'default': True, 'aliases': ('FETCH_WGET',)},
|
||||||
'SAVE_WGET_REQUISITES': {'type': bool, 'default': True, 'aliases': ('FETCH_WGET_REQUISITES',)},
|
'SAVE_WGET_REQUISITES': {'type': bool, 'default': True, 'aliases': ('FETCH_WGET_REQUISITES',)},
|
||||||
'SAVE_PDF': {'type': bool, 'default': True, 'aliases': ('FETCH_PDF',)},
|
'SAVE_PDF': {'type': bool, 'default': True, 'aliases': ('FETCH_PDF',)},
|
||||||
'SAVE_SCREENSHOT': {'type': bool, 'default': True, 'aliases': ('FETCH_SCREENSHOT',)},
|
'SAVE_SCREENSHOT': {'type': bool, 'default': True, 'aliases': ('FETCH_SCREENSHOT',)},
|
||||||
'SAVE_DOM': {'type': bool, 'default': True, 'aliases': ('FETCH_DOM',)},
|
'SAVE_DOM': {'type': bool, 'default': True, 'aliases': ('FETCH_DOM',)},
|
||||||
'SAVE_WARC': {'type': bool, 'default': True, 'aliases': ('FETCH_WARC',)},
|
'SAVE_WARC': {'type': bool, 'default': True, 'aliases': ('FETCH_WARC',)},
|
||||||
'SAVE_GIT': {'type': bool, 'default': True, 'aliases': ('FETCH_GIT',)},
|
'SAVE_GIT': {'type': bool, 'default': True, 'aliases': ('FETCH_GIT',)},
|
||||||
'SAVE_MEDIA': {'type': bool, 'default': True, 'aliases': ('FETCH_MEDIA',)},
|
'SAVE_MEDIA': {'type': bool, 'default': True, 'aliases': ('FETCH_MEDIA',)},
|
||||||
'SAVE_ARCHIVE_DOT_ORG': {'type': bool, 'default': True, 'aliases': ('SUBMIT_ARCHIVE_DOT_ORG',)},
|
'SAVE_ARCHIVE_DOT_ORG': {'type': bool, 'default': True, 'aliases': ('SUBMIT_ARCHIVE_DOT_ORG',)},
|
||||||
}
|
},
|
||||||
|
|
||||||
ARCHIVE_METHOD_OPTIONS_DEFAULTS: ConfigDefaultDict = {
|
'ARCHIVE_METHOD_OPTIONS': {
|
||||||
'RESOLUTION': {'type': str, 'default': '1440,2000', 'aliases': ('SCREENSHOT_RESOLUTION',)},
|
'RESOLUTION': {'type': str, 'default': '1440,2000', 'aliases': ('SCREENSHOT_RESOLUTION',)},
|
||||||
'GIT_DOMAINS': {'type': str, 'default': 'github.com,bitbucket.org,gitlab.com'},
|
'GIT_DOMAINS': {'type': str, 'default': 'github.com,bitbucket.org,gitlab.com'},
|
||||||
'CHECK_SSL_VALIDITY': {'type': bool, 'default': True},
|
'CHECK_SSL_VALIDITY': {'type': bool, 'default': True},
|
||||||
|
|
||||||
'WGET_USER_AGENT': {'type': str, 'default': 'ArchiveBox/{VERSION} (+https://github.com/pirate/ArchiveBox/) wget/{WGET_VERSION}'},
|
'WGET_USER_AGENT': {'type': str, 'default': 'ArchiveBox/{VERSION} (+https://github.com/pirate/ArchiveBox/) wget/{WGET_VERSION}'},
|
||||||
'CHROME_USER_AGENT': {'type': str, 'default': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36'},
|
'CHROME_USER_AGENT': {'type': str, 'default': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36'},
|
||||||
|
|
||||||
'COOKIES_FILE': {'type': str, 'default': None},
|
'COOKIES_FILE': {'type': str, 'default': None},
|
||||||
'CHROME_USER_DATA_DIR': {'type': str, 'default': None},
|
'CHROME_USER_DATA_DIR': {'type': str, 'default': None},
|
||||||
|
|
||||||
'CHROME_HEADLESS': {'type': bool, 'default': True},
|
'CHROME_HEADLESS': {'type': bool, 'default': True},
|
||||||
'CHROME_SANDBOX': {'type': bool, 'default': True},
|
'CHROME_SANDBOX': {'type': bool, 'default': True},
|
||||||
}
|
},
|
||||||
|
|
||||||
DEPENDENCY_CONFIG_DEFAULTS: ConfigDefaultDict = {
|
'DEPENDENCY_CONFIG': {
|
||||||
'USE_CURL': {'type': bool, 'default': True},
|
'USE_CURL': {'type': bool, 'default': True},
|
||||||
'USE_WGET': {'type': bool, 'default': True},
|
'USE_WGET': {'type': bool, 'default': True},
|
||||||
'USE_GIT': {'type': bool, 'default': True},
|
'USE_GIT': {'type': bool, 'default': True},
|
||||||
'USE_CHROME': {'type': bool, 'default': True},
|
'USE_CHROME': {'type': bool, 'default': True},
|
||||||
'USE_YOUTUBEDL': {'type': bool, 'default': True},
|
'USE_YOUTUBEDL': {'type': bool, 'default': True},
|
||||||
|
|
||||||
'CURL_BINARY': {'type': str, 'default': 'curl'},
|
'CURL_BINARY': {'type': str, 'default': 'curl'},
|
||||||
'GIT_BINARY': {'type': str, 'default': 'git'},
|
'GIT_BINARY': {'type': str, 'default': 'git'},
|
||||||
'WGET_BINARY': {'type': str, 'default': 'wget'},
|
'WGET_BINARY': {'type': str, 'default': 'wget'},
|
||||||
'YOUTUBEDL_BINARY': {'type': str, 'default': 'youtube-dl'},
|
'YOUTUBEDL_BINARY': {'type': str, 'default': 'youtube-dl'},
|
||||||
'CHROME_BINARY': {'type': str, 'default': None},
|
'CHROME_BINARY': {'type': str, 'default': None},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
############################## Derived Config ##############################
|
############################## Derived Config ##############################
|
||||||
|
@ -120,7 +124,21 @@ JSON_INDEX_FILENAME = 'index.json'
|
||||||
HTML_INDEX_FILENAME = 'index.html'
|
HTML_INDEX_FILENAME = 'index.html'
|
||||||
ROBOTS_TXT_FILENAME = 'robots.txt'
|
ROBOTS_TXT_FILENAME = 'robots.txt'
|
||||||
FAVICON_FILENAME = 'favicon.ico'
|
FAVICON_FILENAME = 'favicon.ico'
|
||||||
|
CONFIG_FILENAME = 'ArchiveBox.conf'
|
||||||
|
|
||||||
|
CONFIG_HEADER = """
|
||||||
|
# This is the default config file for new ArchiveBox projects.
|
||||||
|
# Add your archive collection config here in INI format.
|
||||||
|
#
|
||||||
|
# After updating your config, make sure to update your archive by running:
|
||||||
|
# archivebox init
|
||||||
|
#
|
||||||
|
# The example default configuration file can be found at:
|
||||||
|
# ArchiveBox/etc/Archivebox.conf.default
|
||||||
|
#
|
||||||
|
# See the list of all the possible options. documentation, and examples here:
|
||||||
|
# https://github.com/pirate/ArchiveBox/wiki/Configuration
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
|
DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
|
||||||
|
@ -137,6 +155,7 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
|
||||||
'ARCHIVE_DIR': {'default': lambda c: os.path.join(c['OUTPUT_DIR'], ARCHIVE_DIR_NAME)},
|
'ARCHIVE_DIR': {'default': lambda c: os.path.join(c['OUTPUT_DIR'], ARCHIVE_DIR_NAME)},
|
||||||
'SOURCES_DIR': {'default': lambda c: os.path.join(c['OUTPUT_DIR'], SOURCES_DIR_NAME)},
|
'SOURCES_DIR': {'default': lambda c: os.path.join(c['OUTPUT_DIR'], SOURCES_DIR_NAME)},
|
||||||
'LOGS_DIR': {'default': lambda c: os.path.join(c['OUTPUT_DIR'], LOGS_DIR_NAME)},
|
'LOGS_DIR': {'default': lambda c: os.path.join(c['OUTPUT_DIR'], LOGS_DIR_NAME)},
|
||||||
|
'CONFIG_FILE': {'default': lambda c: os.path.abspath(os.path.expanduser(c['CONFIG_FILE'])) if c['CONFIG_FILE'] else os.path.join(c['OUTPUT_DIR'], CONFIG_FILENAME)},
|
||||||
'COOKIES_FILE': {'default': lambda c: c['COOKIES_FILE'] and os.path.abspath(os.path.expanduser(c['COOKIES_FILE']))},
|
'COOKIES_FILE': {'default': lambda c: c['COOKIES_FILE'] and os.path.abspath(os.path.expanduser(c['COOKIES_FILE']))},
|
||||||
'CHROME_USER_DATA_DIR': {'default': lambda c: find_chrome_data_dir() if c['CHROME_USER_DATA_DIR'] is None else (os.path.abspath(os.path.expanduser(c['CHROME_USER_DATA_DIR'])) or None)},
|
'CHROME_USER_DATA_DIR': {'default': lambda c: find_chrome_data_dir() if c['CHROME_USER_DATA_DIR'] is None else (os.path.abspath(os.path.expanduser(c['CHROME_USER_DATA_DIR'])) or None)},
|
||||||
'URL_BLACKLIST_PTN': {'default': lambda c: c['URL_BLACKLIST'] and re.compile(c['URL_BLACKLIST'], re.IGNORECASE)},
|
'URL_BLACKLIST_PTN': {'default': lambda c: c['URL_BLACKLIST'] and re.compile(c['URL_BLACKLIST'], re.IGNORECASE)},
|
||||||
|
@ -194,14 +213,20 @@ def load_config_val(key: str,
|
||||||
default: ConfigDefaultValue=None,
|
default: ConfigDefaultValue=None,
|
||||||
type: Optional[Type]=None,
|
type: Optional[Type]=None,
|
||||||
aliases: Optional[Tuple[str, ...]]=None,
|
aliases: Optional[Tuple[str, ...]]=None,
|
||||||
config: Optional[ConfigDict]=None) -> ConfigValue:
|
config: Optional[ConfigDict]=None,
|
||||||
|
env_vars: Optional[os._Environ]=None,
|
||||||
|
config_file_vars: Optional[Dict[str, str]]=None) -> ConfigValue:
|
||||||
|
|
||||||
# check the canonical option name first, then check any older aliases
|
config_keys_to_check = (key, *(aliases or ()))
|
||||||
possible_env_keys = (key, *(aliases or ()))
|
for key in config_keys_to_check:
|
||||||
for key in possible_env_keys:
|
if env_vars:
|
||||||
val = os.getenv(key, None)
|
val = env_vars.get(key)
|
||||||
if val:
|
if val:
|
||||||
break
|
break
|
||||||
|
if config_file_vars:
|
||||||
|
val = config_file_vars.get(key)
|
||||||
|
if val:
|
||||||
|
break
|
||||||
|
|
||||||
if type is None or val is None:
|
if type is None or val is None:
|
||||||
if callable(default):
|
if callable(default):
|
||||||
|
@ -230,7 +255,84 @@ def load_config_val(key: str,
|
||||||
|
|
||||||
raise Exception('Config values can only be str, bool, or int')
|
raise Exception('Config values can only be str, bool, or int')
|
||||||
|
|
||||||
def load_config(defaults: ConfigDefaultDict, config: Optional[ConfigDict]=None) -> ConfigDict:
|
def load_config_file(out_dir: str=None) -> Optional[Dict[str, str]]:
    """Read the INI-formatted config file at <out_dir>/CONFIG_FILENAME.

    When out_dir is not given, the OUTPUT_DIR environment variable (or the
    current directory) is used. Returns one flat dict of UPPERCASED option
    names to raw string values merged across all sections, or None when the
    config file does not exist.
    """
    base_dir = out_dir or os.path.abspath(os.getenv('OUTPUT_DIR', '.'))
    config_path = os.path.join(base_dir, CONFIG_FILENAME)

    if not os.path.exists(config_path):
        return None

    parser = ConfigParser()
    # keep option names exactly as written instead of lowercasing them
    parser.optionxform = str
    parser.read(config_path)

    # flatten every section into a single namespace; later sections win
    flattened = {}
    for section_name in parser:
        for option, raw_value in parser[section_name].items():
            flattened[option.upper()] = raw_value

    return flattened
|
||||||
|
|
||||||
|
def write_config_file(config: Dict[str, str], out_dir: str=None) -> Optional[Dict[str, str]]:
    """Write the given options into the INI config file at <out_dir>/CONFIG_FILENAME.

    Each key is stored under the section of CONFIG_DEFAULTS that declares it.
    After writing, the whole config is reloaded via load_all_config() to
    validate it; if that fails, the previous file contents are restored.

    Args:
        config: mapping of option name -> new raw string value.
        out_dir: directory holding the config file; defaults to the
            OUTPUT_DIR environment variable or the current directory.

    Returns:
        A dict of the UPPERCASED keys from `config` mapped to their freshly
        loaded values, or {} if the new config failed to load and was
        rolled back.

    Raises:
        KeyError: if a key is not declared in any CONFIG_DEFAULTS section.
            Raised before the config file is touched.
    """
    out_dir = out_dir or os.path.abspath(os.getenv('OUTPUT_DIR', '.'))
    config_path = os.path.join(out_dir, CONFIG_FILENAME)
    backup_path = f'{config_path}.old'

    # Resolve every key to its section up front, so an unknown key fails
    # before the existing config file has been truncated for writing.
    sections = {}
    for key in config:
        matches = [name for name, opts in CONFIG_DEFAULTS.items() if key in opts]
        if not matches:
            raise KeyError(f'{key} is not declared in any CONFIG_DEFAULTS section')
        sections[key] = matches[0]

    if not os.path.exists(config_path):
        with open(config_path, 'w+') as f:
            f.write(CONFIG_HEADER)

    config_file = ConfigParser()
    # keep option names exactly as written instead of lowercasing them
    config_file.optionxform = str
    config_file.read(config_path)

    # Apply all changes in memory first: ConfigParser can reject a value
    # (e.g. invalid '%' interpolation syntax), and that must not happen
    # while the file on disk is already open and emptied.
    for key, val in config.items():
        section = sections[key]
        existing_config = dict(config_file[section]) if section in config_file else {}
        config_file[section] = {**existing_config, key: val}

    # keep a copy of the current file so it can be restored on failure
    with open(config_path, 'r') as current, open(backup_path, 'w+') as backup:
        backup.write(current.read())

    try:
        # NOTE: ConfigParser.write() does not preserve comments, so any
        # comment lines in the existing file are dropped by this rewrite.
        with open(config_path, 'w+') as f:
            config_file.write(f)

        # validate the new file by re-loading the full config from it
        CONFIG = load_all_config()
        return {
            key.upper(): CONFIG.get(key.upper())
            for key in config
        }
    except BaseException:
        # Deliberately broad (the original used a bare except): load_config in
        # this file reports invalid values by raising SystemExit, which
        # `except Exception` would not catch. Presumably load_all_config goes
        # through it -- TODO confirm.
        with open(backup_path, 'r') as backup, open(config_path, 'w+') as f:
            f.write(backup.read())
        return {}
    finally:
        # remove the backup on success as well as after a rollback
        if os.path.exists(backup_path):
            os.remove(backup_path)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def load_config(defaults: ConfigDefaultDict,
|
||||||
|
config: Optional[ConfigDict]=None,
|
||||||
|
out_dir: Optional[str]=None,
|
||||||
|
env_vars: Optional[os._Environ]=None,
|
||||||
|
config_file_vars: Optional[Dict[str, str]]=None) -> ConfigDict:
|
||||||
|
|
||||||
|
env_vars = env_vars or os.environ
|
||||||
|
config_file_vars = config_file_vars or load_config_file(out_dir=out_dir)
|
||||||
|
|
||||||
extended_config: ConfigDict = config.copy() if config else {}
|
extended_config: ConfigDict = config.copy() if config else {}
|
||||||
for key, default in defaults.items():
|
for key, default in defaults.items():
|
||||||
try:
|
try:
|
||||||
|
@ -240,6 +342,8 @@ def load_config(defaults: ConfigDefaultDict, config: Optional[ConfigDict]=None)
|
||||||
type=default.get('type'),
|
type=default.get('type'),
|
||||||
aliases=default.get('aliases'),
|
aliases=default.get('aliases'),
|
||||||
config=extended_config,
|
config=extended_config,
|
||||||
|
env_vars=env_vars,
|
||||||
|
config_file_vars=config_file_vars,
|
||||||
)
|
)
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
raise SystemExit(0)
|
raise SystemExit(0)
|
||||||
|
@ -253,10 +357,16 @@ def load_config(defaults: ConfigDefaultDict, config: Optional[ConfigDict]=None)
|
||||||
stderr(' For config documentation and examples see:')
|
stderr(' For config documentation and examples see:')
|
||||||
stderr(' https://github.com/pirate/ArchiveBox/wiki/Configuration')
|
stderr(' https://github.com/pirate/ArchiveBox/wiki/Configuration')
|
||||||
stderr()
|
stderr()
|
||||||
raise SystemExit(1)
|
raise SystemExit(2)
|
||||||
|
|
||||||
return extended_config
|
return extended_config
|
||||||
|
|
||||||
|
# def write_config(config: ConfigDict):
|
||||||
|
|
||||||
|
# with open(os.path.join(config['OUTPUT_DIR'], CONFIG_FILENAME), 'w+') as f:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def stderr(*args, color: Optional[str]=None, config: Optional[ConfigDict]=None) -> None:
|
def stderr(*args, color: Optional[str]=None, config: Optional[ConfigDict]=None) -> None:
|
||||||
ansi = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI
|
ansi = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI
|
||||||
|
|
||||||
|
@ -391,6 +501,11 @@ def get_code_locations(config: ConfigDict) -> SimpleConfigValueDict:
|
||||||
def get_config_locations(config: ConfigDict) -> ConfigValue:
|
def get_config_locations(config: ConfigDict) -> ConfigValue:
|
||||||
abspath = lambda path: None if path is None else os.path.abspath(path)
|
abspath = lambda path: None if path is None else os.path.abspath(path)
|
||||||
return {
|
return {
|
||||||
|
'CONFIG_FILE': {
|
||||||
|
'path': abspath(config['CHROME_USER_DATA_DIR']),
|
||||||
|
'enabled': config['USE_CHROME'] and config['CHROME_USER_DATA_DIR'],
|
||||||
|
'is_valid': False if config['CHROME_USER_DATA_DIR'] is None else os.path.exists(os.path.join(config['CHROME_USER_DATA_DIR'], 'Default')),
|
||||||
|
},
|
||||||
'CHROME_USER_DATA_DIR': {
|
'CHROME_USER_DATA_DIR': {
|
||||||
'path': abspath(config['CHROME_USER_DATA_DIR']),
|
'path': abspath(config['CHROME_USER_DATA_DIR']),
|
||||||
'enabled': config['USE_CHROME'] and config['CHROME_USER_DATA_DIR'],
|
'enabled': config['USE_CHROME'] and config['CHROME_USER_DATA_DIR'],
|
||||||
|
|
|
@ -1,74 +1,56 @@
|
||||||
# Example config file for ArchiveBox: The self-hosted internet archive.
|
# This is the default config file for new ArchiveBox projects.
|
||||||
# Copy this file to ~/.ArchiveBox.conf before editing it.
|
# Edit values below using INI syntax, then update your archive by running:
|
||||||
# Config file is in both Python and .env syntax (all strings must be quoted).
|
# archivebox init
|
||||||
# For documentation, see:
|
# For more options, example setups, and documentation, see:
|
||||||
# https://github.com/pirate/ArchiveBox/wiki/Configuration
|
# https://github.com/pirate/ArchiveBox/wiki/Configuration
|
||||||
|
|
||||||
################################################################################
|
[GENERAL_CONFIG]
|
||||||
## General Settings
|
# OUTPUT_PERMISSIONS = 755
|
||||||
################################################################################
|
# ONLY_NEW = False
|
||||||
|
# TIMEOUT = 60
|
||||||
|
# MEDIA_TIMEOUT = 3600
|
||||||
|
# ACTIVE_THEME = default
|
||||||
|
# FOOTER_INFO = Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.
|
||||||
|
# URL_BLACKLIST = (://(.*\.)?facebook\.com)|(://(.*\.)?ebay\.com)|(.*\.exe$)
|
||||||
|
|
||||||
#OUTPUT_DIR="output"
|
[ARCHIVE_METHOD_TOGGLES]
|
||||||
#OUTPUT_PERMISSIONS=755
|
# SAVE_TITLE = True
|
||||||
#ONLY_NEW=False
|
# SAVE_FAVICON = True
|
||||||
#TIMEOUT=60
|
# SAVE_WGET = True
|
||||||
#MEDIA_TIMEOUT=3600
|
# SAVE_WGET_REQUISITES = True
|
||||||
#TEMPLATES_DIR="archivebox/templates"
|
# SAVE_WARC = True
|
||||||
#FOOTER_INFO="Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests."
|
# SAVE_PDF = True
|
||||||
#URL_BLACKLIST="(://(.*\.)?youtube\.com)|(://(.*\.)?amazon\.com)|(.*\.exe$)"
|
# SAVE_SCREENSHOT = True
|
||||||
|
# SAVE_DOM = True
|
||||||
################################################################################
|
# SAVE_GIT = True
|
||||||
## Archive Method Toggles
|
# SAVE_MEDIA = False
|
||||||
################################################################################
|
# SAVE_ARCHIVE_DOT_ORG = True
|
||||||
|
|
||||||
#SAVE_TITLE=True
|
|
||||||
#SAVE_FAVICON=True
|
|
||||||
#SAVE_WGET=True
|
|
||||||
#SAVE_WGET_REQUISITES=True
|
|
||||||
#SAVE_WARC=True
|
|
||||||
#SAVE_PDF=True
|
|
||||||
#SAVE_SCREENSHOT=True
|
|
||||||
#SAVE_DOM=True
|
|
||||||
#SAVE_GIT=True
|
|
||||||
#SAVE_MEDIA=False
|
|
||||||
#SAVE_ARCHIVE_DOT_ORG=True
|
|
||||||
|
|
||||||
|
|
||||||
################################################################################
|
[ARCHIVE_METHOD_OPTIONS]
|
||||||
## Archive Method Options
|
# CHECK_SSL_VALIDITY = True
|
||||||
################################################################################
|
# RESOLUTION = 1440,900
|
||||||
|
# GIT_DOMAINS = github.com,bitbucket.org,gitlab.com
|
||||||
|
|
||||||
#CHECK_SSL_VALIDITY=True
|
# CHROME_HEADLESS = True
|
||||||
#RESOLUTION="1440,900"
|
# CHROME_SANDBOX = True
|
||||||
#GIT_DOMAINS="github.com,bitbucket.org,gitlab.com"
|
|
||||||
|
|
||||||
#CROME_HEADLESS=True
|
# COOKIES_FILE = path/to/cookies.txt
|
||||||
#CROME_SANDBOX=True
|
# CHROME_USER_DATA_DIR = ~/.config/google-chrome/Default
|
||||||
|
|
||||||
#COOKIES_FILE="path/to/cookies.txt"
|
# WGET_USER_AGENT = Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36
|
||||||
#CHROME_USER_DATA_DIR="~/.config/google-chrome/Default"
|
# CHROME_USER_AGENT = Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36
|
||||||
|
|
||||||
#WGET_USER_AGENT="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36"
|
|
||||||
#CHROME_USER_AGENT="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36"
|
|
||||||
|
|
||||||
################################################################################
|
|
||||||
## Shell Options
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
#USE_COLOR=True
|
|
||||||
#SHOW_PROGRESS=True
|
|
||||||
|
|
||||||
|
|
||||||
################################################################################
|
[DEPENDENCY_CONFIG]
|
||||||
## Dependency Options
|
# USE_CURL = True
|
||||||
################################################################################
|
# USE_WGET = True
|
||||||
|
# USE_CHROME = True
|
||||||
|
# USE_YOUTUBEDL = True
|
||||||
|
# USE_GIT = True
|
||||||
|
|
||||||
#USE_CURL=True
|
# CURL_BINARY = curl
|
||||||
#USE_WGET=True
|
# GIT_BINARY = git
|
||||||
#USE_CHROME=True
|
# WGET_BINARY = wget
|
||||||
|
# YOUTUBEDL_BINARY = youtube-dl
|
||||||
#CURL_BINARY="curl"
|
# CHROME_BINARY = chromium
|
||||||
#GIT_BINARY="git"
|
|
||||||
#WGET_BINARY="wget"
|
|
||||||
#YOUTUBEDL_BINARY="youtube-dl"
|
|
||||||
#CHROME_BINARY="chromium-browser"
|
|
||||||
|
|
Loading…
Reference in a new issue