better config comments and docstrings
This commit is contained in:
parent
9b6774f270
commit
9784dcb816
3 changed files with 114 additions and 62 deletions
2
.github/workflows/debian.yml
vendored
2
.github/workflows/debian.yml
vendored
|
@ -47,7 +47,7 @@ jobs:
|
||||||
archivebox config --set SAVE_READABILITY=False
|
archivebox config --set SAVE_READABILITY=False
|
||||||
archivebox config --set SAVE_MERCURY=False
|
archivebox config --set SAVE_MERCURY=False
|
||||||
archivebox config --set SAVE_SINGLEFILE=False
|
archivebox config --set SAVE_SINGLEFILE=False
|
||||||
archivebox version
|
archivebox --version
|
||||||
|
|
||||||
- name: Add some links to test
|
- name: Add some links to test
|
||||||
run: |
|
run: |
|
||||||
|
|
10
Dockerfile
10
Dockerfile
|
@ -50,6 +50,13 @@ RUN apt-get update -qq \
|
||||||
fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
|
fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Install apt development dependencies
|
||||||
|
# RUN apt-get install -qq \
|
||||||
|
# && apt-get install -qq -y --no-install-recommends \
|
||||||
|
# python3 python3-dev python3-pip python3-venv python3-all \
|
||||||
|
# dh-python debhelper devscripts dput software-properties-common \
|
||||||
|
# python3-distutils python3-setuptools python3-wheel python3-stdeb
|
||||||
|
|
||||||
# Install Node environment
|
# Install Node environment
|
||||||
RUN curl -s https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add - \
|
RUN curl -s https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add - \
|
||||||
&& echo 'deb https://deb.nodesource.com/node_15.x buster main' >> /etc/apt/sources.list \
|
&& echo 'deb https://deb.nodesource.com/node_15.x buster main' >> /etc/apt/sources.list \
|
||||||
|
@ -62,7 +69,6 @@ RUN curl -s https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add -
|
||||||
WORKDIR "$NODE_DIR"
|
WORKDIR "$NODE_DIR"
|
||||||
ENV PATH="${PATH}:$NODE_DIR/node_modules/.bin" \
|
ENV PATH="${PATH}:$NODE_DIR/node_modules/.bin" \
|
||||||
npm_config_loglevel=error
|
npm_config_loglevel=error
|
||||||
# RUN npm install -g npm
|
|
||||||
ADD ./package.json ./package.json
|
ADD ./package.json ./package.json
|
||||||
ADD ./package-lock.json ./package-lock.json
|
ADD ./package-lock.json ./package-lock.json
|
||||||
RUN npm ci
|
RUN npm ci
|
||||||
|
@ -82,7 +88,7 @@ RUN apt-get update -qq \
|
||||||
&& apt-get autoremove -y \
|
&& apt-get autoremove -y \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Install ArchiveBox Python package
|
# Install ArchiveBox Python package and its dependencies
|
||||||
WORKDIR "$CODE_DIR"
|
WORKDIR "$CODE_DIR"
|
||||||
ADD . "$CODE_DIR"
|
ADD . "$CODE_DIR"
|
||||||
RUN pip install -e .
|
RUN pip install -e .
|
||||||
|
|
|
@ -1,3 +1,24 @@
|
||||||
|
"""
|
||||||
|
ArchiveBox config definitions (including defaults and dynamic config options).
|
||||||
|
|
||||||
|
Config Usage Example:
|
||||||
|
|
||||||
|
archivebox config --set MEDIA_TIMEOUT=600
|
||||||
|
env MEDIA_TIMEOUT=600 USE_COLOR=False ... archivebox [subcommand] ...
|
||||||
|
|
||||||
|
Config Precedence Order:
|
||||||
|
|
||||||
|
1. cli args (--update-all / --index-only / etc.)
|
||||||
|
2. shell environment vars (env USE_COLOR=False archivebox add '...')
|
||||||
|
3. config file (echo "SAVE_FAVICON=False" >> ArchiveBox.conf)
|
||||||
|
4. defaults (defined below in Python)
|
||||||
|
|
||||||
|
Documentation:
|
||||||
|
|
||||||
|
https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
__package__ = 'archivebox'
|
__package__ = 'archivebox'
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
@ -24,26 +45,9 @@ from .config_stubs import (
|
||||||
ConfigDefaultDict,
|
ConfigDefaultDict,
|
||||||
)
|
)
|
||||||
|
|
||||||
# precedence order for config:
|
############################### Config Schema ##################################
|
||||||
# 1. cli args (e.g. )
|
|
||||||
# 2. shell environment vars (env USE_COLOR=False archivebox add '...')
|
|
||||||
# 3. config file (echo "SAVE_FAVICON=False" >> ArchiveBox.conf)
|
|
||||||
# 4. defaults (defined below in Python)
|
|
||||||
|
|
||||||
#
|
CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
|
||||||
# env SHOW_PROGRESS=1 archivebox add '...'
|
|
||||||
# archivebox config --set TIMEOUT=600
|
|
||||||
#
|
|
||||||
|
|
||||||
# ******************************************************************************
|
|
||||||
# Documentation: https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration
|
|
||||||
# Use the 'env' command to pass config options to ArchiveBox. e.g.:
|
|
||||||
# env USE_COLOR=True CHROME_BINARY=chromium archivebox add < example.html
|
|
||||||
# ******************************************************************************
|
|
||||||
|
|
||||||
################################# User Config ##################################
|
|
||||||
|
|
||||||
CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
|
|
||||||
'SHELL_CONFIG': {
|
'SHELL_CONFIG': {
|
||||||
'IS_TTY': {'type': bool, 'default': lambda _: sys.stdout.isatty()},
|
'IS_TTY': {'type': bool, 'default': lambda _: sys.stdout.isatty()},
|
||||||
'USE_COLOR': {'type': bool, 'default': lambda c: c['IS_TTY']},
|
'USE_COLOR': {'type': bool, 'default': lambda c: c['IS_TTY']},
|
||||||
|
@ -179,21 +183,40 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
########################## Backwards-Compatibility #############################
|
||||||
|
|
||||||
|
|
||||||
# for backwards compatibility with old config files, check old/deprecated names for each key
|
# for backwards compatibility with old config files, check old/deprecated names for each key
|
||||||
CONFIG_ALIASES = {
|
CONFIG_ALIASES = {
|
||||||
alias: key
|
alias: key
|
||||||
for section in CONFIG_DEFAULTS.values()
|
for section in CONFIG_SCHEMA.values()
|
||||||
for key, default in section.items()
|
for key, default in section.items()
|
||||||
for alias in default.get('aliases', ())
|
for alias in default.get('aliases', ())
|
||||||
}
|
}
|
||||||
USER_CONFIG = {key for section in CONFIG_DEFAULTS.values() for key in section.keys()}
|
USER_CONFIG = {key for section in CONFIG_SCHEMA.values() for key in section.keys()}
|
||||||
|
|
||||||
def get_real_name(key: str) -> str:
|
def get_real_name(key: str) -> str:
|
||||||
|
"""get the current canonical name for a given deprecated config key"""
|
||||||
return CONFIG_ALIASES.get(key.upper().strip(), key.upper().strip())
|
return CONFIG_ALIASES.get(key.upper().strip(), key.upper().strip())
|
||||||
|
|
||||||
############################## Derived Config ##############################
|
|
||||||
|
|
||||||
# Constants
|
|
||||||
|
################################ Constants #####################################
|
||||||
|
|
||||||
|
PACKAGE_DIR_NAME = 'archivebox'
|
||||||
|
TEMPLATES_DIR_NAME = 'themes'
|
||||||
|
|
||||||
|
ARCHIVE_DIR_NAME = 'archive'
|
||||||
|
SOURCES_DIR_NAME = 'sources'
|
||||||
|
LOGS_DIR_NAME = 'logs'
|
||||||
|
STATIC_DIR_NAME = 'static'
|
||||||
|
SQL_INDEX_FILENAME = 'index.sqlite3'
|
||||||
|
JSON_INDEX_FILENAME = 'index.json'
|
||||||
|
HTML_INDEX_FILENAME = 'index.html'
|
||||||
|
ROBOTS_TXT_FILENAME = 'robots.txt'
|
||||||
|
FAVICON_FILENAME = 'favicon.ico'
|
||||||
|
CONFIG_FILENAME = 'ArchiveBox.conf'
|
||||||
|
|
||||||
DEFAULT_CLI_COLORS = {
|
DEFAULT_CLI_COLORS = {
|
||||||
'reset': '\033[00;00m',
|
'reset': '\033[00;00m',
|
||||||
|
@ -242,36 +265,12 @@ STATICFILE_EXTENSIONS = {
|
||||||
# html, htm, shtml, xhtml, xml, aspx, php, cgi
|
# html, htm, shtml, xhtml, xml, aspx, php, cgi
|
||||||
}
|
}
|
||||||
|
|
||||||
PACKAGE_DIR_NAME = 'archivebox'
|
|
||||||
TEMPLATES_DIR_NAME = 'themes'
|
|
||||||
|
|
||||||
ARCHIVE_DIR_NAME = 'archive'
|
|
||||||
SOURCES_DIR_NAME = 'sources'
|
|
||||||
LOGS_DIR_NAME = 'logs'
|
|
||||||
STATIC_DIR_NAME = 'static'
|
|
||||||
SQL_INDEX_FILENAME = 'index.sqlite3'
|
|
||||||
JSON_INDEX_FILENAME = 'index.json'
|
|
||||||
HTML_INDEX_FILENAME = 'index.html'
|
|
||||||
ROBOTS_TXT_FILENAME = 'robots.txt'
|
|
||||||
FAVICON_FILENAME = 'favicon.ico'
|
|
||||||
CONFIG_FILENAME = 'ArchiveBox.conf'
|
|
||||||
|
|
||||||
CONFIG_HEADER = (
|
|
||||||
"""# This is the config file for your ArchiveBox collection.
|
|
||||||
#
|
|
||||||
# You can add options here manually in INI format, or automatically by running:
|
|
||||||
# archivebox config --set KEY=VALUE
|
|
||||||
#
|
|
||||||
# If you modify this file manually, make sure to update your archive after by running:
|
|
||||||
# archivebox init
|
|
||||||
#
|
|
||||||
# A list of all possible config with documentation and examples can be found here:
|
|
||||||
# https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration
|
|
||||||
|
|
||||||
""")
|
|
||||||
|
|
||||||
|
|
||||||
DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
|
############################## Derived Config ##################################
|
||||||
|
|
||||||
|
|
||||||
|
DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
|
||||||
'TERM_WIDTH': {'default': lambda c: lambda: shutil.get_terminal_size((100, 10)).columns},
|
'TERM_WIDTH': {'default': lambda c: lambda: shutil.get_terminal_size((100, 10)).columns},
|
||||||
'USER': {'default': lambda c: getpass.getuser() or os.getlogin()},
|
'USER': {'default': lambda c: getpass.getuser() or os.getlogin()},
|
||||||
'ANSI': {'default': lambda c: DEFAULT_CLI_COLORS if c['USE_COLOR'] else {k: '' for k in DEFAULT_CLI_COLORS.keys()}},
|
'ANSI': {'default': lambda c: DEFAULT_CLI_COLORS if c['USE_COLOR'] else {k: '' for k in DEFAULT_CLI_COLORS.keys()}},
|
||||||
|
@ -359,6 +358,7 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
|
||||||
|
|
||||||
################################### Helpers ####################################
|
################################### Helpers ####################################
|
||||||
|
|
||||||
|
|
||||||
def load_config_val(key: str,
|
def load_config_val(key: str,
|
||||||
default: ConfigDefaultValue=None,
|
default: ConfigDefaultValue=None,
|
||||||
type: Optional[Type]=None,
|
type: Optional[Type]=None,
|
||||||
|
@ -437,6 +437,20 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
|
||||||
|
|
||||||
from .system import atomic_write
|
from .system import atomic_write
|
||||||
|
|
||||||
|
CONFIG_HEADER = (
|
||||||
|
"""# This is the config file for your ArchiveBox collection.
|
||||||
|
#
|
||||||
|
# You can add options here manually in INI format, or automatically by running:
|
||||||
|
# archivebox config --set KEY=VALUE
|
||||||
|
#
|
||||||
|
# If you modify this file manually, make sure to update your archive after by running:
|
||||||
|
# archivebox init
|
||||||
|
#
|
||||||
|
# A list of all possible config with documentation and examples can be found here:
|
||||||
|
# https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration
|
||||||
|
|
||||||
|
""")
|
||||||
|
|
||||||
out_dir = out_dir or Path(os.getenv('OUTPUT_DIR', '.')).resolve()
|
out_dir = out_dir or Path(os.getenv('OUTPUT_DIR', '.')).resolve()
|
||||||
config_path = Path(out_dir) / CONFIG_FILENAME
|
config_path = Path(out_dir) / CONFIG_FILENAME
|
||||||
|
|
||||||
|
@ -450,7 +464,7 @@ def write_config_file(config: Dict[str, str], out_dir: str=None) -> ConfigDict:
|
||||||
with open(config_path, 'r') as old:
|
with open(config_path, 'r') as old:
|
||||||
atomic_write(f'{config_path}.bak', old.read())
|
atomic_write(f'{config_path}.bak', old.read())
|
||||||
|
|
||||||
find_section = lambda key: [name for name, opts in CONFIG_DEFAULTS.items() if key in opts][0]
|
find_section = lambda key: [name for name, opts in CONFIG_SCHEMA.items() if key in opts][0]
|
||||||
|
|
||||||
# Set up sections in empty config file
|
# Set up sections in empty config file
|
||||||
for key, val in config.items():
|
for key, val in config.items():
|
||||||
|
@ -539,6 +553,8 @@ def load_config(defaults: ConfigDefaultDict,
|
||||||
|
|
||||||
# with open(os.path.join(config['OUTPUT_DIR'], CONFIG_FILENAME), 'w+') as f:
|
# with open(os.path.join(config['OUTPUT_DIR'], CONFIG_FILENAME), 'w+') as f:
|
||||||
|
|
||||||
|
|
||||||
|
# Logging Helpers
|
||||||
def stdout(*args, color: Optional[str]=None, prefix: str='', config: Optional[ConfigDict]=None) -> None:
|
def stdout(*args, color: Optional[str]=None, prefix: str='', config: Optional[ConfigDict]=None) -> None:
|
||||||
ansi = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI
|
ansi = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI
|
||||||
|
|
||||||
|
@ -570,6 +586,7 @@ def hint(text: Union[Tuple[str, ...], List[str], str], prefix=' ', config: Op
|
||||||
stderr('{} {}'.format(prefix, line))
|
stderr('{} {}'.format(prefix, line))
|
||||||
|
|
||||||
|
|
||||||
|
# Dependency Metadata Helpers
|
||||||
def bin_version(binary: Optional[str]) -> Optional[str]:
|
def bin_version(binary: Optional[str]) -> Optional[str]:
|
||||||
"""check the presence and return valid version line of a specified binary"""
|
"""check the presence and return valid version line of a specified binary"""
|
||||||
|
|
||||||
|
@ -837,6 +854,14 @@ def get_dependency_info(config: ConfigDict) -> ConfigValue:
|
||||||
'enabled': config['USE_RIPGREP'],
|
'enabled': config['USE_RIPGREP'],
|
||||||
'is_valid': bool(config['RIPGREP_VERSION']),
|
'is_valid': bool(config['RIPGREP_VERSION']),
|
||||||
},
|
},
|
||||||
|
# TODO: add an entry for the sonic search backend?
|
||||||
|
# 'SONIC_BINARY': {
|
||||||
|
# 'path': bin_path(config['SONIC_BINARY']),
|
||||||
|
# 'version': config['SONIC_VERSION'],
|
||||||
|
# 'hash': bin_hash(config['SONIC_BINARY']),
|
||||||
|
# 'enabled': config['USE_SONIC'],
|
||||||
|
# 'is_valid': bool(config['SONIC_VERSION']),
|
||||||
|
# },
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_chrome_info(config: ConfigDict) -> ConfigValue:
|
def get_chrome_info(config: ConfigDict) -> ConfigValue:
|
||||||
|
@ -852,28 +877,51 @@ def get_chrome_info(config: ConfigDict) -> ConfigValue:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
################################## Load Config #################################
|
# ******************************************************************************
|
||||||
|
# ******************************************************************************
|
||||||
|
# ******************************** Load Config *********************************
|
||||||
|
# ******* (compile the defaults, configs, and metadata all into CONFIG) ********
|
||||||
|
# ******************************************************************************
|
||||||
|
# ******************************************************************************
|
||||||
|
|
||||||
|
|
||||||
def load_all_config():
|
def load_all_config():
|
||||||
CONFIG: ConfigDict = {}
|
CONFIG: ConfigDict = {}
|
||||||
for section_name, section_config in CONFIG_DEFAULTS.items():
|
for section_name, section_config in CONFIG_SCHEMA.items():
|
||||||
CONFIG = load_config(section_config, CONFIG)
|
CONFIG = load_config(section_config, CONFIG)
|
||||||
|
|
||||||
return load_config(DERIVED_CONFIG_DEFAULTS, CONFIG)
|
return load_config(DYNAMIC_CONFIG_SCHEMA, CONFIG)
|
||||||
|
|
||||||
|
# add all final config values in CONFIG to globals in this file
|
||||||
CONFIG = load_all_config()
|
CONFIG = load_all_config()
|
||||||
globals().update(CONFIG)
|
globals().update(CONFIG)
|
||||||
|
# this lets us do: from .config import DEBUG, MEDIA_TIMEOUT, ...
|
||||||
|
|
||||||
# Timezone set as UTC
|
|
||||||
|
# ******************************************************************************
|
||||||
|
# ******************************************************************************
|
||||||
|
# ******************************************************************************
|
||||||
|
# ******************************************************************************
|
||||||
|
# ******************************************************************************
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
########################### System Environment Setup ###########################
|
||||||
|
|
||||||
|
|
||||||
|
# Set timezone to UTC and umask to OUTPUT_PERMISSIONS
|
||||||
os.environ["TZ"] = 'UTC'
|
os.environ["TZ"] = 'UTC'
|
||||||
|
os.umask(0o777 - int(OUTPUT_PERMISSIONS, base=8)) # noqa: F821
|
||||||
|
|
||||||
# add ./node_modules/.bin to $PATH so we can use node scripts in extractors
|
# add ./node_modules/.bin to $PATH so we can use node scripts in extractors
|
||||||
NODE_BIN_PATH = str((Path(CONFIG["OUTPUT_DIR"]).absolute() / 'node_modules' / '.bin'))
|
NODE_BIN_PATH = str((Path(CONFIG["OUTPUT_DIR"]).absolute() / 'node_modules' / '.bin'))
|
||||||
sys.path.append(NODE_BIN_PATH)
|
sys.path.append(NODE_BIN_PATH)
|
||||||
|
|
||||||
|
|
||||||
############################## Importable Checkers #############################
|
|
||||||
|
|
||||||
|
########################### Config Validity Checkers ###########################
|
||||||
|
|
||||||
|
|
||||||
def check_system_config(config: ConfigDict=CONFIG) -> None:
|
def check_system_config(config: ConfigDict=CONFIG) -> None:
|
||||||
### Check system environment
|
### Check system environment
|
||||||
|
@ -1031,5 +1079,3 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
|
||||||
f'No database file {SQL_INDEX_FILENAME} found in OUTPUT_DIR: {config["OUTPUT_DIR"]}')
|
f'No database file {SQL_INDEX_FILENAME} found in OUTPUT_DIR: {config["OUTPUT_DIR"]}')
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
raise SystemExit(2)
|
raise SystemExit(2)
|
||||||
|
|
||||||
os.umask(0o777 - int(OUTPUT_PERMISSIONS, base=8)) # noqa: F821
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue