From d77c770c47143dca0909e841cff93e3e47e272c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E3=81=B5=E3=81=81?= Date: Tue, 14 Mar 2023 20:29:41 +0900 Subject: [PATCH] add CHROME_TIMEOUT args MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: ふぁ --- archivebox/config.py | 2 ++ archivebox/config_stubs.py | 1 + archivebox/extractors/dom.py | 2 +- archivebox/extractors/pdf.py | 2 +- archivebox/extractors/screenshot.py | 2 +- archivebox/extractors/singlefile.py | 2 +- archivebox/util.py | 4 ++-- 7 files changed, 9 insertions(+), 6 deletions(-) diff --git a/archivebox/config.py b/archivebox/config.py index dbfb1a4f..6d7c8cb5 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -139,6 +139,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = { 'COOKIES_FILE': {'type': str, 'default': None}, 'CHROME_USER_DATA_DIR': {'type': str, 'default': None}, + 'CHROME_TIMEOUT': {'type': int, 'default': 0}, 'CHROME_HEADLESS': {'type': bool, 'default': True}, 'CHROME_SANDBOX': {'type': bool, 'default': lambda c: not c['IN_DOCKER']}, 'YOUTUBEDL_ARGS': {'type': list, 'default': lambda c: [ @@ -981,6 +982,7 @@ def get_chrome_info(config: ConfigDict) -> ConfigValue: 'RESOLUTION': config['RESOLUTION'], 'CHECK_SSL_VALIDITY': config['CHECK_SSL_VALIDITY'], 'CHROME_BINARY': bin_path(config['CHROME_BINARY']), + 'CHROME_TIMEOUT':config['CHROME_TIMEOUT'], 'CHROME_HEADLESS': config['CHROME_HEADLESS'], 'CHROME_SANDBOX': config['CHROME_SANDBOX'], 'CHROME_USER_AGENT': config['CHROME_USER_AGENT'], diff --git a/archivebox/config_stubs.py b/archivebox/config_stubs.py index ead541a5..2c42e808 100644 --- a/archivebox/config_stubs.py +++ b/archivebox/config_stubs.py @@ -74,6 +74,7 @@ class ConfigDict(BaseConfig, total=False): CHROME_USER_AGENT: str COOKIES_FILE: Union[str, Path, None] CHROME_USER_DATA_DIR: Union[str, Path, None] + CHROME_TIMEOUT: int CHROME_HEADLESS: bool CHROME_SANDBOX: bool diff --git a/archivebox/extractors/dom.py b/archivebox/extractors/dom.py index e1c3571a..162ae38b 100644 --- a/archivebox/extractors/dom.py +++ b/archivebox/extractors/dom.py @@ -39,7 +39,7 @@ def save_dom(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> output: ArchiveOutput = 'output.html' output_path = out_dir / output cmd = [ - *chrome_args(TIMEOUT=timeout), + *chrome_args(), '--dump-dom', link.url ] diff --git a/archivebox/extractors/pdf.py b/archivebox/extractors/pdf.py index 7138206c..9b256015 100644 --- a/archivebox/extractors/pdf.py +++ b/archivebox/extractors/pdf.py @@ -37,7 +37,7 @@ def save_pdf(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> out_dir = out_dir or Path(link.link_dir) output: ArchiveOutput = 'output.pdf' cmd = [ - *chrome_args(TIMEOUT=timeout), + *chrome_args(), '--print-to-pdf', link.url, ] diff --git a/archivebox/extractors/screenshot.py b/archivebox/extractors/screenshot.py index cc748bf6..a50f5896 100644 --- a/archivebox/extractors/screenshot.py +++ b/archivebox/extractors/screenshot.py @@ -37,7 +37,7 @@ def save_screenshot(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO out_dir = out_dir or Path(link.link_dir) output: ArchiveOutput = 'screenshot.png' cmd = [ - *chrome_args(TIMEOUT=timeout), + *chrome_args(), '--screenshot', link.url, ] diff --git a/archivebox/extractors/singlefile.py b/archivebox/extractors/singlefile.py index f7b1b686..e3860527 100644 --- a/archivebox/extractors/singlefile.py +++ b/archivebox/extractors/singlefile.py @@ -42,7 +42,7 @@ def save_singlefile(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO out_dir = out_dir or Path(link.link_dir) output = "singlefile.html" - browser_args = chrome_args(TIMEOUT=0) + browser_args = chrome_args(CHROME_TIMEOUT=0) # SingleFile CLI Docs: https://github.com/gildas-lormeau/SingleFile/tree/master/cli browser_args = '--browser-args={}'.format(json.dumps(browser_args[1:])) diff --git a/archivebox/util.py b/archivebox/util.py index 1b1006e1..fe6850ea 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -260,8 +260,8 @@ def chrome_args(**options) -> List[str]: if options['RESOLUTION']: cmd_args += ('--window-size={}'.format(options['RESOLUTION']),) - #if options['TIMEOUT']: - # cmd_args += ('--timeout={}'.format(options['TIMEOUT'] * 1000),) + if options['CHROME_TIMEOUT']: + cmd_args += ('--timeout={}'.format(options['CHROME_TIMEOUT'] * 1000),) if options['CHROME_USER_DATA_DIR']: cmd_args.append('--user-data-dir={}'.format(options['CHROME_USER_DATA_DIR']))