From c47398851b642f83a184ad7f5a52a0575d723535 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Sat, 31 Oct 2020 07:56:51 -0400 Subject: [PATCH] nicer timeout hints --- archivebox/logging_util.py | 23 +++++++++++++++++------ archivebox/util.py | 6 ++++-- bin/test.sh | 2 +- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/archivebox/logging_util.py b/archivebox/logging_util.py index b404aa6d..431dbaa9 100644 --- a/archivebox/logging_util.py +++ b/archivebox/logging_util.py @@ -356,6 +356,21 @@ def log_archive_method_finished(result: "ArchiveResult"): ) if result.status == 'failed': + if result.output.__class__.__name__ == 'TimeoutExpired': + duration = (result.end_ts - result.start_ts).seconds + hint_header = [ + '{lightyellow}Extractor timed out after {}s.{reset}'.format(duration, **ANSI), + ] + else: + hint_header = [ + '{lightyellow}Extractor failed:{reset}'.format(**ANSI), + ' {reset}{} {red}{}{reset}'.format( + result.output.__class__.__name__.replace('ArchiveError', ''), + result.output, + **ANSI, + ), + ] + # Prettify error output hints string and limit to five lines hints = getattr(result.output, 'hints', None) or () if hints: @@ -365,14 +380,10 @@ def log_archive_method_finished(result: "ArchiveResult"): for line in hints[:5] if line.strip() ) + # Collect and prefix output lines with indentation output_lines = [ - '{lightyellow}Extractor failed:{reset}'.format(**ANSI), - ' {reset}{} {red}{}{reset}'.format( - result.output.__class__.__name__.replace('ArchiveError', ''), - result.output, - **ANSI, - ), + *hint_header, *hints, '{}Run to see full output:{}'.format(ANSI['lightred'], ANSI['reset']), *([' cd {};'.format(result.pwd)] if result.pwd else []), diff --git a/archivebox/util.py b/archivebox/util.py index fca3de80..ae827899 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -15,7 +15,7 @@ from datetime import datetime from dateparser import parse as dateparser import requests -from requests.exceptions import RequestException +from requests.exceptions import RequestException, ReadTimeout from base32_crockford import encode as base32_encode # type: ignore from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding @@ -186,10 +186,12 @@ def get_headers(url: str, timeout: int=None) -> str: headers={'User-Agent': WGET_USER_AGENT}, verify=CHECK_SSL_VALIDITY, timeout=timeout, - allow_redirects=True + allow_redirects=True, ) if response.status_code >= 400: raise RequestException + except ReadTimeout: + raise except RequestException: response = requests.get( url, diff --git a/bin/test.sh b/bin/test.sh index f19ca14a..3c472812 100755 --- a/bin/test.sh +++ b/bin/test.sh @@ -14,4 +14,4 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )" source "$DIR/.venv/bin/activate" -pytest +pytest -s