From 438203f4cec49e92c49976d57788be6b188f173e Mon Sep 17 00:00:00 2001 From: Cristian Date: Fri, 3 Jul 2020 12:54:21 -0500 Subject: [PATCH] test: add basic download_url test --- tests/test_util.py | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/tests/test_util.py b/tests/test_util.py index 19ed31c0..1497de5a 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,21 +1,5 @@ -#@enforce_types -#def download_url(url: str, timeout: int=None) -> str: -# """Download the contents of a remote url and return the text""" -# from .config import TIMEOUT, CHECK_SSL_VALIDITY, WGET_USER_AGENT -# timeout = timeout or TIMEOUT -# response = requests.get( -# url, -# headers={'User-Agent': WGET_USER_AGENT}, -# verify=CHECK_SSL_VALIDITY, -# timeout=timeout, -# ) -# if response.headers.get('Content-Type') == 'application/rss+xml': -# # Based on https://github.com/scrapy/w3lib/blob/master/w3lib/encoding.py -# _TEMPLATE = r'''%s\s*=\s*["']?\s*%s\s*["']?''' -# _XML_ENCODING_RE = _TEMPLATE % ('encoding', r'(?P<xmlcharset>[\w-]+)') -# _BODY_ENCODING_PATTERN = r'<\s*(\?xml\s[^>]+%s)' % (_XML_ENCODING_RE) -# _BODY_ENCODING_STR_RE = re.compile(_BODY_ENCODING_PATTERN, re.I | re.VERBOSE) -# match = _BODY_ENCODING_STR_RE.search(response.text[:1024]) -# if match: -# response.encoding = match.group('xmlcharset') -# return response.text \ No newline at end of file +from archivebox import util + +def test_download_url_downloads_content(): + text = util.download_url("https://example.com") + assert "Example Domain" in text \ No newline at end of file