1
0
Fork 0

tests: Add mechanism to avoid using extractors that we are not testing

This commit is contained in:
Cristian 2020-08-04 08:42:30 -05:00
parent 3c5c6a689e
commit 5429096c30
7 changed files with 57 additions and 30 deletions

View file

@ -7,4 +7,19 @@ import pytest
def process(tmp_path): def process(tmp_path):
os.chdir(tmp_path) os.chdir(tmp_path)
process = subprocess.run(['archivebox', 'init'], capture_output=True) process = subprocess.run(['archivebox', 'init'], capture_output=True)
return process return process
@pytest.fixture
def disable_extractors_dict():
    """Build an environment mapping with the listed extractor switches set to "false".

    The result is a plain dict copy of the current process environment, so
    changing it does not touch ``os.environ`` itself. It is meant to be handed
    to a subprocess call through its ``env=`` argument.
    """
    # Every switch below is forced to the string "false" in the returned mapping.
    switched_off = (
        "USE_WGET",
        "USE_SINGLEFILE",
        "SAVE_PDF",
        "SAVE_SCREENSHOT",
        "SAVE_DOM",
        "USE_GIT",
        "SAVE_MEDIA",
        "SAVE_ARCHIVE_DOT_ORG",
    )
    overrides = dict.fromkeys(switched_off, "false")
    # Later entries win, so the overrides replace any inherited values.
    return {**os.environ, **overrides}

View file

@ -3,25 +3,30 @@ import json
from .fixtures import * from .fixtures import *
def test_depth_flag_is_accepted(process): def test_depth_flag_is_accepted(process, disable_extractors_dict):
arg_process = subprocess.run(["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html", "--depth=0"], capture_output=True) arg_process = subprocess.run(["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html", "--depth=0"],
capture_output=True, env=disable_extractors_dict)
assert 'unrecognized arguments: --depth' not in arg_process.stderr.decode("utf-8") assert 'unrecognized arguments: --depth' not in arg_process.stderr.decode("utf-8")
def test_depth_flag_fails_if_it_is_not_0_or_1(process): def test_depth_flag_fails_if_it_is_not_0_or_1(process, disable_extractors_dict):
arg_process = subprocess.run(["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html", "--depth=5"], capture_output=True) arg_process = subprocess.run(["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html", "--depth=5"],
capture_output=True, env=disable_extractors_dict)
assert 'invalid choice' in arg_process.stderr.decode("utf-8") assert 'invalid choice' in arg_process.stderr.decode("utf-8")
arg_process = subprocess.run(["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html", "--depth=-1"], capture_output=True) arg_process = subprocess.run(["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html", "--depth=-1"],
capture_output=True, env=disable_extractors_dict)
assert 'invalid choice' in arg_process.stderr.decode("utf-8") assert 'invalid choice' in arg_process.stderr.decode("utf-8")
def test_depth_flag_0_crawls_only_the_arg_page(tmp_path, process): def test_depth_flag_0_crawls_only_the_arg_page(tmp_path, process, disable_extractors_dict):
arg_process = subprocess.run(["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html", "--depth=0"], capture_output=True) arg_process = subprocess.run(["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html", "--depth=0"],
capture_output=True, env=disable_extractors_dict)
archived_item_path = list(tmp_path.glob('archive/**/*'))[0] archived_item_path = list(tmp_path.glob('archive/**/*'))[0]
with open(archived_item_path / "index.json", "r") as f: with open(archived_item_path / "index.json", "r") as f:
output_json = json.load(f) output_json = json.load(f)
assert output_json["base_url"] == "127.0.0.1:8080/static/example.com.html" assert output_json["base_url"] == "127.0.0.1:8080/static/example.com.html"
def test_depth_flag_1_crawls_the_page_AND_links(tmp_path, process): def test_depth_flag_1_crawls_the_page_AND_links(tmp_path, process, disable_extractors_dict):
arg_process = subprocess.run(["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html", "--depth=1"], capture_output=True) arg_process = subprocess.run(["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html", "--depth=1"],
capture_output=True, env=disable_extractors_dict)
with open(tmp_path / "index.json", "r") as f: with open(tmp_path / "index.json", "r") as f:
archive_file = f.read() archive_file = f.read()
assert "http://127.0.0.1:8080/static/example.com.html" in archive_file assert "http://127.0.0.1:8080/static/example.com.html" in archive_file

View file

@ -1,8 +1,10 @@
from .fixtures import * from .fixtures import *
from archivebox.extractors import ignore_methods, get_default_archive_methods, should_save_title from archivebox.extractors import ignore_methods, get_default_archive_methods, should_save_title
def test_wget_broken_pipe(tmp_path, process): def test_wget_broken_pipe(tmp_path, process, disable_extractors_dict):
add_process = subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True) disable_extractors_dict.update({"USE_WGET": "true"})
add_process = subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'],
capture_output=True, env=disable_extractors_dict)
assert "TypeError chmod_file(..., path: str) got unexpected NoneType argument path=None" not in add_process.stdout.decode("utf-8") assert "TypeError chmod_file(..., path: str) got unexpected NoneType argument path=None" not in add_process.stdout.decode("utf-8")
def test_ignore_methods(): def test_ignore_methods():
@ -12,10 +14,10 @@ def test_ignore_methods():
ignored = ignore_methods(['title']) ignored = ignore_methods(['title'])
assert should_save_title not in ignored assert should_save_title not in ignored
def test_singlefile_works(tmp_path, process, disable_extractors_dict):
disable_extractors_dict.update({"USE_SINGLEFILE": "true"})
def test_singlefile_works(tmp_path, process): add_process = subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'],
add_process = subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True) capture_output=True, env=disable_extractors_dict)
archived_item_path = list(tmp_path.glob('archive/**/*'))[0] archived_item_path = list(tmp_path.glob('archive/**/*'))[0]
output_file = archived_item_path / "singlefile.html" output_file = archived_item_path / "singlefile.html"
assert output_file.exists() assert output_file.exists()

View file

@ -18,9 +18,10 @@ def test_update(tmp_path, process):
update_process = subprocess.run(['archivebox', 'init'], capture_output=True) update_process = subprocess.run(['archivebox', 'init'], capture_output=True)
assert "Updating existing ArchiveBox collection in this folder" in update_process.stdout.decode("utf-8") assert "Updating existing ArchiveBox collection in this folder" in update_process.stdout.decode("utf-8")
def test_add_link(tmp_path, process): def test_add_link(tmp_path, process, disable_extractors_dict):
os.chdir(tmp_path) os.chdir(tmp_path)
add_process = subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True) add_process = subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'],
capture_output=True, env=disable_extractors_dict)
archived_item_path = list(tmp_path.glob('archive/**/*'))[0] archived_item_path = list(tmp_path.glob('archive/**/*'))[0]
assert "index.json" in [x.name for x in archived_item_path.iterdir()] assert "index.json" in [x.name for x in archived_item_path.iterdir()]
@ -33,9 +34,10 @@ def test_add_link(tmp_path, process):
output_html = f.read() output_html = f.read()
assert "Example Domain" in output_html assert "Example Domain" in output_html
def test_add_link_support_stdin(tmp_path, process): def test_add_link_support_stdin(tmp_path, process, disable_extractors_dict):
os.chdir(tmp_path) os.chdir(tmp_path)
stdin_process = subprocess.Popen(["archivebox", "add"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) stdin_process = subprocess.Popen(["archivebox", "add"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
env=disable_extractors_dict)
stdin_process.communicate(input="http://127.0.0.1:8080/static/example.com.html".encode()) stdin_process.communicate(input="http://127.0.0.1:8080/static/example.com.html".encode())
archived_item_path = list(tmp_path.glob('archive/**/*'))[0] archived_item_path = list(tmp_path.glob('archive/**/*'))[0]
@ -51,9 +53,10 @@ def test_correct_permissions_output_folder(tmp_path, process):
file_path = tmp_path / file file_path = tmp_path / file
assert oct(file_path.stat().st_mode)[-3:] == OUTPUT_PERMISSIONS assert oct(file_path.stat().st_mode)[-3:] == OUTPUT_PERMISSIONS
def test_correct_permissions_add_command_results(tmp_path, process): def test_correct_permissions_add_command_results(tmp_path, process, disable_extractors_dict):
os.chdir(tmp_path) os.chdir(tmp_path)
add_process = subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True) add_process = subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True,
env=disable_extractors_dict)
archived_item_path = list(tmp_path.glob('archive/**/*'))[0] archived_item_path = list(tmp_path.glob('archive/**/*'))[0]
for path in archived_item_path.iterdir(): for path in archived_item_path.iterdir():
assert oct(path.stat().st_mode)[-3:] == OUTPUT_PERMISSIONS assert oct(path.stat().st_mode)[-3:] == OUTPUT_PERMISSIONS

View file

@ -2,13 +2,14 @@ from pathlib import Path
from .fixtures import * from .fixtures import *
def test_oneshot_command_exists(tmp_path): def test_oneshot_command_exists(tmp_path, disable_extractors_dict):
os.chdir(tmp_path) os.chdir(tmp_path)
process = subprocess.run(['archivebox', 'oneshot'], capture_output=True) process = subprocess.run(['archivebox', 'oneshot'], capture_output=True, env=disable_extractors_dict)
assert not "invalid choice: 'oneshot'" in process.stderr.decode("utf-8") assert not "invalid choice: 'oneshot'" in process.stderr.decode("utf-8")
def test_oneshot_commad_saves_page_in_right_folder(tmp_path): def test_oneshot_commad_saves_page_in_right_folder(tmp_path, disable_extractors_dict):
process = subprocess.run(["archivebox", "oneshot", f"--out-dir={tmp_path}", "http://127.0.0.1:8080/static/example.com.html"], capture_output=True) process = subprocess.run(["archivebox", "oneshot", f"--out-dir={tmp_path}", "http://127.0.0.1:8080/static/example.com.html"],
capture_output=True, env=disable_extractors_dict)
items = ' '.join([str(x) for x in tmp_path.iterdir()]) items = ' '.join([str(x) for x in tmp_path.iterdir()])
current_path = ' '.join([str(x) for x in Path.cwd().iterdir()]) current_path = ' '.join([str(x) for x in Path.cwd().iterdir()])
assert "index.json" in items assert "index.json" in items

View file

@ -1,8 +1,8 @@
from .fixtures import * from .fixtures import *
def test_remove_leaves_index_in_consistent_state(tmp_path, process): def test_remove_leaves_index_in_consistent_state(tmp_path, process, disable_extractors_dict):
os.chdir(tmp_path) os.chdir(tmp_path)
subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True) subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
remove_process = subprocess.run(['archivebox', 'remove', '127.0.0.1:8080/static/example.com.html', '--yes'], capture_output=True) remove_process = subprocess.run(['archivebox', 'remove', '127.0.0.1:8080/static/example.com.html', '--yes'], capture_output=True)
list_process = subprocess.run(['archivebox', 'list'], capture_output=True) list_process = subprocess.run(['archivebox', 'list'], capture_output=True)
assert "Warning: SQL index does not match JSON index!" not in list_process.stderr.decode("utf-8") assert "Warning: SQL index does not match JSON index!" not in list_process.stderr.decode("utf-8")

View file

@ -1,12 +1,13 @@
from .fixtures import * from .fixtures import *
def test_title_is_htmlencoded_in_index_html(tmp_path, process): def test_title_is_htmlencoded_in_index_html(tmp_path, process, disable_extractors_dict):
""" """
https://github.com/pirate/ArchiveBox/issues/330 https://github.com/pirate/ArchiveBox/issues/330
Unencoded content should not be rendered as it facilitates xss injections Unencoded content should not be rendered as it facilitates xss injections
and breaks the layout. and breaks the layout.
""" """
add_process = subprocess.run(['archivebox', 'add', 'http://localhost:8080/static/title_with_html.com.html'], capture_output=True) add_process = subprocess.run(['archivebox', 'add', 'http://localhost:8080/static/title_with_html.com.html'],
capture_output=True, env=disable_extractors_dict)
with open(tmp_path / "index.html", "r") as f: with open(tmp_path / "index.html", "r") as f:
output_html = f.read() output_html = f.read()