1
0
Fork 0

feat: Add tests for remove command

This commit is contained in:
Cristian 2020-08-21 13:32:31 -05:00 committed by Cristian Vargas
parent a8ed72501d
commit fe9604a772
4 changed files with 68 additions and 8 deletions

View file

@@ -9,7 +9,6 @@ from ..index.schema import Link
from ..index import ( from ..index import (
load_link_details, load_link_details,
write_link_details, write_link_details,
write_main_index,
) )
from ..util import enforce_types from ..util import enforce_types
from ..logging_util import ( from ..logging_util import (

View file

@@ -1,6 +1,5 @@
__package__ = 'archivebox.index' __package__ = 'archivebox.index'
import re
import os import os
import shutil import shutil
import json as pyjson import json as pyjson
@@ -373,7 +372,7 @@ LINK_FILTERS = {
'exact': lambda pattern: Q(url=pattern), 'exact': lambda pattern: Q(url=pattern),
'substring': lambda pattern: Q(url__icontains=pattern), 'substring': lambda pattern: Q(url__icontains=pattern),
'regex': lambda pattern: Q(url__iregex=pattern), 'regex': lambda pattern: Q(url__iregex=pattern),
'domain': lambda pattern: Q(domain=pattern), 'domain': lambda pattern: Q(url__istartswith=f"http://{pattern}") | Q(url__istartswith=f"https://{pattern}") | Q(url__istartswith=f"ftp://{pattern}"),
} }
@enforce_types @enforce_types

View file

@@ -24,7 +24,6 @@ def parse_sql_main_index(out_dir: str=OUTPUT_DIR) -> Iterator[Link]:
@enforce_types @enforce_types
def remove_from_sql_main_index(snapshots: QuerySet, out_dir: str=OUTPUT_DIR) -> None: def remove_from_sql_main_index(snapshots: QuerySet, out_dir: str=OUTPUT_DIR) -> None:
setup_django(out_dir, check_db=True) setup_django(out_dir, check_db=True)
from core.models import Snapshot
from django.db import transaction from django.db import transaction
with transaction.atomic(): with transaction.atomic():

View file

@@ -1,8 +1,71 @@
import os
import sqlite3
from .fixtures import * from .fixtures import *
def test_remove_leaves_index_in_consistent_state(tmp_path, process, disable_extractors_dict): def test_remove_single_page(tmp_path, process, disable_extractors_dict):
os.chdir(tmp_path) os.chdir(tmp_path)
subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict) subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
remove_process = subprocess.run(['archivebox', 'remove', '127.0.0.1:8080/static/example.com.html', '--yes'], capture_output=True) remove_process = subprocess.run(['archivebox', 'remove', 'http://127.0.0.1:8080/static/example.com.html', '--yes'], capture_output=True)
list_process = subprocess.run(['archivebox', 'list'], capture_output=True) assert "Found 1 matching URLs to remove" in remove_process.stdout.decode("utf-8")
assert "Warning: SQL index does not match JSON index!" not in list_process.stderr.decode("utf-8")
conn = sqlite3.connect("index.sqlite3")
c = conn.cursor()
count = c.execute("SELECT COUNT() from core_snapshot").fetchone()[0]
conn.commit()
conn.close()
assert count == 0
def test_remove_single_page_filesystem(tmp_path, process, disable_extractors_dict):
    """`remove --delete` wipes the snapshot's directory from archive/, not just the index entry."""
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
    # Sanity check: the add actually created a snapshot directory before we remove it.
    assert list((tmp_path / "archive").iterdir()) != []
    subprocess.run(['archivebox', 'remove', 'http://127.0.0.1:8080/static/example.com.html', '--yes', '--delete'], capture_output=True)
    assert list((tmp_path / "archive").iterdir()) == []
def test_remove_regex(tmp_path, process, disable_extractors_dict):
    """A catch-all regex filter (`.*`) combined with --delete empties the archive/ directory."""
    urls = (
        'http://127.0.0.1:8080/static/example.com.html',
        'http://127.0.0.1:8080/static/iana.org.html',
    )
    for url in urls:
        subprocess.run(['archivebox', 'add', url], capture_output=True, env=disable_extractors_dict)
    archive_dir = tmp_path / "archive"
    # Sanity check: both adds produced snapshot directories.
    assert list(archive_dir.iterdir()) != []
    subprocess.run(['archivebox', 'remove', '--filter-type=regex', '.*', '--yes', '--delete'], capture_output=True)
    assert list(archive_dir.iterdir()) == []
def test_remove_exact(tmp_path, process, disable_extractors_dict):
    """`remove --filter-type=exact` deletes only the snapshot whose URL matches exactly.

    Fix: the original bound the remove subprocess result to an unused local
    (`remove_process`); the result was never inspected, so the binding is dropped.
    """
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=disable_extractors_dict)
    # Sanity check: both adds produced snapshot directories.
    assert list((tmp_path / "archive").iterdir()) != []
    subprocess.run(['archivebox', 'remove', '--filter-type=exact', 'http://127.0.0.1:8080/static/iana.org.html', '--yes', '--delete'], capture_output=True)
    # Only the example.com snapshot should survive on disk.
    assert len(list((tmp_path / "archive").iterdir())) == 1
def test_remove_substr(tmp_path, process, disable_extractors_dict):
    """A substring filter ('example.com') removes only the matching snapshot, leaving the other page archived."""
    pages = [
        'http://127.0.0.1:8080/static/example.com.html',
        'http://127.0.0.1:8080/static/iana.org.html',
    ]
    for page in pages:
        subprocess.run(['archivebox', 'add', page], capture_output=True, env=disable_extractors_dict)
    archive_dir = tmp_path / "archive"
    # Sanity check: both adds produced snapshot directories.
    assert list(archive_dir.iterdir()) != []
    subprocess.run(['archivebox', 'remove', '--filter-type=substring', 'example.com', '--yes', '--delete'], capture_output=True)
    # Exactly one snapshot (iana.org) should remain.
    assert len(list(archive_dir.iterdir())) == 1
def test_remove_domain(tmp_path, process, disable_extractors_dict):
    """`remove --filter-type=domain` deletes every snapshot on the domain, both on disk and in the SQL index.

    Fixes over the original:
    - dropped the unused `remove_process` local (result was never inspected),
    - dropped the superfluous `conn.commit()` after a read-only SELECT,
    - used `Connection.execute()` directly instead of a manual cursor, and
    - closed the connection in a `finally` so it isn't leaked if the query raises.
    """
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=disable_extractors_dict)
    # Sanity check: both adds produced snapshot directories.
    assert list((tmp_path / "archive").iterdir()) != []
    subprocess.run(['archivebox', 'remove', '--filter-type=domain', '127.0.0.1', '--yes', '--delete'], capture_output=True)
    # Both test pages live on 127.0.0.1, so the archive dir must be empty.
    assert len(list((tmp_path / "archive").iterdir())) == 0
    # The SQL index must agree with the filesystem: zero snapshots left.
    # NOTE(review): relies on the cwd being the ArchiveBox data dir (process fixture) — confirm.
    conn = sqlite3.connect("index.sqlite3")
    try:
        count = conn.execute("SELECT COUNT() from core_snapshot").fetchone()[0]
    finally:
        conn.close()
    assert count == 0