feat: Add tests for remove command
This commit is contained in:
parent
a8ed72501d
commit
fe9604a772
4 changed files with 68 additions and 8 deletions
|
@ -9,7 +9,6 @@ from ..index.schema import Link
|
|||
from ..index import (
|
||||
load_link_details,
|
||||
write_link_details,
|
||||
write_main_index,
|
||||
)
|
||||
from ..util import enforce_types
|
||||
from ..logging_util import (
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
__package__ = 'archivebox.index'
|
||||
|
||||
import re
|
||||
import os
|
||||
import shutil
|
||||
import json as pyjson
|
||||
|
@ -373,7 +372,7 @@ LINK_FILTERS = {
|
|||
'exact': lambda pattern: Q(url=pattern),
|
||||
'substring': lambda pattern: Q(url__icontains=pattern),
|
||||
'regex': lambda pattern: Q(url__iregex=pattern),
|
||||
'domain': lambda pattern: Q(domain=pattern),
|
||||
'domain': lambda pattern: Q(url__istartswith=f"http://{pattern}") | Q(url__istartswith=f"https://{pattern}") | Q(url__istartswith=f"ftp://{pattern}"),
|
||||
}
|
||||
|
||||
@enforce_types
|
||||
|
|
|
@ -24,7 +24,6 @@ def parse_sql_main_index(out_dir: str=OUTPUT_DIR) -> Iterator[Link]:
|
|||
@enforce_types
|
||||
def remove_from_sql_main_index(snapshots: QuerySet, out_dir: str=OUTPUT_DIR) -> None:
|
||||
setup_django(out_dir, check_db=True)
|
||||
from core.models import Snapshot
|
||||
from django.db import transaction
|
||||
|
||||
with transaction.atomic():
|
||||
|
|
|
@ -1,8 +1,71 @@
|
|||
import os
|
||||
import sqlite3
|
||||
|
||||
from .fixtures import *
|
||||
|
||||
def test_remove_single_page(tmp_path, process, disable_extractors_dict):
    """Check that removing one archived URL reports a match and empties the SQL index.

    Adds a single page, removes it by its full URL with ``--yes``, asserts the
    CLI reports exactly one matching URL, and then asserts that the
    ``core_snapshot`` table in ``index.sqlite3`` contains no rows.

    ``process`` and ``disable_extractors_dict`` are pytest fixtures defined
    outside this file; ``disable_extractors_dict`` is passed as the
    environment of the ``add`` command.
    # NOTE(review): presumably ``process`` has already initialised an archive
    # in ``tmp_path`` -- confirm against the fixtures module.
    """
    os.chdir(tmp_path)
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)

    remove_process = subprocess.run(['archivebox', 'remove', 'http://127.0.0.1:8080/static/example.com.html', '--yes'], capture_output=True)
    assert "Found 1 matching URLs to remove" in remove_process.stdout.decode("utf-8")

    # Relative path: resolved against tmp_path because of the chdir above.
    conn = sqlite3.connect("index.sqlite3")
    try:
        # Read-only query, so no commit is needed afterwards.
        count = conn.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()[0]
    finally:
        # Always release the database handle, even if the query raises.
        conn.close()

    assert count == 0
|
||||
|
||||
|
||||
def test_remove_single_page_filesystem(tmp_path, process, disable_extractors_dict):
    """Check that ``remove --delete`` empties the ``archive`` directory.

    Adds one page, asserts ``tmp_path / "archive"`` has at least one entry,
    removes the same URL with ``--yes --delete``, and asserts the directory
    is then empty.

    ``disable_extractors_dict`` is used as the environment of the ``add``
    command only.
    # NOTE(review): relies on the ``process`` fixture having prepared the
    # archive in ``tmp_path`` (no chdir happens here) -- confirm.
    """
    page_url = 'http://127.0.0.1:8080/static/example.com.html'
    archive_dir = tmp_path / "archive"

    subprocess.run(['archivebox', 'add', page_url], capture_output=True, env=disable_extractors_dict)
    # Path objects are always truthy, so any() is true exactly when the
    # directory has at least one entry.
    assert any(archive_dir.iterdir())

    subprocess.run(['archivebox', 'remove', page_url, '--yes', '--delete'], capture_output=True)

    assert not list(archive_dir.iterdir())
|
||||
|
||||
def test_remove_regex(tmp_path, process, disable_extractors_dict):
    """Check that a catch-all regex filter removes every archived page.

    Adds two pages, asserts the ``archive`` directory is non-empty, runs
    ``remove --filter-type=regex '.*' --yes --delete``, and asserts the
    directory is empty afterwards.

    ``disable_extractors_dict`` is used as the environment of each ``add``.
    # NOTE(review): relies on the ``process`` fixture having prepared the
    # archive in ``tmp_path`` -- confirm.
    """
    page_urls = (
        'http://127.0.0.1:8080/static/example.com.html',
        'http://127.0.0.1:8080/static/iana.org.html',
    )
    archive_dir = tmp_path / "archive"

    # Same order as before: example.com first, then iana.org.
    for page_url in page_urls:
        subprocess.run(['archivebox', 'add', page_url], capture_output=True, env=disable_extractors_dict)
    assert any(archive_dir.iterdir())

    subprocess.run(['archivebox', 'remove', '--filter-type=regex', '.*', '--yes', '--delete'], capture_output=True)

    assert not list(archive_dir.iterdir())
|
||||
|
||||
def test_remove_exact(tmp_path, process, disable_extractors_dict):
    """Check that an exact-URL filter removes only the matching page.

    Adds two pages, asserts the ``archive`` directory is non-empty, removes
    the iana.org page with ``--filter-type=exact --yes --delete``, and asserts
    exactly one entry is left in the directory.

    ``disable_extractors_dict`` is used as the environment of each ``add``.
    # NOTE(review): relies on the ``process`` fixture having prepared the
    # archive in ``tmp_path`` -- confirm.
    """
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=disable_extractors_dict)
    assert list((tmp_path / "archive").iterdir()) != []

    # The command's result is not inspected; only the filesystem effect is checked.
    subprocess.run(['archivebox', 'remove', '--filter-type=exact', 'http://127.0.0.1:8080/static/iana.org.html', '--yes', '--delete'], capture_output=True)

    assert len(list((tmp_path / "archive").iterdir())) == 1
|
||||
|
||||
def test_remove_substr(tmp_path, process, disable_extractors_dict):
    """Check that a substring filter removes only pages whose URL contains it.

    Adds two pages, asserts the ``archive`` directory is non-empty, runs
    ``remove --filter-type=substring example.com --yes --delete``, and asserts
    exactly one entry is left in the directory.

    ``disable_extractors_dict`` is used as the environment of each ``add``.
    # NOTE(review): relies on the ``process`` fixture having prepared the
    # archive in ``tmp_path`` -- confirm.
    """
    archive_dir = tmp_path / "archive"
    add_env = disable_extractors_dict

    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=add_env)
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=add_env)
    assert any(archive_dir.iterdir())

    remove_cmd = ['archivebox', 'remove', '--filter-type=substring', 'example.com', '--yes', '--delete']
    subprocess.run(remove_cmd, capture_output=True)

    remaining = list(archive_dir.iterdir())
    assert len(remaining) == 1
|
||||
|
||||
def test_remove_domain(tmp_path, process, disable_extractors_dict):
    """Check that a domain filter removes every page served from that host.

    Adds two pages from ``127.0.0.1:8080``, asserts the ``archive`` directory
    is non-empty, runs ``remove --filter-type=domain 127.0.0.1 --yes --delete``,
    and asserts that both the ``archive`` directory and the ``core_snapshot``
    table in ``index.sqlite3`` are empty.

    ``disable_extractors_dict`` is used as the environment of each ``add``.
    # NOTE(review): there is no chdir here, so the relative "index.sqlite3"
    # path assumes the ``process`` fixture left the working directory at
    # ``tmp_path`` -- confirm against the fixtures module.
    """
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=disable_extractors_dict)
    assert list((tmp_path / "archive").iterdir()) != []

    # The command's result is not inspected; only its effects are checked below.
    subprocess.run(['archivebox', 'remove', '--filter-type=domain', '127.0.0.1', '--yes', '--delete'], capture_output=True)

    assert len(list((tmp_path / "archive").iterdir())) == 0

    conn = sqlite3.connect("index.sqlite3")
    try:
        # Read-only query, so no commit is needed afterwards.
        count = conn.execute("SELECT COUNT(*) FROM core_snapshot").fetchone()[0]
    finally:
        # Always release the database handle, even if the query raises.
        conn.close()

    assert count == 0
|
Loading…
Reference in a new issue