feat: Add tests for remove command
This commit is contained in:
parent
a8ed72501d
commit
fe9604a772
4 changed files with 68 additions and 8 deletions
|
@ -9,7 +9,6 @@ from ..index.schema import Link
|
||||||
from ..index import (
|
from ..index import (
|
||||||
load_link_details,
|
load_link_details,
|
||||||
write_link_details,
|
write_link_details,
|
||||||
write_main_index,
|
|
||||||
)
|
)
|
||||||
from ..util import enforce_types
|
from ..util import enforce_types
|
||||||
from ..logging_util import (
|
from ..logging_util import (
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
__package__ = 'archivebox.index'
|
__package__ = 'archivebox.index'
|
||||||
|
|
||||||
import re
|
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
import json as pyjson
|
import json as pyjson
|
||||||
|
@ -373,7 +372,7 @@ LINK_FILTERS = {
|
||||||
'exact': lambda pattern: Q(url=pattern),
|
'exact': lambda pattern: Q(url=pattern),
|
||||||
'substring': lambda pattern: Q(url__icontains=pattern),
|
'substring': lambda pattern: Q(url__icontains=pattern),
|
||||||
'regex': lambda pattern: Q(url__iregex=pattern),
|
'regex': lambda pattern: Q(url__iregex=pattern),
|
||||||
'domain': lambda pattern: Q(domain=pattern),
|
'domain': lambda pattern: Q(url__istartswith=f"http://{pattern}") | Q(url__istartswith=f"https://{pattern}") | Q(url__istartswith=f"ftp://{pattern}"),
|
||||||
}
|
}
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
|
|
|
@ -24,7 +24,6 @@ def parse_sql_main_index(out_dir: str=OUTPUT_DIR) -> Iterator[Link]:
|
||||||
@enforce_types
|
@enforce_types
|
||||||
def remove_from_sql_main_index(snapshots: QuerySet, out_dir: str=OUTPUT_DIR) -> None:
|
def remove_from_sql_main_index(snapshots: QuerySet, out_dir: str=OUTPUT_DIR) -> None:
|
||||||
setup_django(out_dir, check_db=True)
|
setup_django(out_dir, check_db=True)
|
||||||
from core.models import Snapshot
|
|
||||||
from django.db import transaction
|
from django.db import transaction
|
||||||
|
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
|
|
|
@ -1,8 +1,71 @@
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
|
||||||
from .fixtures import *
|
from .fixtures import *
|
||||||
|
|
||||||
def test_remove_leaves_index_in_consistent_state(tmp_path, process, disable_extractors_dict):
|
def test_remove_single_page(tmp_path, process, disable_extractors_dict):
|
||||||
os.chdir(tmp_path)
|
os.chdir(tmp_path)
|
||||||
subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
|
subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
|
||||||
remove_process = subprocess.run(['archivebox', 'remove', '127.0.0.1:8080/static/example.com.html', '--yes'], capture_output=True)
|
remove_process = subprocess.run(['archivebox', 'remove', 'http://127.0.0.1:8080/static/example.com.html', '--yes'], capture_output=True)
|
||||||
list_process = subprocess.run(['archivebox', 'list'], capture_output=True)
|
assert "Found 1 matching URLs to remove" in remove_process.stdout.decode("utf-8")
|
||||||
assert "Warning: SQL index does not match JSON index!" not in list_process.stderr.decode("utf-8")
|
|
||||||
|
conn = sqlite3.connect("index.sqlite3")
|
||||||
|
c = conn.cursor()
|
||||||
|
count = c.execute("SELECT COUNT() from core_snapshot").fetchone()[0]
|
||||||
|
conn.commit()
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
assert count == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_remove_single_page_filesystem(tmp_path, process, disable_extractors_dict):
    """Check that ``remove --delete`` leaves the ``archive`` directory empty."""
    page_url = 'http://127.0.0.1:8080/static/example.com.html'
    archive_dir = tmp_path / "archive"

    subprocess.run(
        ['archivebox', 'add', page_url],
        capture_output=True,
        env=disable_extractors_dict,
    )
    # Precondition: adding the page created at least one entry under archive/.
    assert len(list(archive_dir.iterdir())) > 0

    subprocess.run(
        ['archivebox', 'remove', page_url, '--yes', '--delete'],
        capture_output=True,
    )

    # With --delete, nothing may remain under archive/ afterwards.
    assert len(list(archive_dir.iterdir())) == 0
|
||||||
|
|
||||||
|
def test_remove_regex(tmp_path, process, disable_extractors_dict):
    """Check that a catch-all regex filter removes every archived page from disk."""
    archive_dir = tmp_path / "archive"
    page_urls = (
        'http://127.0.0.1:8080/static/example.com.html',
        'http://127.0.0.1:8080/static/iana.org.html',
    )

    # Archive both fixture pages, one `add` invocation per URL.
    for page_url in page_urls:
        subprocess.run(
            ['archivebox', 'add', page_url],
            capture_output=True,
            env=disable_extractors_dict,
        )
    assert len(list(archive_dir.iterdir())) > 0

    # '.*' is passed as the pattern, so both snapshots are expected to go.
    subprocess.run(
        ['archivebox', 'remove', '--filter-type=regex', '.*', '--yes', '--delete'],
        capture_output=True,
    )

    assert len(list(archive_dir.iterdir())) == 0
|
||||||
|
|
||||||
|
def test_remove_exact(tmp_path, process, disable_extractors_dict):
    """Check that an exact-URL filter removes only the snapshot for that URL.

    Two pages are archived, one is removed with ``--filter-type=exact`` and
    ``--delete``, and exactly one entry must remain under ``archive``.
    """
    archive_dir = tmp_path / "archive"

    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=disable_extractors_dict)
    assert list(archive_dir.iterdir()) != []

    # The result of the remove command is not inspected, so it is not bound
    # to a local (the original assigned it to an unused `remove_process`).
    subprocess.run(['archivebox', 'remove', '--filter-type=exact', 'http://127.0.0.1:8080/static/iana.org.html', '--yes', '--delete'], capture_output=True)

    # Only the non-matching page's directory should remain.
    assert len(list(archive_dir.iterdir())) == 1
|
||||||
|
|
||||||
|
def test_remove_substr(tmp_path, process, disable_extractors_dict):
    """Check that a substring filter removes only the snapshot whose URL contains it."""
    archive_dir = tmp_path / "archive"
    page_urls = (
        'http://127.0.0.1:8080/static/example.com.html',
        'http://127.0.0.1:8080/static/iana.org.html',
    )

    # Archive both fixture pages, one `add` invocation per URL.
    for page_url in page_urls:
        subprocess.run(
            ['archivebox', 'add', page_url],
            capture_output=True,
            env=disable_extractors_dict,
        )
    assert len(list(archive_dir.iterdir())) > 0

    # Only the first URL contains 'example.com'.
    subprocess.run(
        ['archivebox', 'remove', '--filter-type=substring', 'example.com', '--yes', '--delete'],
        capture_output=True,
    )

    remaining = list(archive_dir.iterdir())
    assert len(remaining) == 1
|
||||||
|
|
||||||
|
def test_remove_domain(tmp_path, process, disable_extractors_dict):
    """Check that a domain filter removes every snapshot served from that host.

    Both fixture pages are archived from ``127.0.0.1``; after removing with
    ``--filter-type=domain`` and ``--delete`` the ``archive`` directory must be
    empty and the ``core_snapshot`` table must contain no rows.
    """
    archive_dir = tmp_path / "archive"

    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=disable_extractors_dict)
    assert list(archive_dir.iterdir()) != []

    # The command's output is not inspected, so it is not bound to a local.
    subprocess.run(['archivebox', 'remove', '--filter-type=domain', '127.0.0.1', '--yes', '--delete'], capture_output=True)

    assert len(list(archive_dir.iterdir())) == 0

    # Relative path: relies on the working directory being the archive root
    # (presumably set up by the `process` fixture -- confirm in fixtures).
    conn = sqlite3.connect("index.sqlite3")
    try:
        # Read-only query, so no commit is needed.
        count = conn.cursor().execute("SELECT COUNT() from core_snapshot").fetchone()[0]
    finally:
        # Always release the connection, even if the query raises.
        conn.close()

    assert count == 0
|
Loading…
Reference in a new issue