1
0
Fork 0

fix: Remove link from sql index on remove command

This commit is contained in:
Cristian 2020-07-23 15:07:00 -05:00
parent 4cb671ae61
commit fe0884f1ec
3 changed files with 27 additions and 3 deletions

View file

@ -20,6 +20,16 @@ def parse_sql_main_index(out_dir: str=OUTPUT_DIR) -> Iterator[Link]:
for page in Snapshot.objects.all()
)
@enforce_types
def remove_from_sql_main_index(links: List[Link], out_dir: str=OUTPUT_DIR) -> None:
    """Delete the SQL index rows that correspond to the given links.

    For each link, every ``Snapshot`` row whose ``url`` equals ``link.url``
    is deleted. All deletions run inside one ``transaction.atomic()`` block.

    Args:
        links: Links whose snapshots should be removed from the SQL index.
        out_dir: Directory passed to ``setup_django`` before the models are
            imported.
    """
    setup_django(out_dir, check_db=True)
    # Kept after setup_django(), as in the original ordering (presumably
    # Django has to be configured before the models can be imported).
    from django.db import transaction
    from core.models import Snapshot

    with transaction.atomic():
        for entry in links:
            matching_rows = Snapshot.objects.filter(url=entry.url)
            matching_rows.delete()
@enforce_types
def write_sql_main_index(links: List[Link], out_dir: str=OUTPUT_DIR) -> None:
setup_django(out_dir, check_db=True)

View file

@ -49,6 +49,7 @@ from .index.sql import (
parse_sql_main_index,
get_admins,
apply_migrations,
remove_from_sql_main_index,
)
from .index.html import parse_html_main_index
from .extractors import archive_links
@ -600,6 +601,7 @@ def remove(filter_str: Optional[str]=None,
timer = TimedProgress(360, prefix=' ')
try:
to_keep = []
to_delete = []
all_links = load_main_index(out_dir=out_dir)
for link in all_links:
should_remove = (
@ -607,13 +609,17 @@ def remove(filter_str: Optional[str]=None,
or (before is not None and float(link.timestamp) > before)
or link_matches_filter(link, filter_patterns, filter_type)
)
if not should_remove:
if should_remove:
to_delete.append(link)
if delete:
shutil.rmtree(link.link_dir, ignore_errors=True)
else:
to_keep.append(link)
elif should_remove and delete:
shutil.rmtree(link.link_dir, ignore_errors=True)
finally:
timer.end()
remove_from_sql_main_index(links=to_delete, out_dir=out_dir)
write_main_index(links=to_keep, out_dir=out_dir, finished=True)
log_removal_finished(len(all_links), len(to_keep))

8
tests/test_remove.py Normal file
View file

@ -0,0 +1,8 @@
from .fixtures import *
def test_remove_leaves_index_in_consistent_state(tmp_path, process):
    """Check that add-then-remove does not leave the indexes out of sync.

    Runs ``archivebox add`` on a URL, ``archivebox remove`` on the same page,
    then ``archivebox list``, and asserts that the list command's stderr does
    not contain the SQL/JSON index mismatch warning.

    NOTE(review): ``process`` is a fixture from ``.fixtures`` that is not
    otherwise used here; presumably it prepares the archive in ``tmp_path``.
    """
    os.chdir(tmp_path)

    add_cmd = ['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html']
    remove_cmd = ['archivebox', 'remove', '127.0.0.1:8080/static/example.com.html', '--yes']
    list_cmd = ['archivebox', 'list']

    subprocess.run(add_cmd, capture_output=True)
    # Only the side effect of the removal matters; its output is not inspected.
    subprocess.run(remove_cmd, capture_output=True)
    listing = subprocess.run(list_cmd, capture_output=True)

    list_stderr = listing.stderr.decode("utf-8")
    assert "Warning: SQL index does not match JSON index!" not in list_stderr