From 47daa038eb61674df22345e99201472ea770762c Mon Sep 17 00:00:00 2001
From: JDC
Date: Thu, 19 Nov 2020 16:45:12 -0500
Subject: [PATCH] Implement flush for search backend after remove command

---
Review notes (ignored by `git am`):
 - main.py: snapshot pks are now collected *before* remove_from_sql_main_index()
   runs. The queryset is lazy, so building the id list afterwards re-queries
   the database and (assuming that helper deletes the rows) yields nothing to
   flush.
 - sonic.py: loop variable renamed so it no longer shadows the builtin `id`.
 - search/__init__.py: dropped the trailing whitespace-only line so the file
   ends with a newline.
 - Context-line whitespace was reconstructed; run `git apply --check` first.

 archivebox/config.py                |  4 ++--
 archivebox/core/mixins.py           |  2 +-
 archivebox/main.py                  |  4 ++++
 archivebox/search/__init__.py       |  9 ++++++++-
 archivebox/search/backends/sonic.py | 12 +++++++++---
 5 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/archivebox/config.py b/archivebox/config.py
index 0ca2d7d9..ee2f0b4a 100644
--- a/archivebox/config.py
+++ b/archivebox/config.py
@@ -147,8 +147,8 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
         'SEARCH_BACKEND_PORT': {'type': int, 'default': 1491},
         'SEARCH_BACKEND_PASSWORD': {'type': str, 'default': 'SecretPassword'},
         # SONIC
-        'SONIC_BUCKET': {'type': str, 'default': 'archivebox'},
-        'SONIC_COLLECTION': {'type': str, 'default': 'snapshots'},
+        'SONIC_COLLECTION': {'type': str, 'default': 'archivebox'},
+        'SONIC_BUCKET': {'type': str, 'default': 'snapshots'},
     },
 
     'DEPENDENCY_CONFIG': {
diff --git a/archivebox/core/mixins.py b/archivebox/core/mixins.py
index afae2d78..b361790a 100644
--- a/archivebox/core/mixins.py
+++ b/archivebox/core/mixins.py
@@ -18,7 +18,7 @@ class SearchResultsAdminMixin(object):
         except Exception as err:
             messages.add_message(request, messages.WARNING, f'Error from the search backend, only showing results from default admin search fields - Error: {err}')
         else:
-            qsearch = queryset.filter(id__in=snapshot_ids)
+            qsearch = queryset.filter(pk__in=snapshot_ids)
             qs |= qsearch
 
         finally:
diff --git a/archivebox/main.py b/archivebox/main.py
index cbbd2218..504cd670 100644
--- a/archivebox/main.py
+++ b/archivebox/main.py
@@ -115,6 +115,7 @@ from .logging_util import (
     printable_dependency_version,
 )
 
+from .search import flush_search_index
 
 ALLOWED_IN_OUTPUT_DIR = {
     'lost+found',
@@ -665,6 +666,9 @@ def remove(filter_str: Optional[str]=None,
     to_remove = snapshots.count()
 
+    # Collect the pks up front: the queryset is lazy and would be re-queried after the SQL removal
+    snapshot_ids = [str(pk) for pk in snapshots.values_list('pk', flat=True)]
     remove_from_sql_main_index(snapshots=snapshots, out_dir=out_dir)
+    flush_search_index(snapshot_ids=snapshot_ids)
     all_snapshots = load_main_index(out_dir=out_dir)
     log_removal_finished(all_snapshots.count(), to_remove)
 
diff --git a/archivebox/search/__init__.py b/archivebox/search/__init__.py
index 7db4af46..93245bda 100644
--- a/archivebox/search/__init__.py
+++ b/archivebox/search/__init__.py
@@ -45,4 +45,11 @@ def query_search_index(text: str) -> List:
         return backend.search(text)
     else:
         return []
-    
\ No newline at end of file
+
+@enforce_types
+def flush_search_index(snapshot_ids: List[str]):
+    """Ask the search backend to flush `snapshot_ids`; no-op if indexing is disabled or the list is empty."""
+    if not indexing_enabled() or not snapshot_ids:
+        return
+    backend = import_backend()
+    backend.flush(snapshot_ids)
diff --git a/archivebox/search/backends/sonic.py b/archivebox/search/backends/sonic.py
index e062f9e1..8fd93ae8 100644
--- a/archivebox/search/backends/sonic.py
+++ b/archivebox/search/backends/sonic.py
@@ -10,11 +10,17 @@ from archivebox.config import SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEA
 def index(snapshot_id: str, texts: List[str]):
     with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl:
         for text in texts:
-            ingestcl.push(SONIC_BUCKET, SONIC_COLLECTION, snapshot_id, str(text))
+            ingestcl.push(SONIC_COLLECTION, SONIC_BUCKET, snapshot_id, str(text))
 
 @enforce_types
 def search(text: str) -> List:
     with SearchClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as querycl:
-        snap_ids = querycl.query(SONIC_BUCKET, SONIC_COLLECTION, text)
+        snap_ids = querycl.query(SONIC_COLLECTION, SONIC_BUCKET, text)
     return snap_ids
-    
\ No newline at end of file
+
+@enforce_types
+def flush(snapshot_ids: List[str]):
+    """Call flush_object on the configured collection/bucket once per id in `snapshot_ids`."""
+    with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl:
+        for snapshot_id in snapshot_ids:
+            ingestcl.flush_object(SONIC_COLLECTION, SONIC_BUCKET, str(snapshot_id))