From 30930576755d332ad167c665ace07d65d3aa2ea1 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Sat, 10 Apr 2021 05:18:03 -0400 Subject: [PATCH] bail out on sonic indexing after 5 errors --- archivebox/search/backends/sonic.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/archivebox/search/backends/sonic.py b/archivebox/search/backends/sonic.py index 38c187fa..8bde333c 100644 --- a/archivebox/search/backends/sonic.py +++ b/archivebox/search/backends/sonic.py @@ -7,10 +7,11 @@ from archivebox.config import SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEA MAX_SONIC_TEXT_TOTAL_LENGTH = 100000000 # dont index more than 100 million characters per text MAX_SONIC_TEXT_CHUNK_LENGTH = 2000 # dont index more than 2000 characters per chunk - +MAX_SONIC_ERRORS_BEFORE_ABORT = 5 @enforce_types def index(snapshot_id: str, texts: List[str]): + error_count = 0 with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl: for text in texts: chunks = ( @@ -21,8 +22,14 @@ def index(snapshot_id: str, texts: List[str]): MAX_SONIC_TEXT_CHUNK_LENGTH, ) ) - for chunk in chunks: - ingestcl.push(SONIC_COLLECTION, SONIC_BUCKET, snapshot_id, str(chunk)) + try: + for chunk in chunks: + ingestcl.push(SONIC_COLLECTION, SONIC_BUCKET, snapshot_id, str(chunk)) + except Exception as err: + print(f'[!] Sonic search backend threw an error while indexing: {err.__class__.__name__} {err}') + error_count += 1 + if error_count > MAX_SONIC_ERRORS_BEFORE_ABORT: + raise @enforce_types def search(text: str) -> List[str]: