From 9815241b78ecc3a4cc9b0f80ccbcbf1357cd621f Mon Sep 17 00:00:00 2001 From: Cristian Date: Wed, 22 Jul 2020 14:22:00 -0500 Subject: [PATCH 1/3] feat: Fallback to link detail when there is an issue loading a link from main index --- archivebox/index/json.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/archivebox/index/json.py b/archivebox/index/json.py index deca4bea..212c09c3 100644 --- a/archivebox/index/json.py +++ b/archivebox/index/json.py @@ -3,6 +3,7 @@ __package__ = 'archivebox.index' import os import sys import json as pyjson +from pathlib import Path from datetime import datetime from typing import List, Optional, Iterator, Any @@ -49,7 +50,11 @@ def parse_json_main_index(out_dir: str=OUTPUT_DIR) -> Iterator[Link]: with open(index_path, 'r', encoding='utf-8') as f: links = pyjson.load(f)['links'] for link_json in links: - yield Link.from_json(link_json) + try: + yield Link.from_json(link_json) + except KeyError: + detail_index_path = OUTPUT_DIR / Path(f"archive/{link_json['timestamp']}") + yield parse_json_link_details(str(detail_index_path)) return () From 263eb4e372fc4a9cc38331521a3aa6a4f121b9fe Mon Sep 17 00:00:00 2001 From: Cristian Date: Wed, 22 Jul 2020 14:37:10 -0500 Subject: [PATCH 2/3] fix: Change path to use ARCHIVE_DIR_NAME --- archivebox/index/json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/archivebox/index/json.py b/archivebox/index/json.py index 212c09c3..421e91b0 100644 --- a/archivebox/index/json.py +++ b/archivebox/index/json.py @@ -53,7 +53,7 @@ def parse_json_main_index(out_dir: str=OUTPUT_DIR) -> Iterator[Link]: try: yield Link.from_json(link_json) except KeyError: - detail_index_path = OUTPUT_DIR / Path(f"archive/{link_json['timestamp']}") + detail_index_path = Path(f"{OUTPUT_DIR}/{ARCHIVE_DIR_NAME}/{link_json['timestamp']}") yield parse_json_link_details(str(detail_index_path)) return () From e58c3deb05dc69abde667cb662d2e351a362729a Mon Sep 17 00:00:00 2001 From: Cristian Vargas Date: Wed, 22 Jul 2020 14:46:03 -0500 Subject: [PATCH 3/3] feat: Update path generation in detail index fallback Co-authored-by: Nick Sweeting --- archivebox/index/json.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/archivebox/index/json.py b/archivebox/index/json.py index 421e91b0..d0d38f86 100644 --- a/archivebox/index/json.py +++ b/archivebox/index/json.py @@ -53,7 +53,7 @@ def parse_json_main_index(out_dir: str=OUTPUT_DIR) -> Iterator[Link]: try: yield Link.from_json(link_json) except KeyError: - detail_index_path = Path(f"{OUTPUT_DIR}/{ARCHIVE_DIR_NAME}/{link_json['timestamp']}") + detail_index_path = Path(OUTPUT_DIR) / ARCHIVE_DIR_NAME / link_json['timestamp'] yield parse_json_link_details(str(detail_index_path)) return () @@ -155,4 +155,3 @@ def to_json(obj: Any, indent: Optional[int]=4, sort_keys: bool=True, cls=Extende return pyjson.dumps(obj, indent=indent, sort_keys=sort_keys, cls=ExtendedEncoder) -