From e37b1caf7fbd853aaaf5a9a35168a29ab22ca693 Mon Sep 17 00:00:00 2001
From: Alex Kotov <kotovalexarian@gmail.com>
Date: Tue, 29 Aug 2023 17:40:50 +0400
Subject: [PATCH] Move snapshot icons and archive.org URL into constants

---
 archivebox/index/html.py   | 33 +++++++++++++++++----------------
 archivebox/index/schema.py |  5 ++++-
 2 files changed, 21 insertions(+), 17 deletions(-)
diff --git a/archivebox/index/html.py b/archivebox/index/html.py
index ba22a1d2..07058ab3 100644
--- a/archivebox/index/html.py
+++ b/archivebox/index/html.py
@@ -34,6 +34,20 @@ MINIMAL_INDEX_TEMPLATE = 'minimal_index.html'
 LINK_DETAILS_TEMPLATE = 'snapshot.html'
 TITLE_LOADING_MSG = 'Not yet archived...'
 
+SNAPSHOT_ICONS = {  # extractor name -> icon glyph; lookups in snapshot_icons() fall back to "?"
+    'singlefile': '❶',
+    'wget': '🆆',
+    'dom': '🅷',
+    'pdf': '📄',
+    'screenshot': '💻',
+    'media': '📼',
+    'git': '🅶',
+    'archive_org': '🏛',
+    'readability': '🆁',
+    'mercury': '🅼',
+    'warc': '📦',
+}
+
 
 ### Main Links Index
 
@@ -134,19 +148,6 @@ def snapshot_icons(snapshot) -> str:
         canon = link.canonical_outputs()
         output = ""
         output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{}</a> &nbsp;'
-        icons = {
-            "singlefile": "❶",
-            "wget": "🆆",
-            "dom": "🅷",
-            "pdf": "📄",
-            "screenshot": "💻",
-            "media": "📼",
-            "git": "🅶",
-            "archive_org": "🏛",
-            "readability": "🆁",
-            "mercury": "🅼",
-            "warc": "📦"
-        }
         exclude = ["favicon", "title", "headers", "archive_org"]
         # Missing specific entry for WARC
 
@@ -167,7 +168,7 @@ def snapshot_icons(snapshot) -> str:
                 #     elif existing.is_dir():
                 #         existing = any(existing.glob('*.*'))
                 output += format_html(output_template, path, canon[f"{extractor}_path"], str(bool(existing)),
-                                             extractor, icons.get(extractor, "?"))
+                                             extractor, SNAPSHOT_ICONS.get(extractor, "?"))
             if extractor == "wget":
                 # warc isn't technically it's own extractor, so we have to add it after wget
                 
@@ -175,7 +176,7 @@ def snapshot_icons(snapshot) -> str:
                 exists = extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
                 # get from filesystem (slower but more accurate)
                 # exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz"))
-                output += format_html(output_template, path, canon["warc_path"], str(bool(exists)), "warc", icons.get("warc", "?"))
+                output += format_html(output_template, path, canon["warc_path"], str(bool(exists)), "warc", SNAPSHOT_ICONS.get("warc", "?"))
 
             if extractor == "archive_org":
                 # The check for archive_org is different, so it has to be handled separately
@@ -186,7 +187,7 @@ def snapshot_icons(snapshot) -> str:
                 # target_path = Path(path) / "archive.org.txt"
                 # exists = target_path.exists()
                 output += '<a href="{}" class="exists-{}" title="{}">{}</a> '.format(canon["archive_org_path"], str(exists),
-                                                                                            "archive_org", icons.get("archive_org", "?"))
+                                                                                            "archive_org", SNAPSHOT_ICONS.get("archive_org", "?"))
 
         result = format_html('<span class="files-icons" style="font-size: 1.1em; opacity: 0.8; min-width: 240px; display: inline-block">{}<span>', mark_safe(output))
         # end = datetime.now(timezone.utc)
diff --git a/archivebox/index/schema.py b/archivebox/index/schema.py
index c44165a9..137d817e 100644
--- a/archivebox/index/schema.py
+++ b/archivebox/index/schema.py
@@ -22,6 +22,9 @@ from ..system import get_dir_size
 from ..util import ts_to_date_str, parse_date
 from ..config import OUTPUT_DIR, ARCHIVE_DIR_NAME, FAVICON_PROVIDER
 
+ARCHIVE_DOT_ORG_TEMPLATE = 'https://web.archive.org/web/{}'  # '{}' is filled with the Link's base_url
+
+
 class ArchiveError(Exception):
     def __init__(self, message, hints=None):
         super().__init__(message)
@@ -432,7 +435,7 @@ class Link:
             'pdf_path': 'output.pdf',
             'screenshot_path': 'screenshot.png',
             'dom_path': 'output.html',
-            'archive_org_path': 'https://web.archive.org/web/{}'.format(self.base_url),
+            'archive_org_path': ARCHIVE_DOT_ORG_TEMPLATE.format(self.base_url),
             'git_path': 'git/',
             'media_path': 'media/',
             'headers_path': 'headers.json',