Some refactoring
This commit is contained in:
parent
c53c772c4f
commit
e37b1caf7f
2 changed files with 21 additions and 17 deletions
|
@ -34,6 +34,20 @@ MINIMAL_INDEX_TEMPLATE = 'minimal_index.html'
|
||||||
LINK_DETAILS_TEMPLATE = 'snapshot.html'
|
LINK_DETAILS_TEMPLATE = 'snapshot.html'
|
||||||
TITLE_LOADING_MSG = 'Not yet archived...'
|
TITLE_LOADING_MSG = 'Not yet archived...'
|
||||||
|
|
||||||
|
SNAPSHOT_ICONS = {
|
||||||
|
'singlefile': '❶',
|
||||||
|
'wget': '🆆',
|
||||||
|
'dom': '🅷',
|
||||||
|
'pdf': '📄',
|
||||||
|
'screenshot': '💻',
|
||||||
|
'media': '📼',
|
||||||
|
'git': '🅶',
|
||||||
|
'archive_org': '🏛',
|
||||||
|
'readability': '🆁',
|
||||||
|
'mercury': '🅼',
|
||||||
|
'warc': '📦',
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
### Main Links Index
|
### Main Links Index
|
||||||
|
|
||||||
|
@ -134,19 +148,6 @@ def snapshot_icons(snapshot) -> str:
|
||||||
canon = link.canonical_outputs()
|
canon = link.canonical_outputs()
|
||||||
output = ""
|
output = ""
|
||||||
output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{}</a> '
|
output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{}</a> '
|
||||||
icons = {
|
|
||||||
"singlefile": "❶",
|
|
||||||
"wget": "🆆",
|
|
||||||
"dom": "🅷",
|
|
||||||
"pdf": "📄",
|
|
||||||
"screenshot": "💻",
|
|
||||||
"media": "📼",
|
|
||||||
"git": "🅶",
|
|
||||||
"archive_org": "🏛",
|
|
||||||
"readability": "🆁",
|
|
||||||
"mercury": "🅼",
|
|
||||||
"warc": "📦"
|
|
||||||
}
|
|
||||||
exclude = ["favicon", "title", "headers", "archive_org"]
|
exclude = ["favicon", "title", "headers", "archive_org"]
|
||||||
# Missing specific entry for WARC
|
# Missing specific entry for WARC
|
||||||
|
|
||||||
|
@ -167,7 +168,7 @@ def snapshot_icons(snapshot) -> str:
|
||||||
# elif existing.is_dir():
|
# elif existing.is_dir():
|
||||||
# existing = any(existing.glob('*.*'))
|
# existing = any(existing.glob('*.*'))
|
||||||
output += format_html(output_template, path, canon[f"{extractor}_path"], str(bool(existing)),
|
output += format_html(output_template, path, canon[f"{extractor}_path"], str(bool(existing)),
|
||||||
extractor, icons.get(extractor, "?"))
|
extractor, SNAPSHOT_ICONS.get(extractor, "?"))
|
||||||
if extractor == "wget":
|
if extractor == "wget":
|
||||||
# warc isn't technically its own extractor, so we have to add it after wget
|
# warc isn't technically its own extractor, so we have to add it after wget
|
||||||
|
|
||||||
|
@ -175,7 +176,7 @@ def snapshot_icons(snapshot) -> str:
|
||||||
exists = extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
|
exists = extractor_outputs[extractor] and extractor_outputs[extractor].status == 'succeeded' and extractor_outputs[extractor].output
|
||||||
# get from filesystem (slower but more accurate)
|
# get from filesystem (slower but more accurate)
|
||||||
# exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz"))
|
# exists = list((Path(path) / canon["warc_path"]).glob("*.warc.gz"))
|
||||||
output += format_html(output_template, path, canon["warc_path"], str(bool(exists)), "warc", icons.get("warc", "?"))
|
output += format_html(output_template, path, canon["warc_path"], str(bool(exists)), "warc", SNAPSHOT_ICONS.get("warc", "?"))
|
||||||
|
|
||||||
if extractor == "archive_org":
|
if extractor == "archive_org":
|
||||||
# The check for archive_org is different, so it has to be handled separately
|
# The check for archive_org is different, so it has to be handled separately
|
||||||
|
@ -186,7 +187,7 @@ def snapshot_icons(snapshot) -> str:
|
||||||
# target_path = Path(path) / "archive.org.txt"
|
# target_path = Path(path) / "archive.org.txt"
|
||||||
# exists = target_path.exists()
|
# exists = target_path.exists()
|
||||||
output += '<a href="{}" class="exists-{}" title="{}">{}</a> '.format(canon["archive_org_path"], str(exists),
|
output += '<a href="{}" class="exists-{}" title="{}">{}</a> '.format(canon["archive_org_path"], str(exists),
|
||||||
"archive_org", icons.get("archive_org", "?"))
|
"archive_org", SNAPSHOT_ICONS.get("archive_org", "?"))
|
||||||
|
|
||||||
result = format_html('<span class="files-icons" style="font-size: 1.1em; opacity: 0.8; min-width: 240px; display: inline-block">{}<span>', mark_safe(output))
|
result = format_html('<span class="files-icons" style="font-size: 1.1em; opacity: 0.8; min-width: 240px; display: inline-block">{}<span>', mark_safe(output))
|
||||||
# end = datetime.now(timezone.utc)
|
# end = datetime.now(timezone.utc)
|
||||||
|
|
|
@ -22,6 +22,9 @@ from ..system import get_dir_size
|
||||||
from ..util import ts_to_date_str, parse_date
|
from ..util import ts_to_date_str, parse_date
|
||||||
from ..config import OUTPUT_DIR, ARCHIVE_DIR_NAME, FAVICON_PROVIDER
|
from ..config import OUTPUT_DIR, ARCHIVE_DIR_NAME, FAVICON_PROVIDER
|
||||||
|
|
||||||
|
ARCHIVE_DOT_ORG_TEMPLATE = 'https://web.archive.org/web/{}'
|
||||||
|
|
||||||
|
|
||||||
class ArchiveError(Exception):
|
class ArchiveError(Exception):
|
||||||
def __init__(self, message, hints=None):
|
def __init__(self, message, hints=None):
|
||||||
super().__init__(message)
|
super().__init__(message)
|
||||||
|
@ -432,7 +435,7 @@ class Link:
|
||||||
'pdf_path': 'output.pdf',
|
'pdf_path': 'output.pdf',
|
||||||
'screenshot_path': 'screenshot.png',
|
'screenshot_path': 'screenshot.png',
|
||||||
'dom_path': 'output.html',
|
'dom_path': 'output.html',
|
||||||
'archive_org_path': 'https://web.archive.org/web/{}'.format(self.base_url),
|
'archive_org_path': ARCHIVE_DOT_ORG_TEMPLATE.format(self.base_url),
|
||||||
'git_path': 'git/',
|
'git_path': 'git/',
|
||||||
'media_path': 'media/',
|
'media_path': 'media/',
|
||||||
'headers_path': 'headers.json',
|
'headers_path': 'headers.json',
|
||||||
|
|
Loading…
Reference in a new issue