remove redundant utils file
This commit is contained in:
parent
84507b68b5
commit
104553489f
5 changed files with 64 additions and 62 deletions
|
@ -13,8 +13,8 @@ from django import forms
|
||||||
|
|
||||||
from core.models import Snapshot, Tag
|
from core.models import Snapshot, Tag
|
||||||
from core.forms import AddLinkForm, TagField
|
from core.forms import AddLinkForm, TagField
|
||||||
from core.utils import get_icons
|
|
||||||
|
|
||||||
|
from index.html import snapshot_icons
|
||||||
from util import htmldecode, urldecode, ansi_to_html
|
from util import htmldecode, urldecode, ansi_to_html
|
||||||
from logging_util import printable_filesize
|
from logging_util import printable_filesize
|
||||||
from main import add, remove
|
from main import add, remove
|
||||||
|
@ -128,7 +128,7 @@ class SnapshotAdmin(admin.ModelAdmin):
|
||||||
) + mark_safe(f' <span class="tags">{tags}</span>')
|
) + mark_safe(f' <span class="tags">{tags}</span>')
|
||||||
|
|
||||||
def files(self, obj):
|
def files(self, obj):
|
||||||
return get_icons(obj)
|
return snapshot_icons(obj)
|
||||||
|
|
||||||
def size(self, obj):
|
def size(self, obj):
|
||||||
archive_size = obj.archive_size
|
archive_size = obj.archive_size
|
||||||
|
|
|
@ -1,54 +0,0 @@
|
||||||
from django.utils.html import format_html
|
|
||||||
from collections import defaultdict
|
|
||||||
|
|
||||||
from core.models import Snapshot, EXTRACTORS
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
|
|
||||||
def get_icons(snapshot: Snapshot) -> str:
    """Render the row of per-extractor icon links for a snapshot.

    One anchor is emitted per entry of ``EXTRACTORS`` that is not in the
    exclusion list, in ``EXTRACTORS`` order.  Each anchor carries an
    ``exists-True`` / ``exists-False`` CSS class telling whether the snapshot
    has a succeeded archive result for that extractor.  Two outputs are
    special-cased: a "warc" anchor is added right after "wget", and the
    "archive_org" anchor is decided by the presence of ``archive.org.txt``
    in the snapshot's archive directory.

    Args:
        snapshot: object exposing ``archiveresult_set.filter(...)`` and
            ``as_link()``; the link must provide ``archive_path`` and
            ``canonical_outputs()``.

    Returns:
        HTML-safe markup: the anchors wrapped in a ``span.files-icons``.
    """
    # Local import: django is already a dependency of this module, and
    # mark_safe is only needed to join the already-escaped anchors.
    from django.utils.safestring import mark_safe

    link = snapshot.as_link()
    path = link.archive_path
    canon = link.canonical_outputs()

    # Collect the succeeded extractor names once instead of rescanning every
    # result for every extractor.
    succeeded = {
        result.extractor
        for result in snapshot.archiveresult_set.filter(status="succeeded")
    }

    output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{} </a>'
    icons = {
        "singlefile": "❶",
        "wget": "🆆",
        "dom": "🅷",
        "pdf": "📄",
        "screenshot": "💻",
        "media": "📼",
        "git": "🅶",
        "archive_org": "🏛",
        "readability": "🆁",
        "mercury": "🅼",
        "warc": "📦",
    }
    # archive_org is excluded from the generic branch because it is rendered
    # by its own branch below.
    exclude = ["favicon", "title", "headers", "archive_org"]

    anchors = []
    for extractor, _ in EXTRACTORS:
        if extractor not in exclude:
            # format_html escapes every interpolated value, so paths that
            # contain quotes, ampersands or braces cannot break the markup.
            anchors.append(format_html(
                output_template,
                path,
                canon[f"{extractor}_path"],
                str(extractor in succeeded),
                extractor,
                icons.get(extractor, "?"),
            ))

        if extractor == "wget":
            # warc isn't technically its own extractor, so we have to add it after wget
            warc_files = list((Path(path) / canon["warc_path"]).glob("*.warc.gz"))
            # NOTE(review): the href is built as "/<first match or #>/<warc_path>",
            # exactly as before; confirm that this is the intended link target.
            anchors.append(format_html(
                output_template,
                warc_files[0] if warc_files else '#',
                canon["warc_path"],
                str(bool(warc_files)),
                "warc",
                icons.get("warc", "?"),
            ))

        if extractor == "archive_org":
            # The check for archive_org is different, so it has to be handled separately
            exists = (Path(path) / "archive.org.txt").exists()
            anchors.append(format_html(
                '<a href="{}" class="exists-{}" title="{}">{}</a> ',
                canon["archive_org_path"],
                str(exists),
                "archive_org",
                icons.get("archive_org", "?"),
            ))

    # Every anchor is already escaped, so the joined string is safe to mark.
    # The wrapper is now closed with </span> (it used to emit a second <span>).
    return format_html(
        '<span class="files-icons" style="font-size: 1.1em; opacity: 0.8">{}</span>',
        mark_safe("".join(anchors)),
    )
|
|
|
@ -12,7 +12,6 @@ from django.views.generic import FormView
|
||||||
from django.contrib.auth.mixins import UserPassesTestMixin
|
from django.contrib.auth.mixins import UserPassesTestMixin
|
||||||
|
|
||||||
from core.models import Snapshot
|
from core.models import Snapshot
|
||||||
from core.utils import get_icons
|
|
||||||
from core.forms import AddLinkForm
|
from core.forms import AddLinkForm
|
||||||
|
|
||||||
from ..config import (
|
from ..config import (
|
||||||
|
@ -25,6 +24,7 @@ from ..config import (
|
||||||
)
|
)
|
||||||
from main import add
|
from main import add
|
||||||
from ..util import base_url, ansi_to_html
|
from ..util import base_url, ansi_to_html
|
||||||
|
from ..index.html import snapshot_icons
|
||||||
|
|
||||||
|
|
||||||
class MainIndex(View):
|
class MainIndex(View):
|
||||||
|
@ -108,7 +108,7 @@ class PublicArchiveView(ListView):
|
||||||
if query:
|
if query:
|
||||||
qs = Snapshot.objects.filter(title__icontains=query)
|
qs = Snapshot.objects.filter(title__icontains=query)
|
||||||
for snapshot in qs:
|
for snapshot in qs:
|
||||||
snapshot.icons = get_icons(snapshot)
|
snapshot.icons = snapshot_icons(snapshot)
|
||||||
return qs
|
return qs
|
||||||
|
|
||||||
def get(self, *args, **kwargs):
|
def get(self, *args, **kwargs):
|
||||||
|
|
|
@ -5,8 +5,13 @@ from datetime import datetime
|
||||||
from typing import List, Optional, Iterator, Mapping
|
from typing import List, Optional, Iterator, Mapping
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from django.utils.html import format_html
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
from .schema import Link
|
from .schema import Link
|
||||||
from ..system import atomic_write, copy_and_overwrite
|
from ..system import atomic_write
|
||||||
from ..logging_util import printable_filesize
|
from ..logging_util import printable_filesize
|
||||||
from ..util import (
|
from ..util import (
|
||||||
enforce_types,
|
enforce_types,
|
||||||
|
@ -23,9 +28,6 @@ from ..config import (
|
||||||
FOOTER_INFO,
|
FOOTER_INFO,
|
||||||
ARCHIVE_DIR_NAME,
|
ARCHIVE_DIR_NAME,
|
||||||
HTML_INDEX_FILENAME,
|
HTML_INDEX_FILENAME,
|
||||||
STATIC_DIR_NAME,
|
|
||||||
ROBOTS_TXT_FILENAME,
|
|
||||||
FAVICON_FILENAME,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
MAIN_INDEX_TEMPLATE = str(Path(TEMPLATES_DIR) / 'main_index.html')
|
MAIN_INDEX_TEMPLATE = str(Path(TEMPLATES_DIR) / 'main_index.html')
|
||||||
|
@ -143,3 +145,56 @@ def render_legacy_template(template_path: str, context: Mapping[str, str]) -> st
|
||||||
with open(template_path, 'r', encoding='utf-8') as template:
|
with open(template_path, 'r', encoding='utf-8') as template:
|
||||||
template_str = template.read()
|
template_str = template.read()
|
||||||
return Template(template_str).substitute(**context)
|
return Template(template_str).substitute(**context)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def snapshot_icons(snapshot) -> str:
    """Render the row of per-extractor icon links for a snapshot.

    One anchor is emitted per entry of ``EXTRACTORS`` that is not in the
    exclusion list, in ``EXTRACTORS`` order.  Each anchor carries an
    ``exists-True`` / ``exists-False`` CSS class telling whether the snapshot
    has a succeeded archive result for that extractor.  Two outputs are
    special-cased: a "warc" anchor is added right after "wget", and the
    "archive_org" anchor is decided by the presence of ``archive.org.txt``
    in the snapshot's archive directory.

    Args:
        snapshot: object exposing ``archiveresult_set.filter(...)`` and
            ``as_link()``; the link must provide ``archive_path`` and
            ``canonical_outputs()``.

    Returns:
        HTML-safe markup: the anchors wrapped in a ``span.files-icons``.
    """
    # Imported inside the function, as the original did; only EXTRACTORS is
    # actually used, so the unused Snapshot name is no longer pulled in.
    from core.models import EXTRACTORS
    # Needed only to join the already-escaped anchors.
    from django.utils.safestring import mark_safe

    link = snapshot.as_link()
    path = link.archive_path
    canon = link.canonical_outputs()

    # Collect the succeeded extractor names once instead of rescanning every
    # result for every extractor.
    succeeded = {
        result.extractor
        for result in snapshot.archiveresult_set.filter(status="succeeded")
    }

    output_template = '<a href="/{}/{}" class="exists-{}" title="{}">{} </a>'
    icons = {
        "singlefile": "❶",
        "wget": "🆆",
        "dom": "🅷",
        "pdf": "📄",
        "screenshot": "💻",
        "media": "📼",
        "git": "🅶",
        "archive_org": "🏛",
        "readability": "🆁",
        "mercury": "🅼",
        "warc": "📦",
    }
    # archive_org is excluded from the generic branch because it is rendered
    # by its own branch below.
    exclude = ["favicon", "title", "headers", "archive_org"]

    anchors = []
    for extractor, _ in EXTRACTORS:
        if extractor not in exclude:
            # format_html escapes every interpolated value, so paths that
            # contain quotes, ampersands or braces cannot break the markup.
            anchors.append(format_html(
                output_template,
                path,
                canon[f"{extractor}_path"],
                str(extractor in succeeded),
                extractor,
                icons.get(extractor, "?"),
            ))

        if extractor == "wget":
            # warc isn't technically its own extractor, so we have to add it after wget
            warc_files = list((Path(path) / canon["warc_path"]).glob("*.warc.gz"))
            # NOTE(review): the href is built as "/<first match or #>/<warc_path>",
            # exactly as before; confirm that this is the intended link target.
            anchors.append(format_html(
                output_template,
                warc_files[0] if warc_files else '#',
                canon["warc_path"],
                str(bool(warc_files)),
                "warc",
                icons.get("warc", "?"),
            ))

        if extractor == "archive_org":
            # The check for archive_org is different, so it has to be handled separately
            exists = (Path(path) / "archive.org.txt").exists()
            anchors.append(format_html(
                '<a href="{}" class="exists-{}" title="{}">{}</a> ',
                canon["archive_org_path"],
                str(exists),
                "archive_org",
                icons.get("archive_org", "?"),
            ))

    # Every anchor is already escaped, so the joined string is safe to mark.
    # The wrapper is now closed with </span> (it used to emit a second <span>).
    return format_html(
        '<span class="files-icons" style="font-size: 1.1em; opacity: 0.8">{}</span>',
        mark_safe("".join(anchors)),
    )
|
||||||
|
|
|
@ -246,6 +246,7 @@ def chrome_args(**options) -> List[str]:
|
||||||
|
|
||||||
return cmd_args
|
return cmd_args
|
||||||
|
|
||||||
|
|
||||||
def ansi_to_html(text):
|
def ansi_to_html(text):
|
||||||
"""
|
"""
|
||||||
Based on: https://stackoverflow.com/questions/19212665/python-converting-ansi-color-codes-to-html
|
Based on: https://stackoverflow.com/questions/19212665/python-converting-ansi-color-codes-to-html
|
||||||
|
|
Loading…
Add table
Reference in a new issue