diff --git a/index.py b/index.py
index 75d28e72..6bb82bb0 100644
--- a/index.py
+++ b/index.py
@@ -146,7 +146,7 @@ def write_html_link_index(out_dir, link):
             'bookmarked': datetime.fromtimestamp(float(link['timestamp'])).strftime('%Y-%m-%d %H:%M'),
             'updated': datetime.fromtimestamp(float(link['updated'])).strftime('%Y-%m-%d %H:%M'),
             'archive_org': link['latest'].get('archive_org') or 'https://web.archive.org/save/{}'.format(link['url']),
-            'wget': link['latest'].get('wget') or link['domain'],
+            'wget': link['latest'].get('wget') or wget_output_path(link),
         }))
 
     chmod_file(path)
diff --git a/links.py b/links.py
index c1f31ba3..5d977ab0 100644
--- a/links.py
+++ b/links.py
@@ -33,7 +33,7 @@ Link {
 """
 
 import datetime
-from urllib.parse import unquote
+from html import unescape
 
 from util import (
     domain,
@@ -41,6 +41,7 @@ from util import (
     str_between,
     get_link_type,
     merge_links,
+    wget_output_path,
 )
 from config import ANSI
 
@@ -54,6 +55,19 @@ def validate_links(links):
         print('[X] No links found :(')
         raise SystemExit(1)
 
+    for link in links:
+        link['title'] = unescape(link['title'])
+        link['latest'] = link.get('latest') or {}
+
+        if not link['latest'].get('wget'):
+            link['latest']['wget'] = wget_output_path(link)
+
+        if not link['latest'].get('pdf'):
+            link['latest']['pdf'] = wget_output_path(link)
+
+        if not link['latest'].get('screenshot'):
+            link['latest']['screenshot'] = wget_output_path(link)
+
     return list(links)
 
 
@@ -86,7 +100,6 @@ def uniquefied_links(sorted_links):
     unique_timestamps = {}
     for link in unique_urls.values():
         link['timestamp'] = lowest_uniq_timestamp(unique_timestamps, link['timestamp'])
-        link['title'] = unquote(link['title'])
         unique_timestamps[link['timestamp']] = link
 
     return unique_timestamps.values()
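
For context on the `unquote` -> `unescape` swap: `urllib.parse.unquote` decodes URL percent-escapes like `%20`, but titles scraped from HTML carry HTML entities like `&amp;` and `&#39;`, which `unquote` leaves untouched; `html.unescape` is the stdlib call for those. A minimal sketch of the difference, with a made-up title string for illustration:

```python
from urllib.parse import unquote
from html import unescape

title = 'Ben &amp; Jerry&#39;s 100%25 Tour'

print(unquote(title))   # Ben &amp; Jerry&#39;s 100% Tour  (only percent-escapes decoded)
print(unescape(title))  # Ben & Jerry's 100%25 Tour        (HTML entities decoded)
```

Moving this into validate_links also means titles get normalized on every run rather than only when duplicates are merged in uniquefied_links, and the `link['latest'].get(...) or wget_output_path(link)` fallbacks give every link a usable output path before any archive method has produced one.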