
fix urldecoding of titles

Nick Sweeting 2018-04-17 10:30:25 -04:00
parent 650380efce
commit dbe4660da3
2 changed files with 16 additions and 3 deletions

View file

@@ -146,7 +146,7 @@ def write_html_link_index(out_dir, link):
         'bookmarked': datetime.fromtimestamp(float(link['timestamp'])).strftime('%Y-%m-%d %H:%M'),
         'updated': datetime.fromtimestamp(float(link['updated'])).strftime('%Y-%m-%d %H:%M'),
         'archive_org': link['latest'].get('archive_org') or 'https://web.archive.org/save/{}'.format(link['url']),
-        'wget': link['latest'].get('wget') or link['domain'],
+        'wget': link['latest'].get('wget') or wget_output_path(link),
     }))
     chmod_file(path)
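
For context, a minimal sketch of the fallback this hunk changes: when link['latest'] has no recorded wget result, the index template previously fell back to the bare domain and now falls back to wget_output_path(link). The stub wget_output_path below is a hypothetical stand-in for the real helper imported from util, included only to make the snippet runnable.

    # Sketch only: wget_output_path is a hypothetical stand-in for the util helper.
    def wget_output_path(link):
        # Assumed behaviour: derive an on-disk path for the wget snapshot of this link.
        return '{}/index.html'.format(link['domain'])

    link = {
        'url': 'https://example.com/post?id=1',
        'domain': 'example.com',
        'latest': {},  # no archive results recorded yet
    }

    old_value = link['latest'].get('wget') or link['domain']          # 'example.com'
    new_value = link['latest'].get('wget') or wget_output_path(link)  # 'example.com/index.html'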

View file

@@ -33,7 +33,7 @@ Link {
 """
 import datetime
-from urllib.parse import unquote
+from html import unescape
 from util import (
     domain,
@@ -41,6 +41,7 @@ from util import (
     str_between,
     get_link_type,
     merge_links,
+    wget_output_path,
 )
 from config import ANSI
@@ -54,6 +55,19 @@ def validate_links(links):
         print('[X] No links found :(')
         raise SystemExit(1)
+    for link in links:
+        link['title'] = unescape(link['title'])
+        link['latest'] = link.get('latest') or {}
+        if not link['latest'].get('wget'):
+            link['latest']['wget'] = wget_output_path(link)
+        if not link['latest'].get('pdf'):
+            link['latest']['pdf'] = wget_output_path(link)
+        if not link['latest'].get('screenshot'):
+            link['latest']['screenshot'] = wget_output_path(link)
     return list(links)
@@ -86,7 +100,6 @@ def uniquefied_links(sorted_links):
     unique_timestamps = {}
     for link in unique_urls.values():
         link['timestamp'] = lowest_uniq_timestamp(unique_timestamps, link['timestamp'])
-        link['title'] = unquote(link['title'])
         unique_timestamps[link['timestamp']] = link
     return unique_timestamps.values()
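
As a quick illustration of why the import moves from urllib.parse.unquote to html.unescape: titles scraped from HTML pages carry HTML character references such as &amp; and &#39;, not percent-encoding, so URL-decoding leaves them escaped and can even corrupt titles that legitimately contain %-sequences. A minimal sketch with made-up titles:

    from html import unescape
    from urllib.parse import unquote

    # A title as it typically appears in scraped HTML: entity-escaped, not URL-encoded.
    title = 'Tom &amp; Jerry&#39;s House'

    print(unquote(title))    # Tom &amp; Jerry&#39;s House   (entities left untouched)
    print(unescape(title))   # Tom & Jerry's House           (entities decoded)

    # URL-decoding can also mangle a title that happens to contain a %-sequence.
    print(unquote('CPU usage down to 5%21'))    # CPU usage down to 5!
    print(unescape('CPU usage down to 5%21'))   # CPU usage down to 5%21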