fix urldecoding of titles

parent 650380efce
commit dbe4660da3

2 changed files with 16 additions and 3 deletions

index.py (2 changes)

@@ -146,7 +146,7 @@ def write_html_link_index(out_dir, link):
             'bookmarked': datetime.fromtimestamp(float(link['timestamp'])).strftime('%Y-%m-%d %H:%M'),
             'updated': datetime.fromtimestamp(float(link['updated'])).strftime('%Y-%m-%d %H:%M'),
             'archive_org': link['latest'].get('archive_org') or 'https://web.archive.org/save/{}'.format(link['url']),
-            'wget': link['latest'].get('wget') or link['domain'],
+            'wget': link['latest'].get('wget') or wget_output_path(link),
         }))

     chmod_file(path)
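The 'wget' fallback above now points at the archived file on disk instead of the bare domain. As a hypothetical sketch only (assuming wget_output_path maps a link's URL to the relative path wget saved it under; the real helper in util.py likely also handles wget's path-rewriting quirks):

    from urllib.parse import urlparse

    def wget_output_path(link):
        # Hypothetical sketch, not the actual util.py implementation:
        # wget mirrors a page under a directory named after its domain,
        # and a bare "/" path is assumed to be saved as index.html.
        url = urlparse(link['url'])
        path = url.path.lstrip('/') or 'index.html'
        return '{}/{}'.format(url.netloc, path)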
links.py (17 changes)

@@ -33,7 +33,7 @@ Link {
 """

 import datetime
-from urllib.parse import unquote
+from html import unescape

 from util import (
     domain,
@@ -41,6 +41,7 @@ from util import (
     str_between,
     get_link_type,
     merge_links,
+    wget_output_path,
 )
 from config import ANSI
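The swapped import is the heart of the fix: titles scraped out of HTML carry entity escapes like &amp; and &#39;, not percent-encoding, so urllib.parse.unquote was the wrong decoder and could mangle titles containing literal percent sequences. A quick illustration with a made-up title:

    from html import unescape
    from urllib.parse import unquote

    title = 'Ben &amp; Jerry&#39;s 100%25 Guide'   # made-up scraped title

    unquote(title)    # 'Ben &amp; Jerry&#39;s 100% Guide'  (entities untouched, "%25" wrongly decoded)
    unescape(title)   # "Ben & Jerry's 100%25 Guide"        (entities decoded, "%" left alone)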
@@ -54,6 +55,19 @@ def validate_links(links):
         print('[X] No links found :(')
         raise SystemExit(1)

+    for link in links:
+        link['title'] = unescape(link['title'])
+        link['latest'] = link.get('latest') or {}
+
+        if not link['latest'].get('wget'):
+            link['latest']['wget'] = wget_output_path(link)
+
+        if not link['latest'].get('pdf'):
+            link['latest']['pdf'] = wget_output_path(link)
+
+        if not link['latest'].get('screenshot'):
+            link['latest']['screenshot'] = wget_output_path(link)
+
     return list(links)
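Net effect of the new loop: every link leaves validate_links with an unescaped title and a 'latest' dict that always carries wget, pdf, and screenshot entries defaulted to the expected output path. A rough before/after on an invented link dict:

    link = {
        'url': 'https://example.com/article',    # invented example values
        'title': 'Tips &amp; Tricks',
        'timestamp': '1510365141',
    }                                            # note: no 'latest' key at all

    # after the validate_links loop shown above:
    #   link['title']  == 'Tips & Tricks'
    #   link['latest'] == {'wget': ..., 'pdf': ..., 'screenshot': ...},
    #                     each value defaulted via wget_output_path(link)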
@@ -86,7 +100,6 @@ def uniquefied_links(sorted_links):
     unique_timestamps = {}
     for link in unique_urls.values():
         link['timestamp'] = lowest_uniq_timestamp(unique_timestamps, link['timestamp'])
-        link['title'] = unquote(link['title'])
         unique_timestamps[link['timestamp']] = link

     return unique_timestamps.values()
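The line removed in the hunk above was the other half of the fix: the old unquote call percent-decoded titles that were never percent-encoded. Title decoding now happens once, in validate_links, with html.unescape.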