1
0
Fork 0

catch json parse errors in link archiving

This commit is contained in:
Nick Sweeting 2019-02-04 08:00:08 -08:00
parent c37941efd1
commit ad7038e031

View file

@ -80,43 +80,46 @@ def archive_links(archive_path, links, source=None, resume=None):
def archive_link(link_dir, link, overwrite=True):
    """Run every enabled archive method for one link and save its index.

    Downloads the DOM, PDF, screenshot and other enabled outputs into
    ``link_dir``.

    Args:
        link_dir: Folder that holds this link's archive output. It is created
            when it does not exist yet.
        link: Dict describing the link. When ``link_dir`` already exists, the
            index stored there is loaded first and the keys of ``link`` take
            precedence over the stored ones.
        overwrite: Passed through unchanged to every fetch method.

    Returns:
        The link dict as returned by the last step that completed. If any
        step raises, the error is printed and the link is returned in the
        state it had when the failure happened; the link index is not
        written in that case.
    """
    try:
        update_existing = os.path.exists(link_dir)
        if update_existing:
            # Later keys win, so values passed by the caller override the
            # ones already stored in the folder's index.
            link = {
                **parse_json_link_index(link_dir),
                **link,
            }
        else:
            os.makedirs(link_dir)

        log_link_archive(link_dir, link, update_existing)

        if FETCH_FAVICON:
            link = fetch_favicon(link_dir, link, overwrite=overwrite)

        if FETCH_WGET:
            link = fetch_wget(link_dir, link, overwrite=overwrite)

        if FETCH_PDF:
            link = fetch_pdf(link_dir, link, overwrite=overwrite)

        if FETCH_SCREENSHOT:
            link = fetch_screenshot(link_dir, link, overwrite=overwrite)

        if FETCH_DOM:
            link = fetch_dom(link_dir, link, overwrite=overwrite)

        if SUBMIT_ARCHIVE_DOT_ORG:
            link = archive_dot_org(link_dir, link, overwrite=overwrite)

        if FETCH_GIT:
            link = fetch_git(link_dir, link, overwrite=overwrite)

        if FETCH_MEDIA:
            link = fetch_media(link_dir, link, overwrite=overwrite)

        write_link_index(link_dir, link)

    except Exception as err:
        # Deliberately broad: one bad link (e.g. an unparsable JSON index)
        # must not abort the whole run. The template needs .format() to
        # fill its placeholders; without it they were printed verbatim.
        print(' ! Failed to archive link: {err.__class__.__name__}: {err}'.format(err=err))

    return link