From 8b850393dfab878bf1cd30ca258c6a4b323ddfd9 Mon Sep 17 00:00:00 2001 From: William Esz Date: Mon, 15 Oct 2018 13:07:20 +0200 Subject: [PATCH 1/2] Fix archive_dot_org CMD `curl -I {url}` returns 404 --- archiver/archive_methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/archiver/archive_methods.py b/archiver/archive_methods.py index 351e5873..f1416f1a 100644 --- a/archiver/archive_methods.py +++ b/archiver/archive_methods.py @@ -355,7 +355,7 @@ def archive_dot_org(link_dir, link, timeout=TIMEOUT): submit_url = 'https://web.archive.org/save/{}'.format(link['url'].split('?', 1)[0]) success = False - CMD = ['curl', '-I', submit_url] + CMD = ['curl', '-L', '-I', '-X', 'GET', submit_url] end = progress(timeout, prefix=' ') try: result = run(CMD, stdout=PIPE, stderr=DEVNULL, cwd=link_dir, timeout=timeout + 1) # archive.org.txt From a59d609571b787ae4fc02c261cf52152e9aa820c Mon Sep 17 00:00:00 2001 From: William Esz Date: Mon, 15 Oct 2018 13:09:31 +0200 Subject: [PATCH 2/2] Fix archive_dot_org submit_url It was removing functional query parameters. (e.g., https://news.ycombinator.com/item?id=18216459) --- archiver/archive_methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/archiver/archive_methods.py b/archiver/archive_methods.py index f1416f1a..32bc6731 100644 --- a/archiver/archive_methods.py +++ b/archiver/archive_methods.py @@ -352,7 +352,7 @@ def archive_dot_org(link_dir, link, timeout=TIMEOUT): archive_org_url = open(path, 'r').read().strip() return {'output': archive_org_url, 'status': 'skipped'} - submit_url = 'https://web.archive.org/save/{}'.format(link['url'].split('?', 1)[0]) + submit_url = 'https://web.archive.org/save/{}'.format(link['url']) success = False CMD = ['curl', '-L', '-I', '-X', 'GET', submit_url]