cleanup ARCHIVE_DIR paths
parent 46ea65d4f2
commit c90f4bfd5b

3 changed files with 8 additions and 5 deletions
@@ -27,6 +27,7 @@ from config import (
     CHROME_USER_DATA_DIR,
     TIMEOUT,
     ANSI,
+    ARCHIVE_DIR,
 )
 from util import (
     check_dependencies,
@@ -50,7 +51,7 @@ def archive_links(archive_path, links, source=None, resume=None):

     try:
         for idx, link in enumerate(to_archive):
-            link_dir = os.path.join(archive_path, 'archive', link['timestamp'])
+            link_dir = os.path.join(ARCHIVE_DIR, link['timestamp'])
             archive_link(link_dir, link)

     except (KeyboardInterrupt, SystemExit, Exception) as e:
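The archive_links hunk above replaces an inline os.path.join(archive_path, 'archive', ...) with the new ARCHIVE_DIR constant from config. A minimal sketch of the equivalence, assuming archive_path pointed at the output folder as the call sites suggest; the OUTPUT_DIR value and timestamp are made up, and POSIX-style paths are assumed:

import os

OUTPUT_DIR = '/repo/output'                          # hypothetical value
ARCHIVE_DIR = os.path.join(OUTPUT_DIR, 'archive')    # as defined in config below

link = {'timestamp': '1515404841'}                   # made-up example timestamp

# before: the archive subfolder was assembled inline at each call site
old_dir = os.path.join(OUTPUT_DIR, 'archive', link['timestamp'])
# after: the same path comes from the single shared constant
new_dir = os.path.join(ARCHIVE_DIR, link['timestamp'])

assert old_dir == new_dir == '/repo/output/archive/1515404841'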
@@ -36,6 +36,7 @@ FOOTER_INFO = os.getenv('FOOTER_INFO', 'Content is hosted
 REPO_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))

 OUTPUT_DIR = os.path.join(REPO_DIR, 'output')
+ARCHIVE_DIR = os.path.join(OUTPUT_DIR, 'archive')
 SOURCES_DIR = os.path.join(OUTPUT_DIR, 'sources')

 PYTHON_PATH = os.path.join(REPO_DIR, 'archiver')
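The constant added to config above resolves relative to the repo checkout, since REPO_DIR is derived from the config module's own location. A sketch of the resulting layout, with a made-up checkout path standing in for what os.path.dirname(os.path.abspath(__file__)) would produce:

import os

REPO_DIR = '/home/user/archiver-repo'                # made-up checkout path
OUTPUT_DIR = os.path.join(REPO_DIR, 'output')
ARCHIVE_DIR = os.path.join(OUTPUT_DIR, 'archive')
SOURCES_DIR = os.path.join(OUTPUT_DIR, 'sources')

print(ARCHIVE_DIR)    # /home/user/archiver-repo/output/archive
print(SOURCES_DIR)    # /home/user/archiver-repo/output/sources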
@@ -16,6 +16,7 @@ from config import (
     REPO_DIR,
     SOURCES_DIR,
     OUTPUT_DIR,
+    ARCHIVE_DIR,
     TIMEOUT,
     TERM_WIDTH,
     SHOW_PROGRESS,
@@ -262,7 +263,7 @@ def find_link(folder, links):
     timestamp = folder.split('.')[0]
     for link in links:
         if link['timestamp'].startswith(timestamp):
-            if link['domain'] in os.listdir(os.path.join(OUTPUT_DIR, 'archive', folder)):
+            if link['domain'] in os.listdir(os.path.join(ARCHIVE_DIR, folder)):
                 return link  # careful now, this isn't safe for most ppl
             if link['domain'] in parse_url(folder):
                 return link
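The find_link change keeps the same directory listing, just routed through ARCHIVE_DIR. A sketch of the timestamp-prefix matching it relies on, with hypothetical link data and folder name:

links = [
    {'timestamp': '1515404841', 'domain': 'example.com'},
    {'timestamp': '1515404999', 'domain': 'example.org'},
]
folder = '1515404841.0'                  # folder names may carry a suffix

timestamp = folder.split('.')[0]         # -> '1515404841'
matches = [l for l in links if l['timestamp'].startswith(timestamp)]
assert [l['domain'] for l in matches] == ['example.com']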
@@ -271,7 +272,7 @@ def find_link(folder, links):

 def parse_url(folder):
     """for a given archive folder, figure out what url it's for"""
-    link_json = os.path.join(OUTPUT_DIR, 'archive', folder, 'index.json')
+    link_json = os.path.join(ARCHIVE_DIR, folder, 'index.json')
     if os.path.exists(link_json):
         with open(link_json, 'r') as f:
             try:
@@ -282,7 +283,7 @@ def parse_url(folder):
         except ValueError:
             print('File contains invalid JSON: {}!'.format(link_json))

-    archive_org_txt = os.path.join(OUTPUT_DIR, 'archive', folder, 'archive.org.txt')
+    archive_org_txt = os.path.join(ARCHIVE_DIR, folder, 'archive.org.txt')
     if os.path.exists(archive_org_txt):
         with open(archive_org_txt, 'r') as f:
             original_link = f.read().strip().split('/http', 1)[-1]
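Both parse_url hunks swap the same path prefix. A self-contained sketch of the index.json branch, assuming each folder's index stores the link's URL under a 'url' key; url_from_index is a hypothetical helper for illustration, not a function in the repo:

import os
import json

def url_from_index(archive_dir, folder):
    """Hypothetical helper mirroring parse_url's index.json branch."""
    link_json = os.path.join(archive_dir, folder, 'index.json')
    if os.path.exists(link_json):
        with open(link_json, 'r') as f:
            try:
                # json.JSONDecodeError subclasses ValueError, matching the
                # except clause in the diff above
                return json.load(f).get('url', '')
            except ValueError:
                print('File contains invalid JSON: {}!'.format(link_json))
    return ''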
@@ -417,7 +418,7 @@ def wget_output_path(link, look_in=None):
     # instead of trying to emulate it here, we just look in the output folder
     # to see what html file wget actually created as the output
     wget_folder = link['base_url'].rsplit('/', 1)[0].split('/')
-    look_in = os.path.join(OUTPUT_DIR, 'archive', link['timestamp'], *wget_folder)
+    look_in = os.path.join(ARCHIVE_DIR, link['timestamp'], *wget_folder)

     if look_in and os.path.exists(look_in):
         html_files = [
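A worked example of the wget_folder computation above, assuming base_url is a scheme-less URL as the surrounding code implies; the URL, timestamp, and ARCHIVE_DIR value are all made up:

import os

base_url = 'example.com/docs/page.html'              # made-up, scheme-less URL
wget_folder = base_url.rsplit('/', 1)[0].split('/')  # drop the filename, then
assert wget_folder == ['example.com', 'docs']        # split into path segments

ARCHIVE_DIR = '/repo/output/archive'                 # hypothetical value
look_in = os.path.join(ARCHIVE_DIR, '1515404841', *wget_folder)
assert look_in == '/repo/output/archive/1515404841/example.com/docs'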