commit 93216a3c3e (parent bc1bc9fe02)
new version handling and absolute imports

9 changed files with 58 additions and 61 deletions

@@ -13,34 +13,37 @@ __package__ = 'archivebox'
 import os
 import sys

 from typing import List, Optional

-from schema import Link
-from links import links_after_timestamp
-from index import write_links_index, load_links_index
-from archive_methods import archive_link
-from config import (
+from .schema import Link
+from .links import links_after_timestamp
+from .index import write_links_index, load_links_index
+from .archive_methods import archive_link
+from .config import (
     ONLY_NEW,
     OUTPUT_DIR,
-    GIT_SHA,
+    PYTHON_DIR,
+    VERSION,
 )
-from util import (
+from .util import (
     enforce_types,
     save_remote_source,
     save_stdin_source,
 )
-from logs import (
+from .logs import (
     log_archiving_started,
     log_archiving_paused,
     log_archiving_finished,
 )

 __AUTHOR__ = 'Nick Sweeting <git@nicksweeting.com>'
-__VERSION__ = GIT_SHA[:9]
+__VERSION__ = VERSION
 __DESCRIPTION__ = 'ArchiveBox: The self-hosted internet archive.'
 __DOCUMENTATION__ = 'https://github.com/pirate/ArchiveBox/wiki'


 def print_help():
     print('ArchiveBox: The self-hosted internet archive.\n')
     print("Documentation:")
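
Note: with __package__ = 'archivebox' set at the top of this module, its siblings are loaded as members of the archivebox package, so the bare "from schema import Link" spelling only resolves if the archivebox/ directory itself happens to be on sys.path; the dotted spelling is an explicit relative import that always resolves inside the package. A minimal sketch of the difference, using a hypothetical package name rather than the real ArchiveBox tree:

    # hypothetical layout, for illustration only:
    #   mypkg/
    #       __init__.py
    #       schema.py      # defines: class Link: ...
    #       archive.py     # the module below
    #
    # mypkg/archive.py
    from .schema import Link     # relative: always resolves to mypkg.schema
    # from schema import Link    # absolute: only works if mypkg/ itself is on sys.path

    if __name__ == '__main__':
        # run from the directory containing mypkg/ as:  python -m mypkg.archive
        print(Link)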

----------------------------------------

@@ -4,13 +4,13 @@ from typing import Dict, List, Tuple
 from collections import defaultdict
 from datetime import datetime

-from schema import Link, ArchiveResult, ArchiveError
-from index import (
+from .schema import Link, ArchiveResult, ArchiveError
+from .index import (
     write_link_index,
     patch_links_index,
     load_json_link_index,
 )
-from config import (
+from .config import (
     CURL_BINARY,
     GIT_BINARY,
     WGET_BINARY,

@@ -31,7 +31,7 @@ from config import (
     ANSI,
     OUTPUT_DIR,
     GIT_DOMAINS,
-    GIT_SHA,
+    VERSION,
     WGET_USER_AGENT,
     CHECK_SSL_VALIDITY,
     COOKIES_FILE,

@@ -43,7 +43,7 @@ from config import (
     ONLY_NEW,
     WGET_AUTO_COMPRESSION,
 )
-from util import (
+from .util import (
     enforce_types,
     domain,
     extension,

@@ -58,7 +58,7 @@ from util import (
     run, PIPE, DEVNULL,
     Link,
 )
-from logs import (
+from .logs import (
     log_link_archiving_started,
     log_link_archiving_finished,
     log_archive_method_started,

@@ -123,6 +123,7 @@ def archive_link(link: Link, page=None) -> Link:
             if was_changed:
                 patch_links_index(link)

+
         log_link_archiving_finished(link.link_dir, link, is_new, stats)

     except KeyboardInterrupt:

@@ -606,7 +607,7 @@ def archive_dot_org(link_dir: str, link: Link, timeout: int=TIMEOUT) -> ArchiveResult:
         CURL_BINARY,
         '--location',
         '--head',
-        '--user-agent', 'ArchiveBox/{} (+https://github.com/pirate/ArchiveBox/)'.format(GIT_SHA),  # be nice to the Archive.org people and show them where all this ArchiveBox traffic is coming from
+        '--user-agent', 'ArchiveBox/{} (+https://github.com/pirate/ArchiveBox/)'.format(VERSION),  # be nice to the Archive.org people and show them where all this ArchiveBox traffic is coming from
         '--max-time', str(timeout),
         *(() if CHECK_SSL_VALIDITY else ('--insecure',)),
         submit_url,

----------------------------------------

@@ -40,7 +40,7 @@ SUBMIT_ARCHIVE_DOT_ORG = os.getenv('SUBMIT_ARCHIVE_DOT_ORG', 'True').lower() == 'true'
 CHECK_SSL_VALIDITY = os.getenv('CHECK_SSL_VALIDITY', 'True').lower() == 'true'
 RESOLUTION = os.getenv('RESOLUTION', '1440,2000')
 GIT_DOMAINS = os.getenv('GIT_DOMAINS', 'github.com,bitbucket.org,gitlab.com').split(',')
-WGET_USER_AGENT = os.getenv('WGET_USER_AGENT', 'ArchiveBox/{GIT_SHA} (+https://github.com/pirate/ArchiveBox/) wget/{WGET_VERSION}')
+WGET_USER_AGENT = os.getenv('WGET_USER_AGENT', 'ArchiveBox/{VERSION} (+https://github.com/pirate/ArchiveBox/) wget/{WGET_VERSION}')
 COOKIES_FILE = os.getenv('COOKIES_FILE', None)
 CHROME_USER_DATA_DIR = os.getenv('CHROME_USER_DATA_DIR', None)
 CHROME_HEADLESS = os.getenv('CHROME_HEADLESS', 'True').lower() == 'true'

@@ -163,21 +163,13 @@ def find_chrome_data_dir() -> Optional[str]:
     return None


-def get_git_version() -> str:
-    """get the git commit hash of the python code folder (aka code version)"""
-    try:
-        return run([GIT_BINARY, 'rev-list', '-1', 'HEAD', './'], stdout=PIPE, cwd=REPO_DIR).stdout.strip().decode()
-    except Exception:
-        print('[!] Warning: unable to determine git version, is git installed and in your $PATH?')
-        return 'unknown'
-
-
 # ******************************************************************************
 # ************************ Environment & Dependencies **************************
 # ******************************************************************************

 try:
-    GIT_SHA = get_git_version()
+    VERSION = open(os.path.join(PYTHON_DIR, 'VERSION'), 'r').read().strip()
+    GIT_SHA = VERSION.split('+')[1]

     ### Terminal Configuration
     TERM_WIDTH = lambda: shutil.get_terminal_size((100, 10)).columns

@@ -234,7 +226,7 @@ try:
     WGET_AUTO_COMPRESSION = not run([WGET_BINARY, "--compression=auto", "--help"], stdout=DEVNULL).returncode

     WGET_USER_AGENT = WGET_USER_AGENT.format(
-        GIT_SHA=GIT_SHA[:9],
+        VERSION=VERSION,
         WGET_VERSION=WGET_VERSION or '',
     )
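
For reference, the new version handling replaces the git rev-list lookup above with a plain-text VERSION file shipped next to the Python code, and derives GIT_SHA from that string, so the file is evidently expected to hold a "semver+sha"-style value. A standalone sketch of the same parsing (the example VERSION contents are made up, not the real file):

    import os

    PYTHON_DIR = os.path.dirname(os.path.abspath(__file__))

    # assumes a VERSION file next to this module containing e.g. "0.3.5+93216a3"
    VERSION = open(os.path.join(PYTHON_DIR, 'VERSION'), 'r').read().strip()
    GIT_SHA = VERSION.split('+')[1]   # -> "93216a3"; raises IndexError if the "+sha" suffix is missing

    print(VERSION, GIT_SHA)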

----------------------------------------

@@ -6,15 +6,16 @@ from string import Template
 from typing import List, Tuple, Iterator, Optional
 from dataclasses import fields

-from schema import Link, ArchiveIndex, ArchiveResult
-from config import (
+from .schema import Link, ArchiveResult
+from .config import (
     OUTPUT_DIR,
     TEMPLATES_DIR,
+    VERSION,
     GIT_SHA,
     FOOTER_INFO,
     TIMEOUT,
 )
-from util import (
+from .util import (
     merge_links,
     chmod_file,
     urlencode,

@@ -25,9 +26,9 @@ from util import (
     TimedProgress,
     copy_and_overwrite,
 )
-from parse import parse_links
-from links import validate_links
-from logs import (
+from .parse import parse_links
+from .links import validate_links
+from .logs import (
     log_indexing_process_started,
     log_indexing_started,
     log_indexing_finished,

@@ -178,8 +179,8 @@ def write_html_links_index(out_dir: str, links: List[Link], finished: bool=False
         'date_updated': datetime.now().strftime('%Y-%m-%d'),
         'time_updated': datetime.now().strftime('%Y-%m-%d %H:%M'),
         'footer_info': FOOTER_INFO,
+        'version': VERSION,
         'git_sha': GIT_SHA,
-        'short_git_sha': GIT_SHA[:8],
         'rows': link_rows,
         'status': 'finished' if finished else 'running',
     }

----------------------------------------

@@ -22,8 +22,8 @@ Link {
 from typing import Iterable
 from collections import OrderedDict

-from schema import Link
-from util import (
+from .schema import Link
+from .util import (
     scheme,
     fuzzy_url,
     merge_links,

----------------------------------------

@@ -24,8 +24,8 @@ from typing import Tuple, List, IO, Iterable
 from datetime import datetime
 import xml.etree.ElementTree as etree

-from config import TIMEOUT
-from util import (
+from .config import TIMEOUT
+from .util import (
     htmldecode,
     str_between,
     URL_REGEX,

----------------------------------------

@@ -108,60 +108,60 @@ class Link:

     @property
     def link_dir(self) -> str:
-        from config import ARCHIVE_DIR
+        from .config import ARCHIVE_DIR
         return os.path.join(ARCHIVE_DIR, self.timestamp)

     @property
     def archive_path(self) -> str:
-        from config import ARCHIVE_DIR_NAME
+        from .config import ARCHIVE_DIR_NAME
         return '{}/{}'.format(ARCHIVE_DIR_NAME, self.timestamp)

     ### URL Helpers
     @property
     def urlhash(self):
-        from util import hashurl
+        from .util import hashurl

         return hashurl(self.url)

     @property
     def extension(self) -> str:
-        from util import extension
+        from .util import extension
         return extension(self.url)

     @property
     def domain(self) -> str:
-        from util import domain
+        from .util import domain
         return domain(self.url)

     @property
     def path(self) -> str:
-        from util import path
+        from .util import path
         return path(self.url)

     @property
     def basename(self) -> str:
-        from util import basename
+        from .util import basename
         return basename(self.url)

     @property
     def base_url(self) -> str:
-        from util import base_url
+        from .util import base_url
         return base_url(self.url)

     ### Pretty Printing Helpers
     @property
     def bookmarked_date(self) -> Optional[str]:
-        from util import ts_to_date
+        from .util import ts_to_date
         return ts_to_date(self.timestamp) if self.timestamp else None

     @property
     def updated_date(self) -> Optional[str]:
-        from util import ts_to_date
+        from .util import ts_to_date
         return ts_to_date(self.updated) if self.updated else None

     @property
     def oldest_archive_date(self) -> Optional[datetime]:
-        from util import ts_to_date
+        from .util import ts_to_date

         most_recent = min(
             (ts_to_date(result.start_ts)

@@ -173,7 +173,7 @@ class Link:

     @property
     def newest_archive_date(self) -> Optional[datetime]:
-        from util import ts_to_date
+        from .util import ts_to_date

         most_recent = max(
             (ts_to_date(result.start_ts)

@@ -197,13 +197,13 @@ class Link:

     @property
     def is_static(self) -> bool:
-        from util import is_static_file
+        from .util import is_static_file
         return is_static_file(self.url)

     @property
     def is_archived(self) -> bool:
-        from config import ARCHIVE_DIR
-        from util import domain
+        from .config import ARCHIVE_DIR
+        from .util import domain

         return os.path.exists(os.path.join(
             ARCHIVE_DIR,

@@ -240,7 +240,7 @@ class Link:
         return latest

     def canonical_outputs(self) -> Dict[str, Optional[str]]:
-        from util import wget_output_path
+        from .util import wget_output_path
         canonical = {
             'index_url': 'index.html',
             'favicon_url': 'favicon.ico',
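
All of these Link property imports stay function-local (only their spelling changes to the relative form), presumably so that util and config can import schema without creating an import cycle. A minimal sketch of the deferred-import pattern, with hypothetical module names:

    # mypkg/schema.py (hypothetical names, illustrating the pattern only)
    import os
    from dataclasses import dataclass

    @dataclass(frozen=True)
    class Link:
        url: str
        timestamp: str

        @property
        def link_dir(self) -> str:
            # imported at call time rather than at module-import time, so that
            # mypkg.config is free to import mypkg.schema itself
            from .config import ARCHIVE_DIR
            return os.path.join(ARCHIVE_DIR, self.timestamp)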

----------------------------------------

@@ -209,7 +209,7 @@
         <center>
             <small>
                 Archive created using <a href="https://github.com/pirate/ArchiveBox" title="Github">ArchiveBox</a>
-                version <a href="https://github.com/pirate/ArchiveBox/commit/$git_sha" title="Git commit">$short_git_sha</a> |
+                version <a href="https://github.com/pirate/ArchiveBox/commit/$git_sha" title="Git commit">$version</a> |
                 Download index as <a href="index.json" title="JSON summary of archived links.">JSON</a>
                 <br/><br/>
                 $footer_info
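
The $version and $git_sha placeholders above line up with the context dict built in write_html_links_index (the index.py hunk earlier imports string.Template), so the footer is evidently filled in by Template substitution. A small standalone sketch of that mechanism, with made-up values:

    from string import Template

    # stripped-down stand-in for the footer fragment of the index template
    footer = Template('version <a href="https://github.com/pirate/ArchiveBox/commit/$git_sha">$version</a>')

    print(footer.substitute({
        'version': '0.3.5',        # illustrative values only
        'git_sha': '93216a3c3e',
    }))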

----------------------------------------

@@ -25,8 +25,8 @@ from subprocess import (

 from base32_crockford import encode as base32_encode

-from schema import Link
-from config import (
+from .schema import Link
+from .config import (
     ANSI,
     TERM_WIDTH,
     SOURCES_DIR,

@@ -37,9 +37,9 @@ from config import (
     CHECK_SSL_VALIDITY,
     WGET_USER_AGENT,
     CHROME_OPTIONS,
-    PYTHON_PATH,
+    PYTHON_DIR,
 )
-from logs import pretty_path
+from .logs import pretty_path

 ### Parsing Helpers


@@ -334,7 +334,7 @@ def wget_output_path(link: Link) -> Optional[str]:

 @enforce_types
 def read_js_script(script_name: str) -> str:
-    script_path = os.path.join(PYTHON_PATH, 'scripts', script_name)
+    script_path = os.path.join(PYTHON_DIR, 'scripts', script_name)

     with open(script_path, 'r') as f:
         return f.read().split('// INFO BELOW HERE')[0].strip()