working archivebox command inside django legacy folder
2
VERSION
|
@ -1 +1 @@
|
|||
0.3.0
|
||||
0.4.0
|
||||
|
|
|
@ -1,5 +0,0 @@
|
|||
|
||||
|
||||
#__name__ = 'archivebox'
|
||||
#__package__ = 'archivebox'
|
||||
|
21
archivebox/__main__.py
Executable file
|
@ -0,0 +1,21 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Main ArchiveBox command line application entrypoint.
|
||||
"""
|
||||
|
||||
__package__ = 'archivebox'
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
PYTHON_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
sys.path.append(PYTHON_DIR)
|
||||
|
||||
from .env import *
|
||||
from .legacy.archive import main
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main(sys.argv)
|
||||
|
|
@ -1 +0,0 @@
|
|||
../VERSION
|
|
@ -1,123 +0,0 @@
|
|||
"""
|
||||
Django settings for archivebox project.
|
||||
|
||||
Generated by 'django-admin startproject' using Django 2.1.7.
|
||||
|
||||
For more information on this file, see
|
||||
https://docs.djangoproject.com/en/2.1/topics/settings/
|
||||
|
||||
For the full list of settings and their values, see
|
||||
https://docs.djangoproject.com/en/2.1/ref/settings/
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
# Build paths inside the project like this: os.path.join(COLLECTION_DIR, ...)
|
||||
REPO_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
|
||||
COLLECTION_DIR = os.path.abspath(os.curdir)
|
||||
|
||||
print(REPO_DIR)
|
||||
print(COLLECTION_DIR)
|
||||
raise SystemExit(0)
|
||||
|
||||
|
||||
# Quick-start development settings - unsuitable for production
|
||||
# See https://docs.djangoproject.com/en/2.1/howto/deployment/checklist/
|
||||
|
||||
# SECURITY WARNING: keep the secret key used in production secret!
|
||||
SECRET_KEY = 'm-ma!-z^0b5w4%**le#ig!7-d@h($t02q*96h*-ua+$lm9bvao'
|
||||
|
||||
# SECURITY WARNING: don't run with debug turned on in production!
|
||||
DEBUG = True
|
||||
|
||||
ALLOWED_HOSTS = []
|
||||
|
||||
|
||||
# Application definition
|
||||
|
||||
INSTALLED_APPS = [
|
||||
'django.contrib.admin',
|
||||
'django.contrib.auth',
|
||||
'django.contrib.contenttypes',
|
||||
'django.contrib.sessions',
|
||||
'django.contrib.messages',
|
||||
'django.contrib.staticfiles',
|
||||
|
||||
'core',
|
||||
]
|
||||
|
||||
MIDDLEWARE = [
|
||||
'django.middleware.security.SecurityMiddleware',
|
||||
'django.contrib.sessions.middleware.SessionMiddleware',
|
||||
'django.middleware.common.CommonMiddleware',
|
||||
'django.middleware.csrf.CsrfViewMiddleware',
|
||||
'django.contrib.auth.middleware.AuthenticationMiddleware',
|
||||
'django.contrib.messages.middleware.MessageMiddleware',
|
||||
'django.middleware.clickjacking.XFrameOptionsMiddleware',
|
||||
]
|
||||
|
||||
ROOT_URLCONF = 'archivebox.urls'
|
||||
|
||||
ACTIVE_THEME = 'default'
|
||||
TEMPLATES_DIR = os.path.join(REPO_DIR, 'themes', ACTIVE_THEME)
|
||||
TEMPLATES = [
|
||||
{
|
||||
'BACKEND': 'django.template.backends.django.DjangoTemplates',
|
||||
'DIRS': [TEMPLATES_DIR],
|
||||
'APP_DIRS': True,
|
||||
'OPTIONS': {
|
||||
'context_processors': [
|
||||
'django.template.context_processors.debug',
|
||||
'django.template.context_processors.request',
|
||||
'django.contrib.auth.context_processors.auth',
|
||||
'django.contrib.messages.context_processors.messages',
|
||||
],
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
WSGI_APPLICATION = 'archivebox.wsgi.application'
|
||||
|
||||
|
||||
# Database
|
||||
# https://docs.djangoproject.com/en/2.1/ref/settings/#databases
|
||||
|
||||
DATABASES = {
|
||||
'default': {
|
||||
'ENGINE': 'django.db.backends.sqlite3',
|
||||
'NAME': os.path.join(COLLECTION_DIR, 'database.sqlite3'),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# Password validation
|
||||
# https://docs.djangoproject.com/en/2.1/ref/settings/#auth-password-validators
|
||||
|
||||
AUTH_PASSWORD_VALIDATORS = [
|
||||
{
|
||||
'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
|
||||
},
|
||||
{
|
||||
'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
|
||||
},
|
||||
{
|
||||
'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
|
||||
},
|
||||
{
|
||||
'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
# Internationalization
|
||||
# https://docs.djangoproject.com/en/2.1/topics/i18n/
|
||||
LANGUAGE_CODE = 'en-us'
|
||||
TIME_ZONE = 'UTC'
|
||||
USE_I18N = True
|
||||
USE_L10N = True
|
||||
USE_TZ = True
|
||||
|
||||
|
||||
# Static files (CSS, JavaScript, Images)
|
||||
# https://docs.djangoproject.com/en/2.1/howto/static-files/
|
||||
STATIC_URL = '/static/'
|
|
@ -1,10 +1,11 @@
|
|||
from django.core.management.base import BaseCommand
|
||||
|
||||
|
||||
from core.archive import main
|
||||
from legacy.archive import main
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
help = 'ArchiveBox test.bee'
|
||||
|
||||
def handle(self, *args, **kwargs):
|
||||
main()
|
||||
main(*args)
|
||||
|
|
78
archivebox/core/settings.py
Normal file
|
@ -0,0 +1,78 @@
|
|||
import os
|
||||
|
||||
from legacy.config import (
|
||||
REPO_DIR,
|
||||
OUTPUT_DIR,
|
||||
TEMPLATES_DIR,
|
||||
DATABASE_DIR,
|
||||
)
|
||||
|
||||
|
||||
SECRET_KEY = '---------------- not a valid secret key ! ----------------'
|
||||
DEBUG = True
|
||||
|
||||
|
||||
INSTALLED_APPS = [
|
||||
'django.contrib.admin',
|
||||
'django.contrib.auth',
|
||||
'django.contrib.contenttypes',
|
||||
'django.contrib.sessions',
|
||||
'django.contrib.messages',
|
||||
'django.contrib.staticfiles',
|
||||
|
||||
'core',
|
||||
]
|
||||
|
||||
MIDDLEWARE = [
|
||||
'django.middleware.security.SecurityMiddleware',
|
||||
'django.contrib.sessions.middleware.SessionMiddleware',
|
||||
'django.middleware.common.CommonMiddleware',
|
||||
'django.middleware.csrf.CsrfViewMiddleware',
|
||||
'django.contrib.auth.middleware.AuthenticationMiddleware',
|
||||
'django.contrib.messages.middleware.MessageMiddleware',
|
||||
'django.middleware.clickjacking.XFrameOptionsMiddleware',
|
||||
]
|
||||
|
||||
ROOT_URLCONF = 'core.urls'
|
||||
TEMPLATES = [
|
||||
{
|
||||
'BACKEND': 'django.template.backends.django.DjangoTemplates',
|
||||
'DIRS': [TEMPLATES_DIR],
|
||||
'APP_DIRS': True,
|
||||
'OPTIONS': {
|
||||
'context_processors': [
|
||||
'django.template.context_processors.debug',
|
||||
'django.template.context_processors.request',
|
||||
'django.contrib.auth.context_processors.auth',
|
||||
'django.contrib.messages.context_processors.messages',
|
||||
],
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
WSGI_APPLICATION = 'core.wsgi.application'
|
||||
|
||||
|
||||
DATABASES = {
|
||||
'default': {
|
||||
'ENGINE': 'django.db.backends.sqlite3',
|
||||
'NAME': os.path.join(DATABASE_DIR, 'database.sqlite3'),
|
||||
}
|
||||
}
|
||||
|
||||
AUTH_PASSWORD_VALIDATORS = [
|
||||
{'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator'},
|
||||
{'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator'},
|
||||
{'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator'},
|
||||
{'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator'},
|
||||
]
|
||||
|
||||
|
||||
LANGUAGE_CODE = 'en-us'
|
||||
TIME_ZONE = 'UTC'
|
||||
USE_I18N = True
|
||||
USE_L10N = True
|
||||
USE_TZ = True
|
||||
|
||||
|
||||
STATIC_URL = '/static/'
|
11
archivebox/env.py
Normal file
|
@ -0,0 +1,11 @@
|
|||
import os
|
||||
import sys
|
||||
|
||||
|
||||
PYTHON_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
sys.path.append(PYTHON_DIR)
|
||||
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "core.settings")
|
||||
|
||||
import django
|
||||
django.setup()
|
5
archivebox/legacy/__init__.py
Normal file
|
@ -0,0 +1,5 @@
|
|||
|
||||
|
||||
#__name__ = 'archivebox'
|
||||
#__package__ = 'archivebox'
|
||||
|
|
@ -8,7 +8,7 @@ but you can also run it directly using `python3 archive.py`
|
|||
Usage & Documentation:
|
||||
https://github.com/pirate/ArchiveBox/Wiki
|
||||
"""
|
||||
__package__ = 'archivebox'
|
||||
__package__ = 'legacy'
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
@ -16,37 +16,50 @@ import shutil
|
|||
|
||||
from typing import List, Optional
|
||||
|
||||
from core.schema import Link
|
||||
from core.links import links_after_timestamp
|
||||
from core.index import write_links_index, load_links_index
|
||||
from core.archive_methods import archive_link
|
||||
from core.config import (
|
||||
from .schema import Link
|
||||
from .links import links_after_timestamp
|
||||
from .index import write_links_index, load_links_index
|
||||
from .archive_methods import archive_link
|
||||
from .config import (
|
||||
ONLY_NEW,
|
||||
OUTPUT_DIR,
|
||||
VERSION,
|
||||
ANSI,
|
||||
CURL_VERSION,
|
||||
GIT_VERSION,
|
||||
WGET_VERSION,
|
||||
YOUTUBEDL_VERSION,
|
||||
CHROME_VERSION,
|
||||
|
||||
REPO_DIR,
|
||||
PYTHON_DIR,
|
||||
LEGACY_DIR,
|
||||
TEMPLATES_DIR,
|
||||
OUTPUT_DIR,
|
||||
SOURCES_DIR,
|
||||
ARCHIVE_DIR,
|
||||
DATABASE_DIR,
|
||||
|
||||
USE_CURL,
|
||||
USE_WGET,
|
||||
USE_CHROME,
|
||||
FETCH_GIT,
|
||||
FETCH_MEDIA,
|
||||
|
||||
DJANGO_BINARY,
|
||||
CURL_BINARY,
|
||||
GIT_BINARY,
|
||||
WGET_BINARY,
|
||||
YOUTUBEDL_BINARY,
|
||||
CHROME_BINARY,
|
||||
FETCH_GIT,
|
||||
FETCH_MEDIA,
|
||||
|
||||
DJANGO_VERSION,
|
||||
CURL_VERSION,
|
||||
GIT_VERSION,
|
||||
WGET_VERSION,
|
||||
YOUTUBEDL_VERSION,
|
||||
CHROME_VERSION,
|
||||
)
|
||||
from core.util import (
|
||||
from .util import (
|
||||
enforce_types,
|
||||
handle_stdin_import,
|
||||
handle_file_import,
|
||||
)
|
||||
from core.logs import (
|
||||
from .logs import (
|
||||
log_archiving_started,
|
||||
log_archiving_paused,
|
||||
log_archiving_finished,
|
||||
|
@ -74,9 +87,26 @@ def print_help():
|
|||
print(" archivebox add --depth=1 https://example.com/feed.rss")
|
||||
print(" archivebox update --resume=15109948213.123")
|
||||
|
||||
|
||||
def print_version():
|
||||
print('ArchiveBox v{}'.format(__VERSION__))
|
||||
print()
|
||||
print('[i] Folder locations:')
|
||||
print(' REPO_DIR: ', REPO_DIR)
|
||||
print(' PYTHON_DIR: ', PYTHON_DIR)
|
||||
print(' LEGACY_DIR: ', LEGACY_DIR)
|
||||
print(' TEMPLATES_DIR: ', TEMPLATES_DIR)
|
||||
print()
|
||||
print(' OUTPUT_DIR: ', OUTPUT_DIR)
|
||||
print(' SOURCES_DIR: ', SOURCES_DIR)
|
||||
print(' ARCHIVE_DIR: ', ARCHIVE_DIR)
|
||||
print(' DATABASE_DIR: ', DATABASE_DIR)
|
||||
print()
|
||||
print(
|
||||
'[√] Django:'.ljust(14),
|
||||
'python3 {} --version\n'.format(DJANGO_BINARY),
|
||||
' '*13, DJANGO_VERSION, '\n',
|
||||
)
|
||||
print(
|
||||
'[{}] CURL:'.format('√' if USE_CURL else 'X').ljust(14),
|
||||
'{} --version\n'.format(shutil.which(CURL_BINARY)),
|
||||
|
@ -132,8 +162,11 @@ def main(args=None) -> None:
|
|||
if not os.path.exists(OUTPUT_DIR):
|
||||
print('{green}[+] Created a new archive directory: {}{reset}'.format(OUTPUT_DIR, **ANSI))
|
||||
os.makedirs(OUTPUT_DIR)
|
||||
os.makedirs(SOURCES_DIR)
|
||||
os.makedirs(ARCHIVE_DIR)
|
||||
os.makedirs(DATABASE_DIR)
|
||||
else:
|
||||
not_empty = len(set(os.listdir(OUTPUT_DIR)) - {'.DS_Store'})
|
||||
not_empty = len(set(os.listdir(OUTPUT_DIR)) - {'.DS_Store', '.venv', 'venv', 'virtualenv', '.virtualenv'})
|
||||
index_exists = os.path.exists(os.path.join(OUTPUT_DIR, 'index.json'))
|
||||
if not_empty and not index_exists:
|
||||
print(
|
|
@ -4,13 +4,13 @@ from typing import Dict, List, Tuple, Optional
|
|||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
|
||||
from core.schema import Link, ArchiveResult, ArchiveOutput
|
||||
from core.index import (
|
||||
from .schema import Link, ArchiveResult, ArchiveOutput
|
||||
from .index import (
|
||||
write_link_index,
|
||||
patch_links_index,
|
||||
load_json_link_index,
|
||||
)
|
||||
from core.config import (
|
||||
from .config import (
|
||||
CURL_BINARY,
|
||||
GIT_BINARY,
|
||||
WGET_BINARY,
|
||||
|
@ -40,7 +40,7 @@ from core.config import (
|
|||
YOUTUBEDL_VERSION,
|
||||
WGET_AUTO_COMPRESSION,
|
||||
)
|
||||
from core.util import (
|
||||
from .util import (
|
||||
enforce_types,
|
||||
domain,
|
||||
extension,
|
||||
|
@ -54,7 +54,7 @@ from core.util import (
|
|||
chrome_args,
|
||||
run, PIPE, DEVNULL,
|
||||
)
|
||||
from core.logs import (
|
||||
from .logs import (
|
||||
log_link_archiving_started,
|
||||
log_link_archiving_finished,
|
||||
log_archive_method_started,
|
|
@ -1,6 +1,7 @@
|
|||
import os
|
||||
import re
|
||||
import sys
|
||||
import django
|
||||
import shutil
|
||||
|
||||
from typing import Optional
|
||||
|
@ -58,7 +59,6 @@ YOUTUBEDL_BINARY = os.getenv('YOUTUBEDL_BINARY', 'youtube-dl')
|
|||
CHROME_BINARY = os.getenv('CHROME_BINARY', None)
|
||||
|
||||
|
||||
|
||||
# ******************************************************************************
|
||||
|
||||
### Terminal Configuration
|
||||
|
@ -79,7 +79,7 @@ if not USE_COLOR:
|
|||
ANSI = {k: '' for k in ANSI.keys()}
|
||||
|
||||
|
||||
REPO_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
|
||||
REPO_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..'))
|
||||
if OUTPUT_DIR:
|
||||
OUTPUT_DIR = os.path.abspath(OUTPUT_DIR)
|
||||
else:
|
||||
|
@ -87,11 +87,14 @@ else:
|
|||
|
||||
ARCHIVE_DIR_NAME = 'archive'
|
||||
SOURCES_DIR_NAME = 'sources'
|
||||
DATABASE_DIR_NAME = 'database'
|
||||
ARCHIVE_DIR = os.path.join(OUTPUT_DIR, ARCHIVE_DIR_NAME)
|
||||
SOURCES_DIR = os.path.join(OUTPUT_DIR, SOURCES_DIR_NAME)
|
||||
DATABASE_DIR = os.path.join(OUTPUT_DIR, DATABASE_DIR_NAME)
|
||||
|
||||
PYTHON_DIR = os.path.join(REPO_DIR, 'archivebox')
|
||||
TEMPLATES_DIR = os.path.join(PYTHON_DIR, 'templates')
|
||||
LEGACY_DIR = os.path.join(PYTHON_DIR, 'legacy')
|
||||
TEMPLATES_DIR = os.path.join(LEGACY_DIR, 'templates')
|
||||
|
||||
if COOKIES_FILE:
|
||||
COOKIES_FILE = os.path.abspath(COOKIES_FILE)
|
||||
|
@ -100,8 +103,8 @@ URL_BLACKLIST_PTN = re.compile(URL_BLACKLIST, re.IGNORECASE) if URL_BLACKLIST el
|
|||
|
||||
########################### Environment & Dependencies #########################
|
||||
|
||||
VERSION = open(os.path.join(PYTHON_DIR, 'VERSION'), 'r').read().strip()
|
||||
GIT_SHA = VERSION.split('+')[1]
|
||||
VERSION = open(os.path.join(REPO_DIR, 'VERSION'), 'r').read().strip()
|
||||
GIT_SHA = VERSION.split('+')[-1] or 'unknown'
|
||||
|
||||
### Check Python environment
|
||||
python_vers = float('{}.{}'.format(sys.version_info.major, sys.version_info.minor))
|
||||
|
@ -196,6 +199,10 @@ def find_chrome_data_dir() -> Optional[str]:
|
|||
# ******************************************************************************
|
||||
|
||||
try:
|
||||
### Get Django version
|
||||
DJANGO_BINARY = django.__file__.replace('__init__.py', 'bin/django-admin.py')
|
||||
DJANGO_VERSION = '{}.{}.{} {} ({})'.format(*django.VERSION)
|
||||
|
||||
### Make sure curl is installed
|
||||
if USE_CURL:
|
||||
USE_CURL = FETCH_FAVICON or SUBMIT_ARCHIVE_DOT_ORG
|
|
@ -5,8 +5,8 @@ from datetime import datetime
|
|||
from string import Template
|
||||
from typing import List, Tuple, Iterator, Optional, Mapping
|
||||
|
||||
from core.schema import Link, ArchiveResult
|
||||
from core.config import (
|
||||
from .schema import Link, ArchiveResult
|
||||
from .config import (
|
||||
OUTPUT_DIR,
|
||||
TEMPLATES_DIR,
|
||||
VERSION,
|
||||
|
@ -14,7 +14,7 @@ from core.config import (
|
|||
FOOTER_INFO,
|
||||
TIMEOUT,
|
||||
)
|
||||
from core.util import (
|
||||
from .util import (
|
||||
ts_to_date,
|
||||
merge_links,
|
||||
urlencode,
|
||||
|
@ -27,9 +27,9 @@ from core.util import (
|
|||
copy_and_overwrite,
|
||||
atomic_write,
|
||||
)
|
||||
from core.parse import parse_links
|
||||
from core.links import validate_links
|
||||
from core.logs import (
|
||||
from .parse import parse_links
|
||||
from .links import validate_links
|
||||
from .logs import (
|
||||
log_indexing_process_started,
|
||||
log_indexing_started,
|
||||
log_indexing_finished,
|
|
@ -1,14 +1,14 @@
|
|||
from typing import Iterable
|
||||
from collections import OrderedDict
|
||||
|
||||
from core.schema import Link
|
||||
from core.util import (
|
||||
from .schema import Link
|
||||
from .util import (
|
||||
scheme,
|
||||
fuzzy_url,
|
||||
merge_links,
|
||||
)
|
||||
|
||||
from core.config import URL_BLACKLIST_PTN
|
||||
from .config import URL_BLACKLIST_PTN
|
||||
|
||||
|
||||
def validate_links(links: Iterable[Link]) -> Iterable[Link]:
|
|
@ -5,8 +5,8 @@ from datetime import datetime
|
|||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
from core.schema import Link, ArchiveResult
|
||||
from core.config import ANSI, OUTPUT_DIR
|
||||
from .schema import Link, ArchiveResult
|
||||
from .config import ANSI, OUTPUT_DIR
|
||||
|
||||
|
||||
@dataclass
|
|
@ -24,8 +24,8 @@ from typing import Tuple, List, IO, Iterable
|
|||
from datetime import datetime
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
from core.config import TIMEOUT
|
||||
from core.util import (
|
||||
from .config import TIMEOUT
|
||||
from .util import (
|
||||
htmldecode,
|
||||
str_between,
|
||||
URL_REGEX,
|
|
@ -6,8 +6,8 @@ from os.path import exists, join
|
|||
from shutil import rmtree
|
||||
from typing import List
|
||||
|
||||
from core.config import ARCHIVE_DIR, OUTPUT_DIR
|
||||
from core.index import parse_json_links_index, write_html_links_index, write_json_links_index
|
||||
from .config import ARCHIVE_DIR, OUTPUT_DIR
|
||||
from .index import parse_json_links_index, write_html_links_index, write_json_links_index
|
||||
|
||||
|
||||
def cleanup_index(regexes: List[str], proceed: bool, delete: bool) -> None:
|
Before Width: | Height: | Size: 17 KiB After Width: | Height: | Size: 17 KiB |
Before Width: | Height: | Size: 1.6 KiB After Width: | Height: | Size: 1.6 KiB |
Before Width: | Height: | Size: 158 B After Width: | Height: | Size: 158 B |
Before Width: | Height: | Size: 201 B After Width: | Height: | Size: 201 B |
Before Width: | Height: | Size: 157 B After Width: | Height: | Size: 157 B |
Before Width: | Height: | Size: 11 KiB After Width: | Height: | Size: 11 KiB |
|
@ -26,8 +26,8 @@ from subprocess import (
|
|||
|
||||
from base32_crockford import encode as base32_encode # type: ignore
|
||||
|
||||
from core.schema import Link
|
||||
from core.config import (
|
||||
from .schema import Link
|
||||
from .config import (
|
||||
ANSI,
|
||||
TERM_WIDTH,
|
||||
SOURCES_DIR,
|
||||
|
@ -38,9 +38,8 @@ from core.config import (
|
|||
CHECK_SSL_VALIDITY,
|
||||
WGET_USER_AGENT,
|
||||
CHROME_OPTIONS,
|
||||
PYTHON_DIR,
|
||||
)
|
||||
from core.logs import pretty_path
|
||||
from .logs import pretty_path
|
||||
|
||||
### Parsing Helpers
|
||||
|
||||
|
@ -332,14 +331,6 @@ def wget_output_path(link: Link) -> Optional[str]:
|
|||
return None
|
||||
|
||||
|
||||
@enforce_types
|
||||
def read_js_script(script_name: str) -> str:
|
||||
script_path = os.path.join(PYTHON_DIR, 'scripts', script_name)
|
||||
|
||||
with open(script_path, 'r') as f:
|
||||
return f.read().split('// INFO BELOW HERE')[0].strip()
|
||||
|
||||
|
||||
### String Manipulation & Logging Helpers
|
||||
|
||||
@enforce_types
|
|
@ -3,7 +3,7 @@ import os
|
|||
import sys
|
||||
|
||||
if __name__ == '__main__':
|
||||
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'archivebox.settings')
|
||||
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
|
||||
try:
|
||||
from django.core.management import execute_from_command_line
|
||||
except ImportError as exc:
|
||||
|
|
|
@ -1,34 +0,0 @@
|
|||
<!DOCTYPE NETSCAPE-Bookmark-file-1>
|
||||
<!-- This is an automatically generated file.
|
||||
It will be read and overwritten.
|
||||
DO NOT EDIT! -->
|
||||
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
|
||||
<TITLE>Bookmarks</TITLE>
|
||||
<H1>Bookmarks Menu</H1>
|
||||
|
||||
<DL><p>
|
||||
<DT><A HREF="place:folder=BOOKMARKS_MENU&folder=UNFILED_BOOKMARKS&folder=TOOLBAR&queryType=1&sort=12&maxResults=10&excludeQueries=1" ADD_DATE="1409779227" LAST_MODIFIED="1470506008">Recently Bookmarked</A>
|
||||
<DT><A HREF="place:type=6&sort=14&maxResults=10" ADD_DATE="1470506008" LAST_MODIFIED="1470506008">Recent Tags</A>
|
||||
<HR> <DT><H3 ADD_DATE="1409779227" LAST_MODIFIED="1409779227">Mozilla Firefox</H3>
|
||||
<DL><p>
|
||||
<DT><A HREF="https://www.mozilla.org/en-US/firefox/help/" ADD_DATE="1409779227" LAST_MODIFIED="1409779227" ICON_URI="http://www.mozilla.org/2005/made-up-favicon/0-1409779227970" ICON="">Help and Tutorials</A>
|
||||
<DT><A HREF="https://www.mozilla.org/en-US/firefox/customize/" ADD_DATE="1409779227" LAST_MODIFIED="1409779227" ICON_URI="http://www.mozilla.org/2005/made-up-favicon/1-1409779227971" ICON="">Customize Firefox</A>
|
||||
<DT><A HREF="https://www.mozilla.org/en-US/contribute/" ADD_DATE="1409779227" LAST_MODIFIED="1409779227" ICON_URI="http://www.mozilla.org/2005/made-up-favicon/2-1409779227973" ICON="">Get Involved</A>
|
||||
<DT><A HREF="https://www.mozilla.org/en-US/about/" ADD_DATE="1409779227" LAST_MODIFIED="1409779227" ICON_URI="http://www.mozilla.org/2005/made-up-favicon/3-1409779227974" ICON="">About Us</A>
|
||||
</DL><p>
|
||||
<DT><H3 ADD_DATE="1497562973" LAST_MODIFIED="1497562974">[Folder Name]</H3>
|
||||
<DL><p>
|
||||
<DT><A HREF="https://duckduckgo.com/?q=firefox+export+bookmarks&t=ffhp&ia=web" ADD_DATE="1497562974" LAST_MODIFIED="1497562974" ICON_URI="https://duckduckgo.com/favicon.ico" ICON="">firefox export bookmarks at DuckDuckGo</A>
|
||||
<DT><A HREF="https://duckduckgo.com/?q=archive+firefox+bookmarks&t=ffab&ia=web" ADD_DATE="1497562974" LAST_MODIFIED="1497562974" ICON_URI="https://duckduckgo.com/favicon.ico" ICON="">archive firefox bookmarks at DuckDuckGo</A>
|
||||
<DT><A HREF="https://github.com/nodiscc" ADD_DATE="1497562974" LAST_MODIFIED="1497562974" ICON_URI="https://assets-cdn.github.com/favicon.ico" ICON="">nodiscc (nodiscc) · GitHub</A>
|
||||
<DT><A HREF="https://github.com/pirate/ArchiveBox#troubleshooting" ADD_DATE="1497562975" LAST_MODIFIED="1497562975" ICON_URI="https://assets-cdn.github.com/favicon.ico" ICON="">pirate/ArchiveBox · Github</A>
|
||||
<DT><A HREF="http://www.cs.unc.edu/~fabian/papers/foniks-oak11.pdf" ADD_DATE="1497562976" LAST_MODIFIED="1497562976" ICON_URI="https://assets-cdn.github.com/favicon.ico" ICON="">Phonotactic Reconstruction of Encrypted VoIP Conversations</A>
|
||||
<DT><A HREF="https://www.ghacks.net/2009/07/23/firefox-bookmarks-archiver/" ADD_DATE="1497562974" LAST_MODIFIED="1497562974" ICON_URI="https://www.ghacks.net/wp-content/uploads/2005/10/favicon.ico" ICON="">Firefox Bookmarks Archiver - gHacks Tech News</A>
|
||||
</DL><p>
|
||||
<DT><H3 ADD_DATE="1409779227" LAST_MODIFIED="1470506008" PERSONAL_TOOLBAR_FOLDER="true">Bookmarks Toolbar</H3>
|
||||
<DD>Add bookmarks to this folder to see them displayed on the Bookmarks Toolbar
|
||||
<DL><p>
|
||||
<DT><A HREF="place:sort=8&maxResults=10" ADD_DATE="1470506008" LAST_MODIFIED="1470506008">Most Visited</A>
|
||||
<DT><A HREF="https://www.mozilla.org/en-US/firefox/central/" ADD_DATE="1409779227" LAST_MODIFIED="1409779227">Getting Started</A>
|
||||
</DL><p>
|
||||
</DL>
|
|
@ -1,12 +0,0 @@
|
|||
<!DOCTYPE NETSCAPE-Bookmark-file-1>
|
||||
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
|
||||
<TITLE>Pinboard Bookmarks</TITLE>
|
||||
<H1>Bookmarks</H1>
|
||||
<DL>
|
||||
<p>
|
||||
|
||||
<DT><A HREF="https://github.com/trailofbits/algo" ADD_DATE="1542616733" PRIVATE="1" TOREAD="1" TAGS="vpn,scripts,toread">Algo VPN scripts</A>
|
||||
<DT><A HREF="http://www.ulisp.com/" ADD_DATE="1542374412" PRIVATE="1" TOREAD="1" TAGS="arduino,avr,embedded,lisp,toread">uLisp</A>
|
||||
|
||||
</DL>
|
||||
</p>
|
|
@ -1,8 +0,0 @@
|
|||
[{"href":"https:\/\/en.wikipedia.org\/wiki\/International_Typographic_Style","description":"International Typographic Style - Wikipedia, the free encyclopedia","extended":"","meta":"32f4cc916e6f5919cc19aceb10559cc1","hash":"3dd64e155e16731d20350bec6bef7cb5","time":"2016-06-07T11:27:08Z","shared":"no","toread":"yes","tags":""},
|
||||
{"href":"https:\/\/news.ycombinator.com\/item?id=11686984","description":"Announcing Certbot: EFF's Client for Let's Encrypt | Hacker News","extended":"","meta":"4a49602ba5d20ec3505c75d38ebc1d63","hash":"1c1acb53a5bd520e8529ce4f9600abee","time":"2016-05-13T05:46:16Z","shared":"no","toread":"yes","tags":""},
|
||||
{"href":"https:\/\/github.com\/google\/styleguide","description":"GitHub - google\/styleguide: Style guides for Google-originated open-source projects","extended":"","meta":"15a8d50f7295f18ccb6dd19cb689c68a","hash":"1028bf9872d8e4ea1b1858f4044abb58","time":"2016-02-24T08:49:25Z","shared":"no","toread":"no","tags":"code.style.guide programming reference web.dev"},
|
||||
{"href":"http:\/\/en.wikipedia.org\/wiki\/List_of_XML_and_HTML_character_entity_references","description":"List of XML and HTML character entity references - Wikipedia, the free encyclopedia","extended":"","meta":"6683a70f0f59c92c0bfd0bce653eab69","hash":"344d975c6251a8d460971fa2c43d9bbb","time":"2014-06-16T04:17:15Z","shared":"no","toread":"no","tags":"html reference web.dev typography"},
|
||||
{"href":"https:\/\/pushover.net\/","description":"Pushover: Simple Notifications for Android, iOS, and Desktop","extended":"","meta":"1e68511234d9390d10b7772c8ccc4b9e","hash":"bb93374ead8a937b18c7c46e13168a7d","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"app android"},
|
||||
{"href":"http:\/\/www.reddit.com\/r\/Android","description":"r\/android","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"reddit android 1"},
|
||||
{"href":"http:\/\/www.reddit.com\/r\/Android2","description":"r\/android","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e2","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"reddit android 2"},
|
||||
{"href":"http:\/\/www.reddit.com\/r\/Android3","description":"r\/android","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e4","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"reddit android 3"}]
|
|
@ -1,46 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://web.resource.org/cc/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/">
|
||||
<channel rdf:about="http://pinboard.in">
|
||||
<title>Pinboard (private aaronmueller)</title>
|
||||
<link>https://pinboard.in/u:aaronmueller/private/</link>
|
||||
<description></description>
|
||||
<items>
|
||||
<rdf:Seq>
|
||||
<rdf:li rdf:resource="https://mehkee.com/"/>
|
||||
<rdf:li rdf:resource="https://qmk.fm/"/>
|
||||
</rdf:Seq>
|
||||
</items>
|
||||
</channel>
|
||||
|
||||
<item rdf:about="https://mehkee.com/">
|
||||
<title>Mehkee - Mechanical Keyboard Parts & Accessories</title>
|
||||
<dc:date>2018-11-08T21:29:32+00:00</dc:date>
|
||||
<link>https://mehkee.com/</link>
|
||||
<dc:creator>aaronmueller</dc:creator>
|
||||
<dc:subject>keyboard gadget diy</dc:subject>
|
||||
<dc:source>http://pinboard.in/</dc:source>
|
||||
<dc:identifier>http://pinboard.in/u:aaronmueller/b:xxx/</dc:identifier>
|
||||
<taxo:topics>
|
||||
<rdf:Bag>
|
||||
<rdf:li rdf:resource="http://pinboard.in/u:aaronmueller/t:keyboard"/>
|
||||
<rdf:li rdf:resource="http://pinboard.in/u:aaronmueller/t:gadget"/>
|
||||
<rdf:li rdf:resource="http://pinboard.in/u:aaronmueller/t:diy"/>
|
||||
</rdf:Bag>
|
||||
</taxo:topics>
|
||||
</item>
|
||||
<item rdf:about="https://qmk.fm/">
|
||||
<title>QMK Firmware - An open source firmware for AVR and ARM based keyboards</title>
|
||||
<dc:date>2018-11-06T22:36:21+00:00</dc:date>
|
||||
<link>https://qmk.fm/</link>
|
||||
<dc:creator>aaronmueller</dc:creator>
|
||||
<dc:subject>firmware keyboard</dc:subject>
|
||||
<dc:source>http://pinboard.in/</dc:source>
|
||||
<dc:identifier>http://pinboard.in/u:aaronmueller/b:xxx/</dc:identifier>
|
||||
<taxo:topics>
|
||||
<rdf:Bag>
|
||||
<rdf:li rdf:resource="http://pinboard.in/u:aaronmueller/t:firmware"/>
|
||||
<rdf:li rdf:resource="http://pinboard.in/u:aaronmueller/t:keyboard"/>
|
||||
</rdf:Bag>
|
||||
</taxo:topics>
|
||||
</item>
|
||||
</rdf:RDF>
|
|
@ -1,5 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<posts user="aaronmueller">
|
||||
<post href="https://github.com/trailofbits/algo" time="2018-11-19T08:38:53Z" description="Algo VPN scripts" extended="" tag="vpn scripts" hash="18d708f67bb26d843b1cac4530bb52aa" shared="no" toread="yes" />
|
||||
<post href="http://www.ulisp.com/" time="2018-11-16T13:20:12Z" description="uLisp" extended="" tag="arduino avr embedded lisp" hash="2a17ae95925a03a5b9bb38cf7f6c6f9b" shared="no" toread="yes" />
|
||||
</posts>
|
|
@ -1,2 +0,0 @@
|
|||
[{"href":"https:\/\/github.com\/trailofbits\/algo","description":"Algo VPN scripts","extended":"","meta":"62325ba3b577683aee854d7f191034dc","hash":"18d708f67bb26d843b1cac4530bb52aa","time":"2018-11-19T08:38:53Z","shared":"no","toread":"yes","tags":"vpn scripts"},
|
||||
{"href":"http:\/\/www.ulisp.com\/","description":"uLisp","extended":"","meta":"7bd0c0ef31f69d1459e3d37366e742b3","hash":"2a17ae95925a03a5b9bb38cf7f6c6f9b","time":"2018-11-16T13:20:12Z","shared":"no","toread":"yes","tags":"arduino avr embedded lisp"}]
|
|
@ -1,38 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<!--So long and thanks for all the fish-->
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<title>Pocket Export</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Unread</h1>
|
||||
<ul>
|
||||
<li><a href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3110382/" time_added="1493913054" tags="">The Radical Plasticity Thesis: How the Brain Learns to be Conscious</a></li>
|
||||
<li><a href="https://martinfowler.com/eaaDev/uiArchs.html" time_added="1493909628" tags="">GUI Architectures</a></li>
|
||||
<li><a href="https://issuu.com/crowdcraft/docs/shanghai-talk-july-2012" time_added="1493900327" tags="make512">Shanghai Talk July 2012 by Mike Hall - issuu</a></li>
|
||||
<li><a href="http://make512.weebly.com/about-us.html" time_added="1493900002" tags="">About Us - make512</a></li>
|
||||
<li><a href="https://openzfsonosx.org/wiki/ZFS_on_Boot" time_added="1493887140" tags="">ZFS on Boot - OpenZFS on OS X</a></li>
|
||||
<li><a href="http://www.softpanorama.org/DNS/history.shtml" time_added="1493869958" tags="">History of DNS</a></li>
|
||||
<li><a href="https://chromium.googlesource.com/chromium/src/+/master/docs/linux_sandboxing.md" time_added="1493869649" tags="">Linux Sandboxing</a></li>
|
||||
<li><a href="https://hackernoon.com/rems-and-ems-and-why-you-probably-dont-need-them-664b9ce1e09f" time_added="1493694979" tags="">rems and ems, and why you probably don’t need them – Hacker Noon</a></li>
|
||||
<li><a href="https://wiki.archlinux.org/index.php/full_system_backup_with_rsync" time_added="1493581911" tags="">Full system backup with rsync - ArchWiki</a></li>
|
||||
<li><a href="https://www.youtube.com/watch?v=iNnAQpAHfmA" time_added="1493581911" tags="">SingUnltd. - Nature Boy (Flying Lotus Massage Situation Sample?! )</a></li>
|
||||
</ul>
|
||||
|
||||
<h1>Read Archive</h1>
|
||||
<ul>
|
||||
<li><a href="https://github.com/Droogans/unmaintainable-code" time_added="1478739800" tags="">Droogans/unmaintainable-code: An easier to share version of the infamous ht</a></li>
|
||||
<li><a href="http://www.benstopford.com/2015/02/14/log-structured-merge-trees/" time_added="1478739709" tags="">Log Structured Merge Trees - ben stopford</a></li>
|
||||
<li><a href="http://jgthms.com/web-design-in-4-minutes/#share" time_added="1478739628" tags="">Web Design in 4 minutes</a></li>
|
||||
<li><a href="https://eev.ee/blog/2016/07/26/the-hardest-problem-in-computer-science/" time_added="1478739622" tags="">The hardest problem in computer science / fuzzy notepad</a></li>
|
||||
<li><a href="https://medium.com/@iamjordanlittle/9-underutilized-features-in-css-90ced6ddbfe7#.690ah7whf" time_added="1476686912" tags="">9 Underutilized Features in CSS – Medium</a></li>
|
||||
<li><a href="http://themacro.com/articles/2016/09/employee-1-coinbase/" time_added="1476686907" tags="">Employee #1: Coinbase · The Macro</a></li>
|
||||
<li><a href="https://juokaz.com/blog/becoming-a-cto" time_added="1476686904" tags="">Becoming a CTO // Juozas Kaziukėnas</a></li>
|
||||
<li><a href="https://backchannel.com/the-internet-really-has-changed-everything-here-s-the-proof-928eaead18a8#.ekfmwcjh2" time_added="1476686896" tags="">The Internet Really Has Changed Everything. Here’s the Proof.</a></li>
|
||||
<li><a href="http://www.hindawi.com/journals/ijbm/2011/172389/" time_added="1424321329" tags="">Experimental and Modeling Study of Collagen Scaffolds with the Effects of C</a></li>
|
||||
<li><a href="http://search.cpan.org/dist/Locale-Maketext/lib/Locale/Maketext/TPJ13.pod?#A_Localization_Horror_Story:_It_Could_Happen_To_You" time_added="1424306906" tags="">Locale::Maketext::TPJ13 - search.cpan.org</a></li>
|
||||
|
||||
</ul>
|
||||
</body>
|
||||
</html>
|
|
@ -1,228 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"
|
||||
xmlns:content="http://purl.org/rss/1.0/modules/content/"
|
||||
xmlns:wfw="http://wellformedweb.org/CommentAPI/"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:atom="http://www.w3.org/2005/Atom"
|
||||
>
|
||||
|
||||
<channel>
|
||||
|
||||
<title>My Reading List: Read and Unread</title>
|
||||
<description>Items I've saved to read</description>
|
||||
<link>http://readitlaterlist.com/users/nikisweeting/feed/all</link>
|
||||
<atom:link href="http://readitlaterlist.com/users/nikisweeting/feed/all" rel="self" type="application/rss+xml" />
|
||||
|
||||
|
||||
<item>
|
||||
<title><![CDATA[Cell signaling]]></title>
|
||||
<category>Unread</category>
|
||||
<link>https://en.wikipedia.org/wiki/Cell_signaling</link>
|
||||
<guid>https://en.wikipedia.org/wiki/Cell_signaling</guid>
|
||||
<pubDate>Mon, 30 Oct 2017 01:12:10 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[Hayflick limit]]></title>
|
||||
<category>Unread</category>
|
||||
<link>https://en.wikipedia.org/wiki/Hayflick_limit</link>
|
||||
<guid>https://en.wikipedia.org/wiki/Hayflick_limit</guid>
|
||||
<pubDate>Mon, 30 Oct 2017 01:11:38 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[Even moderate drinking by parents can upset children – study]]></title>
|
||||
<category>Unread</category>
|
||||
<link>https://theguardian.com/society/2017/oct/18/even-moderate-drinking-by-parents-can-upset-children-study?CMP=Share_AndroidApp_Signal</link>
|
||||
<guid>https://theguardian.com/society/2017/oct/18/even-moderate-drinking-by-parents-can-upset-children-study?CMP=Share_AndroidApp_Signal</guid>
|
||||
<pubDate>Mon, 30 Oct 2017 01:11:30 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[How Merkle trees enable the decentralized Web]]></title>
|
||||
<category>Unread</category>
|
||||
<link>https://taravancil.com/blog/how-merkle-trees-enable-decentralized-web</link>
|
||||
<guid>https://taravancil.com/blog/how-merkle-trees-enable-decentralized-web</guid>
|
||||
<pubDate>Mon, 30 Oct 2017 01:11:30 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[Inertial navigation system]]></title>
|
||||
<category>Unread</category>
|
||||
<link>https://en.wikipedia.org/wiki/Inertial_navigation_system</link>
|
||||
<guid>https://en.wikipedia.org/wiki/Inertial_navigation_system</guid>
|
||||
<pubDate>Mon, 30 Oct 2017 01:10:10 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[Dead reckoning]]></title>
|
||||
<category>Unread</category>
|
||||
<link>https://en.wikipedia.org/wiki/Dead_reckoning</link>
|
||||
<guid>https://en.wikipedia.org/wiki/Dead_reckoning</guid>
|
||||
<pubDate>Mon, 30 Oct 2017 01:10:08 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[Calling Rust From Python]]></title>
|
||||
<category>Unread</category>
|
||||
<link>https://bheisler.github.io/post/calling-rust-in-python</link>
|
||||
<guid>https://bheisler.github.io/post/calling-rust-in-python</guid>
|
||||
<pubDate>Mon, 30 Oct 2017 01:04:33 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[Why would anyone choose Docker over fat binaries?]]></title>
|
||||
<category>Unread</category>
|
||||
<link>http://smashcompany.com/technology/why-would-anyone-choose-docker-over-fat-binaries</link>
|
||||
<guid>http://smashcompany.com/technology/why-would-anyone-choose-docker-over-fat-binaries</guid>
|
||||
<pubDate>Sun, 29 Oct 2017 14:57:25 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[]]></title>
|
||||
<category>Unread</category>
|
||||
<link>https://heml.io</link>
|
||||
<guid>https://heml.io</guid>
|
||||
<pubDate>Sun, 29 Oct 2017 14:55:26 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[A surprising amount of people want to be in North Korea]]></title>
|
||||
<category>Unread</category>
|
||||
<link>https://blog.benjojo.co.uk/post/north-korea-dprk-bgp-geoip-fruad</link>
|
||||
<guid>https://blog.benjojo.co.uk/post/north-korea-dprk-bgp-geoip-fruad</guid>
|
||||
<pubDate>Sat, 28 Oct 2017 05:41:41 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[Learning a Hierarchy]]></title>
|
||||
<category>Unread</category>
|
||||
<link>https://blog.openai.com/learning-a-hierarchy</link>
|
||||
<guid>https://blog.openai.com/learning-a-hierarchy</guid>
|
||||
<pubDate>Thu, 26 Oct 2017 16:43:48 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[High Performance Browser Networking]]></title>
|
||||
<category>Unread</category>
|
||||
<link>https://hpbn.co</link>
|
||||
<guid>https://hpbn.co</guid>
|
||||
<pubDate>Wed, 25 Oct 2017 19:05:24 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[What tender and juicy drama is going on at your school/workplace?]]></title>
|
||||
<category>Unread</category>
|
||||
<link>https://reddit.com/r/AskReddit/comments/78nc2a/what_tender_and_juicy_drama_is_going_on_at_your/dovab2v</link>
|
||||
<guid>https://reddit.com/r/AskReddit/comments/78nc2a/what_tender_and_juicy_drama_is_going_on_at_your/dovab2v</guid>
|
||||
<pubDate>Wed, 25 Oct 2017 18:05:58 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[Using an SSH Bastion Host]]></title>
|
||||
<category>Unread</category>
|
||||
<link>https://blog.scottlowe.org/2015/11/21/using-ssh-bastion-host</link>
|
||||
<guid>https://blog.scottlowe.org/2015/11/21/using-ssh-bastion-host</guid>
|
||||
<pubDate>Wed, 25 Oct 2017 11:38:47 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[Let's Define "undefined" | NathanShane.me]]></title>
|
||||
<category>Unread</category>
|
||||
<link>https://nathanshane.me/blog/let's-define-undefined</link>
|
||||
<guid>https://nathanshane.me/blog/let's-define-undefined</guid>
|
||||
<pubDate>Wed, 25 Oct 2017 11:32:59 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[Control theory]]></title>
|
||||
<category>Unread</category>
|
||||
<link>https://en.wikipedia.org/wiki/Control_theory#Closed-loop_transfer_function</link>
|
||||
<guid>https://en.wikipedia.org/wiki/Control_theory#Closed-loop_transfer_function</guid>
|
||||
<pubDate>Tue, 24 Oct 2017 22:57:43 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[J012-86-intractable.pdf]]></title>
|
||||
<category>Unread</category>
|
||||
<link>http://mit.edu/~jnt/Papers/J012-86-intractable.pdf</link>
|
||||
<guid>http://mit.edu/~jnt/Papers/J012-86-intractable.pdf</guid>
|
||||
<pubDate>Tue, 24 Oct 2017 22:56:32 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[Dynamic Programming: First Principles]]></title>
|
||||
<category>Unread</category>
|
||||
<link>http://flawlessrhetoric.com/Dynamic-Programming-First-Principles</link>
|
||||
<guid>http://flawlessrhetoric.com/Dynamic-Programming-First-Principles</guid>
|
||||
<pubDate>Tue, 24 Oct 2017 22:56:30 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[What Would Happen If There Were No Number 6?]]></title>
|
||||
<category>Unread</category>
|
||||
<link>https://fivethirtyeight.com/features/what-would-happen-if-there-were-no-number-6</link>
|
||||
<guid>https://fivethirtyeight.com/features/what-would-happen-if-there-were-no-number-6</guid>
|
||||
<pubDate>Tue, 24 Oct 2017 22:21:59 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[Ten Basic Rules for Adventure]]></title>
|
||||
<category>Unread</category>
|
||||
<link>https://outsideonline.com/2252916/10-basic-rules-adventure</link>
|
||||
<guid>https://outsideonline.com/2252916/10-basic-rules-adventure</guid>
|
||||
<pubDate>Tue, 24 Oct 2017 20:56:25 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[Insects Are In Serious Trouble]]></title>
|
||||
<category>Unread</category>
|
||||
<link>https://theatlantic.com/science/archive/2017/10/oh-no/543390?single_page=true</link>
|
||||
<guid>https://theatlantic.com/science/archive/2017/10/oh-no/543390?single_page=true</guid>
|
||||
<pubDate>Mon, 23 Oct 2017 23:10:10 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[Netflix/bless]]></title>
|
||||
<category>Unread</category>
|
||||
<link>https://github.com/Netflix/bless</link>
|
||||
<guid>https://github.com/Netflix/bless</guid>
|
||||
<pubDate>Mon, 23 Oct 2017 23:04:46 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[Getting Your First 10 Customers]]></title>
|
||||
<category>Unread</category>
|
||||
<link>https://stripe.com/atlas/guides/starting-sales</link>
|
||||
<guid>https://stripe.com/atlas/guides/starting-sales</guid>
|
||||
<pubDate>Mon, 23 Oct 2017 22:27:36 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[GPS Hardware]]></title>
|
||||
<category>Unread</category>
|
||||
<link>https://novasummits.com/gps-hardware</link>
|
||||
<guid>https://novasummits.com/gps-hardware</guid>
|
||||
<pubDate>Mon, 23 Oct 2017 04:44:40 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[Bicycle Tires and Tubes]]></title>
|
||||
<category>Unread</category>
|
||||
<link>http://sheldonbrown.com/tires.html#pressure</link>
|
||||
<guid>http://sheldonbrown.com/tires.html#pressure</guid>
|
||||
<pubDate>Mon, 23 Oct 2017 01:28:32 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[Tire light is on]]></title>
|
||||
<category>Unread</category>
|
||||
<link>https://reddit.com/r/Justrolledintotheshop/comments/77zm9e/tire_light_is_on/doqbshe</link>
|
||||
<guid>https://reddit.com/r/Justrolledintotheshop/comments/77zm9e/tire_light_is_on/doqbshe</guid>
|
||||
<pubDate>Mon, 23 Oct 2017 01:21:42 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[Bad_Salish_Boo ?? on Twitter]]></title>
|
||||
<category>Unread</category>
|
||||
<link>https://t.co/PDLlNjACv9</link>
|
||||
<guid>https://t.co/PDLlNjACv9</guid>
|
||||
<pubDate>Sat, 21 Oct 2017 06:48:07 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[Is an Open Marriage a Happier Marriage?]]></title>
|
||||
<category>Unread</category>
|
||||
<link>https://nytimes.com/2017/05/11/magazine/is-an-open-marriage-a-happier-marriage.html</link>
|
||||
<guid>https://nytimes.com/2017/05/11/magazine/is-an-open-marriage-a-happier-marriage.html</guid>
|
||||
<pubDate>Fri, 20 Oct 2017 13:08:52 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[The Invention of Monogamy]]></title>
|
||||
<category>Unread</category>
|
||||
<link>https://thenib.com/the-invention-of-monogamy</link>
|
||||
<guid>https://thenib.com/the-invention-of-monogamy</guid>
|
||||
<pubDate>Fri, 20 Oct 2017 12:19:00 -0500</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[Google Chrome May Add a Permission to Stop In-Browser Cryptocurrency Miners]]></title>
|
||||
<category>Unread</category>
|
||||
<link>https://bleepingcomputer.com/news/google/google-chrome-may-add-a-permission-to-stop-in-browser-cryptocurrency-miners</link>
|
||||
<guid>https://bleepingcomputer.com/news/google/google-chrome-may-add-a-permission-to-stop-in-browser-cryptocurrency-miners</guid>
|
||||
<pubDate>Fri, 20 Oct 2017 03:57:41 -0500</pubDate>
|
||||
</item>
|
||||
</channel>
|
||||
|
||||
</rss>
|
|
@ -1,92 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
import json
|
||||
import os
|
||||
from os.path import dirname, pardir, join
|
||||
from subprocess import check_output, check_call
|
||||
from tempfile import TemporaryDirectory
|
||||
from typing import List
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# Path to the archive.py script one directory above this test file; Helper.run executes it.
ARCHIVER_BIN = join(dirname(__file__), pardir, 'archive.py')
|
||||
|
||||
|
||||
class Helper:
    """Run the archiver executable against a list of URLs.

    The links are written to ``input.json`` inside ``output_dir`` and the
    script at ``ARCHIVER_BIN`` is invoked on that file.
    """

    def __init__(self, output_dir: str):
        self.output_dir = output_dir

    def run(self, links, env=None, env_defaults=None):
        """Archive ``links`` by invoking ``ARCHIVER_BIN`` in a subprocess.

        Args:
            links: iterable of URL strings; each becomes one entry of the
                generated ``input.json`` (used as both href and description).
            env: extra environment variables for the subprocess. Values here
                take precedence over ``env_defaults``.
            env_defaults: baseline environment variables. When omitted,
                submission to archive.org is switched off.

        Raises:
            subprocess.CalledProcessError: if the archiver exits non-zero.
        """
        if env_defaults is None:
            env_defaults = {
                # we don't want to spam archive.org within our tests
                'SUBMIT_ARCHIVE_DOT_ORG': 'False',
            }
        if env is None:
            env = {}

        # Merge with dict displays: dict(**a, **b) raises TypeError as soon as
        # both mappings share a key, which made overriding a default impossible.
        # OUTPUT_DIR goes last so it always points at this helper's directory.
        overrides = {**env_defaults, **env, 'OUTPUT_DIR': self.output_dir}

        entries = [{'href': url, 'description': url} for url in links]

        # The input file lives inside the output directory, so it has to exist
        # before we can write to it.
        os.makedirs(self.output_dir, exist_ok=True)
        input_json = join(self.output_dir, 'input.json')
        with open(input_json, 'w') as fo:
            json.dump(entries, fo)

        check_call(
            [ARCHIVER_BIN, input_json],
            env={**os.environ, **overrides},
        )
|
||||
|
||||
|
||||
class TestArchiver:
    """Smoke tests that run the archiver end to end via ``Helper``."""

    def setup(self):
        # Each test gets its own scratch directory instead of a shared,
        # hard-coded relative path that may not exist and is never removed.
        self.tdir = TemporaryDirectory()

    def teardown(self):
        # Remove the scratch directory and everything the archiver wrote to it.
        self.tdir.cleanup()

    @property
    def output_dir(self):
        """Path of the scratch directory used by the current test."""
        return self.tdir.name

    def test_fetch_favicon_false(self):
        """Archiving a single link with favicon fetching disabled must not fail."""
        h = Helper(self.output_dir)

        h.run(links=[
            'https://google.com',
        ], env={
            'FETCH_FAVICON': 'False',
        })
        # for now no asserts, good enough if it isn't failing

    def test_3000_links(self):
        """
        The pages are deliberately unreachable. The tool should gracefully process all of them even though individual links are failing.
        """
        h = Helper(self.output_dir)

        h.run(links=[
            f'https://localhost:123/whatever_{i}.html' for i in range(3000)
        ], env={
            'FETCH_FAVICON': 'False',
            'FETCH_SCREENSHOT': 'False',
            'FETCH_PDF': 'False',
            'FETCH_DOM': 'False',
            'CHECK_SSL_VALIDITY': 'False',
        })
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Executed directly: hand this very file to pytest.
    pytest.main([__file__])
|
|
@ -1,18 +0,0 @@
|
|||
# Binaries for running ArchiveBox
|
||||
|
||||
This folder contains all the executables that ArchiveBox provides.
|
||||
|
||||
|
||||
# Adding it to your `$PATH`
|
||||
To be able to run ArchiveBox from anywhere on your system, you can add this entire folder to your `$PATH`, like so:
|
||||
|
||||
**Edit `~/.bash_profile`:**
|
||||
```bash
|
||||
export PATH=/opt/ArchiveBox/bin:$PATH
|
||||
```
|
||||
|
||||
# Running executables directly
|
||||
|
||||
If you don't want to add ArchiveBox to your `$PATH` you can also call these executables directly with their full path, like so:
|
||||
|
||||
`/opt/ArchiveBox/bin/archivebox https://example.com/some/feed.rss`
|
|
@ -1 +0,0 @@
|
|||
../archivebox/archive.py
|
15
bin/archivebox
Executable file
|
@ -0,0 +1,15 @@
|
|||
#!/usr/bin/env python3
"""Command-line launcher: make the directory above bin/ importable, then call archivebox's main()."""

import os
import sys

# Directory containing this launcher, and its parent directory.
BIN_DIR = os.path.dirname(os.path.abspath(__file__))
REPO_DIR = os.path.dirname(BIN_DIR)

# The parent directory must be on sys.path before the package import below.
sys.path.append(REPO_DIR)

from archivebox.__main__ import main


if __name__ == '__main__':
    main(sys.argv)
|
|
@ -1 +0,0 @@
|
|||
../archivebox/purge.py
|
3
setup.py
|
@ -37,10 +37,11 @@ setuptools.setup(
|
|||
python_requires='>=3.6',
|
||||
install_requires=[
|
||||
"base32-crockford==0.3.0",
|
||||
"django==2.2",
|
||||
],
|
||||
entry_points={
|
||||
'console_scripts': [
|
||||
'archivebox = archivebox.archive:main',
|
||||
'archivebox = archivebox.__main__:main',
|
||||
],
|
||||
},
|
||||
package_data={
|
||||
|
|