first working django model with archivebox-shell command and sql exporting
This commit is contained in:
parent
ecf95d398a
commit
cdb70c73df
17 changed files with 215 additions and 21 deletions
|
@ -1 +1,3 @@
|
||||||
__package__ = 'archivebox'
|
__package__ = 'archivebox'
|
||||||
|
|
||||||
|
from . import core
|
||||||
|
|
|
@ -8,9 +8,8 @@ import sys
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
|
|
||||||
from ..legacy.main import list_archive_data, remove_archive_links
|
from ..legacy.main import remove_archive_links
|
||||||
from ..legacy.util import reject_stdin, to_csv, TimedProgress
|
from ..legacy.util import reject_stdin
|
||||||
from ..legacy.config import ANSI
|
|
||||||
|
|
||||||
|
|
||||||
def main(args=None):
|
def main(args=None):
|
||||||
|
|
31
archivebox/cli/archivebox_shell.py
Normal file
31
archivebox/cli/archivebox_shell.py
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
__package__ = 'archivebox.cli'
|
||||||
|
__command__ = 'archivebox shell'
|
||||||
|
__description__ = 'Enter an interactive ArchiveBox Django shell'
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
from ..legacy.config import setup_django
|
||||||
|
from ..legacy.util import reject_stdin
|
||||||
|
|
||||||
|
|
||||||
|
def main(args=None):
    """Parse the command-line arguments, then run Django's `shell_plus` command.

    Args:
        args: Argument list to parse. When None, `sys.argv[1:]` is used.
    """
    if args is None:
        args = sys.argv[1:]

    # No custom options are registered on the parser; it is built with
    # add_help=True and then run over the given arguments.
    cli = argparse.ArgumentParser(
        prog=__command__,
        description=__description__,
        add_help=True,
    )
    cli.parse_args(args)
    reject_stdin(__command__)

    # Configure Django first; the management import is deferred until after.
    setup_django()
    from django.core.management import call_command

    call_command("shell_plus")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
|
@ -0,0 +1 @@
|
||||||
|
__package__ = 'archivebox.core'
|
28
archivebox/core/migrations/0001_initial.py
Normal file
28
archivebox/core/migrations/0001_initial.py
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
# Generated by Django 2.2 on 2019-04-17 06:46
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Initial migration for the app: creates the `Page` model."""

    initial = True

    dependencies = []

    operations = [
        migrations.CreateModel(
            name='Page',
            fields=[
                (
                    'id',
                    models.UUIDField(
                        default=uuid.uuid4,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                (
                    'url',
                    models.URLField(),
                ),
                (
                    'timestamp',
                    models.CharField(default=None, max_length=32, null=True),
                ),
                (
                    'title',
                    models.CharField(default=None, max_length=128, null=True),
                ),
                (
                    'tags',
                    models.CharField(default=None, max_length=256, null=True),
                ),
                (
                    'added',
                    models.DateTimeField(auto_now_add=True),
                ),
                (
                    'bookmarked',
                    models.DateTimeField(),
                ),
                (
                    'updated',
                    models.DateTimeField(default=None, null=True),
                ),
            ],
        ),
    ]
|
27
archivebox/core/migrations/0002_auto_20190417_0739.py
Normal file
27
archivebox/core/migrations/0002_auto_20190417_0739.py
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
# Generated by Django 2.2 on 2019-04-17 07:39
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Remove `Page.bookmarked` and redefine `timestamp` and `url` as unique."""

    dependencies = [('core', '0001_initial')]

    operations = [
        migrations.RemoveField(model_name='page', name='bookmarked'),
        migrations.AlterField(
            model_name='page',
            name='timestamp',
            field=models.CharField(
                default=None,
                max_length=32,
                null=True,
                unique=True,
            ),
        ),
        migrations.AlterField(
            model_name='page',
            name='url',
            field=models.URLField(unique=True),
        ),
    ]
|
|
@ -1,3 +1,33 @@
|
||||||
|
__package__ = 'archivebox.core'
|
||||||
|
|
||||||
|
import uuid
|
||||||
|
|
||||||
from django.db import models
|
from django.db import models
|
||||||
|
|
||||||
# Create your models here.
|
|
||||||
|
class Page(models.Model):
    """Django model holding a page's URL, timestamp, title, tags and dates."""

    # Primary key: a random UUID generated by uuid.uuid4, not editable.
    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)

    url = models.URLField(unique=True)
    timestamp = models.CharField(unique=True, max_length=32, null=True, default=None)

    title = models.CharField(max_length=128, null=True, default=None)
    tags = models.CharField(max_length=256, null=True, default=None)

    added = models.DateTimeField(auto_now_add=True)
    updated = models.DateTimeField(null=True, default=None)
    # bookmarked = models.DateTimeField()

    # Field names accepted by from_json() and exported by as_json() by default.
    sql_args = ('url', 'timestamp', 'title', 'tags', 'updated')

    @classmethod
    def from_json(cls, info: dict):
        """Build an unsaved instance from `info`, ignoring keys not in `sql_args`."""
        accepted = {}
        for name, value in info.items():
            if name in cls.sql_args:
                accepted[name] = value
        return cls(**accepted)

    def as_json(self, *args) -> dict:
        """Return a dict of attribute values for the given names.

        With no names given, the attributes listed in `sql_args` are used.
        """
        names = args if args else self.sql_args
        exported = {}
        for name in names:
            exported[name] = getattr(self, name)
        return exported
|
||||||
|
|
|
@ -1,24 +1,22 @@
|
||||||
__package__ = 'archivebox.core'
|
__package__ = 'archivebox.core'
|
||||||
|
|
||||||
from ..legacy.config import (
|
import os
|
||||||
TEMPLATES_DIR,
|
|
||||||
DATABASE_FILE,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
SECRET_KEY = '---------------- not a valid secret key ! ----------------'
|
SECRET_KEY = '---------------- not a valid secret key ! ----------------'
|
||||||
DEBUG = True
|
DEBUG = True
|
||||||
|
|
||||||
|
|
||||||
INSTALLED_APPS = [
|
INSTALLED_APPS = [
|
||||||
# 'django.contrib.admin',
|
'django.contrib.admin',
|
||||||
# 'django.contrib.auth',
|
'django.contrib.auth',
|
||||||
# 'django.contrib.contenttypes',
|
'django.contrib.contenttypes',
|
||||||
# 'django.contrib.sessions',
|
'django.contrib.sessions',
|
||||||
# 'django.contrib.messages',
|
'django.contrib.messages',
|
||||||
# 'django.contrib.staticfiles',
|
'django.contrib.staticfiles',
|
||||||
|
|
||||||
'core',
|
'core',
|
||||||
|
|
||||||
|
'django_extensions',
|
||||||
]
|
]
|
||||||
|
|
||||||
MIDDLEWARE = [
|
MIDDLEWARE = [
|
||||||
|
@ -35,7 +33,7 @@ ROOT_URLCONF = 'core.urls'
|
||||||
TEMPLATES = [
|
TEMPLATES = [
|
||||||
{
|
{
|
||||||
'BACKEND': 'django.template.backends.django.DjangoTemplates',
|
'BACKEND': 'django.template.backends.django.DjangoTemplates',
|
||||||
'DIRS': [TEMPLATES_DIR],
|
'DIRS': ['templates'],
|
||||||
'APP_DIRS': True,
|
'APP_DIRS': True,
|
||||||
'OPTIONS': {
|
'OPTIONS': {
|
||||||
'context_processors': [
|
'context_processors': [
|
||||||
|
@ -53,7 +51,7 @@ WSGI_APPLICATION = 'core.wsgi.application'
|
||||||
DATABASES = {
|
DATABASES = {
|
||||||
'default': {
|
'default': {
|
||||||
'ENGINE': 'django.db.backends.sqlite3',
|
'ENGINE': 'django.db.backends.sqlite3',
|
||||||
'NAME': DATABASE_FILE,
|
'NAME': os.path.join(os.path.abspath(os.curdir), 'database', 'database.sqlite3'),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,14 +1,15 @@
|
||||||
|
__package__ = 'archivebox.legacy'
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
import getpass
|
|
||||||
import django
|
import django
|
||||||
|
import getpass
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from subprocess import run, PIPE, DEVNULL
|
from subprocess import run, PIPE, DEVNULL
|
||||||
|
|
||||||
|
|
||||||
# ******************************************************************************
|
# ******************************************************************************
|
||||||
# Documentation: https://github.com/pirate/ArchiveBox/wiki/Configuration
|
# Documentation: https://github.com/pirate/ArchiveBox/wiki/Configuration
|
||||||
# Use the 'env' command to pass config options to ArchiveBox. e.g.:
|
# Use the 'env' command to pass config options to ArchiveBox. e.g.:
|
||||||
|
@ -93,10 +94,11 @@ else:
|
||||||
ARCHIVE_DIR_NAME = 'archive'
|
ARCHIVE_DIR_NAME = 'archive'
|
||||||
SOURCES_DIR_NAME = 'sources'
|
SOURCES_DIR_NAME = 'sources'
|
||||||
DATABASE_DIR_NAME = 'database'
|
DATABASE_DIR_NAME = 'database'
|
||||||
|
DATABASE_FILE_NAME = 'database.sqlite3'
|
||||||
ARCHIVE_DIR = os.path.join(OUTPUT_DIR, ARCHIVE_DIR_NAME)
|
ARCHIVE_DIR = os.path.join(OUTPUT_DIR, ARCHIVE_DIR_NAME)
|
||||||
SOURCES_DIR = os.path.join(OUTPUT_DIR, SOURCES_DIR_NAME)
|
SOURCES_DIR = os.path.join(OUTPUT_DIR, SOURCES_DIR_NAME)
|
||||||
DATABASE_DIR = os.path.join(OUTPUT_DIR, DATABASE_DIR_NAME)
|
DATABASE_DIR = os.path.join(OUTPUT_DIR, DATABASE_DIR_NAME)
|
||||||
DATABASE_FILE = os.path.join(DATABASE_DIR, 'database.sqlite3')
|
DATABASE_FILE = os.path.join(DATABASE_DIR, DATABASE_FILE_NAME)
|
||||||
|
|
||||||
PYTHON_DIR = os.path.join(REPO_DIR, 'archivebox')
|
PYTHON_DIR = os.path.join(REPO_DIR, 'archivebox')
|
||||||
LEGACY_DIR = os.path.join(PYTHON_DIR, 'legacy')
|
LEGACY_DIR = os.path.join(PYTHON_DIR, 'legacy')
|
||||||
|
@ -221,6 +223,12 @@ def find_chrome_data_dir() -> Optional[str]:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def setup_django():
    """Point Django at the `core.settings` module and initialise it.

    Adds PYTHON_DIR to `sys.path` so that `core` can be imported as a
    top-level package, sets DJANGO_SETTINGS_MODULE unless the environment
    already defines it, then calls `django.setup()`.
    """
    import django

    # This function is called from several entry points within one process,
    # so only extend sys.path when the entry is missing; otherwise the same
    # directory would be appended again on every call.
    if PYTHON_DIR not in sys.path:
        sys.path.append(PYTHON_DIR)

    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
    django.setup()
|
||||||
|
|
||||||
# ******************************************************************************
|
# ******************************************************************************
|
||||||
# ************************ Environment & Dependencies **************************
|
# ************************ Environment & Dependencies **************************
|
||||||
# ******************************************************************************
|
# ******************************************************************************
|
||||||
|
|
|
@ -6,6 +6,8 @@ from collections import OrderedDict
|
||||||
|
|
||||||
from .schema import Link, ArchiveResult
|
from .schema import Link, ArchiveResult
|
||||||
from .config import (
|
from .config import (
|
||||||
|
DATABASE_DIR,
|
||||||
|
DATABASE_FILE_NAME,
|
||||||
OUTPUT_DIR,
|
OUTPUT_DIR,
|
||||||
TIMEOUT,
|
TIMEOUT,
|
||||||
URL_BLACKLIST_PTN,
|
URL_BLACKLIST_PTN,
|
||||||
|
@ -19,6 +21,10 @@ from .storage.json import (
|
||||||
parse_json_link_details,
|
parse_json_link_details,
|
||||||
write_json_link_details,
|
write_json_link_details,
|
||||||
)
|
)
|
||||||
|
from .storage.sql import (
|
||||||
|
write_sql_main_index,
|
||||||
|
parse_sql_main_index,
|
||||||
|
)
|
||||||
from .util import (
|
from .util import (
|
||||||
scheme,
|
scheme,
|
||||||
enforce_types,
|
enforce_types,
|
||||||
|
@ -204,6 +210,14 @@ def write_main_index(links: List[Link], out_dir: str=OUTPUT_DIR, finished: bool=
|
||||||
|
|
||||||
log_indexing_process_started()
|
log_indexing_process_started()
|
||||||
|
|
||||||
|
log_indexing_started(DATABASE_DIR, DATABASE_FILE_NAME)
|
||||||
|
timer = TimedProgress(TIMEOUT * 2, prefix=' ')
|
||||||
|
try:
|
||||||
|
write_sql_main_index(links)
|
||||||
|
finally:
|
||||||
|
timer.end()
|
||||||
|
log_indexing_finished(DATABASE_DIR, DATABASE_FILE_NAME)
|
||||||
|
|
||||||
log_indexing_started(out_dir, 'index.json')
|
log_indexing_started(out_dir, 'index.json')
|
||||||
timer = TimedProgress(TIMEOUT * 2, prefix=' ')
|
timer = TimedProgress(TIMEOUT * 2, prefix=' ')
|
||||||
try:
|
try:
|
||||||
|
@ -228,6 +242,8 @@ def load_main_index(out_dir: str=OUTPUT_DIR, import_path: Optional[str]=None) ->
|
||||||
existing_links: List[Link] = []
|
existing_links: List[Link] = []
|
||||||
if out_dir:
|
if out_dir:
|
||||||
existing_links = list(parse_json_main_index(out_dir))
|
existing_links = list(parse_json_main_index(out_dir))
|
||||||
|
existing_sql_links = list(parse_sql_main_index())
|
||||||
|
assert set(l.url for l in existing_links) == set(l['url'] for l in existing_sql_links)
|
||||||
|
|
||||||
new_links: List[Link] = []
|
new_links: List[Link] = []
|
||||||
if import_path:
|
if import_path:
|
||||||
|
|
|
@ -22,6 +22,7 @@ from .config import (
|
||||||
DATABASE_DIR,
|
DATABASE_DIR,
|
||||||
check_dependencies,
|
check_dependencies,
|
||||||
check_data_folder,
|
check_data_folder,
|
||||||
|
setup_django,
|
||||||
)
|
)
|
||||||
from .logs import (
|
from .logs import (
|
||||||
log_archiving_started,
|
log_archiving_started,
|
||||||
|
@ -75,6 +76,11 @@ def init():
|
||||||
|
|
||||||
write_main_index([], out_dir=OUTPUT_DIR, finished=True)
|
write_main_index([], out_dir=OUTPUT_DIR, finished=True)
|
||||||
|
|
||||||
|
setup_django()
|
||||||
|
from django.core.management import call_command
|
||||||
|
call_command("makemigrations", interactive=False)
|
||||||
|
call_command("migrate", interactive=False)
|
||||||
|
|
||||||
stderr('{green}[√] Done.{reset}'.format(**ANSI))
|
stderr('{green}[√] Done.{reset}'.format(**ANSI))
|
||||||
|
|
||||||
|
|
||||||
|
|
10
archivebox/legacy/mypy_django.ini
Normal file
10
archivebox/legacy/mypy_django.ini
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
[mypy_django_plugin]
|
||||||
|
|
||||||
|
# specify settings module to use for django.conf.settings, this setting
|
||||||
|
# could also be specified with DJANGO_SETTINGS_MODULE environment variable
|
||||||
|
# (it also takes priority over config file)
|
||||||
|
django_settings = core.settings
|
||||||
|
|
||||||
|
# if True, all unknown settings in django.conf.settings will fallback to Any,
|
||||||
|
# specify it if your settings are loaded dynamically to avoid false positives
|
||||||
|
ignore_missing_settings = True
|
32
archivebox/legacy/storage/sql.py
Normal file
32
archivebox/legacy/storage/sql.py
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
__package__ = 'archivebox.legacy.storage'
|
||||||
|
|
||||||
|
from typing import List, Iterator
|
||||||
|
|
||||||
|
from ..schema import Link
|
||||||
|
from ..util import enforce_types
|
||||||
|
from ..config import setup_django
|
||||||
|
|
||||||
|
|
||||||
|
### Main Links Index
|
||||||
|
|
||||||
|
sql_keys = ('url', 'timestamp', 'title', 'tags', 'updated')
|
||||||
|
|
||||||
|
|
||||||
|
@enforce_types
def parse_sql_main_index() -> Iterator[dict]:
    """Return a generator over every `Page` row, each exported as a dict.

    Each item is the result of `Page.as_json(*sql_keys)`, i.e. a plain dict
    holding the `sql_keys` attributes of one row -- not a `Link` object.
    The generator is lazy: rows are converted as it is consumed.

    Returns:
        A generator of dicts keyed by the names in `sql_keys`.
    """
    setup_django()
    # Imported only after setup_django() has run.
    from core.models import Page

    return (
        page.as_json(*sql_keys)
        for page in Page.objects.all()
    )
|
||||||
|
|
||||||
|
@enforce_types
def write_sql_main_index(links: List[Link]) -> None:
    """Create or update one `Page` row per link, matched on the link's URL.

    Only the link fields whose names appear in `sql_keys` are written.

    Args:
        links: The links to store in the SQL index.
    """
    setup_django()
    # Imported only after setup_django() has run.
    from core.models import Page

    for link in links:
        fields = link._asdict()
        row = {name: fields[name] for name in fields if name in sql_keys}
        Page.objects.update_or_create(url=link.url, defaults=row)
|
3
archivebox/mypy.ini
Normal file
3
archivebox/mypy.ini
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
[mypy]
|
||||||
|
plugins =
|
||||||
|
mypy_django_plugin.main
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
__package__ = 'archivebox'
|
__package__ = 'archivebox'
|
||||||
|
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import shutil
|
import shutil
|
||||||
|
|
|
@ -5,6 +5,7 @@ base32-crockford
|
||||||
setuptools
|
setuptools
|
||||||
ipdb
|
ipdb
|
||||||
mypy
|
mypy
|
||||||
|
django-stubs
|
||||||
flake8
|
flake8
|
||||||
|
|
||||||
#wpull
|
#wpull
|
||||||
|
|
3
setup.py
3
setup.py
|
@ -36,9 +36,10 @@ setuptools.setup(
|
||||||
packages=setuptools.find_packages(),
|
packages=setuptools.find_packages(),
|
||||||
python_requires='>=3.6',
|
python_requires='>=3.6',
|
||||||
install_requires=[
|
install_requires=[
|
||||||
|
"dataclasses==0.6",
|
||||||
"base32-crockford==0.3.0",
|
"base32-crockford==0.3.0",
|
||||||
"django==2.2",
|
"django==2.2",
|
||||||
"dataclasses==0.6",
|
"django-extensions==2.1.6",
|
||||||
],
|
],
|
||||||
entry_points={
|
entry_points={
|
||||||
'console_scripts': [
|
'console_scripts': [
|
||||||
|
|
Loading…
Reference in a new issue