1
0
Fork 0

first working django model with archivebox-shell command and sql exporting

This commit is contained in:
Nick Sweeting 2019-04-17 03:49:18 -04:00
parent ecf95d398a
commit cdb70c73df
17 changed files with 215 additions and 21 deletions

View file

@ -1 +1,3 @@
__package__ = 'archivebox'
from . import core

View file

@ -8,9 +8,8 @@ import sys
import argparse
from ..legacy.main import list_archive_data, remove_archive_links
from ..legacy.util import reject_stdin, to_csv, TimedProgress
from ..legacy.config import ANSI
from ..legacy.main import remove_archive_links
from ..legacy.util import reject_stdin
def main(args=None):

View file

@ -0,0 +1,31 @@
#!/usr/bin/env python3
__package__ = 'archivebox.cli'
__command__ = 'archivebox shell'
__description__ = 'Enter an interactive ArchiveBox Django shell'
import sys
import argparse
from ..legacy.config import setup_django
from ..legacy.util import reject_stdin
def main(args=None):
    """Open an interactive Django shell for the current ArchiveBox data.

    `args` is the list of CLI arguments to parse; when it is None the
    process arguments (``sys.argv[1:]``) are used instead. The command
    defines no options of its own, so parsing only provides ``--help``
    and rejects unknown arguments.
    """
    if args is None:
        args = sys.argv[1:]

    cli = argparse.ArgumentParser(
        prog=__command__,
        description=__description__,
        add_help=True,
    )
    cli.parse_args(args)

    # NOTE(review): presumably refuses piped-in stdin for this command --
    # confirm against legacy.util.reject_stdin.
    reject_stdin(__command__)

    # Django has to be configured before its management commands are imported.
    setup_django()
    from django.core.management import call_command

    call_command("shell_plus")


if __name__ == '__main__':
    main()

View file

@ -0,0 +1 @@
__package__ = 'archivebox.core'

View file

@ -0,0 +1,28 @@
# Generated by Django 2.2 on 2019-04-17 06:46
from django.db import migrations, models
import uuid
class Migration(migrations.Migration):
    """Initial schema: create the `Page` table keyed by a UUID primary key."""

    initial = True

    dependencies = []

    operations = [
        migrations.CreateModel(
            name='Page',
            fields=[
                (
                    'id',
                    models.UUIDField(
                        default=uuid.uuid4,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                ('url', models.URLField()),
                ('timestamp', models.CharField(default=None, max_length=32, null=True)),
                ('title', models.CharField(default=None, max_length=128, null=True)),
                ('tags', models.CharField(default=None, max_length=256, null=True)),
                # `added` is filled in automatically when the row is created.
                ('added', models.DateTimeField(auto_now_add=True)),
                ('bookmarked', models.DateTimeField()),
                ('updated', models.DateTimeField(default=None, null=True)),
            ],
        ),
    ]

View file

@ -0,0 +1,27 @@
# Generated by Django 2.2 on 2019-04-17 07:39
from django.db import migrations, models
class Migration(migrations.Migration):
    """Drop `Page.bookmarked` and make `timestamp` and `url` unique."""

    dependencies = [('core', '0001_initial')]

    operations = [
        migrations.RemoveField(model_name='page', name='bookmarked'),
        migrations.AlterField(
            model_name='page',
            name='timestamp',
            field=models.CharField(
                default=None,
                max_length=32,
                null=True,
                unique=True,
            ),
        ),
        migrations.AlterField(
            model_name='page',
            name='url',
            field=models.URLField(unique=True),
        ),
    ]

View file

@ -1,3 +1,33 @@
__package__ = 'archivebox.core'
import uuid
from django.db import models
# Create your models here.
class Page(models.Model):
    """One archived URL in the SQL index, identified by a random UUID."""

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)

    url = models.URLField(unique=True)
    timestamp = models.CharField(unique=True, max_length=32, null=True, default=None)

    title = models.CharField(max_length=128, null=True, default=None)
    tags = models.CharField(max_length=256, null=True, default=None)

    added = models.DateTimeField(auto_now_add=True)
    updated = models.DateTimeField(null=True, default=None)
    # bookmarked = models.DateTimeField()

    # Field names copied in from_json() and exported by default in as_json().
    sql_args = ('url', 'timestamp', 'title', 'tags', 'updated')

    @classmethod
    def from_json(cls, info: dict):
        """Build an unsaved Page from `info`, ignoring keys not in `sql_args`."""
        known = {}
        for field_name, value in info.items():
            if field_name in cls.sql_args:
                known[field_name] = value
        return cls(**known)

    def as_json(self, *args) -> dict:
        """Return a dict of the named attributes (all of `sql_args` if none given)."""
        wanted = args if args else self.sql_args
        return dict((name, getattr(self, name)) for name in wanted)

View file

@ -1,24 +1,22 @@
__package__ = 'archivebox.core'
from ..legacy.config import (
TEMPLATES_DIR,
DATABASE_FILE,
)
import os
SECRET_KEY = '---------------- not a valid secret key ! ----------------'
DEBUG = True
INSTALLED_APPS = [
# 'django.contrib.admin',
# 'django.contrib.auth',
# 'django.contrib.contenttypes',
# 'django.contrib.sessions',
# 'django.contrib.messages',
# 'django.contrib.staticfiles',
'django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
'core',
'django_extensions',
]
MIDDLEWARE = [
@ -35,7 +33,7 @@ ROOT_URLCONF = 'core.urls'
TEMPLATES = [
{
'BACKEND': 'django.template.backends.django.DjangoTemplates',
'DIRS': [TEMPLATES_DIR],
'DIRS': ['templates'],
'APP_DIRS': True,
'OPTIONS': {
'context_processors': [
@ -53,7 +51,7 @@ WSGI_APPLICATION = 'core.wsgi.application'
DATABASES = {
'default': {
'ENGINE': 'django.db.backends.sqlite3',
'NAME': DATABASE_FILE,
'NAME': os.path.join(os.path.abspath(os.curdir), 'database', 'database.sqlite3'),
}
}

View file

@ -1,14 +1,15 @@
__package__ = 'archivebox.legacy'
import os
import re
import sys
import getpass
import django
import getpass
import shutil
from typing import Optional
from subprocess import run, PIPE, DEVNULL
# ******************************************************************************
# Documentation: https://github.com/pirate/ArchiveBox/wiki/Configuration
# Use the 'env' command to pass config options to ArchiveBox. e.g.:
@ -93,10 +94,11 @@ else:
ARCHIVE_DIR_NAME = 'archive'
SOURCES_DIR_NAME = 'sources'
DATABASE_DIR_NAME = 'database'
DATABASE_FILE_NAME = 'database.sqlite3'
ARCHIVE_DIR = os.path.join(OUTPUT_DIR, ARCHIVE_DIR_NAME)
SOURCES_DIR = os.path.join(OUTPUT_DIR, SOURCES_DIR_NAME)
DATABASE_DIR = os.path.join(OUTPUT_DIR, DATABASE_DIR_NAME)
DATABASE_FILE = os.path.join(DATABASE_DIR, 'database.sqlite3')
DATABASE_FILE = os.path.join(DATABASE_DIR, DATABASE_FILE_NAME)
PYTHON_DIR = os.path.join(REPO_DIR, 'archivebox')
LEGACY_DIR = os.path.join(PYTHON_DIR, 'legacy')
@ -221,6 +223,12 @@ def find_chrome_data_dir() -> Optional[str]:
return None
def setup_django():
    """Configure Django so the `core` app and its models can be imported.

    Puts PYTHON_DIR on the import path so that `core.settings` resolves,
    points DJANGO_SETTINGS_MODULE at it unless the environment already
    defines one, and then calls `django.setup()`.
    """
    import django

    # This helper is called from several entry points in one process, so
    # only extend sys.path the first time to avoid piling up duplicates.
    if PYTHON_DIR not in sys.path:
        sys.path.append(PYTHON_DIR)

    # setdefault: an explicitly exported DJANGO_SETTINGS_MODULE still wins.
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
    django.setup()
# ******************************************************************************
# ************************ Environment & Dependencies **************************
# ******************************************************************************

View file

@ -6,6 +6,8 @@ from collections import OrderedDict
from .schema import Link, ArchiveResult
from .config import (
DATABASE_DIR,
DATABASE_FILE_NAME,
OUTPUT_DIR,
TIMEOUT,
URL_BLACKLIST_PTN,
@ -19,6 +21,10 @@ from .storage.json import (
parse_json_link_details,
write_json_link_details,
)
from .storage.sql import (
write_sql_main_index,
parse_sql_main_index,
)
from .util import (
scheme,
enforce_types,
@ -204,6 +210,14 @@ def write_main_index(links: List[Link], out_dir: str=OUTPUT_DIR, finished: bool=
log_indexing_process_started()
log_indexing_started(DATABASE_DIR, DATABASE_FILE_NAME)
timer = TimedProgress(TIMEOUT * 2, prefix=' ')
try:
write_sql_main_index(links)
finally:
timer.end()
log_indexing_finished(DATABASE_DIR, DATABASE_FILE_NAME)
log_indexing_started(out_dir, 'index.json')
timer = TimedProgress(TIMEOUT * 2, prefix=' ')
try:
@ -228,6 +242,8 @@ def load_main_index(out_dir: str=OUTPUT_DIR, import_path: Optional[str]=None) ->
existing_links: List[Link] = []
if out_dir:
existing_links = list(parse_json_main_index(out_dir))
existing_sql_links = list(parse_sql_main_index())
assert set(l.url for l in existing_links) == set(l['url'] for l in existing_sql_links)
new_links: List[Link] = []
if import_path:

View file

@ -22,6 +22,7 @@ from .config import (
DATABASE_DIR,
check_dependencies,
check_data_folder,
setup_django,
)
from .logs import (
log_archiving_started,
@ -75,6 +76,11 @@ def init():
write_main_index([], out_dir=OUTPUT_DIR, finished=True)
setup_django()
from django.core.management import call_command
call_command("makemigrations", interactive=False)
call_command("migrate", interactive=False)
stderr('{green}[√] Done.{reset}'.format(**ANSI))

View file

@ -0,0 +1,10 @@
[mypy_django_plugin]
# Specify the settings module to use for django.conf.settings. This setting
# can also be specified with the DJANGO_SETTINGS_MODULE environment variable
# (which takes priority over the config file).
django_settings = core.settings
# If True, all unknown settings in django.conf.settings will fall back to Any.
# Specify it if your settings are loaded dynamically, to avoid false positives.
ignore_missing_settings = True

View file

@ -0,0 +1,32 @@
__package__ = 'archivebox.legacy.storage'
from typing import List, Iterator
from ..schema import Link
from ..util import enforce_types
from ..config import setup_django
### Main Links Index
sql_keys = ('url', 'timestamp', 'title', 'tags', 'updated')
@enforce_types
def parse_sql_main_index() -> Iterator[dict]:
    """Iterate over every Page row in the SQL index as a plain dict.

    Each item is the result of `Page.as_json(*sql_keys)`, i.e. a dict with
    one entry per name in `sql_keys` -- not a `Link` object.

    Returns:
        A lazy generator; rows are fetched when it is first iterated.
    """
    setup_django()
    # Imported here rather than at module level: Django must be configured
    # by setup_django() before the models module can be loaded.
    from core.models import Page

    return (
        page.as_json(*sql_keys)
        for page in Page.objects.all()
    )
@enforce_types
def write_sql_main_index(links: List[Link]) -> None:
    """Upsert every link into the SQL index, matching existing rows by URL.

    For each link, the fields named in `sql_keys` are taken from
    `link._asdict()` and written with `update_or_create`, so an existing
    Page with the same url is updated and a missing one is created.

    All rows are written inside a single transaction: this avoids one
    commit per link and leaves the index untouched if any write fails.
    """
    setup_django()
    # Imported here rather than at module level: Django must be configured
    # by setup_django() before these modules can be used.
    from django.db import transaction
    from core.models import Page

    with transaction.atomic():
        for link in links:
            info = {k: v for k, v in link._asdict().items() if k in sql_keys}
            Page.objects.update_or_create(url=link.url, defaults=info)

3
archivebox/mypy.ini Normal file
View file

@ -0,0 +1,3 @@
[mypy]
plugins =
mypy_django_plugin.main

View file

@ -2,6 +2,7 @@
__package__ = 'archivebox'
import os
import sys
import shutil

View file

@ -5,6 +5,7 @@ base32-crockford
setuptools
ipdb
mypy
django-stubs
flake8
#wpull

View file

@ -36,9 +36,10 @@ setuptools.setup(
packages=setuptools.find_packages(),
python_requires='>=3.6',
install_requires=[
"dataclasses==0.6",
"base32-crockford==0.3.0",
"django==2.2",
"dataclasses==0.6",
"django-extensions==2.1.6",
],
entry_points={
'console_scripts': [