diff --git a/archivebox/cli/__init__.py b/archivebox/cli/__init__.py
index 3df41809..f9a55efd 100644
--- a/archivebox/cli/__init__.py
+++ b/archivebox/cli/__init__.py
@@ -63,7 +63,7 @@ def run_subcommand(subcommand: str,
 
     if subcommand not in meta_cmds:
         from ..config import setup_django
-        setup_django(in_memory_db=subcommand in fake_db)
+        setup_django(in_memory_db=subcommand in fake_db, check_db=subcommand in archive_cmds)
 
     module = import_module('.archivebox_{}'.format(subcommand), __package__)
     module.main(args=subcommand_args, stdin=stdin, pwd=pwd)    # type: ignore
diff --git a/archivebox/extractors/title.py b/archivebox/extractors/title.py
index ff70f689..28cb128f 100644
--- a/archivebox/extractors/title.py
+++ b/archivebox/extractors/title.py
@@ -20,7 +20,6 @@ from ..config import (
     CURL_ARGS,
     CURL_VERSION,
     CURL_USER_AGENT,
-    setup_django,
 )
 
 from ..logging_util import TimedProgress
@@ -81,7 +80,6 @@ def extract_title_with_regex(html):
 def save_title(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> ArchiveResult:
     """try to guess the page's title from its content"""
 
-    setup_django(out_dir=out_dir)
     from core.models import Snapshot
 
     output: ArchiveOutput = None
diff --git a/archivebox/index/__init__.py b/archivebox/index/__init__.py
index bf1d0c6a..4f4ac3d4 100644
--- a/archivebox/index/__init__.py
+++ b/archivebox/index/__init__.py
@@ -18,7 +18,6 @@ from ..util import (
     ExtendedEncoder,
 )
 from ..config import (
-    setup_django,
     ARCHIVE_DIR_NAME,
     SQL_INDEX_FILENAME,
     JSON_INDEX_FILENAME,
@@ -243,16 +242,9 @@ def write_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR) -> None:
 
     log_indexing_process_finished()
 
-@enforce_types
-def get_empty_snapshot_queryset(out_dir: Path=OUTPUT_DIR):
-    setup_django(out_dir, check_db=True)
-    from core.models import Snapshot
-    return Snapshot.objects.none()
-
 @enforce_types
 def load_main_index(out_dir: Path=OUTPUT_DIR, warn: bool=True) -> List[Link]:
     """parse and load existing index with any new links from import_path merged in"""
-    setup_django(out_dir, check_db=True)
     from core.models import Snapshot
     try:
         return Snapshot.objects.all()
@@ -390,8 +382,9 @@ def search_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type:
             color='red',
         )
         raise SystemExit(2)
+    from core.models import Snapshot
 
-    qsearch = get_empty_snapshot_queryset()
+    qsearch = Snapshot.objects.none()
     for pattern in filter_patterns:
         try:
             qsearch |= query_search_index(pattern)
diff --git a/archivebox/index/html.py b/archivebox/index/html.py
index 4ead04ce..a62e2c7e 100644
--- a/archivebox/index/html.py
+++ b/archivebox/index/html.py
@@ -23,7 +23,6 @@ from ..config import (
     GIT_SHA,
     FOOTER_INFO,
     HTML_INDEX_FILENAME,
-    setup_django,
 )
 
 MAIN_INDEX_TEMPLATE = 'main_index.html'
@@ -111,7 +110,6 @@ def render_django_template(template: str, context: Mapping[str, str]) -> str:
     """render a given html template string with the given template content"""
     from django.template.loader import render_to_string
 
-    setup_django(check_db=False)
     return render_to_string(template, context)
 
 
diff --git a/archivebox/main.py b/archivebox/main.py
index 756fecde..eb8cd6a0 100644
--- a/archivebox/main.py
+++ b/archivebox/main.py
@@ -29,7 +29,6 @@ from .util import enforce_types    # type: ignore
 from .system import get_dir_size, dedupe_cron_jobs, CRON_COMMENT
 from .index import (
     load_main_index,
-    get_empty_snapshot_queryset,
     parse_links_from_source,
     dedupe_links,
     write_main_index,
@@ -265,6 +264,7 @@ def run(subcommand: str,
 @enforce_types
 def init(force: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
     """Initialize a new ArchiveBox collection in the current directory"""
+    from core.models import Snapshot
 
     Path(out_dir).mkdir(exist_ok=True)
     is_empty = not len(set(os.listdir(out_dir)) - ALLOWED_IN_OUTPUT_DIR)
@@ -335,7 +335,7 @@
 
     print()
     print('{green}[*] Collecting links from any existing indexes and archive folders...{reset}'.format(**ANSI))
-    all_links = get_empty_snapshot_queryset()
+    all_links = Snapshot.objects.none()
     pending_links: Dict[str, Link] = {}
 
     if existing_index:
diff --git a/archivebox/search/__init__.py b/archivebox/search/__init__.py
index 360b20ff..6191ede9 100644
--- a/archivebox/search/__init__.py
+++ b/archivebox/search/__init__.py
@@ -6,7 +6,7 @@ from django.db.models import QuerySet
 
 from archivebox.index.schema import Link
 from archivebox.util import enforce_types
-from archivebox.config import setup_django,stderr, OUTPUT_DIR, USE_INDEXING_BACKEND, USE_SEARCHING_BACKEND, SEARCH_BACKEND_ENGINE
+from archivebox.config import stderr, OUTPUT_DIR, USE_INDEXING_BACKEND, USE_SEARCHING_BACKEND, SEARCH_BACKEND_ENGINE
 
 from .utils import get_indexable_content, log_index_started
 
@@ -49,7 +49,6 @@ def write_search_index(link: Link, texts: Union[List[str], None]=None, out_dir:
 
 @enforce_types
 def query_search_index(query: str, out_dir: Path=OUTPUT_DIR) -> QuerySet:
-    setup_django(out_dir, check_db=True)
     from core.models import Snapshot
 
     if search_backend_enabled():
diff --git a/archivebox/search/backends/ripgrep.py b/archivebox/search/backends/ripgrep.py
index ff02008d..e2e03c9b 100644
--- a/archivebox/search/backends/ripgrep.py
+++ b/archivebox/search/backends/ripgrep.py
@@ -2,7 +2,7 @@ import re
 from subprocess import run, PIPE, DEVNULL
 from typing import List, Generator
 
-from archivebox.config import setup_django, ARCHIVE_DIR
+from archivebox.config import ARCHIVE_DIR
 from archivebox.util import enforce_types
 
 RG_IGNORE_EXTENSIONS = ('css','js','orig','svg')
@@ -30,7 +30,6 @@ def search(text: str) -> List[str]:
     if is_rg_installed.returncode:
         raise Exception("ripgrep binary not found, install ripgrep to use this search backend")
 
-    setup_django(check_db=True)
     from core.models import Snapshot
 
     rg_cmd = ['rg', RG_ADD_TYPE, RG_IGNORE_ARGUMENTS, RG_DEFAULT_ARGUMENTS, RG_REGEX_ARGUMENT, text, str(ARCHIVE_DIR)]
diff --git a/tests/test_oneshot.py b/tests/test_oneshot.py
index 4057a6ad..560ac43c 100644
--- a/tests/test_oneshot.py
+++ b/tests/test_oneshot.py
@@ -20,7 +20,6 @@ def test_oneshot_command_saves_page_in_right_folder(tmp_path, disable_extractors
         capture_output=True,
         env=disable_extractors_dict,
     )
-    print(process.stdout)
     items = ' '.join([str(x) for x in tmp_path.iterdir()])
     current_path = ' '.join([str(x) for x in Path.cwd().iterdir()])
     assert "index.json" in items