feat: depth=0 crawls the current page only
commit 2db0324539
parent 8b22a2a7dd
2 changed files with 21 additions and 5 deletions
@@ -53,14 +53,22 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         help="Recursively archive all linked pages up to this many hops away"
     )
     command = parser.parse_args(args or ())
-    import_str = accept_stdin(stdin)
+    #import_str = accept_stdin(stdin)
     add(
-        import_str=import_str,
-        import_path=command.import_path,
+        import_str=command.import_path,
+        import_path=None,
         update_all=command.update_all,
         index_only=command.index_only,
         out_dir=pwd or OUTPUT_DIR,
     )
+    #if command.depth == 1:
+    #    add(
+    #        import_str=None,
+    #        import_path=command.import_path,
+    #        update_all=command.update_all,
+    #        index_only=command.index_only,
+    #        out_dir=pwd or OUTPUT_DIR,
+    #    )


 if __name__ == '__main__':
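Judging by this hunk, with --depth=0 the URL argument now reaches add() as import_str (raw text that is scanned for URLs) rather than import_path (a source that gets fetched and parsed for outbound links), so only the argument page itself is queued. A minimal sketch of the import_str side of that behavior, using a hypothetical parse_urls() helper that is not part of this diff:

import re

def parse_urls(text: str) -> list[str]:
    # Hypothetical stand-in for the URL extraction applied to import_str:
    # pull bare URLs out of the given text, nothing more.
    return re.findall(r'https?://[^\s"\'<>]+', text)

# depth=0: the import text is just the URL itself, so exactly one page is queued
print(parse_urls("https://example.com"))  # ['https://example.com']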
@@ -1,7 +1,15 @@
 import subprocess
+import json

 from .fixtures import *

-def test_depth_flag_is_accepted(tmp_path, process):
+def test_depth_flag_is_accepted(process):
     arg_process = subprocess.run(["archivebox", "add", "https://example.com", "--depth=0"], capture_output=True)
     assert 'unrecognized arguments: --depth' not in arg_process.stderr.decode('utf-8')
+
+def test_depth_flag_0_crawls_only_the_arg_page(tmp_path, process):
+    arg_process = subprocess.run(["archivebox", "add", "https://example.com", "--depth=0"], capture_output=True)
+    archived_item_path = list(tmp_path.glob('archive/**/*'))[0]
+    with open(archived_item_path / "index.json", "r") as f:
+        output_json = json.load(f)
+    assert output_json["base_url"] == "example.com"
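The check the new test performs can also be run by hand against a data directory: each snapshot under archive/ carries an index.json whose base_url records what was captured. A short sketch, assuming the same on-disk layout the test globs:

import json
from pathlib import Path

data_dir = Path(".")  # an initialized ArchiveBox data directory
snapshot_index = next(data_dir.glob("archive/*/index.json"))
with open(snapshot_index) as f:
    print(json.load(f)["base_url"])  # "example.com" when only the arg page was crawled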