Merge pull request #536 from jdcaballerov/tag-list-filter
commit 79a936835c
4 changed files with 26 additions and 2 deletions
@@ -98,7 +98,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
     parser.add_argument(
         '--filter-type',
         type=str,
-        choices=('exact', 'substring', 'domain', 'regex'),
+        choices=('exact', 'substring', 'domain', 'regex','tag'),
         default='exact',
         help='Type of pattern matching to use when filtering URLs',
     )

@@ -50,7 +50,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
     parser.add_argument(
         '--filter-type',
         type=str,
-        choices=('exact', 'substring', 'domain', 'regex'),
+        choices=('exact', 'substring', 'domain', 'regex','tag'),
         default='exact',
         help='Type of pattern matching to use when filtering URLs',
     )

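Editor's note: both --filter-type parsers above gain 'tag' as an accepted choice, so the new filter can be driven from the command line exactly as the test added further down does. A minimal usage sketch, assuming the same 'test-tag' tag name used in that test (any existing tag name works):

    # Mirrors the invocation in the new test_remove_tag test below:
    # remove every snapshot that carries the given tag.
    import subprocess

    subprocess.run(
        ['archivebox', 'remove', '--filter-type=tag', 'test-tag', '--yes', '--delete'],
        capture_output=True,
    )
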
@@ -361,6 +361,7 @@ LINK_FILTERS = {
     'substring': lambda pattern: Q(url__icontains=pattern),
     'regex': lambda pattern: Q(url__iregex=pattern),
     'domain': lambda pattern: Q(url__istartswith=f"http://{pattern}") | Q(url__istartswith=f"https://{pattern}") | Q(url__istartswith=f"ftp://{pattern}"),
+    'tag': lambda pattern: Q(tags__name=pattern),
 }
 
 @enforce_types

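Editor's note: the new 'tag' entry follows the same shape as the existing URL filters: given a pattern it returns a Django Q object, here matching snapshots whose related tag has that exact name. A minimal sketch of how such a lookup resolves against the ORM, assuming the Snapshot model and its tags many-to-many relation implied by the core_snapshot / core_snapshot_tags / core_tag tables used in the test below:

    # Hedged sketch: the model import path is an assumption inferred from the
    # core_* table names in the test; only Q(tags__name=...) comes from this diff.
    from django.db.models import Q
    from core.models import Snapshot

    def snapshots_with_tag(tag_name: str):
        # Build the same Q object the 'tag' entry in LINK_FILTERS produces and
        # apply it to the snapshot queryset via the tags__name relation lookup.
        return Snapshot.objects.filter(Q(tags__name=tag_name))

    # Usage: snapshots_with_tag('test-tag') yields the snapshots that
    # `archivebox remove --filter-type=tag test-tag` would target.
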
@@ -70,6 +70,29 @@ def test_remove_domain(tmp_path, process, disable_extractors_dict):
 
     assert count == 0
 
+
+def test_remove_tag(tmp_path, process, disable_extractors_dict):
+    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
+    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=disable_extractors_dict)
+    assert list((tmp_path / "archive").iterdir()) != []
+
+    conn = sqlite3.connect("index.sqlite3")
+    c = conn.cursor()
+    c.execute("INSERT INTO core_tag (id, name, slug) VALUES (2, 'test-tag', 'test-tag')")
+    snapshot_ids = c.execute("SELECT id from core_snapshot")
+    c.executemany('INSERT INTO core_snapshot_tags (snapshot_id, tag_id) VALUES (?, 2)', list(snapshot_ids))
+    conn.commit()
+
+    remove_process = subprocess.run(['archivebox', 'remove', '--filter-type=tag', 'test-tag', '--yes', '--delete'], capture_output=True)
+
+    assert len(list((tmp_path / "archive").iterdir())) == 0
+
+    count = c.execute("SELECT COUNT() from core_snapshot").fetchone()[0]
+    conn.commit()
+    conn.close()
+
+    assert count == 0
+
 def test_remove_before(tmp_path, process, disable_extractors_dict):
     subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
     subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=disable_extractors_dict)
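Editor's note: the test seeds its data with raw SQL against index.sqlite3 rather than through the CLI, creating one tag and attaching it to every snapshot before running the remove. For readers less familiar with the schema, a rough ORM restatement of that setup (model names and import path are assumptions inferred from the core_tag / core_snapshot_tags table names, not part of this change):

    # Hypothetical ORM equivalent of the raw-SQL setup in test_remove_tag.
    from core.models import Snapshot, Tag  # import path assumed, see note above

    tag = Tag.objects.create(name='test-tag', slug='test-tag')
    for snapshot in Snapshot.objects.all():
        snapshot.tags.add(tag)  # mirrors the core_snapshot_tags inserts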