Merge pull request #569 from cdvv7788/extract-command-update

feat: Add --extract flag to update command
Nick Sweeting, 2020-12-05 17:43:28 -05:00 (committed by GitHub)
commit 3b280e6b02
GPG key ID: 4AEE18F83AFDEB23 (no known key found for this signature in database)
2 changed files with 18 additions and 1 deletion


@@ -102,6 +102,13 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         default=None,
         help='Update only URLs matching these filter patterns.'
     )
+    parser.add_argument(
+        "--extract",
+        type=str,
+        help="Pass a list of the extractors to be used. If the method name is not correct, it will be ignored. \
+              This does not take precedence over the configuration",
+        default=""
+    )
     command = parser.parse_args(args or ())
     filter_patterns_str = accept_stdin(stdin)
@@ -117,6 +124,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         after=command.after,
         before=command.before,
         out_dir=pwd or OUTPUT_DIR,
+        extractors=command.extract,
     )
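
The two hunks above wire the flag from the CLI into update(): argparse stores the raw comma-separated string on command.extract, and main() forwards it unchanged as extractors. A minimal standalone sketch of that behavior (the parser setup is paraphrased from the diff; the example extractor names title,wget and everything else here are illustrative assumptions):

    import argparse

    # Reduced parser mirroring only the --extract flag added above.
    parser = argparse.ArgumentParser(prog="archivebox update")
    parser.add_argument(
        "--extract",
        type=str,
        default="",
        help="Comma-separated list of extractor names to run; unrecognized names are ignored.",
    )

    command = parser.parse_args(["--extract=title,wget"])  # e.g. archivebox update --extract=title,wget
    print(command.extract)        # -> "title,wget", forwarded as update(extractors=command.extract)

    command = parser.parse_args([])                        # flag omitted
    print(repr(command.extract))  # -> '' (the default), so update() keeps its previous behavior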


@@ -681,6 +681,7 @@ def update(resume: Optional[float]=None,
            status: Optional[str]=None,
            after: Optional[str]=None,
            before: Optional[str]=None,
+           extractors: str="",
            out_dir: Path=OUTPUT_DIR) -> List[Link]:
     """Import any new links from subscriptions and retry any previously failed/skipped links"""
@@ -688,6 +689,8 @@ def update(resume: Optional[float]=None,
     check_dependencies()
     new_links: List[Link] = [] # TODO: Remove input argument: only_new

+    extractors = extractors.split(",") if extractors else []
+
     # Step 1: Filter for selected_links
     matching_snapshots = list_links(
         filter_patterns=filter_patterns,
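
The guard on the split matters because of how Python handles the empty default: "".split(",") returns [""], not [], so an omitted --extract would otherwise look like a request for one nameless extractor. A quick plain-Python illustration (nothing here beyond standard str.split semantics):

    extract_arg = ""                                               # flag omitted or left blank
    assert extract_arg.split(",") == [""]                          # naive split yields a bogus empty name
    assert (extract_arg.split(",") if extract_arg else []) == []   # the guarded form used above

    extract_arg = "title,wget"
    assert (extract_arg.split(",") if extract_arg else []) == ["title", "wget"]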
@@ -720,7 +723,13 @@ def update(resume: Optional[float]=None,
             stderr(f'[√] Nothing found to resume after {resume}', color='green')
             return all_links

-    archive_links(to_archive, overwrite=overwrite, out_dir=out_dir)
+    archive_kwargs = {
+        "out_dir": out_dir,
+    }
+    if extractors:
+        archive_kwargs["methods"] = extractors
+
+    archive_links(to_archive, overwrite=overwrite, **archive_kwargs)

     # Step 4: Re-write links index with updated titles, icons, and resources
     all_links = load_main_index(out_dir=out_dir)
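
archive_links itself is outside this diff; per the help text above, extractor names that don't match a known method are simply ignored when the methods kwarg is passed through. A hypothetical sketch of that filtering step (the helper name select_methods and the extractor list are illustrative assumptions, not ArchiveBox's implementation):

    from typing import List, Optional

    KNOWN_EXTRACTORS = ["title", "favicon", "wget", "pdf", "screenshot", "dom", "media"]  # illustrative

    def select_methods(requested: Optional[List[str]]) -> List[str]:
        """Hypothetical helper: keep only recognized extractor names, silently dropping the rest."""
        if not requested:
            return KNOWN_EXTRACTORS      # no --extract given: fall back to the configured/default set
        return [name for name in requested if name in KNOWN_EXTRACTORS]

    assert select_methods(["title", "wget", "not_a_real_extractor"]) == ["title", "wget"]
    assert select_methods([]) == KNOWN_EXTRACTORS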