diff --git a/archivebox/index/schema.py b/archivebox/index/schema.py index f8d81e34..66baa9d9 100644 --- a/archivebox/index/schema.py +++ b/archivebox/index/schema.py @@ -126,7 +126,7 @@ class Link: assert isinstance(self.url, str) and '://' in self.url assert self.updated is None or isinstance(self.updated, datetime) assert self.title is None or (isinstance(self.title, str) and self.title) - assert self.tags is None or (isinstance(self.tags, str) and self.tags) + assert self.tags is None or isinstance(self.tags, str) assert isinstance(self.sources, list) assert all(isinstance(source, str) and source for source in self.sources) assert isinstance(self.history, dict) @@ -186,7 +186,7 @@ class Link: for key, val in json_info.items() if key in cls.field_names() } - info['updated'] = parse_date(info['updated']) + info['updated'] = parse_date(info.get('updated')) info['sources'] = info.get('sources') or [] json_history = info.get('history') or {} diff --git a/archivebox/parsers/__init__.py b/archivebox/parsers/__init__.py index e3e19c45..4de3f8ab 100644 --- a/archivebox/parsers/__init__.py +++ b/archivebox/parsers/__init__.py @@ -71,11 +71,12 @@ def parse_links(source_file: str) -> Tuple[List[Link], str]: timer.end() return links, parser_name except Exception as err: # noqa + pass # Parsers are tried one by one down the list, and the first one # that succeeds is used. To see why a certain parser was not used # due to error or format incompatibility, uncomment this line: # print('[!] Parser {} failed: {} {}'.format(parser_name, err.__class__.__name__, err)) - pass + # raise timer.end() return [], 'Failed to parse'