Merge pull request #1168 from mAAdhaTTah/add-readwise-reader
This commit is contained in:
commit
16d278fbdb
3 changed files with 127 additions and 0 deletions
|
@ -223,6 +223,8 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
|
||||||
|
|
||||||
'POCKET_CONSUMER_KEY': {'type': str, 'default': None},
|
'POCKET_CONSUMER_KEY': {'type': str, 'default': None},
|
||||||
'POCKET_ACCESS_TOKENS': {'type': dict, 'default': {}},
|
'POCKET_ACCESS_TOKENS': {'type': dict, 'default': {}},
|
||||||
|
|
||||||
|
'READWISE_READER_TOKENS': {'type': dict, 'default': {}},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -34,6 +34,7 @@ from ..index.schema import Link
|
||||||
from ..logging_util import TimedProgress, log_source_saved
|
from ..logging_util import TimedProgress, log_source_saved
|
||||||
|
|
||||||
from . import pocket_api
|
from . import pocket_api
|
||||||
|
from . import readwise_reader_api
|
||||||
from . import wallabag_atom
|
from . import wallabag_atom
|
||||||
from . import pocket_html
|
from . import pocket_html
|
||||||
from . import pinboard_rss
|
from . import pinboard_rss
|
||||||
|
@ -51,6 +52,7 @@ from . import url_list
|
||||||
PARSERS = {
|
PARSERS = {
|
||||||
# Specialized parsers
|
# Specialized parsers
|
||||||
pocket_api.KEY: (pocket_api.NAME, pocket_api.PARSER),
|
pocket_api.KEY: (pocket_api.NAME, pocket_api.PARSER),
|
||||||
|
readwise_reader_api.KEY: (readwise_reader_api.NAME, readwise_reader_api.PARSER),
|
||||||
wallabag_atom.KEY: (wallabag_atom.NAME, wallabag_atom.PARSER),
|
wallabag_atom.KEY: (wallabag_atom.NAME, wallabag_atom.PARSER),
|
||||||
pocket_html.KEY: (pocket_html.NAME, pocket_html.PARSER),
|
pocket_html.KEY: (pocket_html.NAME, pocket_html.PARSER),
|
||||||
pinboard_rss.KEY: (pinboard_rss.NAME, pinboard_rss.PARSER),
|
pinboard_rss.KEY: (pinboard_rss.NAME, pinboard_rss.PARSER),
|
||||||
|
|
123
archivebox/parsers/readwise_reader_api.py
Normal file
123
archivebox/parsers/readwise_reader_api.py
Normal file
|
@ -0,0 +1,123 @@
|
||||||
|
__package__ = "archivebox.parsers"
|
||||||
|
|
||||||
|
|
||||||
|
import re
|
||||||
|
import requests
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from typing import IO, Iterable, Optional
|
||||||
|
from configparser import ConfigParser
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from ..index.schema import Link
|
||||||
|
from ..util import enforce_types
|
||||||
|
from ..system import atomic_write
|
||||||
|
from ..config import (
|
||||||
|
SOURCES_DIR,
|
||||||
|
READWISE_READER_TOKENS,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# INI-style file (read and written with ConfigParser by read_cursor/write_cursor)
# that stores one section per username holding its last pagination cursor.
API_DB_PATH = Path(SOURCES_DIR) / "readwise_reader_api.db"
|
||||||
|
|
||||||
|
|
||||||
|
class ReadwiseReaderAPI:
    """Minimal client for the Readwise Reader v3 document-list endpoint.

    Attributes:
        api_token: Access token sent in the ``Authorization`` header.
        cursor: Value sent as ``pageCursor``; ``None`` requests the first page.
    """

    LIST_URL = "https://readwise.io/api/v3/list/"
    # Seconds passed to requests as the timeout; without one a stalled
    # connection would block the import indefinitely.
    REQUEST_TIMEOUT = 30

    cursor: Optional[str]

    def __init__(self, api_token, cursor=None) -> None:
        self.api_token = api_token
        self.cursor = cursor

    def _auth_headers(self) -> dict:
        """Return the request headers carrying this instance's token."""
        # The token must always come from the instance (i.e. from config);
        # a literal credential must never be embedded in source.
        return {"Authorization": f"Token {self.api_token}"}

    def get_archive(self):
        """Fetch one page of documents whose location is "archive".

        Returns:
            The ``requests.Response`` for the page selected by ``self.cursor``.

        Raises:
            requests.HTTPError: if the server answers with an error status.
            requests.Timeout: if the server does not respond in time.
        """
        response = requests.get(
            url=self.LIST_URL,
            headers=self._auth_headers(),
            params={
                "location": "archive",
                "pageCursor": self.cursor,
            },
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response
|
||||||
|
|
||||||
|
def get_readwise_reader_articles(api: "ReadwiseReaderAPI"):
    """Yield every article from the archive, following pagination.

    Each page is fetched with ``api.get_archive()`` and its ``"results"``
    entries are yielded in order. While the response carries a truthy
    ``"nextPageCursor"``, that value is stored on ``api.cursor`` and the next
    page is requested. When iteration finishes, ``api.cursor`` holds the
    cursor of the last page that was requested (or its initial value if
    there was only one page).

    Pages are walked with a loop rather than recursion so that the number of
    pages is not limited by the interpreter's recursion depth.
    """
    while True:
        body = api.get_archive().json()
        yield from body["results"]

        next_cursor = body.get("nextPageCursor")
        if not next_cursor:
            return
        api.cursor = next_cursor
|
||||||
|
|
||||||
|
|
||||||
|
def link_from_article(article: dict, sources: list):
    """Build a Link from one Readwise Reader article record.

    Reads ``source_url``, ``title`` and ``updated_at`` from *article*. The
    URL doubles as the title when the title is empty, and ``updated_at`` is
    parsed as an ISO-8601 string and stored as a stringified POSIX timestamp.
    Tags are always left empty.
    """
    source_url: str = article['source_url']
    display_title = article["title"] or source_url
    updated_at = datetime.fromisoformat(article['updated_at'])
    epoch_seconds = updated_at.timestamp()

    link_fields = dict(
        url=source_url,
        timestamp=str(epoch_seconds),
        title=display_title,
        tags="",
        sources=sources,
    )
    return Link(**link_fields)
|
||||||
|
|
||||||
|
|
||||||
|
def write_cursor(username: str, since: str):
    """Store *since* as the saved cursor for *username* in API_DB_PATH.

    The file is created empty first if it does not exist, then loaded,
    the section named *username* is replaced with a single ``since`` entry,
    and the whole file is rewritten.
    """
    db_missing = not API_DB_PATH.exists()
    if db_missing:
        atomic_write(API_DB_PATH, "")

    cursor_store = ConfigParser()
    # Keep option names case-sensitive instead of ConfigParser's lowercasing.
    cursor_store.optionxform = str
    cursor_store.read(API_DB_PATH)

    section = {"since": since}
    cursor_store[username] = section

    with open(API_DB_PATH, "w+") as db_file:
        cursor_store.write(db_file)
|
||||||
|
|
||||||
|
|
||||||
|
def read_cursor(username: str) -> Optional[str]:
    """Return the cursor saved for *username*, or ``None`` if there is none.

    The backing file at API_DB_PATH is created empty when it does not exist
    yet, so the lookup then falls through to ``None``.
    """
    db_missing = not API_DB_PATH.exists()
    if db_missing:
        atomic_write(API_DB_PATH, "")

    cursor_store = ConfigParser()
    # Keep option names case-sensitive instead of ConfigParser's lowercasing.
    cursor_store.optionxform = str
    cursor_store.read(API_DB_PATH)

    saved_cursor = cursor_store.get(username, "since", fallback=None)
    return saved_cursor
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@enforce_types
def should_parse_as_readwise_reader_api(text: str) -> bool:
    """Return True when *text* begins with the ``readwise-reader://`` scheme."""
    scheme_prefix = "readwise-reader://"
    has_scheme = text.startswith(scheme_prefix)
    return has_scheme
|
||||||
|
|
||||||
|
|
||||||
|
@enforce_types
def parse_readwise_reader_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]:
    """Parse bookmarks from the Readwise Reader API

    Every line of *input_buffer* of the form ``readwise-reader://<username>``
    triggers a fetch of that user's archived articles, using the token stored
    under ``READWISE_READER_TOKENS[username]`` and resuming from the cursor
    previously saved for that user. Each article is yielded as a Link whose
    only source is the originating line. Once a user's articles have been
    fully consumed, the latest cursor is saved for the next run.

    Lines without the scheme prefix, or with the prefix but no username after
    it, are skipped.

    Raises:
        KeyError: if no token is configured for a referenced username.
    """

    input_buffer.seek(0)
    pattern = re.compile(r"^readwise-reader:\/\/(\w+)")
    for line in input_buffer:
        if not should_parse_as_readwise_reader_api(line):
            continue

        match = pattern.search(line)
        if match is None:
            # Prefix present but nothing usable as a username follows it
            # (e.g. "readwise-reader://"); there is no account to query.
            continue

        username = match.group(1)
        api = ReadwiseReaderAPI(READWISE_READER_TOKENS[username], cursor=read_cursor(username))

        for article in get_readwise_reader_articles(api):
            yield link_from_article(article, sources=[line])

        # Only reached when the caller exhausted this user's articles.
        if api.cursor:
            write_cursor(username, api.cursor)
|
||||||
|
|
||||||
|
|
||||||
|
# Key under which this parser is registered in the package's PARSERS mapping.
KEY = "readwise_reader_api"
|
||||||
|
# Human-readable parser name, paired with PARSER in the PARSERS mapping.
NAME = "Readwise Reader API"
|
||||||
|
# Callable that the PARSERS mapping exposes for this input format.
PARSER = parse_readwise_reader_api_export
|
Loading…
Add table
Reference in a new issue