From fc75b0cd8eb5800196095fa49f51b16588ce2b46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= Date: Sun, 2 Aug 2020 12:16:17 -0700 Subject: [PATCH] Add workaround to get YouTube feed from video page --- reader/subscription/finder.go | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/reader/subscription/finder.go b/reader/subscription/finder.go index 512a087f..62db5a33 100644 --- a/reader/subscription/finder.go +++ b/reader/subscription/finder.go @@ -22,11 +22,13 @@ import ( var ( errUnreadableDoc = "Unable to analyze this page: %v" youtubeChannelRegex = regexp.MustCompile(`youtube\.com/channel/(.*)`) + youtubeVideoRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)`) ) // FindSubscriptions downloads and try to find one or more subscriptions from an URL. func FindSubscriptions(websiteURL, userAgent, username, password string) (Subscriptions, *errors.LocalizedError) { websiteURL = findYoutubeChannelFeed(websiteURL) + websiteURL = parseYoutubeVideoPage(websiteURL) request := client.New(websiteURL) request.WithCredentials(username, password) @@ -48,14 +50,15 @@ func FindSubscriptions(websiteURL, userAgent, username, password string) (Subscr return subscriptions, nil } - subscriptions, err := parseDocument(response.EffectiveURL, strings.NewReader(body)) + subscriptions, err := parseWebPage(response.EffectiveURL, strings.NewReader(body)) if err != nil || subscriptions != nil { return subscriptions, err } + return tryWellKnownUrls(websiteURL, userAgent, username, password) } -func parseDocument(websiteURL string, data io.Reader) (Subscriptions, *errors.LocalizedError) { +func parseWebPage(websiteURL string, data io.Reader) (Subscriptions, *errors.LocalizedError) { var subscriptions Subscriptions queries := map[string]string{ "link[type='application/rss+xml']": "rss", @@ -105,6 +108,29 @@ func findYoutubeChannelFeed(websiteURL string) string { return websiteURL } +func parseYoutubeVideoPage(websiteURL string) string { + if !youtubeVideoRegex.MatchString(websiteURL) { + return websiteURL + } + + request := client.New(websiteURL) + response, browserErr := browser.Exec(request) + if browserErr != nil { + return websiteURL + } + + doc, docErr := goquery.NewDocumentFromReader(response.Body) + if docErr != nil { + return websiteURL + } + + if channelID, exists := doc.Find(`meta[itemprop="channelId"]`).First().Attr("content"); exists { + return fmt.Sprintf(`https://www.youtube.com/feeds/videos.xml?channel_id=%s`, channelID) + } + + return websiteURL +} + func tryWellKnownUrls(websiteURL, userAgent, username, password string) (Subscriptions, *errors.LocalizedError) { var subscriptions Subscriptions knownURLs := map[string]string{