Add Youtube videos in Quanta articles
Some articles (especially the recent year-in-review ones) include a YouTube video. The server-side rendered articles do not include the YouTube iframe, but they do have a script that looks like: <script type="text/javascript" data-reactid="6"> window.__APOLLO_STATE__ = { ... youtube_id: "9uASADiYe_8", ... We add a reformatting function that tries to detect obvious JavaScript code containing a field or variable called youtube_id with an 11-character double-quoted value, and inserts the referenced YouTube videos at the beginning of the article. This is slightly more general than needed for Quanta, in the hope that it could be useful for similar sites.
This commit is contained in:
parent
dcf87bd642
commit
bb0d2bf675
4 changed files with 26 additions and 6 deletions
|
@ -15,10 +15,11 @@ import (
|
|||
)
|
||||
|
||||
var (
|
||||
youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)`)
|
||||
invidioRegex = regexp.MustCompile(`https?:\/\/(.*)\/watch\?v=(.*)`)
|
||||
imgRegex = regexp.MustCompile(`<img [^>]+>`)
|
||||
textLinkRegex = regexp.MustCompile(`(?mi)(\bhttps?:\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])`)
|
||||
youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)`)
|
||||
youtubeIdRegex = regexp.MustCompile(`youtube_id"?\s*[:=]\s*"([a-zA-Z0-9_-]{11})"`)
|
||||
invidioRegex = regexp.MustCompile(`https?:\/\/(.*)\/watch\?v=(.*)`)
|
||||
imgRegex = regexp.MustCompile(`<img [^>]+>`)
|
||||
textLinkRegex = regexp.MustCompile(`(?mi)(\bhttps?:\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])`)
|
||||
)
|
||||
|
||||
func addImageTitle(entryURL, entryContent string) string {
|
||||
|
@ -219,6 +220,23 @@ func addYoutubeVideoUsingInvidiousPlayer(entryURL, entryContent string) string {
|
|||
return entryContent
|
||||
}
|
||||
|
||||
func addYoutubeVideoFromId(entryContent string) string {
|
||||
matches := youtubeIdRegex.FindAllStringSubmatch(entryContent, -1)
|
||||
if matches == nil {
|
||||
return entryContent
|
||||
}
|
||||
sb := strings.Builder{}
|
||||
for _, match := range matches {
|
||||
if len(match) == 2 {
|
||||
sb.WriteString(`<iframe width="650" height="350" frameborder="0" src="https://www.youtube-nocookie.com/embed/`)
|
||||
sb.WriteString(match[1])
|
||||
sb.WriteString(`" allowfullscreen></iframe><br>`)
|
||||
}
|
||||
}
|
||||
sb.WriteString(entryContent)
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
func addInvidiousVideo(entryURL, entryContent string) string {
|
||||
matches := invidioRegex.FindStringSubmatch(entryURL)
|
||||
if len(matches) == 3 {
|
||||
|
|
|
@ -74,6 +74,8 @@ func applyRule(entryURL, entryContent string, rule rule) string {
|
|||
entryContent = addInvidiousVideo(entryURL, entryContent)
|
||||
case "add_youtube_video_using_invidious_player":
|
||||
entryContent = addYoutubeVideoUsingInvidiousPlayer(entryURL, entryContent)
|
||||
case "add_youtube_video_from_id":
|
||||
entryContent = addYoutubeVideoFromId(entryContent)
|
||||
case "add_pdf_download_link":
|
||||
entryContent = addPDFLink(entryURL, entryContent)
|
||||
case "nl2br":
|
||||
|
|
|
@ -26,7 +26,7 @@ var predefinedRules = map[string]string{
|
|||
"oglaf.com": "add_image_title",
|
||||
"optipess.com": "add_image_title",
|
||||
"peebleslab.com": "add_image_title",
|
||||
"quantamagazine.org": `remove("h6:not(.byline,.post__title__kicker), #comments, .next-post__content, .footer__section, figure .outer--content")`,
|
||||
"quantamagazine.org": `add_youtube_video_from_id, remove("h6:not(.byline,.post__title__kicker), #comments, .next-post__content, .footer__section, figure .outer--content, script")`,
|
||||
"sentfromthemoon.com": "add_image_title",
|
||||
"thedoghousediaries.com": "add_image_title",
|
||||
"treelobsters.com": "add_image_title",
|
||||
|
|
|
@ -33,7 +33,7 @@ var predefinedRules = map[string]string{
|
|||
"osnews.com": "div.newscontent1",
|
||||
"phoronix.com": "div.content",
|
||||
"pseudo-sciences.org": "#art_main",
|
||||
"quantamagazine.org": ".outer--content, figure",
|
||||
"quantamagazine.org": ".outer--content, figure, script",
|
||||
"raywenderlich.com": "article",
|
||||
"slate.fr": ".field-items",
|
||||
"techcrunch.com": "div.article-entry",
|
||||
|
|
Loading…
Reference in a new issue