From 8aec33928398f796486ff3b33086ca119632c012 Mon Sep 17 00:00:00 2001 From: makeworld Date: Sat, 19 Dec 2020 19:41:25 -0500 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Fix=20invalid=20URLs,=20NFC=20norm.?= =?UTF-8?q?,=20support=20IDNs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #138 --- CHANGELOG.md | 6 +++++- display/util.go | 23 ++++++++++++++++++++++- display/util_test.go | 5 +++++ go.mod | 5 ++--- go.sum | 13 +++++++++---- 5 files changed, 43 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 74c1c34..04dcf35 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,9 +9,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **Subscriptions** to feeds and page changes (#61) - Opening local files with `file://` URIs (#103, #117) - `show_link` option added in config to optionally see the URL (#133) +- Support for Unicode in domain names (IDNs) +- Unnecessarily encoded characters in URLs will be decoded (#138) +- URLs are NFC-normalized before any processing (#138) ### Changed -- Updated [go-gemini](https://github.com/makeworld-the-better-one/go-gemini) to v0.10.0 +- Updated [go-gemini](https://github.com/makeworld-the-better-one/go-gemini) to v0.11.0 - Supports CN-only wildcard certs - Time out when header takes too long - Preformatted text is now light yellow by default @@ -20,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - Single quotes are used in the default config for commands and paths so that Windows paths with backslashes will be parsed correctly - Downloading now uses proxies when appropriate +- User-entered URLs with invalid characters will be percent-encoded (#138) ## [1.6.0] - 2020-11-04 diff --git a/display/util.go b/display/util.go index 4ea8e91..7c3d19c 100644 --- a/display/util.go +++ b/display/util.go @@ -5,8 +5,10 @@ import ( "net/url" "strings" + "github.com/makeworld-the-better-one/go-gemini" "github.com/spf13/viper" "gitlab.com/tslocum/cview" + "golang.org/x/text/unicode/norm" ) // This file contains funcs that are small, self-contained utilities. @@ -73,15 +75,23 @@ func resolveRelLink(t *tab, prev, next string) (string, error) { // Example: gemini://gus.guru:1965/ and //gus.guru/. // This function will take both output the same URL each time. // +// It will also percent-encode invalid characters, and decode chars +// that don't need to be encoded. It will also apply Unicode NFC +// normalization. +// // The string passed must already be confirmed to be a URL. // Detection of a search string vs. a URL must happen elsewhere. // // It only works with absolute URLs. func normalizeURL(u string) string { - parsed, err := url.Parse(u) + u = norm.NFC.String(u) + + tmp, err := gemini.GetPunycodeURL(u) if err != nil { return u } + u = tmp + parsed, _ := url.Parse(u) if parsed.Scheme == "" { // Always add scheme @@ -102,6 +112,17 @@ func normalizeURL(u string) string { // gemini://example.com -> gemini://example.com/ if parsed.Path == "" { parsed.Path = "/" + } else { + // Decode and re-encode path + // This removes needless encoding, like that of ASCII chars + // And encodes anything that wasn't but should've been + parsed.RawPath = strings.ReplaceAll(url.PathEscape(parsed.Path), "%2F", "/") + } + + // Do the same to the query string + un, err := gemini.QueryUnescape(parsed.RawQuery) + if err == nil { + parsed.RawQuery = gemini.QueryEscape(un) } return parsed.String() diff --git a/display/util_test.go b/display/util_test.go index 4cc3506..6d092ad 100644 --- a/display/util_test.go +++ b/display/util_test.go @@ -21,6 +21,11 @@ var normalizeURLTests = []struct { {"mailto:example@example.com", "mailto:example@example.com"}, {"magnet:?xt=urn:btih:test", "magnet:?xt=urn:btih:test"}, {"https://example.com", "https://example.com"}, + // Fixing URL tests + {"gemini://gemini.circumlunar.space/%64%6f%63%73/%66%61%71%2e%67%6d%69", "gemini://gemini.circumlunar.space/docs/faq.gmi"}, + {"gemini://example.com/蛸", "gemini://example.com/%E8%9B%B8"}, + {"gemini://gemini.circumlunar.space/%64%6f%63%73/;;.'%66%61%71蛸%2e%67%6d%69", "gemini://gemini.circumlunar.space/docs/%3B%3B.%27faq%E8%9B%B8.gmi"}, + {"gemini://example.com/?%2Ch%64ello蛸", "gemini://example.com/?%2Chdello%E8%9B%B8"}, } func TestNormalizeURL(t *testing.T) { diff --git a/go.mod b/go.mod index 5dab22f..93f8046 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ require ( github.com/fsnotify/fsnotify v1.4.9 // indirect github.com/gdamore/tcell v1.3.1-0.20200608133353-cb1e5d6fa606 github.com/google/go-cmp v0.5.0 // indirect - github.com/makeworld-the-better-one/go-gemini v0.10.0 + github.com/makeworld-the-better-one/go-gemini v0.11.0 github.com/makeworld-the-better-one/go-isemoji v1.1.0 github.com/makeworld-the-better-one/progressbar/v3 v3.3.5-0.20200710151429-125743e22b4f github.com/mitchellh/go-homedir v1.1.0 @@ -22,8 +22,7 @@ require ( github.com/spf13/viper v1.7.0 github.com/stretchr/testify v1.6.1 gitlab.com/tslocum/cview v1.4.8-0.20200713214710-cc7796c4ca44 - golang.org/x/sys v0.0.0-20200817155316-9781c653f443 // indirect - golang.org/x/text v0.3.3 + golang.org/x/text v0.3.5-0.20201208001344-75a595aef632 gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect gopkg.in/ini.v1 v1.57.0 // indirect ) diff --git a/go.sum b/go.sum index 8368eef..f1ee053 100644 --- a/go.sum +++ b/go.sum @@ -133,8 +133,8 @@ github.com/lucasb-eyer/go-colorful v1.0.3 h1:QIbQXiugsb+q10B+MI+7DI1oQLdmnep86tW github.com/lucasb-eyer/go-colorful v1.0.3/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= github.com/magiconair/properties v1.8.1 h1:ZC2Vc7/ZFkGmsVC9KvOjumD+G5lXy2RtTKyzRKO2BQ4= github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= -github.com/makeworld-the-better-one/go-gemini v0.10.0 h1:MZYuGD5RcjXD5k+gZLKOs1djKaPDaQrdY0OhdIh637c= -github.com/makeworld-the-better-one/go-gemini v0.10.0/go.mod h1:P7/FbZ+IEIbA/d+A0Y3w2GNgD8SA2AcNv7aDGJbaWG4= +github.com/makeworld-the-better-one/go-gemini v0.11.0 h1:MNGiULJFvcqls9oCy40tE897hDeKvNmEK9i5kRucgQk= +github.com/makeworld-the-better-one/go-gemini v0.11.0/go.mod h1:F+3x+R1xeYK90jMtBq+U+8Sh64r2dHleDZ/en3YgSmg= github.com/makeworld-the-better-one/go-isemoji v1.1.0 h1:wZBHOKB5zAIgaU2vaWnXFDDhatebB8TySrNVxjVV84g= github.com/makeworld-the-better-one/go-isemoji v1.1.0/go.mod h1:FBjkPl9rr0G4vlZCc+Mr+QcnOfGCTbGWYW8/1sp06I0= github.com/makeworld-the-better-one/gofeed v1.1.1-0.20201123002655-c0c6354134fe h1:i3b9Qy5z23DcXRnrsMYcM5s9Ng5VIidM1xZd+szuTsY= @@ -286,6 +286,8 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200301022130-244492dfa37a h1:GuSPYbZzB5/dcLNCwLQLsg3obCJtX9IJhpXkvY7kzk0= golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201216054612-986b41b23924 h1:QsnDpLLOKwHBBDa8nDws4DYNc/ryVW2vCpxCs09d4PY= +golang.org/x/net v0.0.0-20201216054612-986b41b23924/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -311,13 +313,16 @@ golang.org/x/sys v0.0.0-20190626150813-e07cf5db2756/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200610111108-226ff32320da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200817155316-9781c653f443 h1:X18bCaipMcoJGm27Nv7zr4XYPKGUy92GtqboKC2Hxaw= -golang.org/x/sys v0.0.0-20200817155316-9781c653f443/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68 h1:nxC68pudNYkKU6jWhgrqdreuFiOQWj1Fs7T3VrH4Pjw= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.5-0.20201208001344-75a595aef632 h1:clKlpQ6BheG1zIRhU2SPRAXpLgol/tqWVEeRkjpsaDI= +golang.org/x/text v0.3.5-0.20201208001344-75a595aef632/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=