From aaac279de09eb1fe48b32fd95e61453f0d602ac4 Mon Sep 17 00:00:00 2001 From: Peter Zhu <peter@peterzhu.ca> Date: Mon, 11 Apr 2022 15:37:29 -0400 Subject: [PATCH] [ruby/rdoc] Only parse valid URLs Only valid characters for URLs should be used for generating URLs. A list of valid characters can be found in sections 2.2 and 2.3 of IETF RFC 3986 (https://www.ietf.org/rfc/rfc3986.txt). https://github.com/ruby/rdoc/commit/2bd8fcdd4f --- lib/rdoc/markup/to_html.rb | 4 +++- test/rdoc/test_rdoc_markup_to_html.rb | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/rdoc/markup/to_html.rb b/lib/rdoc/markup/to_html.rb index 7c2e91cecf..2bfabc8942 100644 --- a/lib/rdoc/markup/to_html.rb +++ b/lib/rdoc/markup/to_html.rb @@ -61,12 +61,14 @@ class RDoc::Markup::ToHtml < RDoc::Markup::Formatter # # These methods are used by regexp handling markup added by RDoc::Markup#add_regexp_handling. + URL_CHARACTERS_REGEXP_STR = /[A-Za-z0-9\-._~:\/\?#\[\]@!$&'\(\)*+,;%=]/.source + ## # Adds regexp handlings. def init_regexp_handlings # external links - @markup.add_regexp_handling(/(?:link:|https?:|mailto:|ftp:|irc:|www\.)\S+\w/, + @markup.add_regexp_handling(/(?:link:|https?:|mailto:|ftp:|irc:|www\.)#{URL_CHARACTERS_REGEXP_STR}+\w/, :HYPERLINK) init_link_notation_regexp_handlings end diff --git a/test/rdoc/test_rdoc_markup_to_html.rb b/test/rdoc/test_rdoc_markup_to_html.rb index b2b21de806..e5d7a35710 100644 --- a/test/rdoc/test_rdoc_markup_to_html.rb +++ b/test/rdoc/test_rdoc_markup_to_html.rb @@ -707,6 +707,7 @@ EXPECTED def test_convert_with_exclude_tag assert_equal "\n<p><code>aaa</code>[:symbol]</p>\n", @to.convert('+aaa+[:symbol]') assert_equal "\n<p><code>aaa[:symbol]</code></p>\n", @to.convert('+aaa[:symbol]+') + assert_equal "\n<p><code>https:</code>-foobar</p>\n", @to.convert('<tt>https:</tt>-foobar') assert_equal "\n<p><a href=\":symbol\">aaa</a></p>\n", @to.convert('aaa[:symbol]') end