1
0
Fork 0

Merge pull request #1205 from overhacked/fix_url_regex_hyphen

This commit is contained in:
Nick Sweeting 2023-08-09 14:20:07 -07:00 committed by GitHub
commit a7d7644dca
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 5 additions and 1 deletion

View file

@@ -233,6 +233,10 @@ _test_url_strs = {
'https://example.com/?what=1#how-about-this=1&2%20baf': 1, 'https://example.com/?what=1#how-about-this=1&2%20baf': 1,
'https://example.com?what=1#how-about-this=1&2%20baf': 1, 'https://example.com?what=1#how-about-this=1&2%20baf': 1,
'<test>http://example7.com</test>': 1, '<test>http://example7.com</test>': 1,
'https://<test>': 0,
'https://[test]': 0,
'http://"test"': 0,
'http://\'test\'': 0,
'[https://example8.com/what/is/this.php?what=1]': 1, '[https://example8.com/what/is/this.php?what=1]': 1,
'[and http://example9.com?what=1&other=3#and-thing=2]': 1, '[and http://example9.com?what=1&other=3#and-thing=2]': 1,
'<what>https://example10.com#and-thing=2 "</about>': 1, '<what>https://example10.com#and-thing=2 "</about>': 1,

View file

@@ -59,7 +59,7 @@ URL_REGEX = re.compile(
r'(?=(' r'(?=('
r'http[s]?://' # start matching from allowed schemes r'http[s]?://' # start matching from allowed schemes
r'(?:[a-zA-Z]|[0-9]' # followed by allowed alphanum characters r'(?:[a-zA-Z]|[0-9]' # followed by allowed alphanum characters
r'|[$-_@.&+]|[!*\(\),]' # or allowed symbols r'|[-_$@.&+!*\(\),]' # or allowed symbols (keep hyphen first to match literal hyphen)
r'|(?:%[0-9a-fA-F][0-9a-fA-F]))' # or allowed unicode bytes r'|(?:%[0-9a-fA-F][0-9a-fA-F]))' # or allowed unicode bytes
r'[^\]\[\(\)<>"\'\s]+' # stop parsing at these symbols r'[^\]\[\(\)<>"\'\s]+' # stop parsing at these symbols
r'))', r'))',