Merge pull request #1205 from overhacked/fix_url_regex_hyphen
This commit is contained in:
commit
a7d7644dca
2 changed files with 5 additions and 1 deletion
|
@@ -233,6 +233,10 @@ _test_url_strs = {
|
||||||
'https://example.com/?what=1#how-about-this=1&2%20baf': 1,
|
'https://example.com/?what=1#how-about-this=1&2%20baf': 1,
|
||||||
'https://example.com?what=1#how-about-this=1&2%20baf': 1,
|
'https://example.com?what=1#how-about-this=1&2%20baf': 1,
|
||||||
'<test>http://example7.com</test>': 1,
|
'<test>http://example7.com</test>': 1,
|
||||||
|
'https://<test>': 0,
|
||||||
|
'https://[test]': 0,
|
||||||
|
'http://"test"': 0,
|
||||||
|
'http://\'test\'': 0,
|
||||||
'[https://example8.com/what/is/this.php?what=1]': 1,
|
'[https://example8.com/what/is/this.php?what=1]': 1,
|
||||||
'[and http://example9.com?what=1&other=3#and-thing=2]': 1,
|
'[and http://example9.com?what=1&other=3#and-thing=2]': 1,
|
||||||
'<what>https://example10.com#and-thing=2 "</about>': 1,
|
'<what>https://example10.com#and-thing=2 "</about>': 1,
|
||||||
|
|
|
@@ -59,7 +59,7 @@ URL_REGEX = re.compile(
|
||||||
r'(?=('
|
r'(?=('
|
||||||
r'http[s]?://' # start matching from allowed schemes
|
r'http[s]?://' # start matching from allowed schemes
|
||||||
r'(?:[a-zA-Z]|[0-9]' # followed by allowed alphanum characters
|
r'(?:[a-zA-Z]|[0-9]' # followed by allowed alphanum characters
|
||||||
r'|[$-_@.&+]|[!*\(\),]' # or allowed symbols
|
r'|[-_$@.&+!*\(\),]' # or allowed symbols (keep hyphen first to match literal hyphen)
|
||||||
r'|(?:%[0-9a-fA-F][0-9a-fA-F]))' # or allowed unicode bytes
|
r'|(?:%[0-9a-fA-F][0-9a-fA-F]))' # or allowed unicode bytes
|
||||||
r'[^\]\[\(\)<>"\'\s]+' # stop parsing at these symbols
|
r'[^\]\[\(\)<>"\'\s]+' # stop parsing at these symbols
|
||||||
r'))',
|
r'))',
|
||||||
|
|
Loading…
Reference in a new issue