Trim unicode characters from the start of URLs

This commit is contained in:
Sander van Harmelen 2019-01-28 19:28:51 +01:00 committed by Christian Duerr
parent e95c80764e
commit 3562f2c9aa
2 changed files with 12 additions and 4 deletions

View File

@@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Resolved off-by-one issue with erasing characters in the last column - Resolved off-by-one issue with erasing characters in the last column
- Excessive polling every 100ms with `live_config_reload` enabled - Excessive polling every 100ms with `live_config_reload` enabled
- Unicode characters at the beginning of URLs are now properly ignored
## Version 0.2.7 ## Version 0.2.7

View File

@@ -45,13 +45,19 @@ impl UrlParser {
/// Returns the URL if the parser has found any. /// Returns the URL if the parser has found any.
pub fn url(mut self) -> Option<String> { pub fn url(mut self) -> Option<String> {
// Remove non-alphabetical characters before scheme // Remove non-alphabetical characters before the scheme
// https://tools.ietf.org/html/rfc3986#section-3.1
if let Some(index) = self.state.find("://") { if let Some(index) = self.state.find("://") {
for i in (0..index - 1).rev() { let iter = self
match self.state.chars().nth(i).unwrap() { .state
.char_indices()
.rev()
.skip_while(|(byte_index, _)| *byte_index >= index);
for (byte_index, c) in iter {
match c {
'a'...'z' | 'A'...'Z' => (), 'a'...'z' | 'A'...'Z' => (),
_ => { _ => {
self.state = self.state.split_off(i + 1); self.state = self.state.split_off(byte_index + c.len_utf8());
break; break;
} }
} }
@@ -206,6 +212,7 @@ mod tests {
url_test("complicated:https://example.org", "https://example.org", 15); url_test("complicated:https://example.org", "https://example.org", 15);
url_test("test.https://example.org", "https://example.org", 10); url_test("test.https://example.org", "https://example.org", 10);
url_test(",https://example.org", "https://example.org", 5); url_test(",https://example.org", "https://example.org", 5);
url_test("\u{2502}https://example.org", "https://example.org", 5);
} }
#[test] #[test]