Trim unicode characters from the start of URLs

This commit is contained in:
Sander van Harmelen 2019-01-28 19:28:51 +01:00 committed by Christian Duerr
parent e95c80764e
commit 3562f2c9aa
2 changed files with 12 additions and 4 deletions

View File

@@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Resolved off-by-one issue with erasing characters in the last column
- Excessive polling every 100ms with `live_config_reload` enabled
- Unicode characters at the beginning of URLs are now properly ignored
## Version 0.2.7

View File

@@ -45,13 +45,19 @@ impl UrlParser {
/// Returns the URL if the parser has found any.
pub fn url(mut self) -> Option<String> {
// Remove non-alphabetical characters before scheme
// Remove non-alphabetical characters before the scheme
// https://tools.ietf.org/html/rfc3986#section-3.1
if let Some(index) = self.state.find("://") {
for i in (0..index - 1).rev() {
match self.state.chars().nth(i).unwrap() {
let iter = self
.state
.char_indices()
.rev()
.skip_while(|(byte_index, _)| *byte_index >= index);
for (byte_index, c) in iter {
match c {
'a'...'z' | 'A'...'Z' => (),
_ => {
self.state = self.state.split_off(i + 1);
self.state = self.state.split_off(byte_index + c.len_utf8());
break;
}
}
@@ -206,6 +212,7 @@ mod tests {
url_test("complicated:https://example.org", "https://example.org", 15);
url_test("test.https://example.org", "https://example.org", 10);
url_test(",https://example.org", "https://example.org", 5);
url_test("\u{2502}https://example.org", "https://example.org", 5);
}
#[test]