Trim unicode characters from the start of URLs
This commit is contained in:
parent
e95c80764e
commit
3562f2c9aa
|
@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||||
|
|
||||||
- Resolved off-by-one issue with erasing characters in the last column
|
- Resolved off-by-one issue with erasing characters in the last column
|
||||||
- Excessive polling every 100ms with `live_config_reload` enabled
|
- Excessive polling every 100ms with `live_config_reload` enabled
|
||||||
|
- Unicode characters at the beginning of URLs are now properly ignored
|
||||||
|
|
||||||
## Version 0.2.7
|
## Version 0.2.7
|
||||||
|
|
||||||
|
|
15
src/url.rs
15
src/url.rs
|
@ -45,13 +45,19 @@ impl UrlParser {
|
||||||
|
|
||||||
/// Returns the URL if the parser has found any.
|
/// Returns the URL if the parser has found any.
|
||||||
pub fn url(mut self) -> Option<String> {
|
pub fn url(mut self) -> Option<String> {
|
||||||
// Remove non-alphabetical characters before scheme
|
// Remove non-alphabetical characters before the scheme
|
||||||
|
// https://tools.ietf.org/html/rfc3986#section-3.1
|
||||||
if let Some(index) = self.state.find("://") {
|
if let Some(index) = self.state.find("://") {
|
||||||
for i in (0..index - 1).rev() {
|
let iter = self
|
||||||
match self.state.chars().nth(i).unwrap() {
|
.state
|
||||||
|
.char_indices()
|
||||||
|
.rev()
|
||||||
|
.skip_while(|(byte_index, _)| *byte_index >= index);
|
||||||
|
for (byte_index, c) in iter {
|
||||||
|
match c {
|
||||||
'a'...'z' | 'A'...'Z' => (),
|
'a'...'z' | 'A'...'Z' => (),
|
||||||
_ => {
|
_ => {
|
||||||
self.state = self.state.split_off(i + 1);
|
self.state = self.state.split_off(byte_index + c.len_utf8());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -206,6 +212,7 @@ mod tests {
|
||||||
url_test("complicated:https://example.org", "https://example.org", 15);
|
url_test("complicated:https://example.org", "https://example.org", 15);
|
||||||
url_test("test.https://example.org", "https://example.org", 10);
|
url_test("test.https://example.org", "https://example.org", 10);
|
||||||
url_test(",https://example.org", "https://example.org", 5);
|
url_test(",https://example.org", "https://example.org", 5);
|
||||||
|
url_test("\u{2502}https://example.org", "https://example.org", 5);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
Loading…
Reference in New Issue