From 9d41a10148875347bd8a957e1ad28c69d086f75b Mon Sep 17 00:00:00 2001 From: Tom Parker-Shemilt Date: Mon, 24 Feb 2020 23:41:45 +0000 Subject: [PATCH] Cache the results of getting links across Travis build and only re-check every so often --- .gitignore | 3 +- .travis.yml | 15 ++++++- Cargo.toml | 3 +- results/.gitignore | 2 + src/main.rs | 98 +++++++++++++++++++++++++++++++++++----------- 5 files changed, 93 insertions(+), 28 deletions(-) create mode 100644 results/.gitignore diff --git a/.gitignore b/.gitignore index 409fcf2..008f177 100644 --- a/.gitignore +++ b/.gitignore @@ -3,5 +3,4 @@ Cargo.lock target/ cleanup cleanup.exe -ab-results*.json -results.yaml \ No newline at end of file +ab-results*.json \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 9a290c6..3347450 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,18 @@ language: rust script: cargo run -cache: cargo env: global: - - RUST_LOG=warn \ No newline at end of file + - RUST_LOG=warn + +# Cache hints based off of https://gist.github.com/jkcclemens/000456ca646bd502cac0dbddcb8fa307 +cache: + cargo: true + directories: + - $TRAVIS_BUILD_DIR/results # So we don't have to check all the things every time +before_cache: + - rm -rfv target/debug/incremental/awesome_rust-* + - rm -rfv target/debug/.fingerprint/awesome_rust-* + - rm -rfv target/debug/deps/awesome_crate-* + - rm -rfv target/debug/awesome_rust.d + - rm -rfv target/debug/incremental/awesome_rust-* \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 21c0a97..e111a0b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,4 +19,5 @@ env_logger = "0.7" async-std = "1" log = "0.4" regex = "1" -scraper = "0.11" \ No newline at end of file +scraper = "0.11" +chrono = { version = "0.4", features = ["serde"] } \ No newline at end of file diff --git a/results/.gitignore b/results/.gitignore new file mode 100644 index 0000000..96a88f2 --- /dev/null +++ b/results/.gitignore @@ -0,0 +1,2 @@ +# Note that this is in a directory so we can cache it with Travis +results.yaml \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 007d936..caae908 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,6 +13,7 @@ use reqwest::{Client, redirect::Policy, StatusCode, header, Url}; use regex::Regex; use scraper::{Html, Selector}; use failure::{Fail, Error, format_err}; +use chrono::{Local, DateTime, Duration}; #[derive(Debug, Fail)] enum CheckerError { @@ -161,19 +162,14 @@ fn get_url(url: String) -> BoxFuture<'static, (String, Result, - failed: BTreeMap +struct Link { + last_working: Option>, + updated_at: DateTime, + working: bool, + message: String } -impl Results { - fn new() -> Results { - Results { - working: BTreeSet::new(), - failed: BTreeMap::new() - } - } -} +type Results = BTreeMap; #[tokio::main] async fn main() -> Result<(), Error> { @@ -181,20 +177,28 @@ async fn main() -> Result<(), Error> { let markdown_input = fs::read_to_string("README.md").expect("Can't read README.md"); let parser = Parser::new(&markdown_input); - let mut results: Results = fs::read_to_string("results.yaml") + let mut used: BTreeSet = BTreeSet::new(); + let mut results: Results = fs::read_to_string("results/results.yaml") .map_err(|e| format_err!("{}", e)) .and_then(|x| serde_yaml::from_str(&x).map_err(|e| format_err!("{}", e))) .unwrap_or(Results::new()); - results.failed.clear(); let mut url_checks = vec![]; + let min_between_checks: Duration = Duration::days(1); + let max_allowed_failed: Duration = Duration::days(3); let mut do_check = |url: String| { if !url.starts_with("http") { return; } - if results.working.contains(&url) { - return; + used.insert(url.clone()); + if let Some(link) = results.get(&url) { + if link.working { + let since = Local::now() - link.updated_at; + if since < min_between_checks { + return; + } + } } let check = get_url(url).boxed(); url_checks.push(check); @@ -229,6 +233,13 @@ async fn main() -> Result<(), Error> { } } + let results_keys = results.keys().cloned().collect::>(); + let old_links = results_keys.difference(&used); + for link in old_links { + results.remove(link).unwrap(); + } + fs::write("results/results.yaml", serde_yaml::to_string(&results)?)?; + while url_checks.len() > 0 { debug!("Waiting..."); let ((url, res), _index, remaining) = select_all(url_checks).await; @@ -236,7 +247,19 @@ async fn main() -> Result<(), Error> { match res { Ok(_) => { print!("\u{2714} "); - results.working.insert(url); + if let Some(link) = results.get_mut(&url) { + link.updated_at = Local::now(); + link.last_working = Some(Local::now()); + link.working = true; + link.message = String::from("") + } else { + results.insert(url.clone(), Link { + updated_at: Local::now(), + last_working: Some(Local::now()), + working: true, + message: String::from("") + }); + } }, Err(err) => { print!("\u{2718} "); @@ -258,20 +281,49 @@ async fn main() -> Result<(), Error> { format!("{:?}", err) } }; - results.failed.insert(url, message); + if let Some(link) = results.get_mut(&url) { + link.updated_at = Local::now(); + link.working = false; + link.message = message; + link.last_working = None; + } else { + results.insert(url.clone(), Link { + updated_at: Local::now(), + working: false, + message: message, + last_working: None + }); + } } } std::io::stdout().flush().unwrap(); - fs::write("results.yaml", serde_yaml::to_string(&results)?)?; + fs::write("results/results.yaml", serde_yaml::to_string(&results)?)?; } println!(""); - if results.failed.is_empty() { + let mut failed: u32 = 0; + + for (_url, link) in results.iter() { + if !link.working { + if link.last_working.is_none() { + println!("{}", link.message); + failed +=1; + continue; + } + if let Some(last_working) = link.last_working { + let since = Local::now() - last_working; + if since > max_allowed_failed { + println!("{}", link.message); + failed +=1; + } else { + println!("Failure occurred but only {} ago, so we're not worrying yet: {}", since, link.message); + } + } + } + } + if failed == 0 { println!("No errors!"); Ok(()) } else { - for (_url, error) in &results.failed { - println!("{}", error); - } - Err(format_err!("{} urls with errors", results.failed.len())) + Err(format_err!("{} urls with errors", failed)) } } \ No newline at end of file