diff --git a/Cargo.lock b/Cargo.lock index c3825e9..58fb601 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -303,6 +303,7 @@ dependencies = [ "bb8-postgres 0.4.0-pre (git+https://github.com/khuey/bb8.git)", "furaffinity-rs 0.1.0 (git+https://git.huefox.com/syfaro/furaffinity-rs.git)", "futures 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "hamming 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", "image 0.22.3 (registry+https://github.com/rust-lang/crates.io-index)", "img_hash 3.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "reqwest 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -552,6 +553,11 @@ dependencies = [ "tokio-util 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "hamming" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "heck" version = "0.3.1" @@ -2267,6 +2273,7 @@ dependencies = [ "checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb" "checksum gif 0.10.3 (registry+https://github.com/rust-lang/crates.io-index)" = "471d90201b3b223f3451cd4ad53e34295f16a1df17b1edf3736d47761c3981af" "checksum h2 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b9433d71e471c1736fd5a61b671fc0b148d7a2992f666c958d03cd8feb3b88d1" +"checksum hamming 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "65043da274378d68241eb9a8f8f8aa54e349136f7b8e12f63e3ef44043cc30e1" "checksum heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205" "checksum hermit-abi 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "eff2656d88f158ce120947499e971d743c05dbcbed62e5bd2f38f1698bbc3772" "checksum hmac 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5dcb5e64cda4c23119ab41ba960d1e170a774c8e4b9d9e6a9bc18aabf5e59695" diff --git a/Cargo.toml b/Cargo.toml index 148b828..d49cf6f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,7 @@ edition = "2018" reqwest = { version = "*", features = ["json"] } tokio = { version = "0.2", features = ["full"] } futures = { version = "*", features = ["thread-pool"] } +hamming = "0.1.3" serde = "*" serde_json = "*" diff --git a/src/bin/load_hashes.rs b/src/bin/load_hashes.rs index 64e95ac..11a1a22 100644 --- a/src/bin/load_hashes.rs +++ b/src/bin/load_hashes.rs @@ -46,10 +46,10 @@ async fn main() { tokio_postgres::NoTls, ); - let pool = match Pool::builder().build(manager).await { - Ok(pool) => pool, - Err(e) => panic!("unable to build pool: {}", e), - }; + let pool = Pool::builder() + .build(manager) + .await + .expect("unable to build pool"); let client = reqwest::Client::builder() .user_agent("Syfaro test client syfaro@huefox.com") @@ -58,7 +58,7 @@ async fn main() { let client = std::sync::Arc::new(client); loop { - println!("getting next 100 posts"); + println!("getting next 384 posts"); let db = pool.clone(); @@ -78,7 +78,7 @@ async fn main() { data->>'file_ext' IN ('jpg', 'png') AND data->>'file_url' <> '/images/deleted-preview.png' ORDER BY id DESC - LIMIT 100", + LIMIT 384", &[], ) .await diff --git a/src/bin/query_hash.rs b/src/bin/query_hash.rs index 393f613..6fb82a2 100644 --- a/src/bin/query_hash.rs +++ b/src/bin/query_hash.rs @@ -1,8 +1,9 @@ #[derive(Debug)] struct Row { id: i32, - artists: Vec, - sources: Vec, + artists: Option>, + sources: Option>, + distance: Option, } async fn get_hash_distance_from_url( @@ -53,6 +54,7 @@ async fn main() { .query( "SELECT post.id id, + post.hash hash, artists_agg.artists artists, sources_agg.sources sources FROM @@ -69,19 +71,36 @@ async fn main() { .await .expect("unable to query") .into_iter() - .map(|row| Row { - id: row.get("id"), - sources: row.get("sources"), - artists: row.get("artists"), + .map(|row| { + let distance = row + .get::<&str, Option>("hash") + .map(|hash| hamming::distance_fast(&hash.to_be_bytes(), &bytes).unwrap()); + + Row { + id: row.get("id"), + sources: row.get("sources"), + artists: row.get("artists"), + distance, + } }); for row in rows { println!( - "Possible match: https://e621.net/post/show/{} by {}", + "Possible match: [distance of {}] https://e621.net/post/show/{} by {}", + row.distance.unwrap_or_else(u64::max_value), row.id, - row.artists.join(", ") + row.artists + .map(|artists| artists.join(", ")) + .unwrap_or_else(|| "unknown".to_string()) ); - for source in row.sources { + let sources = match row.sources { + Some(source) => source, + _ => { + println!("no sources"); + continue; + } + }; + for source in sources { let distance = get_hash_distance_from_url(&client, &source, &hash).await; println!( "- {} (distance of {})", diff --git a/src/bin/update.rs b/src/bin/update.rs index 6635bcc..3b0c169 100644 --- a/src/bin/update.rs +++ b/src/bin/update.rs @@ -73,6 +73,8 @@ async fn main() { .build() .expect("Unable to build http client"); + println!("max is id: {}", max_id); + let mut now; let mut min_id: Option = None; @@ -90,9 +92,10 @@ async fn main() { .expect("Unable to insert"); if let Some(min_id) = min_id { - if min_id >= max_id { + println!("min id is: {}", min_id); + if min_id <= max_id { println!("finished run, {}, {}", min_id, max_id); - break + break; } }