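Various fixes: add Hamming-distance reporting for matches, tolerate missing artists/sources, raise the fetch batch size from 100 to 384, and change the min/max id loop-exit check.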

This commit is contained in:
Syfaro 2020-01-12 02:18:12 -06:00
parent 1ce40026b3
commit 8673515ed1
5 changed files with 47 additions and 17 deletions

7
Cargo.lock generated
View File

@ -303,6 +303,7 @@ dependencies = [
"bb8-postgres 0.4.0-pre (git+https://github.com/khuey/bb8.git)",
"furaffinity-rs 0.1.0 (git+https://git.huefox.com/syfaro/furaffinity-rs.git)",
"futures 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"hamming 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
"image 0.22.3 (registry+https://github.com/rust-lang/crates.io-index)",
"img_hash 3.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"reqwest 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)",
@ -552,6 +553,11 @@ dependencies = [
"tokio-util 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "hamming"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "heck"
version = "0.3.1"
@ -2267,6 +2273,7 @@ dependencies = [
"checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb"
"checksum gif 0.10.3 (registry+https://github.com/rust-lang/crates.io-index)" = "471d90201b3b223f3451cd4ad53e34295f16a1df17b1edf3736d47761c3981af"
"checksum h2 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b9433d71e471c1736fd5a61b671fc0b148d7a2992f666c958d03cd8feb3b88d1"
"checksum hamming 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "65043da274378d68241eb9a8f8f8aa54e349136f7b8e12f63e3ef44043cc30e1"
"checksum heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205"
"checksum hermit-abi 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "eff2656d88f158ce120947499e971d743c05dbcbed62e5bd2f38f1698bbc3772"
"checksum hmac 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5dcb5e64cda4c23119ab41ba960d1e170a774c8e4b9d9e6a9bc18aabf5e59695"

View File

@ -8,6 +8,7 @@ edition = "2018"
reqwest = { version = "*", features = ["json"] }
tokio = { version = "0.2", features = ["full"] }
futures = { version = "*", features = ["thread-pool"] }
hamming = "0.1.3"
serde = "*"
serde_json = "*"

View File

@ -46,10 +46,10 @@ async fn main() {
tokio_postgres::NoTls,
);
let pool = match Pool::builder().build(manager).await {
Ok(pool) => pool,
Err(e) => panic!("unable to build pool: {}", e),
};
let pool = Pool::builder()
.build(manager)
.await
.expect("unable to build pool");
let client = reqwest::Client::builder()
.user_agent("Syfaro test client syfaro@huefox.com")
@ -58,7 +58,7 @@ async fn main() {
let client = std::sync::Arc::new(client);
loop {
println!("getting next 100 posts");
println!("getting next 384 posts");
let db = pool.clone();
@ -78,7 +78,7 @@ async fn main() {
data->>'file_ext' IN ('jpg', 'png') AND
data->>'file_url' <> '/images/deleted-preview.png'
ORDER BY id DESC
LIMIT 100",
LIMIT 384",
&[],
)
.await

View File

@ -1,8 +1,9 @@
#[derive(Debug)]
struct Row {
id: i32,
artists: Vec<String>,
sources: Vec<String>,
artists: Option<Vec<String>>,
sources: Option<Vec<String>>,
distance: Option<u64>,
}
async fn get_hash_distance_from_url(
@ -53,6 +54,7 @@ async fn main() {
.query(
"SELECT
post.id id,
post.hash hash,
artists_agg.artists artists,
sources_agg.sources sources
FROM
@ -69,19 +71,36 @@ async fn main() {
.await
.expect("unable to query")
.into_iter()
.map(|row| Row {
.map(|row| {
let distance = row
.get::<&str, Option<i64>>("hash")
.map(|hash| hamming::distance_fast(&hash.to_be_bytes(), &bytes).unwrap());
Row {
id: row.get("id"),
sources: row.get("sources"),
artists: row.get("artists"),
distance,
}
});
for row in rows {
println!(
"Possible match: https://e621.net/post/show/{} by {}",
"Possible match: [distance of {}] https://e621.net/post/show/{} by {}",
row.distance.unwrap_or_else(u64::max_value),
row.id,
row.artists.join(", ")
row.artists
.map(|artists| artists.join(", "))
.unwrap_or_else(|| "unknown".to_string())
);
for source in row.sources {
let sources = match row.sources {
Some(source) => source,
_ => {
println!("no sources");
continue;
}
};
for source in sources {
let distance = get_hash_distance_from_url(&client, &source, &hash).await;
println!(
"- {} (distance of {})",

View File

@ -73,6 +73,8 @@ async fn main() {
.build()
.expect("Unable to build http client");
println!("max is id: {}", max_id);
let mut now;
let mut min_id: Option<i32> = None;
@ -90,9 +92,10 @@ async fn main() {
.expect("Unable to insert");
if let Some(min_id) = min_id {
if min_id >= max_id {
println!("min id is: {}", min_id);
if min_id <= max_id {
println!("finished run, {}, {}", min_id, max_id);
break
break;
}
}