mirror of
https://github.com/Syfaro/fuzzysearch.git
synced 2024-11-10 17:02:38 +00:00
Various fixes.
This commit is contained in:
parent
1ce40026b3
commit
8673515ed1
7
Cargo.lock
generated
7
Cargo.lock
generated
@ -303,6 +303,7 @@ dependencies = [
|
|||||||
"bb8-postgres 0.4.0-pre (git+https://github.com/khuey/bb8.git)",
|
"bb8-postgres 0.4.0-pre (git+https://github.com/khuey/bb8.git)",
|
||||||
"furaffinity-rs 0.1.0 (git+https://git.huefox.com/syfaro/furaffinity-rs.git)",
|
"furaffinity-rs 0.1.0 (git+https://git.huefox.com/syfaro/furaffinity-rs.git)",
|
||||||
"futures 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
"futures 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"hamming 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"image 0.22.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
"image 0.22.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"img_hash 3.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"img_hash 3.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"reqwest 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
"reqwest 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
@ -552,6 +553,11 @@ dependencies = [
|
|||||||
"tokio-util 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"tokio-util 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hamming"
|
||||||
|
version = "0.1.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "heck"
|
name = "heck"
|
||||||
version = "0.3.1"
|
version = "0.3.1"
|
||||||
@ -2267,6 +2273,7 @@ dependencies = [
|
|||||||
"checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb"
|
"checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb"
|
||||||
"checksum gif 0.10.3 (registry+https://github.com/rust-lang/crates.io-index)" = "471d90201b3b223f3451cd4ad53e34295f16a1df17b1edf3736d47761c3981af"
|
"checksum gif 0.10.3 (registry+https://github.com/rust-lang/crates.io-index)" = "471d90201b3b223f3451cd4ad53e34295f16a1df17b1edf3736d47761c3981af"
|
||||||
"checksum h2 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b9433d71e471c1736fd5a61b671fc0b148d7a2992f666c958d03cd8feb3b88d1"
|
"checksum h2 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b9433d71e471c1736fd5a61b671fc0b148d7a2992f666c958d03cd8feb3b88d1"
|
||||||
|
"checksum hamming 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "65043da274378d68241eb9a8f8f8aa54e349136f7b8e12f63e3ef44043cc30e1"
|
||||||
"checksum heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205"
|
"checksum heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205"
|
||||||
"checksum hermit-abi 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "eff2656d88f158ce120947499e971d743c05dbcbed62e5bd2f38f1698bbc3772"
|
"checksum hermit-abi 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "eff2656d88f158ce120947499e971d743c05dbcbed62e5bd2f38f1698bbc3772"
|
||||||
"checksum hmac 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5dcb5e64cda4c23119ab41ba960d1e170a774c8e4b9d9e6a9bc18aabf5e59695"
|
"checksum hmac 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5dcb5e64cda4c23119ab41ba960d1e170a774c8e4b9d9e6a9bc18aabf5e59695"
|
||||||
|
@ -8,6 +8,7 @@ edition = "2018"
|
|||||||
reqwest = { version = "*", features = ["json"] }
|
reqwest = { version = "*", features = ["json"] }
|
||||||
tokio = { version = "0.2", features = ["full"] }
|
tokio = { version = "0.2", features = ["full"] }
|
||||||
futures = { version = "*", features = ["thread-pool"] }
|
futures = { version = "*", features = ["thread-pool"] }
|
||||||
|
hamming = "0.1.3"
|
||||||
|
|
||||||
serde = "*"
|
serde = "*"
|
||||||
serde_json = "*"
|
serde_json = "*"
|
||||||
|
@ -46,10 +46,10 @@ async fn main() {
|
|||||||
tokio_postgres::NoTls,
|
tokio_postgres::NoTls,
|
||||||
);
|
);
|
||||||
|
|
||||||
let pool = match Pool::builder().build(manager).await {
|
let pool = Pool::builder()
|
||||||
Ok(pool) => pool,
|
.build(manager)
|
||||||
Err(e) => panic!("unable to build pool: {}", e),
|
.await
|
||||||
};
|
.expect("unable to build pool");
|
||||||
|
|
||||||
let client = reqwest::Client::builder()
|
let client = reqwest::Client::builder()
|
||||||
.user_agent("Syfaro test client syfaro@huefox.com")
|
.user_agent("Syfaro test client syfaro@huefox.com")
|
||||||
@ -58,7 +58,7 @@ async fn main() {
|
|||||||
let client = std::sync::Arc::new(client);
|
let client = std::sync::Arc::new(client);
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
println!("getting next 100 posts");
|
println!("getting next 384 posts");
|
||||||
|
|
||||||
let db = pool.clone();
|
let db = pool.clone();
|
||||||
|
|
||||||
@ -78,7 +78,7 @@ async fn main() {
|
|||||||
data->>'file_ext' IN ('jpg', 'png') AND
|
data->>'file_ext' IN ('jpg', 'png') AND
|
||||||
data->>'file_url' <> '/images/deleted-preview.png'
|
data->>'file_url' <> '/images/deleted-preview.png'
|
||||||
ORDER BY id DESC
|
ORDER BY id DESC
|
||||||
LIMIT 100",
|
LIMIT 384",
|
||||||
&[],
|
&[],
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
|
@ -1,8 +1,9 @@
|
|||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
struct Row {
|
struct Row {
|
||||||
id: i32,
|
id: i32,
|
||||||
artists: Vec<String>,
|
artists: Option<Vec<String>>,
|
||||||
sources: Vec<String>,
|
sources: Option<Vec<String>>,
|
||||||
|
distance: Option<u64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn get_hash_distance_from_url(
|
async fn get_hash_distance_from_url(
|
||||||
@ -53,6 +54,7 @@ async fn main() {
|
|||||||
.query(
|
.query(
|
||||||
"SELECT
|
"SELECT
|
||||||
post.id id,
|
post.id id,
|
||||||
|
post.hash hash,
|
||||||
artists_agg.artists artists,
|
artists_agg.artists artists,
|
||||||
sources_agg.sources sources
|
sources_agg.sources sources
|
||||||
FROM
|
FROM
|
||||||
@ -69,19 +71,36 @@ async fn main() {
|
|||||||
.await
|
.await
|
||||||
.expect("unable to query")
|
.expect("unable to query")
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|row| Row {
|
.map(|row| {
|
||||||
|
let distance = row
|
||||||
|
.get::<&str, Option<i64>>("hash")
|
||||||
|
.map(|hash| hamming::distance_fast(&hash.to_be_bytes(), &bytes).unwrap());
|
||||||
|
|
||||||
|
Row {
|
||||||
id: row.get("id"),
|
id: row.get("id"),
|
||||||
sources: row.get("sources"),
|
sources: row.get("sources"),
|
||||||
artists: row.get("artists"),
|
artists: row.get("artists"),
|
||||||
|
distance,
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
for row in rows {
|
for row in rows {
|
||||||
println!(
|
println!(
|
||||||
"Possible match: https://e621.net/post/show/{} by {}",
|
"Possible match: [distance of {}] https://e621.net/post/show/{} by {}",
|
||||||
|
row.distance.unwrap_or_else(u64::max_value),
|
||||||
row.id,
|
row.id,
|
||||||
row.artists.join(", ")
|
row.artists
|
||||||
|
.map(|artists| artists.join(", "))
|
||||||
|
.unwrap_or_else(|| "unknown".to_string())
|
||||||
);
|
);
|
||||||
for source in row.sources {
|
let sources = match row.sources {
|
||||||
|
Some(source) => source,
|
||||||
|
_ => {
|
||||||
|
println!("no sources");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
for source in sources {
|
||||||
let distance = get_hash_distance_from_url(&client, &source, &hash).await;
|
let distance = get_hash_distance_from_url(&client, &source, &hash).await;
|
||||||
println!(
|
println!(
|
||||||
"- {} (distance of {})",
|
"- {} (distance of {})",
|
||||||
|
@ -73,6 +73,8 @@ async fn main() {
|
|||||||
.build()
|
.build()
|
||||||
.expect("Unable to build http client");
|
.expect("Unable to build http client");
|
||||||
|
|
||||||
|
println!("max is id: {}", max_id);
|
||||||
|
|
||||||
let mut now;
|
let mut now;
|
||||||
let mut min_id: Option<i32> = None;
|
let mut min_id: Option<i32> = None;
|
||||||
|
|
||||||
@ -90,9 +92,10 @@ async fn main() {
|
|||||||
.expect("Unable to insert");
|
.expect("Unable to insert");
|
||||||
|
|
||||||
if let Some(min_id) = min_id {
|
if let Some(min_id) = min_id {
|
||||||
if min_id >= max_id {
|
println!("min id is: {}", min_id);
|
||||||
|
if min_id <= max_id {
|
||||||
println!("finished run, {}, {}", min_id, max_id);
|
println!("finished run, {}, {}", min_id, max_id);
|
||||||
break
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user