mirror of
https://github.com/Syfaro/fuzzysearch.git
synced 2024-12-24 14:25:35 +00:00
Various fixes.
This commit is contained in:
parent
1ce40026b3
commit
8673515ed1
7
Cargo.lock
generated
7
Cargo.lock
generated
@ -303,6 +303,7 @@ dependencies = [
|
||||
"bb8-postgres 0.4.0-pre (git+https://github.com/khuey/bb8.git)",
|
||||
"furaffinity-rs 0.1.0 (git+https://git.huefox.com/syfaro/furaffinity-rs.git)",
|
||||
"futures 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"hamming 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"image 0.22.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"img_hash 3.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"reqwest 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -552,6 +553,11 @@ dependencies = [
|
||||
"tokio-util 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hamming"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.3.1"
|
||||
@ -2267,6 +2273,7 @@ dependencies = [
|
||||
"checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb"
|
||||
"checksum gif 0.10.3 (registry+https://github.com/rust-lang/crates.io-index)" = "471d90201b3b223f3451cd4ad53e34295f16a1df17b1edf3736d47761c3981af"
|
||||
"checksum h2 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b9433d71e471c1736fd5a61b671fc0b148d7a2992f666c958d03cd8feb3b88d1"
|
||||
"checksum hamming 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "65043da274378d68241eb9a8f8f8aa54e349136f7b8e12f63e3ef44043cc30e1"
|
||||
"checksum heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205"
|
||||
"checksum hermit-abi 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "eff2656d88f158ce120947499e971d743c05dbcbed62e5bd2f38f1698bbc3772"
|
||||
"checksum hmac 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5dcb5e64cda4c23119ab41ba960d1e170a774c8e4b9d9e6a9bc18aabf5e59695"
|
||||
|
@ -8,6 +8,7 @@ edition = "2018"
|
||||
reqwest = { version = "*", features = ["json"] }
|
||||
tokio = { version = "0.2", features = ["full"] }
|
||||
futures = { version = "*", features = ["thread-pool"] }
|
||||
hamming = "0.1.3"
|
||||
|
||||
serde = "*"
|
||||
serde_json = "*"
|
||||
|
@ -46,10 +46,10 @@ async fn main() {
|
||||
tokio_postgres::NoTls,
|
||||
);
|
||||
|
||||
let pool = match Pool::builder().build(manager).await {
|
||||
Ok(pool) => pool,
|
||||
Err(e) => panic!("unable to build pool: {}", e),
|
||||
};
|
||||
let pool = Pool::builder()
|
||||
.build(manager)
|
||||
.await
|
||||
.expect("unable to build pool");
|
||||
|
||||
let client = reqwest::Client::builder()
|
||||
.user_agent("Syfaro test client syfaro@huefox.com")
|
||||
@ -58,7 +58,7 @@ async fn main() {
|
||||
let client = std::sync::Arc::new(client);
|
||||
|
||||
loop {
|
||||
println!("getting next 100 posts");
|
||||
println!("getting next 384 posts");
|
||||
|
||||
let db = pool.clone();
|
||||
|
||||
@ -78,7 +78,7 @@ async fn main() {
|
||||
data->>'file_ext' IN ('jpg', 'png') AND
|
||||
data->>'file_url' <> '/images/deleted-preview.png'
|
||||
ORDER BY id DESC
|
||||
LIMIT 100",
|
||||
LIMIT 384",
|
||||
&[],
|
||||
)
|
||||
.await
|
||||
|
@ -1,8 +1,9 @@
|
||||
#[derive(Debug)]
|
||||
struct Row {
|
||||
id: i32,
|
||||
artists: Vec<String>,
|
||||
sources: Vec<String>,
|
||||
artists: Option<Vec<String>>,
|
||||
sources: Option<Vec<String>>,
|
||||
distance: Option<u64>,
|
||||
}
|
||||
|
||||
async fn get_hash_distance_from_url(
|
||||
@ -53,6 +54,7 @@ async fn main() {
|
||||
.query(
|
||||
"SELECT
|
||||
post.id id,
|
||||
post.hash hash,
|
||||
artists_agg.artists artists,
|
||||
sources_agg.sources sources
|
||||
FROM
|
||||
@ -69,19 +71,36 @@ async fn main() {
|
||||
.await
|
||||
.expect("unable to query")
|
||||
.into_iter()
|
||||
.map(|row| Row {
|
||||
id: row.get("id"),
|
||||
sources: row.get("sources"),
|
||||
artists: row.get("artists"),
|
||||
.map(|row| {
|
||||
let distance = row
|
||||
.get::<&str, Option<i64>>("hash")
|
||||
.map(|hash| hamming::distance_fast(&hash.to_be_bytes(), &bytes).unwrap());
|
||||
|
||||
Row {
|
||||
id: row.get("id"),
|
||||
sources: row.get("sources"),
|
||||
artists: row.get("artists"),
|
||||
distance,
|
||||
}
|
||||
});
|
||||
|
||||
for row in rows {
|
||||
println!(
|
||||
"Possible match: https://e621.net/post/show/{} by {}",
|
||||
"Possible match: [distance of {}] https://e621.net/post/show/{} by {}",
|
||||
row.distance.unwrap_or_else(u64::max_value),
|
||||
row.id,
|
||||
row.artists.join(", ")
|
||||
row.artists
|
||||
.map(|artists| artists.join(", "))
|
||||
.unwrap_or_else(|| "unknown".to_string())
|
||||
);
|
||||
for source in row.sources {
|
||||
let sources = match row.sources {
|
||||
Some(source) => source,
|
||||
_ => {
|
||||
println!("no sources");
|
||||
continue;
|
||||
}
|
||||
};
|
||||
for source in sources {
|
||||
let distance = get_hash_distance_from_url(&client, &source, &hash).await;
|
||||
println!(
|
||||
"- {} (distance of {})",
|
||||
|
@ -73,6 +73,8 @@ async fn main() {
|
||||
.build()
|
||||
.expect("Unable to build http client");
|
||||
|
||||
println!("max is id: {}", max_id);
|
||||
|
||||
let mut now;
|
||||
let mut min_id: Option<i32> = None;
|
||||
|
||||
@ -90,9 +92,10 @@ async fn main() {
|
||||
.expect("Unable to insert");
|
||||
|
||||
if let Some(min_id) = min_id {
|
||||
if min_id >= max_id {
|
||||
println!("min id is: {}", min_id);
|
||||
if min_id <= max_id {
|
||||
println!("finished run, {}, {}", min_id, max_id);
|
||||
break
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user