mirror of
https://github.com/Syfaro/fuzzysearch.git
synced 2024-11-05 14:32:56 +00:00
Start unifying all FuzzySearch crates.
This commit is contained in:
parent
b1bc46d929
commit
59da1e99a8
15
Cargo.lock
generated
15
Cargo.lock
generated
@ -554,6 +554,7 @@ dependencies = [
|
||||
"ffmpeg-next",
|
||||
"futures",
|
||||
"futures-util",
|
||||
"fuzzysearch-common",
|
||||
"hamming",
|
||||
"image",
|
||||
"img_hash",
|
||||
@ -561,7 +562,6 @@ dependencies = [
|
||||
"opentelemetry",
|
||||
"opentelemetry-jaeger",
|
||||
"serde",
|
||||
"tempfile",
|
||||
"tokio",
|
||||
"tokio-postgres",
|
||||
"tracing",
|
||||
@ -571,6 +571,19 @@ dependencies = [
|
||||
"warp",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fuzzysearch-common"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"ffmpeg-next",
|
||||
"image",
|
||||
"img_hash",
|
||||
"serde",
|
||||
"tempfile",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "generator"
|
||||
version = "0.6.23"
|
||||
|
44
Cargo.toml
44
Cargo.toml
@ -1,42 +1,8 @@
|
||||
[package]
|
||||
name = "fuzzysearch"
|
||||
version = "0.1.0"
|
||||
authors = ["Syfaro <syfaro@huefox.com>"]
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = "0.2"
|
||||
tracing-futures = "0.2"
|
||||
|
||||
opentelemetry = "0.6"
|
||||
opentelemetry-jaeger = "0.5"
|
||||
tracing-opentelemetry = "0.5"
|
||||
|
||||
tokio = { version = "0.2", features = ["full"] }
|
||||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
|
||||
anyhow = "1"
|
||||
chrono = "0.4"
|
||||
bytes = "0.5"
|
||||
tempfile = "3"
|
||||
infer = { version = "0.3", default-features = false }
|
||||
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
warp = "0.2"
|
||||
|
||||
tokio-postgres = "0.5"
|
||||
bb8 = "0.4"
|
||||
bb8-postgres = "0.4"
|
||||
|
||||
image = "0.23"
|
||||
ffmpeg-next = "4"
|
||||
|
||||
img_hash = "3"
|
||||
hamming = "0.1"
|
||||
|
||||
bk-tree = "0.3"
|
||||
[workspace]
|
||||
members = [
|
||||
"fuzzysearch",
|
||||
"fuzzysearch-common"
|
||||
]
|
||||
|
||||
[profile.dev]
|
||||
opt-level = 2
|
||||
|
17
fuzzysearch-common/Cargo.toml
Normal file
17
fuzzysearch-common/Cargo.toml
Normal file
@ -0,0 +1,17 @@
|
||||
[package]
|
||||
name = "fuzzysearch-common"
|
||||
version = "0.1.0"
|
||||
authors = ["Syfaro <syfaro@huefox.com>"]
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1"
|
||||
tracing = "0.1"
|
||||
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
|
||||
image = "0.23"
|
||||
img_hash = "3"
|
||||
|
||||
ffmpeg-next = "4"
|
||||
tempfile = "3"
|
13
fuzzysearch-common/src/lib.rs
Normal file
13
fuzzysearch-common/src/lib.rs
Normal file
@ -0,0 +1,13 @@
|
||||
pub mod types;
|
||||
pub mod video;
|
||||
|
||||
/// Create an instance of img_hash with project defaults.
|
||||
pub fn get_hasher() -> img_hash::Hasher<[u8; 8]> {
|
||||
use img_hash::{HashAlg::Gradient, HasherConfig};
|
||||
|
||||
HasherConfig::with_bytes_type::<[u8; 8]>()
|
||||
.hash_alg(Gradient)
|
||||
.hash_size(8, 8)
|
||||
.preproc_dct()
|
||||
.to_hasher()
|
||||
}
|
39
fuzzysearch-common/src/types.rs
Normal file
39
fuzzysearch-common/src/types.rs
Normal file
@ -0,0 +1,39 @@
|
||||
use serde::Serialize;
|
||||
|
||||
/// A general type for every result in a search.
|
||||
#[derive(Debug, Default, Serialize)]
|
||||
pub struct SearchResult {
|
||||
pub id: i32,
|
||||
|
||||
pub site_id: i64,
|
||||
pub site_id_str: String,
|
||||
|
||||
pub url: String,
|
||||
pub filename: String,
|
||||
pub artists: Option<Vec<String>>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
#[serde(flatten)]
|
||||
pub site_info: Option<SiteInfo>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub hash: Option<i64>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub distance: Option<u64>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub searched_hash: Option<i64>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
#[serde(tag = "site", content = "site_info")]
|
||||
pub enum SiteInfo {
|
||||
FurAffinity {
|
||||
file_id: i32,
|
||||
},
|
||||
#[serde(rename = "e621")]
|
||||
E621 {
|
||||
sources: Option<Vec<String>>,
|
||||
},
|
||||
Twitter,
|
||||
}
|
@ -153,7 +153,7 @@ mod tests {
|
||||
fn test_extract_gif_hashes() -> anyhow::Result<()> {
|
||||
use std::fs::File;
|
||||
|
||||
let gif = File::open("tests/fox.gif")?;
|
||||
let gif = File::open("../tests/fox.gif")?;
|
||||
let hashes = extract_gif_hashes(&gif)?;
|
||||
|
||||
assert_eq!(
|
||||
@ -180,7 +180,7 @@ mod tests {
|
||||
fn test_extract_video_hashes() -> anyhow::Result<()> {
|
||||
use std::fs::File;
|
||||
|
||||
let video = File::open("tests/video.webm")?;
|
||||
let video = File::open("../tests/video.webm")?;
|
||||
let hashes = extract_video_hashes(&video)?;
|
||||
|
||||
assert_eq!(
|
40
fuzzysearch/Cargo.toml
Normal file
40
fuzzysearch/Cargo.toml
Normal file
@ -0,0 +1,40 @@
|
||||
[package]
|
||||
name = "fuzzysearch"
|
||||
version = "0.1.0"
|
||||
authors = ["Syfaro <syfaro@huefox.com>"]
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = "0.2"
|
||||
tracing-futures = "0.2"
|
||||
|
||||
opentelemetry = "0.6"
|
||||
opentelemetry-jaeger = "0.5"
|
||||
tracing-opentelemetry = "0.5"
|
||||
|
||||
tokio = { version = "0.2", features = ["full"] }
|
||||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
|
||||
anyhow = "1"
|
||||
chrono = "0.4"
|
||||
bytes = "0.5"
|
||||
infer = { version = "0.3", default-features = false }
|
||||
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
warp = "0.2"
|
||||
|
||||
tokio-postgres = "0.5"
|
||||
bb8 = "0.4"
|
||||
bb8-postgres = "0.4"
|
||||
|
||||
image = "0.23"
|
||||
ffmpeg-next = "4"
|
||||
|
||||
img_hash = "3"
|
||||
hamming = "0.1"
|
||||
|
||||
bk-tree = "0.3"
|
||||
|
||||
fuzzysearch-common = { path = "../fuzzysearch-common" }
|
@ -5,6 +5,8 @@ use tracing::{span, warn};
|
||||
use tracing_futures::Instrument;
|
||||
use warp::{reject, Rejection, Reply};
|
||||
|
||||
use fuzzysearch_common::types::{SearchResult, SiteInfo};
|
||||
|
||||
fn map_bb8_err(err: bb8::RunError<tokio_postgres::Error>) -> Rejection {
|
||||
reject::custom(Error::from(err))
|
||||
}
|
||||
@ -65,7 +67,7 @@ async fn hash_input(form: warp::multipart::FormData) -> (i64, img_hash::ImageHas
|
||||
let len = bytes.len();
|
||||
|
||||
let hash = tokio::task::spawn_blocking(move || {
|
||||
let hasher = crate::get_hasher();
|
||||
let hasher = fuzzysearch_common::get_hasher();
|
||||
let image = image::load_from_memory(&bytes).unwrap();
|
||||
hasher.hash_image(&image)
|
||||
})
|
||||
@ -87,9 +89,9 @@ async fn hash_video(form: warp::multipart::FormData) -> Vec<[u8; 8]> {
|
||||
|
||||
let hashes = tokio::task::spawn_blocking(move || {
|
||||
if infer::is_video(&bytes) {
|
||||
crate::video::extract_video_hashes(bytes.reader()).unwrap()
|
||||
fuzzysearch_common::video::extract_video_hashes(bytes.reader()).unwrap()
|
||||
} else if infer::image::is_gif(&bytes) {
|
||||
crate::video::extract_gif_hashes(bytes.reader()).unwrap()
|
||||
fuzzysearch_common::video::extract_gif_hashes(bytes.reader()).unwrap()
|
||||
} else {
|
||||
panic!("invalid file type provided");
|
||||
}
|
||||
@ -195,7 +197,7 @@ pub async fn stream_image(
|
||||
}
|
||||
|
||||
fn sse_matches(
|
||||
matches: Result<Vec<File>, tokio_postgres::Error>,
|
||||
matches: Result<Vec<SearchResult>, tokio_postgres::Error>,
|
||||
) -> Result<impl warp::sse::ServerSentEvent, core::convert::Infallible> {
|
||||
let items = matches.unwrap();
|
||||
|
||||
@ -286,7 +288,7 @@ pub async fn search_file(
|
||||
.await
|
||||
.map_err(map_postgres_err)?
|
||||
.into_iter()
|
||||
.map(|row| File {
|
||||
.map(|row| SearchResult {
|
||||
id: row.get("hash_id"),
|
||||
site_id: row.get::<&str, i32>("id") as i64,
|
||||
site_id_str: row.get::<&str, i32>("id").to_string(),
|
||||
@ -297,9 +299,9 @@ pub async fn search_file(
|
||||
.map(|artist| vec![artist]),
|
||||
distance: None,
|
||||
hash: None,
|
||||
site_info: Some(SiteInfo::FurAffinity(FurAffinityFile {
|
||||
site_info: Some(SiteInfo::FurAffinity {
|
||||
file_id: row.get("file_id"),
|
||||
})),
|
||||
}),
|
||||
searched_hash: None,
|
||||
})
|
||||
.collect();
|
@ -9,7 +9,6 @@ mod handlers;
|
||||
mod models;
|
||||
mod types;
|
||||
mod utils;
|
||||
mod video;
|
||||
|
||||
use warp::Filter;
|
||||
|
||||
@ -196,13 +195,3 @@ async fn main() {
|
||||
}
|
||||
|
||||
type Pool = bb8::Pool<bb8_postgres::PostgresConnectionManager<tokio_postgres::NoTls>>;
|
||||
|
||||
fn get_hasher() -> img_hash::Hasher<[u8; 8]> {
|
||||
use img_hash::{HashAlg::Gradient, HasherConfig};
|
||||
|
||||
HasherConfig::with_bytes_type::<[u8; 8]>()
|
||||
.hash_alg(Gradient)
|
||||
.hash_size(8, 8)
|
||||
.preproc_dct()
|
||||
.to_hasher()
|
||||
}
|
@ -3,6 +3,8 @@ use crate::utils::extract_rows;
|
||||
use crate::{Pool, Tree};
|
||||
use tracing_futures::Instrument;
|
||||
|
||||
use fuzzysearch_common::types::SearchResult;
|
||||
|
||||
pub type DB<'a> =
|
||||
&'a bb8::PooledConnection<'a, bb8_postgres::PostgresConnectionManager<tokio_postgres::NoTls>>;
|
||||
|
||||
@ -48,7 +50,7 @@ pub async fn image_query(
|
||||
hashes: Vec<i64>,
|
||||
distance: i64,
|
||||
hash: Option<Vec<u8>>,
|
||||
) -> Result<Vec<File>, tokio_postgres::Error> {
|
||||
) -> Result<Vec<SearchResult>, tokio_postgres::Error> {
|
||||
let mut results = image_query_sync(pool, tree, hashes, distance, hash);
|
||||
let mut matches = Vec::new();
|
||||
|
||||
@ -66,8 +68,8 @@ pub fn image_query_sync(
|
||||
hashes: Vec<i64>,
|
||||
distance: i64,
|
||||
hash: Option<Vec<u8>>,
|
||||
) -> tokio::sync::mpsc::Receiver<Result<Vec<File>, tokio_postgres::Error>> {
|
||||
let (tx, rx) = tokio::sync::mpsc::channel(50);
|
||||
) -> tokio::sync::mpsc::Receiver<Result<Vec<SearchResult>, tokio_postgres::Error>> {
|
||||
let (mut tx, rx) = tokio::sync::mpsc::channel(50);
|
||||
|
||||
tokio::spawn(async move {
|
||||
let db = pool.get().await.unwrap();
|
@ -1,5 +1,7 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use fuzzysearch_common::types::SearchResult;
|
||||
|
||||
/// An API key representation from the database.
|
||||
///
|
||||
/// May contain information about the owner, always has rate limit information.
|
||||
@ -23,52 +25,6 @@ pub enum RateLimit {
|
||||
Available(i16),
|
||||
}
|
||||
|
||||
/// A general type for every file.
|
||||
#[derive(Debug, Default, Serialize)]
|
||||
pub struct File {
|
||||
pub id: i32,
|
||||
|
||||
pub site_id: i64,
|
||||
pub site_id_str: String,
|
||||
|
||||
pub url: String,
|
||||
pub filename: String,
|
||||
pub artists: Option<Vec<String>>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
#[serde(flatten)]
|
||||
pub site_info: Option<SiteInfo>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub hash: Option<i64>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub distance: Option<u64>,
|
||||
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub searched_hash: Option<i64>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
#[serde(tag = "site", content = "site_info")]
|
||||
pub enum SiteInfo {
|
||||
FurAffinity(FurAffinityFile),
|
||||
#[serde(rename = "e621")]
|
||||
E621(E621File),
|
||||
Twitter,
|
||||
}
|
||||
|
||||
/// Information about a file hosted on FurAffinity.
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct FurAffinityFile {
|
||||
pub file_id: i32,
|
||||
}
|
||||
|
||||
/// Information about a file hosted on e621.
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct E621File {
|
||||
pub sources: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct FileSearchOpts {
|
||||
pub id: Option<i32>,
|
||||
@ -93,7 +49,7 @@ pub enum ImageSearchType {
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct ImageSimilarity {
|
||||
pub hash: i64,
|
||||
pub matches: Vec<File>,
|
||||
pub matches: Vec<SearchResult>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
@ -1,6 +1,8 @@
|
||||
use crate::models::DB;
|
||||
use crate::types::*;
|
||||
|
||||
use fuzzysearch_common::types::{SearchResult, SiteInfo};
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! rate_limit {
|
||||
($api_key:expr, $db:expr, $limit:tt, $group:expr) => {
|
||||
@ -66,7 +68,7 @@ pub async fn update_rate_limit(
|
||||
pub fn extract_rows<'a>(
|
||||
rows: Vec<tokio_postgres::Row>,
|
||||
hash: Option<&'a [u8]>,
|
||||
) -> impl IntoIterator<Item = File> + 'a {
|
||||
) -> impl IntoIterator<Item = SearchResult> + 'a {
|
||||
rows.into_iter().map(move |row| {
|
||||
let dbhash: i64 = row.get("hash");
|
||||
let dbbytes = dbhash.to_be_bytes();
|
||||
@ -80,16 +82,16 @@ pub fn extract_rows<'a>(
|
||||
let (site_id, site_info) = if let Some(fa_id) = furaffinity_id {
|
||||
(
|
||||
fa_id as i64,
|
||||
Some(SiteInfo::FurAffinity(FurAffinityFile {
|
||||
Some(SiteInfo::FurAffinity {
|
||||
file_id: row.get("file_id"),
|
||||
})),
|
||||
}),
|
||||
)
|
||||
} else if let Some(e6_id) = e621_id {
|
||||
(
|
||||
e6_id as i64,
|
||||
Some(SiteInfo::E621(E621File {
|
||||
Some(SiteInfo::E621 {
|
||||
sources: row.get("sources"),
|
||||
})),
|
||||
}),
|
||||
)
|
||||
} else if let Some(t_id) = twitter_id {
|
||||
(t_id, Some(SiteInfo::Twitter))
|
||||
@ -97,7 +99,7 @@ pub fn extract_rows<'a>(
|
||||
(-1, None)
|
||||
};
|
||||
|
||||
File {
|
||||
SearchResult {
|
||||
id: row.get("id"),
|
||||
site_id,
|
||||
site_info,
|
Loading…
Reference in New Issue
Block a user