diff --git a/Cargo.lock b/Cargo.lock
index baf5ca7..a4a776d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -554,6 +554,7 @@ dependencies = [
  "ffmpeg-next",
  "futures",
  "futures-util",
+ "fuzzysearch-common",
  "hamming",
  "image",
  "img_hash",
@@ -561,7 +562,6 @@ dependencies = [
  "opentelemetry",
  "opentelemetry-jaeger",
  "serde",
- "tempfile",
  "tokio",
  "tokio-postgres",
  "tracing",
@@ -571,6 +571,19 @@ dependencies = [
  "warp",
 ]
 
+[[package]]
+name = "fuzzysearch-common"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "ffmpeg-next",
+ "image",
+ "img_hash",
+ "serde",
+ "tempfile",
+ "tracing",
+]
+
 [[package]]
 name = "generator"
 version = "0.6.23"
diff --git a/Cargo.toml b/Cargo.toml
index b35c87f..dd0af91 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,42 +1,8 @@
-[package]
-name = "fuzzysearch"
-version = "0.1.0"
-authors = ["Syfaro "]
-edition = "2018"
-
-[dependencies]
-tracing = "0.1"
-tracing-subscriber = "0.2"
-tracing-futures = "0.2"
-
-opentelemetry = "0.6"
-opentelemetry-jaeger = "0.5"
-tracing-opentelemetry = "0.5"
-
-tokio = { version = "0.2", features = ["full"] }
-futures = "0.3"
-futures-util = "0.3"
-
-anyhow = "1"
-chrono = "0.4"
-bytes = "0.5"
-tempfile = "3"
-infer = { version = "0.3", default-features = false }
-
-serde = { version = "1", features = ["derive"] }
-warp = "0.2"
-
-tokio-postgres = "0.5"
-bb8 = "0.4"
-bb8-postgres = "0.4"
-
-image = "0.23"
-ffmpeg-next = "4"
-
-img_hash = "3"
-hamming = "0.1"
-
-bk-tree = "0.3"
+[workspace]
+members = [
+    "fuzzysearch",
+    "fuzzysearch-common"
+]
 
 [profile.dev]
 opt-level = 2
diff --git a/fuzzysearch-common/Cargo.toml b/fuzzysearch-common/Cargo.toml
new file mode 100644
index 0000000..2b3367d
--- /dev/null
+++ b/fuzzysearch-common/Cargo.toml
@@ -0,0 +1,17 @@
+[package]
+name = "fuzzysearch-common"
+version = "0.1.0"
+authors = ["Syfaro "]
+edition = "2018"
+
+[dependencies]
+anyhow = "1"
+tracing = "0.1"
+
+serde = { version = "1", features = ["derive"] }
+
+image = "0.23"
+img_hash = "3"
+
+ffmpeg-next = "4"
+tempfile = "3"
diff --git a/fuzzysearch-common/src/lib.rs b/fuzzysearch-common/src/lib.rs
new file mode 100644
index 0000000..3078aed
--- /dev/null
+++ b/fuzzysearch-common/src/lib.rs
@@ -0,0 +1,13 @@
+pub mod types;
+pub mod video;
+
+/// Create an instance of img_hash with project defaults.
+pub fn get_hasher() -> img_hash::Hasher<[u8; 8]> {
+    use img_hash::{HashAlg::Gradient, HasherConfig};
+
+    HasherConfig::with_bytes_type::<[u8; 8]>()
+        .hash_alg(Gradient)
+        .hash_size(8, 8)
+        .preproc_dct()
+        .to_hasher()
+}
diff --git a/fuzzysearch-common/src/types.rs b/fuzzysearch-common/src/types.rs
new file mode 100644
index 0000000..ac66095
--- /dev/null
+++ b/fuzzysearch-common/src/types.rs
@@ -0,0 +1,39 @@
+use serde::Serialize;
+
+/// A general type for every result in a search.
+#[derive(Debug, Default, Serialize)]
+pub struct SearchResult {
+    pub id: i32,
+
+    pub site_id: i64,
+    pub site_id_str: String,
+
+    pub url: String,
+    pub filename: String,
+    pub artists: Option<Vec<String>>,
+
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[serde(flatten)]
+    pub site_info: Option<SiteInfo>,
+
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub hash: Option<i64>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub distance: Option<u64>,
+
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub searched_hash: Option<i64>,
+}
+
+#[derive(Debug, Serialize)]
+#[serde(tag = "site", content = "site_info")]
+pub enum SiteInfo {
+    FurAffinity {
+        file_id: i32,
+    },
+    #[serde(rename = "e621")]
+    E621 {
+        sources: Option<Vec<String>>,
+    },
+    Twitter,
+}
diff --git a/src/video.rs b/fuzzysearch-common/src/video.rs
similarity index 98%
rename from src/video.rs
rename to fuzzysearch-common/src/video.rs
index 27a4180..0f72aa3 100644
--- a/src/video.rs
+++ b/fuzzysearch-common/src/video.rs
@@ -153,7 +153,7 @@ mod tests {
     fn test_extract_gif_hashes() -> anyhow::Result<()> {
         use std::fs::File;
 
-        let gif = File::open("tests/fox.gif")?;
+        let gif = File::open("../tests/fox.gif")?;
         let hashes = extract_gif_hashes(&gif)?;
 
         assert_eq!(
@@ -180,7 +180,7 @@
     fn test_extract_video_hashes() -> anyhow::Result<()> {
         use std::fs::File;
 
-        let video = File::open("tests/video.webm")?;
+        let video = File::open("../tests/video.webm")?;
         let hashes = extract_video_hashes(&video)?;
 
         assert_eq!(
diff --git a/fuzzysearch/Cargo.toml b/fuzzysearch/Cargo.toml
new file mode 100644
index 0000000..9ccdba9
--- /dev/null
+++ b/fuzzysearch/Cargo.toml
@@ -0,0 +1,40 @@
+[package]
+name = "fuzzysearch"
+version = "0.1.0"
+authors = ["Syfaro "]
+edition = "2018"
+
+[dependencies]
+tracing = "0.1"
+tracing-subscriber = "0.2"
+tracing-futures = "0.2"
+
+opentelemetry = "0.6"
+opentelemetry-jaeger = "0.5"
+tracing-opentelemetry = "0.5"
+
+tokio = { version = "0.2", features = ["full"] }
+futures = "0.3"
+futures-util = "0.3"
+
+anyhow = "1"
+chrono = "0.4"
+bytes = "0.5"
+infer = { version = "0.3", default-features = false }
+
+serde = { version = "1", features = ["derive"] }
+warp = "0.2"
+
+tokio-postgres = "0.5"
+bb8 = "0.4"
+bb8-postgres = "0.4"
+
+image = "0.23"
+ffmpeg-next = "4"
+
+img_hash = "3"
+hamming = "0.1"
+
+bk-tree = "0.3"
+
+fuzzysearch-common = { path = "../fuzzysearch-common" }
diff --git a/Dockerfile b/fuzzysearch/Dockerfile
similarity index 100%
rename from Dockerfile
rename to fuzzysearch/Dockerfile
diff --git a/src/filters.rs b/fuzzysearch/src/filters.rs
similarity index 100%
rename from src/filters.rs
rename to fuzzysearch/src/filters.rs
diff --git a/src/handlers.rs b/fuzzysearch/src/handlers.rs
similarity index 95%
rename from src/handlers.rs
rename to fuzzysearch/src/handlers.rs
index eeece64..729fe0d 100644
--- a/src/handlers.rs
+++ b/fuzzysearch/src/handlers.rs
@@ -5,6 +5,8 @@ use tracing::{span, warn};
 use tracing_futures::Instrument;
 use warp::{reject, Rejection, Reply};
 
+use fuzzysearch_common::types::{SearchResult, SiteInfo};
+
 fn map_bb8_err(err: bb8::RunError<tokio_postgres::Error>) -> Rejection {
     reject::custom(Error::from(err))
 }
@@ -65,7 +67,7 @@ async fn hash_input(form: warp::multipart::FormData) -> (i64, img_hash::ImageHash
     let len = bytes.len();
     let hash = tokio::task::spawn_blocking(move || {
-        let hasher = crate::get_hasher();
+        let hasher = fuzzysearch_common::get_hasher();
         let image = image::load_from_memory(&bytes).unwrap();
         hasher.hash_image(&image)
     })
     .await
@@ -87,9 +89,9 @@ async fn hash_video(form: warp::multipart::FormData) -> Vec<[u8; 8]> {
     let hashes = tokio::task::spawn_blocking(move || {
         if infer::is_video(&bytes) {
-            crate::video::extract_video_hashes(bytes.reader()).unwrap()
+            fuzzysearch_common::video::extract_video_hashes(bytes.reader()).unwrap()
         } else if infer::image::is_gif(&bytes) {
-            crate::video::extract_gif_hashes(bytes.reader()).unwrap()
+            fuzzysearch_common::video::extract_gif_hashes(bytes.reader()).unwrap()
         } else {
             panic!("invalid file type provided");
         }
     })
@@ -195,7 +197,7 @@ pub async fn stream_image(
 }
 
 fn sse_matches(
-    matches: Result<Vec<File>, tokio_postgres::Error>,
+    matches: Result<Vec<SearchResult>, tokio_postgres::Error>,
 ) -> Result<impl warp::sse::ServerSentEvent, core::convert::Infallible> {
     let items = matches.unwrap();
 
@@ -286,7 +288,7 @@ pub async fn search_file(
         .await
         .map_err(map_postgres_err)?
         .into_iter()
-        .map(|row| File {
+        .map(|row| SearchResult {
             id: row.get("hash_id"),
             site_id: row.get::<&str, i32>("id") as i64,
             site_id_str: row.get::<&str, i32>("id").to_string(),
@@ -297,9 +299,9 @@ pub async fn search_file(
                 .map(|artist| vec![artist]),
             distance: None,
             hash: None,
-            site_info: Some(SiteInfo::FurAffinity(FurAffinityFile {
+            site_info: Some(SiteInfo::FurAffinity {
                 file_id: row.get("file_id"),
-            })),
+            }),
             searched_hash: None,
         })
         .collect();
diff --git a/src/main.rs b/fuzzysearch/src/main.rs
similarity index 95%
rename from src/main.rs
rename to fuzzysearch/src/main.rs
index 6976f09..6e34389 100644
--- a/src/main.rs
+++ b/fuzzysearch/src/main.rs
@@ -9,7 +9,6 @@ mod handlers;
 mod models;
 mod types;
 mod utils;
-mod video;
 
 use warp::Filter;
 
@@ -196,13 +195,3 @@ async fn main() {
 }
 
 type Pool = bb8::Pool<bb8_postgres::PostgresConnectionManager<tokio_postgres::NoTls>>;
-
-fn get_hasher() -> img_hash::Hasher<[u8; 8]> {
-    use img_hash::{HashAlg::Gradient, HasherConfig};
-
-    HasherConfig::with_bytes_type::<[u8; 8]>()
-        .hash_alg(Gradient)
-        .hash_size(8, 8)
-        .preproc_dct()
-        .to_hasher()
-}
diff --git a/src/models.rs b/fuzzysearch/src/models.rs
similarity index 95%
rename from src/models.rs
rename to fuzzysearch/src/models.rs
index 2bd4fb0..ecefce7 100644
--- a/src/models.rs
+++ b/fuzzysearch/src/models.rs
@@ -3,6 +3,8 @@ use crate::utils::extract_rows;
 use crate::{Pool, Tree};
 use tracing_futures::Instrument;
 
+use fuzzysearch_common::types::SearchResult;
+
 pub type DB<'a> =
     &'a bb8::PooledConnection<'a, bb8_postgres::PostgresConnectionManager<tokio_postgres::NoTls>>;
 
@@ -48,7 +50,7 @@ pub async fn image_query(
     hashes: Vec<i64>,
     distance: i64,
     hash: Option<Vec<u8>>,
-) -> Result<Vec<File>, tokio_postgres::Error> {
+) -> Result<Vec<SearchResult>, tokio_postgres::Error> {
     let mut results = image_query_sync(pool, tree, hashes, distance, hash);
     let mut matches = Vec::new();
 
@@ -66,8 +68,8 @@ pub fn image_query_sync(
     hashes: Vec<i64>,
     distance: i64,
     hash: Option<Vec<u8>>,
-) -> tokio::sync::mpsc::Receiver<Result<Vec<File>, tokio_postgres::Error>> {
-    let (tx, rx) = tokio::sync::mpsc::channel(50);
+) -> tokio::sync::mpsc::Receiver<Result<Vec<SearchResult>, tokio_postgres::Error>> {
+    let (mut tx, rx) = tokio::sync::mpsc::channel(50);
 
     tokio::spawn(async move {
         let db = pool.get().await.unwrap();
diff --git a/src/types.rs b/fuzzysearch/src/types.rs
similarity index 57%
rename from src/types.rs
rename to fuzzysearch/src/types.rs
index 6106a6c..4340be9 100644
--- a/src/types.rs
+++ b/fuzzysearch/src/types.rs
@@ -1,5 +1,7 @@
 use serde::{Deserialize, Serialize};
 
+use fuzzysearch_common::types::SearchResult;
+
 /// An API key representation from the database.
 ///
 /// May contain information about the owner, always has rate limit information.
@@ -23,52 +25,6 @@ pub enum RateLimit {
     Available(i16),
 }
 
-/// A general type for every file.
-#[derive(Debug, Default, Serialize)]
-pub struct File {
-    pub id: i32,
-
-    pub site_id: i64,
-    pub site_id_str: String,
-
-    pub url: String,
-    pub filename: String,
-    pub artists: Option<Vec<String>>,
-
-    #[serde(skip_serializing_if = "Option::is_none")]
-    #[serde(flatten)]
-    pub site_info: Option<SiteInfo>,
-
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub hash: Option<i64>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub distance: Option<u64>,
-
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub searched_hash: Option<i64>,
-}
-
-#[derive(Debug, Serialize)]
-#[serde(tag = "site", content = "site_info")]
-pub enum SiteInfo {
-    FurAffinity(FurAffinityFile),
-    #[serde(rename = "e621")]
-    E621(E621File),
-    Twitter,
-}
-
-/// Information about a file hosted on FurAffinity.
-#[derive(Debug, Serialize)]
-pub struct FurAffinityFile {
-    pub file_id: i32,
-}
-
-/// Information about a file hosted on e621.
-#[derive(Debug, Serialize)]
-pub struct E621File {
-    pub sources: Option<Vec<String>>,
-}
-
 #[derive(Debug, Deserialize)]
 pub struct FileSearchOpts {
     pub id: Option<i32>,
@@ -93,7 +49,7 @@ pub enum ImageSearchType {
 #[derive(Debug, Serialize)]
 pub struct ImageSimilarity {
     pub hash: i64,
-    pub matches: Vec<File>,
+    pub matches: Vec<SearchResult>,
 }
 
 #[derive(Serialize)]
diff --git a/src/utils.rs b/fuzzysearch/src/utils.rs
similarity index 92%
rename from src/utils.rs
rename to fuzzysearch/src/utils.rs
index becb04f..89a2911 100644
--- a/src/utils.rs
+++ b/fuzzysearch/src/utils.rs
@@ -1,6 +1,8 @@
 use crate::models::DB;
 use crate::types::*;
 
+use fuzzysearch_common::types::{SearchResult, SiteInfo};
+
 #[macro_export]
 macro_rules! rate_limit {
     ($api_key:expr, $db:expr, $limit:tt, $group:expr) => {
@@ -66,7 +68,7 @@ pub async fn update_rate_limit(
 pub fn extract_rows<'a>(
     rows: Vec<tokio_postgres::Row>,
     hash: Option<&'a [u8]>,
-) -> impl IntoIterator<Item = File> + 'a {
+) -> impl IntoIterator<Item = SearchResult> + 'a {
     rows.into_iter().map(move |row| {
         let dbhash: i64 = row.get("hash");
         let dbbytes = dbhash.to_be_bytes();
@@ -80,16 +82,16 @@ pub fn extract_rows<'a>(
         let (site_id, site_info) = if let Some(fa_id) = furaffinity_id {
             (
                 fa_id as i64,
-                Some(SiteInfo::FurAffinity(FurAffinityFile {
+                Some(SiteInfo::FurAffinity {
                     file_id: row.get("file_id"),
-                })),
+                }),
             )
         } else if let Some(e6_id) = e621_id {
             (
                 e6_id as i64,
-                Some(SiteInfo::E621(E621File {
+                Some(SiteInfo::E621 {
                     sources: row.get("sources"),
-                })),
+                }),
             )
         } else if let Some(t_id) = twitter_id {
             (t_id, Some(SiteInfo::Twitter))
@@ -97,7 +99,7 @@ pub fn extract_rows<'a>(
             (-1, None)
         };
 
-        File {
+        SearchResult {
             id: row.get("id"),
             site_id,
             site_info,
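
Note on the serde attributes in the new fuzzysearch-common/src/types.rs: tagging SiteInfo with `tag = "site", content = "site_info"` and flattening it into SearchResult preserves the old wire format of the File struct, emitting the variant name under a top-level "site" key and its fields under "site_info". A minimal sketch of the resulting JSON (illustrative only, not part of the patch; assumes serde_json is available):

    use fuzzysearch_common::types::{SearchResult, SiteInfo};

    fn main() {
        // Hypothetical example values; every field here is a placeholder.
        let result = SearchResult {
            site_id: 12345,
            site_id_str: "12345".to_string(),
            site_info: Some(SiteInfo::FurAffinity { file_id: 67890 }),
            ..Default::default()
        };

        // The flattened, adjacently tagged enum serializes as
        // {"id":0,...,"site":"FurAffinity","site_info":{"file_id":67890}};
        // hash, distance, and searched_hash are omitted entirely while None.
        println!("{}", serde_json::to_string(&result).unwrap());
    }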