Start unifying all FuzzySearch crates.

Syfaro 2020-12-07 17:41:32 -06:00
parent b1bc46d929
commit 59da1e99a8
14 changed files with 155 additions and 116 deletions

Cargo.lock (generated)

@@ -554,6 +554,7 @@ dependencies = [
  "ffmpeg-next",
  "futures",
  "futures-util",
+ "fuzzysearch-common",
  "hamming",
  "image",
  "img_hash",
@@ -561,7 +562,6 @@ dependencies = [
  "opentelemetry",
  "opentelemetry-jaeger",
  "serde",
- "tempfile",
  "tokio",
  "tokio-postgres",
  "tracing",
@@ -571,6 +571,19 @@ dependencies = [
  "warp",
 ]
 
+[[package]]
+name = "fuzzysearch-common"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "ffmpeg-next",
+ "image",
+ "img_hash",
+ "serde",
+ "tempfile",
+ "tracing",
+]
+
 [[package]]
 name = "generator"
 version = "0.6.23"

Cargo.toml

@@ -1,42 +1,8 @@
-[package]
-name = "fuzzysearch"
-version = "0.1.0"
-authors = ["Syfaro <syfaro@huefox.com>"]
-edition = "2018"
-
-[dependencies]
-tracing = "0.1"
-tracing-subscriber = "0.2"
-tracing-futures = "0.2"
-opentelemetry = "0.6"
-opentelemetry-jaeger = "0.5"
-tracing-opentelemetry = "0.5"
-tokio = { version = "0.2", features = ["full"] }
-futures = "0.3"
-futures-util = "0.3"
-anyhow = "1"
-chrono = "0.4"
-bytes = "0.5"
-tempfile = "3"
-infer = { version = "0.3", default-features = false }
-serde = { version = "1", features = ["derive"] }
-warp = "0.2"
-tokio-postgres = "0.5"
-bb8 = "0.4"
-bb8-postgres = "0.4"
-image = "0.23"
-ffmpeg-next = "4"
-img_hash = "3"
-hamming = "0.1"
-bk-tree = "0.3"
+[workspace]
+members = [
+    "fuzzysearch",
+    "fuzzysearch-common"
+]
 
 [profile.dev]
 opt-level = 2

fuzzysearch-common/Cargo.toml (new file)

@@ -0,0 +1,17 @@
+[package]
+name = "fuzzysearch-common"
+version = "0.1.0"
+authors = ["Syfaro <syfaro@huefox.com>"]
+edition = "2018"
+
+[dependencies]
+anyhow = "1"
+tracing = "0.1"
+serde = { version = "1", features = ["derive"] }
+image = "0.23"
+img_hash = "3"
+ffmpeg-next = "4"
+tempfile = "3"

fuzzysearch-common/src/lib.rs (new file)

@@ -0,0 +1,13 @@
+pub mod types;
+pub mod video;
+
+/// Create an instance of img_hash with project defaults.
+pub fn get_hasher() -> img_hash::Hasher<[u8; 8]> {
+    use img_hash::{HashAlg::Gradient, HasherConfig};
+
+    HasherConfig::with_bytes_type::<[u8; 8]>()
+        .hash_alg(Gradient)
+        .hash_size(8, 8)
+        .preproc_dct()
+        .to_hasher()
+}
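For orientation, here is a minimal sketch of how a consumer of the new crate might use get_hasher, assuming the img_hash 3 API shown above; the input path is a placeholder, and packing the hash into an i64 mirrors the to_be_bytes() unpacking done elsewhere in this commit:

use std::convert::TryInto;

fn main() -> anyhow::Result<()> {
    // Build the project-default hasher (Gradient, 8x8, DCT preprocessing).
    let hasher = fuzzysearch_common::get_hasher();

    // Hash an image from disk; "example.png" is a hypothetical path.
    let image = image::open("example.png")?;
    let hash = hasher.hash_image(&image);

    // Pack the 8 hash bytes into an i64, the inverse of the to_be_bytes()
    // unpacking in fuzzysearch/src/utils.rs.
    let packed = i64::from_be_bytes(hash.as_bytes().try_into()?);
    println!("hash: {}", packed);

    Ok(())
}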

fuzzysearch-common/src/types.rs (new file)

@@ -0,0 +1,39 @@
+use serde::Serialize;
+
+/// A general type for every result in a search.
+#[derive(Debug, Default, Serialize)]
+pub struct SearchResult {
+    pub id: i32,
+    pub site_id: i64,
+    pub site_id_str: String,
+    pub url: String,
+    pub filename: String,
+    pub artists: Option<Vec<String>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[serde(flatten)]
+    pub site_info: Option<SiteInfo>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub hash: Option<i64>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub distance: Option<u64>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub searched_hash: Option<i64>,
+}
+
+#[derive(Debug, Serialize)]
+#[serde(tag = "site", content = "site_info")]
+pub enum SiteInfo {
+    FurAffinity {
+        file_id: i32,
+    },
+    #[serde(rename = "e621")]
+    E621 {
+        sources: Option<Vec<String>>,
+    },
+    Twitter,
+}
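Since the struct flattens an adjacently tagged enum and skips None fields, the wire format is worth seeing once. A sketch, assuming serde_json is available as a dependency; all field values are made up:

fn main() -> anyhow::Result<()> {
    use fuzzysearch_common::types::{SearchResult, SiteInfo};

    let result = SearchResult {
        id: 1,
        site_id: 12345,
        site_id_str: "12345".to_string(),
        url: "https://example.com/image.png".to_string(),
        filename: "image.png".to_string(),
        artists: Some(vec!["artist".to_string()]),
        site_info: Some(SiteInfo::FurAffinity { file_id: 678 }),
        hash: None,
        distance: Some(0),
        searched_hash: None,
    };

    // The flattened, tagged enum serializes as top-level "site" and
    // "site_info" keys, and every None field is omitted:
    // {"id":1,"site_id":12345,"site_id_str":"12345",
    //  "url":"https://example.com/image.png","filename":"image.png",
    //  "artists":["artist"],"site":"FurAffinity",
    //  "site_info":{"file_id":678},"distance":0}
    println!("{}", serde_json::to_string(&result)?);

    Ok(())
}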

fuzzysearch-common/src/video.rs

@@ -153,7 +153,7 @@ mod tests {
     fn test_extract_gif_hashes() -> anyhow::Result<()> {
         use std::fs::File;
-        let gif = File::open("tests/fox.gif")?;
+        let gif = File::open("../tests/fox.gif")?;
 
         let hashes = extract_gif_hashes(&gif)?;
 
         assert_eq!(
@@ -180,7 +180,7 @@ mod tests {
     fn test_extract_video_hashes() -> anyhow::Result<()> {
         use std::fs::File;
-        let video = File::open("tests/video.webm")?;
+        let video = File::open("../tests/video.webm")?;
 
         let hashes = extract_video_hashes(&video)?;
 
         assert_eq!(
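The updated tests double as documentation for the extraction API; a minimal sketch of calling it from outside the crate, assuming the signatures exercised above (the fixture path matches the tests):

use std::fs::File;

fn main() -> anyhow::Result<()> {
    // Extract one perceptual hash per frame; hash_video in
    // fuzzysearch/src/handlers.rs consumes the same Vec<[u8; 8]>.
    let gif = File::open("../tests/fox.gif")?;
    let hashes = fuzzysearch_common::video::extract_gif_hashes(&gif)?;
    println!("extracted {} frame hashes", hashes.len());

    Ok(())
}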

fuzzysearch/Cargo.toml (new file)

@@ -0,0 +1,40 @@
+[package]
+name = "fuzzysearch"
+version = "0.1.0"
+authors = ["Syfaro <syfaro@huefox.com>"]
+edition = "2018"
+
+[dependencies]
+tracing = "0.1"
+tracing-subscriber = "0.2"
+tracing-futures = "0.2"
+opentelemetry = "0.6"
+opentelemetry-jaeger = "0.5"
+tracing-opentelemetry = "0.5"
+tokio = { version = "0.2", features = ["full"] }
+futures = "0.3"
+futures-util = "0.3"
+anyhow = "1"
+chrono = "0.4"
+bytes = "0.5"
+infer = { version = "0.3", default-features = false }
+serde = { version = "1", features = ["derive"] }
+warp = "0.2"
+tokio-postgres = "0.5"
+bb8 = "0.4"
+bb8-postgres = "0.4"
+image = "0.23"
+ffmpeg-next = "4"
+img_hash = "3"
+hamming = "0.1"
+bk-tree = "0.3"
+fuzzysearch-common = { path = "../fuzzysearch-common" }

fuzzysearch/src/handlers.rs

@@ -5,6 +5,8 @@ use tracing::{span, warn};
 use tracing_futures::Instrument;
 use warp::{reject, Rejection, Reply};
 
+use fuzzysearch_common::types::{SearchResult, SiteInfo};
+
 fn map_bb8_err(err: bb8::RunError<tokio_postgres::Error>) -> Rejection {
     reject::custom(Error::from(err))
 }
@@ -65,7 +67,7 @@ async fn hash_input(form: warp::multipart::FormData) -> (i64, img_hash::ImageHash
     let len = bytes.len();
 
     let hash = tokio::task::spawn_blocking(move || {
-        let hasher = crate::get_hasher();
+        let hasher = fuzzysearch_common::get_hasher();
         let image = image::load_from_memory(&bytes).unwrap();
         hasher.hash_image(&image)
     })
@@ -87,9 +89,9 @@ async fn hash_video(form: warp::multipart::FormData) -> Vec<[u8; 8]> {
     let hashes = tokio::task::spawn_blocking(move || {
         if infer::is_video(&bytes) {
-            crate::video::extract_video_hashes(bytes.reader()).unwrap()
+            fuzzysearch_common::video::extract_video_hashes(bytes.reader()).unwrap()
         } else if infer::image::is_gif(&bytes) {
-            crate::video::extract_gif_hashes(bytes.reader()).unwrap()
+            fuzzysearch_common::video::extract_gif_hashes(bytes.reader()).unwrap()
         } else {
             panic!("invalid file type provided");
         }
@@ -195,7 +197,7 @@ pub async fn stream_image(
 }
 
 fn sse_matches(
-    matches: Result<Vec<File>, tokio_postgres::Error>,
+    matches: Result<Vec<SearchResult>, tokio_postgres::Error>,
 ) -> Result<impl warp::sse::ServerSentEvent, core::convert::Infallible> {
     let items = matches.unwrap();
@@ -286,7 +288,7 @@ pub async fn search_file(
         .await
         .map_err(map_postgres_err)?
         .into_iter()
-        .map(|row| File {
+        .map(|row| SearchResult {
             id: row.get("hash_id"),
             site_id: row.get::<&str, i32>("id") as i64,
             site_id_str: row.get::<&str, i32>("id").to_string(),
@@ -297,9 +299,9 @@ pub async fn search_file(
                 .map(|artist| vec![artist]),
             distance: None,
             hash: None,
-            site_info: Some(SiteInfo::FurAffinity(FurAffinityFile {
+            site_info: Some(SiteInfo::FurAffinity {
                 file_id: row.get("file_id"),
-            })),
+            }),
             searched_hash: None,
         })
         .collect();

fuzzysearch/src/main.rs

@@ -9,7 +9,6 @@ mod handlers;
 mod models;
 mod types;
 mod utils;
-mod video;
 
 use warp::Filter;
@@ -196,13 +195,3 @@ async fn main() {
 }
 
 type Pool = bb8::Pool<bb8_postgres::PostgresConnectionManager<tokio_postgres::NoTls>>;
-
-fn get_hasher() -> img_hash::Hasher<[u8; 8]> {
-    use img_hash::{HashAlg::Gradient, HasherConfig};
-
-    HasherConfig::with_bytes_type::<[u8; 8]>()
-        .hash_alg(Gradient)
-        .hash_size(8, 8)
-        .preproc_dct()
-        .to_hasher()
-}

fuzzysearch/src/models.rs

@@ -3,6 +3,8 @@ use crate::utils::extract_rows;
 use crate::{Pool, Tree};
 use tracing_futures::Instrument;
 
+use fuzzysearch_common::types::SearchResult;
+
 pub type DB<'a> =
     &'a bb8::PooledConnection<'a, bb8_postgres::PostgresConnectionManager<tokio_postgres::NoTls>>;
@@ -48,7 +50,7 @@ pub async fn image_query(
     hashes: Vec<i64>,
     distance: i64,
     hash: Option<Vec<u8>>,
-) -> Result<Vec<File>, tokio_postgres::Error> {
+) -> Result<Vec<SearchResult>, tokio_postgres::Error> {
     let mut results = image_query_sync(pool, tree, hashes, distance, hash);
     let mut matches = Vec::new();
@@ -66,8 +68,8 @@ pub fn image_query_sync(
     hashes: Vec<i64>,
     distance: i64,
     hash: Option<Vec<u8>>,
-) -> tokio::sync::mpsc::Receiver<Result<Vec<File>, tokio_postgres::Error>> {
-    let (tx, rx) = tokio::sync::mpsc::channel(50);
+) -> tokio::sync::mpsc::Receiver<Result<Vec<SearchResult>, tokio_postgres::Error>> {
+    let (mut tx, rx) = tokio::sync::mpsc::channel(50);
 
     tokio::spawn(async move {
         let db = pool.get().await.unwrap();
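Note the sender binding gained mut, presumably because tokio 0.2's mpsc::Sender::send takes &mut self (it only became &self in later tokio releases). A standalone sketch of the pattern, assuming tokio 0.2:

#[tokio::main]
async fn main() {
    // Both send and recv require mutable handles on tokio 0.2.
    let (mut tx, mut rx) = tokio::sync::mpsc::channel(50);

    tokio::spawn(async move {
        tx.send(42i64).await.unwrap();
    });

    assert_eq!(rx.recv().await, Some(42));
}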

fuzzysearch/src/types.rs

@@ -1,5 +1,7 @@
 use serde::{Deserialize, Serialize};
 
+use fuzzysearch_common::types::SearchResult;
+
 /// An API key representation from the database.alloc
 ///
 /// May contain information about the owner, always has rate limit information.
@@ -23,52 +25,6 @@ pub enum RateLimit {
     Available(i16),
 }
 
-/// A general type for every file.
-#[derive(Debug, Default, Serialize)]
-pub struct File {
-    pub id: i32,
-    pub site_id: i64,
-    pub site_id_str: String,
-    pub url: String,
-    pub filename: String,
-    pub artists: Option<Vec<String>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    #[serde(flatten)]
-    pub site_info: Option<SiteInfo>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub hash: Option<i64>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub distance: Option<u64>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub searched_hash: Option<i64>,
-}
-
-#[derive(Debug, Serialize)]
-#[serde(tag = "site", content = "site_info")]
-pub enum SiteInfo {
-    FurAffinity(FurAffinityFile),
-    #[serde(rename = "e621")]
-    E621(E621File),
-    Twitter,
-}
-
-/// Information about a file hosted on FurAffinity.
-#[derive(Debug, Serialize)]
-pub struct FurAffinityFile {
-    pub file_id: i32,
-}
-
-/// Information about a file hosted on e621.
-#[derive(Debug, Serialize)]
-pub struct E621File {
-    pub sources: Option<Vec<String>>,
-}
-
 #[derive(Debug, Deserialize)]
 pub struct FileSearchOpts {
     pub id: Option<i32>,
@@ -93,7 +49,7 @@ pub enum ImageSearchType {
 #[derive(Debug, Serialize)]
 pub struct ImageSimilarity {
     pub hash: i64,
-    pub matches: Vec<File>,
+    pub matches: Vec<SearchResult>,
 }
 
 #[derive(Serialize)]

fuzzysearch/src/utils.rs

@@ -1,6 +1,8 @@
 use crate::models::DB;
 use crate::types::*;
 
+use fuzzysearch_common::types::{SearchResult, SiteInfo};
+
 #[macro_export]
 macro_rules! rate_limit {
     ($api_key:expr, $db:expr, $limit:tt, $group:expr) => {
@@ -66,7 +68,7 @@ pub async fn update_rate_limit(
 pub fn extract_rows<'a>(
     rows: Vec<tokio_postgres::Row>,
     hash: Option<&'a [u8]>,
-) -> impl IntoIterator<Item = File> + 'a {
+) -> impl IntoIterator<Item = SearchResult> + 'a {
     rows.into_iter().map(move |row| {
         let dbhash: i64 = row.get("hash");
         let dbbytes = dbhash.to_be_bytes();
@@ -80,16 +82,16 @@ pub fn extract_rows<'a>(
         let (site_id, site_info) = if let Some(fa_id) = furaffinity_id {
             (
                 fa_id as i64,
-                Some(SiteInfo::FurAffinity(FurAffinityFile {
+                Some(SiteInfo::FurAffinity {
                     file_id: row.get("file_id"),
-                })),
+                }),
             )
         } else if let Some(e6_id) = e621_id {
             (
                 e6_id as i64,
-                Some(SiteInfo::E621(E621File {
+                Some(SiteInfo::E621 {
                     sources: row.get("sources"),
-                })),
+                }),
             )
         } else if let Some(t_id) = twitter_id {
             (t_id, Some(SiteInfo::Twitter))
@@ -97,7 +99,7 @@ pub fn extract_rows<'a>(
             (-1, None)
         };
 
-        File {
+        SearchResult {
             id: row.get("id"),
             site_id,
             site_info,