Start unifying all FuzzySearch crates.

This commit is contained in:
Syfaro 2020-12-07 17:41:32 -06:00
parent b1bc46d929
commit 59da1e99a8
14 changed files with 155 additions and 116 deletions

15
Cargo.lock generated
View File

@ -554,6 +554,7 @@ dependencies = [
"ffmpeg-next",
"futures",
"futures-util",
"fuzzysearch-common",
"hamming",
"image",
"img_hash",
@ -561,7 +562,6 @@ dependencies = [
"opentelemetry",
"opentelemetry-jaeger",
"serde",
"tempfile",
"tokio",
"tokio-postgres",
"tracing",
@ -571,6 +571,19 @@ dependencies = [
"warp",
]
[[package]]
name = "fuzzysearch-common"
version = "0.1.0"
dependencies = [
"anyhow",
"ffmpeg-next",
"image",
"img_hash",
"serde",
"tempfile",
"tracing",
]
[[package]]
name = "generator"
version = "0.6.23"

View File

@ -1,42 +1,8 @@
[package]
name = "fuzzysearch"
version = "0.1.0"
authors = ["Syfaro <syfaro@huefox.com>"]
edition = "2018"
[dependencies]
tracing = "0.1"
tracing-subscriber = "0.2"
tracing-futures = "0.2"
opentelemetry = "0.6"
opentelemetry-jaeger = "0.5"
tracing-opentelemetry = "0.5"
tokio = { version = "0.2", features = ["full"] }
futures = "0.3"
futures-util = "0.3"
anyhow = "1"
chrono = "0.4"
bytes = "0.5"
tempfile = "3"
infer = { version = "0.3", default-features = false }
serde = { version = "1", features = ["derive"] }
warp = "0.2"
tokio-postgres = "0.5"
bb8 = "0.4"
bb8-postgres = "0.4"
image = "0.23"
ffmpeg-next = "4"
img_hash = "3"
hamming = "0.1"
bk-tree = "0.3"
[workspace]
members = [
"fuzzysearch",
"fuzzysearch-common"
]
[profile.dev]
opt-level = 2

View File

@ -0,0 +1,17 @@
[package]
name = "fuzzysearch-common"
version = "0.1.0"
authors = ["Syfaro <syfaro@huefox.com>"]
edition = "2018"
[dependencies]
anyhow = "1"
tracing = "0.1"
serde = { version = "1", features = ["derive"] }
image = "0.23"
img_hash = "3"
ffmpeg-next = "4"
tempfile = "3"

View File

@ -0,0 +1,13 @@
pub mod types;
pub mod video;
/// Create an instance of img_hash with project defaults.
///
/// Every FuzzySearch crate must hash images with identical parameters for
/// the resulting perceptual hashes to be comparable: an 8x8 Gradient hash
/// with a DCT preprocessing pass, producing 8 bytes of hash data.
pub fn get_hasher() -> img_hash::Hasher<[u8; 8]> {
    use img_hash::{HashAlg, HasherConfig};

    let config = HasherConfig::with_bytes_type::<[u8; 8]>()
        .hash_alg(HashAlg::Gradient)
        .hash_size(8, 8)
        .preproc_dct();

    config.to_hasher()
}

View File

@ -0,0 +1,39 @@
use serde::Serialize;
/// A general type for every result in a search.
///
/// Shared across all FuzzySearch crates so that each service serializes
/// results identically.
#[derive(Debug, Default, Serialize)]
pub struct SearchResult {
// Database row ID for this hashed file (populated from "hash_id"/"id"
// columns by callers — TODO confirm which is canonical).
pub id: i32,
// Submission ID on the originating site.
pub site_id: i64,
// String form of `site_id`, for clients that cannot handle 64-bit ints.
pub site_id_str: String,
pub url: String,
pub filename: String,
// Artists credited for the file, when known.
pub artists: Option<Vec<String>>,
// Site-specific details; flattened so the "site"/"site_info" keys appear
// directly on this object, and omitted entirely when absent.
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(flatten)]
pub site_info: Option<SiteInfo>,
// Perceptual hash of the stored file, if computed.
#[serde(skip_serializing_if = "Option::is_none")]
pub hash: Option<i64>,
// Hamming distance between `hash` and `searched_hash`, if compared.
#[serde(skip_serializing_if = "Option::is_none")]
pub distance: Option<u64>,
// The hash that was searched for, echoed back when provided.
#[serde(skip_serializing_if = "Option::is_none")]
pub searched_hash: Option<i64>,
}
/// Site-specific metadata attached to a search result.
///
/// Adjacently tagged: serializes as a "site" key holding the variant name
/// and a "site_info" key holding the variant's fields.
#[derive(Debug, Serialize)]
#[serde(tag = "site", content = "site_info")]
pub enum SiteInfo {
// A submission hosted on FurAffinity.
FurAffinity {
file_id: i32,
},
// A post hosted on e621; renamed so the tag is lowercase "e621".
#[serde(rename = "e621")]
E621 {
sources: Option<Vec<String>>,
},
// A tweet; no extra metadata is carried.
Twitter,
}

View File

@ -153,7 +153,7 @@ mod tests {
fn test_extract_gif_hashes() -> anyhow::Result<()> {
use std::fs::File;
let gif = File::open("tests/fox.gif")?;
let gif = File::open("../tests/fox.gif")?;
let hashes = extract_gif_hashes(&gif)?;
assert_eq!(
@ -180,7 +180,7 @@ mod tests {
fn test_extract_video_hashes() -> anyhow::Result<()> {
use std::fs::File;
let video = File::open("tests/video.webm")?;
let video = File::open("../tests/video.webm")?;
let hashes = extract_video_hashes(&video)?;
assert_eq!(

40
fuzzysearch/Cargo.toml Normal file
View File

@ -0,0 +1,40 @@
[package]
name = "fuzzysearch"
version = "0.1.0"
authors = ["Syfaro <syfaro@huefox.com>"]
edition = "2018"
[dependencies]
tracing = "0.1"
tracing-subscriber = "0.2"
tracing-futures = "0.2"
opentelemetry = "0.6"
opentelemetry-jaeger = "0.5"
tracing-opentelemetry = "0.5"
tokio = { version = "0.2", features = ["full"] }
futures = "0.3"
futures-util = "0.3"
anyhow = "1"
chrono = "0.4"
bytes = "0.5"
infer = { version = "0.3", default-features = false }
serde = { version = "1", features = ["derive"] }
warp = "0.2"
tokio-postgres = "0.5"
bb8 = "0.4"
bb8-postgres = "0.4"
image = "0.23"
ffmpeg-next = "4"
img_hash = "3"
hamming = "0.1"
bk-tree = "0.3"
fuzzysearch-common = { path = "../fuzzysearch-common" }

View File

@ -5,6 +5,8 @@ use tracing::{span, warn};
use tracing_futures::Instrument;
use warp::{reject, Rejection, Reply};
use fuzzysearch_common::types::{SearchResult, SiteInfo};
fn map_bb8_err(err: bb8::RunError<tokio_postgres::Error>) -> Rejection {
reject::custom(Error::from(err))
}
@ -65,7 +67,7 @@ async fn hash_input(form: warp::multipart::FormData) -> (i64, img_hash::ImageHas
let len = bytes.len();
let hash = tokio::task::spawn_blocking(move || {
let hasher = crate::get_hasher();
let hasher = fuzzysearch_common::get_hasher();
let image = image::load_from_memory(&bytes).unwrap();
hasher.hash_image(&image)
})
@ -87,9 +89,9 @@ async fn hash_video(form: warp::multipart::FormData) -> Vec<[u8; 8]> {
let hashes = tokio::task::spawn_blocking(move || {
if infer::is_video(&bytes) {
crate::video::extract_video_hashes(bytes.reader()).unwrap()
fuzzysearch_common::video::extract_video_hashes(bytes.reader()).unwrap()
} else if infer::image::is_gif(&bytes) {
crate::video::extract_gif_hashes(bytes.reader()).unwrap()
fuzzysearch_common::video::extract_gif_hashes(bytes.reader()).unwrap()
} else {
panic!("invalid file type provided");
}
@ -195,7 +197,7 @@ pub async fn stream_image(
}
fn sse_matches(
matches: Result<Vec<File>, tokio_postgres::Error>,
matches: Result<Vec<SearchResult>, tokio_postgres::Error>,
) -> Result<impl warp::sse::ServerSentEvent, core::convert::Infallible> {
let items = matches.unwrap();
@ -286,7 +288,7 @@ pub async fn search_file(
.await
.map_err(map_postgres_err)?
.into_iter()
.map(|row| File {
.map(|row| SearchResult {
id: row.get("hash_id"),
site_id: row.get::<&str, i32>("id") as i64,
site_id_str: row.get::<&str, i32>("id").to_string(),
@ -297,9 +299,9 @@ pub async fn search_file(
.map(|artist| vec![artist]),
distance: None,
hash: None,
site_info: Some(SiteInfo::FurAffinity(FurAffinityFile {
site_info: Some(SiteInfo::FurAffinity {
file_id: row.get("file_id"),
})),
}),
searched_hash: None,
})
.collect();

View File

@ -9,7 +9,6 @@ mod handlers;
mod models;
mod types;
mod utils;
mod video;
use warp::Filter;
@ -196,13 +195,3 @@ async fn main() {
}
type Pool = bb8::Pool<bb8_postgres::PostgresConnectionManager<tokio_postgres::NoTls>>;
fn get_hasher() -> img_hash::Hasher<[u8; 8]> {
use img_hash::{HashAlg::Gradient, HasherConfig};
HasherConfig::with_bytes_type::<[u8; 8]>()
.hash_alg(Gradient)
.hash_size(8, 8)
.preproc_dct()
.to_hasher()
}

View File

@ -3,6 +3,8 @@ use crate::utils::extract_rows;
use crate::{Pool, Tree};
use tracing_futures::Instrument;
use fuzzysearch_common::types::SearchResult;
pub type DB<'a> =
&'a bb8::PooledConnection<'a, bb8_postgres::PostgresConnectionManager<tokio_postgres::NoTls>>;
@ -48,7 +50,7 @@ pub async fn image_query(
hashes: Vec<i64>,
distance: i64,
hash: Option<Vec<u8>>,
) -> Result<Vec<File>, tokio_postgres::Error> {
) -> Result<Vec<SearchResult>, tokio_postgres::Error> {
let mut results = image_query_sync(pool, tree, hashes, distance, hash);
let mut matches = Vec::new();
@ -66,8 +68,8 @@ pub fn image_query_sync(
hashes: Vec<i64>,
distance: i64,
hash: Option<Vec<u8>>,
) -> tokio::sync::mpsc::Receiver<Result<Vec<File>, tokio_postgres::Error>> {
let (tx, rx) = tokio::sync::mpsc::channel(50);
) -> tokio::sync::mpsc::Receiver<Result<Vec<SearchResult>, tokio_postgres::Error>> {
let (mut tx, rx) = tokio::sync::mpsc::channel(50);
tokio::spawn(async move {
let db = pool.get().await.unwrap();

View File

@ -1,5 +1,7 @@
use serde::{Deserialize, Serialize};
use fuzzysearch_common::types::SearchResult;
/// An API key representation from the database.
///
/// May contain information about the owner, always has rate limit information.
@ -23,52 +25,6 @@ pub enum RateLimit {
Available(i16),
}
/// A general type for every file.
#[derive(Debug, Default, Serialize)]
pub struct File {
pub id: i32,
pub site_id: i64,
pub site_id_str: String,
pub url: String,
pub filename: String,
pub artists: Option<Vec<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(flatten)]
pub site_info: Option<SiteInfo>,
#[serde(skip_serializing_if = "Option::is_none")]
pub hash: Option<i64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub distance: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub searched_hash: Option<i64>,
}
#[derive(Debug, Serialize)]
#[serde(tag = "site", content = "site_info")]
pub enum SiteInfo {
FurAffinity(FurAffinityFile),
#[serde(rename = "e621")]
E621(E621File),
Twitter,
}
/// Information about a file hosted on FurAffinity.
#[derive(Debug, Serialize)]
pub struct FurAffinityFile {
pub file_id: i32,
}
/// Information about a file hosted on e621.
#[derive(Debug, Serialize)]
pub struct E621File {
pub sources: Option<Vec<String>>,
}
#[derive(Debug, Deserialize)]
pub struct FileSearchOpts {
pub id: Option<i32>,
@ -93,7 +49,7 @@ pub enum ImageSearchType {
#[derive(Debug, Serialize)]
pub struct ImageSimilarity {
pub hash: i64,
pub matches: Vec<File>,
pub matches: Vec<SearchResult>,
}
#[derive(Serialize)]

View File

@ -1,6 +1,8 @@
use crate::models::DB;
use crate::types::*;
use fuzzysearch_common::types::{SearchResult, SiteInfo};
#[macro_export]
macro_rules! rate_limit {
($api_key:expr, $db:expr, $limit:tt, $group:expr) => {
@ -66,7 +68,7 @@ pub async fn update_rate_limit(
pub fn extract_rows<'a>(
rows: Vec<tokio_postgres::Row>,
hash: Option<&'a [u8]>,
) -> impl IntoIterator<Item = File> + 'a {
) -> impl IntoIterator<Item = SearchResult> + 'a {
rows.into_iter().map(move |row| {
let dbhash: i64 = row.get("hash");
let dbbytes = dbhash.to_be_bytes();
@ -80,16 +82,16 @@ pub fn extract_rows<'a>(
let (site_id, site_info) = if let Some(fa_id) = furaffinity_id {
(
fa_id as i64,
Some(SiteInfo::FurAffinity(FurAffinityFile {
Some(SiteInfo::FurAffinity {
file_id: row.get("file_id"),
})),
}),
)
} else if let Some(e6_id) = e621_id {
(
e6_id as i64,
Some(SiteInfo::E621(E621File {
Some(SiteInfo::E621 {
sources: row.get("sources"),
})),
}),
)
} else if let Some(t_id) = twitter_id {
(t_id, Some(SiteInfo::Twitter))
@ -97,7 +99,7 @@ pub fn extract_rows<'a>(
(-1, None)
};
File {
SearchResult {
id: row.get("id"),
site_id,
site_info,