Mirror of https://github.com/Syfaro/fuzzysearch.git, synced 2024-11-05 06:23:08 +00:00

Remove hashes table, add Weasyl lookups (#7)

parent bc278441b6
commit bd1238b34a
@@ -26,8 +26,6 @@ impl std::str::FromStr for Rating {
 /// A general type for every result in a search.
 #[derive(Clone, Debug, Default, Deserialize, Serialize)]
 pub struct SearchResult {
-    pub id: i32,
-
     pub site_id: i64,
     pub site_id_str: String,

@@ -60,4 +58,5 @@ pub enum SiteInfo {
         sources: Option<Vec<String>>,
     },
     Twitter,
+    Weasyl,
 }
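For reference, a minimal sketch of the types after this change, reconstructed from the hunks above: the `id` field (the old hashes-table row id) drops out of SearchResult, and SiteInfo grows a unit Weasyl variant alongside Twitter. Only the lines visible in the hunks are certain; field types outside them are assumptions.

```rust
// Hedged reconstruction from the diff; `file_id: i32` is inferred
// from the Int4 column it is read from, not shown in the hunks.
#[derive(Clone, Debug)]
pub enum SiteInfo {
    FurAffinity { file_id: i32 },
    E621 { sources: Option<Vec<String>> },
    Twitter,
    // Weasyl needs no extra per-site data, so it is a unit variant.
    Weasyl,
}

fn main() {
    let info = SiteInfo::Weasyl;
    println!("{:?}", info);
}
```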
@@ -23,81 +23,25 @@
       ]
     }
   },
-  "1bd0057782de5a3b41f90081a31d24d14bb70299391050c3404742a6d2915d9e": {
-    "query": "SELECT\n hashes.id,\n hashes.hash,\n hashes.furaffinity_id,\n hashes.e621_id,\n hashes.twitter_id,\n CASE\n WHEN furaffinity_id IS NOT NULL THEN (f.url)\n WHEN e621_id IS NOT NULL THEN (e.data->'file'->>'url')\n WHEN twitter_id IS NOT NULL THEN (tm.url)\n END url,\n CASE\n WHEN furaffinity_id IS NOT NULL THEN (f.filename)\n WHEN e621_id IS NOT NULL THEN ((e.data->'file'->>'md5') || '.' || (e.data->'file'->>'ext'))\n WHEN twitter_id IS NOT NULL THEN (SELECT split_part(split_part(tm.url, '/', 5), ':', 1))\n END filename,\n CASE\n WHEN furaffinity_id IS NOT NULL THEN (ARRAY(SELECT f.name))\n WHEN e621_id IS NOT NULL THEN ARRAY(SELECT jsonb_array_elements_text(e.data->'tags'->'artist'))\n WHEN twitter_id IS NOT NULL THEN ARRAY(SELECT tw.data->'user'->>'screen_name')\n END artists,\n CASE\n WHEN furaffinity_id IS NOT NULL THEN (f.file_id)\n END file_id,\n CASE\n WHEN e621_id IS NOT NULL THEN ARRAY(SELECT jsonb_array_elements_text(e.data->'sources'))\n END sources,\n CASE\n WHEN furaffinity_id IS NOT NULL THEN (f.rating)\n WHEN e621_id IS NOT NULL THEN (e.data->>'rating')\n WHEN twitter_id IS NOT NULL THEN\n CASE\n WHEN (tw.data->'possibly_sensitive')::boolean IS true THEN 'adult'\n WHEN (tw.data->'possibly_sensitive')::boolean IS false THEN 'general'\n END\n END rating\n FROM\n hashes\n LEFT JOIN LATERAL (\n SELECT *\n FROM submission\n JOIN artist ON submission.artist_id = artist.id\n WHERE submission.id = hashes.furaffinity_id\n ) f ON hashes.furaffinity_id IS NOT NULL\n LEFT JOIN LATERAL (\n SELECT *\n FROM e621\n WHERE e621.id = hashes.e621_id\n ) e ON hashes.e621_id IS NOT NULL\n LEFT JOIN LATERAL (\n SELECT *\n FROM tweet\n WHERE tweet.id = hashes.twitter_id\n ) tw ON hashes.twitter_id IS NOT NULL\n LEFT JOIN LATERAL (\n SELECT *\n FROM tweet_media\n WHERE\n tweet_media.tweet_id = hashes.twitter_id AND\n tweet_media.hash <@ (hashes.hash, 0)\n LIMIT 1\n ) tm ON hashes.twitter_id IS NOT NULL\n WHERE hashes.id = $1",
+  "1fc936eeccf8260ca00d7758efdbbb74bd58900107d9dc05f881555f068238c6": {
+    "query": "SELECT id, hash_int hash FROM submission WHERE hash_int IS NOT NULL\n UNION ALL\n SELECT id, hash FROM e621 WHERE hash IS NOT NULL\n UNION ALL\n SELECT tweet_id, hash FROM tweet_media WHERE hash IS NOT NULL\n UNION ALL\n SELECT id, hash FROM weasyl WHERE hash IS NOT NULL",
     "describe": {
       "columns": [
         {
           "ordinal": 0,
           "name": "id",
-          "type_info": "Int4"
+          "type_info": "Int8"
         },
         {
           "ordinal": 1,
           "name": "hash",
           "type_info": "Int8"
-        },
-        {
-          "ordinal": 2,
-          "name": "furaffinity_id",
-          "type_info": "Int4"
-        },
-        {
-          "ordinal": 3,
-          "name": "e621_id",
-          "type_info": "Int4"
-        },
-        {
-          "ordinal": 4,
-          "name": "twitter_id",
-          "type_info": "Int8"
-        },
-        {
-          "ordinal": 5,
-          "name": "url",
-          "type_info": "Text"
-        },
-        {
-          "ordinal": 6,
-          "name": "filename",
-          "type_info": "Text"
-        },
-        {
-          "ordinal": 7,
-          "name": "artists",
-          "type_info": "TextArray"
-        },
-        {
-          "ordinal": 8,
-          "name": "file_id",
-          "type_info": "Int4"
-        },
-        {
-          "ordinal": 9,
-          "name": "sources",
-          "type_info": "TextArray"
-        },
-        {
-          "ordinal": 10,
-          "name": "rating",
-          "type_info": "Bpchar"
         }
       ],
       "parameters": {
-        "Left": [
-          "Int4"
-        ]
+        "Left": []
       },
       "nullable": [
-        false,
-        false,
-        true,
-        true,
-        true,
-        null,
-        null,
-        null,
-        null,
         null,
         null
       ]
@@ -173,27 +117,71 @@
       ]
     }
   },
-  "fe60be66b2d8a8f02b3bfe06d1f0e57e4bb07e80cba1b379a5f17f6cbd8b075c": {
-    "query": "SELECT id, hash FROM hashes",
+  "f798404b69897f47a732fb6a8e08e843f233803aea5d5a7f04087619dbb55626": {
+    "query": "SELECT\n 'FurAffinity' site,\n submission.id,\n submission.hash_int hash,\n submission.url,\n submission.filename,\n ARRAY(SELECT artist.name) artists,\n submission.file_id,\n null sources,\n submission.rating\n FROM submission\n JOIN artist ON submission.artist_id = artist.id\n WHERE hash_int <@ ($1, 0)\n UNION\n SELECT\n 'e621' site,\n e621.id,\n e621.hash,\n e621.data->'file'->>'url' url,\n (e621.data->'file'->>'md5') || '.' || (e621.data->'file'->>'ext') filename,\n ARRAY(SELECT jsonb_array_elements_text(e621.data->'tags'->'artist')) artists,\n null file_id,\n ARRAY(SELECT jsonb_array_elements_text(e621.data->'sources')) sources,\n e621.data->>'rating' rating\n FROM e621\n WHERE hash <@ ($1, 0)\n UNION\n SELECT\n 'Weasyl' site,\n weasyl.id,\n weasyl.hash,\n weasyl.data->>'link' url,\n null filename,\n ARRAY(SELECT weasyl.data->>'owner_login') artists,\n null file_id,\n null sources,\n weasyl.data->>'rating' rating\n FROM weasyl\n WHERE hash <@ ($1, 0)\n UNION\n SELECT\n 'Twitter' site,\n tweet.id,\n tweet_media.hash,\n tweet_media.url,\n null filename,\n ARRAY(SELECT tweet.data->'user'->>'screen_name') artists,\n null file_id,\n null sources,\n CASE\n WHEN (tweet.data->'possibly_sensitive')::boolean IS true THEN 'adult'\n WHEN (tweet.data->'possibly_sensitive')::boolean IS false THEN 'general'\n END rating\n FROM tweet_media\n JOIN tweet ON tweet_media.tweet_id = tweet.id\n WHERE hash <@ ($1, 0)",
     "describe": {
       "columns": [
         {
           "ordinal": 0,
-          "name": "id",
-          "type_info": "Int4"
+          "name": "site",
+          "type_info": "Text"
         },
         {
           "ordinal": 1,
+          "name": "id",
+          "type_info": "Int8"
+        },
+        {
+          "ordinal": 2,
           "name": "hash",
           "type_info": "Int8"
+        },
+        {
+          "ordinal": 3,
+          "name": "url",
+          "type_info": "Text"
+        },
+        {
+          "ordinal": 4,
+          "name": "filename",
+          "type_info": "Text"
+        },
+        {
+          "ordinal": 5,
+          "name": "artists",
+          "type_info": "TextArray"
+        },
+        {
+          "ordinal": 6,
+          "name": "file_id",
+          "type_info": "Int4"
+        },
+        {
+          "ordinal": 7,
+          "name": "sources",
+          "type_info": "TextArray"
+        },
+        {
+          "ordinal": 8,
+          "name": "rating",
+          "type_info": "Bpchar"
         }
       ],
       "parameters": {
-        "Left": []
+        "Left": [
+          "Int8"
+        ]
       },
       "nullable": [
-        false,
-        false
+        null,
+        null,
+        null,
+        null,
+        null,
+        null,
+        null,
+        null,
+        null
       ]
     }
   }
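sqlx-data.json is sqlx's offline query cache, regenerated with `cargo sqlx prepare`, so every reworked query surfaces here with its inferred types. One consequence visible above: columns coming out of a UNION have unknown nullability (`"nullable": [null, ...]`), so the anonymous records on the Rust side expose them as Options. A minimal sketch of consuming a query of that shape — a trimmed two-table variant of the cached one; it only compiles against a live DATABASE_URL or the offline cache:

```rust
use futures::TryStreamExt;

// Hedged sketch: `row.hash` comes back as Option<i64> because UNION
// output nullability is unknown to sqlx, not because NULL hashes are
// expected past the WHERE filters.
async fn load_hashes(pool: &sqlx::PgPool) -> Result<Vec<i64>, sqlx::Error> {
    let mut rows = sqlx::query!(
        "SELECT id, hash FROM weasyl WHERE hash IS NOT NULL
         UNION ALL
         SELECT tweet_id, hash FROM tweet_media WHERE hash IS NOT NULL"
    )
    .fetch(pool);

    let mut hashes = Vec::new();
    while let Some(row) = rows.try_next().await? {
        if let Some(hash) = row.hash {
            hashes.push(hash);
        }
    }
    Ok(hashes)
}
```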
@@ -418,7 +418,6 @@ pub async fn search_file(

     let matches: Result<Vec<SearchResult>, _> = query
         .map(|row| SearchResult {
-            id: row.get("hash_id"),
             site_id: row.get::<i32, _>("id") as i64,
             site_id_str: row.get::<i32, _>("id").to_string(),
             url: row.get("url"),
@@ -14,14 +14,19 @@ type Tree = Arc<RwLock<bk_tree::BKTree<Node, Hamming>>>;
 type Pool = sqlx::PgPool;

 #[derive(Debug)]
-pub struct Node {
-    id: i32,
-    hash: [u8; 8],
-}
+pub struct Node(pub [u8; 8]);

 impl Node {
+    pub fn new(hash: i64) -> Self {
+        Self(hash.to_be_bytes())
+    }
+
     pub fn query(hash: [u8; 8]) -> Self {
-        Self { id: -1, hash }
+        Self(hash)
     }
+
+    pub fn num(&self) -> i64 {
+        i64::from_be_bytes(self.0)
+    }
 }

@@ -29,7 +34,7 @@ pub struct Hamming;

 impl bk_tree::Metric<Node> for Hamming {
     fn distance(&self, a: &Node, b: &Node) -> u64 {
-        hamming::distance_fast(&a.hash, &b.hash).unwrap()
+        hamming::distance_fast(&a.0, &b.0).unwrap()
     }
 }

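Taken together, the tree now stores bare 8-byte hashes instead of (id, hash) pairs; row ids are recovered later by querying each site table for the matched hash. A self-contained sketch of how these pieces compose, assuming the same `bk_tree` (0.3-style, u64 metric) and `hamming` crate APIs the diff itself uses:

```rust
// Hedged sketch mirroring the diff's Node/Hamming definitions.
#[derive(Debug)]
pub struct Node(pub [u8; 8]);

impl Node {
    pub fn new(hash: i64) -> Self {
        Self(hash.to_be_bytes())
    }

    pub fn num(&self) -> i64 {
        i64::from_be_bytes(self.0)
    }
}

pub struct Hamming;

impl bk_tree::Metric<Node> for Hamming {
    fn distance(&self, a: &Node, b: &Node) -> u64 {
        // count of differing bits across the 8 bytes
        hamming::distance_fast(&a.0, &b.0).unwrap()
    }
}

fn main() {
    let mut tree = bk_tree::BKTree::new(Hamming);
    tree.add(Node::new(0x0123_4567_89ab_cdef));

    // find() yields (distance, node) pairs within the tolerance
    for (dist, node) in tree.find(&Node::new(0x0123_4567_89ab_cdee), 2) {
        println!("distance {}: {}", dist, node.num());
    }
}
```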
@@ -167,7 +172,6 @@ async fn serve_metrics() {

 #[derive(serde::Deserialize)]
 struct HashRow {
-    id: i32,
     hash: i64,
 }

@@ -176,13 +180,25 @@ async fn create_tree(conn: &Pool) -> bk_tree::BKTree<Node, Hamming> {

     let mut tree = bk_tree::BKTree::new(Hamming);

-    let mut rows = sqlx::query_as!(HashRow, "SELECT id, hash FROM hashes").fetch(conn);
+    let mut rows = sqlx::query!(
+        "SELECT id, hash_int hash FROM submission WHERE hash_int IS NOT NULL
+        UNION ALL
+        SELECT id, hash FROM e621 WHERE hash IS NOT NULL
+        UNION ALL
+        SELECT tweet_id, hash FROM tweet_media WHERE hash IS NOT NULL
+        UNION ALL
+        SELECT id, hash FROM weasyl WHERE hash IS NOT NULL"
+    )
+    .fetch(conn);

     while let Some(row) = rows.try_next().await.expect("Unable to get row") {
-        tree.add(Node {
-            id: row.id,
-            hash: row.hash.to_be_bytes(),
-        })
+        if let Some(hash) = row.hash {
+            if tree.find_exact(&Node::new(hash)).is_some() {
+                continue;
+            }
+
+            tree.add(Node::new(hash));
+        }
     }

     tree
@@ -207,13 +223,16 @@ async fn load_updates(conn: Pool, tree: Tree) {
         .expect("Unable to recv notification")
     {
         let payload: HashRow = serde_json::from_str(notification.payload()).unwrap();
-        tracing::debug!(id = payload.id, "Adding new hash to tree");
+        tracing::debug!(hash = payload.hash, "Adding new hash to tree");

+        let lock = tree.read().await;
+        if lock.find_exact(&Node::new(payload.hash)).is_some() {
+            continue;
+        }
+        drop(lock);
+
         let mut lock = tree.write().await;
-        lock.add(Node {
-            id: payload.id,
-            hash: payload.hash.to_be_bytes(),
-        });
+        lock.add(Node(payload.hash.to_be_bytes()));
         drop(lock);
     }

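The updater above consumes the `fuzzysearch_hash_added` notifications raised by the triggers in the migrations below. A hedged, self-contained sketch of that plumbing using sqlx's PgListener; the payload shape is assumed from the new triggers, which send only the hash:

```rust
use serde::Deserialize;

// Hypothetical payload struct matching json_build_object('hash', ...).
#[derive(Deserialize)]
struct HashPayload {
    hash: i64,
}

async fn watch_new_hashes(database_url: &str) -> Result<(), sqlx::Error> {
    let mut listener = sqlx::postgres::PgListener::connect(database_url).await?;
    listener.listen("fuzzysearch_hash_added").await?;

    loop {
        let notification = listener.recv().await?;
        let payload: HashPayload = serde_json::from_str(notification.payload())
            .expect("invalid notification payload");
        // load_updates would dedupe against the tree here, then insert
        println!("new hash: {}", payload.hash);
    }
}
```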
@@ -1,9 +1,12 @@
+use std::collections::HashSet;
+
 use lazy_static::lazy_static;
 use prometheus::{register_histogram, Histogram};
+use tracing_futures::Instrument;

 use crate::types::*;
 use crate::{Pool, Tree};
 use futures::TryStreamExt;
 use fuzzysearch_common::types::{SearchResult, SiteInfo};

 lazy_static! {
@@ -73,130 +76,125 @@ pub fn image_query_sync(
 ) -> tokio::sync::mpsc::Receiver<Result<Vec<SearchResult>, sqlx::Error>> {
     let (tx, rx) = tokio::sync::mpsc::channel(50);

-    tokio::spawn(async move {
-        let db = pool;
+    tokio::spawn(
+        async move {
+            let db = pool;

-        for query_hash in hashes {
-            let mut seen = std::collections::HashSet::new();
+            for query_hash in hashes {
+                tracing::trace!(query_hash, "Evaluating hash");

-            let _timer = IMAGE_LOOKUP_DURATION.start_timer();
+                let mut seen: HashSet<[u8; 8]> = HashSet::new();

-            let node = crate::Node::query(query_hash.to_be_bytes());
-            let lock = tree.read().await;
-            let items = lock.find(&node, distance as u64);
+                let _timer = IMAGE_LOOKUP_DURATION.start_timer();

-            for (dist, item) in items {
-                if seen.contains(&item.id) {
-                    continue;
-                }
-                seen.insert(item.id);
+                let node = crate::Node::query(query_hash.to_be_bytes());
+                let lock = tree.read().await;
+                let items = lock.find(&node, distance as u64);

-                let _timer = IMAGE_QUERY_DURATION.start_timer();
+                for (dist, item) in items {
+                    if seen.contains(&item.0) {
+                        tracing::trace!("Already searched for hash");
+                        continue;
+                    }
+                    seen.insert(item.0);

-                let row = sqlx::query!("SELECT
-                    hashes.id,
-                    hashes.hash,
-                    hashes.furaffinity_id,
-                    hashes.e621_id,
-                    hashes.twitter_id,
-                    CASE
-                        WHEN furaffinity_id IS NOT NULL THEN (f.url)
-                        WHEN e621_id IS NOT NULL THEN (e.data->'file'->>'url')
-                        WHEN twitter_id IS NOT NULL THEN (tm.url)
-                    END url,
-                    CASE
-                        WHEN furaffinity_id IS NOT NULL THEN (f.filename)
-                        WHEN e621_id IS NOT NULL THEN ((e.data->'file'->>'md5') || '.' || (e.data->'file'->>'ext'))
-                        WHEN twitter_id IS NOT NULL THEN (SELECT split_part(split_part(tm.url, '/', 5), ':', 1))
-                    END filename,
-                    CASE
-                        WHEN furaffinity_id IS NOT NULL THEN (ARRAY(SELECT f.name))
-                        WHEN e621_id IS NOT NULL THEN ARRAY(SELECT jsonb_array_elements_text(e.data->'tags'->'artist'))
-                        WHEN twitter_id IS NOT NULL THEN ARRAY(SELECT tw.data->'user'->>'screen_name')
-                    END artists,
-                    CASE
-                        WHEN furaffinity_id IS NOT NULL THEN (f.file_id)
-                    END file_id,
-                    CASE
-                        WHEN e621_id IS NOT NULL THEN ARRAY(SELECT jsonb_array_elements_text(e.data->'sources'))
-                    END sources,
-                    CASE
-                        WHEN furaffinity_id IS NOT NULL THEN (f.rating)
-                        WHEN e621_id IS NOT NULL THEN (e.data->>'rating')
-                        WHEN twitter_id IS NOT NULL THEN
+                    let _timer = IMAGE_QUERY_DURATION.start_timer();
+
+                    tracing::debug!(num = item.num(), "Searching database for hash in tree");
+
+                    let mut row = sqlx::query!(
+                        "SELECT
+                            'FurAffinity' site,
+                            submission.id,
+                            submission.hash_int hash,
+                            submission.url,
+                            submission.filename,
+                            ARRAY(SELECT artist.name) artists,
+                            submission.file_id,
+                            null sources,
+                            submission.rating
+                        FROM submission
+                        JOIN artist ON submission.artist_id = artist.id
+                        WHERE hash_int <@ ($1, 0)
+                        UNION
+                        SELECT
+                            'e621' site,
+                            e621.id,
+                            e621.hash,
+                            e621.data->'file'->>'url' url,
+                            (e621.data->'file'->>'md5') || '.' || (e621.data->'file'->>'ext') filename,
+                            ARRAY(SELECT jsonb_array_elements_text(e621.data->'tags'->'artist')) artists,
+                            null file_id,
+                            ARRAY(SELECT jsonb_array_elements_text(e621.data->'sources')) sources,
+                            e621.data->>'rating' rating
+                        FROM e621
+                        WHERE hash <@ ($1, 0)
+                        UNION
+                        SELECT
+                            'Weasyl' site,
+                            weasyl.id,
+                            weasyl.hash,
+                            weasyl.data->>'link' url,
+                            null filename,
+                            ARRAY(SELECT weasyl.data->>'owner_login') artists,
+                            null file_id,
+                            null sources,
+                            weasyl.data->>'rating' rating
+                        FROM weasyl
+                        WHERE hash <@ ($1, 0)
+                        UNION
+                        SELECT
+                            'Twitter' site,
+                            tweet.id,
+                            tweet_media.hash,
+                            tweet_media.url,
+                            null filename,
+                            ARRAY(SELECT tweet.data->'user'->>'screen_name') artists,
+                            null file_id,
+                            null sources,
                         CASE
-                            WHEN (tw.data->'possibly_sensitive')::boolean IS true THEN 'adult'
-                            WHEN (tw.data->'possibly_sensitive')::boolean IS false THEN 'general'
-                        END
-                    END rating
-                    FROM
-                        hashes
-                    LEFT JOIN LATERAL (
-                        SELECT *
-                        FROM submission
-                        JOIN artist ON submission.artist_id = artist.id
-                        WHERE submission.id = hashes.furaffinity_id
-                    ) f ON hashes.furaffinity_id IS NOT NULL
-                    LEFT JOIN LATERAL (
-                        SELECT *
-                        FROM e621
-                        WHERE e621.id = hashes.e621_id
-                    ) e ON hashes.e621_id IS NOT NULL
-                    LEFT JOIN LATERAL (
-                        SELECT *
-                        FROM tweet
-                        WHERE tweet.id = hashes.twitter_id
-                    ) tw ON hashes.twitter_id IS NOT NULL
-                    LEFT JOIN LATERAL (
-                        SELECT *
-                        FROM tweet_media
-                        WHERE
-                            tweet_media.tweet_id = hashes.twitter_id AND
-                            tweet_media.hash <@ (hashes.hash, 0)
-                        LIMIT 1
-                    ) tm ON hashes.twitter_id IS NOT NULL
-                    WHERE hashes.id = $1", item.id).map(|row| {
-                    let (site_id, site_info) = if let Some(fa_id) = row.furaffinity_id {
-                        (
-                            fa_id as i64,
-                            Some(SiteInfo::FurAffinity {
-                                file_id: row.file_id.unwrap(),
-                            })
-                        )
-                    } else if let Some(e621_id) = row.e621_id {
-                        (
-                            e621_id as i64,
-                            Some(SiteInfo::E621 {
-                                sources: row.sources,
-                            })
-                        )
-                    } else if let Some(twitter_id) = row.twitter_id {
-                        (twitter_id, Some(SiteInfo::Twitter))
-                    } else {
-                        (-1, None)
-                    };
+                            WHEN (tweet.data->'possibly_sensitive')::boolean IS true THEN 'adult'
+                            WHEN (tweet.data->'possibly_sensitive')::boolean IS false THEN 'general'
+                        END rating
+                        FROM tweet_media
+                        JOIN tweet ON tweet_media.tweet_id = tweet.id
+                        WHERE hash <@ ($1, 0)",
+                        &item.num()
+                    )
+                    .map(|row| {
+                        let site_info = match row.site.as_deref() {
+                            Some("FurAffinity") => SiteInfo::FurAffinity { file_id: row.file_id.unwrap_or(-1) },
+                            Some("e621") => SiteInfo::E621 { sources: row.sources },
+                            Some("Twitter") => SiteInfo::Twitter,
+                            Some("Weasyl") => SiteInfo::Weasyl,
+                            _ => panic!("Got unknown site"),
+                        };

-                    let file = SearchResult {
-                        id: row.id,
-                        site_id,
-                        site_info,
-                        rating: row.rating.and_then(|rating| rating.parse().ok()),
-                        site_id_str: site_id.to_string(),
-                        url: row.url.unwrap_or_default(),
-                        hash: Some(row.hash),
-                        distance: Some(dist),
-                        artists: row.artists,
-                        filename: row.filename.unwrap_or_default(),
-                        searched_hash: Some(query_hash),
-                    };
+                        let file = SearchResult {
+                            site_id: row.id.unwrap_or_default(),
+                            site_info: Some(site_info),
+                            rating: row.rating.and_then(|rating| rating.parse().ok()),
+                            site_id_str: row.id.unwrap_or_default().to_string(),
+                            url: row.url.unwrap_or_default(),
+                            hash: row.hash,
+                            distance: Some(dist),
+                            artists: row.artists,
+                            filename: row.filename.unwrap_or_default(),
+                            searched_hash: Some(query_hash),
+                        };

-                    vec![file]
-                }).fetch_one(&db).await;
+                        vec![file]
+                    })
+                    .fetch(&db);

-                tx.send(row).await.unwrap();
+                    while let Some(row) = row.try_next().await.ok().flatten() {
+                        tx.send(Ok(row)).await.unwrap();
+                    }
                 }
             }
-    }.in_current_span());
+        }
+        .in_current_span(),
+    );

     rx
 }
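The function still hands callers a channel; a hedged sketch of draining it, assuming tokio's mpsc Receiver as in the signature above. Under the new per-row fetch, a hash with several matches arrives as several messages of one result each:

```rust
use fuzzysearch_common::types::SearchResult;

// Hypothetical caller-side helper: flatten all batches into one Vec,
// propagating the first database error encountered.
async fn collect_results(
    mut rx: tokio::sync::mpsc::Receiver<Result<Vec<SearchResult>, sqlx::Error>>,
) -> Result<Vec<SearchResult>, sqlx::Error> {
    let mut results = Vec::new();
    while let Some(batch) = rx.recv().await {
        results.extend(batch?);
    }
    Ok(results)
}
```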
migrations/20210419174900_index_all_hashes.down.sql (new file)
@@ -0,0 +1,4 @@
+DROP INDEX bk_furaffinity_hash;
+DROP INDEX bk_e621_hash;
+DROP INDEX bk_twitter_hash;
+DROP INDEX bk_weasyl_hash;
migrations/20210419174900_index_all_hashes.up.sql (new file)
@@ -0,0 +1,4 @@
+CREATE INDEX bk_furaffinity_hash ON submission USING spgist (hash_int bktree_ops);
+CREATE INDEX bk_e621_hash ON e621 USING spgist (hash bktree_ops);
+CREATE INDEX bk_twitter_hash ON tweet_media USING spgist (hash bktree_ops);
+CREATE INDEX bk_weasyl_hash ON weasyl USING spgist (hash bktree_ops);
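These spgist indexes are what make the `hash <@ (query, max_distance)` lookups above indexable; `bktree_ops` comes from the bktree Postgres extension, which is assumed to be installed already. A hedged sketch of a distance-3 probe against one of them (the helper name is hypothetical):

```rust
// Hypothetical helper: stored Weasyl hashes within hamming distance 3
// of `hash`, served by the bk_weasyl_hash index above.
async fn weasyl_hashes_near(pool: &sqlx::PgPool, hash: i64) -> Result<Vec<i64>, sqlx::Error> {
    sqlx::query_scalar("SELECT hash FROM weasyl WHERE hash <@ ($1, 3)")
        .bind(hash)
        .fetch_all(pool)
        .await
}
```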
migrations/20210419202830_remove_old_index.down.sql (new file)
@@ -0,0 +1,86 @@
+DROP FUNCTION update_notify_furaffinity CASCADE;
+DROP FUNCTION update_notify_others CASCADE;
+
+CREATE TABLE hashes (
+    id SERIAL PRIMARY KEY,
+    hash BIGINT NOT NULL,
+    furaffinity_id INTEGER UNIQUE REFERENCES submission (id),
+    e621_id INTEGER UNIQUE REFERENCES e621 (id),
+    twitter_id BIGINT REFERENCES tweet (id)
+);
+
+CREATE FUNCTION hashes_insert_furaffinity()
+    RETURNS trigger
+    LANGUAGE plpgsql
+    AS $$
+BEGIN
+    if NEW.hash_int IS NOT NULL THEN
+        INSERT INTO hashes (furaffinity_id, hash) VALUES (NEW.id, NEW.hash_int);
+    END IF;
+
+    RETURN NEW;
+END;
+$$;
+
+CREATE FUNCTION hashes_insert_e621()
+    RETURNS trigger
+    LANGUAGE plpgsql
+    AS $$
+BEGIN
+    IF NEW.hash IS NOT NULL THEN
+        IF exists(SELECT 1 FROM hashes WHERE hashes.e621_id = NEW.id) THEN
+            UPDATE hashes SET hash = NEW.hash WHERE e621_id = NEW.id;
+        ELSE
+            INSERT INTO hashes (e621_id, hash) VALUES (NEW.id, NEW.hash);
+        END IF;
+    END IF;
+
+    RETURN NEW;
+END;
+$$;
+
+CREATE FUNCTION hashes_insert_twitter()
+    RETURNS trigger
+    LANGUAGE plpgsql
+    AS $$
+BEGIN
+    IF NEW.hash IS NOT NULL THEN
+        INSERT INTO hashes (twitter_id, hash) VALUES (NEW.tweet_id, NEW.hash);
+    END IF;
+
+    RETURN NEW;
+END;
+$$;
+
+CREATE TRIGGER hashes_insert_furaffinity AFTER INSERT ON submission
+    FOR EACH ROW EXECUTE PROCEDURE hashes_insert_furaffinity();
+CREATE TRIGGER hashes_insert_e621 AFTER INSERT ON e621
+    FOR EACH ROW EXECUTE PROCEDURE hashes_insert_e621();
+CREATE TRIGGER hashes_insert_twitter AFTER INSERT ON tweet_media
+    FOR EACH ROW EXECUTE PROCEDURE hashes_insert_twitter();
+
+INSERT INTO hashes (furaffinity_id, hash)
+    SELECT id, hash_int FROM submission WHERE hash_int IS NOT NULL
+    ON CONFLICT DO NOTHING;
+INSERT INTO hashes (e621_id, hash)
+    SELECT id, hash FROM e621 WHERE hash IS NOT NULL
+    ON CONFLICT DO NOTHING;
+INSERT INTO hashes (twitter_id, hash)
+    SELECT tweet_id, hash FROM tweet_media WHERE hash IS NOT NULL
+    ON CONFLICT DO NOTHING;
+
+CREATE INDEX ON hashes USING spgist (hash bktree_ops);
+
+CREATE FUNCTION hashes_notify_inserted()
+    RETURNS trigger
+    LANGUAGE plpgsql
+    AS $$
+BEGIN
+    PERFORM pg_notify('fuzzysearch_hash_added'::text,
+        json_build_object('id', NEW.id, 'hash', NEW.hash)::text);
+    RETURN NEW;
+END;
+$$;
+
+CREATE TRIGGER hashes_notify_inserted AFTER INSERT ON hashes
+    FOR EACH ROW EXECUTE PROCEDURE hashes_notify_inserted();
migrations/20210419202830_remove_old_index.up.sql (new file)
@@ -0,0 +1,44 @@
+DROP TABLE hashes;
+DROP FUNCTION hashes_notify_inserted CASCADE;
+DROP FUNCTION hashes_insert_furaffinity CASCADE;
+DROP FUNCTION hashes_insert_e621 CASCADE;
+DROP FUNCTION hashes_insert_twitter CASCADE;
+
+CREATE FUNCTION update_notify_furaffinity()
+    RETURNS trigger
+    LANGUAGE plpgsql
+    AS $$
+BEGIN
+    if NEW.hash_int IS NOT NULL THEN
+        PERFORM pg_notify('fuzzysearch_hash_added'::text,
+            json_build_object('hash', NEW.hash_int)::text);
+        RETURN NEW;
+    END IF;
+
+    RETURN NEW;
+END;
+$$;
+
+CREATE FUNCTION update_notify_others()
+    RETURNS trigger
+    LANGUAGE plpgsql
+    AS $$
+BEGIN
+    if NEW.hash IS NOT NULL THEN
+        PERFORM pg_notify('fuzzysearch_hash_added'::text,
+            json_build_object('hash', NEW.hash)::text);
+        RETURN NEW;
+    END IF;
+
+    RETURN NEW;
+END;
+$$;
+
+CREATE TRIGGER update_notify_furaffinity AFTER INSERT OR UPDATE ON submission
+    FOR EACH ROW EXECUTE PROCEDURE update_notify_furaffinity();
+CREATE TRIGGER update_notify_e621 AFTER INSERT OR UPDATE ON e621
+    FOR EACH ROW EXECUTE PROCEDURE update_notify_others();
+CREATE TRIGGER update_notify_twitter AFTER INSERT OR UPDATE ON tweet_media
+    FOR EACH ROW EXECUTE PROCEDURE update_notify_others();
+CREATE TRIGGER update_notify_weasyl AFTER INSERT OR UPDATE ON weasyl
+    FOR EACH ROW EXECUTE PROCEDURE update_notify_others();