From bd1238b34abd43c66dbca15375446b6a4ccd9e53 Mon Sep 17 00:00:00 2001
From: Syfaro
Date: Mon, 19 Apr 2021 17:22:40 -0400
Subject: [PATCH] Remove hashes table, add Weasyl lookups (#7)

---
 fuzzysearch-common/src/types.rs               |   3 +-
 fuzzysearch/sqlx-data.json                    | 122 +++++-----
 fuzzysearch/src/handlers.rs                   |   1 -
 fuzzysearch/src/main.rs                       |  53 +++--
 fuzzysearch/src/models.rs                     | 222 +++++++++---------
 .../20210419174900_index_all_hashes.down.sql  |   4 +
 .../20210419174900_index_all_hashes.up.sql    |   4 +
 .../20210419202830_remove_old_index.down.sql  |  86 +++++++
 .../20210419202830_remove_old_index.up.sql    |  44 ++++
 9 files changed, 340 insertions(+), 199 deletions(-)
 create mode 100644 migrations/20210419174900_index_all_hashes.down.sql
 create mode 100644 migrations/20210419174900_index_all_hashes.up.sql
 create mode 100644 migrations/20210419202830_remove_old_index.down.sql
 create mode 100644 migrations/20210419202830_remove_old_index.up.sql

diff --git a/fuzzysearch-common/src/types.rs b/fuzzysearch-common/src/types.rs
index 5ad892b..b7ba75b 100644
--- a/fuzzysearch-common/src/types.rs
+++ b/fuzzysearch-common/src/types.rs
@@ -26,8 +26,6 @@ impl std::str::FromStr for Rating {
 
 /// A general type for every result in a search.
 #[derive(Clone, Debug, Default, Deserialize, Serialize)]
 pub struct SearchResult {
-    pub id: i32,
-
     pub site_id: i64,
     pub site_id_str: String,
@@ -60,4 +58,5 @@ pub enum SiteInfo {
         sources: Option<Vec<String>>,
     },
     Twitter,
+    Weasyl,
 }
diff --git a/fuzzysearch/sqlx-data.json b/fuzzysearch/sqlx-data.json
index 18084bc..19bdf9a 100644
--- a/fuzzysearch/sqlx-data.json
+++ b/fuzzysearch/sqlx-data.json
@@ -23,81 +23,25 @@
       ]
     }
   },
-  "1bd0057782de5a3b41f90081a31d24d14bb70299391050c3404742a6d2915d9e": {
-    "query": "SELECT\n hashes.id,\n hashes.hash,\n hashes.furaffinity_id,\n hashes.e621_id,\n hashes.twitter_id,\n CASE\n WHEN furaffinity_id IS NOT NULL THEN (f.url)\n WHEN e621_id IS NOT NULL THEN (e.data->'file'->>'url')\n WHEN twitter_id IS NOT NULL THEN (tm.url)\n END url,\n CASE\n WHEN furaffinity_id IS NOT NULL THEN (f.filename)\n WHEN e621_id IS NOT NULL THEN ((e.data->'file'->>'md5') || '.' || (e.data->'file'->>'ext'))\n WHEN twitter_id IS NOT NULL THEN (SELECT split_part(split_part(tm.url, '/', 5), ':', 1))\n END filename,\n CASE\n WHEN furaffinity_id IS NOT NULL THEN (ARRAY(SELECT f.name))\n WHEN e621_id IS NOT NULL THEN ARRAY(SELECT jsonb_array_elements_text(e.data->'tags'->'artist'))\n WHEN twitter_id IS NOT NULL THEN ARRAY(SELECT tw.data->'user'->>'screen_name')\n END artists,\n CASE\n WHEN furaffinity_id IS NOT NULL THEN (f.file_id)\n END file_id,\n CASE\n WHEN e621_id IS NOT NULL THEN ARRAY(SELECT jsonb_array_elements_text(e.data->'sources'))\n END sources,\n CASE\n WHEN furaffinity_id IS NOT NULL THEN (f.rating)\n WHEN e621_id IS NOT NULL THEN (e.data->>'rating')\n WHEN twitter_id IS NOT NULL THEN\n CASE\n WHEN (tw.data->'possibly_sensitive')::boolean IS true THEN 'adult'\n WHEN (tw.data->'possibly_sensitive')::boolean IS false THEN 'general'\n END\n END rating\n FROM\n hashes\n LEFT JOIN LATERAL (\n SELECT *\n FROM submission\n JOIN artist ON submission.artist_id = artist.id\n WHERE submission.id = hashes.furaffinity_id\n ) f ON hashes.furaffinity_id IS NOT NULL\n LEFT JOIN LATERAL (\n SELECT *\n FROM e621\n WHERE e621.id = hashes.e621_id\n ) e ON hashes.e621_id IS NOT NULL\n LEFT JOIN LATERAL (\n SELECT *\n FROM tweet\n WHERE tweet.id = hashes.twitter_id\n ) tw ON hashes.twitter_id IS NOT NULL\n LEFT JOIN LATERAL (\n SELECT *\n FROM tweet_media\n WHERE\n tweet_media.tweet_id = hashes.twitter_id AND\n tweet_media.hash <@ (hashes.hash, 0)\n LIMIT 1\n ) tm ON hashes.twitter_id IS NOT NULL\n WHERE hashes.id = $1",
+  "1fc936eeccf8260ca00d7758efdbbb74bd58900107d9dc05f881555f068238c6": {
+    "query": "SELECT id, hash_int hash FROM submission WHERE hash_int IS NOT NULL\n UNION ALL\n SELECT id, hash FROM e621 WHERE hash IS NOT NULL\n UNION ALL\n SELECT tweet_id, hash FROM tweet_media WHERE hash IS NOT NULL\n UNION ALL\n SELECT id, hash FROM weasyl WHERE hash IS NOT NULL",
     "describe": {
       "columns": [
         {
           "ordinal": 0,
           "name": "id",
-          "type_info": "Int4"
+          "type_info": "Int8"
         },
         {
           "ordinal": 1,
           "name": "hash",
           "type_info": "Int8"
-        },
-        {
-          "ordinal": 2,
-          "name": "furaffinity_id",
-          "type_info": "Int4"
-        },
-        {
-          "ordinal": 3,
-          "name": "e621_id",
-          "type_info": "Int4"
-        },
-        {
-          "ordinal": 4,
-          "name": "twitter_id",
-          "type_info": "Int8"
-        },
-        {
-          "ordinal": 5,
-          "name": "url",
-          "type_info": "Text"
-        },
-        {
-          "ordinal": 6,
-          "name": "filename",
-          "type_info": "Text"
-        },
-        {
-          "ordinal": 7,
-          "name": "artists",
-          "type_info": "TextArray"
-        },
-        {
-          "ordinal": 8,
-          "name": "file_id",
-          "type_info": "Int4"
-        },
-        {
-          "ordinal": 9,
-          "name": "sources",
-          "type_info": "TextArray"
-        },
-        {
-          "ordinal": 10,
-          "name": "rating",
-          "type_info": "Bpchar"
         }
       ],
       "parameters": {
-        "Left": [
-          "Int4"
-        ]
+        "Left": []
       },
       "nullable": [
-        false,
-        false,
-        true,
-        true,
-        true,
-        null,
-        null,
-        null,
-        null,
         null,
         null
       ]
     }
   },
@@ -173,27 +117,71 @@
       ]
     }
   },
-  "fe60be66b2d8a8f02b3bfe06d1f0e57e4bb07e80cba1b379a5f17f6cbd8b075c": {
-    "query": "SELECT id, hash FROM hashes",
+  "f798404b69897f47a732fb6a8e08e843f233803aea5d5a7f04087619dbb55626": {
+    "query": "SELECT\n 'FurAffinity' site,\n submission.id,\n submission.hash_int hash,\n submission.url,\n submission.filename,\n ARRAY(SELECT artist.name) artists,\n submission.file_id,\n null sources,\n submission.rating\n FROM submission\n JOIN artist ON submission.artist_id = artist.id\n WHERE hash_int <@ ($1, 0)\n UNION\n SELECT\n 'e621' site,\n e621.id,\n e621.hash,\n e621.data->'file'->>'url' url,\n (e621.data->'file'->>'md5') || '.' || (e621.data->'file'->>'ext') filename,\n ARRAY(SELECT jsonb_array_elements_text(e621.data->'tags'->'artist')) artists,\n null file_id,\n ARRAY(SELECT jsonb_array_elements_text(e621.data->'sources')) sources,\n e621.data->>'rating' rating\n FROM e621\n WHERE hash <@ ($1, 0)\n UNION\n SELECT\n 'Weasyl' site,\n weasyl.id,\n weasyl.hash,\n weasyl.data->>'link' url,\n null filename,\n ARRAY(SELECT weasyl.data->>'owner_login') artists,\n null file_id,\n null sources,\n weasyl.data->>'rating' rating\n FROM weasyl\n WHERE hash <@ ($1, 0)\n UNION\n SELECT\n 'Twitter' site,\n tweet.id,\n tweet_media.hash,\n tweet_media.url,\n null filename,\n ARRAY(SELECT tweet.data->'user'->>'screen_name') artists,\n null file_id,\n null sources,\n CASE\n WHEN (tweet.data->'possibly_sensitive')::boolean IS true THEN 'adult'\n WHEN (tweet.data->'possibly_sensitive')::boolean IS false THEN 'general'\n END rating\n FROM tweet_media\n JOIN tweet ON tweet_media.tweet_id = tweet.id\n WHERE hash <@ ($1, 0)",
     "describe": {
       "columns": [
         {
           "ordinal": 0,
-          "name": "id",
-          "type_info": "Int4"
+          "name": "site",
+          "type_info": "Text"
         },
         {
           "ordinal": 1,
+          "name": "id",
+          "type_info": "Int8"
+        },
+        {
+          "ordinal": 2,
           "name": "hash",
           "type_info": "Int8"
+        },
+        {
+          "ordinal": 3,
+          "name": "url",
+          "type_info": "Text"
+        },
+        {
+          "ordinal": 4,
+          "name": "filename",
+          "type_info": "Text"
+        },
+        {
+          "ordinal": 5,
+          "name": "artists",
+          "type_info": "TextArray"
+        },
+        {
+          "ordinal": 6,
+          "name": "file_id",
+          "type_info": "Int4"
+        },
+        {
+          "ordinal": 7,
+          "name": "sources",
+          "type_info": "TextArray"
+        },
+        {
+          "ordinal": 8,
+          "name": "rating",
+          "type_info": "Bpchar"
         }
       ],
       "parameters": {
-        "Left": []
+        "Left": [
+          "Int8"
+        ]
       },
       "nullable": [
-        false,
-        false
+        null,
+        null,
+        null,
+        null,
+        null,
+        null,
+        null,
+        null,
+        null
       ]
     }
   }
 }
diff --git a/fuzzysearch/src/handlers.rs b/fuzzysearch/src/handlers.rs
index 3a76d99..b5f9d46 100644
--- a/fuzzysearch/src/handlers.rs
+++ b/fuzzysearch/src/handlers.rs
@@ -418,7 +418,6 @@ pub async fn search_file(
 
     let matches: Result<Vec<SearchResult>, _> = query
         .map(|row| SearchResult {
-            id: row.get("hash_id"),
             site_id: row.get::<i32, _>("id") as i64,
             site_id_str: row.get::<i32, _>("id").to_string(),
             url: row.get("url"),
diff --git a/fuzzysearch/src/main.rs b/fuzzysearch/src/main.rs
index 239e030..9e5389a 100644
--- a/fuzzysearch/src/main.rs
+++ b/fuzzysearch/src/main.rs
@@ -14,14 +14,19 @@ type Tree = Arc<RwLock<bk_tree::BKTree<Node, Hamming>>>;
 type Pool = sqlx::PgPool;
 
 #[derive(Debug)]
-pub struct Node {
-    id: i32,
-    hash: [u8; 8],
-}
+pub struct Node(pub [u8; 8]);
 
 impl Node {
+    pub fn new(hash: i64) -> Self {
+        Self(hash.to_be_bytes())
+    }
+
     pub fn query(hash: [u8; 8]) -> Self {
-        Self { id: -1, hash }
+        Self(hash)
+    }
+
+    pub fn num(&self) -> i64 {
+        i64::from_be_bytes(self.0)
     }
 }
 
@@ -29,7 +34,7 @@ pub struct Hamming;
 
 impl bk_tree::Metric<Node> for Hamming {
     fn distance(&self, a: &Node, b: &Node) -> u64 {
-        hamming::distance_fast(&a.hash, &b.hash).unwrap()
+        hamming::distance_fast(&a.0, &b.0).unwrap()
     }
 }
 
@@ -167,7 +172,6 @@ async fn serve_metrics() {
 
 #[derive(serde::Deserialize)]
 struct HashRow {
-    id: i32,
     hash: i64,
 }
 
@@ -176,13 +180,25 @@ async fn create_tree(conn: &Pool) -> bk_tree::BKTree<Node, Hamming> {
     let mut tree = bk_tree::BKTree::new(Hamming);
 
-    let mut rows = sqlx::query_as!(HashRow, "SELECT id, hash FROM hashes").fetch(conn);
+    let mut rows = sqlx::query!(
+        "SELECT id, hash_int hash FROM submission WHERE hash_int IS NOT NULL
+        UNION ALL
+        SELECT id, hash FROM e621 WHERE hash IS NOT NULL
+        UNION ALL
+        SELECT tweet_id, hash FROM tweet_media WHERE hash IS NOT NULL
+        UNION ALL
+        SELECT id, hash FROM weasyl WHERE hash IS NOT NULL"
+    )
+    .fetch(conn);
 
     while let Some(row) = rows.try_next().await.expect("Unable to get row") {
-        tree.add(Node {
-            id: row.id,
-            hash: row.hash.to_be_bytes(),
-        })
+        if let Some(hash) = row.hash {
+            if tree.find_exact(&Node::new(hash)).is_some() {
+                continue;
+            }
+
+            tree.add(Node::new(hash));
+        }
     }
 
     tree
 }
 
@@ -207,13 +223,16 @@ async fn load_updates(conn: Pool, tree: Tree) {
         .expect("Unable to recv notification")
     {
         let payload: HashRow = serde_json::from_str(notification.payload()).unwrap();
-        tracing::debug!(id = payload.id, "Adding new hash to tree");
+        tracing::debug!(hash = payload.hash, "Adding new hash to tree");
+
+        let lock = tree.read().await;
+        if lock.find_exact(&Node::new(payload.hash)).is_some() {
+            continue;
+        }
+        drop(lock);
 
         let mut lock = tree.write().await;
-        lock.add(Node {
-            id: payload.id,
-            hash: payload.hash.to_be_bytes(),
-        });
+        lock.add(Node(payload.hash.to_be_bytes()));
         drop(lock);
 
     }
diff --git a/fuzzysearch/src/models.rs b/fuzzysearch/src/models.rs
index dd943ef..fd246ec 100644
--- a/fuzzysearch/src/models.rs
+++ b/fuzzysearch/src/models.rs
@@ -1,9 +1,12 @@
+use std::collections::HashSet;
+
 use lazy_static::lazy_static;
 use prometheus::{register_histogram, Histogram};
 use tracing_futures::Instrument;
 
 use crate::types::*;
 use crate::{Pool, Tree};
+use futures::TryStreamExt;
 use fuzzysearch_common::types::{SearchResult, SiteInfo};
 
 lazy_static! {
@@ -73,130 +76,125 @@ pub fn image_query_sync(
 ) -> tokio::sync::mpsc::Receiver<Result<Vec<SearchResult>, sqlx::Error>> {
     let (tx, rx) = tokio::sync::mpsc::channel(50);
 
-    tokio::spawn(async move {
-        let db = pool;
+    tokio::spawn(
+        async move {
+            let db = pool;
 
-        for query_hash in hashes {
-            let mut seen = std::collections::HashSet::new();
+            for query_hash in hashes {
+                tracing::trace!(query_hash, "Evaluating hash");
 
-            let _timer = IMAGE_LOOKUP_DURATION.start_timer();
+                let mut seen: HashSet<[u8; 8]> = HashSet::new();
 
-            let node = crate::Node::query(query_hash.to_be_bytes());
-            let lock = tree.read().await;
-            let items = lock.find(&node, distance as u64);
+                let _timer = IMAGE_LOOKUP_DURATION.start_timer();
 
-            for (dist, item) in items {
-                if seen.contains(&item.id) {
-                    continue;
-                }
-                seen.insert(item.id);
+                let node = crate::Node::query(query_hash.to_be_bytes());
+                let lock = tree.read().await;
+                let items = lock.find(&node, distance as u64);
 
-                let _timer = IMAGE_QUERY_DURATION.start_timer();
+                for (dist, item) in items {
+                    if seen.contains(&item.0) {
+                        tracing::trace!("Already searched for hash");
+                        continue;
+                    }
+                    seen.insert(item.0);
 
-                let row = sqlx::query!("SELECT
-                    hashes.id,
-                    hashes.hash,
-                    hashes.furaffinity_id,
-                    hashes.e621_id,
-                    hashes.twitter_id,
-                    CASE
-                        WHEN furaffinity_id IS NOT NULL THEN (f.url)
-                        WHEN e621_id IS NOT NULL THEN (e.data->'file'->>'url')
-                        WHEN twitter_id IS NOT NULL THEN (tm.url)
-                    END url,
-                    CASE
-                        WHEN furaffinity_id IS NOT NULL THEN (f.filename)
-                        WHEN e621_id IS NOT NULL THEN ((e.data->'file'->>'md5') || '.' || (e.data->'file'->>'ext'))
-                        WHEN twitter_id IS NOT NULL THEN (SELECT split_part(split_part(tm.url, '/', 5), ':', 1))
-                    END filename,
-                    CASE
-                        WHEN furaffinity_id IS NOT NULL THEN (ARRAY(SELECT f.name))
-                        WHEN e621_id IS NOT NULL THEN ARRAY(SELECT jsonb_array_elements_text(e.data->'tags'->'artist'))
-                        WHEN twitter_id IS NOT NULL THEN ARRAY(SELECT tw.data->'user'->>'screen_name')
-                    END artists,
-                    CASE
-                        WHEN furaffinity_id IS NOT NULL THEN (f.file_id)
-                    END file_id,
-                    CASE
-                        WHEN e621_id IS NOT NULL THEN ARRAY(SELECT jsonb_array_elements_text(e.data->'sources'))
-                    END sources,
-                    CASE
-                        WHEN furaffinity_id IS NOT NULL THEN (f.rating)
-                        WHEN e621_id IS NOT NULL THEN (e.data->>'rating')
-                        WHEN twitter_id IS NOT NULL THEN
+                    let _timer = IMAGE_QUERY_DURATION.start_timer();
+
+                    tracing::debug!(num = item.num(), "Searching database for hash in tree");
+
+                    let mut row = sqlx::query!(
+                        "SELECT
+                            'FurAffinity' site,
+                            submission.id,
+                            submission.hash_int hash,
+                            submission.url,
+                            submission.filename,
+                            ARRAY(SELECT artist.name) artists,
+                            submission.file_id,
+                            null sources,
+                            submission.rating
+                        FROM submission
+                        JOIN artist ON submission.artist_id = artist.id
+                        WHERE hash_int <@ ($1, 0)
+                        UNION
+                        SELECT
+                            'e621' site,
+                            e621.id,
+                            e621.hash,
+                            e621.data->'file'->>'url' url,
+                            (e621.data->'file'->>'md5') || '.' || (e621.data->'file'->>'ext') filename,
+                            ARRAY(SELECT jsonb_array_elements_text(e621.data->'tags'->'artist')) artists,
+                            null file_id,
+                            ARRAY(SELECT jsonb_array_elements_text(e621.data->'sources')) sources,
+                            e621.data->>'rating' rating
+                        FROM e621
+                        WHERE hash <@ ($1, 0)
+                        UNION
+                        SELECT
+                            'Weasyl' site,
+                            weasyl.id,
+                            weasyl.hash,
+                            weasyl.data->>'link' url,
+                            null filename,
+                            ARRAY(SELECT weasyl.data->>'owner_login') artists,
+                            null file_id,
+                            null sources,
+                            weasyl.data->>'rating' rating
+                        FROM weasyl
+                        WHERE hash <@ ($1, 0)
+                        UNION
+                        SELECT
+                            'Twitter' site,
+                            tweet.id,
+                            tweet_media.hash,
+                            tweet_media.url,
+                            null filename,
+                            ARRAY(SELECT tweet.data->'user'->>'screen_name') artists,
+                            null file_id,
+                            null sources,
                         CASE
-                            WHEN (tw.data->'possibly_sensitive')::boolean IS true THEN 'adult'
-                            WHEN (tw.data->'possibly_sensitive')::boolean IS false THEN 'general'
-                        END
-                    END rating
-                FROM
-                    hashes
-                    LEFT JOIN LATERAL (
-                        SELECT *
-                        FROM submission
-                        JOIN artist ON submission.artist_id = artist.id
-                        WHERE submission.id = hashes.furaffinity_id
-                    ) f ON hashes.furaffinity_id IS NOT NULL
-                    LEFT JOIN LATERAL (
-                        SELECT *
-                        FROM e621
-                        WHERE e621.id = hashes.e621_id
-                    ) e ON hashes.e621_id IS NOT NULL
-                    LEFT JOIN LATERAL (
-                        SELECT *
-                        FROM tweet
-                        WHERE tweet.id = hashes.twitter_id
-                    ) tw ON hashes.twitter_id IS NOT NULL
-                    LEFT JOIN LATERAL (
-                        SELECT *
-                        FROM tweet_media
-                        WHERE
-                            tweet_media.tweet_id = hashes.twitter_id AND
-                            tweet_media.hash <@ (hashes.hash, 0)
-                        LIMIT 1
-                    ) tm ON hashes.twitter_id IS NOT NULL
-                WHERE hashes.id = $1", item.id).map(|row| {
-                    let (site_id, site_info) = if let Some(fa_id) = row.furaffinity_id {
-                        (
-                            fa_id as i64,
-                            Some(SiteInfo::FurAffinity {
-                                file_id: row.file_id.unwrap(),
-                            })
-                        )
-                    } else if let Some(e621_id) = row.e621_id {
-                        (
-                            e621_id as i64,
-                            Some(SiteInfo::E621 {
-                                sources: row.sources,
-                            })
-                        )
-                    } else if let Some(twitter_id) = row.twitter_id {
-                        (twitter_id, Some(SiteInfo::Twitter))
-                    } else {
-                        (-1, None)
-                    };
+                            WHEN (tweet.data->'possibly_sensitive')::boolean IS true THEN 'adult'
+                            WHEN (tweet.data->'possibly_sensitive')::boolean IS false THEN 'general'
+                        END rating
+                        FROM tweet_media
+                        JOIN tweet ON tweet_media.tweet_id = tweet.id
+                        WHERE hash <@ ($1, 0)",
+                        &item.num()
+                    )
+                    .map(|row| {
+                        let site_info = match row.site.as_deref() {
+                            Some("FurAffinity") => SiteInfo::FurAffinity { file_id: row.file_id.unwrap_or(-1) },
+                            Some("e621") => SiteInfo::E621 { sources: row.sources },
+                            Some("Twitter") => SiteInfo::Twitter,
+                            Some("Weasyl") => SiteInfo::Weasyl,
+                            _ => panic!("Got unknown site"),
+                        };
 
-                    let file = SearchResult {
-                        id: row.id,
-                        site_id,
-                        site_info,
-                        rating: row.rating.and_then(|rating| rating.parse().ok()),
-                        site_id_str: site_id.to_string(),
-                        url: row.url.unwrap_or_default(),
-                        hash: Some(row.hash),
-                        distance: Some(dist),
-                        artists: row.artists,
-                        filename: row.filename.unwrap_or_default(),
-                        searched_hash: Some(query_hash),
-                    };
+                        let file = SearchResult {
+                            site_id: row.id.unwrap_or_default(),
+                            site_info: Some(site_info),
+                            rating: row.rating.and_then(|rating| rating.parse().ok()),
+                            site_id_str: row.id.unwrap_or_default().to_string(),
+                            url: row.url.unwrap_or_default(),
+                            hash: row.hash,
+                            distance: Some(dist),
+                            artists: row.artists,
+                            filename: row.filename.unwrap_or_default(),
+                            searched_hash: Some(query_hash),
+                        };
 
-                    vec![file]
-                }).fetch_one(&db).await;
+                        vec![file]
+                    })
+                    .fetch(&db);
 
-                tx.send(row).await.unwrap();
+                    while let Some(row) = row.try_next().await.ok().flatten() {
+                        tx.send(Ok(row)).await.unwrap();
+                    }
+                }
             }
         }
-    }.in_current_span());
+        .in_current_span(),
+    );
 
     rx
 }
diff --git a/migrations/20210419174900_index_all_hashes.down.sql b/migrations/20210419174900_index_all_hashes.down.sql
new file mode 100644
index 0000000..a7a2a53
--- /dev/null
+++ b/migrations/20210419174900_index_all_hashes.down.sql
@@ -0,0 +1,4 @@
+DROP INDEX bk_furaffinity_hash;
+DROP INDEX bk_e621_hash;
+DROP INDEX bk_twitter_hash;
+DROP INDEX bk_weasyl_hash;
diff --git a/migrations/20210419174900_index_all_hashes.up.sql b/migrations/20210419174900_index_all_hashes.up.sql
new file mode 100644
index 0000000..efc8d3e
--- /dev/null
+++ b/migrations/20210419174900_index_all_hashes.up.sql
@@ -0,0 +1,4 @@
+CREATE INDEX bk_furaffinity_hash ON submission USING spgist (hash_int bktree_ops);
+CREATE INDEX bk_e621_hash ON e621 USING spgist (hash bktree_ops);
+CREATE INDEX bk_twitter_hash ON tweet_media USING spgist (hash bktree_ops);
+CREATE INDEX bk_weasyl_hash ON weasyl USING spgist (hash bktree_ops);
diff --git a/migrations/20210419202830_remove_old_index.down.sql b/migrations/20210419202830_remove_old_index.down.sql
new file mode 100644
index 0000000..fae4bb3
--- /dev/null
+++ b/migrations/20210419202830_remove_old_index.down.sql
@@ -0,0 +1,86 @@
+DROP FUNCTION update_notify_furaffinity CASCADE;
+DROP FUNCTION update_notify_others CASCADE;
+
+CREATE TABLE hashes (
+    id SERIAL PRIMARY KEY,
+    hash BIGINT NOT NULL,
+    furaffinity_id INTEGER UNIQUE REFERENCES submission (id),
+    e621_id INTEGER UNIQUE REFERENCES e621 (id),
+    twitter_id BIGINT REFERENCES tweet (id)
+);
+
+CREATE FUNCTION hashes_insert_furaffinity()
+    RETURNS trigger
+    LANGUAGE plpgsql
+AS $$
+BEGIN
+    if NEW.hash_int IS NOT NULL THEN
+        INSERT INTO hashes (furaffinity_id, hash) VALUES (NEW.id, NEW.hash_int);
+    END IF;
+
+    RETURN NEW;
+END;
+$$;
+
+CREATE FUNCTION hashes_insert_e621()
+    RETURNS trigger
+    LANGUAGE plpgsql
+AS $$
+BEGIN
+    IF NEW.hash IS NOT NULL THEN
+        IF exists(SELECT 1 FROM hashes WHERE hashes.e621_id = NEW.id) THEN
+            UPDATE hashes SET hash = NEW.hash WHERE e621_id = NEW.id;
+        ELSE
+            INSERT INTO hashes (e621_id, hash) VALUES (NEW.id, NEW.hash);
+        END IF;
+    END IF;
+
+    RETURN NEW;
+END;
+$$;
+
+CREATE FUNCTION hashes_insert_twitter()
+    RETURNS trigger
+    LANGUAGE plpgsql
+AS $$
+BEGIN
+    IF NEW.hash IS NOT NULL THEN
+        INSERT INTO hashes (twitter_id, hash) VALUES (NEW.tweet_id, NEW.hash);
+    END IF;
+
+    RETURN NEW;
+END;
+$$;
+
+CREATE TRIGGER hashes_insert_furaffinity AFTER INSERT ON submission
+    FOR EACH ROW EXECUTE PROCEDURE hashes_insert_furaffinity();
+CREATE TRIGGER hashes_insert_e621 AFTER INSERT ON e621
+    FOR EACH ROW EXECUTE PROCEDURE hashes_insert_e621();
+CREATE TRIGGER hashes_insert_twitter AFTER INSERT ON tweet_media
+    FOR EACH ROW EXECUTE PROCEDURE hashes_insert_twitter();
+
+INSERT INTO hashes (furaffinity_id, hash)
+    SELECT id, hash_int FROM submission WHERE hash_int IS NOT NULL
+    ON CONFLICT DO NOTHING;
+INSERT INTO hashes (e621_id, hash)
+    SELECT id, hash FROM e621 WHERE hash IS NOT NULL
+    ON CONFLICT DO NOTHING;
+INSERT INTO hashes (twitter_id, hash)
+    SELECT tweet_id, hash FROM tweet_media WHERE hash IS NOT NULL
+    ON CONFLICT DO NOTHING;
+
+CREATE INDEX ON hashes USING spgist (hash bktree_ops);
+
+CREATE FUNCTION hashes_notify_inserted()
+    RETURNS trigger
+    LANGUAGE plpgsql
+AS $$
+BEGIN
+    PERFORM pg_notify('fuzzysearch_hash_added'::text,
+        json_build_object('id', NEW.id, 'hash', NEW.hash)::text);
+    RETURN NEW;
+END;
+$$;
+
+CREATE TRIGGER hashes_notify_inserted AFTER INSERT ON hashes
+    FOR EACH ROW EXECUTE PROCEDURE hashes_notify_inserted();
diff --git a/migrations/20210419202830_remove_old_index.up.sql b/migrations/20210419202830_remove_old_index.up.sql
new file mode 100644
index 0000000..5b3d639
--- /dev/null
+++ b/migrations/20210419202830_remove_old_index.up.sql
@@ -0,0 +1,44 @@
+DROP TABLE hashes;
+DROP FUNCTION hashes_notify_inserted CASCADE;
+DROP FUNCTION hashes_insert_furaffinity CASCADE;
+DROP FUNCTION hashes_insert_e621 CASCADE;
+DROP FUNCTION hashes_insert_twitter CASCADE;
+
+CREATE FUNCTION update_notify_furaffinity()
+    RETURNS trigger
+    LANGUAGE plpgsql
+AS $$
+BEGIN
+    if NEW.hash_int IS NOT NULL THEN
+        PERFORM pg_notify('fuzzysearch_hash_added'::text,
+            json_build_object('hash', NEW.hash_int)::text);
+        RETURN NEW;
+    END IF;
+
+    RETURN NEW;
+END;
+$$;
+
+CREATE FUNCTION update_notify_others()
+    RETURNS trigger
+    LANGUAGE plpgsql
+AS $$
+BEGIN
+    if NEW.hash IS NOT NULL THEN
+        PERFORM pg_notify('fuzzysearch_hash_added'::text,
+            json_build_object('hash', NEW.hash)::text);
+        RETURN NEW;
+    END IF;
+
+    RETURN NEW;
+END;
+$$;
+
+CREATE TRIGGER update_notify_furaffinity AFTER INSERT OR UPDATE ON submission
+    FOR EACH ROW EXECUTE PROCEDURE update_notify_furaffinity();
+CREATE TRIGGER update_notify_e621 AFTER INSERT OR UPDATE ON e621
+    FOR EACH ROW EXECUTE PROCEDURE update_notify_others();
+CREATE TRIGGER update_notify_twitter AFTER INSERT OR UPDATE ON tweet_media
+    FOR EACH ROW EXECUTE PROCEDURE update_notify_others();
+CREATE TRIGGER update_notify_weasyl AFTER INSERT OR UPDATE ON weasyl
+    FOR EACH ROW EXECUTE PROCEDURE update_notify_others();
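
Reviewer note on the in-memory index: the patch replaces id-keyed BK-tree nodes with bare 8-byte hashes and dedupes insertions via find_exact. A minimal, self-contained sketch of how Node, the Hamming metric, and the BK-tree interact follows; it assumes the bk_tree 0.4-style Metric trait and the hamming crate used above, and the hash values are invented for illustration.

use bk_tree::{BKTree, Metric};

// An 8-byte perceptual hash stored in big-endian order, as in fuzzysearch/src/main.rs.
#[derive(Debug)]
struct Node([u8; 8]);

impl Node {
    fn new(hash: i64) -> Self {
        Self(hash.to_be_bytes())
    }
}

struct Hamming;

impl Metric<Node> for Hamming {
    fn distance(&self, a: &Node, b: &Node) -> u64 {
        // Number of differing bits between the two 8-byte hashes.
        hamming::distance_fast(&a.0, &b.0).unwrap()
    }
}

fn main() {
    let mut tree = BKTree::new(Hamming);

    // Skip hashes that are already present, as create_tree and load_updates do.
    let node = Node::new(0x0123_4567_89ab_cdef);
    if tree.find_exact(&node).is_none() {
        tree.add(node);
    }

    // find() yields (distance, node) pairs within the given tolerance.
    for (dist, found) in tree.find(&Node::new(0x0123_4567_89ab_cdee), 3) {
        println!("distance {}: {:?}", dist, found);
    }
}

Dropping the id from the node is what makes the dedup cheap: two submissions with the same hash collapse into one tree entry, and the UNION query recovers every matching row at lookup time.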
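
On the database side, the lookups hinge on the bktree PostgreSQL extension: "hash <@ (query, distance)" is true when hash is within the given Hamming distance of query, and the four SP-GiST indexes created in 20210419174900_index_all_hashes.up.sql let the planner answer that predicate with an index search instead of a sequential scan. A hypothetical session, assuming the extension is installed; the hash value is invented:

-- Exact-match probe, mirroring the "hash <@ ($1, 0)" lookups in models.rs.
SELECT id FROM e621 WHERE hash <@ (7816321871881988913, 0);

-- Fuzzy probe: every e621 row within 3 bits of the query hash.
SELECT id, hash FROM e621 WHERE hash <@ (7816321871881988913, 3);

-- Should show a search over bk_e621_hash rather than a seq scan.
EXPLAIN SELECT id FROM e621 WHERE hash <@ (7816321871881988913, 3);

The UNION query in models.rs runs one such probe per table, which is why each of submission, e621, tweet_media, and weasyl needs its own bktree index.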
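
The trigger rewrite also changes the NOTIFY contract: instead of a hashes_notify_inserted trigger on the now-dropped hashes table sending an id and a hash, each source table fires pg_notify('fuzzysearch_hash_added', ...) with only a hash field, which is why HashRow in main.rs lost its id column. A minimal sketch of the consuming side, assuming sqlx 0.5's PgListener (which matches the recv/payload calls in load_updates) and a DATABASE_URL environment variable; error handling is elided:

use sqlx::postgres::PgListener;

#[derive(serde::Deserialize)]
struct HashRow {
    hash: i64,
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Dedicated LISTEN connection, as load_updates uses.
    let mut listener = PgListener::connect(&std::env::var("DATABASE_URL")?).await?;
    listener.listen("fuzzysearch_hash_added").await?;

    loop {
        // Each trigger sends a JSON body like {"hash": 1234}.
        let notification = listener.recv().await?;
        let payload: HashRow = serde_json::from_str(notification.payload())?;
        println!("would add hash {} to the BK-tree", payload.hash);
    }
}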