From 5f50e2fe0ea4d5dbb2d18ccf4e618a468d3e04e2 Mon Sep 17 00:00:00 2001 From: Syfaro Date: Tue, 4 Jan 2022 23:39:21 -0500 Subject: [PATCH] Add metrics, bump versions. --- Cargo.lock | 34 +++++----- fuzzysearch-api/Cargo.toml | 2 +- fuzzysearch-api/sqlx-data.json | 106 +++++++++++++++++++++++------- fuzzysearch-api/src/api/mod.rs | 5 ++ fuzzysearch-api/src/main.rs | 51 +++++++++++++- fuzzysearch-hash-input/Cargo.toml | 8 +-- 6 files changed, 159 insertions(+), 47 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c367886..ed9de64 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -96,9 +96,9 @@ dependencies = [ [[package]] name = "actix-router" -version = "0.5.0-beta.3" +version = "0.5.0-beta.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddd9f117b910fbcce6e9f45092ffd4ff017785a346d09e2d4fd049f4e20384f4" +checksum = "b53c1deabdbf3a8a8b9a949123edd3cafb873abd75da96b5933a8b590f9d6dc2" dependencies = [ "bytestring", "firestorm", @@ -199,9 +199,9 @@ dependencies = [ [[package]] name = "actix-web-codegen" -version = "0.5.0-beta.6" +version = "0.5.0-rc.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30a90b7f6c2fde9a1fe3df4da758c2c3c9d620dfa3eae4da0b6925dc0a13444a" +checksum = "98a793e4a7bd059e06e1bc1bd9943b57a47f806de3599d2437441682292c333e" dependencies = [ "actix-router", "proc-macro2", @@ -894,9 +894,9 @@ checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" [[package]] name = "firestorm" -version = "0.4.6" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31586bda1b136406162e381a3185a506cdfc1631708dd40cba2f6628d8634499" +checksum = "4d3d6188b8804df28032815ea256b6955c9625c24da7525f387a7af02fbb8f01" [[package]] name = "flate2" @@ -1080,7 +1080,7 @@ dependencies = [ [[package]] name = "fuzzysearch-api" -version = "0.2.0" +version = "0.3.0" dependencies = [ "bkapi-client", "bytes 1.1.0", @@ -1268,9 +1268,9 @@ dependencies = [ [[package]] name = "generic-array" -version = "0.14.4" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "501466ecc8a30d1d3b7fc9229b122b2ce8ed6e9d9223f1138d4babb253e51817" +checksum = "fd48d33ec7f05fbfa152300fdad764757cbded343c1aa1cff2fbaf4134851803" dependencies = [ "typenum", "version_check", @@ -2306,9 +2306,9 @@ dependencies = [ [[package]] name = "poem" -version = "1.2.22" +version = "1.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "729b68ea9a102b96d2d3a46954cd108bbe14e1d906b722822b0deff7d7617c3d" +checksum = "e8f609173a5155e4dd994804848f55a3de894e7daecd562312e69151ec6c3556" dependencies = [ "async-trait", "bytes 1.1.0", @@ -2346,9 +2346,9 @@ dependencies = [ [[package]] name = "poem-derive" -version = "1.2.22" +version = "1.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c4648e2d2f0ef5a50119594205afdceb40ef1b6ec57bfa2f949df4aaa6f8bc7" +checksum = "bb42548e64280207b0d28cb15ee93d4f22b4fd4f5aae857854f25ebf73cf8f3f" dependencies = [ "proc-macro-crate", "proc-macro2", @@ -2358,9 +2358,9 @@ dependencies = [ [[package]] name = "poem-openapi" -version = "1.2.22" +version = "1.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "964b896103226ad7cf87a82328e1b151c1a6319af59496a23f3ef28adcc8a0f5" +checksum = "089c258deab06ca3e4f40afbc5abafcce11d9025c2baf7976ace3f3b59170de0" dependencies = [ "base64 0.13.0", "bytes 1.1.0", @@ -2382,9 +2382,9 @@ dependencies = [ [[package]] name = "poem-openapi-derive" 
-version = "1.2.22" +version = "1.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19aecdb1b13e895f34e28b078f4ef7605a7e1bae9403a5025688200ec7496f32" +checksum = "6a6cb9b595ad55c370e919e655d7338a524edf678355f4784eb32a76a77ad3fc" dependencies = [ "Inflector", "darling", diff --git a/fuzzysearch-api/Cargo.toml b/fuzzysearch-api/Cargo.toml index 791af27..dd7b0bf 100644 --- a/fuzzysearch-api/Cargo.toml +++ b/fuzzysearch-api/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "fuzzysearch-api" -version = "0.2.0" +version = "0.3.0" authors = ["Syfaro "] edition = "2018" diff --git a/fuzzysearch-api/sqlx-data.json b/fuzzysearch-api/sqlx-data.json index 26fb794..28bbe27 100644 --- a/fuzzysearch-api/sqlx-data.json +++ b/fuzzysearch-api/sqlx-data.json @@ -1,7 +1,7 @@ { "db": "PostgreSQL", - "1984ce60f052d6a29638f8e05b35671b8edfbf273783d4b843ebd35cbb8a391f": { - "query": "INSERT INTO\n rate_limit (api_key_id, time_window, group_name, count)\n VALUES\n ($1, $2, $3, $4)\n ON CONFLICT ON CONSTRAINT unique_window\n DO UPDATE set count = rate_limit.count + $4\n RETURNING rate_limit.count", + "08a21d9ae3a6330beee1e74e74d9dcf9ecaf4ca76948339b7b3210fd9507a5fa": { + "query": "INSERT INTO\n rate_limit (api_key_id, time_window, group_name, count)\nVALUES\n ($1, $2, $3, $4) ON CONFLICT ON CONSTRAINT unique_window DO\nUPDATE\nset\n count = rate_limit.count + $4 RETURNING rate_limit.count\n", "describe": { "columns": [ { @@ -23,8 +23,28 @@ ] } }, - "607c1801f1ccc639f70d06b42c5a1d3cd89196bf22b115a895577f2c0cd8f746": { - "query": "WITH hashes AS (\n SELECT * FROM jsonb_to_recordset($1::jsonb)\n AS hashes(searched_hash bigint, found_hash bigint, distance bigint)\n )\n SELECT\n 'FurAffinity' site,\n submission.id,\n submission.hash_int hash,\n submission.url,\n submission.filename,\n ARRAY(SELECT artist.name) artists,\n submission.file_id,\n null sources,\n submission.rating,\n submission.posted_at,\n hashes.searched_hash,\n hashes.distance\n FROM hashes\n JOIN submission ON hashes.found_hash = submission.hash_int\n JOIN artist ON submission.artist_id = artist.id\n WHERE hash_int IN (SELECT hashes.found_hash)\n UNION ALL\n SELECT\n 'e621' site,\n e621.id,\n e621.hash,\n e621.data->'file'->>'url' url,\n (e621.data->'file'->>'md5') || '.' 
|| (e621.data->'file'->>'ext') filename,\n ARRAY(SELECT jsonb_array_elements_text(e621.data->'tags'->'artist')) artists,\n null file_id,\n ARRAY(SELECT jsonb_array_elements_text(e621.data->'sources')) sources,\n e621.data->>'rating' rating,\n to_timestamp(data->>'created_at', 'YYYY-MM-DD\"T\"HH24:MI:SS\"Z\"') posted_at,\n hashes.searched_hash,\n hashes.distance\n FROM hashes\n JOIN e621 ON hashes.found_hash = e621.hash\n WHERE e621.hash IN (SELECT hashes.found_hash)\n UNION ALL\n SELECT\n 'Weasyl' site,\n weasyl.id,\n weasyl.hash,\n weasyl.data->>'link' url,\n null filename,\n ARRAY(SELECT weasyl.data->>'owner_login') artists,\n null file_id,\n null sources,\n weasyl.data->>'rating' rating,\n to_timestamp(data->>'posted_at', 'YYYY-MM-DD\"T\"HH24:MI:SS\"Z\"') posted_at,\n hashes.searched_hash,\n hashes.distance\n FROM hashes\n JOIN weasyl ON hashes.found_hash = weasyl.hash\n WHERE weasyl.hash IN (SELECT hashes.found_hash)\n UNION ALL\n SELECT\n 'Twitter' site,\n tweet.id,\n tweet_media.hash,\n tweet_media.url,\n null filename,\n ARRAY(SELECT tweet.data->'user'->>'screen_name') artists,\n null file_id,\n null sources,\n CASE\n WHEN (tweet.data->'possibly_sensitive')::boolean IS true THEN 'adult'\n WHEN (tweet.data->'possibly_sensitive')::boolean IS false THEN 'general'\n END rating,\n to_timestamp(tweet.data->>'created_at', 'DY Mon DD HH24:MI:SS +0000 YYYY') posted_at,\n hashes.searched_hash,\n hashes.distance\n FROM hashes\n JOIN tweet_media ON hashes.found_hash = tweet_media.hash\n JOIN tweet ON tweet_media.tweet_id = tweet.id\n WHERE tweet_media.hash IN (SELECT hashes.found_hash)", + "56ce1706518e7831d65aa5263bc8968ae8013f29749ddd24105b464182b34d93": { + "query": "SELECT\n exists(\n SELECT\n 1\n FROM\n twitter_user\n WHERE\n lower(data ->> 'screen_name') = lower($1)\n ) \"exists!\";\n", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "exists!", + "type_info": "Bool" + } + ], + "parameters": { + "Left": [ + "Text" + ] + }, + "nullable": [ + null + ] + } + }, + "590fc61bba9227b888b22ee4e0654426e0af47dac5bb657382984b1bf25bc558": { + "query": "WITH hashes AS (\n SELECT * FROM jsonb_to_recordset($1::jsonb)\n AS hashes(searched_hash bigint, found_hash bigint, distance bigint)\n)\nSELECT\n 'FurAffinity' site,\n submission.id,\n submission.hash_int hash,\n submission.url,\n submission.filename,\n ARRAY(SELECT artist.name) artists,\n submission.file_id,\n null sources,\n submission.rating,\n submission.posted_at,\n hashes.searched_hash,\n hashes.distance\nFROM hashes\nJOIN submission ON hashes.found_hash = submission.hash_int\nJOIN artist ON submission.artist_id = artist.id\nWHERE hash_int IN (SELECT hashes.found_hash)\nUNION ALL\nSELECT\n 'e621' site,\n e621.id,\n e621.hash,\n e621.data->'file'->>'url' url,\n (e621.data->'file'->>'md5') || '.' 
|| (e621.data->'file'->>'ext') filename,\n ARRAY(SELECT jsonb_array_elements_text(e621.data->'tags'->'artist')) artists,\n null file_id,\n ARRAY(SELECT jsonb_array_elements_text(e621.data->'sources')) sources,\n e621.data->>'rating' rating,\n to_timestamp(data->>'created_at', 'YYYY-MM-DD\"T\"HH24:MI:SS\"Z\"') posted_at,\n hashes.searched_hash,\n hashes.distance\nFROM hashes\nJOIN e621 ON hashes.found_hash = e621.hash\nWHERE e621.hash IN (SELECT hashes.found_hash)\nUNION ALL\nSELECT\n 'Weasyl' site,\n weasyl.id,\n weasyl.hash,\n weasyl.data->>'link' url,\n null filename,\n ARRAY(SELECT weasyl.data->>'owner_login') artists,\n null file_id,\n null sources,\n weasyl.data->>'rating' rating,\n to_timestamp(data->>'posted_at', 'YYYY-MM-DD\"T\"HH24:MI:SS\"Z\"') posted_at,\n hashes.searched_hash,\n hashes.distance\nFROM hashes\nJOIN weasyl ON hashes.found_hash = weasyl.hash\nWHERE weasyl.hash IN (SELECT hashes.found_hash)\nUNION ALL\nSELECT\n 'Twitter' site,\n tweet.id,\n tweet_media.hash,\n tweet_media.url,\n null filename,\n ARRAY(SELECT tweet.data->'user'->>'screen_name') artists,\n null file_id,\n null sources,\n CASE\n WHEN (tweet.data->'possibly_sensitive')::boolean IS true THEN 'adult'\n WHEN (tweet.data->'possibly_sensitive')::boolean IS false THEN 'general'\n END rating,\n to_timestamp(tweet.data->>'created_at', 'DY Mon DD HH24:MI:SS +0000 YYYY') posted_at,\n hashes.searched_hash,\n hashes.distance\nFROM hashes\nJOIN tweet_media ON hashes.found_hash = tweet_media.hash\nJOIN tweet ON tweet_media.tweet_id = tweet.id\nWHERE tweet_media.hash IN (SELECT hashes.found_hash)\n", "describe": { "columns": [ { @@ -109,8 +129,8 @@ ] } }, - "659ee9ddc1c5ccd42ba9dc1617440544c30ece449ba3ba7f9d39f447b8af3cfe": { - "query": "SELECT\n api_key.id,\n api_key.name_limit,\n api_key.image_limit,\n api_key.hash_limit,\n api_key.name,\n account.email owner_email\n FROM\n api_key\n JOIN account\n ON account.id = api_key.user_id\n WHERE\n api_key.key = $1\n ", + "954a4422cf5a2517ff1af3bb4c1544de2ec392c9b8bcbf6defdeba5ad3727fe4": { + "query": "SELECT\n api_key.id,\n api_key.user_id,\n api_key.name_limit,\n api_key.image_limit,\n api_key.hash_limit,\n api_key.name\nFROM\n api_key\nWHERE\n api_key.key = $1\n", "describe": { "columns": [ { @@ -120,27 +140,27 @@ }, { "ordinal": 1, + "name": "user_id", + "type_info": "Int4" + }, + { + "ordinal": 2, "name": "name_limit", "type_info": "Int2" }, { - "ordinal": 2, + "ordinal": 3, "name": "image_limit", "type_info": "Int2" }, { - "ordinal": 3, + "ordinal": 4, "name": "hash_limit", "type_info": "Int2" }, - { - "ordinal": 4, - "name": "name", - "type_info": "Text" - }, { "ordinal": 5, - "name": "owner_email", + "name": "name", "type_info": "Text" } ], @@ -154,28 +174,70 @@ false, false, false, - true, - false + false, + true ] } }, - "6b8d304fc40fa539ae671e6e24e7978ad271cb7a1cafb20fc4b4096a958d790f": { - "query": "SELECT exists(SELECT 1 FROM twitter_user WHERE lower(data->>'screen_name') = lower($1))", + "fd604c5df0c2fa6b80dbeed8e5989f01188bb28553ae9b6ca16c5861bacf3118": { + "query": "SELECT\n submission.id,\n submission.url,\n submission.filename,\n submission.file_id,\n submission.rating,\n submission.posted_at,\n submission.hash_int hash,\n artist.name artist\nFROM\n submission\n LEFT JOIN artist ON artist.id = submission.artist_id\nWHERE\n file_id = $1\nLIMIT\n 10;\n", "describe": { "columns": [ { "ordinal": 0, - "name": "exists", - "type_info": "Bool" + "name": "id", + "type_info": "Int4" + }, + { + "ordinal": 1, + "name": "url", + "type_info": "Text" + }, + { + "ordinal": 
2, + "name": "filename", + "type_info": "Text" + }, + { + "ordinal": 3, + "name": "file_id", + "type_info": "Int4" + }, + { + "ordinal": 4, + "name": "rating", + "type_info": "Bpchar" + }, + { + "ordinal": 5, + "name": "posted_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 6, + "name": "hash", + "type_info": "Int8" + }, + { + "ordinal": 7, + "name": "artist", + "type_info": "Text" } ], "parameters": { "Left": [ - "Text" + "Int4" ] }, "nullable": [ - null + false, + true, + true, + true, + true, + true, + true, + false ] } } diff --git a/fuzzysearch-api/src/api/mod.rs b/fuzzysearch-api/src/api/mod.rs index a680d5f..f3e1b6b 100644 --- a/fuzzysearch-api/src/api/mod.rs +++ b/fuzzysearch-api/src/api/mod.rs @@ -188,6 +188,8 @@ pub(crate) async fn url( let distance = distance.unwrap_or(3); + let timer = crate::IMAGE_URL_DOWNLOAD_DURATION.start_timer(); + let content_length = resp .headers() .get("content-length") @@ -218,6 +220,9 @@ pub(crate) async fn url( buf.put(chunk); } + let seconds = timer.stop_and_record(); + tracing::info!("completed url download in {} seconds", seconds); + let body = reqwest::Body::from(buf.to_vec()); let hash = hash_input(client, &endpoints.hash_input, body).await?; diff --git a/fuzzysearch-api/src/main.rs b/fuzzysearch-api/src/main.rs index 59b14e5..24f0d87 100644 --- a/fuzzysearch-api/src/main.rs +++ b/fuzzysearch-api/src/main.rs @@ -3,6 +3,7 @@ use std::{borrow::Cow, fmt::Display, str::FromStr}; use api::ApiKeyAuthorization; use bkapi_client::BKApiClient; use hyper::StatusCode; +use lazy_static::lazy_static; use poem::{error::ResponseError, listener::TcpListener, web::Data, EndpointExt, Route}; use poem_openapi::{ param::{Path, Query}, @@ -10,11 +11,36 @@ use poem_openapi::{ types::multipart::Upload, Multipart, Object, OneOf, OpenApi, OpenApiService, }; +use prometheus::{register_histogram, register_int_counter_vec, Histogram, IntCounterVec}; mod api; type Pool = sqlx::PgPool; +lazy_static! 
{ + static ref RATE_LIMIT_ATTEMPTS: IntCounterVec = register_int_counter_vec!( + "fuzzysearch_api_rate_limit_attempts_count", + "Number of attempts on each rate limit bucket", + &["bucket", "status"] + ) + .unwrap(); + static ref IMAGE_QUERY_DURATION: Histogram = register_histogram!( + "fuzzysearch_api_image_query_seconds", + "Duration to perform an image lookup query" + ) + .unwrap(); + static ref IMAGE_HASH_DURATION: Histogram = register_histogram!( + "fuzzysearch_api_image_hash_seconds", + "Duration to send image for hashing" + ) + .unwrap(); + static ref IMAGE_URL_DOWNLOAD_DURATION: Histogram = register_histogram!( + "fuzzysearch_api_image_url_download_seconds", + "Duration to download an image from a provided URL" + ) + .unwrap(); +} + #[derive(Clone)] pub struct Endpoints { pub hash_input: String, @@ -159,7 +185,7 @@ async fn update_rate_limit( pool: &Pool, key_id: i32, key_group_limit: i16, - group_name: &'static str, + bucket_name: &'static str, incr_by: i16, ) -> Result { let now = chrono::Utc::now(); @@ -170,15 +196,23 @@ async fn update_rate_limit( "queries/update_rate_limit.sql", key_id, time_window, - group_name, + bucket_name, incr_by ) .fetch_one(pool) .await?; if count > key_group_limit { + RATE_LIMIT_ATTEMPTS + .with_label_values(&[bucket_name, "limited"]) + .inc(); + Ok(RateLimit::Limited) } else { + RATE_LIMIT_ATTEMPTS + .with_label_values(&[bucket_name, "available"]) + .inc(); + Ok(RateLimit::Available(( key_group_limit - count, key_group_limit, @@ -246,6 +280,7 @@ async fn lookup_hashes( let data = serde_json::to_value(index_hashes)?; + let timer = IMAGE_QUERY_DURATION.start_timer(); let results = sqlx::query_file!("queries/lookup_hashes.sql", data) .map(|row| { let site_extra_data = match row.site.as_deref() { @@ -275,8 +310,13 @@ async fn lookup_hashes( }) .fetch_all(pool) .await?; + let seconds = timer.stop_and_record(); - tracing::info!("found {} matches from database", results.len()); + tracing::info!( + "found {} matches from database in {} seconds", + results.len(), + seconds + ); tracing::trace!("database matches: {:?}", results); Ok(results) @@ -298,11 +338,15 @@ async fn hash_input( tracing::info!("sending image for hashing"); + let timer = IMAGE_HASH_DURATION.start_timer(); let resp = client .post(hash_input_endpoint) .multipart(form) .send() .await?; + let seconds = timer.stop_and_record(); + + tracing::info!("completed image hash in {} seconds", seconds); if resp.status() != StatusCode::OK { tracing::warn!("got wrong status code: {}", resp.status()); @@ -507,6 +551,7 @@ async fn main() { .nest("/", api_service) .nest("/docs", docs) .at("/openapi.json", api_spec_endpoint) + .at("/metrics", poem::endpoint::PrometheusExporter::new()) .data(pool) .data(bkapi) .data(endpoints) diff --git a/fuzzysearch-hash-input/Cargo.toml b/fuzzysearch-hash-input/Cargo.toml index e28a786..73642a2 100644 --- a/fuzzysearch-hash-input/Cargo.toml +++ b/fuzzysearch-hash-input/Cargo.toml @@ -14,10 +14,10 @@ tokio-stream = "0.1" tempfile = "3" image = "0.23" -actix-web = "4.0.0-beta.18" -actix-http = "3.0.0-beta.17" -actix-multipart = "0.4.0-beta.11" -tracing-actix-web = { version = "0.5.0-beta.8", features = ["opentelemetry_0_16"] } +actix-web = "=4.0.0-beta.18" +actix-http = "=3.0.0-beta.17" +actix-multipart = "=0.4.0-beta.11" +tracing-actix-web = { version = "=0.5.0-beta.8", features = ["opentelemetry_0_16"] } lazy_static = "1" prometheus = { version = "0.13", features = ["process"] }
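
The instrumentation added in fuzzysearch-api/src/main.rs and src/api/mod.rs follows the prometheus crate's histogram timer pattern: each of IMAGE_QUERY_DURATION, IMAGE_HASH_DURATION, and IMAGE_URL_DOWNLOAD_DURATION is started with start_timer() before the measured step and finished with stop_and_record(), which both observes the elapsed time into the histogram and returns it in seconds for the tracing log lines. A minimal sketch of that pattern, using an illustrative metric and function name (example_step_seconds, timed_step) rather than anything registered by this commit:

    use lazy_static::lazy_static;
    use prometheus::{register_histogram, Histogram};

    lazy_static! {
        // Illustrative metric; the commit registers fuzzysearch_api_image_query_seconds,
        // fuzzysearch_api_image_hash_seconds, and fuzzysearch_api_image_url_download_seconds
        // the same way.
        static ref STEP_DURATION: Histogram = register_histogram!(
            "example_step_seconds",
            "Duration of an example step"
        )
        .unwrap();
    }

    fn timed_step() -> f64 {
        // start_timer() returns a HistogramTimer tied to STEP_DURATION;
        // stop_and_record() observes the elapsed time and returns it in seconds.
        let timer = STEP_DURATION.start_timer();
        std::thread::sleep(std::time::Duration::from_millis(10)); // work being measured
        timer.stop_and_record()
    }

    fn main() {
        println!("step took {} seconds", timed_step());
    }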
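
Rate limiting is instrumented with a labelled counter: every call to update_rate_limit increments fuzzysearch_api_rate_limit_attempts_count with the bucket name and whether the request was limited or still had quota, so the two outcomes can be graphed per bucket. A condensed sketch of that branch, with the rate_limit upsert and error handling omitted and a hypothetical record_attempt helper standing in for the real function:

    use lazy_static::lazy_static;
    use prometheus::{register_int_counter_vec, IntCounterVec};

    lazy_static! {
        // Same registration as the commit.
        static ref RATE_LIMIT_ATTEMPTS: IntCounterVec = register_int_counter_vec!(
            "fuzzysearch_api_rate_limit_attempts_count",
            "Number of attempts on each rate limit bucket",
            &["bucket", "status"]
        )
        .unwrap();
    }

    // Hypothetical helper condensing the check in update_rate_limit: `count`
    // comes back from the rate_limit upsert, and the status label records
    // whether this bucket is over its limit.
    fn record_attempt(bucket: &'static str, count: i16, limit: i16) -> bool {
        let status = if count > limit { "limited" } else { "available" };
        RATE_LIMIT_ATTEMPTS.with_label_values(&[bucket, status]).inc();
        count > limit
    }

    fn main() {
        assert!(!record_attempt("image", 3, 25));
        assert!(record_attempt("image", 30, 25));
    }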
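
The new /metrics route serves whatever has been registered in prometheus's default registry, which is where the lazy_static! metrics above land. A standalone sketch mirroring the call the commit adds, assuming poem is built with its prometheus feature and tokio is the async runtime; the real service also nests its API routes, docs, and shared Data on the same Route:

    use poem::{endpoint::PrometheusExporter, listener::TcpListener, Route, Server};

    #[tokio::main]
    async fn main() -> Result<(), std::io::Error> {
        // PrometheusExporter::new() exposes the default registry, the same call
        // the commit wires up at "/metrics".
        let app = Route::new().at("/metrics", PrometheusExporter::new());

        Server::new(TcpListener::bind("127.0.0.1:8080"))
            .run(app)
            .await
    }

Scraping GET /metrics on the running service then returns the Prometheus text exposition format, including the new duration histograms and the rate-limit attempt counter.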