Add metrics, bump versions.

This commit is contained in:
Syfaro 2022-01-04 23:39:21 -05:00
parent cc8a88fe67
commit 5f50e2fe0e
6 changed files with 159 additions and 47 deletions

34
Cargo.lock generated
View File

@@ -96,9 +96,9 @@ dependencies = [
[[package]]
name = "actix-router"
version = "0.5.0-beta.3"
version = "0.5.0-beta.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddd9f117b910fbcce6e9f45092ffd4ff017785a346d09e2d4fd049f4e20384f4"
checksum = "b53c1deabdbf3a8a8b9a949123edd3cafb873abd75da96b5933a8b590f9d6dc2"
dependencies = [
"bytestring",
"firestorm",
@@ -199,9 +199,9 @@ dependencies = [
[[package]]
name = "actix-web-codegen"
version = "0.5.0-beta.6"
version = "0.5.0-rc.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30a90b7f6c2fde9a1fe3df4da758c2c3c9d620dfa3eae4da0b6925dc0a13444a"
checksum = "98a793e4a7bd059e06e1bc1bd9943b57a47f806de3599d2437441682292c333e"
dependencies = [
"actix-router",
"proc-macro2",
@@ -894,9 +894,9 @@ checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7"
[[package]]
name = "firestorm"
version = "0.4.6"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "31586bda1b136406162e381a3185a506cdfc1631708dd40cba2f6628d8634499"
checksum = "4d3d6188b8804df28032815ea256b6955c9625c24da7525f387a7af02fbb8f01"
[[package]]
name = "flate2"
@@ -1080,7 +1080,7 @@ dependencies = [
[[package]]
name = "fuzzysearch-api"
version = "0.2.0"
version = "0.3.0"
dependencies = [
"bkapi-client",
"bytes 1.1.0",
@@ -1268,9 +1268,9 @@ dependencies = [
[[package]]
name = "generic-array"
version = "0.14.4"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "501466ecc8a30d1d3b7fc9229b122b2ce8ed6e9d9223f1138d4babb253e51817"
checksum = "fd48d33ec7f05fbfa152300fdad764757cbded343c1aa1cff2fbaf4134851803"
dependencies = [
"typenum",
"version_check",
@@ -2306,9 +2306,9 @@ dependencies = [
[[package]]
name = "poem"
version = "1.2.22"
version = "1.2.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "729b68ea9a102b96d2d3a46954cd108bbe14e1d906b722822b0deff7d7617c3d"
checksum = "e8f609173a5155e4dd994804848f55a3de894e7daecd562312e69151ec6c3556"
dependencies = [
"async-trait",
"bytes 1.1.0",
@@ -2346,9 +2346,9 @@ dependencies = [
[[package]]
name = "poem-derive"
version = "1.2.22"
version = "1.2.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c4648e2d2f0ef5a50119594205afdceb40ef1b6ec57bfa2f949df4aaa6f8bc7"
checksum = "bb42548e64280207b0d28cb15ee93d4f22b4fd4f5aae857854f25ebf73cf8f3f"
dependencies = [
"proc-macro-crate",
"proc-macro2",
@@ -2358,9 +2358,9 @@ dependencies = [
[[package]]
name = "poem-openapi"
version = "1.2.22"
version = "1.2.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "964b896103226ad7cf87a82328e1b151c1a6319af59496a23f3ef28adcc8a0f5"
checksum = "089c258deab06ca3e4f40afbc5abafcce11d9025c2baf7976ace3f3b59170de0"
dependencies = [
"base64 0.13.0",
"bytes 1.1.0",
@@ -2382,9 +2382,9 @@ dependencies = [
[[package]]
name = "poem-openapi-derive"
version = "1.2.22"
version = "1.2.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19aecdb1b13e895f34e28b078f4ef7605a7e1bae9403a5025688200ec7496f32"
checksum = "6a6cb9b595ad55c370e919e655d7338a524edf678355f4784eb32a76a77ad3fc"
dependencies = [
"Inflector",
"darling",

View File

@@ -1,6 +1,6 @@
[package]
name = "fuzzysearch-api"
version = "0.2.0"
version = "0.3.0"
authors = ["Syfaro <syfaro@huefox.com>"]
edition = "2018"

View File

@@ -1,7 +1,7 @@
{
"db": "PostgreSQL",
"1984ce60f052d6a29638f8e05b35671b8edfbf273783d4b843ebd35cbb8a391f": {
"query": "INSERT INTO\n rate_limit (api_key_id, time_window, group_name, count)\n VALUES\n ($1, $2, $3, $4)\n ON CONFLICT ON CONSTRAINT unique_window\n DO UPDATE set count = rate_limit.count + $4\n RETURNING rate_limit.count",
"08a21d9ae3a6330beee1e74e74d9dcf9ecaf4ca76948339b7b3210fd9507a5fa": {
"query": "INSERT INTO\n rate_limit (api_key_id, time_window, group_name, count)\nVALUES\n ($1, $2, $3, $4) ON CONFLICT ON CONSTRAINT unique_window DO\nUPDATE\nset\n count = rate_limit.count + $4 RETURNING rate_limit.count\n",
"describe": {
"columns": [
{
@@ -23,8 +23,28 @@
]
}
},
"607c1801f1ccc639f70d06b42c5a1d3cd89196bf22b115a895577f2c0cd8f746": {
"query": "WITH hashes AS (\n SELECT * FROM jsonb_to_recordset($1::jsonb)\n AS hashes(searched_hash bigint, found_hash bigint, distance bigint)\n )\n SELECT\n 'FurAffinity' site,\n submission.id,\n submission.hash_int hash,\n submission.url,\n submission.filename,\n ARRAY(SELECT artist.name) artists,\n submission.file_id,\n null sources,\n submission.rating,\n submission.posted_at,\n hashes.searched_hash,\n hashes.distance\n FROM hashes\n JOIN submission ON hashes.found_hash = submission.hash_int\n JOIN artist ON submission.artist_id = artist.id\n WHERE hash_int IN (SELECT hashes.found_hash)\n UNION ALL\n SELECT\n 'e621' site,\n e621.id,\n e621.hash,\n e621.data->'file'->>'url' url,\n (e621.data->'file'->>'md5') || '.' || (e621.data->'file'->>'ext') filename,\n ARRAY(SELECT jsonb_array_elements_text(e621.data->'tags'->'artist')) artists,\n null file_id,\n ARRAY(SELECT jsonb_array_elements_text(e621.data->'sources')) sources,\n e621.data->>'rating' rating,\n to_timestamp(data->>'created_at', 'YYYY-MM-DD\"T\"HH24:MI:SS\"Z\"') posted_at,\n hashes.searched_hash,\n hashes.distance\n FROM hashes\n JOIN e621 ON hashes.found_hash = e621.hash\n WHERE e621.hash IN (SELECT hashes.found_hash)\n UNION ALL\n SELECT\n 'Weasyl' site,\n weasyl.id,\n weasyl.hash,\n weasyl.data->>'link' url,\n null filename,\n ARRAY(SELECT weasyl.data->>'owner_login') artists,\n null file_id,\n null sources,\n weasyl.data->>'rating' rating,\n to_timestamp(data->>'posted_at', 'YYYY-MM-DD\"T\"HH24:MI:SS\"Z\"') posted_at,\n hashes.searched_hash,\n hashes.distance\n FROM hashes\n JOIN weasyl ON hashes.found_hash = weasyl.hash\n WHERE weasyl.hash IN (SELECT hashes.found_hash)\n UNION ALL\n SELECT\n 'Twitter' site,\n tweet.id,\n tweet_media.hash,\n tweet_media.url,\n null filename,\n ARRAY(SELECT tweet.data->'user'->>'screen_name') artists,\n null file_id,\n null sources,\n CASE\n WHEN (tweet.data->'possibly_sensitive')::boolean IS true THEN 'adult'\n WHEN (tweet.data->'possibly_sensitive')::boolean IS 
false THEN 'general'\n END rating,\n to_timestamp(tweet.data->>'created_at', 'DY Mon DD HH24:MI:SS +0000 YYYY') posted_at,\n hashes.searched_hash,\n hashes.distance\n FROM hashes\n JOIN tweet_media ON hashes.found_hash = tweet_media.hash\n JOIN tweet ON tweet_media.tweet_id = tweet.id\n WHERE tweet_media.hash IN (SELECT hashes.found_hash)",
"56ce1706518e7831d65aa5263bc8968ae8013f29749ddd24105b464182b34d93": {
"query": "SELECT\n exists(\n SELECT\n 1\n FROM\n twitter_user\n WHERE\n lower(data ->> 'screen_name') = lower($1)\n ) \"exists!\";\n",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "exists!",
"type_info": "Bool"
}
],
"parameters": {
"Left": [
"Text"
]
},
"nullable": [
null
]
}
},
"590fc61bba9227b888b22ee4e0654426e0af47dac5bb657382984b1bf25bc558": {
"query": "WITH hashes AS (\n SELECT * FROM jsonb_to_recordset($1::jsonb)\n AS hashes(searched_hash bigint, found_hash bigint, distance bigint)\n)\nSELECT\n 'FurAffinity' site,\n submission.id,\n submission.hash_int hash,\n submission.url,\n submission.filename,\n ARRAY(SELECT artist.name) artists,\n submission.file_id,\n null sources,\n submission.rating,\n submission.posted_at,\n hashes.searched_hash,\n hashes.distance\nFROM hashes\nJOIN submission ON hashes.found_hash = submission.hash_int\nJOIN artist ON submission.artist_id = artist.id\nWHERE hash_int IN (SELECT hashes.found_hash)\nUNION ALL\nSELECT\n 'e621' site,\n e621.id,\n e621.hash,\n e621.data->'file'->>'url' url,\n (e621.data->'file'->>'md5') || '.' || (e621.data->'file'->>'ext') filename,\n ARRAY(SELECT jsonb_array_elements_text(e621.data->'tags'->'artist')) artists,\n null file_id,\n ARRAY(SELECT jsonb_array_elements_text(e621.data->'sources')) sources,\n e621.data->>'rating' rating,\n to_timestamp(data->>'created_at', 'YYYY-MM-DD\"T\"HH24:MI:SS\"Z\"') posted_at,\n hashes.searched_hash,\n hashes.distance\nFROM hashes\nJOIN e621 ON hashes.found_hash = e621.hash\nWHERE e621.hash IN (SELECT hashes.found_hash)\nUNION ALL\nSELECT\n 'Weasyl' site,\n weasyl.id,\n weasyl.hash,\n weasyl.data->>'link' url,\n null filename,\n ARRAY(SELECT weasyl.data->>'owner_login') artists,\n null file_id,\n null sources,\n weasyl.data->>'rating' rating,\n to_timestamp(data->>'posted_at', 'YYYY-MM-DD\"T\"HH24:MI:SS\"Z\"') posted_at,\n hashes.searched_hash,\n hashes.distance\nFROM hashes\nJOIN weasyl ON hashes.found_hash = weasyl.hash\nWHERE weasyl.hash IN (SELECT hashes.found_hash)\nUNION ALL\nSELECT\n 'Twitter' site,\n tweet.id,\n tweet_media.hash,\n tweet_media.url,\n null filename,\n ARRAY(SELECT tweet.data->'user'->>'screen_name') artists,\n null file_id,\n null sources,\n CASE\n WHEN (tweet.data->'possibly_sensitive')::boolean IS true THEN 'adult'\n WHEN (tweet.data->'possibly_sensitive')::boolean IS false THEN 'general'\n 
END rating,\n to_timestamp(tweet.data->>'created_at', 'DY Mon DD HH24:MI:SS +0000 YYYY') posted_at,\n hashes.searched_hash,\n hashes.distance\nFROM hashes\nJOIN tweet_media ON hashes.found_hash = tweet_media.hash\nJOIN tweet ON tweet_media.tweet_id = tweet.id\nWHERE tweet_media.hash IN (SELECT hashes.found_hash)\n",
"describe": {
"columns": [
{
@@ -109,8 +129,8 @@
]
}
},
"659ee9ddc1c5ccd42ba9dc1617440544c30ece449ba3ba7f9d39f447b8af3cfe": {
"query": "SELECT\n api_key.id,\n api_key.name_limit,\n api_key.image_limit,\n api_key.hash_limit,\n api_key.name,\n account.email owner_email\n FROM\n api_key\n JOIN account\n ON account.id = api_key.user_id\n WHERE\n api_key.key = $1\n ",
"954a4422cf5a2517ff1af3bb4c1544de2ec392c9b8bcbf6defdeba5ad3727fe4": {
"query": "SELECT\n api_key.id,\n api_key.user_id,\n api_key.name_limit,\n api_key.image_limit,\n api_key.hash_limit,\n api_key.name\nFROM\n api_key\nWHERE\n api_key.key = $1\n",
"describe": {
"columns": [
{
@@ -120,27 +140,27 @@
},
{
"ordinal": 1,
"name": "user_id",
"type_info": "Int4"
},
{
"ordinal": 2,
"name": "name_limit",
"type_info": "Int2"
},
{
"ordinal": 2,
"ordinal": 3,
"name": "image_limit",
"type_info": "Int2"
},
{
"ordinal": 3,
"ordinal": 4,
"name": "hash_limit",
"type_info": "Int2"
},
{
"ordinal": 4,
"name": "name",
"type_info": "Text"
},
{
"ordinal": 5,
"name": "owner_email",
"name": "name",
"type_info": "Text"
}
],
@@ -154,28 +174,70 @@
false,
false,
false,
true,
false
false,
true
]
}
},
"6b8d304fc40fa539ae671e6e24e7978ad271cb7a1cafb20fc4b4096a958d790f": {
"query": "SELECT exists(SELECT 1 FROM twitter_user WHERE lower(data->>'screen_name') = lower($1))",
"fd604c5df0c2fa6b80dbeed8e5989f01188bb28553ae9b6ca16c5861bacf3118": {
"query": "SELECT\n submission.id,\n submission.url,\n submission.filename,\n submission.file_id,\n submission.rating,\n submission.posted_at,\n submission.hash_int hash,\n artist.name artist\nFROM\n submission\n LEFT JOIN artist ON artist.id = submission.artist_id\nWHERE\n file_id = $1\nLIMIT\n 10;\n",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "exists",
"type_info": "Bool"
"name": "id",
"type_info": "Int4"
},
{
"ordinal": 1,
"name": "url",
"type_info": "Text"
},
{
"ordinal": 2,
"name": "filename",
"type_info": "Text"
},
{
"ordinal": 3,
"name": "file_id",
"type_info": "Int4"
},
{
"ordinal": 4,
"name": "rating",
"type_info": "Bpchar"
},
{
"ordinal": 5,
"name": "posted_at",
"type_info": "Timestamptz"
},
{
"ordinal": 6,
"name": "hash",
"type_info": "Int8"
},
{
"ordinal": 7,
"name": "artist",
"type_info": "Text"
}
],
"parameters": {
"Left": [
"Text"
"Int4"
]
},
"nullable": [
null
false,
true,
true,
true,
true,
true,
true,
false
]
}
}

View File

@@ -188,6 +188,8 @@ pub(crate) async fn url(
let distance = distance.unwrap_or(3);
let timer = crate::IMAGE_URL_DOWNLOAD_DURATION.start_timer();
let content_length = resp
.headers()
.get("content-length")
@@ -218,6 +220,9 @@ pub(crate) async fn url(
buf.put(chunk);
}
let seconds = timer.stop_and_record();
tracing::info!("completed url download in {} seconds", seconds);
let body = reqwest::Body::from(buf.to_vec());
let hash = hash_input(client, &endpoints.hash_input, body).await?;

View File

@@ -3,6 +3,7 @@ use std::{borrow::Cow, fmt::Display, str::FromStr};
use api::ApiKeyAuthorization;
use bkapi_client::BKApiClient;
use hyper::StatusCode;
use lazy_static::lazy_static;
use poem::{error::ResponseError, listener::TcpListener, web::Data, EndpointExt, Route};
use poem_openapi::{
param::{Path, Query},
@@ -10,11 +11,36 @@ use poem_openapi::{
types::multipart::Upload,
Multipart, Object, OneOf, OpenApi, OpenApiService,
};
use prometheus::{register_histogram, register_int_counter_vec, Histogram, IntCounterVec};
mod api;
type Pool = sqlx::PgPool;
lazy_static! {
static ref RATE_LIMIT_ATTEMPTS: IntCounterVec = register_int_counter_vec!(
"fuzzysearch_api_rate_limit_attempts_count",
"Number of attempts on each rate limit bucket",
&["bucket", "status"]
)
.unwrap();
static ref IMAGE_QUERY_DURATION: Histogram = register_histogram!(
"fuzzysearch_api_image_query_seconds",
"Duration to perform an image lookup query"
)
.unwrap();
static ref IMAGE_HASH_DURATION: Histogram = register_histogram!(
"fuzzysearch_api_image_hash_seconds",
"Duration to send image for hashing"
)
.unwrap();
static ref IMAGE_URL_DOWNLOAD_DURATION: Histogram = register_histogram!(
"fuzzysearch_api_image_url_download_seconds",
"Duration to download an image from a provided URL"
)
.unwrap();
}
#[derive(Clone)]
pub struct Endpoints {
pub hash_input: String,
@@ -159,7 +185,7 @@ async fn update_rate_limit(
pool: &Pool,
key_id: i32,
key_group_limit: i16,
group_name: &'static str,
bucket_name: &'static str,
incr_by: i16,
) -> Result<RateLimit, sqlx::Error> {
let now = chrono::Utc::now();
@@ -170,15 +196,23 @@ async fn update_rate_limit(
"queries/update_rate_limit.sql",
key_id,
time_window,
group_name,
bucket_name,
incr_by
)
.fetch_one(pool)
.await?;
if count > key_group_limit {
RATE_LIMIT_ATTEMPTS
.with_label_values(&[bucket_name, "limited"])
.inc();
Ok(RateLimit::Limited)
} else {
RATE_LIMIT_ATTEMPTS
.with_label_values(&[bucket_name, "available"])
.inc();
Ok(RateLimit::Available((
key_group_limit - count,
key_group_limit,
@@ -246,6 +280,7 @@ async fn lookup_hashes(
let data = serde_json::to_value(index_hashes)?;
let timer = IMAGE_QUERY_DURATION.start_timer();
let results = sqlx::query_file!("queries/lookup_hashes.sql", data)
.map(|row| {
let site_extra_data = match row.site.as_deref() {
@@ -275,8 +310,13 @@ async fn lookup_hashes(
})
.fetch_all(pool)
.await?;
let seconds = timer.stop_and_record();
tracing::info!("found {} matches from database", results.len());
tracing::info!(
"found {} matches from database in {} seconds",
results.len(),
seconds
);
tracing::trace!("database matches: {:?}", results);
Ok(results)
@@ -298,11 +338,15 @@ async fn hash_input(
tracing::info!("sending image for hashing");
let timer = IMAGE_HASH_DURATION.start_timer();
let resp = client
.post(hash_input_endpoint)
.multipart(form)
.send()
.await?;
let seconds = timer.stop_and_record();
tracing::info!("completed image hash in {} seconds", seconds);
if resp.status() != StatusCode::OK {
tracing::warn!("got wrong status code: {}", resp.status());
@@ -507,6 +551,7 @@ async fn main() {
.nest("/", api_service)
.nest("/docs", docs)
.at("/openapi.json", api_spec_endpoint)
.at("/metrics", poem::endpoint::PrometheusExporter::new())
.data(pool)
.data(bkapi)
.data(endpoints)

View File

@@ -14,10 +14,10 @@ tokio-stream = "0.1"
tempfile = "3"
image = "0.23"
actix-web = "4.0.0-beta.18"
actix-http = "3.0.0-beta.17"
actix-multipart = "0.4.0-beta.11"
tracing-actix-web = { version = "0.5.0-beta.8", features = ["opentelemetry_0_16"] }
actix-web = "=4.0.0-beta.18"
actix-http = "=3.0.0-beta.17"
actix-multipart = "=0.4.0-beta.11"
tracing-actix-web = { version = "=0.5.0-beta.8", features = ["opentelemetry_0_16"] }
lazy_static = "1"
prometheus = { version = "0.13", features = ["process"] }