From 5e76d4ecf277aba4b21def2aec01cadca1a0ad4a Mon Sep 17 00:00:00 2001 From: Syfaro Date: Tue, 4 Jan 2022 00:42:22 -0500 Subject: [PATCH] Rework fuzzysearch-api with Poem and OpenAPI. --- Cargo.lock | 539 ++++++++++----- fuzzysearch-api/Cargo.toml | 10 +- fuzzysearch-api/queries/handle_twitter.sql | 9 + fuzzysearch-api/queries/lookup_api_key.sql | 12 + .../queries/lookup_furaffinity_file_id.sql | 16 + fuzzysearch-api/queries/lookup_hashes.sql | 76 +++ fuzzysearch-api/queries/update_rate_limit.sql | 7 + fuzzysearch-api/src/filters.rs | 130 ---- fuzzysearch-api/src/handlers.rs | 515 --------------- fuzzysearch-api/src/main.rs | 612 +++++++++++++++++- fuzzysearch-api/src/models.rs | 191 ------ fuzzysearch-api/src/types.rs | 76 --- fuzzysearch-api/src/utils.rs | 108 ---- 13 files changed, 1106 insertions(+), 1195 deletions(-) create mode 100644 fuzzysearch-api/queries/handle_twitter.sql create mode 100644 fuzzysearch-api/queries/lookup_api_key.sql create mode 100644 fuzzysearch-api/queries/lookup_furaffinity_file_id.sql create mode 100644 fuzzysearch-api/queries/lookup_hashes.sql create mode 100644 fuzzysearch-api/queries/update_rate_limit.sql delete mode 100644 fuzzysearch-api/src/filters.rs delete mode 100644 fuzzysearch-api/src/handlers.rs delete mode 100644 fuzzysearch-api/src/models.rs delete mode 100644 fuzzysearch-api/src/types.rs delete mode 100644 fuzzysearch-api/src/utils.rs diff --git a/Cargo.lock b/Cargo.lock index 1af1b1b..c367886 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,16 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "Inflector" +version = "0.11.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3" +dependencies = [ + "lazy_static", + "regex", +] + [[package]] name = "actix-codec" version = "0.4.2" @@ -9,7 +19,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a36c014a3e811624313b51a227b775ecba55d36ef9462bbaac7d4f13e54c9271" dependencies = [ "bitflags", - "bytes", + "bytes 1.1.0", "futures-core", "futures-sink", "log", @@ -30,10 +40,10 @@ dependencies = [ "actix-service", "actix-utils", "ahash", - "base64", + "base64 0.13.0", "bitflags", "brotli2", - "bytes", + "bytes 1.1.0", "bytestring", "derive_more", "encoding_rs", @@ -74,14 +84,14 @@ checksum = "473ef32141eec0050af83ce8945b5e62b1609a68ce19f753a8dac1ccce8d2dd3" dependencies = [ "actix-utils", "actix-web", - "bytes", + "bytes 1.1.0", "derive_more", "futures-core", "httparse", "local-waker", "log", "mime", - "twoway 0.2.2", + "twoway", ] [[package]] @@ -164,7 +174,7 @@ dependencies = [ "actix-utils", "actix-web-codegen", "ahash", - "bytes", + "bytes 1.1.0", "cfg-if", "cookie", "derive_more", @@ -220,6 +230,41 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" +[[package]] +name = "aead" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b613b8e1e3cf911a086f53f03bf286f52fd7a7258e4fa606f0ef220d39d8877" +dependencies = [ + "generic-array", +] + +[[package]] +name = "aes" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e8b47f52ea9bae42228d07ec09eb676433d7c4ed1ebdf0f1d1c29ed446f1ab8" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", + "opaque-debug", +] + +[[package]] +name = "aes-gcm" +version = "0.9.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "df5f85a83a7d8b0442b6aa7b504b8212c1733da07b98aae43d4bc21b2cb3cdf6" +dependencies = [ + "aead", + "aes", + "cipher", + "ctr", + "ghash", + "subtle", +] + [[package]] name = "ahash" version = "0.7.6" @@ -302,6 +347,12 @@ dependencies = [ "rustc-demangle", ] +[[package]] +name = "base64" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b41b7ea54a0c9d92199de89e20e58d49f02f8e699814ef3fdf266f6f748d15c7" + [[package]] name = "base64" version = "0.13.0" @@ -367,16 +418,6 @@ dependencies = [ "libc", ] -[[package]] -name = "buf_redux" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b953a6887648bb07a535631f2bc00fbdb2a2216f135552cb3f534ed136b9c07f" -dependencies = [ - "memchr", - "safemem", -] - [[package]] name = "bufstream" version = "0.1.4" @@ -401,6 +442,12 @@ version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +[[package]] +name = "bytes" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e4cec68f03f32e44924783795810fa50a7035d8c8ebe78580ad7e6c703fba38" + [[package]] name = "bytes" version = "1.1.0" @@ -413,7 +460,7 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90706ba19e97b90786e19dc0d5e2abd80008d99d4c0c5d1ad0b5e72cec7c494d" dependencies = [ - "bytes", + "bytes 1.1.0", ] [[package]] @@ -445,6 +492,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "cipher" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ee52072ec15386f770805afd189a01c8841be8696bed250fa2f13c4c0d6dfb7" +dependencies = [ + "generic-array", +] + [[package]] name = "color_quant" version = "1.1.0" @@ -463,7 +519,14 @@ version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94d4706de1b0fa5b132270cddffa8585166037822e260a944fe161acd137ca05" dependencies = [ + "aes-gcm", + "base64 0.13.0", + "hkdf", + "hmac 0.12.0", "percent-encoding", + "rand 0.8.4", + "sha2 0.10.0", + "subtle", "time 0.3.5", "version_check", ] @@ -617,6 +680,60 @@ dependencies = [ "syn", ] +[[package]] +name = "ctr" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "049bb91fb4aaf0e3c7efa6cd5ef877dbbbd15b39dad06d9948de4ec8a75761ea" +dependencies = [ + "cipher", +] + +[[package]] +name = "darling" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0d720b8683f8dd83c65155f0530560cba68cd2bf395f6513a483caee57ff7f4" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a340f241d2ceed1deb47ae36c4144b2707ec7dd0b649f894cb39bb595986324" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72c41b3b7352feb3211a0d743dc5700a4e3b60f51bd2b368892d1e0f9a95f44b" +dependencies = [ + "darling_core", + "quote", + "syn", +] + +[[package]] +name = "dashmap" +version = "4.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e77a43b28d0668df09411cb0bc9a8c2adc40f9a048afe863e05fd43251e8e39c" 
+dependencies = [ + "cfg-if", + "num_cpus", +] + [[package]] name = "deflate" version = "0.8.6" @@ -966,7 +1083,7 @@ name = "fuzzysearch-api" version = "0.2.0" dependencies = [ "bkapi-client", - "bytes", + "bytes 1.1.0", "chrono", "futures", "fuzzysearch-common", @@ -978,19 +1095,21 @@ dependencies = [ "opentelemetry", "opentelemetry-http", "opentelemetry-jaeger", - "prometheus", + "poem", + "poem-openapi", + "prometheus 0.13.0", "reqwest", "serde", "serde_json", "sqlx", + "thiserror", "tokio", - "tokio-stream", + "tokio-util", "tracing", "tracing-futures", "tracing-log", "tracing-opentelemetry", "tracing-subscriber", - "warp", ] [[package]] @@ -998,7 +1117,7 @@ name = "fuzzysearch-common" version = "0.1.0" dependencies = [ "anyhow", - "base64", + "base64 0.13.0", "chrono", "faktory", "futures", @@ -1009,7 +1128,7 @@ dependencies = [ "opentelemetry", "opentelemetry-http", "opentelemetry-jaeger", - "prometheus", + "prometheus 0.13.0", "reqwest", "serde", "serde_json", @@ -1032,7 +1151,7 @@ dependencies = [ "fuzzysearch-common", "image", "lazy_static", - "prometheus", + "prometheus 0.13.0", "tempfile", "tokio", "tokio-stream", @@ -1050,7 +1169,7 @@ dependencies = [ "image", "img_hash", "lazy_static", - "prometheus", + "prometheus 0.13.0", "reqwest", "serde", "serde_json", @@ -1073,7 +1192,7 @@ dependencies = [ "fuzzysearch-common", "hyper", "lazy_static", - "prometheus", + "prometheus 0.13.0", "reqwest", "serde", "serde_json", @@ -1092,7 +1211,7 @@ dependencies = [ "image", "img_hash", "lazy_static", - "prometheus", + "prometheus 0.13.0", "reqwest", "serde", "serde_json", @@ -1188,6 +1307,16 @@ dependencies = [ "wasi 0.10.2+wasi-snapshot-preview1", ] +[[package]] +name = "ghash" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1583cc1656d7839fd3732b80cf4f38850336cdb9b8ded1cd399ca62958de3c99" +dependencies = [ + "opaque-debug", + "polyval", +] + [[package]] name = "gif" version = "0.11.3" @@ -1210,7 +1339,7 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f072413d126e57991455e0a922b31e4c8ba7c2ffbebf6b78b4f8521397d65cd" dependencies = [ - "bytes", + "bytes 1.1.0", "fnv", "futures-core", "futures-sink", @@ -1253,9 +1382,9 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4c4eb0471fcb85846d8b0690695ef354f9afb11cb03cac2e1d7c9253351afb0" dependencies = [ - "base64", + "base64 0.13.0", "bitflags", - "bytes", + "bytes 1.1.0", "headers-core", "http", "httpdate", @@ -1296,6 +1425,15 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "hkdf" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94f41e9c77b6fc05b57497b960aad55942a9bbc5b20e1e623cf7fb1868f695d1" +dependencies = [ + "hmac 0.12.0", +] + [[package]] name = "hmac" version = "0.11.0" @@ -1346,7 +1484,7 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "31f4c6746584866f0feabcc69893c5b51beef3831656a968ed7ae254cdc4fd03" dependencies = [ - "bytes", + "bytes 1.1.0", "fnv", "itoa 1.0.1", ] @@ -1357,7 +1495,7 @@ version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ff4f84919677303da5f147645dbea6b1881f368d03ac84e1dc09031ebd7b2c6" dependencies = [ - "bytes", + "bytes 1.1.0", "http", "pin-project-lite", ] @@ -1380,7 +1518,7 @@ version = "0.14.16" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7ec3e62bdc98a2f0393a5048e4c30ef659440ea6e0e572965103e72bd836f55" dependencies = [ - "bytes", + "bytes 1.1.0", "futures-channel", "futures-core", "futures-util", @@ -1404,13 +1542,19 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" dependencies = [ - "bytes", + "bytes 1.1.0", "hyper", "native-tls", "tokio", "tokio-native-tls", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "0.2.3" @@ -1447,7 +1591,7 @@ version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ea4eac6fc4f64ed363d5c210732b747bfa5ddd8a25ac347d887f298c3a70b49" dependencies = [ - "base64", + "base64 0.13.0", "image", "rustdct", "serde", @@ -1740,21 +1884,22 @@ dependencies = [ ] [[package]] -name = "multipart" -version = "0.18.0" +name = "multer" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00dec633863867f29cb39df64a397cdf4a6354708ddd7759f70c7fb51c5f9182" +checksum = "5f8f35e687561d5c1667590911e6698a8cb714a134a7505718a182e7bc9d3836" dependencies = [ - "buf_redux", + "bytes 1.1.0", + "encoding_rs", + "futures-util", + "http", "httparse", "log", + "memchr", "mime", - "mime_guess", - "quick-error", - "rand 0.8.4", - "safemem", - "tempfile", - "twoway 0.1.8", + "spin", + "tokio", + "version_check", ] [[package]] @@ -1930,6 +2075,8 @@ checksum = "e1cf9b1c4e9a6c4de793c632496fa490bdc0e1eea73f0c91394f7b6990935d22" dependencies = [ "async-trait", "crossbeam-channel", + "dashmap", + "fnv", "futures", "js-sys", "lazy_static", @@ -1948,7 +2095,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d50ceb0b0e8b75cb3e388a2571a807c8228dabc5d6670f317b6eb21301095373" dependencies = [ "async-trait", - "bytes", + "bytes 1.1.0", "futures-util", "http", "opentelemetry", @@ -1969,6 +2116,17 @@ dependencies = [ "tokio", ] +[[package]] +name = "opentelemetry-prometheus" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ee9c06c1366665e7d4dba6540a42ea48900a9c92dc5b963f3ae05fbba76dc63" +dependencies = [ + "opentelemetry", + "prometheus 0.12.0", + "protobuf", +] + [[package]] name = "opentelemetry-semantic-conventions" version = "0.8.0" @@ -2146,13 +2304,120 @@ dependencies = [ "miniz_oxide 0.3.7", ] +[[package]] +name = "poem" +version = "1.2.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "729b68ea9a102b96d2d3a46954cd108bbe14e1d906b722822b0deff7d7617c3d" +dependencies = [ + "async-trait", + "bytes 1.1.0", + "chrono", + "cookie", + "futures-util", + "headers", + "http", + "hyper", + "mime", + "mime_guess", + "multer", + "opentelemetry", + "opentelemetry-http", + "opentelemetry-prometheus", + "opentelemetry-semantic-conventions", + "parking_lot", + "percent-encoding", + "pin-project-lite", + "poem-derive", + "prometheus 0.12.0", + "regex", + "serde", + "serde_json", + "serde_urlencoded", + "smallvec", + "tempfile", + "thiserror", + "time 0.3.5", + "tokio", + "tokio-stream", + "tokio-util", + "tracing", +] + +[[package]] +name = "poem-derive" +version = "1.2.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8c4648e2d2f0ef5a50119594205afdceb40ef1b6ec57bfa2f949df4aaa6f8bc7" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "poem-openapi" +version = "1.2.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "964b896103226ad7cf87a82328e1b151c1a6319af59496a23f3ef28adcc8a0f5" +dependencies = [ + "base64 0.13.0", + "bytes 1.1.0", + "chrono", + "derive_more", + "futures-util", + "mime", + "num-traits", + "once_cell", + "poem", + "poem-openapi-derive", + "regex", + "serde", + "serde_json", + "thiserror", + "tokio", + "typed-headers", +] + +[[package]] +name = "poem-openapi-derive" +version = "1.2.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19aecdb1b13e895f34e28b078f4ef7605a7e1bae9403a5025688200ec7496f32" +dependencies = [ + "Inflector", + "darling", + "http", + "indexmap", + "mime", + "proc-macro-crate", + "proc-macro2", + "quote", + "regex", + "syn", + "thiserror", +] + +[[package]] +name = "polyval" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8419d2b623c7c0896ff2d5d96e2cb4ede590fed28fcc34934f4c33c036e620a1" +dependencies = [ + "cfg-if", + "cpufeatures", + "opaque-debug", + "universal-hash", +] + [[package]] name = "postgres" version = "0.19.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eb76d6535496f633fa799bb872ffb4790e9cbdedda9d35564ca0252f930c0dd5" dependencies = [ - "bytes", + "bytes 1.1.0", "fallible-iterator", "futures", "log", @@ -2166,9 +2431,9 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "79ec03bce71f18b4a27c4c64c6ba2ddf74686d69b91d8714fb32ead3adaed713" dependencies = [ - "base64", + "base64 0.13.0", "byteorder", - "bytes", + "bytes 1.1.0", "fallible-iterator", "hmac 0.12.0", "md-5 0.10.0", @@ -2184,7 +2449,7 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04619f94ba0cc80999f4fc7073607cb825bc739a883cb6d20900fc5e009d6b0d" dependencies = [ - "bytes", + "bytes 1.1.0", "chrono", "fallible-iterator", "postgres-protocol", @@ -2202,6 +2467,16 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" +[[package]] +name = "proc-macro-crate" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ebace6889caf889b4d3f76becee12e90353f2b8c7d875534a71e5742f8f6f83" +dependencies = [ + "thiserror", + "toml", +] + [[package]] name = "proc-macro-hack" version = "0.5.19" @@ -2231,6 +2506,21 @@ dependencies = [ "libc", ] +[[package]] +name = "prometheus" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5986aa8d62380092d2f50f8b1cdba9cb9b6731ffd4b25b51fd126b6c3e05b99c" +dependencies = [ + "cfg-if", + "fnv", + "lazy_static", + "memchr", + "parking_lot", + "protobuf", + "thiserror", +] + [[package]] name = "prometheus" version = "0.13.0" @@ -2254,12 +2544,6 @@ version = "2.25.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47c327e191621a2158159df97cdbc2e7074bb4e940275e35abf38eb3d2595754" -[[package]] -name = "quick-error" -version = "1.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" - [[package]] name = "quote" version = "1.0.14" @@ -2466,8 +2750,8 @@ version = "0.11.8" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "7c4e0a76dc12a116108933f6301b95e83634e0c47b0afbed6abbaa0601e99258" dependencies = [ - "base64", - "bytes", + "base64 0.13.0", + "bytes 1.1.0", "encoding_rs", "futures-core", "futures-util", @@ -2489,6 +2773,7 @@ dependencies = [ "serde_urlencoded", "tokio", "tokio-native-tls", + "tokio-util", "url", "wasm-bindgen", "wasm-bindgen-futures", @@ -2539,12 +2824,6 @@ version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f" -[[package]] -name = "safemem" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef703b7cb59335eae2eb93ceb664c0eb7ea6bf567079d843e09420219668e072" - [[package]] name = "schannel" version = "0.1.19" @@ -2564,12 +2843,6 @@ dependencies = [ "parking_lot", ] -[[package]] -name = "scoped-tls" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea6a9290e3c9cf0f18145ef7ffa62d68ee0bf5fcd651017e586dc7fd5da448c2" - [[package]] name = "scoped_threadpool" version = "0.1.9" @@ -2795,6 +3068,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "spin" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "511254be0c5bcf062b019a6c89c01a664aa359ded62f78aa72c6fc137c0590e5" + [[package]] name = "sqlformat" version = "0.1.8" @@ -2824,10 +3103,10 @@ checksum = "518be6f6fff5ca76f985d434f9c37f3662af279642acf730388f271dff7b9016" dependencies = [ "ahash", "atoi", - "base64", + "base64 0.13.0", "bitflags", "byteorder", - "bytes", + "bytes 1.1.0", "chrono", "crc", "crossbeam-channel", @@ -2948,6 +3227,12 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + [[package]] name = "subtle" version = "2.4.1" @@ -3118,7 +3403,7 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbbf1c778ec206785635ce8ad57fe52b3009ae9e0c9f574a728f3049d3e55838" dependencies = [ - "bytes", + "bytes 1.1.0", "libc", "memchr", "mio 0.7.14", @@ -3160,7 +3445,7 @@ checksum = "4b6c8b33df661b548dcd8f9bf87debb8c56c05657ed291122e1188698c2ece95" dependencies = [ "async-trait", "byteorder", - "bytes", + "bytes 1.1.0", "fallible-iterator", "futures", "log", @@ -3186,26 +3471,13 @@ dependencies = [ "tokio", ] -[[package]] -name = "tokio-tungstenite" -version = "0.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "511de3f85caf1c98983545490c3d09685fa8eb634e57eec22bb4db271f46cbd8" -dependencies = [ - "futures-util", - "log", - "pin-project", - "tokio", - "tungstenite", -] - [[package]] name = "tokio-util" version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e99e1983e5d376cd8eb4b66604d2e99e79f5bd988c3055891dcd8c9e2604cc0" dependencies = [ - "bytes", + "bytes 1.1.0", "futures-core", "futures-sink", "log", @@ -3213,6 +3485,15 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a31142970826733df8241ef35dc040ef98c679ab14d7c3e54d827099b3acecaa" +dependencies = [ + "serde", +] + [[package]] name = "tower-service" version = "0.3.1" @@ -3226,7 +3507,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"375a639232caf30edfc78e8d89b2d4c375515393e7af7e16f01cd96917fb2105" dependencies = [ "cfg-if", - "log", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -3364,34 +3644,6 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642" -[[package]] -name = "tungstenite" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0b2d8558abd2e276b0a8df5c05a2ec762609344191e5fd23e292c910e9165b5" -dependencies = [ - "base64", - "byteorder", - "bytes", - "http", - "httparse", - "log", - "rand 0.8.4", - "sha-1 0.9.8", - "thiserror", - "url", - "utf-8", -] - -[[package]] -name = "twoway" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59b11b2b5241ba34be09c3cc85a36e56e48f9888862e19cedf23336d35316ed1" -dependencies = [ - "memchr", -] - [[package]] name = "twoway" version = "0.2.2" @@ -3402,6 +3654,19 @@ dependencies = [ "unchecked-index", ] +[[package]] +name = "typed-headers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3179a61e9eccceead5f1574fd173cf2e162ac42638b9bf214c6ad0baf7efa24a" +dependencies = [ + "base64 0.11.0", + "bytes 0.5.6", + "chrono", + "http", + "mime", +] + [[package]] name = "typenum" version = "1.15.0" @@ -3462,6 +3727,16 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" +[[package]] +name = "universal-hash" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f214e8f697e925001e66ec2c6e37a4ef93f0f78c2eed7814394e10c62025b05" +dependencies = [ + "generic-array", + "subtle", +] + [[package]] name = "url" version = "2.2.2" @@ -3511,36 +3786,6 @@ dependencies = [ "try-lock", ] -[[package]] -name = "warp" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cef4e1e9114a4b7f1ac799f16ce71c14de5778500c5450ec6b7b920c55b587e" -dependencies = [ - "bytes", - "futures-channel", - "futures-util", - "headers", - "http", - "hyper", - "log", - "mime", - "mime_guess", - "multipart", - "percent-encoding", - "pin-project", - "scoped-tls", - "serde", - "serde_json", - "serde_urlencoded", - "tokio", - "tokio-stream", - "tokio-tungstenite", - "tokio-util", - "tower-service", - "tracing", -] - [[package]] name = "wasi" version = "0.9.0+wasi-snapshot-preview1" diff --git a/fuzzysearch-api/Cargo.toml b/fuzzysearch-api/Cargo.toml index cad9abf..791af27 100644 --- a/fuzzysearch-api/Cargo.toml +++ b/fuzzysearch-api/Cargo.toml @@ -10,6 +10,8 @@ tracing-subscriber = "0.3" tracing-futures = "0.2" tracing-log = "0.1" +thiserror = "1" + prometheus = { version = "0.13", features = ["process"] } lazy_static = "1" @@ -19,7 +21,7 @@ tracing-opentelemetry = "0.16" opentelemetry-http = "0.5" tokio = { version = "1", features = ["full"] } -tokio-stream = "0.1" +tokio-util = "0.6" futures = "0.3" @@ -29,8 +31,10 @@ bytes = "1" serde = { version = "1", features = ["derive"] } serde_json = "1" -warp = "0.3" -reqwest = { version = "0.11", features = ["multipart"] } +poem = { version = "1", features = ["multipart", "opentelemetry", "prometheus"] } +poem-openapi = { version = "1", features = ["chrono", "swagger-ui"] } + +reqwest = { version = "0.11", features = ["multipart", "stream"] } hyper = "0.14" sqlx = { version = "0.5", features = ["runtime-tokio-native-tls", 
"postgres", "macros", "json", "offline", "chrono"] } diff --git a/fuzzysearch-api/queries/handle_twitter.sql b/fuzzysearch-api/queries/handle_twitter.sql new file mode 100644 index 0000000..d2105d8 --- /dev/null +++ b/fuzzysearch-api/queries/handle_twitter.sql @@ -0,0 +1,9 @@ +SELECT + exists( + SELECT + 1 + FROM + twitter_user + WHERE + lower(data ->> 'screen_name') = lower($1) + ) "exists!"; diff --git a/fuzzysearch-api/queries/lookup_api_key.sql b/fuzzysearch-api/queries/lookup_api_key.sql new file mode 100644 index 0000000..ae69d66 --- /dev/null +++ b/fuzzysearch-api/queries/lookup_api_key.sql @@ -0,0 +1,12 @@ +SELECT + api_key.id, + api_key.name_limit, + api_key.image_limit, + api_key.hash_limit, + api_key.name, + account.email owner_email +FROM + api_key + JOIN account ON account.id = api_key.user_id +WHERE + api_key.key = $1 diff --git a/fuzzysearch-api/queries/lookup_furaffinity_file_id.sql b/fuzzysearch-api/queries/lookup_furaffinity_file_id.sql new file mode 100644 index 0000000..2b35f0a --- /dev/null +++ b/fuzzysearch-api/queries/lookup_furaffinity_file_id.sql @@ -0,0 +1,16 @@ +SELECT + submission.id, + submission.url, + submission.filename, + submission.file_id, + submission.rating, + submission.posted_at, + submission.hash_int hash, + artist.name artist +FROM + submission + LEFT JOIN artist ON artist.id = submission.artist_id +WHERE + file_id = $1 +LIMIT + 10; diff --git a/fuzzysearch-api/queries/lookup_hashes.sql b/fuzzysearch-api/queries/lookup_hashes.sql new file mode 100644 index 0000000..331677e --- /dev/null +++ b/fuzzysearch-api/queries/lookup_hashes.sql @@ -0,0 +1,76 @@ +WITH hashes AS ( + SELECT * FROM jsonb_to_recordset($1::jsonb) + AS hashes(searched_hash bigint, found_hash bigint, distance bigint) +) +SELECT + 'FurAffinity' site, + submission.id, + submission.hash_int hash, + submission.url, + submission.filename, + ARRAY(SELECT artist.name) artists, + submission.file_id, + null sources, + submission.rating, + submission.posted_at, + hashes.searched_hash, + hashes.distance +FROM hashes +JOIN submission ON hashes.found_hash = submission.hash_int +JOIN artist ON submission.artist_id = artist.id +WHERE hash_int IN (SELECT hashes.found_hash) +UNION ALL +SELECT + 'e621' site, + e621.id, + e621.hash, + e621.data->'file'->>'url' url, + (e621.data->'file'->>'md5') || '.' 
|| (e621.data->'file'->>'ext') filename, + ARRAY(SELECT jsonb_array_elements_text(e621.data->'tags'->'artist')) artists, + null file_id, + ARRAY(SELECT jsonb_array_elements_text(e621.data->'sources')) sources, + e621.data->>'rating' rating, + to_timestamp(data->>'created_at', 'YYYY-MM-DD"T"HH24:MI:SS"Z"') posted_at, + hashes.searched_hash, + hashes.distance +FROM hashes +JOIN e621 ON hashes.found_hash = e621.hash +WHERE e621.hash IN (SELECT hashes.found_hash) +UNION ALL +SELECT + 'Weasyl' site, + weasyl.id, + weasyl.hash, + weasyl.data->>'link' url, + null filename, + ARRAY(SELECT weasyl.data->>'owner_login') artists, + null file_id, + null sources, + weasyl.data->>'rating' rating, + to_timestamp(data->>'posted_at', 'YYYY-MM-DD"T"HH24:MI:SS"Z"') posted_at, + hashes.searched_hash, + hashes.distance +FROM hashes +JOIN weasyl ON hashes.found_hash = weasyl.hash +WHERE weasyl.hash IN (SELECT hashes.found_hash) +UNION ALL +SELECT + 'Twitter' site, + tweet.id, + tweet_media.hash, + tweet_media.url, + null filename, + ARRAY(SELECT tweet.data->'user'->>'screen_name') artists, + null file_id, + null sources, + CASE + WHEN (tweet.data->'possibly_sensitive')::boolean IS true THEN 'adult' + WHEN (tweet.data->'possibly_sensitive')::boolean IS false THEN 'general' + END rating, + to_timestamp(tweet.data->>'created_at', 'DY Mon DD HH24:MI:SS +0000 YYYY') posted_at, + hashes.searched_hash, + hashes.distance +FROM hashes +JOIN tweet_media ON hashes.found_hash = tweet_media.hash +JOIN tweet ON tweet_media.tweet_id = tweet.id +WHERE tweet_media.hash IN (SELECT hashes.found_hash) diff --git a/fuzzysearch-api/queries/update_rate_limit.sql b/fuzzysearch-api/queries/update_rate_limit.sql new file mode 100644 index 0000000..590141a --- /dev/null +++ b/fuzzysearch-api/queries/update_rate_limit.sql @@ -0,0 +1,7 @@ +INSERT INTO + rate_limit (api_key_id, time_window, group_name, count) +VALUES + ($1, $2, $3, $4) ON CONFLICT ON CONSTRAINT unique_window DO +UPDATE +set + count = rate_limit.count + $4 RETURNING rate_limit.count diff --git a/fuzzysearch-api/src/filters.rs b/fuzzysearch-api/src/filters.rs deleted file mode 100644 index c26e39a..0000000 --- a/fuzzysearch-api/src/filters.rs +++ /dev/null @@ -1,130 +0,0 @@ -use crate::{handlers, Pool}; -use crate::{types::*, Endpoints}; -use std::convert::Infallible; -use tracing_futures::Instrument; -use warp::{Filter, Rejection, Reply}; - -pub fn search( - db: Pool, - bkapi: bkapi_client::BKApiClient, - endpoints: Endpoints, -) -> impl Filter + Clone { - search_image(db.clone(), bkapi.clone(), endpoints) - .or(search_hashes(db.clone(), bkapi.clone())) - .or(search_file(db.clone())) - .or(check_handle(db.clone())) - .or(search_image_by_url(db, bkapi)) -} - -pub fn search_file(db: Pool) -> impl Filter + Clone { - warp::path("file") - .and(warp::header::headers_cloned()) - .and(warp::get()) - .and(warp::query::()) - .and(with_pool(db)) - .and(with_api_key()) - .and_then(|headers, opts, db, api_key| { - use tracing_opentelemetry::OpenTelemetrySpanExt; - - let span = tracing::info_span!("search_file", ?opts); - span.set_parent(with_telem(headers)); - span.in_scope(|| handlers::search_file(opts, db, api_key).in_current_span()) - }) -} - -pub fn search_image( - db: Pool, - bkapi: bkapi_client::BKApiClient, - endpoints: Endpoints, -) -> impl Filter + Clone { - warp::path("image") - .and(warp::header::headers_cloned()) - .and(warp::post()) - .and(warp::multipart::form().max_length(1024 * 1024 * 10)) - .and(warp::query::()) - .and(with_pool(db)) - .and(with_bkapi(bkapi)) - 
.and(with_api_key()) - .and(with_endpoints(endpoints)) - .and_then(|headers, form, opts, pool, bkapi, api_key, endpoints| { - use tracing_opentelemetry::OpenTelemetrySpanExt; - - let span = tracing::info_span!("search_image", ?opts); - span.set_parent(with_telem(headers)); - span.in_scope(|| { - handlers::search_image(form, opts, pool, bkapi, api_key, endpoints) - .in_current_span() - }) - }) -} - -pub fn search_image_by_url( - db: Pool, - bkapi: bkapi_client::BKApiClient, -) -> impl Filter + Clone { - warp::path("url") - .and(warp::get()) - .and(warp::query::()) - .and(with_pool(db)) - .and(with_bkapi(bkapi)) - .and(with_api_key()) - .and_then(handlers::search_image_by_url) -} - -pub fn search_hashes( - db: Pool, - bkapi: bkapi_client::BKApiClient, -) -> impl Filter + Clone { - warp::path("hashes") - .and(warp::header::headers_cloned()) - .and(warp::get()) - .and(warp::query::()) - .and(with_pool(db)) - .and(with_bkapi(bkapi)) - .and(with_api_key()) - .and_then(|headers, opts, db, bkapi, api_key| { - use tracing_opentelemetry::OpenTelemetrySpanExt; - - let span = tracing::info_span!("search_hashes", ?opts); - span.set_parent(with_telem(headers)); - span.in_scope(|| handlers::search_hashes(opts, db, bkapi, api_key).in_current_span()) - }) -} - -pub fn check_handle(db: Pool) -> impl Filter + Clone { - warp::path("handle") - .and(warp::get()) - .and(warp::query::()) - .and(with_pool(db)) - .and_then(handlers::check_handle) -} - -fn with_api_key() -> impl Filter + Clone { - warp::header::("x-api-key") -} - -fn with_pool(db: Pool) -> impl Filter + Clone { - warp::any().map(move || db.clone()) -} - -fn with_bkapi( - bkapi: bkapi_client::BKApiClient, -) -> impl Filter + Clone { - warp::any().map(move || bkapi.clone()) -} - -fn with_endpoints( - endpoints: Endpoints, -) -> impl Filter + Clone { - warp::any().map(move || endpoints.clone()) -} - -fn with_telem(headers: warp::http::HeaderMap) -> opentelemetry::Context { - let remote_context = opentelemetry::global::get_text_map_propagator(|propagator| { - propagator.extract(&opentelemetry_http::HeaderExtractor(&headers)) - }); - - tracing::trace!(?remote_context, "Got remote context"); - - remote_context -} diff --git a/fuzzysearch-api/src/handlers.rs b/fuzzysearch-api/src/handlers.rs deleted file mode 100644 index dbaccbc..0000000 --- a/fuzzysearch-api/src/handlers.rs +++ /dev/null @@ -1,515 +0,0 @@ -use futures::StreamExt; -use futures::TryStreamExt; -use hyper::StatusCode; -use lazy_static::lazy_static; -use prometheus::{register_histogram, register_int_counter, Histogram, IntCounter}; -use std::convert::TryInto; -use tracing::{span, warn}; -use tracing_futures::Instrument; -use warp::{Rejection, Reply}; - -use crate::models::image_query; -use crate::types::*; -use crate::Endpoints; -use crate::{early_return, rate_limit, Pool}; -use fuzzysearch_common::{ - trace::InjectContext, - types::{SearchResult, SiteInfo}, -}; - -lazy_static! 
{ - static ref IMAGE_HASH_DURATION: Histogram = register_histogram!( - "fuzzysearch_api_image_hash_seconds", - "Duration to perform an image hash operation" - ) - .unwrap(); - static ref VIDEO_HASH_DURATION: Histogram = register_histogram!( - "fuzzysearch_api_video_hash_seconds", - "Duration to perform a video hash operation" - ) - .unwrap(); - static ref IMAGE_URL_DOWNLOAD_DURATION: Histogram = register_histogram!( - "fuzzysearch_api_image_url_download_seconds", - "Duration to download an image from a provided URL" - ) - .unwrap(); - static ref UNHANDLED_REJECTIONS: IntCounter = register_int_counter!( - "fuzzysearch_api_unhandled_rejections_count", - "Number of unhandled HTTP rejections" - ) - .unwrap(); -} - -#[derive(Debug)] -enum Error { - Postgres(sqlx::Error), - Reqwest(reqwest::Error), - Warp(warp::Error), - InvalidData, - InvalidImage, - ApiKey, - RateLimit, -} - -impl warp::Reply for Error { - fn into_response(self) -> warp::reply::Response { - let msg = match self { - Error::Postgres(_) | Error::Reqwest(_) | Error::Warp(_) => ErrorMessage { - code: 500, - message: "Internal server error".to_string(), - }, - Error::InvalidData => ErrorMessage { - code: 400, - message: "Invalid data provided".to_string(), - }, - Error::InvalidImage => ErrorMessage { - code: 400, - message: "Invalid image provided".to_string(), - }, - Error::ApiKey => ErrorMessage { - code: 401, - message: "Invalid API key".to_string(), - }, - Error::RateLimit => ErrorMessage { - code: 429, - message: "Too many requests".to_string(), - }, - }; - - let body = hyper::body::Body::from(serde_json::to_string(&msg).unwrap()); - - warp::http::Response::builder() - .status(msg.code) - .body(body) - .unwrap() - } -} - -impl From for Error { - fn from(err: sqlx::Error) -> Self { - Error::Postgres(err) - } -} - -impl From for Error { - fn from(err: reqwest::Error) -> Self { - Error::Reqwest(err) - } -} - -impl From for Error { - fn from(err: warp::Error) -> Self { - Self::Warp(err) - } -} - -#[tracing::instrument(skip(endpoints, form))] -async fn hash_input( - endpoints: &Endpoints, - mut form: warp::multipart::FormData, -) -> Result { - let mut image_part = None; - - tracing::debug!("looking at image parts"); - while let Ok(Some(part)) = form.try_next().await { - if part.name() == "image" { - image_part = Some(part); - } - } - - let image_part = image_part.ok_or(Error::InvalidImage)?; - - tracing::debug!("found image part, reading data"); - let bytes = image_part - .stream() - .fold(bytes::BytesMut::new(), |mut buf, chunk| { - use bytes::BufMut; - - buf.put(chunk.unwrap()); - async move { buf } - }) - .await; - let part = reqwest::multipart::Part::bytes(bytes.to_vec()); - - let form = reqwest::multipart::Form::new().part("image", part); - - tracing::debug!("sending image to hash input service"); - let client = reqwest::Client::new(); - let resp = client - .post(&endpoints.hash_input) - .inject_context() - .multipart(form) - .send() - .await?; - - tracing::debug!("got response"); - if resp.status() != StatusCode::OK { - return Err(Error::InvalidImage); - } - - let hash: i64 = resp - .text() - .await? 
- .parse() - .map_err(|_err| Error::InvalidImage)?; - - Ok(hash) -} - -pub async fn search_image( - form: warp::multipart::FormData, - opts: ImageSearchOpts, - db: Pool, - bkapi: bkapi_client::BKApiClient, - api_key: String, - endpoints: Endpoints, -) -> Result, Rejection> { - let image_remaining = rate_limit!(&api_key, &db, image_limit, "image"); - let hash_remaining = rate_limit!(&api_key, &db, hash_limit, "hash"); - - let num = early_return!(hash_input(&endpoints, form).await); - - let mut items = { - if opts.search_type == Some(ImageSearchType::Force) { - image_query(db.clone(), bkapi.clone(), vec![num], 10) - .await - .unwrap() - } else { - let results = image_query(db.clone(), bkapi.clone(), vec![num], 0) - .await - .unwrap(); - if results.is_empty() && opts.search_type != Some(ImageSearchType::Exact) { - image_query(db.clone(), bkapi.clone(), vec![num], 10) - .await - .unwrap() - } else { - results - } - } - }; - - items.sort_by(|a, b| { - a.distance - .unwrap_or(u64::max_value()) - .partial_cmp(&b.distance.unwrap_or(u64::max_value())) - .unwrap() - }); - - let similarity = ImageSimilarity { - hash: num, - matches: items, - }; - - let resp = warp::http::Response::builder() - .header("x-image-hash", num.to_string()) - .header("x-rate-limit-total-image", image_remaining.1.to_string()) - .header( - "x-rate-limit-remaining-image", - image_remaining.0.to_string(), - ) - .header("x-rate-limit-total-hash", hash_remaining.1.to_string()) - .header("x-rate-limit-remaining-hash", hash_remaining.0.to_string()) - .header("content-type", "application/json") - .body(serde_json::to_string(&similarity).unwrap()) - .unwrap(); - - Ok(Box::new(resp)) -} - -pub async fn search_hashes( - opts: HashSearchOpts, - db: Pool, - bkapi: bkapi_client::BKApiClient, - api_key: String, -) -> Result, Rejection> { - let pool = db.clone(); - - let hashes: Vec = opts - .hashes - .split(',') - .take(10) - .filter_map(|hash| hash.parse::().ok()) - .collect(); - - if hashes.is_empty() { - return Ok(Box::new(Error::InvalidData)); - } - - let image_remaining = rate_limit!(&api_key, &db, image_limit, "image", hashes.len() as i16); - - let results = - early_return!(image_query(pool, bkapi, hashes.clone(), opts.distance.unwrap_or(10)).await); - - let resp = warp::http::Response::builder() - .header("x-rate-limit-total-image", image_remaining.1.to_string()) - .header( - "x-rate-limit-remaining-image", - image_remaining.0.to_string(), - ) - .header("content-type", "application/json") - .body(serde_json::to_string(&results).unwrap()) - .unwrap(); - - Ok(Box::new(resp)) -} - -pub async fn search_file( - opts: FileSearchOpts, - db: Pool, - api_key: String, -) -> Result, Rejection> { - use sqlx::Row; - - let file_remaining = rate_limit!(&api_key, &db, name_limit, "file"); - - let query = if let Some(ref id) = opts.id { - sqlx::query( - "SELECT - submission.id, - submission.url, - submission.filename, - submission.file_id, - submission.rating, - submission.posted_at, - artist.name, - hashes.id hash_id - FROM - submission - JOIN artist - ON artist.id = submission.artist_id - JOIN hashes - ON hashes.furaffinity_id = submission.id - WHERE - file_id = $1 - LIMIT 10", - ) - .bind(id) - } else if let Some(ref name) = opts.name { - sqlx::query( - "SELECT - submission.id, - submission.url, - submission.filename, - submission.file_id, - submission.rating, - submission.posted_at, - artist.name, - hashes.id hash_id - FROM - submission - JOIN artist - ON artist.id = submission.artist_id - JOIN hashes - ON hashes.furaffinity_id = submission.id - 
WHERE - lower(filename) = lower($1) - LIMIT 10", - ) - .bind(name) - } else if let Some(ref url) = opts.url { - sqlx::query( - "SELECT - submission.id, - submission.url, - submission.filename, - submission.file_id, - submission.rating, - submission.posted_at, - artist.name, - hashes.id hash_id - FROM - submission - JOIN artist - ON artist.id = submission.artist_id - JOIN hashes - ON hashes.furaffinity_id = submission.id - WHERE - lower(url) = lower($1) - LIMIT 10", - ) - .bind(url) - } else if let Some(ref site_id) = opts.site_id { - sqlx::query( - "SELECT - submission.id, - submission.url, - submission.filename, - submission.file_id, - submission.rating, - submission.posted_at, - artist.name, - hashes.id hash_id - FROM - submission - JOIN artist - ON artist.id = submission.artist_id - JOIN hashes - ON hashes.furaffinity_id = submission.id - WHERE - submission.id = $1 - LIMIT 10", - ) - .bind(site_id) - } else { - return Ok(Box::new(Error::InvalidData)); - }; - - let matches: Result, _> = query - .map(|row| SearchResult { - site_id: row.get::("id") as i64, - site_id_str: row.get::("id").to_string(), - url: row.get("url"), - filename: row.get("filename"), - posted_at: row.get("posted_at"), - artists: row - .get::, _>("name") - .map(|artist| vec![artist]), - distance: None, - hash: None, - searched_hash: None, - site_info: Some(SiteInfo::FurAffinity { - file_id: row.get("file_id"), - }), - rating: row - .get::, _>("rating") - .and_then(|rating| rating.parse().ok()), - }) - .fetch_all(&db) - .await; - - let matches = early_return!(matches); - - let resp = warp::http::Response::builder() - .header("x-rate-limit-total-file", file_remaining.1.to_string()) - .header("x-rate-limit-remaining-file", file_remaining.0.to_string()) - .header("content-type", "application/json") - .body(serde_json::to_string(&matches).unwrap()) - .unwrap(); - - Ok(Box::new(resp)) -} - -pub async fn check_handle(opts: HandleOpts, db: Pool) -> Result, Rejection> { - let exists = if let Some(handle) = opts.twitter { - let result = sqlx::query_scalar!("SELECT exists(SELECT 1 FROM twitter_user WHERE lower(data->>'screen_name') = lower($1))", handle) - .fetch_optional(&db) - .await - .map(|row| row.flatten().unwrap_or(false)); - - early_return!(result) - } else { - false - }; - - Ok(Box::new(warp::reply::json(&exists))) -} - -pub async fn search_image_by_url( - opts: UrlSearchOpts, - db: Pool, - bkapi: bkapi_client::BKApiClient, - api_key: String, -) -> Result, Rejection> { - use bytes::BufMut; - - let url = opts.url; - - let image_remaining = rate_limit!(&api_key, &db, image_limit, "image"); - let hash_remaining = rate_limit!(&api_key, &db, hash_limit, "hash"); - - let _timer = IMAGE_URL_DOWNLOAD_DURATION.start_timer(); - - let mut resp = match reqwest::get(&url).await { - Ok(resp) => resp, - Err(_err) => return Ok(Box::new(Error::InvalidImage)), - }; - - let content_length = resp - .headers() - .get("content-length") - .and_then(|len| { - String::from_utf8_lossy(len.as_bytes()) - .parse::() - .ok() - }) - .unwrap_or(0); - - if content_length > 10_000_000 { - return Ok(Box::new(Error::InvalidImage)); - } - - let mut buf = bytes::BytesMut::with_capacity(content_length); - - while let Some(chunk) = early_return!(resp.chunk().await) { - if buf.len() + chunk.len() > 10_000_000 { - return Ok(Box::new(Error::InvalidImage)); - } - - buf.put(chunk); - } - - drop(_timer); - - let _timer = IMAGE_HASH_DURATION.start_timer(); - let hash = tokio::task::spawn_blocking(move || { - let hasher = fuzzysearch_common::get_hasher(); - let image = 
image::load_from_memory(&buf).unwrap(); - hasher.hash_image(&image) - }) - .instrument(span!(tracing::Level::TRACE, "hashing image")) - .await - .unwrap(); - drop(_timer); - - let hash: [u8; 8] = hash.as_bytes().try_into().unwrap(); - let num = i64::from_be_bytes(hash); - - let results = image_query(db.clone(), bkapi.clone(), vec![num], 3) - .await - .unwrap(); - - let resp = warp::http::Response::builder() - .header("x-image-hash", num.to_string()) - .header("x-rate-limit-total-image", image_remaining.1.to_string()) - .header( - "x-rate-limit-remaining-image", - image_remaining.0.to_string(), - ) - .header("x-rate-limit-total-hash", hash_remaining.1.to_string()) - .header("x-rate-limit-remaining-hash", hash_remaining.0.to_string()) - .header("content-type", "application/json") - .body(serde_json::to_string(&results).unwrap()) - .unwrap(); - - Ok(Box::new(resp)) -} - -#[tracing::instrument] -pub async fn handle_rejection(err: Rejection) -> Result, std::convert::Infallible> { - warn!("had rejection"); - - UNHANDLED_REJECTIONS.inc(); - - let (code, message) = if err.is_not_found() { - ( - warp::http::StatusCode::NOT_FOUND, - "This page does not exist", - ) - } else if err.find::().is_some() { - return Ok(Box::new(Error::InvalidData) as Box); - } else if err.find::().is_some() { - return Ok(Box::new(Error::InvalidData) as Box); - } else { - ( - warp::http::StatusCode::INTERNAL_SERVER_ERROR, - "An unknown error occured", - ) - }; - - let json = warp::reply::json(&ErrorMessage { - code: code.as_u16(), - message: message.into(), - }); - - Ok(Box::new(warp::reply::with_status(json, code))) -} diff --git a/fuzzysearch-api/src/main.rs b/fuzzysearch-api/src/main.rs index a9c9e6e..482974f 100644 --- a/fuzzysearch-api/src/main.rs +++ b/fuzzysearch-api/src/main.rs @@ -1,12 +1,16 @@ -#![recursion_limit = "256"] +use std::{borrow::Cow, str::FromStr}; -use warp::Filter; - -mod filters; -mod handlers; -mod models; -mod types; -mod utils; +use bkapi_client::BKApiClient; +use bytes::BufMut; +use hyper::StatusCode; +use poem::{error::ResponseError, listener::TcpListener, web::Data, EndpointExt, Request, Route}; +use poem_openapi::{ + auth::ApiKey, + param::{Path, Query}, + payload::{Json, Response}, + types::multipart::Upload, + Multipart, Object, OneOf, OpenApi, OpenApiService, SecurityScheme, +}; type Pool = sqlx::PgPool; @@ -16,14 +20,566 @@ pub struct Endpoints { pub bkapi: String, } +struct Api; + +/// Simple authentication using a static API key. Must be manually requested. 
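For orientation, a minimal sketch of how a client authenticates against the scheme defined just below; the header name and the `/hashes` query parameters match this patch, while the base URL (the default `SERVER_ENDPOINT`), the key, and the hash value are placeholders:

```rust
// Hypothetical client call; run inside an async fn returning a reqwest Result.
// "placeholder-key" and the hash value are illustrative, not real data.
let resp = reqwest::Client::new()
    .get("http://localhost:8080/hashes")
    .header("X-Api-Key", "placeholder-key")
    .query(&[("hashes", "1234"), ("distance", "3")])
    .send()
    .await?;

// Remaining quota is reported back via response headers.
let remaining_image = resp.headers().get("x-rate-limit-remaining-image");
```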
+#[derive(SecurityScheme)] +#[oai( + type = "api_key", + key_name = "X-Api-Key", + in = "header", + checker = "api_checker" +)] +struct ApiKeyAuthorization(UserApiKey); + +struct UserApiKey { + id: i32, + name: Option, + owner_email: String, + name_limit: i16, + image_limit: i16, + hash_limit: i16, +} + +async fn api_checker(req: &Request, api_key: ApiKey) -> Option { + let pool: &Pool = req.data().unwrap(); + + sqlx::query_file_as!(UserApiKey, "queries/lookup_api_key.sql", api_key.key) + .fetch_optional(pool) + .await + .ok() + .flatten() +} + +#[derive(poem_openapi::Enum, Debug, PartialEq)] +#[oai(rename_all = "snake_case")] +enum KnownServiceName { + Twitter, +} + +#[derive(poem_openapi::Enum, Debug)] +#[oai(rename_all = "lowercase")] +enum Rating { + General, + Mature, + Adult, +} + +impl FromStr for Rating { + type Err = String; + + fn from_str(s: &str) -> Result { + let rating = match s { + "g" | "s" | "general" => Self::General, + "m" | "q" | "mature" => Self::Mature, + "a" | "e" | "adult" | "explicit" => Self::Adult, + _ => return Err(format!("unknown rating: {}", s)), + }; + + Ok(rating) + } +} + +#[derive(Object, Debug)] +#[oai(rename = "FurAffinity")] +struct FurAffinityExtra { + file_id: i32, +} + +#[derive(Object, Debug)] +#[oai(rename = "e621")] +struct E621Extra { + sources: Vec, +} + +#[derive(OneOf, Debug)] +#[oai(property_name = "site")] +enum SiteExtraData { + FurAffinity(FurAffinityExtra), + E621(E621Extra), +} + +#[derive(Object, Debug)] +struct HashLookupResult { + site_name: String, + site_id: i64, + site_id_str: String, + site_extra_data: Option, + + url: String, + filename: String, + artists: Option>, + rating: Option, + posted_at: Option>, + + hash: i64, + searched_hash: i64, + distance: u64, +} + +#[derive(serde::Serialize)] +struct HashSearch { + searched_hash: i64, + found_hash: i64, + distance: u64, +} + +#[derive(Debug, thiserror::Error)] +enum Error { + #[error("database error: {0}")] + Database(#[from] sqlx::Error), + #[error("network error: {0}")] + Network(#[from] reqwest::Error), + #[error("json error: {0}")] + Json(#[from] serde_json::Error), + + #[error("bad request: {0}")] + BadRequest(#[from] BadRequest), +} + +impl ResponseError for Error { + fn status(&self) -> hyper::StatusCode { + hyper::StatusCode::INTERNAL_SERVER_ERROR + } +} + +#[derive(Debug, thiserror::Error)] +#[error("bad request: {message}")] +struct BadRequest { + message: Cow<'static, str>, +} + +impl BadRequest { + fn with_message>>(message: M) -> Self { + Self { + message: message.into(), + } + } +} + +impl ResponseError for BadRequest { + fn status(&self) -> hyper::StatusCode { + hyper::StatusCode::BAD_REQUEST + } +} + +#[derive(Debug, thiserror::Error)] +#[error("rate limited")] +struct RateLimited { + bucket: String, +} + +impl ResponseError for RateLimited { + fn status(&self) -> hyper::StatusCode { + hyper::StatusCode::TOO_MANY_REQUESTS + } +} + +/// The status of an API key's rate limit. +#[derive(Debug, PartialEq)] +pub enum RateLimit { + /// This key is limited, we should deny the request. + Limited, + /// This key is available, contains the number of requests made. 
+ Available((i16, i16)), +} + +async fn update_rate_limit( + pool: &Pool, + key_id: i32, + key_group_limit: i16, + group_name: &'static str, + incr_by: i16, +) -> Result { + let now = chrono::Utc::now(); + let timestamp = now.timestamp(); + let time_window = timestamp - (timestamp % 60); + + let count: i16 = sqlx::query_file_scalar!( + "queries/update_rate_limit.sql", + key_id, + time_window, + group_name, + incr_by + ) + .fetch_one(pool) + .await?; + + if count > key_group_limit { + Ok(RateLimit::Limited) + } else { + Ok(RateLimit::Available(( + key_group_limit - count, + key_group_limit, + ))) + } +} + +macro_rules! rate_limit { + ($api_key:expr, $db:expr, $limit:tt, $group:expr) => { + rate_limit!($api_key, $db, $limit, $group, 1) + }; + + ($api_key:expr, $db:expr, $limit:tt, $group:expr, $incr_by:expr) => {{ + let rate_limit = update_rate_limit($db, $api_key.0.id, $api_key.0.$limit, $group, $incr_by) + .await + .map_err(Error::from)?; + + match rate_limit { + RateLimit::Limited => { + return Err(RateLimited { + bucket: $group.to_string(), + } + .into()) + } + RateLimit::Available(count) => count, + } + }}; +} + +async fn lookup_hashes( + pool: &Pool, + bkapi: &BKApiClient, + hashes: &[i64], + distance: u64, +) -> Result, Error> { + if distance > 10 { + return Err(BadRequest::with_message(format!("distance too large: {}", distance)).into()); + } + + let index_hashes: Vec<_> = bkapi + .search_many(hashes, distance) + .await? + .into_iter() + .flat_map(|results| { + let hash = results.hash; + + results.hashes.into_iter().map(move |result| HashSearch { + searched_hash: hash, + found_hash: result.hash, + distance: result.distance, + }) + }) + .collect(); + + let data = serde_json::to_value(index_hashes)?; + + let results = sqlx::query_file!("queries/lookup_hashes.sql", data) + .map(|row| { + let site_extra_data = match row.site.as_deref() { + Some("FurAffinity") => Some(SiteExtraData::FurAffinity(FurAffinityExtra { + file_id: row.file_id.unwrap_or(-1), + })), + Some("e621") => Some(SiteExtraData::E621(E621Extra { + sources: row.sources.unwrap_or_default(), + })), + _ => None, + }; + + HashLookupResult { + site_name: row.site.unwrap_or_default(), + site_id: row.id.unwrap_or_default(), + site_id_str: row.id.unwrap_or_default().to_string(), + site_extra_data, + url: row.url.unwrap_or_default(), + filename: row.filename.unwrap_or_default(), + artists: row.artists, + posted_at: row.posted_at, + rating: row.rating.and_then(|rating| rating.parse().ok()), + hash: row.hash.unwrap_or_default(), + searched_hash: row.searched_hash.unwrap_or_default(), + distance: row.distance.unwrap_or_default() as u64, + } + }) + .fetch_all(pool) + .await?; + + Ok(results) +} + +#[derive(Debug, Multipart)] +struct ImageSearchPayload { + image: Upload, +} + +async fn hash_input( + client: &reqwest::Client, + hash_input_endpoint: &str, + image: reqwest::Body, +) -> Result { + let part = reqwest::multipart::Part::stream(image); + let form = reqwest::multipart::Form::new().part("image", part); + + let resp = client + .post(hash_input_endpoint) + .multipart(form) + .send() + .await?; + + if resp.status() != StatusCode::OK { + return Err(BadRequest::with_message("invalid image").into()); + } + + match resp.text().await?.parse() { + Ok(hash) => Ok(hash), + Err(_err) => Err(BadRequest::with_message("invalid image").into()), + } +} + +#[derive(poem_openapi::Enum, Debug, PartialEq)] +#[oai(rename_all = "lowercase")] +enum ImageSearchType { + Force, + Close, + Exact, +} + +#[derive(Object, Debug)] +struct ImageSearchResult { + 
hash: i64, + matches: Vec, +} + +#[derive(Object, Debug)] +struct FurAffinityFile { + id: i32, + url: Option, + filename: Option, + file_id: Option, + rating: Option, + posted_at: Option>, + artist: Option, + hash: Option, +} + +#[OpenApi] +impl Api { + /// Lookup images by hash + /// + /// Perform a lookup for up to 10 given hashes. + #[oai(path = "/hashes", method = "get")] + async fn hashes( + &self, + pool: Data<&Pool>, + bkapi: Data<&BKApiClient>, + auth: ApiKeyAuthorization, + hashes: Query, + distance: Query>, + ) -> poem::Result>>> { + let hashes: Vec = hashes + .0 + .split(',') + .take(10) + .filter_map(|hash| hash.parse().ok()) + .collect(); + + let image_remaining = rate_limit!(auth, pool.0, image_limit, "image", hashes.len() as i16); + + if hashes.is_empty() { + return Err(BadRequest::with_message("hashes must be provided").into()); + } + + let results = lookup_hashes(&pool, &bkapi, &hashes, distance.unwrap_or(3)).await?; + + let resp = Response::new(Json(results)) + .header("x-rate-limit-total-image", image_remaining.1) + .header("x-rate-limit-remaining-image", image_remaining.0); + + Ok(resp) + } + + /// Lookup images by image + /// + /// Perform a lookup with a given image. + #[oai(path = "/image", method = "post")] + async fn image( + &self, + pool: Data<&Pool>, + bkapi: Data<&BKApiClient>, + client: Data<&reqwest::Client>, + endpoints: Data<&Endpoints>, + auth: ApiKeyAuthorization, + search_type: Query>, + payload: ImageSearchPayload, + ) -> poem::Result>> { + let image_remaining = rate_limit!(auth, pool.0, image_limit, "image"); + let hash_remaining = rate_limit!(auth, pool.0, hash_limit, "hash"); + + let stream = tokio_util::io::ReaderStream::new(payload.image.into_async_read()); + let body = reqwest::Body::wrap_stream(stream); + + let hash = hash_input(&client, &endpoints.hash_input, body).await?; + + let search_type = search_type.0.unwrap_or(ImageSearchType::Close); + let hashes = vec![hash]; + + let mut results = { + if search_type == ImageSearchType::Force { + lookup_hashes(pool.0, bkapi.0, &hashes, 10).await? + } else { + let results = lookup_hashes(pool.0, bkapi.0, &hashes, 0).await?; + + if results.is_empty() && search_type != ImageSearchType::Exact { + lookup_hashes(pool.0, bkapi.0, &hashes, 10).await? + } else { + results + } + } + }; + + results.sort_by(|a, b| a.distance.partial_cmp(&b.distance).unwrap()); + + let resp = Response::new(Json(ImageSearchResult { + hash, + matches: results, + })) + .header("x-image-hash", hash) + .header("x-rate-limit-total-image", image_remaining.1) + .header("x-rate-limit-remaining-image", image_remaining.0) + .header("x-rate-limit-total-hash", hash_remaining.1) + .header("x-rate-limit-remaining-hash", hash_remaining.0); + + Ok(resp) + } + + /// Lookup images by image URL + /// + /// Perform a lookup for an image at the given URL. Image may not exceed 10MB. 
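The handler below enforces the 10MB cap twice: once against the advertised `content-length` and again while accumulating chunks, so a missing or dishonest header cannot bypass the limit. A condensed sketch of that pattern, using reqwest's `content_length()` helper instead of parsing the header by hand:

```rust
use bytes::BufMut;

const MAX_BYTES: usize = 10_000_000;

// Sketch only: the real handler returns BadRequest errors instead of &str.
async fn download_capped(mut resp: reqwest::Response) -> Result<bytes::BytesMut, &'static str> {
    let hint = resp.content_length().unwrap_or(0) as usize;
    if hint > MAX_BYTES {
        return Err("image too large");
    }

    let mut buf = bytes::BytesMut::with_capacity(hint);
    while let Some(chunk) = resp.chunk().await.map_err(|_| "download failed")? {
        if buf.len() + chunk.len() > MAX_BYTES {
            return Err("image too large"); // header lied or was absent
        }
        buf.put(chunk);
    }

    Ok(buf)
}
```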
+ #[oai(path = "/url", method = "get")] + async fn url( + &self, + pool: Data<&Pool>, + bkapi: Data<&BKApiClient>, + client: Data<&reqwest::Client>, + endpoints: Data<&Endpoints>, + auth: ApiKeyAuthorization, + url: Query, + distance: Query>, + ) -> poem::Result>> { + let image_remaining = rate_limit!(auth, pool.0, image_limit, "image"); + let hash_remaining = rate_limit!(auth, pool.0, hash_limit, "hash"); + + let mut resp = client.get(&url.0).send().await.map_err(Error::from)?; + + let distance = distance.unwrap_or(3); + + let content_length = resp + .headers() + .get("content-length") + .and_then(|len| { + String::from_utf8_lossy(len.as_bytes()) + .parse::() + .ok() + }) + .unwrap_or(0); + + if content_length > 10_000_000 { + return Err(BadRequest::with_message(format!( + "image too large: {} bytes", + content_length + )) + .into()); + } + + let mut buf = bytes::BytesMut::with_capacity(content_length); + + while let Some(chunk) = resp.chunk().await.map_err(Error::from)? { + if buf.len() + chunk.len() > 10_000_000 { + return Err(BadRequest::with_message(format!( + "image too large: {} bytes", + content_length + )) + .into()); + } + + buf.put(chunk); + } + + let body = reqwest::Body::from(buf.to_vec()); + let hash = hash_input(&client, &endpoints.hash_input, body).await?; + + let results = lookup_hashes(pool.0, bkapi.0, &[hash], distance).await?; + + let resp = Response::new(Json(ImageSearchResult { + hash, + matches: results, + })) + .header("x-image-hash", hash) + .header("x-rate-limit-total-image", image_remaining.1) + .header("x-rate-limit-remaining-image", image_remaining.0) + .header("x-rate-limit-total-hash", hash_remaining.1) + .header("x-rate-limit-remaining-hash", hash_remaining.0); + + Ok(resp) + } + + /// Lookup FurAffinity submission by File ID + #[oai(path = "/furaffinity/file_id", method = "get")] + async fn furaffinity_data( + &self, + pool: Data<&Pool>, + auth: ApiKeyAuthorization, + file_id: Query, + ) -> poem::Result>>> { + let file_remaining = rate_limit!(auth, pool.0, image_limit, "file"); + + let matches = sqlx::query_file!("queries/lookup_furaffinity_file_id.sql", file_id.0) + .map(|row| FurAffinityFile { + id: row.id, + url: row.url, + filename: row.filename, + file_id: row.file_id, + rating: row.rating.and_then(|rating| rating.parse().ok()), + posted_at: row.posted_at, + artist: Some(row.artist), + hash: row.hash, + }) + .fetch_all(pool.0) + .await + .map_err(Error::from)?; + + let resp = Response::new(Json(matches)) + .header("x-rate-limit-total-file", file_remaining.1) + .header("x-rate-limit-remaining-file", file_remaining.0); + + Ok(resp) + } + + /// Check if a handle is known for a given service + /// + /// If the handle is known, the associated media items should be available + /// in the search index. + #[oai(path = "/known/:service", method = "get")] + async fn known_service( + &self, + pool: Data<&Pool>, + service: Path, + handle: Query, + ) -> poem::Result> { + let handle_exists = match service.0 { + KnownServiceName::Twitter => { + sqlx::query_file_scalar!("queries/handle_twitter.sql", handle.0) + .fetch_one(pool.0) + .await + .map_err(poem::error::InternalServerError)? 
+            }
+        };
+
+        Ok(Json(handle_exists))
+    }
+}
+
 #[tokio::main]
 async fn main() {
     fuzzysearch_common::trace::configure_tracing("fuzzysearch-api");
     fuzzysearch_common::trace::serve_metrics().await;
 
-    let s = std::env::var("DATABASE_URL").expect("Missing DATABASE_URL");
+    let server_endpoint =
+        std::env::var("SERVER_ENDPOINT").unwrap_or_else(|_err| "http://localhost:8080".to_string());
 
-    let db_pool = sqlx::PgPool::connect(&s)
+    let database_url = std::env::var("DATABASE_URL").expect("Missing DATABASE_URL");
+
+    let pool = sqlx::PgPool::connect(&database_url)
         .await
         .expect("Unable to create Postgres pool");
@@ -32,23 +588,29 @@ async fn main() {
         bkapi: std::env::var("ENDPOINT_BKAPI").expect("Missing ENDPOINT_BKAPI"),
     };
 
-    let bkapi = bkapi_client::BKApiClient::new(&endpoints.bkapi);
+    let bkapi = BKApiClient::new(&endpoints.bkapi);
 
-    let log = warp::log("fuzzysearch-api");
-    let cors = warp::cors()
-        .allow_any_origin()
-        .allow_headers(vec!["x-api-key"])
-        .allow_methods(vec!["GET", "POST"]);
+    let cors = poem::middleware::Cors::new()
+        .allow_methods([poem::http::Method::GET, poem::http::Method::POST]);
 
-    let options = warp::options().map(|| "✓");
+    let api_service = OpenApiService::new(Api, "FuzzySearch", "1.0").server(server_endpoint);
+    let api_spec_endpoint = api_service.spec_endpoint();
 
-    let api = options.or(filters::search(db_pool, bkapi, endpoints));
-    let routes = api
-        .or(warp::path::end()
-            .map(|| warp::redirect(warp::http::Uri::from_static("https://fuzzysearch.net"))))
-        .with(log)
-        .with(cors)
-        .recover(handlers::handle_rejection);
+    let docs = api_service.swagger_ui();
+    let app = Route::new()
+        .nest("/", api_service)
+        .nest("/docs", docs)
+        .at("/openapi.json", api_spec_endpoint)
+        .data(pool)
+        .data(bkapi)
+        .data(endpoints)
+        .data(reqwest::Client::new())
+        .with(poem::middleware::Tracing)
+        .with(poem::middleware::OpenTelemetryMetrics::new())
+        .with(cors);
 
-    warp::serve(routes).run(([0, 0, 0, 0], 8080)).await;
+    poem::Server::new(TcpListener::bind("0.0.0.0:8080"))
+        .run(app)
+        .await
+        .unwrap();
 }
diff --git a/fuzzysearch-api/src/models.rs b/fuzzysearch-api/src/models.rs
deleted file mode 100644
index 9c00270..0000000
--- a/fuzzysearch-api/src/models.rs
+++ /dev/null
@@ -1,191 +0,0 @@
-use lazy_static::lazy_static;
-use prometheus::{register_histogram, Histogram};
-
-use crate::types::*;
-use crate::Pool;
-use fuzzysearch_common::types::{SearchResult, SiteInfo};
-
-lazy_static! {
-    static ref IMAGE_QUERY_DURATION: Histogram = register_histogram!(
-        "fuzzysearch_api_image_query_seconds",
-        "Duration to perform a single image lookup query"
-    )
-    .unwrap();
-}
-
-#[tracing::instrument(skip(db))]
-pub async fn lookup_api_key(key: &str, db: &sqlx::PgPool) -> Option<ApiKey> {
-    sqlx::query_as!(
-        ApiKey,
-        "SELECT
-            api_key.id,
-            api_key.name_limit,
-            api_key.image_limit,
-            api_key.hash_limit,
-            api_key.name,
-            account.email owner_email
-        FROM
-            api_key
-        JOIN account
-            ON account.id = api_key.user_id
-        WHERE
-            api_key.key = $1",
-        key
-    )
-    .fetch_optional(db)
-    .await
-    .ok()
-    .flatten()
-}
-
-#[derive(serde::Serialize)]
-struct HashSearch {
-    searched_hash: i64,
-    found_hash: i64,
-    distance: u64,
-}
-
-#[tracing::instrument(skip(pool, bkapi))]
-pub async fn image_query(
-    pool: Pool,
-    bkapi: bkapi_client::BKApiClient,
-    hashes: Vec<i64>,
-    distance: i64,
-) -> Result<Vec<SearchResult>, sqlx::Error> {
-    let found_hashes: Vec<HashSearch> = bkapi
-        .search_many(&hashes, distance as u64)
-        .await
-        .unwrap()
-        .into_iter()
-        .flat_map(|results| {
-            results
-                .hashes
-                .iter()
-                .map(|hash| HashSearch {
-                    searched_hash: results.hash,
-                    found_hash: hash.hash,
-                    distance: hash.distance,
-                })
-                .collect::<Vec<_>>()
-        })
-        .collect();
-
-    let timer = IMAGE_QUERY_DURATION.start_timer();
-    let matches = sqlx::query!(
-        r#"WITH hashes AS (
-            SELECT * FROM jsonb_to_recordset($1::jsonb)
-                AS hashes(searched_hash bigint, found_hash bigint, distance bigint)
-        )
-        SELECT
-            'FurAffinity' site,
-            submission.id,
-            submission.hash_int hash,
-            submission.url,
-            submission.filename,
-            ARRAY(SELECT artist.name) artists,
-            submission.file_id,
-            null sources,
-            submission.rating,
-            submission.posted_at,
-            hashes.searched_hash,
-            hashes.distance
-        FROM hashes
-        JOIN submission ON hashes.found_hash = submission.hash_int
-        JOIN artist ON submission.artist_id = artist.id
-        WHERE hash_int IN (SELECT hashes.found_hash)
-        UNION ALL
-        SELECT
-            'e621' site,
-            e621.id,
-            e621.hash,
-            e621.data->'file'->>'url' url,
-            (e621.data->'file'->>'md5') || '.' || (e621.data->'file'->>'ext') filename,
-            ARRAY(SELECT jsonb_array_elements_text(e621.data->'tags'->'artist')) artists,
-            null file_id,
-            ARRAY(SELECT jsonb_array_elements_text(e621.data->'sources')) sources,
-            e621.data->>'rating' rating,
-            to_timestamp(data->>'created_at', 'YYYY-MM-DD"T"HH24:MI:SS"Z"') posted_at,
-            hashes.searched_hash,
-            hashes.distance
-        FROM hashes
-        JOIN e621 ON hashes.found_hash = e621.hash
-        WHERE e621.hash IN (SELECT hashes.found_hash)
-        UNION ALL
-        SELECT
-            'Weasyl' site,
-            weasyl.id,
-            weasyl.hash,
-            weasyl.data->>'link' url,
-            null filename,
-            ARRAY(SELECT weasyl.data->>'owner_login') artists,
-            null file_id,
-            null sources,
-            weasyl.data->>'rating' rating,
-            to_timestamp(data->>'posted_at', 'YYYY-MM-DD"T"HH24:MI:SS"Z"') posted_at,
-            hashes.searched_hash,
-            hashes.distance
-        FROM hashes
-        JOIN weasyl ON hashes.found_hash = weasyl.hash
-        WHERE weasyl.hash IN (SELECT hashes.found_hash)
-        UNION ALL
-        SELECT
-            'Twitter' site,
-            tweet.id,
-            tweet_media.hash,
-            tweet_media.url,
-            null filename,
-            ARRAY(SELECT tweet.data->'user'->>'screen_name') artists,
-            null file_id,
-            null sources,
-            CASE
-                WHEN (tweet.data->'possibly_sensitive')::boolean IS true THEN 'adult'
-                WHEN (tweet.data->'possibly_sensitive')::boolean IS false THEN 'general'
-            END rating,
-            to_timestamp(tweet.data->>'created_at', 'DY Mon DD HH24:MI:SS +0000 YYYY') posted_at,
-            hashes.searched_hash,
-            hashes.distance
-        FROM hashes
-        JOIN tweet_media ON hashes.found_hash = tweet_media.hash
-        JOIN tweet ON tweet_media.tweet_id = tweet.id
-        WHERE tweet_media.hash IN (SELECT hashes.found_hash)"#,
-        serde_json::to_value(&found_hashes).unwrap()
-    )
-    .map(|row| {
-        use std::convert::TryFrom;
-
-        let site_info = match row.site.as_deref() {
-            Some("FurAffinity") => SiteInfo::FurAffinity {
-                file_id: row.file_id.unwrap_or(-1),
-            },
-            Some("e621") => SiteInfo::E621 {
-                sources: row.sources,
-            },
-            Some("Twitter") => SiteInfo::Twitter,
-            Some("Weasyl") => SiteInfo::Weasyl,
-            _ => panic!("Got unknown site"),
-        };
-
-        SearchResult {
-            site_id: row.id.unwrap_or_default(),
-            site_info: Some(site_info),
-            rating: row.rating.and_then(|rating| rating.parse().ok()),
-            site_id_str: row.id.unwrap_or_default().to_string(),
-            url: row.url.unwrap_or_default(),
-            posted_at: row.posted_at,
-            hash: row.hash,
-            distance: row
-                .distance
-                .map(|distance| u64::try_from(distance).ok())
-                .flatten(),
-            artists: row.artists,
-            filename: row.filename.unwrap_or_default(),
-            searched_hash: row.searched_hash,
-        }
-    })
-    .fetch_all(&pool)
-    .await?;
-    timer.stop_and_record();
-
-    Ok(matches)
-}
diff --git a/fuzzysearch-api/src/types.rs b/fuzzysearch-api/src/types.rs
deleted file mode 100644
index f20d879..0000000
--- a/fuzzysearch-api/src/types.rs
+++ /dev/null
@@ -1,76 +0,0 @@
-use serde::{Deserialize, Serialize};
-
-use fuzzysearch_common::types::SearchResult;
-
-/// An API key representation from the database.
-///
-/// May contain information about the owner, always has rate limit information.
-/// Limits are the number of requests allowed per minute.
-#[derive(Debug)]
-pub struct ApiKey {
-    pub id: i32,
-    pub name: Option<String>,
-    pub owner_email: String,
-    pub name_limit: i16,
-    pub image_limit: i16,
-    pub hash_limit: i16,
-}
-
-/// The status of an API key's rate limit.
-#[derive(Debug, PartialEq)]
-pub enum RateLimit {
-    /// This key is limited, we should deny the request.
-    Limited,
-    /// This key is available, contains the number of requests made.
-    Available((i16, i16)),
-}
-
-#[derive(Debug, Deserialize)]
-pub struct FileSearchOpts {
-    pub id: Option<i32>,
-    pub name: Option<String>,
-    pub url: Option<String>,
-    pub site_id: Option<i64>,
-}
-
-#[derive(Debug, Deserialize)]
-pub struct ImageSearchOpts {
-    #[serde(rename = "type")]
-    pub search_type: Option<ImageSearchType>,
-}
-
-#[derive(Debug, Deserialize, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum ImageSearchType {
-    Close,
-    Exact,
-    Force,
-}
-
-#[derive(Debug, Serialize)]
-pub struct ImageSimilarity {
-    pub hash: i64,
-    pub matches: Vec<SearchResult>,
-}
-
-#[derive(Serialize)]
-pub struct ErrorMessage {
-    pub code: u16,
-    pub message: String,
-}
-
-#[derive(Debug, Deserialize)]
-pub struct HashSearchOpts {
-    pub hashes: String,
-    pub distance: Option<i64>,
-}
-
-#[derive(Debug, Deserialize)]
-pub struct HandleOpts {
-    pub twitter: Option<String>,
-}
-
-#[derive(Debug, Deserialize)]
-pub struct UrlSearchOpts {
-    pub url: String,
-}
diff --git a/fuzzysearch-api/src/utils.rs b/fuzzysearch-api/src/utils.rs
deleted file mode 100644
index dad259a..0000000
--- a/fuzzysearch-api/src/utils.rs
+++ /dev/null
@@ -1,108 +0,0 @@
-use crate::types::*;
-use lazy_static::lazy_static;
-use prometheus::{register_int_counter_vec, IntCounterVec};
-
-lazy_static! {
-    pub static ref RATE_LIMIT_STATUS: IntCounterVec = register_int_counter_vec!(
-        "fuzzysearch_api_rate_limit_count",
-        "Number of allowed and rate limited requests",
-        &["status"]
-    )
-    .unwrap();
-}
-
-#[macro_export]
-macro_rules! rate_limit {
-    ($api_key:expr, $db:expr, $limit:tt, $group:expr) => {
-        rate_limit!($api_key, $db, $limit, $group, 1)
-    };
-
-    ($api_key:expr, $db:expr, $limit:tt, $group:expr, $incr_by:expr) => {{
-        let api_key = match crate::models::lookup_api_key($api_key, $db).await {
-            Some(api_key) => api_key,
-            None => return Ok(Box::new(Error::ApiKey)),
-        };
-
-        let rate_limit = match crate::utils::update_rate_limit(
-            $db,
-            api_key.id,
-            api_key.$limit,
-            $group,
-            $incr_by,
-        )
-        .await
-        {
-            Ok(rate_limit) => rate_limit,
-            Err(err) => return Ok(Box::new(Error::Postgres(err))),
-        };
-
-        match rate_limit {
-            crate::types::RateLimit::Limited => {
-                crate::utils::RATE_LIMIT_STATUS
-                    .with_label_values(&["limited"])
-                    .inc();
-                return Ok(Box::new(Error::RateLimit));
-            }
-            crate::types::RateLimit::Available(count) => {
-                crate::utils::RATE_LIMIT_STATUS
-                    .with_label_values(&["allowed"])
-                    .inc();
-                count
-            }
-        }
-    }};
-}
-
-#[macro_export]
-macro_rules! early_return {
-    ($val:expr) => {
-        match $val {
-            Ok(val) => val,
-            Err(err) => return Ok(Box::new(Error::from(err))),
-        }
-    };
-}
-
-/// Increment the rate limit for a group.
-///
-/// We need to specify the ID of the API key to increment, the key's limit for
-/// the specified group, the name of the group we're incrementing, and the
-/// amount to increment for this request. This should remain as 1 except for
-/// joined requests.
-#[tracing::instrument(skip(db))]
-pub async fn update_rate_limit(
-    db: &sqlx::PgPool,
-    key_id: i32,
-    key_group_limit: i16,
-    group_name: &'static str,
-    incr_by: i16,
-) -> Result<RateLimit, sqlx::Error> {
-    let now = chrono::Utc::now();
-    let timestamp = now.timestamp();
-    let time_window = timestamp - (timestamp % 60);
-
-    let count: i16 = sqlx::query_scalar!(
-        "INSERT INTO
-            rate_limit (api_key_id, time_window, group_name, count)
-        VALUES
-            ($1, $2, $3, $4)
-        ON CONFLICT ON CONSTRAINT unique_window
-            DO UPDATE set count = rate_limit.count + $4
-        RETURNING rate_limit.count",
-        key_id,
-        time_window,
-        group_name,
-        incr_by
-    )
-    .fetch_one(db)
-    .await?;
-
-    if count > key_group_limit {
-        Ok(RateLimit::Limited)
-    } else {
-        Ok(RateLimit::Available((
-            key_group_limit - count,
-            key_group_limit,
-        )))
-    }
-}