From 4c0fb5ac2e3463facb264782ab6ea3343ecd38cd Mon Sep 17 00:00:00 2001 From: Syfaro Date: Sat, 21 Aug 2021 18:28:54 -0400 Subject: [PATCH] Setting for ingesters to download files. --- Cargo.lock | 315 ++++++--------------- fuzzysearch-common/Cargo.toml | 1 + fuzzysearch-common/src/download.rs | 27 ++ fuzzysearch-common/src/lib.rs | 2 + fuzzysearch-ingest-e621/src/main.rs | 81 ++++-- fuzzysearch-ingest-furaffinity/Cargo.toml | 1 - fuzzysearch-ingest-furaffinity/Dockerfile | 3 +- fuzzysearch-ingest-furaffinity/src/main.rs | 15 +- fuzzysearch-ingest-weasyl/src/main.rs | 19 +- 9 files changed, 217 insertions(+), 247 deletions(-) create mode 100644 fuzzysearch-common/src/download.rs diff --git a/Cargo.lock b/Cargo.lock index 024674c..1906d99 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -139,7 +139,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77f5f9d66a8730d0fae62c26f3424f5751e5518086628a40b7ab6fca4a705034" dependencies = [ "futures-core", - "paste 1.0.5", + "paste", "pin-project-lite", ] @@ -199,7 +199,7 @@ dependencies = [ "log", "mime", "once_cell", - "paste 1.0.5", + "paste", "pin-project", "regex", "serde", @@ -274,9 +274,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.42" +version = "1.0.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "595d3cfa7a60d4555cb5067b99f07142a08ea778de5cf993f7b75c7d8fabc486" +checksum = "28ae2b3dec75a406790005a200b1bd89785afc02517a00ca99ecfe093ee9e6cf" [[package]] name = "arrayvec" @@ -365,9 +365,9 @@ dependencies = [ [[package]] name = "bitflags" -version = "1.2.1" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitvec" @@ -441,12 +441,6 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40e38929add23cdf8a366df9b0e088953150724bcbe5fc330b0d8eb3b328eec8" -[[package]] -name = "build_const" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4ae4235e6dac0694637c763029ecea1a2ec9e4e06ec2729bd21ba4d9c863eb7" - [[package]] name = "bumpalo" version = "3.7.0" @@ -510,14 +504,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "cfscrape" -version = "0.1.0" -source = "git+https://github.com/Syfaro/cfscrape-rs#439cc06b190f01d420090a0cbcd19332fff8f05b" -dependencies = [ - "pyo3", -] - [[package]] name = "chrono" version = "0.4.19" @@ -599,13 +585,19 @@ dependencies = [ [[package]] name = "crc" -version = "1.8.1" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d663548de7f5cca343f1e0a48d14dcfb0e9eb4e079ec58883b7251539fa10aeb" +checksum = "10c2722795460108a7872e1cd933a85d6ec38abc4baecad51028f702da28889f" dependencies = [ - "build_const", + "crc-catalog", ] +[[package]] +name = "crc-catalog" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccaeedb56da03b09f598226e25e80088cb4cd25f316e6e4df7d695f0feeb1403" + [[package]] name = "crc32fast" version = "1.2.1" @@ -706,16 +698,6 @@ dependencies = [ "syn", ] -[[package]] -name = "ctor" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e98e2ad1a782e33928b96fc3948e7c355e5af34ba4de7670fe8bac2a3b2006d" -dependencies = [ - "quote", - "syn", -] - [[package]] name = "deflate" version = "0.8.6" @@ -945,9 +927,8 @@ checksum = "fed34cd105917e91daa4da6b3728c47b068749d6a62c59811f06ed2ac71d9da7" [[package]] name = "furaffinity-rs" version = "0.1.0" -source = "git+https://github.com/Syfaro/furaffinity-rs?branch=main#9825e488e7c6737686e5f95dbcb56146aea6382e" +source = "git+https://github.com/Syfaro/furaffinity-rs?branch=main#dffa06f1bf803e3a327e5b0d757f20228c68f63d" dependencies = [ - "cfscrape", "chrono", "image", "img_hash", @@ -957,7 +938,6 @@ dependencies = [ "scraper", "sha2", "thiserror", - "tokio", ] [[package]] @@ -1012,6 +992,17 @@ dependencies = [ "futures-util", ] +[[package]] +name = "futures-intrusive" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62007592ac46aa7c2b6416f7deb9a8a8f63a01e0f1d6e1787d5630170db2b63e" +dependencies = [ + "futures-core", + "lock_api", + "parking_lot", +] + [[package]] name = "futures-io" version = "0.3.16" @@ -1117,6 +1108,7 @@ dependencies = [ "faktory", "ffmpeg-next", "futures", + "hex", "hyper", "image", "img_hash", @@ -1281,17 +1273,6 @@ dependencies = [ "wasi 0.10.2+wasi-snapshot-preview1", ] -[[package]] -name = "ghost" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a5bcf1bbeab73aa4cf2fde60a846858dc036163c7c33bec309f8d17de785479" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "gif" version = "0.11.2" @@ -1316,9 +1297,9 @@ checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" [[package]] name = "h2" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "825343c4eef0b63f541f8903f395dc5beb362a979b5799a84062527ef1e37726" +checksum = "d7f3675cfef6a30c8031cf9e6493ebdc3bb3272a3fea3923c4210d1830e6a472" dependencies = [ "bytes", "fnv", @@ -1465,9 +1446,9 @@ dependencies = [ [[package]] name = "httparse" -version = "1.4.1" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3a87b616e37e93c22fb19bcd386f02f3af5ea98a25670ad0fce773de23c5e68" +checksum = "acd94fdbe1d4ff688b67b04eee2e17bd50995534a61539e45adfefb45e5e5503" [[package]] name = "httpdate" @@ -1565,29 +1546,6 @@ dependencies = [ "hashbrown", ] -[[package]] -name = "indoc" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47741a8bc60fb26eb8d6e0238bbb26d8575ff623fdc97b1a2c00c050b9684ed8" -dependencies = [ - "indoc-impl", - "proc-macro-hack", -] - -[[package]] -name = "indoc-impl" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce046d161f000fffde5f432a0d034d0341dc152643b2598ed5bfce44c4f3a8f0" -dependencies = [ - "proc-macro-hack", - "proc-macro2", - "quote", - "syn", - "unindent", -] - [[package]] name = "input_buffer" version = "0.4.0" @@ -1612,28 +1570,6 @@ version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48dc51180a9b377fd75814d0cc02199c20f8e99433d6762f650d39cdbbd3b56f" -[[package]] -name = "inventory" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f0f7efb804ec95e33db9ad49e4252f049e37e8b0a4652e3cd61f7999f2eff7f" -dependencies = [ - "ctor", - "ghost", - "inventory-impl", -] - -[[package]] -name = "inventory-impl" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75c094e94816723ab936484666968f5b58060492e880f3c8d00489a1e244fa51" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "ipnet" version = "2.3.1" @@ -1648,9 +1584,9 @@ checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" [[package]] name = "jobserver" -version = "0.1.23" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5ca711fd837261e14ec9e674f092cbb931d3fa1482b017ae59328ddc6f3212b" +checksum = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa" dependencies = [ "libc", ] @@ -1666,9 +1602,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.52" +version = "0.3.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce791b7ca6638aae45be056e068fc756d871eb3b3b10b8efa62d1c9cec616752" +checksum = "e4bf49d50e2961077d9c99f4b7997d770a1114f087c3c2e0069b36c13fc2979d" dependencies = [ "wasm-bindgen", ] @@ -1706,9 +1642,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.99" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7f823d141fe0a24df1e23b4af4e3c7ba9e5966ec514ea068c93024aa7deb765" +checksum = "a1fa8cddc8fbbee11227ef194b5317ed014b8acbf15139bd716a18ad3fe99ec5" [[package]] name = "libloading" @@ -1799,9 +1735,9 @@ dependencies = [ [[package]] name = "matches" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" +checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" [[package]] name = "md-5" @@ -1816,9 +1752,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.4.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc" +checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" [[package]] name = "memoffset" @@ -2029,9 +1965,9 @@ dependencies = [ [[package]] name = "object" -version = "0.26.0" +version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c55827317fb4c08822499848a14237d2874d6f139828893017237e7ab93eb386" +checksum = "ee2766204889d09937d00bfbb7fec56bb2a199e2ade963cab19185d8a6104c7c" dependencies = [ "memchr", ] @@ -2050,9 +1986,9 @@ checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" [[package]] name = "openssl" -version = "0.10.35" +version = "0.10.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "549430950c79ae24e6d02e0b7404534ecf311d94cc9f861e9e4020187d13d885" +checksum = "8d9facdb76fec0b73c406f125d44d86fdad818d66fef0531eec9233ca425ff4a" dependencies = [ "bitflags", "cfg-if 1.0.0", @@ -2070,9 +2006,9 @@ checksum = "28988d872ab76095a6e6ac88d99b54fd267702734fd7ffe610ca27f533ddb95a" [[package]] name = "openssl-sys" -version = "0.9.65" +version = "0.9.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a7907e3bfa08bb85105209cdfcb6c63d109f8f6c1ed6ca318fff5c1853fbc1d" +checksum = "1996d2d305e561b70d1ee0c53f1542833f4e1ac6ce9a6708b6ff2738ca67dc82" dependencies = [ "autocfg", "cc", @@ -2161,31 +2097,12 @@ dependencies = [ "winapi", ] -[[package]] -name = "paste" -version = "0.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45ca20c77d80be666aef2b45486da86238fabe33e38306bd3118fe4af33fa880" -dependencies = [ - "paste-impl", - "proc-macro-hack", -] - [[package]] name = "paste" version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf547ad0c65e31259204bd90935776d1c693cec2f4ff7abb7a1bbbd40dfe58" -[[package]] -name = "paste-impl" -version = "0.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d95a7db200b97ef370c8e6de0088252f7e0dfff7d047a28528e47456c0fc98b6" -dependencies = [ - "proc-macro-hack", -] - [[package]] name = "peeking_take_while" version = "0.1.2" @@ -2421,47 +2338,9 @@ dependencies = [ [[package]] name = "protobuf" -version = "2.25.0" +version = "2.25.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "020f86b07722c5c4291f7c723eac4676b3892d47d9a7708dc2779696407f039b" - -[[package]] -name = "pyo3" -version = "0.12.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf6bbbe8f70d179260b3728e5d04eb012f4f0c7988e58c11433dd689cecaa72e" -dependencies = [ - "ctor", - "indoc", - "inventory", - "libc", - "parking_lot", - "paste 0.1.18", - "pyo3cls", - "unindent", -] - -[[package]] -name = "pyo3-derive-backend" -version = "0.12.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10ecd0eb6ed7b3d9965b4f4370b5b9e99e3e5e8742000e1c452c018f8c2a322f" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "pyo3cls" -version = "0.12.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d344fdaa6a834a06dd1720ff104ea12fe101dad2e8db89345af9db74c0bb11a0" -dependencies = [ - "pyo3-derive-backend", - "quote", - "syn", -] +checksum = "23129d50f2c9355ced935fce8a08bd706ee2e7ce2b3b33bf61dace0e379ac63a" [[package]] name = "quick-error" @@ -2491,9 +2370,9 @@ dependencies = [ [[package]] name = "r2d2_postgres" -version = "0.18.0" +version = "0.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7665d196d831b5c4f9ac49b3e2518e99555fe1941ccd103480817108c7c2f6e" +checksum = "7029c56be658cb54f321e0bee597810ee16796b735fa2559d7056bf06b12230b" dependencies = [ "postgres", "r2d2", @@ -2906,18 +2785,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.127" +version = "1.0.128" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f03b9878abf6d14e6779d3f24f07b2cfa90352cfec4acc5aab8f1ac7f146fae8" +checksum = "1056a0db1978e9dbf0f6e4fca677f6f9143dc1c19de346f22cac23e422196834" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.127" +version = "1.0.128" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a024926d3432516606328597e0f224a51355a493b49fdd67e9209187cbe55ecc" +checksum = "13af2fbb8b60a8950d6c72a56d2095c28870367cc8e10c55e9745bac4995a2c4" dependencies = [ "proc-macro2", "quote", @@ -3057,9 +2936,9 @@ dependencies = [ [[package]] name = "sqlx" -version = "0.5.5" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba82f79b31f30acebf19905bcd8b978f46891b9d0723f578447361a8910b6584" +checksum = "0e4b94ab0f8c21ee4899b93b06451ef5d965f1a355982ee73684338228498440" dependencies = [ "sqlx-core", "sqlx-macros", @@ -3067,9 +2946,9 @@ dependencies = [ [[package]] name = "sqlx-core" -version = "0.5.5" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f23af36748ec8ea8d49ef8499839907be41b0b1178a4e82b8cb45d29f531dc9" +checksum = "ec28b91a01e1fe286d6ba66f68289a2286df023fc97444e1fd86c2fd6d5dc026" dependencies = [ "ahash", "atoi", @@ -3085,6 +2964,7 @@ dependencies = [ "either", "futures-channel", "futures-core", + "futures-intrusive", "futures-util", "hashlink", "hex", @@ -3114,9 +2994,9 @@ dependencies = [ [[package]] name = "sqlx-macros" -version = "0.5.5" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47e4a2349d1ffd60a03ca0de3f116ba55d7f406e55a0d84c64a5590866d94c06" +checksum = "4dc33c35d54774eed73d54568d47a6ac099aed8af5e1556a017c131be88217d5" dependencies = [ "dotenv", "either", @@ -3137,9 +3017,9 @@ dependencies = [ [[package]] name = "sqlx-rt" -version = "0.5.5" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8199b421ecf3493ee9ef3e7bc90c904844cfb2ea7ea2f57347a93f52bfd3e057" +checksum = "14302b678d9c76b28f2e60115211e25e0aabc938269991745a169753dc00e35c" dependencies = [ "native-tls", "once_cell", @@ -3266,9 +3146,9 @@ checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" [[package]] name = "syn" -version = "1.0.74" +version = "1.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1873d832550d4588c3dbc20f01361ab00bfe741048f71e3fecf145a7cc18b29c" +checksum = "b7f58f7e8eaa0009c5fec437aabf511bd9933e4b2d7407bd05273c01a8906ea7" dependencies = [ "proc-macro2", "quote", @@ -3451,9 +3331,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" [[package]] name = "tokio" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b7b349f11a7047e6d1276853e612d152f5e8a352c61917887cc2169e2366b4c" +checksum = "01cf844b23c6131f624accf65ce0e4e9956a8bb329400ea5bcc26ae3a5c20b0b" dependencies = [ "autocfg", "bytes", @@ -3572,9 +3452,9 @@ dependencies = [ [[package]] name = "tracing-actix-web" -version = "0.4.0-beta.9" +version = "0.4.0-beta.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c794ee03ca18c0d149e6928db480700644ab405f553f55dc0650f541e73dc180" +checksum = "29fe304c04a237147370dd8237cdf8e8b1056e5f732cec354f6512a756b0f28a" dependencies = [ "actix-web", "futures", @@ -3598,9 +3478,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.18" +version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9ff14f98b1a4b289c6248a023c1c2fa1491062964e9fed67ab29c4e4da4a052" +checksum = "2ca517f43f0fb96e0c3072ed5c275fe5eece87e8cb52f4a77b69226d3b1c9df8" dependencies = [ "lazy_static", ] @@ -3651,9 +3531,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.2.19" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab69019741fca4d98be3c62d2b75254528b5432233fd8a4d2739fec20278de48" +checksum = "b9cbe87a2fa7e35900ce5de20220a582a9483a7063811defce79d7cbd59d4cfe" dependencies = [ "ansi_term", "chrono", @@ -3769,12 +3649,9 @@ dependencies = [ [[package]] name = "unicode-bidi" -version = "0.3.5" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eeb8be209bb1c96b7c177c7420d26e04eccacb0eeae6b980e35fcb74678107e0" -dependencies = [ - "matches", -] +checksum = "246f4c42e67e7a4e3c6106ff716a5d067d4132a642840b242e357e468a2a0085" [[package]] name = "unicode-normalization" @@ -3809,12 +3686,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" -[[package]] -name = "unindent" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f14ee04d9415b52b3aeab06258a3f07093182b88ba0f9b8d203f211a7a7d41c7" - [[package]] name = "url" version = "2.2.2" @@ -3907,9 +3778,9 @@ checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" [[package]] name = "wasm-bindgen" -version = "0.2.75" +version = "0.2.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b608ecc8f4198fe8680e2ed18eccab5f0cd4caaf3d83516fa5fb2e927fda2586" +checksum = "8ce9b1b516211d33767048e5d47fa2a381ed8b76fc48d2ce4aa39877f9f183e0" dependencies = [ "cfg-if 1.0.0", "serde", @@ -3919,9 +3790,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.75" +version = "0.2.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "580aa3a91a63d23aac5b6b267e2d13cb4f363e31dce6c352fca4752ae12e479f" +checksum = "cfe8dc78e2326ba5f845f4b5bf548401604fa20b1dd1d365fb73b6c1d6364041" dependencies = [ "bumpalo", "lazy_static", @@ -3934,9 +3805,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.25" +version = "0.4.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16646b21c3add8e13fdb8f20172f8a28c3dbf62f45406bcff0233188226cfe0c" +checksum = "95fded345a6559c2cfee778d562300c581f7d4ff3edb9b0d230d69800d213972" dependencies = [ "cfg-if 1.0.0", "js-sys", @@ -3946,9 +3817,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.75" +version = "0.2.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "171ebf0ed9e1458810dfcb31f2e766ad6b3a89dbda42d8901f2b268277e5f09c" +checksum = "44468aa53335841d9d6b6c023eaab07c0cd4bddbcfdee3e2bb1e8d2cb8069fef" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3956,9 +3827,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.75" +version = "0.2.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c2657dd393f03aa2a659c25c6ae18a13a4048cebd220e147933ea837efc589f" +checksum = "0195807922713af1e67dc66132c7328206ed9766af3858164fb583eedc25fbad" dependencies = [ "proc-macro2", "quote", @@ -3969,15 +3840,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.75" +version = "0.2.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e0c4a743a309662d45f4ede961d7afa4ba4131a59a639f29b0069c3798bbcc2" +checksum = "acdb075a845574a1fa5f09fd77e43f7747599301ea3417a9fbffdeedfc1f4a29" [[package]] name = "web-sys" -version = "0.3.52" +version = "0.3.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01c70a82d842c9979078c772d4a1344685045f1a5628f677c2b2eab4dd7d2696" +checksum = "224b2f6b67919060055ef1a67807367c2066ed520c3862cc013d26cf893a783c" dependencies = [ "js-sys", "wasm-bindgen", @@ -3991,9 +3862,9 @@ checksum = "d8b77fdfd5a253be4ab714e4ffa3c49caf146b4de743e97510c0656cf90f1e8e" [[package]] name = "whoami" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4abacf325c958dfeaf1046931d37f2a901b6dfe0968ee965a29e94c6766b2af6" +checksum = "f7741161a40200a867c96dfa5574544efa4178cf4c8f770b62dd1cc0362d7ae1" dependencies = [ "wasm-bindgen", "web-sys", diff --git a/fuzzysearch-common/Cargo.toml b/fuzzysearch-common/Cargo.toml index 512a225..bdd57c3 100644 --- a/fuzzysearch-common/Cargo.toml +++ b/fuzzysearch-common/Cargo.toml @@ -26,6 +26,7 @@ serde_json = { version = "1", optional = true } base64 = "0.13" image = "0.23" img_hash = "3" +hex = "0.4" ffmpeg-next = { version = "4", optional = true } tempfile = { version = "3", optional = true } diff --git a/fuzzysearch-common/src/download.rs b/fuzzysearch-common/src/download.rs new file mode 100644 index 0000000..80b0214 --- /dev/null +++ b/fuzzysearch-common/src/download.rs @@ -0,0 +1,27 @@ +use tokio::io::AsyncWriteExt; + +pub async fn write_bytes(folder: &str, hash: &[u8], bytes: &[u8]) -> std::io::Result<()> { + let hex_hash = hex::encode(&hash); + tracing::debug!("writing {} to {}", hex_hash, folder); + + let hash_folder = std::path::PathBuf::from(folder) + .join(&hex_hash[0..2]) + .join(&hex_hash[2..4]); + + match tokio::fs::create_dir_all(&hash_folder).await { + Ok(_) => (), + Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => (), + Err(err) => return Err(err), + } + + let file_path = hash_folder.join(hex_hash); + let mut file = match tokio::fs::File::create(file_path).await { + Ok(file) => file, + Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => return Ok(()), + Err(err) => return Err(err), + }; + + file.write_all(bytes).await?; + + Ok(()) +} diff --git a/fuzzysearch-common/src/lib.rs b/fuzzysearch-common/src/lib.rs index 0a7d606..5afd0b3 100644 --- a/fuzzysearch-common/src/lib.rs +++ b/fuzzysearch-common/src/lib.rs @@ -7,6 +7,8 @@ pub mod video; #[cfg(feature = "trace")] pub mod trace; +pub mod download; + /// Create an instance of img_hash with project defaults. pub fn get_hasher() -> img_hash::Hasher<[u8; 8]> { use img_hash::{HashAlg::Gradient, HasherConfig}; diff --git a/fuzzysearch-ingest-e621/src/main.rs b/fuzzysearch-ingest-e621/src/main.rs index ef48200..31dafcc 100644 --- a/fuzzysearch-ingest-e621/src/main.rs +++ b/fuzzysearch-ingest-e621/src/main.rs @@ -37,6 +37,8 @@ async fn main() -> anyhow::Result<()> { let api_key = std::env::var("E621_API_KEY").expect_or_log("Missing E621_API_KEY"); let auth = (login, Some(api_key)); + let download_folder = std::env::var("DOWNLOAD_FOLDER").ok(); + let client = reqwest::ClientBuilder::default() .user_agent(USER_AGENT) .build()?; @@ -106,7 +108,7 @@ async fn main() -> anyhow::Result<()> { for post in posts { let _hist = SUBMISSION_DURATION.start_timer(); - insert_submission(&mut tx, &faktory, &client, post).await?; + insert_submission(&mut tx, &faktory, &client, post, &download_folder).await?; drop(_hist); SUBMISSION_BACKLOG.sub(1); @@ -216,14 +218,20 @@ async fn load_page( Ok(body) } -type ImageData = (Option, Option, Option>); +struct ImageData { + hash: Option, + hash_error: Option, + sha256: Option>, + bytes: Option>, +} -#[tracing::instrument(err, skip(conn, faktory, client, post), fields(id))] +#[tracing::instrument(err, skip(conn, faktory, client, post, download_folder), fields(id))] async fn insert_submission( conn: &mut sqlx::Transaction<'_, sqlx::Postgres>, faktory: &FaktoryClient, client: &reqwest::Client, post: &serde_json::Value, + download_folder: &Option, ) -> anyhow::Result<()> { let id = post .get("id") @@ -236,15 +244,36 @@ async fn insert_submission( tracing::trace!(?post, "Evaluating post"); - let (hash, hash_error, sha256): ImageData = if let Some((url, ext)) = get_post_url_ext(post) { - let (hash, hash_error, sha256) = - if url != "/images/deleted-preview.png" && (ext == "jpg" || ext == "png") { - load_image(client, url).await? - } else { - tracing::debug!("Ignoring post as it is deleted or not a supported image format"); + let ImageData { + hash, + hash_error, + sha256, + .. + } = if let Some((url, ext)) = get_post_url_ext(post) { + let ImageData { + hash, + hash_error, + sha256, + bytes, + } = if url != "/images/deleted-preview.png" && (ext == "jpg" || ext == "png") { + load_image(client, url).await? + } else { + tracing::debug!("Ignoring post as it is deleted or not a supported image format"); - (None, None, None) - }; + ImageData { + hash: None, + hash_error: None, + sha256: None, + bytes: None, + } + }; + + if let (Some(folder), Some(sha256), Some(bytes)) = (download_folder, &sha256, &bytes) { + if let Err(err) = fuzzysearch_common::download::write_bytes(folder, sha256, bytes).await + { + tracing::error!("Could not download file: {:?}", err); + } + } let artist = post .as_object() @@ -271,11 +300,21 @@ async fn insert_submission( }) .await?; - (hash, hash_error, sha256) + ImageData { + hash, + hash_error, + sha256, + bytes, + } } else { tracing::warn!("Post had missing URL or extension"); - (None, None, None) + ImageData { + hash: None, + hash_error: None, + sha256: None, + bytes: None, + } }; sqlx::query!( @@ -315,7 +354,7 @@ async fn load_image(client: &reqwest::Client, url: &str) -> anyhow::Result anyhow::Result img, Err(err) => { tracing::error!(?err, "Unable to open image"); - return Ok((None, Some(err.to_string()), Some(result))); + return Ok(ImageData { + hash: None, + hash_error: Some(err.to_string()), + sha256: Some(result), + bytes: Some(bytes), + }); } }; @@ -342,5 +386,10 @@ async fn load_image(client: &reqwest::Client, url: &str) -> anyhow::Result for RetryHandler { } } -#[tracing::instrument(skip(client, fa, faktory))] +#[tracing::instrument(skip(client, fa, faktory, download_folder))] async fn process_submission( client: &Client, fa: &furaffinity_rs::FurAffinity, faktory: &FaktoryClient, id: i32, + download_folder: &Option, ) { if has_submission(client, id).await { return; @@ -204,6 +205,14 @@ async fn process_submission( } }; + if let (Some(folder), Some(sha256), Some(bytes)) = + (download_folder, &sub.file_sha256, &sub.file) + { + if let Err(err) = fuzzysearch_common::download::write_bytes(folder, sha256, bytes).await { + tracing::error!("Could not download image: {:?}", err); + } + } + _timer.stop_and_record(); if let Err(err) = faktory @@ -233,6 +242,8 @@ async fn main() { std::env::var("FA_B").expect_or_log("Missing FA_B"), ); + let download_folder = std::env::var("DOWNLOAD_FOLDER").ok(); + let user_agent = std::env::var("USER_AGENT").expect_or_log("Missing USER_AGENT"); let client = reqwest::Client::builder() .timeout(std::time::Duration::from_secs(10)) @@ -281,7 +292,7 @@ async fn main() { .set(online.other as i64); for id in ids_to_check(&client, latest_id.0).await { - process_submission(&client, &fa, &faktory, id).await; + process_submission(&client, &fa, &faktory, id, &download_folder).await; } tracing::info!("Completed fetch, waiting a minute before loading more"); diff --git a/fuzzysearch-ingest-weasyl/src/main.rs b/fuzzysearch-ingest-weasyl/src/main.rs index 05b52f6..0075459 100644 --- a/fuzzysearch-ingest-weasyl/src/main.rs +++ b/fuzzysearch-ingest-weasyl/src/main.rs @@ -127,6 +127,7 @@ async fn process_submission( faktory: &FaktoryClient, body: serde_json::Value, sub: WeasylSubmission, + download_folder: &Option, ) -> anyhow::Result<()> { tracing::debug!("Processing submission"); @@ -135,7 +136,8 @@ async fn process_submission( .send() .await? .bytes() - .await?; + .await? + .to_vec(); let num = if let Ok(image) = image::load_from_memory(&data) { let hasher = fuzzysearch_common::get_hasher(); @@ -154,6 +156,12 @@ async fn process_submission( hasher.update(&data); let result: [u8; 32] = hasher.finalize().into(); + if let Some(folder) = download_folder { + if let Err(err) = fuzzysearch_common::download::write_bytes(folder, &result, &data).await { + tracing::error!("Could not download image: {:?}", err); + } + } + sqlx::query!( "INSERT INTO weasyl (id, hash, sha256, file_size, data) VALUES ($1, $2, $3, $4, $5)", sub.id, @@ -202,6 +210,7 @@ async fn main() { fuzzysearch_common::trace::serve_metrics().await; let api_key = std::env::var("WEASYL_APIKEY").unwrap_or_log(); + let download_folder = std::env::var("DOWNLOAD_FOLDER").ok(); let pool = sqlx::postgres::PgPoolOptions::new() .max_connections(2) @@ -238,9 +247,11 @@ async fn main() { } match load_submission(&client, &api_key, id).await.unwrap_or_log() { - (Some(sub), json) => process_submission(&pool, &client, &faktory, json, sub) - .await - .unwrap_or_log(), + (Some(sub), json) => { + process_submission(&pool, &client, &faktory, json, sub, &download_folder) + .await + .unwrap_or_log() + } (None, body) => insert_null(&pool, body, id).await.unwrap_or_log(), } }