From d557d9bbaef52b8b86904ceade7ae04b7666b210 Mon Sep 17 00:00:00 2001 From: Syfaro Date: Tue, 16 Feb 2021 23:43:33 -0500 Subject: [PATCH] Keep database updated. --- .dockerignore | 1 + .drone.yml | 25 ++ .gitignore | 1 + Cargo.lock | 762 +++++++++++++++++++++++++++++++++-------- Cargo.toml | 21 +- Dockerfile | 14 + sqlx-data.json | 37 ++ src/bin/import.rs | 130 ------- src/bin/load_hashes.rs | 161 --------- src/bin/update.rs | 171 --------- src/main.rs | 354 +++++++++++++++++++ 11 files changed, 1061 insertions(+), 616 deletions(-) create mode 100644 .dockerignore create mode 100644 .drone.yml create mode 100644 Dockerfile create mode 100644 sqlx-data.json delete mode 100644 src/bin/import.rs delete mode 100644 src/bin/load_hashes.rs delete mode 100644 src/bin/update.rs create mode 100644 src/main.rs diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..2f7896d --- /dev/null +++ b/.dockerignore @@ -0,0 +1 @@ +target/ diff --git a/.drone.yml b/.drone.yml new file mode 100644 index 0000000..644af61 --- /dev/null +++ b/.drone.yml @@ -0,0 +1,25 @@ +--- +kind: pipeline +type: docker +name: default + +platform: + os: linux + arch: amd64 + +steps: +- name: build-latest + image: plugins/docker + settings: + auto_tag: true + password: + from_secret: docker_password + registry: registry.huefox.com + repo: registry.huefox.com/e621-watcher + username: + from_secret: docker_username + when: + branch: + - master + +... diff --git a/.gitignore b/.gitignore index ea8c4bf..fedaa2b 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ /target +.env diff --git a/Cargo.lock b/Cargo.lock index ebe9e34..83f442e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -15,14 +15,59 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" [[package]] -name = "async-trait" -version = "0.1.42" +name = "ahash" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d3a45e77e34375a7923b1e8febb049bb011f064714a8e17a1a616fef01da13d" +checksum = "739f4a8db6605981345c5654f3a85b056ce52f37a39d34da03f25bf2151ea16e" + +[[package]] +name = "ahash" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "796540673305a66d127804eef19ad696f1f204b8c1025aaca4958c17eab32877" dependencies = [ - "proc-macro2", - "quote", - "syn", + "getrandom 0.2.2", + "once_cell", + "version_check", +] + +[[package]] +name = "aho-corasick" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5" +dependencies = [ + "memchr", +] + +[[package]] +name = "ansi_term" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi", +] + +[[package]] +name = "anyhow" +version = "1.0.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afddf7f520a80dbf76e6f50a35bca42a2331ef227a28b3b6dc5c2e2338d114b1" + +[[package]] +name = "arrayvec" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" + +[[package]] +name = "atoi" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "616896e05fc0e2649463a93a15183c6a16bf03413a7af88ef1285ddedfa9cda5" +dependencies = [ + "num-traits", ] [[package]] @@ -43,37 +88,24 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" -[[package]] -name = "bb8" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dae93eccab998c4b8703e3a6bbaa1714c38e445ebacb4bede25d0408521e293c" -dependencies = [ - "async-trait", - "futures-channel", - "futures-util", - "parking_lot", - "tokio", -] - -[[package]] -name = "bb8-postgres" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61fdf56d52b2cca401d2380407e5c35d3d25d3560224ecf74d6e4ca13e51239b" -dependencies = [ - "async-trait", - "bb8", - "tokio", - "tokio-postgres", -] - [[package]] name = "bitflags" version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" +[[package]] +name = "bitvec" +version = "0.19.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7ba35e9565969edb811639dbebfe34edc0368e472c5018474c8eb2543397f81" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + [[package]] name = "block-buffer" version = "0.9.0" @@ -83,6 +115,12 @@ dependencies = [ "generic-array", ] +[[package]] +name = "build_const" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39092a32794787acd8525ee150305ff051b0aa6cc2abaf193924f5ab05425f39" + [[package]] name = "bumpalo" version = "3.6.0" @@ -119,6 +157,19 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chrono" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" +dependencies = [ + "libc", + "num-integer", + "num-traits", + "time", + "winapi", +] + [[package]] name = "color_quant" version = "1.1.0" @@ -153,6 +204,15 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8aebca1129a03dc6dc2b127edd729435bbc4a37e1d5f4d7513165089ceb02634" +[[package]] +name = "crc" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d663548de7f5cca343f1e0a48d14dcfb0e9eb4e079ec58883b7251539fa10aeb" +dependencies = [ + "build_const", +] + [[package]] name = "crc32fast" version = "1.2.1" @@ -197,6 +257,16 @@ dependencies = [ "scopeguard", ] +[[package]] +name = "crossbeam-queue" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f6cb3c7f5b8e51bc3ebb73a2327ad4abdbd119dc13223f14f961d2f38486756" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.1" @@ -237,21 +307,31 @@ dependencies = [ "generic-array", ] +[[package]] +name = "dotenv" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77c90badedccf4105eca100756a0b1289e191f6fcbdadd3cee1d2f614f97da8f" + [[package]] name = "e621-watcher" version = "0.1.0" dependencies = [ - "bb8", - "bb8-postgres", - "futures", + "anyhow", "hamming", + "hyper", "image", "img_hash", + "lazy_static", + "prometheus", "reqwest", "serde", "serde_json", + "sha2", + "sqlx", "tokio", - "tokio-postgres", + "tracing", + "tracing-subscriber", ] [[package]] @@ -259,6 +339,9 @@ name = "either" version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" +dependencies = [ + "serde", +] [[package]] name = "encoding_rs" @@ -270,10 +353,16 @@ dependencies = [ ] [[package]] -name = "fallible-iterator" -version = "0.2.0" +name = "flate2" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" +checksum = "cd3aec53de10fe96d7d8c565eb17f2c687bb5518a2ec453b5b1252964526abe0" +dependencies = [ + "cfg-if", + "crc32fast", + "libc", + "miniz_oxide 0.4.3", +] [[package]] name = "fnv" @@ -306,6 +395,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "funty" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed34cd105917e91daa4da6b3728c47b068749d6a62c59811f06ed2ac71d9da7" + [[package]] name = "futures" version = "0.3.12" @@ -346,7 +441,6 @@ dependencies = [ "futures-core", "futures-task", "futures-util", - "num_cpus", ] [[package]] @@ -412,6 +506,17 @@ dependencies = [ "version_check", ] +[[package]] +name = "getrandom" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.9.0+wasi-snapshot-preview1", +] + [[package]] name = "getrandom" version = "0.2.2" @@ -420,7 +525,7 @@ checksum = "c9495705279e7140bf035dde1f6e750c162df8b625267cd52cc44e0b156732c8" dependencies = [ "cfg-if", "libc", - "wasi", + "wasi 0.10.2+wasi-snapshot-preview1", ] [[package]] @@ -464,6 +569,27 @@ name = "hashbrown" version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04" +dependencies = [ + "ahash 0.4.7", +] + +[[package]] +name = "hashlink" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d99cf782f0dc4372d26846bec3de7804ceb5df083c2d4462c0b8d2330e894fa8" +dependencies = [ + "hashbrown", +] + +[[package]] +name = "heck" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac" +dependencies = [ + "unicode-segmentation", +] [[package]] name = "hermit-abi" @@ -474,6 +600,12 @@ dependencies = [ "libc", ] +[[package]] +name = "hex" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "644f9158b2f133fd50f5fb3242878846d9eb792e445c893805ff0e3824006e35" + [[package]] name = "hmac" version = "0.10.1" @@ -533,7 +665,7 @@ dependencies = [ "httparse", "httpdate", "itoa", - "pin-project 1.0.5", + "pin-project", "socket2", "tokio", "tower-service", @@ -652,6 +784,19 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "lexical-core" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21f866863575d0e1d654fbeeabdc927292fdf862873dc3c96c6f753357e13374" +dependencies = [ + "arrayvec", + "bitflags", + "cfg-if", + "ryu", + "static_assertions", +] + [[package]] name = "libc" version = "0.2.86" @@ -676,6 +821,21 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "maplit" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" + +[[package]] +name = "matchers" +version = "0.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f099785f7595cc4b4553a174ce30dd7589ef93391ff414dbb67f62392b9e0ce1" +dependencies = [ + "regex-automata", +] + [[package]] name = "matches" version = "0.1.8" @@ -683,10 +843,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" [[package]] -name = "md5" -version = "0.7.0" +name = "md-5" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" +checksum = "7b5a279bb9607f9f53c22d496eade00d138d1bdcccd07d74650387cf94942a15" +dependencies = [ + "block-buffer", + "digest", + "opaque-debug", +] [[package]] name = "memchr" @@ -769,6 +934,19 @@ dependencies = [ "tempfile", ] +[[package]] +name = "nom" +version = "6.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7413f999671bd4745a7b624bd370a569fb6bc574b23c83a3c5ed2e453f3d5e2" +dependencies = [ + "bitvec", + "funty", + "lexical-core", + "memchr", + "version_check", +] + [[package]] name = "ntapi" version = "0.3.6" @@ -915,51 +1093,13 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" -[[package]] -name = "phf" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" -dependencies = [ - "phf_shared", -] - -[[package]] -name = "phf_shared" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" -dependencies = [ - "siphasher", -] - -[[package]] -name = "pin-project" -version = "0.4.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ffbc8e94b38ea3d2d8ba92aea2983b503cd75d0888d75b86bb37970b5698e15" -dependencies = [ - "pin-project-internal 0.4.27", -] - [[package]] name = "pin-project" version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96fa8ebb90271c4477f144354485b8068bd8f6b78b428b01ba892ca26caf0b63" dependencies = [ - "pin-project-internal 1.0.5", -] - -[[package]] -name = "pin-project-internal" -version = "0.4.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65ad2ae56b6abe3a1ee25f15ee605bacadb9a764edaba9c2bf4103800d4a1895" -dependencies = [ - "proc-macro2", - "quote", - "syn", + "pin-project-internal", ] [[package]] @@ -1003,37 +1143,6 @@ dependencies = [ "miniz_oxide 0.3.7", ] -[[package]] -name = "postgres-protocol" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70e34ad3dc5c56d036b9418185ee97e14b6766d55c8ccf9dc18302ad4e6371d9" -dependencies = [ - "base64 0.13.0", - "byteorder", - "bytes", - "fallible-iterator", - "hmac", - "md5", - "memchr", - "rand", - "sha2", - "stringprep", -] - -[[package]] -name = "postgres-types" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5493d9d4613b88b12433aa12890e74e74cd93fdc1e08b7c2aed4768aaae8414c" -dependencies = [ - "bytes", - "fallible-iterator", - "postgres-protocol", - "serde", - "serde_json", -] - [[package]] name = "ppv-lite86" version = "0.2.10" @@ -1061,6 +1170,43 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "procfs" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab8809e0c18450a2db0f236d2a44ec0b4c1412d0eb936233579f0990faa5d5cd" +dependencies = [ + "bitflags", + "byteorder", + "flate2", + "hex", + "lazy_static", + "libc", +] + +[[package]] +name = "prometheus" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8425533e7122f0c3cc7a37e6244b16ad3a2cc32ae7ac6276e2a75da0d9c200d" +dependencies = [ + "cfg-if", + "fnv", + "lazy_static", + "libc", + "parking_lot", + "procfs", + "protobuf", + "regex", + "thiserror", +] + +[[package]] +name = "protobuf" +version = "2.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f72884896d22e0da0e5b266cb9a780b791f6c3b2f5beab6368d6cd4f0dbb86" + [[package]] name = "quote" version = "1.0.9" @@ -1070,6 +1216,25 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "radium" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "941ba9d78d8e2f7ce474c015eea4d9c6d25b6a3327f9832ee29a4de27f91bbb8" + +[[package]] +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom 0.1.16", + "libc", + "rand_chacha 0.2.2", + "rand_core 0.5.1", + "rand_hc 0.2.0", +] + [[package]] name = "rand" version = "0.8.3" @@ -1077,9 +1242,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ef9e7e66b4468674bfcb0c81af8b7fa0bb154fa9f28eb840da5c447baeb8d7e" dependencies = [ "libc", - "rand_chacha", - "rand_core", - "rand_hc", + "rand_chacha 0.3.0", + "rand_core 0.6.2", + "rand_hc 0.3.0", +] + +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core 0.5.1", ] [[package]] @@ -1089,7 +1264,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e12735cf05c9e10bf21534da50a147b924d555dc7a547c42e6bb2d5b6017ae0d" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.6.2", +] + +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom 0.1.16", ] [[package]] @@ -1098,7 +1282,16 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7" dependencies = [ - "getrandom", + "getrandom 0.2.2", +] + +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core 0.5.1", ] [[package]] @@ -1107,7 +1300,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3190ef7066a446f2e7f42e239d161e905420ccab01eb967c9eb27d21b2322a73" dependencies = [ - "rand_core", + "rand_core 0.6.2", ] [[package]] @@ -1144,6 +1337,34 @@ dependencies = [ "bitflags", ] +[[package]] +name = "regex" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9251239e129e16308e70d853559389de218ac275b515068abc96829d05b948a" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", + "thread_local", +] + +[[package]] +name = "regex-automata" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae1ded71d66a4a97f5e961fd0cb25a5f366a42a41570d16a763a69c092c26ae4" +dependencies = [ + "byteorder", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5eb417147ba9860a96cfe72a0b93bf88fee1744b5636ec99ab20c1aa9376581" + [[package]] name = "remove_dir_all" version = "0.5.3" @@ -1287,6 +1508,7 @@ version = "1.0.62" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea1c6153794552ea7cf7cf63b1231a25de00ec90db326ba6264440fa08e31486" dependencies = [ + "indexmap", "itoa", "ryu", "serde", @@ -1304,6 +1526,19 @@ dependencies = [ "serde", ] +[[package]] +name = "sha-1" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfebf75d25bd900fd1e7d11501efab59bc846dbc76196839663e6637bba9f25f" +dependencies = [ + "block-buffer", + "cfg-if", + "cpuid-bool", + "digest", + "opaque-debug", +] + [[package]] name = "sha2" version = "0.9.3" @@ -1317,6 +1552,15 @@ dependencies = [ "opaque-debug", ] +[[package]] +name = "sharded-slab" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79c719719ee05df97490f80a45acfc99e5a30ce98a1e4fb67aee422745ae14e3" +dependencies = [ + "lazy_static", +] + [[package]] name = "signal-hook-registry" version = "1.3.0" @@ -1326,12 +1570,6 @@ dependencies = [ "libc", ] -[[package]] -name = "siphasher" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa8f3741c7372e75519bd9346068370c9cdaabcc1f9599cbcf2a2719352286b7" - [[package]] name = "slab" version = "0.4.2" @@ -1355,6 +1593,116 @@ dependencies = [ "winapi", ] +[[package]] +name = "sqlformat" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74c70f0235b9925cbb106c52af1a28b5ea4885a8b851e328b8562e257a389c2d" +dependencies = [ + "lazy_static", + "maplit", + "nom", + "regex", + "unicode_categories", +] + +[[package]] +name = "sqlx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2739d54a2ae9fdd0f545cb4e4b5574efb95e2ec71b7f921678e246fb20dcaaf" +dependencies = [ + "sqlx-core", + "sqlx-macros", +] + +[[package]] +name = "sqlx-core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1cad9cae4ca8947eba1a90e8ec7d3c59e7a768e2f120dc9013b669c34a90711" +dependencies = [ + "ahash 0.6.3", + "atoi", + "base64 0.13.0", + "bitflags", + "byteorder", + "bytes", + "crc", + "crossbeam-channel", + "crossbeam-queue", + "crossbeam-utils", + "either", + "futures-channel", + "futures-core", + "futures-util", + "hashlink", + "hex", + "hmac", + "itoa", + "libc", + "log", + "md-5", + "memchr", + "once_cell", + "parking_lot", + "percent-encoding", + "rand 0.7.3", + "serde", + "serde_json", + "sha-1", + "sha2", + "smallvec", + "sqlformat", + "sqlx-rt", + "stringprep", + "thiserror", + "tokio-stream", + "url", + "whoami", +] + +[[package]] +name = "sqlx-macros" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01caee2b3935b4efe152f3262afbe51546ce3b1fc27ad61014e1b3cf5f55366e" +dependencies = [ + "dotenv", + "either", + "futures", + "heck", + "hex", + "once_cell", + "proc-macro2", + "quote", + "serde", + "serde_json", + "sha2", + "sqlx-core", + "sqlx-rt", + "syn", + "url", +] + +[[package]] +name = "sqlx-rt" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ce2e16b6774c671cc183e1d202386fdf9cde1e8468c1894a7f2a63eb671c4f4" +dependencies = [ + "native-tls", + "once_cell", + "tokio", + "tokio-native-tls", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strength_reduce" version = "0.2.3" @@ -1388,6 +1736,12 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + [[package]] name = "tempfile" version = "3.2.0" @@ -1396,12 +1750,41 @@ checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22" dependencies = [ "cfg-if", "libc", - "rand", + "rand 0.8.3", "redox_syscall", "remove_dir_all", "winapi", ] +[[package]] +name = "thiserror" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76cc616c6abf8c8928e2fdcc0dbfab37175edd8fb49a4641066ad1364fdab146" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9be73a2caec27583d0046ef3796c3794f868a5bc813db689eed00c7631275cd1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thread_local" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8018d24e04c95ac8790716a5987d0fec4f8b27249ffa0f7d33f1369bdfb88cbd" +dependencies = [ + "once_cell", +] + [[package]] name = "tiff" version = "0.6.1" @@ -1413,6 +1796,16 @@ dependencies = [ "weezl", ] +[[package]] +name = "time" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "tinyvec" version = "1.1.1" @@ -1470,26 +1863,14 @@ dependencies = [ ] [[package]] -name = "tokio-postgres" -version = "0.7.0" +name = "tokio-stream" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cc9f82c2bfb06a33dd0dfb44b07ca98fe72df19e681d80c78d05a1bac2138e2" +checksum = "1981ad97df782ab506a1f43bf82c967326960d278acf3bf8279809648c3ff3ea" dependencies = [ - "async-trait", - "byteorder", - "bytes", - "fallible-iterator", - "futures", - "log", - "parking_lot", - "percent-encoding", - "phf", + "futures-core", "pin-project-lite", - "postgres-protocol", - "postgres-types", - "socket2", "tokio", - "tokio-util", ] [[package]] @@ -1520,9 +1901,21 @@ checksum = "f7d40a22fd029e33300d8d89a5cc8ffce18bb7c587662f54629e94c9de5487f3" dependencies = [ "cfg-if", "pin-project-lite", + "tracing-attributes", "tracing-core", ] +[[package]] +name = "tracing-attributes" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43f080ea7e4107844ef4766459426fa2d5c1ada2e47edba05dc7fa99d9629f47" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tracing-core" version = "0.1.17" @@ -1534,14 +1927,57 @@ dependencies = [ [[package]] name = "tracing-futures" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab7bb6f14721aa00656086e9335d363c5c8747bae02ebe32ea2c7dece5689b4c" +checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2" dependencies = [ - "pin-project 0.4.27", + "pin-project", "tracing", ] +[[package]] +name = "tracing-log" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e0f8c7178e13481ff6765bd169b33e8d554c5d2bbede5e32c356194be02b9b9" +dependencies = [ + "lazy_static", + "log", + "tracing-core", +] + +[[package]] +name = "tracing-serde" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb65ea441fbb84f9f6748fd496cf7f63ec9af5bca94dd86456978d055e8eb28b" +dependencies = [ + "serde", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1fa8f0c8f4c594e4fc9debc1990deab13238077271ba84dd853d54902ee3401" +dependencies = [ + "ansi_term", + "chrono", + "lazy_static", + "matchers", + "regex", + "serde", + "serde_json", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", + "tracing-serde", +] + [[package]] name = "transpose" version = "0.1.0" @@ -1588,12 +2024,24 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode-segmentation" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0d2e7be6ae3a5fa87eed5fb451aff96f2573d2694942e40543ae0bbe19c796" + [[package]] name = "unicode-xid" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + [[package]] name = "url" version = "2.2.0" @@ -1628,6 +2076,12 @@ dependencies = [ "try-lock", ] +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + [[package]] name = "wasi" version = "0.10.2+wasi-snapshot-preview1" @@ -1718,6 +2172,16 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a32b378380f4e9869b22f0b5177c68a5519f03b3454fde0b291455ddbae266c" +[[package]] +name = "whoami" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a921c0ad578a51c0b6c0bbb9b95f0ed11e90d61da506139e48a946edd11ee1e" +dependencies = [ + "wasm-bindgen", + "web-sys", +] + [[package]] name = "winapi" version = "0.3.9" @@ -1748,3 +2212,9 @@ checksum = "0120db82e8a1e0b9fb3345a539c478767c0048d842860994d96113d5b667bd69" dependencies = [ "winapi", ] + +[[package]] +name = "wyz" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85e60b0d1b5f99db2556934e21937020776a5d31520bf169e851ac44e6420214" diff --git a/Cargo.toml b/Cargo.toml index 8e1f73e..d780fa1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,20 +5,25 @@ authors = ["Syfaro "] edition = "2018" [dependencies] -reqwest = { version = "0.11", features = ["json"] } tokio = { version = "1", features = ["full"] } -futures = { version = "0.3", features = ["thread-pool"] } -hamming = "0.1.3" + +hyper = { version = "0.14", features = ["server"] } +reqwest = { version = "0.11", features = ["json"] } serde = "1" serde_json = "1" -tokio-postgres = { version = "0.7", features = ["with-serde_json-1"] } -bb8 = "0.7" -bb8-postgres = "0.7" +sqlx = { version = "0.5", features = ["runtime-tokio-native-tls", "postgres", "macros", "json", "offline"] } image = "0.23" +hamming = "0.1.3" img_hash = "3" +sha2 = "0.9" -[profile.dev.package."*"] -opt-level = 2 +tracing = "0.1" +tracing-subscriber = "0.2" + +anyhow = "1" + +lazy_static = "1" +prometheus = { version = "0.11", features = ["process"] } diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..36ee434 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,14 @@ +FROM rust:1-slim AS builder +WORKDIR /src +ENV SQLX_OFFLINE=true +RUN apt-get update -y && apt-get install -y libssl-dev pkg-config +COPY . . +RUN cargo install --root / --path . + +FROM debian:buster-slim +EXPOSE 8080 +ENV METRICS_HOST=0.0.0.0:8080 +WORKDIR /app +RUN apt-get update -y && apt-get install -y openssl ca-certificates && rm -rf /var/lib/apt/lists/* +COPY --from=builder /bin/e621-watcher /bin/e621-watcher +CMD ["/bin/e621-watcher"] diff --git a/sqlx-data.json b/sqlx-data.json new file mode 100644 index 0000000..65c5287 --- /dev/null +++ b/sqlx-data.json @@ -0,0 +1,37 @@ +{ + "db": "PostgreSQL", + "02b98e35cf7d650413c2730df732d7ae08119b11a5b2aaddcee08a7f06338924": { + "query": "SELECT max(id) max FROM e621", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "max", + "type_info": "Int4" + } + ], + "parameters": { + "Left": [] + }, + "nullable": [ + null + ] + } + }, + "a054594f7844f32e5968a54c0dab59716149a10411fcb16184a9070a82bb287d": { + "query": "INSERT INTO e621\n (id, data, hash, hash_error, sha256) VALUES\n ($1, $2, $3, $4, $5)\n ON CONFLICT (id) DO UPDATE SET\n data = EXCLUDED.data,\n hash = EXCLUDED.hash,\n hash_error = EXCLUDED.hash_error,\n sha256 = EXCLUDED.sha256", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Int4", + "Jsonb", + "Int8", + "Text", + "Bytea" + ] + }, + "nullable": [] + } + } +} \ No newline at end of file diff --git a/src/bin/import.rs b/src/bin/import.rs deleted file mode 100644 index 328da9b..0000000 --- a/src/bin/import.rs +++ /dev/null @@ -1,130 +0,0 @@ -async fn load_page( - client: &reqwest::Client, - before_id: Option, -) -> (Vec, serde_json::Value) { - println!("Loading page with before_id {:?}", before_id); - - let mut query: Vec<(&'static str, String)> = vec![("limit", "320".into())]; - - if let Some(before_id) = before_id { - query.push(("page", format!("b{}", before_id))); - if before_id <= 14 { - panic!("that's it."); - } - } - - let body = client - .get("https://e621.net/posts.json") - .query(&query) - .send() - .await - .expect("unable to make request") - .text() - .await - .expect("unable to convert to text"); - - let json = serde_json::from_str(&body).expect("Unable to parse data"); - - let page = match json { - serde_json::Value::Object(ref obj) => obj, - _ => panic!("top level value was not object"), - }; - - let posts = page - .get("posts") - .expect("unable to get posts object") - .as_array() - .expect("posts was not array"); - - let ids = posts - .iter() - .map(|post| { - let post = match post { - serde_json::Value::Object(post) => post, - _ => panic!("invalid post data"), - }; - - match post.get("id").expect("missing post id") { - serde_json::Value::Number(num) => { - num.as_i64().expect("invalid post id type") as i32 - } - _ => panic!("invalid post id"), - } - }) - .collect(); - - (ids, serde_json::Value::Array(posts.to_vec())) -} - -#[tokio::main] -async fn main() { - let dsn = std::env::var("POSTGRES_DSN").expect("missing postgres dsn"); - - let (db, connection) = tokio_postgres::connect(&dsn, tokio_postgres::NoTls) - .await - .expect("Unable to connect"); - - tokio::spawn(async move { - if let Err(e) = connection.await { - eprintln!("connection error: {}", e); - } - }); - - db.execute( - "CREATE TABLE IF NOT EXISTS e621 (id INTEGER PRIMARY KEY, hash BIGINT, data JSONB, hash_error TEXT)", - &[], - ) - .await - .expect("Unable to create table"); - - db.execute( - "CREATE OR REPLACE FUNCTION extract_post_data() RETURNS TRIGGER AS $$ - BEGIN - NEW.id = NEW.data->'id'; - RETURN NEW; - END $$ - LANGUAGE 'plpgsql'", - &[], - ) - .await - .expect("Unable to create function"); - - db.execute("DROP TRIGGER IF EXISTS call_extract_post_data ON e621", &[]) - .await - .expect("Unable to drop trigger"); - db.execute("CREATE TRIGGER call_extract_post_data BEFORE INSERT ON e621 FOR EACH ROW EXECUTE PROCEDURE extract_post_data()", &[]).await.expect("Unable to create trigger"); - - let mut min_id = db - .query_one("SELECT MIN(id) FROM e621", &[]) - .await - .map(|row| row.get("min")) - .expect("Unable to get min post"); - - let client = reqwest::Client::builder() - .user_agent("Syfaro test client syfaro@huefox.com") - .build() - .expect("Unable to build http client"); - - let mut now; - - loop { - now = std::time::Instant::now(); - - let (ids, post_data) = load_page(&client, min_id).await; - min_id = ids.into_iter().min(); - - db.execute( - "INSERT INTO e621 (data) SELECT json_array_elements($1::json)", - &[&post_data], - ) - .await - .expect("Unable to insert"); - - let elapsed = now.elapsed().as_millis() as u64; - if elapsed < 1000 { - let delay = 1000 - elapsed; - println!("delaying {}ms before loading next page", delay); - tokio::time::sleep(std::time::Duration::from_millis(delay)).await; - } - } -} diff --git a/src/bin/load_hashes.rs b/src/bin/load_hashes.rs deleted file mode 100644 index e701fd1..0000000 --- a/src/bin/load_hashes.rs +++ /dev/null @@ -1,161 +0,0 @@ -use bb8::Pool; -use bb8_postgres::PostgresConnectionManager; -use futures::StreamExt; - -struct NeededPost { - id: i32, - full_url: String, -} - -fn get_hasher() -> img_hash::Hasher<[u8; 8]> { - img_hash::HasherConfig::with_bytes_type::<[u8; 8]>() - .hash_alg(img_hash::HashAlg::Gradient) - .hash_size(8, 8) - .preproc_dct() - .to_hasher() -} - -async fn hash_url( - id: i32, - client: std::sync::Arc, - url: String, -) -> (i32, Result) { - let data = client - .get(&url) - .send() - .await - .expect("unable to get url") - .bytes() - .await - .expect("unable to get bytes"); - - let hasher = get_hasher(); - let image = match image::load_from_memory(&data) { - Ok(image) => image, - Err(e) => { - println!("{:?}", &data[0..50]); - return (id, Err(e)); - } - }; - - let hash = hasher.hash_image(&image); - let mut bytes: [u8; 8] = [0; 8]; - bytes.copy_from_slice(hash.as_bytes()); - - let num = i64::from_be_bytes(bytes); - - println!("{} - {}", url, num); - - (id, Ok(num)) -} - -async fn load_next_posts( - db: Pool>, -) -> Vec { - db.get() - .await - .unwrap() - .query( - "SELECT - id, - data->'file'->>'url' file_url - FROM - e621 - WHERE - hash IS NULL AND - hash_error IS NULL AND - data->'file'->>'ext' IN ('jpg', 'png') AND - data->'file'->>'url' <> '/images/deleted-preview.png' - ORDER BY id DESC - LIMIT 384", - &[], - ) - .await - .expect("Unable to get posts") - .into_iter() - .map(|row| NeededPost { - id: row.get("id"), - full_url: row.get("file_url"), - }) - .collect() -} - -#[tokio::main] -async fn main() { - let dsn = std::env::var("POSTGRES_DSN").expect("missing postgres dsn"); - - use std::str::FromStr; - let manager = PostgresConnectionManager::new( - tokio_postgres::Config::from_str(&dsn).expect("unable to parse postgres dsn"), - tokio_postgres::NoTls, - ); - - let pool = Pool::builder() - .build(manager) - .await - .expect("unable to build pool"); - - let client = reqwest::Client::builder() - .user_agent("Syfaro test client syfaro@huefox.com") - .build() - .expect("Unable to build http client"); - let client = std::sync::Arc::new(client); - - loop { - println!("running loop"); - - let needed_posts = load_next_posts(pool.clone()).await; - - if needed_posts.is_empty() { - println!("no posts, waiting a minute"); - tokio::time::sleep(std::time::Duration::from_secs(60)).await; - continue; - } - - futures::stream::iter( - needed_posts - .into_iter() - .map(|post| hash_url(post.id, client.clone(), post.full_url)), - ) - .buffer_unordered(8) - .for_each(|res: (i32, Result)| async { - let db = pool.get().await.expect("unable to get from pool"); - - match res { - (id, Ok(num)) => { - let mut conn = pool.get().await.unwrap(); - - let tx = conn - .transaction() - .await - .expect("Unable to create transaction"); - - tx.execute("UPDATE e621 SET hash = $2 WHERE id = $1", &[&id, &num]) - .await - .expect("Unable to update hash in database"); - - tx.execute( - "INSERT INTO hashes (e621_id, hash) VALUES ($1, $2)", - &[&id, &num], - ) - .await - .expect("Unable to insert hash to hashes table"); - - tx.commit().await.expect("Unable to commit tx"); - } - (id, Err(e)) => { - let desc = e.to_string(); - println!("[{}] hashing error - {}", id, desc); - db.execute( - "UPDATE e621 SET hash_error = $2 WHERE id = $1", - &[&id, &desc], - ) - .await - .expect("Unable to update hash error in database"); - } - } - () - }) - .await; - } -} diff --git a/src/bin/update.rs b/src/bin/update.rs deleted file mode 100644 index 36484ba..0000000 --- a/src/bin/update.rs +++ /dev/null @@ -1,171 +0,0 @@ -async fn load_page(client: &reqwest::Client, after_id: i32) -> (Vec, Vec) { - println!("Loading page with after_id {:?}", after_id); - - let mut query: Vec<(&'static str, String)> = vec![("limit", "320".into())]; - query.push(("page", format!("a{}", after_id))); - - let body = client - .get("https://e621.net/posts.json") - .query(&query) - .send() - .await - .expect("unable to make request") - .text() - .await - .expect("unable to convert to text"); - - let json = serde_json::from_str(&body).expect("Unable to parse data"); - - let page = match json { - serde_json::Value::Object(ref obj) => obj, - _ => panic!("top level value was not object"), - }; - - let posts = page - .get("posts") - .expect("unable to get posts object") - .as_array() - .expect("posts was not array"); - - let ids = posts - .iter() - .map(|post| { - let post = match post { - serde_json::Value::Object(post) => post, - _ => panic!("invalid post data"), - }; - - match post.get("id").expect("missing post id") { - serde_json::Value::Number(num) => { - num.as_i64().expect("invalid post id type") as i32 - } - _ => panic!("invalid post id"), - } - }) - .collect(); - - (ids, posts.to_vec()) -} - -async fn get_latest_id(client: &reqwest::Client) -> i32 { - println!("Looking up current highest ID"); - - let query = vec![("limit", "1")]; - - let body = client - .get("https://e621.net/posts.json") - .query(&query) - .send() - .await - .expect("unable to make request") - .text() - .await - .expect("unable to convert to text"); - - let json = serde_json::from_str(&body).expect("Unable to parse data"); - - let page = match json { - serde_json::Value::Object(ref obj) => obj, - _ => panic!("top level value was not object"), - }; - - let posts = page - .get("posts") - .expect("unable to get posts object") - .as_array() - .expect("posts was not array"); - - let ids: Vec = posts - .iter() - .map(|post| { - let post = match post { - serde_json::Value::Object(post) => post, - _ => panic!("invalid post data"), - }; - - match post.get("id").expect("missing post id") { - serde_json::Value::Number(num) => { - num.as_i64().expect("invalid post id type") as i32 - } - _ => panic!("invalid post id"), - } - }) - .collect(); - - ids.into_iter().max().expect("no ids found") -} - -#[tokio::main] -async fn main() { - let dsn = std::env::var("POSTGRES_DSN").expect("missing postgres dsn"); - - let (mut db, connection) = tokio_postgres::connect(&dsn, tokio_postgres::NoTls) - .await - .expect("Unable to connect"); - - tokio::spawn(async move { - if let Err(e) = connection.await { - eprintln!("connection error: {}", e); - } - }); - - let max_id: i32 = db - .query_one("SELECT max(id) FROM e621", &[]) - .await - .map(|row| row.get("max")) - .expect("Unable to get max post"); - - let client = reqwest::Client::builder() - .user_agent("Syfaro test client syfaro@huefox.com") - .build() - .expect("Unable to build http client"); - - println!("max is id: {}", max_id); - - let mut now; - - // Start with the minimum ID we're requesting being our previous highest - // ID found. - let mut min_id = max_id; - - // Find highest ID to look for. Once we get this value back, we've gotten - // as many new posts as we were looking for. - let latest_id = get_latest_id(&client).await; - - loop { - now = std::time::Instant::now(); - - // Load any posts with an ID higher than our previous run. - let (ids, post_data) = load_page(&client, min_id).await; - - // Calculate a new minimum value to find posts after by looking at the - // maximum value returned in this run. - min_id = *ids.iter().max().expect("no ids found"); - - let tx = db.transaction().await.expect("unable to start transaction"); - - for post in post_data { - tx.execute( - "INSERT INTO e621 (data) VALUES ($1::json) ON CONFLICT DO NOTHING", - &[&post], - ) - .await - .expect("Unable to insert"); - } - - tx.commit().await.expect("unable to commit transaction"); - - // If it contains the latest ID, we're done. - if ids.contains(&latest_id) { - println!("finished run, latest_id {}, max_id {}", latest_id, max_id); - break; - } - - let elapsed = now.elapsed().as_millis() as u64; - if elapsed < 1000 { - let delay = 1000 - elapsed; - println!("delaying {}ms before loading next page", delay); - tokio::time::sleep(std::time::Duration::from_millis(delay)).await; - } - } -} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..0c93de6 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,354 @@ +use anyhow::Context; +use lazy_static::lazy_static; +use prometheus::{register_histogram, register_int_gauge, Histogram, IntGauge}; +use sqlx::Connection; + +static USER_AGENT: &str = "e621-watcher / FuzzySearch Ingester / Syfaro "; + +lazy_static! { + static ref SUBMISSION_BACKLOG: IntGauge = register_int_gauge!( + "fuzzysearch_watcher_e621_submission_backlog", + "Number of submissions behind the latest ID" + ) + .unwrap(); + static ref INDEX_DURATION: Histogram = register_histogram!( + "fuzzysearch_watcher_e621_index_duration", + "Duration to load an index of submissions" + ) + .unwrap(); + static ref SUBMISSION_DURATION: Histogram = register_histogram!( + "fuzzysearch_watcher_e621_submission_duration", + "Duration to ingest a submission" + ) + .unwrap(); +} + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + tracing_subscriber::fmt::init(); + + create_metrics_server().await; + + let client = reqwest::ClientBuilder::default() + .user_agent(USER_AGENT) + .build()?; + + let mut conn = sqlx::PgConnection::connect(&std::env::var("DATABASE_URL")?).await?; + + let max_id: i32 = sqlx::query!("SELECT max(id) max FROM e621") + .fetch_one(&mut conn) + .await? + .max + .unwrap_or(0); + + tracing::info!(max_id, "Found maximum ID in database"); + + let mut now; + let mut min_id = max_id; + + let mut latest_id: Option = None; + + loop { + now = std::time::Instant::now(); + + let lid = match latest_id { + Some(latest_id) => latest_id, + None => { + let _hist = INDEX_DURATION.start_timer(); + let lid = get_latest_id(&client) + .await + .expect("Unable to get latest ID"); + drop(_hist); + + latest_id = Some(lid); + + lid + } + }; + + let _hist = INDEX_DURATION.start_timer(); + let page = load_page(&client, min_id).await?; + drop(_hist); + + let posts = get_page_posts(&page)?; + let post_ids = get_post_ids(&posts); + + tracing::trace!(?post_ids, "Collected posts"); + + min_id = match post_ids.iter().max() { + Some(id) => *id, + None => { + tracing::warn!("Found no new posts, sleeping"); + tokio::time::sleep(std::time::Duration::from_secs(60 * 5)).await; + continue; + } + }; + + SUBMISSION_BACKLOG.set((lid - min_id).into()); + + let mut tx = conn.begin().await?; + + for post in posts { + let _hist = SUBMISSION_DURATION.start_timer(); + insert_submission(&mut tx, &client, post).await?; + drop(_hist); + + SUBMISSION_BACKLOG.sub(1); + } + + tx.commit().await?; + + let elapsed = now.elapsed().as_millis() as u64; + if post_ids.contains(&lid) { + tracing::warn!(lid, "Page contained latest ID, sleeping"); + tokio::time::sleep(std::time::Duration::from_secs(60 * 5)).await; + + latest_id = None; + } else if elapsed < 1000 { + let delay = 1000 - elapsed; + tracing::warn!(delay, "Delaying before next request"); + tokio::time::sleep(std::time::Duration::from_millis(delay)).await; + } + } +} + +fn get_page_posts(page: &serde_json::Value) -> anyhow::Result<&Vec> { + let page = match page { + serde_json::Value::Object(ref obj) => obj, + _ => return Err(anyhow::anyhow!("Top level object was not an object")), + }; + + let posts = page + .get("posts") + .context("Page did not contain posts object")? + .as_array() + .context("Posts was not an array")?; + + Ok(posts) +} + +fn get_post_ids(posts: &[serde_json::Value]) -> Vec { + let ids: Vec = posts + .iter() + .filter_map(|post| { + let post = match post { + serde_json::Value::Object(post) => post, + _ => return None, + }; + + let id = match post.get("id")? { + serde_json::Value::Number(num) => num.as_i64()? as i32, + _ => return None, + }; + + Some(id) + }) + .collect(); + + ids +} + +#[tracing::instrument(err, skip(client))] +async fn get_latest_id(client: &reqwest::Client) -> anyhow::Result { + tracing::debug!("Looking up current highest ID"); + + let query = vec![("limit", "1")]; + + let page: serde_json::Value = client + .get("https://e621.net/posts.json") + .query(&query) + .send() + .await? + .json() + .await?; + + let posts = get_page_posts(&page)?; + + let id = get_post_ids(&posts) + .into_iter() + .max() + .context("Page had no IDs")?; + + tracing::info!(id, "Found maximum ID"); + + Ok(id) +} + +#[tracing::instrument(err, skip(client))] +async fn load_page(client: &reqwest::Client, after_id: i32) -> anyhow::Result { + tracing::debug!("Attempting to load page"); + + let query = vec![ + ("limit", "320".to_string()), + ("page", format!("a{}", after_id)), + ]; + + let body = client + .get("https://e621.net/posts.json") + .query(&query) + .send() + .await? + .json() + .await?; + + Ok(body) +} + +type ImageData = (Option, Option, Option>); + +#[tracing::instrument(err, skip(conn, client, post), fields(id))] +async fn insert_submission( + conn: &mut sqlx::Transaction<'_, sqlx::Postgres>, + client: &reqwest::Client, + post: &serde_json::Value, +) -> anyhow::Result<()> { + let id = post + .get("id") + .context("Post was missing ID")? + .as_i64() + .context("Post ID was not number")? as i32; + + tracing::Span::current().record("id", &id); + tracing::debug!("Inserting submission"); + + tracing::trace!(?post, "Evaluating post"); + + let (hash, hash_error, sha256): ImageData = if let Some((url, ext)) = get_post_url_ext(&post) { + if url != "/images/deleted-preview.png" && (ext == "jpg" || ext == "png") { + load_image(&client, &url).await? + } else { + tracing::debug!("Ignoring post as it is deleted or not a supported image format"); + + (None, None, None) + } + } else { + tracing::warn!("Post had missing URL or extension"); + + (None, None, None) + }; + + sqlx::query!( + "INSERT INTO e621 + (id, data, hash, hash_error, sha256) VALUES + ($1, $2, $3, $4, $5) + ON CONFLICT (id) DO UPDATE SET + data = EXCLUDED.data, + hash = EXCLUDED.hash, + hash_error = EXCLUDED.hash_error, + sha256 = EXCLUDED.sha256", + id, + post, + hash, + hash_error, + sha256 + ) + .execute(conn) + .await?; + + Ok(()) +} + +fn get_post_url_ext(post: &serde_json::Value) -> Option<(&str, &str)> { + let file = post.as_object()?.get("file")?.as_object()?; + + let url = file.get("url")?.as_str()?; + let ext = file.get("ext")?.as_str()?; + + Some((url, ext)) +} + +#[tracing::instrument(err, skip(client))] +async fn load_image(client: &reqwest::Client, url: &str) -> anyhow::Result { + use sha2::{Digest, Sha256}; + use std::convert::TryInto; + + let bytes = client.get(url).send().await?.bytes().await?; + + tracing::trace!(len = bytes.len(), "Got submission image bytes"); + + let mut hasher = Sha256::new(); + hasher.update(&bytes); + let result = hasher.finalize().to_vec(); + + tracing::trace!(?result, "Calculated image SHA256"); + + let hasher = get_hasher(); + let img = match image::load_from_memory(&bytes) { + Ok(img) => img, + Err(err) => { + tracing::error!(?err, "Unable to open image"); + return Ok((None, Some(err.to_string()), Some(result))); + } + }; + + tracing::trace!("Opened image successfully"); + + let hash = hasher.hash_image(&img); + let hash: [u8; 8] = hash.as_bytes().try_into()?; + let hash = i64::from_be_bytes(hash); + + tracing::trace!(?hash, "Calculated image hash"); + + Ok((Some(hash), None, Some(result))) +} + +fn get_hasher() -> img_hash::Hasher<[u8; 8]> { + img_hash::HasherConfig::with_bytes_type::<[u8; 8]>() + .hash_alg(img_hash::HashAlg::Gradient) + .hash_size(8, 8) + .preproc_dct() + .to_hasher() +} + +async fn provide_metrics( + _: hyper::Request, +) -> Result, std::convert::Infallible> { + use hyper::{Body, Response}; + use prometheus::{Encoder, TextEncoder}; + + let mut buffer = Vec::new(); + let encoder = TextEncoder::new(); + + let metric_families = prometheus::gather(); + encoder.encode(&metric_families, &mut buffer).unwrap(); + + Ok(Response::new(Body::from(buffer))) +} + +async fn create_metrics_server() { + use hyper::{ + service::{make_service_fn, service_fn}, + Server, + }; + use std::convert::Infallible; + use std::net::SocketAddr; + + let make_svc = + make_service_fn(|_conn| async { Ok::<_, Infallible>(service_fn(provide_metrics)) }); + + let addr: SocketAddr = std::env::var("METRICS_HOST") + .expect("Missing METRICS_HOST") + .parse() + .expect("Invalid METRICS_HOST"); + + let server = Server::bind(&addr).serve(make_svc); + + tokio::spawn(async move { server.await.expect("Metrics server error") }); +} + +#[cfg(test)] +mod tests { + #[tokio::test] + async fn test_get_latest_id() { + let client = reqwest::ClientBuilder::new() + .user_agent(super::USER_AGENT) + .build() + .unwrap(); + + let latest_id = super::get_latest_id(&client) + .await + .expect("No error should occur"); + + assert!(latest_id > 1_000_000, "Latest ID should be reasonably high"); + } +}