diff --git a/.drone.yml b/.drone.yml index ba1f691..9bb28ae 100644 --- a/.drone.yml +++ b/.drone.yml @@ -8,6 +8,12 @@ platform: arch: amd64 steps: +- name: test + image: rust:1-slim + commands: + - cargo build + - cargo test + - name: build-latest image: plugins/docker settings: @@ -22,19 +28,4 @@ steps: branch: - master -- name: build-branch - image: plugins/docker - settings: - password: - from_secret: docker_password - registry: registry.huefox.com - repo: registry.huefox.com/fuzzysearch - tags: ${DRONE_BRANCH} - username: - from_secret: docker_username - when: - branch: - exclude: - - master - ... diff --git a/Cargo.lock b/Cargo.lock index b00810d..9dedb09 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -21,6 +21,12 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "anyhow" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c0df63cb2955042487fad3aefd2c6e3ae7389ac5dc1beb28921de0b69f779d4" + [[package]] name = "async-trait" version = "0.1.42" @@ -87,6 +93,26 @@ dependencies = [ "tokio-postgres", ] +[[package]] +name = "bindgen" +version = "0.54.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66c0bb6167449588ff70803f4127f0684f9063097eca5016f37eb52b92c2cf36" +dependencies = [ + "bitflags", + "cexpr", + "cfg-if 0.1.10", + "clang-sys", + "lazy_static", + "lazycell", + "peeking_take_while", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", +] + [[package]] name = "bitflags" version = "1.2.1" @@ -175,6 +201,15 @@ version = "1.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c0496836a84f8d0495758516b8621a622beb77c0fed418570e50764093ced48" +[[package]] +name = "cexpr" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4aedb84272dbe89af497cf81375129abda4fc0a9e7c5d317498c15cc30c0d27" +dependencies = [ + "nom", +] + [[package]] name = "cfg-if" version = "0.1.10" @@ -200,6 +235,17 @@ 
dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "clang-sys" +version = "0.29.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe6837df1d5cba2397b835c8530f51723267e16abbf83892e9e5af4f0e5dd10a" +dependencies = [ + "glob", + "libc", + "libloading", +] + [[package]] name = "cloudabi" version = "0.0.3" @@ -344,6 +390,31 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" +[[package]] +name = "ffmpeg-next" +version = "4.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e17e735bf446b8e57b794fcb5841106817e890de40275dfad367493a752c3e9" +dependencies = [ + "bitflags", + "ffmpeg-sys-next", + "libc", +] + +[[package]] +name = "ffmpeg-sys-next" +version = "4.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fde8cbf91a1b044b86d9e9e944c33806a68f5e34e4281033594ceaab47a3746" +dependencies = [ + "bindgen", + "cc", + "libc", + "num_cpus", + "pkg-config", + "vcpkg", +] + [[package]] name = "fnv" version = "1.0.7" @@ -481,11 +552,13 @@ dependencies = [ name = "fuzzysearch" version = "0.1.0" dependencies = [ + "anyhow", "bb8", "bb8-postgres", "bk-tree", "bytes 0.5.6", "chrono", + "ffmpeg-next", "futures", "futures-util", "hamming", @@ -494,6 +567,7 @@ dependencies = [ "opentelemetry", "opentelemetry-jaeger", "serde", + "tempfile", "tokio 0.3.5", "tokio-postgres", "tracing", @@ -556,6 +630,12 @@ dependencies = [ "weezl", ] +[[package]] +name = "glob" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" + [[package]] name = "h2" version = "0.2.7" @@ -807,12 +887,28 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "lazycell" 
+version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + [[package]] name = "libc" version = "0.2.81" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1482821306169ec4d07f6aca392a4681f66c75c9918aa49641a2595db64053cb" +[[package]] +name = "libloading" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b111a074963af1d37a139918ac6d49ad1d0d5e47f72fd55388619691a7d753" +dependencies = [ + "cc", + "winapi 0.3.9", +] + [[package]] name = "lock_api" version = "0.4.2" @@ -998,6 +1094,16 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "nom" +version = "5.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af" +dependencies = [ + "memchr", + "version_check", +] + [[package]] name = "ntapi" version = "0.3.6" @@ -1144,6 +1250,12 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "peeking_take_while" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" + [[package]] name = "percent-encoding" version = "2.1.0" @@ -1226,6 +1338,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkg-config" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c" + [[package]] name = "png" version = "0.16.7" @@ -1550,6 +1668,12 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustc_version" version = "0.2.3" @@ -1717,6 +1841,12 @@ dependencies = [ "loom", ] +[[package]] +name = "shlex" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2" + [[package]] name = "siphasher" version = "0.3.3" @@ -2198,6 +2328,12 @@ version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05e42f7c18b8f902290b009cde6d651262f956c98bc51bca4cd1d511c9cd85c7" +[[package]] +name = "vcpkg" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6454029bf181f092ad1b853286f23e2c507d8e8194d01d92da4a55c274a5508c" + [[package]] name = "version_check" version = "0.9.2" diff --git a/Cargo.toml b/Cargo.toml index 83a7ce7..2ac4070 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,8 +17,10 @@ tokio = { version = "0.3", features = ["macros", "rt-multi-thread", "sync"] } futures = "0.3" futures-util = "0.3" +anyhow = "1" chrono = "0.4" bytes = "0.5" +tempfile = "3" serde = { version = "1", features = ["derive"] } warp = "0.2" @@ -27,8 +29,10 @@ tokio-postgres = "0.6" bb8 = "0.6" bb8-postgres = "0.6" -img_hash = "3" image = "0.23" +ffmpeg-next = "4" + +img_hash = "3" hamming = "0.1" bk-tree = "0.3" diff --git a/src/main.rs b/src/main.rs index 5b542b5..6976f09 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,6 +9,7 @@ mod handlers; mod models; mod types; mod utils; +mod video; use warp::Filter; @@ -89,6 +90,8 @@ impl bk_tree::Metric for Hamming { #[tokio::main] async fn main() { + ffmpeg_next::init().expect("Unable to initialize ffmpeg"); + configure_tracing(); let s = std::env::var("POSTGRES_DSN").expect("Missing POSTGRES_DSN"); diff --git a/src/video.rs b/src/video.rs new file mode 100644 index 0000000..27a4180 --- /dev/null +++ b/src/video.rs @@ -0,0 +1,205 @@ +use std::convert::TryInto; +use 
std::io::Read;

use ffmpeg_next::{
    format::{input, Pixel},
    media::Type as MediaType,
    software::scaling::{context::Context, Flags as ScalingFlags},
    util::frame::Video,
};
use image::{gif::GifDecoder, AnimationDecoder};
use tempfile::NamedTempFile;

use crate::get_hasher;

/// Extract frames of a GIF into individual images and calculate a hash for each
/// frame. Results are kept in the same order as seen in the GIF.
///
/// This is a blocking function.
///
/// # Errors
///
/// Returns an [`image::ImageError`] if a GIF decoder cannot be created from
/// `r` or if collecting its frames fails.
///
/// # Panics
///
/// Panics if the hasher yields a hash that is not exactly 8 bytes long.
#[tracing::instrument(skip(r))]
pub fn extract_gif_hashes<R: Read>(r: R) -> Result<Vec<[u8; 8]>, image::ImageError> {
    let hasher = get_hasher();

    // Begin by creating a new GifDecoder from our reader. Collect all frames
    // from the GIF.
    //
    // FUTURE: profile memory usage of collecting all frames instead of iterating
    let decoder = GifDecoder::new(r)?;
    let frames = decoder.into_frames().collect_frames()?;

    tracing::trace!(frames = frames.len(), "Collected GIF frames");

    // One hash per frame, so the final size is known up front.
    let mut hashes: Vec<[u8; 8]> = Vec::with_capacity(frames.len());

    // For each frame, get an ImageBuffer, hash the image, and append bytes into
    // the results.
    //
    // FUTURE: should this be parallelized?
    for frame in frames {
        let hash = hasher.hash_image(frame.buffer());

        // Same invariant (and message) as the video path: the hash bytes must
        // fit a fixed `[u8; 8]`.
        let bytes: [u8; 8] = hash
            .as_bytes()
            .try_into()
            .expect("img_hash provided incorrect number of bytes");

        hashes.push(bytes);
    }

    Ok(hashes)
}

/// Write the contents of `r` into a temporary file and return the handle to
/// that file. This file should automatically be deleted when the handle is
/// dropped.
///
/// This is a blocking function.
///
/// # Errors
///
/// Returns any I/O error raised while creating the temporary file or while
/// copying `r` into it.
fn write_temp_file<R: Read>(mut r: R) -> std::io::Result<NamedTempFile> {
    let mut f = NamedTempFile::new()?;
    std::io::copy(&mut r, &mut f)?;

    Ok(f)
}

/// Extract frames of a video into individual images and calculate a hash for
/// each frame. Results are kept in the same order as seen in the input.
///
/// This is a blocking function.
+#[tracing::instrument(skip(r))] +pub fn extract_video_hashes(r: R) -> anyhow::Result> { + let f = write_temp_file(r)?; + + // Create an input context from the given path. + // + // TODO: figure out if there's a way to provide data without creating a file + let mut ictx = input(&f.path())?; + + // Select the best video stream and find it's index. + let input = ictx + .streams() + .best(MediaType::Video) + .ok_or(ffmpeg_next::Error::StreamNotFound)?; + let stream_index = input.index(); + + // Create a new decoder that outputs 8-bit RGB colors with the same + // dimensions as the source. + let mut decoder = input.codec().decoder().video()?; + let mut scaler = Context::get( + decoder.format(), + decoder.width(), + decoder.height(), + Pixel::RGB24, + decoder.width(), + decoder.height(), + ScalingFlags::BILINEAR, + )?; + + tracing::trace!("Initialized ffmpeg with video input"); + + let mut hashes: Vec<[u8; 8]> = Vec::new(); + let hasher = get_hasher(); + + // Callback function run for each packet loaded by ffmpeg. It's responsible + // for processing each frame into a hash and storing it. + let mut receive_and_process_decoded_frames = + |decoder: &mut ffmpeg_next::decoder::Video| -> Result<(), ffmpeg_next::Error> { + let mut decoded = Video::empty(); + + while decoder.receive_frame(&mut decoded).is_ok() { + // Create a frame buffer and decode data into it. + let mut rgb_frame = Video::empty(); + scaler.run(&decoded, &mut rgb_frame)?; + + // Convert raw data into an RgbImage for use with image hashing. + let data = rgb_frame.data(0).to_vec(); + let im: image::RgbImage = + image::ImageBuffer::from_raw(decoder.width(), decoder.height(), data) + .expect("Image frame data was invalid"); + + // Hash frame, convert to [u8; 8]. 
+ let hash = hasher.hash_image(&im); + let hash = hash.as_bytes(); + hashes.push( + hash.try_into() + .expect("img_hash provided incorrect number of bytes"), + ); + } + + Ok(()) + }; + + // Now that we've set up our callback, iterate through file packets, decode + // them, and send to our callback for processing. + for (stream, packet) in ictx.packets() { + if stream.index() != stream_index { + continue; + } + + decoder.send_packet(&packet)?; + receive_and_process_decoded_frames(&mut decoder)?; + } + + // Make sure all data has been processed with EOF. + decoder.send_eof()?; + receive_and_process_decoded_frames(&mut decoder)?; + + Ok(hashes) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_extract_gif_hashes() -> anyhow::Result<()> { + use std::fs::File; + + let gif = File::open("tests/fox.gif")?; + let hashes = extract_gif_hashes(&gif)?; + + assert_eq!( + hashes.len(), + 47, + "GIF did not have expected number of hashes" + ); + + assert_eq!( + hashes[0], + [154, 64, 160, 169, 170, 53, 181, 221], + "First frame had different hash" + ); + assert_eq!( + hashes[1], + [154, 64, 160, 169, 170, 53, 53, 221], + "Second frame had different hash" + ); + + Ok(()) + } + + #[test] + fn test_extract_video_hashes() -> anyhow::Result<()> { + use std::fs::File; + + let video = File::open("tests/video.webm")?; + let hashes = extract_video_hashes(&video)?; + + assert_eq!( + hashes.len(), + 126, + "Video did not have expected number of hashes" + ); + + assert_eq!( + hashes[0], + [60, 166, 75, 61, 48, 166, 73, 205], + "First frame had different hash" + ); + assert_eq!( + hashes[1], + [60, 166, 75, 61, 48, 166, 73, 205], + "Second frame had different hash" + ); + + Ok(()) + } +} diff --git a/tests/fox.gif b/tests/fox.gif new file mode 100644 index 0000000..16b20c2 Binary files /dev/null and b/tests/fox.gif differ diff --git a/tests/video.webm b/tests/video.webm new file mode 100644 index 0000000..724b612 Binary files /dev/null and b/tests/video.webm differ