use std::convert::TryInto; use std::io::Read; use ffmpeg_next::{ format::{input, Pixel}, media::Type as MediaType, software::scaling::{context::Context, Flags as ScalingFlags}, util::frame::Video, }; use image::{gif::GifDecoder, AnimationDecoder}; use tempfile::NamedTempFile; use crate::get_hasher; /// Extract frames of a GIF into individual images and calculate a hash for each /// frame. Results are kept in the same order as seen in the GIF. /// /// This is a blocking function. #[tracing::instrument(skip(r))] pub fn extract_gif_hashes(r: R) -> Result, image::ImageError> { let hasher = crate::get_hasher(); // Begin by creating a new GifDecoder from our reader. Collect all frames // from the GIF. // // FUTURE: profile memory usage of collecting all frames instead of iterating let decoder = GifDecoder::new(r)?; let frames = decoder.into_frames().collect_frames()?; tracing::trace!(frames = frames.len(), "Collected GIF frames"); // Allocate a Vec to hold all our hashes. let mut hashes = Vec::with_capacity(frames.len()); // For each frame, get an ImageBuffer, hash the image, and append bytes into // the results. // // FUTURE: should this be parallelized? for frame in frames { let buf = frame.buffer(); let hash = hasher.hash_image(buf); let bytes = hash.as_bytes().try_into().unwrap(); hashes.push(bytes); } Ok(hashes) } /// Write the contents of `r` into a temporary file and return the handle to /// that file. This file should automatically be deleted when the handle is /// dropped. /// /// This is a blocking function. fn write_temp_file(mut r: R) -> std::io::Result { let mut f = NamedTempFile::new()?; std::io::copy(&mut r, &mut f)?; Ok(f) } /// Extract frames of a video into individual images and calculate a hash for /// each frame. Results are kept in the same order as seen in the input. /// /// This is a blocking function. #[tracing::instrument(skip(r))] pub fn extract_video_hashes(r: R) -> anyhow::Result> { let f = write_temp_file(r)?; // Create an input context from the given path. // // TODO: figure out if there's a way to provide data without creating a file let mut ictx = input(&f.path())?; // Select the best video stream and find it's index. let input = ictx .streams() .best(MediaType::Video) .ok_or(ffmpeg_next::Error::StreamNotFound)?; let stream_index = input.index(); // Create a new decoder that outputs 8-bit RGB colors with the same // dimensions as the source. let mut decoder = input.codec().decoder().video()?; let mut scaler = Context::get( decoder.format(), decoder.width(), decoder.height(), Pixel::RGB24, decoder.width(), decoder.height(), ScalingFlags::BILINEAR, )?; tracing::trace!("Initialized ffmpeg with video input"); let mut hashes: Vec<[u8; 8]> = Vec::new(); let hasher = get_hasher(); // Callback function run for each packet loaded by ffmpeg. It's responsible // for processing each frame into a hash and storing it. let mut receive_and_process_decoded_frames = |decoder: &mut ffmpeg_next::decoder::Video| -> Result<(), ffmpeg_next::Error> { let mut decoded = Video::empty(); while decoder.receive_frame(&mut decoded).is_ok() { // Create a frame buffer and decode data into it. let mut rgb_frame = Video::empty(); scaler.run(&decoded, &mut rgb_frame)?; // Convert raw data into an RgbImage for use with image hashing. let data = rgb_frame.data(0).to_vec(); let im: image::RgbImage = image::ImageBuffer::from_raw(decoder.width(), decoder.height(), data) .expect("Image frame data was invalid"); // Hash frame, convert to [u8; 8]. let hash = hasher.hash_image(&im); let hash = hash.as_bytes(); hashes.push( hash.try_into() .expect("img_hash provided incorrect number of bytes"), ); } Ok(()) }; // Now that we've set up our callback, iterate through file packets, decode // them, and send to our callback for processing. for (stream, packet) in ictx.packets() { if stream.index() != stream_index { continue; } decoder.send_packet(&packet)?; receive_and_process_decoded_frames(&mut decoder)?; } // Make sure all data has been processed with EOF. decoder.send_eof()?; receive_and_process_decoded_frames(&mut decoder)?; Ok(hashes) } #[cfg(test)] mod tests { use super::*; #[test] fn test_extract_gif_hashes() -> anyhow::Result<()> { use std::fs::File; let gif = File::open("../tests/fox.gif")?; let hashes = extract_gif_hashes(&gif)?; assert_eq!( hashes.len(), 47, "GIF did not have expected number of hashes" ); assert_eq!( hashes[0], [154, 64, 160, 169, 170, 53, 53, 221], "First frame had different hash" ); assert_eq!( hashes[1], [154, 64, 160, 169, 170, 53, 53, 221], "Second frame had different hash" ); Ok(()) } #[test] fn test_extract_video_hashes() -> anyhow::Result<()> { use std::fs::File; let video = File::open("../tests/video.webm")?; let hashes = extract_video_hashes(&video)?; assert_eq!( hashes.len(), 126, "Video did not have expected number of hashes" ); assert_eq!( hashes[0], [60, 166, 75, 61, 48, 166, 75, 205], "First frame had different hash" ); assert_eq!( hashes[1], [60, 166, 75, 61, 48, 166, 75, 205], "Second frame had different hash" ); Ok(()) } }