mirror of
https://github.com/Syfaro/fuzzysearch.git
synced 2024-11-05 06:23:08 +00:00
Add methods to extract hashes from a GIF or video.
This commit is contained in:
parent
f6319e6d90
commit
9eb653ce6b
21
.drone.yml
21
.drone.yml
@ -8,6 +8,12 @@ platform:
|
||||
arch: amd64
|
||||
|
||||
steps:
|
||||
- name: test
|
||||
image: rust:1-slim
|
||||
commands:
|
||||
- cargo build
|
||||
- cargo test
|
||||
|
||||
- name: build-latest
|
||||
image: plugins/docker
|
||||
settings:
|
||||
@ -22,19 +28,4 @@ steps:
|
||||
branch:
|
||||
- master
|
||||
|
||||
- name: build-branch
|
||||
image: plugins/docker
|
||||
settings:
|
||||
password:
|
||||
from_secret: docker_password
|
||||
registry: registry.huefox.com
|
||||
repo: registry.huefox.com/fuzzysearch
|
||||
tags: ${DRONE_BRANCH}
|
||||
username:
|
||||
from_secret: docker_username
|
||||
when:
|
||||
branch:
|
||||
exclude:
|
||||
- master
|
||||
|
||||
...
|
||||
|
136
Cargo.lock
generated
136
Cargo.lock
generated
@ -21,6 +21,12 @@ dependencies = [
|
||||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.35"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2c0df63cb2955042487fad3aefd2c6e3ae7389ac5dc1beb28921de0b69f779d4"
|
||||
|
||||
[[package]]
|
||||
name = "async-trait"
|
||||
version = "0.1.42"
|
||||
@ -87,6 +93,26 @@ dependencies = [
|
||||
"tokio-postgres",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bindgen"
|
||||
version = "0.54.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "66c0bb6167449588ff70803f4127f0684f9063097eca5016f37eb52b92c2cf36"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"cexpr",
|
||||
"cfg-if 0.1.10",
|
||||
"clang-sys",
|
||||
"lazy_static",
|
||||
"lazycell",
|
||||
"peeking_take_while",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"regex",
|
||||
"rustc-hash",
|
||||
"shlex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.2.1"
|
||||
@ -175,6 +201,15 @@ version = "1.0.66"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4c0496836a84f8d0495758516b8621a622beb77c0fed418570e50764093ced48"
|
||||
|
||||
[[package]]
|
||||
name = "cexpr"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f4aedb84272dbe89af497cf81375129abda4fc0a9e7c5d317498c15cc30c0d27"
|
||||
dependencies = [
|
||||
"nom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "0.1.10"
|
||||
@ -200,6 +235,17 @@ dependencies = [
|
||||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clang-sys"
|
||||
version = "0.29.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fe6837df1d5cba2397b835c8530f51723267e16abbf83892e9e5af4f0e5dd10a"
|
||||
dependencies = [
|
||||
"glob",
|
||||
"libc",
|
||||
"libloading",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cloudabi"
|
||||
version = "0.0.3"
|
||||
@ -344,6 +390,31 @@ version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7"
|
||||
|
||||
[[package]]
|
||||
name = "ffmpeg-next"
|
||||
version = "4.3.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5e17e735bf446b8e57b794fcb5841106817e890de40275dfad367493a752c3e9"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"ffmpeg-sys-next",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ffmpeg-sys-next"
|
||||
version = "4.3.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2fde8cbf91a1b044b86d9e9e944c33806a68f5e34e4281033594ceaab47a3746"
|
||||
dependencies = [
|
||||
"bindgen",
|
||||
"cc",
|
||||
"libc",
|
||||
"num_cpus",
|
||||
"pkg-config",
|
||||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fnv"
|
||||
version = "1.0.7"
|
||||
@ -481,11 +552,13 @@ dependencies = [
|
||||
name = "fuzzysearch"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bb8",
|
||||
"bb8-postgres",
|
||||
"bk-tree",
|
||||
"bytes 0.5.6",
|
||||
"chrono",
|
||||
"ffmpeg-next",
|
||||
"futures",
|
||||
"futures-util",
|
||||
"hamming",
|
||||
@ -494,6 +567,7 @@ dependencies = [
|
||||
"opentelemetry",
|
||||
"opentelemetry-jaeger",
|
||||
"serde",
|
||||
"tempfile",
|
||||
"tokio 0.3.5",
|
||||
"tokio-postgres",
|
||||
"tracing",
|
||||
@ -556,6 +630,12 @@ dependencies = [
|
||||
"weezl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "glob"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
|
||||
|
||||
[[package]]
|
||||
name = "h2"
|
||||
version = "0.2.7"
|
||||
@ -807,12 +887,28 @@ version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
|
||||
[[package]]
|
||||
name = "lazycell"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.81"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1482821306169ec4d07f6aca392a4681f66c75c9918aa49641a2595db64053cb"
|
||||
|
||||
[[package]]
|
||||
name = "libloading"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f2b111a074963af1d37a139918ac6d49ad1d0d5e47f72fd55388619691a7d753"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lock_api"
|
||||
version = "0.4.2"
|
||||
@ -998,6 +1094,16 @@ dependencies = [
|
||||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
version = "5.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ntapi"
|
||||
version = "0.3.6"
|
||||
@ -1144,6 +1250,12 @@ dependencies = [
|
||||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "peeking_take_while"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
|
||||
|
||||
[[package]]
|
||||
name = "percent-encoding"
|
||||
version = "2.1.0"
|
||||
@ -1226,6 +1338,12 @@ version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
|
||||
|
||||
[[package]]
|
||||
name = "pkg-config"
|
||||
version = "0.3.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c"
|
||||
|
||||
[[package]]
|
||||
name = "png"
|
||||
version = "0.16.7"
|
||||
@ -1550,6 +1668,12 @@ dependencies = [
|
||||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustc-hash"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
||||
|
||||
[[package]]
|
||||
name = "rustc_version"
|
||||
version = "0.2.3"
|
||||
@ -1717,6 +1841,12 @@ dependencies = [
|
||||
"loom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shlex"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2"
|
||||
|
||||
[[package]]
|
||||
name = "siphasher"
|
||||
version = "0.3.3"
|
||||
@ -2198,6 +2328,12 @@ version = "0.7.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "05e42f7c18b8f902290b009cde6d651262f956c98bc51bca4cd1d511c9cd85c7"
|
||||
|
||||
[[package]]
|
||||
name = "vcpkg"
|
||||
version = "0.2.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6454029bf181f092ad1b853286f23e2c507d8e8194d01d92da4a55c274a5508c"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.9.2"
|
||||
|
@ -17,8 +17,10 @@ tokio = { version = "0.3", features = ["macros", "rt-multi-thread", "sync"] }
|
||||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
|
||||
anyhow = "1"
|
||||
chrono = "0.4"
|
||||
bytes = "0.5"
|
||||
tempfile = "3"
|
||||
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
warp = "0.2"
|
||||
@ -27,8 +29,10 @@ tokio-postgres = "0.6"
|
||||
bb8 = "0.6"
|
||||
bb8-postgres = "0.6"
|
||||
|
||||
img_hash = "3"
|
||||
image = "0.23"
|
||||
ffmpeg-next = "4"
|
||||
|
||||
img_hash = "3"
|
||||
hamming = "0.1"
|
||||
|
||||
bk-tree = "0.3"
|
||||
|
@ -9,6 +9,7 @@ mod handlers;
|
||||
mod models;
|
||||
mod types;
|
||||
mod utils;
|
||||
mod video;
|
||||
|
||||
use warp::Filter;
|
||||
|
||||
@ -89,6 +90,8 @@ impl bk_tree::Metric<Node> for Hamming {
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
ffmpeg_next::init().expect("Unable to initialize ffmpeg");
|
||||
|
||||
configure_tracing();
|
||||
|
||||
let s = std::env::var("POSTGRES_DSN").expect("Missing POSTGRES_DSN");
|
||||
|
205
src/video.rs
Normal file
205
src/video.rs
Normal file
@ -0,0 +1,205 @@
|
||||
use std::convert::TryInto;
|
||||
use std::io::Read;
|
||||
|
||||
use ffmpeg_next::{
|
||||
format::{input, Pixel},
|
||||
media::Type as MediaType,
|
||||
software::scaling::{context::Context, Flags as ScalingFlags},
|
||||
util::frame::Video,
|
||||
};
|
||||
use image::{gif::GifDecoder, AnimationDecoder};
|
||||
use tempfile::NamedTempFile;
|
||||
|
||||
use crate::get_hasher;
|
||||
|
||||
/// Extract frames of a GIF into individual images and calculate a hash for each
|
||||
/// frame. Results are kept in the same order as seen in the GIF.
|
||||
///
|
||||
/// This is a blocking function.
|
||||
#[tracing::instrument(skip(r))]
|
||||
pub fn extract_gif_hashes<R: Read>(r: R) -> Result<Vec<[u8; 8]>, image::ImageError> {
|
||||
let hasher = crate::get_hasher();
|
||||
|
||||
// Begin by creating a new GifDecoder from our reader. Collect all frames
|
||||
// from the GIF.
|
||||
//
|
||||
// FUTURE: profile memory usage of collecting all frames instead of iterating
|
||||
let decoder = GifDecoder::new(r)?;
|
||||
let frames = decoder.into_frames().collect_frames()?;
|
||||
|
||||
tracing::trace!(frames = frames.len(), "Collected GIF frames");
|
||||
|
||||
// Allocate a Vec to hold all our hashes.
|
||||
let mut hashes = Vec::with_capacity(frames.len());
|
||||
|
||||
// For each frame, get an ImageBuffer, hash the image, and append bytes into
|
||||
// the results.
|
||||
//
|
||||
// FUTURE: should this be parallelized?
|
||||
for frame in frames {
|
||||
let buf = frame.buffer();
|
||||
|
||||
let hash = hasher.hash_image(buf);
|
||||
let bytes = hash.as_bytes().try_into().unwrap();
|
||||
|
||||
hashes.push(bytes);
|
||||
}
|
||||
|
||||
Ok(hashes)
|
||||
}
|
||||
|
||||
/// Write the contents of `r` into a temporary file and return the handle to
|
||||
/// that file. This file should automatically be deleted when the handle is
|
||||
/// dropped.
|
||||
///
|
||||
/// This is a blocking function.
|
||||
fn write_temp_file<R: Read>(mut r: R) -> std::io::Result<NamedTempFile> {
|
||||
let mut f = NamedTempFile::new()?;
|
||||
std::io::copy(&mut r, &mut f)?;
|
||||
|
||||
Ok(f)
|
||||
}
|
||||
|
||||
/// Extract frames of a video into individual images and calculate a hash for
|
||||
/// each frame. Results are kept in the same order as seen in the input.
|
||||
///
|
||||
/// This is a blocking function.
|
||||
#[tracing::instrument(skip(r))]
|
||||
pub fn extract_video_hashes<R: Read>(r: R) -> anyhow::Result<Vec<[u8; 8]>> {
|
||||
let f = write_temp_file(r)?;
|
||||
|
||||
// Create an input context from the given path.
|
||||
//
|
||||
// TODO: figure out if there's a way to provide data without creating a file
|
||||
let mut ictx = input(&f.path())?;
|
||||
|
||||
// Select the best video stream and find it's index.
|
||||
let input = ictx
|
||||
.streams()
|
||||
.best(MediaType::Video)
|
||||
.ok_or(ffmpeg_next::Error::StreamNotFound)?;
|
||||
let stream_index = input.index();
|
||||
|
||||
// Create a new decoder that outputs 8-bit RGB colors with the same
|
||||
// dimensions as the source.
|
||||
let mut decoder = input.codec().decoder().video()?;
|
||||
let mut scaler = Context::get(
|
||||
decoder.format(),
|
||||
decoder.width(),
|
||||
decoder.height(),
|
||||
Pixel::RGB24,
|
||||
decoder.width(),
|
||||
decoder.height(),
|
||||
ScalingFlags::BILINEAR,
|
||||
)?;
|
||||
|
||||
tracing::trace!("Initialized ffmpeg with video input");
|
||||
|
||||
let mut hashes: Vec<[u8; 8]> = Vec::new();
|
||||
let hasher = get_hasher();
|
||||
|
||||
// Callback function run for each packet loaded by ffmpeg. It's responsible
|
||||
// for processing each frame into a hash and storing it.
|
||||
let mut receive_and_process_decoded_frames =
|
||||
|decoder: &mut ffmpeg_next::decoder::Video| -> Result<(), ffmpeg_next::Error> {
|
||||
let mut decoded = Video::empty();
|
||||
|
||||
while decoder.receive_frame(&mut decoded).is_ok() {
|
||||
// Create a frame buffer and decode data into it.
|
||||
let mut rgb_frame = Video::empty();
|
||||
scaler.run(&decoded, &mut rgb_frame)?;
|
||||
|
||||
// Convert raw data into an RgbImage for use with image hashing.
|
||||
let data = rgb_frame.data(0).to_vec();
|
||||
let im: image::RgbImage =
|
||||
image::ImageBuffer::from_raw(decoder.width(), decoder.height(), data)
|
||||
.expect("Image frame data was invalid");
|
||||
|
||||
// Hash frame, convert to [u8; 8].
|
||||
let hash = hasher.hash_image(&im);
|
||||
let hash = hash.as_bytes();
|
||||
hashes.push(
|
||||
hash.try_into()
|
||||
.expect("img_hash provided incorrect number of bytes"),
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
};
|
||||
|
||||
// Now that we've set up our callback, iterate through file packets, decode
|
||||
// them, and send to our callback for processing.
|
||||
for (stream, packet) in ictx.packets() {
|
||||
if stream.index() != stream_index {
|
||||
continue;
|
||||
}
|
||||
|
||||
decoder.send_packet(&packet)?;
|
||||
receive_and_process_decoded_frames(&mut decoder)?;
|
||||
}
|
||||
|
||||
// Make sure all data has been processed with EOF.
|
||||
decoder.send_eof()?;
|
||||
receive_and_process_decoded_frames(&mut decoder)?;
|
||||
|
||||
Ok(hashes)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::fs::File;

    use super::*;

    /// Hashes the bundled GIF fixture and checks the frame count along with
    /// the hashes of the first two frames.
    #[test]
    fn test_extract_gif_hashes() -> anyhow::Result<()> {
        let fixture = File::open("tests/fox.gif")?;
        let frame_hashes = extract_gif_hashes(&fixture)?;

        let frame_count = frame_hashes.len();
        assert_eq!(
            frame_count, 47,
            "GIF did not have expected number of hashes"
        );

        let expected_first: [u8; 8] = [154, 64, 160, 169, 170, 53, 181, 221];
        let expected_second: [u8; 8] = [154, 64, 160, 169, 170, 53, 53, 221];

        assert_eq!(
            frame_hashes[0], expected_first,
            "First frame had different hash"
        );
        assert_eq!(
            frame_hashes[1], expected_second,
            "Second frame had different hash"
        );

        Ok(())
    }

    /// Hashes the bundled WebM fixture and checks the frame count along with
    /// the hashes of the first two frames.
    #[test]
    fn test_extract_video_hashes() -> anyhow::Result<()> {
        let fixture = File::open("tests/video.webm")?;
        let frame_hashes = extract_video_hashes(&fixture)?;

        let frame_count = frame_hashes.len();
        assert_eq!(
            frame_count, 126,
            "Video did not have expected number of hashes"
        );

        // The first two frames of the fixture are expected to hash identically.
        let expected_first: [u8; 8] = [60, 166, 75, 61, 48, 166, 73, 205];
        let expected_second: [u8; 8] = [60, 166, 75, 61, 48, 166, 73, 205];

        assert_eq!(
            frame_hashes[0], expected_first,
            "First frame had different hash"
        );
        assert_eq!(
            frame_hashes[1], expected_second,
            "Second frame had different hash"
        );

        Ok(())
    }
}
|
BIN
tests/fox.gif
Normal file
BIN
tests/fox.gif
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.1 MiB |
BIN
tests/video.webm
Normal file
BIN
tests/video.webm
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user