No longer use LUT for 32bit input
That means you’ll finally get full 32-bit precision, not just 8-bit. This comes at a cost (~20% lower FPS), but it’s a lot more accurate, and there’s potentially room for optimization in the future because we now actually do the math for every pixel instead of relying on hacky lookups in a 256-element vector.
This commit is contained in:
parent
d762821b88
commit
8ef0654595
@ -3,12 +3,11 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||
|
||||
fn mask_value(c: &mut Criterion) {
|
||||
black_box(FLOAT_RANGE.iter().count());
|
||||
let ls = black_box(10.0);
|
||||
let y = black_box(0.412323);
|
||||
let ls = black_box(calc_luma_scaling(0.412323, 10.0));
|
||||
c.bench_function("mask value y=0.412", |b| {
|
||||
b.iter(|| {
|
||||
FLOAT_RANGE.iter().for_each(|&x| {
|
||||
black_box(get_mask_value(black_box(x), y, ls));
|
||||
black_box(get_mask_value(black_box(x), ls));
|
||||
});
|
||||
})
|
||||
});
|
||||
|
57
src/mask.rs
57
src/mask.rs
@ -17,14 +17,14 @@ lazy_static! {
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn get_mask_value(x: f32, y: f32, luma_scaling: f32) -> f32 {
|
||||
pub fn get_mask_value(x: f32, luma_scaling: f32) -> f32 {
|
||||
f32::powf(
|
||||
1.0 - (x
|
||||
* (x.mul_add(
|
||||
x.mul_add(x.mul_add(x.mul_add(18.188, -45.47), 36.624), -9.466),
|
||||
1.124,
|
||||
))),
|
||||
y * y * luma_scaling,
|
||||
luma_scaling,
|
||||
)
|
||||
}
|
||||
|
||||
@ -39,17 +39,11 @@ macro_rules! from_property {
|
||||
|
||||
macro_rules! int_filter {
|
||||
($type:ty, $fname:ident) => {
|
||||
fn $fname(
|
||||
frame: &mut FrameRefMut,
|
||||
src_frame: FrameRef,
|
||||
depth: u8,
|
||||
average: f32,
|
||||
luma_scaling: f32,
|
||||
) {
|
||||
fn $fname(frame: &mut FrameRefMut, src_frame: FrameRef, depth: u8, luma_scaling: f32) {
|
||||
let max = ((1 << depth) - 1) as f32;
|
||||
let lut: Vec<$type> = FLOAT_RANGE
|
||||
.iter()
|
||||
.map(|x| (get_mask_value(*x, average, luma_scaling) * max) as $type)
|
||||
.map(|x| (get_mask_value(*x, luma_scaling) * max) as $type)
|
||||
.collect();
|
||||
for row in 0..frame.height(0) {
|
||||
for (pixel, src_pixel) in frame
|
||||
@ -67,18 +61,14 @@ macro_rules! int_filter {
|
||||
};
|
||||
}
|
||||
|
||||
fn filter_for_float(frame: &mut FrameRefMut, src_frame: FrameRef, average: f32, luma_scaling: f32) {
|
||||
let lut: Vec<f32> = FLOAT_RANGE
|
||||
.iter()
|
||||
.map(|x| get_mask_value(*x, average, luma_scaling))
|
||||
.collect();
|
||||
fn filter_for_float(frame: &mut FrameRefMut, src_frame: FrameRef, luma_scaling: f32) {
|
||||
for row in 0..frame.height(0) {
|
||||
frame
|
||||
.plane_row_mut::<f32>(0, row)
|
||||
.iter_mut()
|
||||
.zip(src_frame.plane_row::<f32>(0, row))
|
||||
.for_each(|(pixel, src_pixel)| unsafe {
|
||||
ptr::write(pixel, lut[(src_pixel.min(1.0).max(0.0) * 255.99) as usize]);
|
||||
ptr::write(pixel, get_mask_value(*src_pixel, luma_scaling));
|
||||
});
|
||||
}
|
||||
}
|
||||
@ -153,15 +143,30 @@ impl<'core> Filter<'core> for Mask<'core> {
|
||||
match depth {
|
||||
0..=8 => {
|
||||
int_filter!(u8, filter_8bit);
|
||||
filter_8bit(&mut frame, src_frame, depth, average, self.luma_scaling)
|
||||
filter_8bit(
|
||||
&mut frame,
|
||||
src_frame,
|
||||
depth,
|
||||
calc_luma_scaling(average, self.luma_scaling),
|
||||
)
|
||||
}
|
||||
9..=16 => {
|
||||
int_filter!(u16, filter_16bit);
|
||||
filter_16bit(&mut frame, src_frame, depth, average, self.luma_scaling)
|
||||
filter_16bit(
|
||||
&mut frame,
|
||||
src_frame,
|
||||
depth,
|
||||
calc_luma_scaling(average, self.luma_scaling),
|
||||
)
|
||||
}
|
||||
17..=32 => {
|
||||
int_filter!(u32, filter_32bit);
|
||||
filter_32bit(&mut frame, src_frame, depth, average, self.luma_scaling)
|
||||
filter_32bit(
|
||||
&mut frame,
|
||||
src_frame,
|
||||
depth,
|
||||
calc_luma_scaling(average, self.luma_scaling),
|
||||
)
|
||||
}
|
||||
_ => bail!(format!(
|
||||
"{}: input depth {} not supported",
|
||||
@ -170,13 +175,21 @@ impl<'core> Filter<'core> for Mask<'core> {
|
||||
}
|
||||
}
|
||||
SampleType::Float => {
|
||||
filter_for_float(&mut frame, src_frame, average, self.luma_scaling);
|
||||
filter_for_float(
|
||||
&mut frame,
|
||||
src_frame,
|
||||
calc_luma_scaling(average, self.luma_scaling),
|
||||
);
|
||||
}
|
||||
}
|
||||
Ok(frame.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Folds `average` and `luma_scaling` into one factor: `average * average * luma_scaling`.
///
/// The call sites in this change compute it once per filter invocation and pass the
/// result on as the `luma_scaling` argument of the filter functions, which forward it
/// to `get_mask_value`, where it is used as the exponent of `f32::powf`.
pub fn calc_luma_scaling(average: f32, luma_scaling: f32) -> f32 {
|
||||
average * average * luma_scaling
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@ -193,13 +206,13 @@ mod tests {
|
||||
.iter()
|
||||
.zip(EXPECTED_MASK_02.iter())
|
||||
.for_each(|(&x, &exp)| {
|
||||
assert!((get_mask_value(x, 0.2, 10.0) - exp).abs() < 0.0001);
|
||||
assert!((get_mask_value(x, calc_luma_scaling(0.2, 10.0)) - exp).abs() < 0.0001);
|
||||
});
|
||||
FLOAT_RANGE
|
||||
.iter()
|
||||
.zip(EXPECTED_MASK_08.iter())
|
||||
.for_each(|(&x, &exp)| {
|
||||
assert!((get_mask_value(x, 0.8, 10.0) - exp).abs() < 0.0001);
|
||||
assert!((get_mask_value(x, calc_luma_scaling(0.8, 10.0)) - exp).abs() < 0.0001);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user