No longer use a LUT for 32-bit float input
That means you’ll finally get full 32-bit precision instead of just 8-bit. This comes at a cost (~20% lower FPS), but it’s a lot more accurate, and there’s potentially room for optimization in the future, because we now actually do the math instead of relying on hacky lookups in a 256-element vector.
This commit is contained in:
parent
d762821b88
commit
8ef0654595
|
@ -3,12 +3,11 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||||
|
|
||||||
fn mask_value(c: &mut Criterion) {
|
fn mask_value(c: &mut Criterion) {
|
||||||
black_box(FLOAT_RANGE.iter().count());
|
black_box(FLOAT_RANGE.iter().count());
|
||||||
let ls = black_box(10.0);
|
let ls = black_box(calc_luma_scaling(0.412323, 10.0));
|
||||||
let y = black_box(0.412323);
|
|
||||||
c.bench_function("mask value y=0.412", |b| {
|
c.bench_function("mask value y=0.412", |b| {
|
||||||
b.iter(|| {
|
b.iter(|| {
|
||||||
FLOAT_RANGE.iter().for_each(|&x| {
|
FLOAT_RANGE.iter().for_each(|&x| {
|
||||||
black_box(get_mask_value(black_box(x), y, ls));
|
black_box(get_mask_value(black_box(x), ls));
|
||||||
});
|
});
|
||||||
})
|
})
|
||||||
});
|
});
|
||||||
|
|
57
src/mask.rs
57
src/mask.rs
|
@ -17,14 +17,14 @@ lazy_static! {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn get_mask_value(x: f32, y: f32, luma_scaling: f32) -> f32 {
|
pub fn get_mask_value(x: f32, luma_scaling: f32) -> f32 {
|
||||||
f32::powf(
|
f32::powf(
|
||||||
1.0 - (x
|
1.0 - (x
|
||||||
* (x.mul_add(
|
* (x.mul_add(
|
||||||
x.mul_add(x.mul_add(x.mul_add(18.188, -45.47), 36.624), -9.466),
|
x.mul_add(x.mul_add(x.mul_add(18.188, -45.47), 36.624), -9.466),
|
||||||
1.124,
|
1.124,
|
||||||
))),
|
))),
|
||||||
y * y * luma_scaling,
|
luma_scaling,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -39,17 +39,11 @@ macro_rules! from_property {
|
||||||
|
|
||||||
macro_rules! int_filter {
|
macro_rules! int_filter {
|
||||||
($type:ty, $fname:ident) => {
|
($type:ty, $fname:ident) => {
|
||||||
fn $fname(
|
fn $fname(frame: &mut FrameRefMut, src_frame: FrameRef, depth: u8, luma_scaling: f32) {
|
||||||
frame: &mut FrameRefMut,
|
|
||||||
src_frame: FrameRef,
|
|
||||||
depth: u8,
|
|
||||||
average: f32,
|
|
||||||
luma_scaling: f32,
|
|
||||||
) {
|
|
||||||
let max = ((1 << depth) - 1) as f32;
|
let max = ((1 << depth) - 1) as f32;
|
||||||
let lut: Vec<$type> = FLOAT_RANGE
|
let lut: Vec<$type> = FLOAT_RANGE
|
||||||
.iter()
|
.iter()
|
||||||
.map(|x| (get_mask_value(*x, average, luma_scaling) * max) as $type)
|
.map(|x| (get_mask_value(*x, luma_scaling) * max) as $type)
|
||||||
.collect();
|
.collect();
|
||||||
for row in 0..frame.height(0) {
|
for row in 0..frame.height(0) {
|
||||||
for (pixel, src_pixel) in frame
|
for (pixel, src_pixel) in frame
|
||||||
|
@ -67,18 +61,14 @@ macro_rules! int_filter {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
fn filter_for_float(frame: &mut FrameRefMut, src_frame: FrameRef, average: f32, luma_scaling: f32) {
|
fn filter_for_float(frame: &mut FrameRefMut, src_frame: FrameRef, luma_scaling: f32) {
|
||||||
let lut: Vec<f32> = FLOAT_RANGE
|
|
||||||
.iter()
|
|
||||||
.map(|x| get_mask_value(*x, average, luma_scaling))
|
|
||||||
.collect();
|
|
||||||
for row in 0..frame.height(0) {
|
for row in 0..frame.height(0) {
|
||||||
frame
|
frame
|
||||||
.plane_row_mut::<f32>(0, row)
|
.plane_row_mut::<f32>(0, row)
|
||||||
.iter_mut()
|
.iter_mut()
|
||||||
.zip(src_frame.plane_row::<f32>(0, row))
|
.zip(src_frame.plane_row::<f32>(0, row))
|
||||||
.for_each(|(pixel, src_pixel)| unsafe {
|
.for_each(|(pixel, src_pixel)| unsafe {
|
||||||
ptr::write(pixel, lut[(src_pixel.min(1.0).max(0.0) * 255.99) as usize]);
|
ptr::write(pixel, get_mask_value(*src_pixel, luma_scaling));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -153,15 +143,30 @@ impl<'core> Filter<'core> for Mask<'core> {
|
||||||
match depth {
|
match depth {
|
||||||
0..=8 => {
|
0..=8 => {
|
||||||
int_filter!(u8, filter_8bit);
|
int_filter!(u8, filter_8bit);
|
||||||
filter_8bit(&mut frame, src_frame, depth, average, self.luma_scaling)
|
filter_8bit(
|
||||||
|
&mut frame,
|
||||||
|
src_frame,
|
||||||
|
depth,
|
||||||
|
calc_luma_scaling(average, self.luma_scaling),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
9..=16 => {
|
9..=16 => {
|
||||||
int_filter!(u16, filter_16bit);
|
int_filter!(u16, filter_16bit);
|
||||||
filter_16bit(&mut frame, src_frame, depth, average, self.luma_scaling)
|
filter_16bit(
|
||||||
|
&mut frame,
|
||||||
|
src_frame,
|
||||||
|
depth,
|
||||||
|
calc_luma_scaling(average, self.luma_scaling),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
17..=32 => {
|
17..=32 => {
|
||||||
int_filter!(u32, filter_32bit);
|
int_filter!(u32, filter_32bit);
|
||||||
filter_32bit(&mut frame, src_frame, depth, average, self.luma_scaling)
|
filter_32bit(
|
||||||
|
&mut frame,
|
||||||
|
src_frame,
|
||||||
|
depth,
|
||||||
|
calc_luma_scaling(average, self.luma_scaling),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
_ => bail!(format!(
|
_ => bail!(format!(
|
||||||
"{}: input depth {} not supported",
|
"{}: input depth {} not supported",
|
||||||
|
@ -170,13 +175,21 @@ impl<'core> Filter<'core> for Mask<'core> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
SampleType::Float => {
|
SampleType::Float => {
|
||||||
filter_for_float(&mut frame, src_frame, average, self.luma_scaling);
|
filter_for_float(
|
||||||
|
&mut frame,
|
||||||
|
src_frame,
|
||||||
|
calc_luma_scaling(average, self.luma_scaling),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(frame.into())
|
Ok(frame.into())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn calc_luma_scaling(average: f32, luma_scaling: f32) -> f32 {
|
||||||
|
average * average * luma_scaling
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
@ -193,13 +206,13 @@ mod tests {
|
||||||
.iter()
|
.iter()
|
||||||
.zip(EXPECTED_MASK_02.iter())
|
.zip(EXPECTED_MASK_02.iter())
|
||||||
.for_each(|(&x, &exp)| {
|
.for_each(|(&x, &exp)| {
|
||||||
assert!((get_mask_value(x, 0.2, 10.0) - exp).abs() < 0.0001);
|
assert!((get_mask_value(x, calc_luma_scaling(0.2, 10.0)) - exp).abs() < 0.0001);
|
||||||
});
|
});
|
||||||
FLOAT_RANGE
|
FLOAT_RANGE
|
||||||
.iter()
|
.iter()
|
||||||
.zip(EXPECTED_MASK_08.iter())
|
.zip(EXPECTED_MASK_08.iter())
|
||||||
.for_each(|(&x, &exp)| {
|
.for_each(|(&x, &exp)| {
|
||||||
assert!((get_mask_value(x, 0.8, 10.0) - exp).abs() < 0.0001);
|
assert!((get_mask_value(x, calc_luma_scaling(0.8, 10.0)) - exp).abs() < 0.0001);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user