Skip to content

Commit

Permalink
simplify BoxBlurCT
Browse files Browse the repository at this point in the history
  • Loading branch information
dnjulek committed Jan 20, 2025
1 parent 7ab9a9b commit fcdf191
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 63 deletions.
30 changes: 1 addition & 29 deletions src/filters/boxblur_comptime.zig
Original file line number Diff line number Diff line change
Expand Up @@ -7,35 +7,7 @@ const math = std.math;

const allocator = std.heap.c_allocator;

/// Dispatches a runtime `radius` to the comptime-specialized `hvBlurCT`.
///
/// `src`, `dst`, `stride`, `w` and `h` are forwarded unchanged.
/// `radius` must be in the range 1...22; any other value reaches
/// `unreachable`, so the caller has to range-check before calling.
pub fn hvBlur(comptime T: type, src: []const T, dst: []T, stride: u32, w: u32, h: u32, radius: u32) void {
    switch (radius) {
        // An `inline` prong makes the captured value comptime-known, so the
        // compiler emits one specialization per radius without every case
        // being written out by hand.
        inline 1...22 => |r| hvBlurCT(T, r, src, dst, stride, w, h),
        else => unreachable,
    }
}

fn hvBlurCT(comptime T: type, comptime radius: u32, src: []const T, dst: []T, stride: u32, w: u32, h: u32) void {
pub fn hvBlur(comptime T: type, comptime radius: u32, src: []const T, dst: []T, stride: u32, w: u32, h: u32) void {
const ksize: u32 = (radius << 1) + 1;
const iradius: i32 = @bitCast(radius);
const ih: i32 = @bitCast(h);
Expand Down
25 changes: 2 additions & 23 deletions src/filters/boxblur_runtime.zig
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,6 @@ const math = std.math;

const allocator = std.heap.c_allocator;

/// Processes every plane of `src` that is enabled in `d.planes`, writing the
/// result to `dst`.
///
/// For each enabled plane, `hblur` is called from the source slice into the
/// destination slice using `d.hradius`/`d.hpasses`, then `vblur` is called in
/// place on the destination slice using `d.vradius`/`d.vpasses`.
///
/// Two scratch buffers of `d.tmp_size` elements are allocated on entry and
/// released on return. The function returns `void`, so an allocation failure
/// cannot be propagated; it panics with a message instead of invoking
/// undefined behavior through `unreachable`.
pub fn hvBlur(comptime T: type, src: zapi.ZFrameRO, dst: zapi.ZFrameRW, d: *Data) void {
    // Pair each allocation with its release right away so the cleanup is
    // obvious; defers run in reverse order on scope exit.
    const temp1 = allocator.alloc(T, d.tmp_size) catch @panic("boxblur: out of memory allocating scratch buffer");
    defer allocator.free(temp1);
    const temp2 = allocator.alloc(T, d.tmp_size) catch @panic("boxblur: out of memory allocating scratch buffer");
    defer allocator.free(temp2);

    var plane: u32 = 0;
    while (plane < d.vi.format.numPlanes) : (plane += 1) {
        // Planes that were not selected for processing are left untouched here.
        if (!d.planes[plane]) continue;

        const srcp = src.getReadSlice2(T, plane);
        const dstp = dst.getWriteSlice2(T, plane);
        const w, const h, const stride = src.getDimensions2(T, plane);

        // The second call reads and writes `dstp`, so it operates on the
        // output of the first call.
        hblur(T, srcp, dstp, stride, w, h, d.hradius, d.hpasses, temp1, temp2);
        vblur(T, dstp, dstp, stride, w, h, d.vradius, d.vpasses, temp1, temp2);
    }
}

inline fn blurInt(comptime T: type, srcp: []const T, src_step: u32, dstp: []T, dst_step: u32, len: u32, radius: u32) void {
const ksize: u32 = (radius << 1) + 1;
const inv: u64 = @divTrunc(((1 << 32) + @as(u64, radius)), ksize);
Expand Down Expand Up @@ -140,7 +119,7 @@ inline fn blur_passes(comptime T: type, srcp: []const T, dstp: []T, step: u32, l
}
}

fn hblur(comptime T: type, srcp: []const T, dstp: []T, stride: u32, w: u32, h: u32, radius: u32, passes: i32, temp1: []T, temp2: []T) void {
pub fn hblur(comptime T: type, srcp: []const T, dstp: []T, stride: u32, w: u32, h: u32, radius: u32, passes: i32, temp1: []T, temp2: []T) void {
if ((passes > 0) and (radius > 0)) {
var y: u32 = 0;
while (y < h) : (y += 1) {
Expand All @@ -166,7 +145,7 @@ fn hblur(comptime T: type, srcp: []const T, dstp: []T, stride: u32, w: u32, h: u
}
}

fn vblur(comptime T: type, srcp: []const T, dstp: []T, stride: u32, w: u32, h: u32, radius: u32, passes: i32, temp1: []T, temp2: []T) void {
pub fn vblur(comptime T: type, srcp: []const T, dstp: []T, stride: u32, w: u32, h: u32, radius: u32, passes: i32, temp1: []T, temp2: []T) void {
if ((passes > 0) and (radius > 0)) {
var x: u32 = 0;
while (x < w) : (x += 1) {
Expand Down
53 changes: 42 additions & 11 deletions src/vapoursynth/boxblur.zig
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ pub const Data = struct {
planes: [3]bool,
};

pub fn BoxBlurCT(comptime T: type) type {
pub fn BoxBlurCT(comptime T: type, radius: comptime_int) type {
return struct {
pub fn getFrame(n: c_int, activation_reason: vs.ActivationReason, instance_data: ?*anyopaque, frame_data: ?*?*anyopaque, frame_ctx: ?*vs.FrameContext, core: ?*vs.Core, vsapi: ?*const vs.API) callconv(.C) ?*const vs.Frame {
_ = frame_data;
Expand All @@ -45,7 +45,7 @@ pub fn BoxBlurCT(comptime T: type) type {
const srcp = src.getReadSlice2(T, plane);
const dstp = dst.getWriteSlice2(T, plane);
const w, const h, const stride = src.getDimensions2(T, plane);
boxblur_ct.hvBlur(T, srcp, dstp, stride, w, h, d.hradius);
boxblur_ct.hvBlur(T, radius, srcp, dstp, stride, w, h);
}

return dst.frame;
Expand All @@ -66,10 +66,28 @@ fn BoxBlurRT(comptime T: type) type {
vsapi.?.requestFrameFilter.?(n, d.node, frame_ctx);
} else if (activation_reason == .AllFramesReady) {
const src = zapi.ZFrame.init(d.node, n, frame_ctx, core, vsapi);
defer src.deinit();
const dst = src.newVideoFrame2(d.planes);
defer src.deinit();

const temp1 = allocator.alloc(T, d.tmp_size) catch unreachable;
const temp2 = allocator.alloc(T, d.tmp_size) catch unreachable;
defer allocator.free(temp1);
defer allocator.free(temp2);

var plane: u32 = 0;
while (plane < d.vi.format.numPlanes) : (plane += 1) {
if (!(d.planes[plane])) {
continue;
}

const srcp = src.getReadSlice2(T, plane);
const dstp = dst.getWriteSlice2(T, plane);
const w, const h, const stride = src.getDimensions2(T, plane);

boxblur_rt.hblur(T, srcp, dstp, stride, w, h, d.hradius, d.hpasses, temp1, temp2);
boxblur_rt.vblur(T, dstp, dstp, stride, w, h, d.vradius, d.vpasses, temp1, temp2);
}

boxblur_rt.hvBlur(T, src, dst, d);
return dst.frame;
}

Expand Down Expand Up @@ -126,12 +144,25 @@ pub export fn boxBlurCreate(in: ?*const vs.Map, out: ?*vs.Map, user_data: ?*anyo
};

const use_rt: bool = (d.hradius != d.vradius) or (d.hradius > 22) or (d.hpasses > 1) or (d.vpasses > 1);
const getFrame = switch (dt) {
.U8 => if (use_rt) &BoxBlurRT(u8).getFrame else &BoxBlurCT(u8).getFrame,
.U16 => if (use_rt) &BoxBlurRT(u16).getFrame else &BoxBlurCT(u16).getFrame,
.F16 => if (use_rt) &BoxBlurRT(f16).getFrame else &BoxBlurCT(f16).getFrame,
.F32 => if (use_rt) &BoxBlurRT(f32).getFrame else &BoxBlurCT(f32).getFrame,
};
var get_frame: vs.FilterGetFrame = undefined;
if (use_rt) {
get_frame = switch (dt) {
.U8 => &BoxBlurRT(u8).getFrame,
.U16 => &BoxBlurRT(u16).getFrame,
.F16 => &BoxBlurRT(f16).getFrame,
.F32 => &BoxBlurRT(f32).getFrame,
};
} else {
get_frame = switch (d.hradius) {
inline 1...22 => |r| switch (dt) {
.U8 => &BoxBlurCT(u8, r).getFrame,
.U16 => &BoxBlurCT(u16, r).getFrame,
.F16 => &BoxBlurCT(f16, r).getFrame,
.F32 => &BoxBlurCT(f32, r).getFrame,
},
else => unreachable,
};
}

vsapi.?.createVideoFilter.?(out, filter_name, d.vi, getFrame, boxBlurFree, .Parallel, &deps, deps.len, data, core);
vsapi.?.createVideoFilter.?(out, filter_name, d.vi, get_frame, boxBlurFree, .Parallel, &deps, deps.len, data, core);
}

0 comments on commit fcdf191

Please sign in to comment.