From a787265af20411f247798485f0560525f970bd4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucas=20Roman=C3=B3?= <9062026+lucasromanosantos@users.noreply.github.com> Date: Wed, 13 Jul 2022 18:14:30 -0300 Subject: [PATCH] examples: add image-blur example MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Lucas RomanĂ³ <9062026+lucasromanosantos@users.noreply.github.com> --- build.zig | 1 + examples/LICENSE | 1 + examples/image-blur/blur.wgsl | 83 +++++ .../image-blur/fullscreen_textured_quad.wgsl | 38 +++ examples/image-blur/main.zig | 290 ++++++++++++++++++ 5 files changed, 413 insertions(+) create mode 100644 examples/image-blur/blur.wgsl create mode 100644 examples/image-blur/fullscreen_textured_quad.wgsl create mode 100644 examples/image-blur/main.zig diff --git a/build.zig b/build.zig index 382c9791..5da56433 100644 --- a/build.zig +++ b/build.zig @@ -45,6 +45,7 @@ pub fn build(b: *std.build.Builder) void { .{ .name = "fractal-cube", .packages = &[_]Pkg{Packages.zmath} }, .{ .name = "textured-cube", .packages = &[_]Pkg{ Packages.zmath, Packages.zigimg } }, .{ .name = "ecs-app", .packages = &[_]Pkg{} }, + .{ .name = "image-blur", .packages = &[_]Pkg{Packages.zigimg} }, // NOTE: examples with std_platform_only should be placed at last .{ .name = "gkurve", .packages = &[_]Pkg{ Packages.zmath, Packages.zigimg, freetype.pkg }, .std_platform_only = true }, }) |example| { diff --git a/examples/LICENSE b/examples/LICENSE index efb81fcf..c7e6fc5d 100644 --- a/examples/LICENSE +++ b/examples/LICENSE @@ -6,3 +6,4 @@ The following examples have been ported from https://github.com/austinEng/webgpu * ./instanced-cube * ./textured-cube * ./fractal-cube +* ./image-blur diff --git a/examples/image-blur/blur.wgsl b/examples/image-blur/blur.wgsl new file mode 100644 index 00000000..100ef9b4 --- /dev/null +++ b/examples/image-blur/blur.wgsl @@ -0,0 +1,83 @@ +struct Params { + filterDim : u32, + blockDim : u32, +} + +@group(0) @binding(0) var samp : sampler; +@group(0) @binding(1) var params : Params; +@group(1) @binding(1) var inputTex : texture_2d; +@group(1) @binding(2) var outputTex : texture_storage_2d; + +struct Flip { + value : u32, +} +@group(1) @binding(3) var flip : Flip; + +// This shader blurs the input texture in one direction, depending on whether +// |flip.value| is 0 or 1. +// It does so by running (128 / 4) threads per workgroup to load 128 +// texels into 4 rows of shared memory. Each thread loads a +// 4 x 4 block of texels to take advantage of the texture sampling +// hardware. +// Then, each thread computes the blur result by averaging the adjacent texel values +// in shared memory. +// Because we're operating on a subset of the texture, we cannot compute all of the +// results since not all of the neighbors are available in shared memory. +// Specifically, with 128 x 128 tiles, we can only compute and write out +// square blocks of size 128 - (filterSize - 1). We compute the number of blocks +// needed in Javascript and dispatch that amount. + +var tile : array, 128>, 4>; + +@stage(compute) @workgroup_size(32, 1, 1) +fn main( + @builtin(workgroup_id) WorkGroupID : vec3, + @builtin(local_invocation_id) LocalInvocationID : vec3 +) { + let filterOffset : u32 = (params.filterDim - 1u) / 2u; + let dims : vec2 = textureDimensions(inputTex, 0); + + let baseIndex = vec2( + WorkGroupID.xy * vec2(params.blockDim, 4u) + + LocalInvocationID.xy * vec2(4u, 1u) + ) - vec2(i32(filterOffset), 0); + + for (var r : u32 = 0u; r < 4u; r = r + 1u) { + for (var c : u32 = 0u; c < 4u; c = c + 1u) { + var loadIndex = baseIndex + vec2(i32(c), i32(r)); + if (flip.value != 0u) { + loadIndex = loadIndex.yx; + } + + tile[r][4u * LocalInvocationID.x + c] = textureSampleLevel( + inputTex, + samp, + (vec2(loadIndex) + vec2(0.25, 0.25)) / vec2(dims), + 0.0 + ).rgb; + } + } + + workgroupBarrier(); + + for (var r : u32 = 0u; r < 4u; r = r + 1u) { + for (var c : u32 = 0u; c < 4u; c = c + 1u) { + var writeIndex = baseIndex + vec2(i32(c), i32(r)); + if (flip.value != 0u) { + writeIndex = writeIndex.yx; + } + + let center : u32 = 4u * LocalInvocationID.x + c; + if (center >= filterOffset && + center < 128u - filterOffset && + all(writeIndex < dims)) { + var acc : vec3 = vec3(0.0, 0.0, 0.0); + for (var f : u32 = 0u; f < params.filterDim; f = f + 1u) { + var i : u32 = center + f - filterOffset; + acc = acc + (1.0 / f32(params.filterDim)) * tile[r][i]; + } + textureStore(outputTex, writeIndex, vec4(acc, 1.0)); + } + } + } +} diff --git a/examples/image-blur/fullscreen_textured_quad.wgsl b/examples/image-blur/fullscreen_textured_quad.wgsl new file mode 100644 index 00000000..ca35dc81 --- /dev/null +++ b/examples/image-blur/fullscreen_textured_quad.wgsl @@ -0,0 +1,38 @@ +@group(0) @binding(0) var mySampler : sampler; +@group(0) @binding(1) var myTexture : texture_2d; + +struct VertexOutput { + @builtin(position) Position : vec4, + @location(0) fragUV : vec2, +} + +@stage(vertex) +fn vert_main(@builtin(vertex_index) VertexIndex : u32) -> VertexOutput { + var pos = array, 6>( + vec2( 1.0, 1.0), + vec2( 1.0, -1.0), + vec2(-1.0, -1.0), + vec2( 1.0, 1.0), + vec2(-1.0, -1.0), + vec2(-1.0, 1.0) + ); + + var uv = array, 6>( + vec2(1.0, 0.0), + vec2(1.0, 1.0), + vec2(0.0, 1.0), + vec2(1.0, 0.0), + vec2(0.0, 1.0), + vec2(0.0, 0.0) + ); + + var output : VertexOutput; + output.Position = vec4(pos[VertexIndex], 0.0, 1.0); + output.fragUV = uv[VertexIndex]; + return output; +} + +@stage(fragment) +fn frag_main(@location(0) fragUV : vec2) -> @location(0) vec4 { + return textureSample(myTexture, mySampler, fragUV); +} diff --git a/examples/image-blur/main.zig b/examples/image-blur/main.zig new file mode 100644 index 00000000..ad1568fe --- /dev/null +++ b/examples/image-blur/main.zig @@ -0,0 +1,290 @@ +const std = @import("std"); +const mach = @import("mach"); +const gpu = @import("gpu"); +const zigimg = @import("zigimg"); + +queue: gpu.Queue, +blur_pipeline: gpu.ComputePipeline, +fullscreen_quad_pipeline: gpu.RenderPipeline, +cube_texture: gpu.Texture, +textures: [2]gpu.Texture, +blur_params_buffer: gpu.Buffer, +compute_constants: gpu.BindGroup, +compute_bind_group_0: gpu.BindGroup, +compute_bind_group_1: gpu.BindGroup, +compute_bind_group_2: gpu.BindGroup, +show_result_bind_group: gpu.BindGroup, +img_size: gpu.Extent3D, + +pub const App = @This(); + +// Constants from the blur.wgsl shader +const tile_dimension: u32 = 128; +const batch: [2]u32 = .{ 4, 4 }; + +// Currently hardcoded +const filter_size: u32 = 15; +const iterations: u32 = 2; +var block_dimension: u32 = tile_dimension - (filter_size - 1); + +pub fn init(app: *App, core: *mach.Core) !void { + const queue = core.device.getQueue(); + + try core.setOptions(.{ + .size_min = .{ .width = 20, .height = 20 }, + }); + + const blur_shader_module = core.device.createShaderModule(&.{ + .label = "blur shader module", + .code = .{ .wgsl = @embedFile("blur.wgsl") }, + }); + + const blur_pipeline_descriptor = gpu.ComputePipeline.Descriptor{ + .compute = gpu.ProgrammableStageDescriptor{ + .module = blur_shader_module, + .entry_point = "main", + }, + }; + + const blur_pipeline = core.device.createComputePipeline(&blur_pipeline_descriptor); + + const fullscreen_quad_vs_module = core.device.createShaderModule(&.{ + .label = "fullscreen quad vertex shader", + .code = .{ .wgsl = @embedFile("fullscreen_textured_quad.wgsl") }, + }); + + const fullscreen_quad_fs_module = core.device.createShaderModule(&.{ + .label = "fullscreen quad fragment shader", + .code = .{ .wgsl = @embedFile("fullscreen_textured_quad.wgsl") }, + }); + + const blend = gpu.BlendState{ + .color = .{ + .operation = .add, + .src_factor = .one, + .dst_factor = .zero, + }, + .alpha = .{ + .operation = .add, + .src_factor = .one, + .dst_factor = .zero, + }, + }; + + const color_target = gpu.ColorTargetState{ + .format = core.swap_chain_format, + .blend = &blend, + .write_mask = gpu.ColorWriteMask.all, + }; + + const fragment_state = gpu.FragmentState{ + .module = fullscreen_quad_fs_module, + .entry_point = "frag_main", + .targets = &.{color_target}, + .constants = null, + }; + + const fullscreen_quad_pipeline_descriptor = gpu.RenderPipeline.Descriptor{ + .layout = null, + .fragment = &fragment_state, + .vertex = .{ + .module = fullscreen_quad_vs_module, + .entry_point = "vert_main", + .buffers = null, + }, + }; + + const fullscreen_quad_pipeline = core.device.createRenderPipeline(&fullscreen_quad_pipeline_descriptor); + + const sampler = core.device.createSampler(&.{ + .mag_filter = .linear, + .min_filter = .linear, + }); + + const img = try zigimg.Image.fromMemory(core.allocator, @embedFile("../assets/gotta-go-fast.png")); + defer img.deinit(); + + const img_size = gpu.Extent3D{ .width = @intCast(u32, img.width), .height = @intCast(u32, img.height) }; + + const cube_texture = core.device.createTexture(&.{ + .size = img_size, + .format = .rgba8_unorm, + .usage = .{ + .texture_binding = true, + .copy_dst = true, + .render_attachment = true, + }, + }); + + const data_layout = gpu.Texture.DataLayout{ + .bytes_per_row = @intCast(u32, img.width * 4), + .rows_per_image = @intCast(u32, img.height), + }; + + switch (img.pixels.?) { + .Rgba32 => |pixels| queue.writeTexture(&.{ .texture = cube_texture }, &data_layout, &img_size, zigimg.color.Rgba32, pixels), + .Rgb24 => |pixels| { + const data = try rgb24ToRgba32(core.allocator, pixels); + defer data.deinit(core.allocator); + queue.writeTexture(&.{ .texture = cube_texture }, &data_layout, &img_size, zigimg.color.Rgba32, data.Rgba32); + }, + else => @panic("unsupported image color format"), + } + + var textures: [2]gpu.Texture = undefined; + for (textures) |_, i| { + textures[i] = core.device.createTexture(&.{ + .size = img_size, + .format = .rgba8_unorm, + .usage = .{ + .storage_binding = true, + .texture_binding = true, + .copy_dst = true, + }, + }); + } + + // the shader blurs the input texture in one direction, + // depending on whether flip value is 0 or 1 + var flip: [2]gpu.Buffer = undefined; + for (flip) |_, i| { + const buffer = core.device.createBuffer(&.{ + .usage = .{ .uniform = true }, + .size = @sizeOf(u32), + .mapped_at_creation = true, + }); + + const buffer_mapped = buffer.getMappedRange(u32, 0, 1); + buffer_mapped[0] = @intCast(u32, i); + buffer.unmap(); + + flip[i] = buffer; + } + + const blur_params_buffer = core.device.createBuffer(&.{ + .size = 8, + .usage = .{ .copy_dst = true, .uniform = true }, + }); + + const compute_constants = core.device.createBindGroup(&gpu.BindGroup.Descriptor{ + .layout = blur_pipeline.getBindGroupLayout(0), + .entries = &[_]gpu.BindGroup.Entry{ + gpu.BindGroup.Entry.sampler(0, sampler), + gpu.BindGroup.Entry.buffer(1, blur_params_buffer, 0, 8), + }, + }); + + const compute_bind_group_0 = core.device.createBindGroup(&gpu.BindGroup.Descriptor{ + .layout = blur_pipeline.getBindGroupLayout(1), + .entries = &[_]gpu.BindGroup.Entry{ + gpu.BindGroup.Entry.textureView(1, cube_texture.createView(&gpu.TextureView.Descriptor{})), + gpu.BindGroup.Entry.textureView(2, textures[0].createView(&gpu.TextureView.Descriptor{})), + gpu.BindGroup.Entry.buffer(3, flip[0], 0, 4), + }, + }); + + const compute_bind_group_1 = core.device.createBindGroup(&gpu.BindGroup.Descriptor{ + .layout = blur_pipeline.getBindGroupLayout(1), + .entries = &[_]gpu.BindGroup.Entry{ + gpu.BindGroup.Entry.textureView(1, textures[0].createView(&gpu.TextureView.Descriptor{})), + gpu.BindGroup.Entry.textureView(2, textures[1].createView(&gpu.TextureView.Descriptor{})), + gpu.BindGroup.Entry.buffer(3, flip[1], 0, 4), + }, + }); + + const compute_bind_group_2 = core.device.createBindGroup(&gpu.BindGroup.Descriptor{ + .layout = blur_pipeline.getBindGroupLayout(1), + .entries = &[_]gpu.BindGroup.Entry{ + gpu.BindGroup.Entry.textureView(1, textures[1].createView(&gpu.TextureView.Descriptor{})), + gpu.BindGroup.Entry.textureView(2, textures[0].createView(&gpu.TextureView.Descriptor{})), + gpu.BindGroup.Entry.buffer(3, flip[0], 0, 4), + }, + }); + + const show_result_bind_group = core.device.createBindGroup(&gpu.BindGroup.Descriptor{ + .layout = fullscreen_quad_pipeline.getBindGroupLayout(0), + .entries = &[_]gpu.BindGroup.Entry{ + gpu.BindGroup.Entry.sampler(0, sampler), + gpu.BindGroup.Entry.textureView(1, textures[1].createView(&gpu.TextureView.Descriptor{})), + }, + }); + + const blur_params_buffer_data = [_]u32{ filter_size, block_dimension }; + queue.writeBuffer(blur_params_buffer, 0, u32, &blur_params_buffer_data); + + app.queue = queue; + app.blur_pipeline = blur_pipeline; + app.fullscreen_quad_pipeline = fullscreen_quad_pipeline; + app.cube_texture = cube_texture; + app.textures = textures; + app.blur_params_buffer = blur_params_buffer; + app.compute_constants = compute_constants; + app.compute_bind_group_0 = compute_bind_group_0; + app.compute_bind_group_1 = compute_bind_group_1; + app.compute_bind_group_2 = compute_bind_group_2; + app.show_result_bind_group = show_result_bind_group; + app.img_size = img_size; +} + +pub fn deinit(_: *App, _: *mach.Core) void {} + +pub fn update(app: *App, core: *mach.Core) !void { + const back_buffer_view = core.swap_chain.?.getCurrentTextureView(); + const encoder = core.device.createCommandEncoder(null); + + const compute_pass = encoder.beginComputePass(null); + compute_pass.setPipeline(app.blur_pipeline); + compute_pass.setBindGroup(0, app.compute_constants, &.{}); + + const width: u32 = @intCast(u32, app.img_size.width); + const height: u32 = @intCast(u32, app.img_size.height); + compute_pass.setBindGroup(1, app.compute_bind_group_0, &.{}); + compute_pass.dispatch(try std.math.divCeil(u32, width, block_dimension), try std.math.divCeil(u32, height, batch[1]), 1); + + compute_pass.setBindGroup(1, app.compute_bind_group_1, &.{}); + compute_pass.dispatch(try std.math.divCeil(u32, height, block_dimension), try std.math.divCeil(u32, width, batch[1]), 1); + + var i: u32 = 0; + while (i < iterations - 1) : (i += 1) { + compute_pass.setBindGroup(1, app.compute_bind_group_2, &.{}); + compute_pass.dispatch(try std.math.divCeil(u32, width, block_dimension), try std.math.divCeil(u32, height, batch[1]), 1); + + compute_pass.setBindGroup(1, app.compute_bind_group_1, &.{}); + compute_pass.dispatch(try std.math.divCeil(u32, height, block_dimension), try std.math.divCeil(u32, width, batch[1]), 1); + } + compute_pass.end(); + + const color_attachment = gpu.RenderPassColorAttachment{ + .view = back_buffer_view, + .resolve_target = null, + .clear_value = std.mem.zeroes(gpu.Color), + .load_op = .clear, + .store_op = .store, + }; + + const render_pass_descriptor = gpu.RenderPassEncoder.Descriptor{ .color_attachments = &[_]gpu.RenderPassColorAttachment{ + color_attachment, + } }; + + const render_pass = encoder.beginRenderPass(&render_pass_descriptor); + render_pass.setPipeline(app.fullscreen_quad_pipeline); + render_pass.setBindGroup(0, app.show_result_bind_group, &.{}); + render_pass.draw(6, 1, 0, 0); + render_pass.end(); + + var command = encoder.finish(null); + encoder.release(); + app.queue.submit(&.{command}); + command.release(); + core.swap_chain.?.present(); + back_buffer_view.release(); +} + +fn rgb24ToRgba32(allocator: std.mem.Allocator, in: []zigimg.color.Rgb24) !zigimg.color.ColorStorage { + const out = try zigimg.color.ColorStorage.init(allocator, .Rgba32, in.len); + var i: usize = 0; + while (i < in.len) : (i += 1) { + out.Rgba32[i] = zigimg.color.Rgba32{ .R = in[i].R, .G = in[i].G, .B = in[i].B, .A = 255 }; + } + return out; +}