From a787265af20411f247798485f0560525f970bd4a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lucas=20Roman=C3=B3?=
 <9062026+lucasromanosantos@users.noreply.github.com>
Date: Wed, 13 Jul 2022 18:14:30 -0300
Subject: [PATCH] examples: add image-blur example
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Lucas Romanó <9062026+lucasromanosantos@users.noreply.github.com>
---
 build.zig                                     |   1 +
 examples/LICENSE                              |   1 +
 examples/image-blur/blur.wgsl                 |  83 +++++
 .../image-blur/fullscreen_textured_quad.wgsl  |  38 +++
 examples/image-blur/main.zig                  | 290 ++++++++++++++++++
 5 files changed, 413 insertions(+)
 create mode 100644 examples/image-blur/blur.wgsl
 create mode 100644 examples/image-blur/fullscreen_textured_quad.wgsl
 create mode 100644 examples/image-blur/main.zig
diff --git a/build.zig b/build.zig
index 382c9791..5da56433 100644
--- a/build.zig
+++ b/build.zig
@@ -45,6 +45,7 @@ pub fn build(b: *std.build.Builder) void {
         .{ .name = "fractal-cube", .packages = &[_]Pkg{Packages.zmath} },
         .{ .name = "textured-cube", .packages = &[_]Pkg{ Packages.zmath, Packages.zigimg } },
         .{ .name = "ecs-app", .packages = &[_]Pkg{} },
+        .{ .name = "image-blur", .packages = &[_]Pkg{Packages.zigimg} },
         // NOTE: examples with std_platform_only should be placed at last
         .{ .name = "gkurve", .packages = &[_]Pkg{ Packages.zmath, Packages.zigimg, freetype.pkg }, .std_platform_only = true },
     }) |example| {
diff --git a/examples/LICENSE b/examples/LICENSE
index efb81fcf..c7e6fc5d 100644
--- a/examples/LICENSE
+++ b/examples/LICENSE
@@ -6,3 +6,4 @@ The following examples have been ported from https://github.com/austinEng/webgpu
 * ./instanced-cube
 * ./textured-cube
 * ./fractal-cube
+* ./image-blur
diff --git a/examples/image-blur/blur.wgsl b/examples/image-blur/blur.wgsl
new file mode 100644
index 00000000..100ef9b4
--- /dev/null
+++ b/examples/image-blur/blur.wgsl
@@ -0,0 +1,83 @@
+struct Params {
+  filterDim : u32,
+  blockDim : u32,
+}
+
+@group(0) @binding(0) var samp : sampler;
+@group(0) @binding(1) var<uniform> params : Params;
+@group(1) @binding(1) var inputTex : texture_2d<f32>;
+@group(1) @binding(2) var outputTex : texture_storage_2d<rgba8unorm, write>;
+
+struct Flip {
+  value : u32,
+}
+@group(1) @binding(3) var<uniform> flip : Flip;
+
+// This shader blurs the input texture in one direction, depending on whether
+// |flip.value| is 0 or 1.
+// It does so by running (128 / 4) threads per workgroup to load 128
+// texels into 4 rows of shared memory. Each thread loads a
+// 4 x 4 block of texels to take advantage of the texture sampling
+// hardware.
+// Then, each thread computes the blur result by averaging the adjacent texel values
+// in shared memory.
+// Because we're operating on a subset of the texture, we cannot compute all of the
+// results since not all of the neighbors are available in shared memory.
+// Specifically, with 128 x 128 tiles, we can only compute and write out
+// square blocks of size 128 - (filterSize - 1). We compute the number of blocks
+// needed in Javascript and dispatch that amount.
+
+var<workgroup> tile : array<array<vec3<f32>, 128>, 4>;
+
+@stage(compute) @workgroup_size(32, 1, 1)
+fn main(
+  @builtin(workgroup_id) WorkGroupID : vec3<u32>,
+  @builtin(local_invocation_id) LocalInvocationID : vec3<u32>
+) {
+  let filterOffset : u32 = (params.filterDim - 1u) / 2u;
+  let dims : vec2<i32> = textureDimensions(inputTex, 0);
+
+  let baseIndex = vec2<i32>(
+    WorkGroupID.xy * vec2<u32>(params.blockDim, 4u) +
+    LocalInvocationID.xy * vec2<u32>(4u, 1u)
+  ) - vec2<i32>(i32(filterOffset), 0);
+
+  for (var r : u32 = 0u; r < 4u; r = r + 1u) {
+    for (var c : u32 = 0u; c < 4u; c = c + 1u) {
+      var loadIndex = baseIndex + vec2<i32>(i32(c), i32(r));
+      if (flip.value != 0u) {
+        loadIndex = loadIndex.yx;
+      }
+
+      tile[r][4u * LocalInvocationID.x + c] = textureSampleLevel(
+        inputTex,
+        samp,
+        (vec2<f32>(loadIndex) + vec2<f32>(0.25, 0.25)) / vec2<f32>(dims),
+        0.0
+      ).rgb;
+    }
+  }
+
+  workgroupBarrier();
+
+  for (var r : u32 = 0u; r < 4u; r = r + 1u) {
+    for (var c : u32 = 0u; c < 4u; c = c + 1u) {
+      var writeIndex = baseIndex + vec2<i32>(i32(c), i32(r));
+      if (flip.value != 0u) {
+        writeIndex = writeIndex.yx;
+      }
+
+      let center : u32 = 4u * LocalInvocationID.x + c;
+      if (center >= filterOffset &&
+          center < 128u - filterOffset &&
+          all(writeIndex < dims)) {
+        var acc : vec3<f32> = vec3<f32>(0.0, 0.0, 0.0);
+        for (var f : u32 = 0u; f < params.filterDim; f = f + 1u) {
+          var i : u32 = center + f - filterOffset;
+          acc = acc + (1.0 / f32(params.filterDim)) * tile[r][i];
+        }
+        textureStore(outputTex, writeIndex, vec4<f32>(acc, 1.0));
+      }
+    }
+  }
+}
diff --git a/examples/image-blur/fullscreen_textured_quad.wgsl b/examples/image-blur/fullscreen_textured_quad.wgsl
new file mode 100644
index 00000000..ca35dc81
--- /dev/null
+++ b/examples/image-blur/fullscreen_textured_quad.wgsl
@@ -0,0 +1,38 @@
+@group(0) @binding(0) var mySampler : sampler;
+@group(0) @binding(1) var myTexture : texture_2d<f32>;
+
+struct VertexOutput {
+  @builtin(position) Position : vec4<f32>,
+  @location(0) fragUV : vec2<f32>,
+}
+
+@stage(vertex)
+fn vert_main(@builtin(vertex_index) VertexIndex : u32) -> VertexOutput {
+  var pos = array<vec2<f32>, 6>(
+    vec2<f32>( 1.0,  1.0),
+    vec2<f32>( 1.0, -1.0),
+    vec2<f32>(-1.0, -1.0),
+    vec2<f32>( 1.0,  1.0),
+    vec2<f32>(-1.0, -1.0),
+    vec2<f32>(-1.0,  1.0)
+  );
+
+  var uv = array<vec2<f32>, 6>(
+    vec2<f32>(1.0, 0.0),
+    vec2<f32>(1.0, 1.0),
+    vec2<f32>(0.0, 1.0),
+    vec2<f32>(1.0, 0.0),
+    vec2<f32>(0.0, 1.0),
+    vec2<f32>(0.0, 0.0)
+  );
+
+  var output : VertexOutput;
+  output.Position = vec4<f32>(pos[VertexIndex], 0.0, 1.0);
+  output.fragUV = uv[VertexIndex];
+  return output;
+}
+
+@stage(fragment)
+fn frag_main(@location(0) fragUV : vec2<f32>) -> @location(0) vec4<f32> {
+  return textureSample(myTexture, mySampler, fragUV);
+}
diff --git a/examples/image-blur/main.zig b/examples/image-blur/main.zig
new file mode 100644
index 00000000..ad1568fe
--- /dev/null
+++ b/examples/image-blur/main.zig
@@ -0,0 +1,290 @@
+const std = @import("std");
+const mach = @import("mach");
+const gpu = @import("gpu");
+const zigimg = @import("zigimg");
+
+queue: gpu.Queue,
+blur_pipeline: gpu.ComputePipeline,
+fullscreen_quad_pipeline: gpu.RenderPipeline,
+cube_texture: gpu.Texture,
+textures: [2]gpu.Texture,
+blur_params_buffer: gpu.Buffer,
+compute_constants: gpu.BindGroup,
+compute_bind_group_0: gpu.BindGroup,
+compute_bind_group_1: gpu.BindGroup,
+compute_bind_group_2: gpu.BindGroup,
+show_result_bind_group: gpu.BindGroup,
+img_size: gpu.Extent3D,
+
+pub const App = @This();
+
+// Constants from the blur.wgsl shader
+const tile_dimension: u32 = 128;
+const batch: [2]u32 = .{ 4, 4 };
+
+// Currently hardcoded
+const filter_size: u32 = 15;
+const iterations: u32 = 2;
+var block_dimension: u32 = tile_dimension - (filter_size - 1);
+
+pub fn init(app: *App, core: *mach.Core) !void {
+    const queue = core.device.getQueue();
+
+    try core.setOptions(.{
+        .size_min = .{ .width = 20, .height = 20 },
+    });
+
+    const blur_shader_module = core.device.createShaderModule(&.{
+        .label = "blur shader module",
+        .code = .{ .wgsl = @embedFile("blur.wgsl") },
+    });
+
+    const blur_pipeline_descriptor = gpu.ComputePipeline.Descriptor{
+        .compute = gpu.ProgrammableStageDescriptor{
+            .module = blur_shader_module,
+            .entry_point = "main",
+        },
+    };
+
+    const blur_pipeline = core.device.createComputePipeline(&blur_pipeline_descriptor);
+
+    const fullscreen_quad_vs_module = core.device.createShaderModule(&.{
+        .label = "fullscreen quad vertex shader",
+        .code = .{ .wgsl = @embedFile("fullscreen_textured_quad.wgsl") },
+    });
+
+    const fullscreen_quad_fs_module = core.device.createShaderModule(&.{
+        .label = "fullscreen quad fragment shader",
+        .code = .{ .wgsl = @embedFile("fullscreen_textured_quad.wgsl") },
+    });
+
+    const blend = gpu.BlendState{
+        .color = .{
+            .operation = .add,
+            .src_factor = .one,
+            .dst_factor = .zero,
+        },
+        .alpha = .{
+            .operation = .add,
+            .src_factor = .one,
+            .dst_factor = .zero,
+        },
+    };
+
+    const color_target = gpu.ColorTargetState{
+        .format = core.swap_chain_format,
+        .blend = &blend,
+        .write_mask = gpu.ColorWriteMask.all,
+    };
+
+    const fragment_state = gpu.FragmentState{
+        .module = fullscreen_quad_fs_module,
+        .entry_point = "frag_main",
+        .targets = &.{color_target},
+        .constants = null,
+    };
+
+    const fullscreen_quad_pipeline_descriptor = gpu.RenderPipeline.Descriptor{
+        .layout = null,
+        .fragment = &fragment_state,
+        .vertex = .{
+            .module = fullscreen_quad_vs_module,
+            .entry_point = "vert_main",
+            .buffers = null,
+        },
+    };
+
+    const fullscreen_quad_pipeline = core.device.createRenderPipeline(&fullscreen_quad_pipeline_descriptor);
+
+    const sampler = core.device.createSampler(&.{
+        .mag_filter = .linear,
+        .min_filter = .linear,
+    });
+
+    const img = try zigimg.Image.fromMemory(core.allocator, @embedFile("../assets/gotta-go-fast.png"));
+    defer img.deinit();
+
+    const img_size = gpu.Extent3D{ .width = @intCast(u32, img.width), .height = @intCast(u32, img.height) };
+
+    const cube_texture = core.device.createTexture(&.{
+        .size = img_size,
+        .format = .rgba8_unorm,
+        .usage = .{
+            .texture_binding = true,
+            .copy_dst = true,
+            .render_attachment = true,
+        },
+    });
+
+    const data_layout = gpu.Texture.DataLayout{
+        .bytes_per_row = @intCast(u32, img.width * 4),
+        .rows_per_image = @intCast(u32, img.height),
+    };
+
+    switch (img.pixels.?) {
+        .Rgba32 => |pixels| queue.writeTexture(&.{ .texture = cube_texture }, &data_layout, &img_size, zigimg.color.Rgba32, pixels),
+        .Rgb24 => |pixels| {
+            const data = try rgb24ToRgba32(core.allocator, pixels);
+            defer data.deinit(core.allocator);
+            queue.writeTexture(&.{ .texture = cube_texture }, &data_layout, &img_size, zigimg.color.Rgba32, data.Rgba32);
+        },
+        else => @panic("unsupported image color format"),
+    }
+
+    var textures: [2]gpu.Texture = undefined;
+    for (textures) |_, i| {
+        textures[i] = core.device.createTexture(&.{
+            .size = img_size,
+            .format = .rgba8_unorm,
+            .usage = .{
+                .storage_binding = true,
+                .texture_binding = true,
+                .copy_dst = true,
+            },
+        });
+    }
+
+    // the shader blurs the input texture in one direction,
+    // depending on whether flip value is 0 or 1
+    var flip: [2]gpu.Buffer = undefined;
+    for (flip) |_, i| {
+        const buffer = core.device.createBuffer(&.{
+            .usage = .{ .uniform = true },
+            .size = @sizeOf(u32),
+            .mapped_at_creation = true,
+        });
+
+        const buffer_mapped = buffer.getMappedRange(u32, 0, 1);
+        buffer_mapped[0] = @intCast(u32, i);
+        buffer.unmap();
+
+        flip[i] = buffer;
+    }
+
+    const blur_params_buffer = core.device.createBuffer(&.{
+        .size = 8,
+        .usage = .{ .copy_dst = true, .uniform = true },
+    });
+
+    const compute_constants = core.device.createBindGroup(&gpu.BindGroup.Descriptor{
+        .layout = blur_pipeline.getBindGroupLayout(0),
+        .entries = &[_]gpu.BindGroup.Entry{
+            gpu.BindGroup.Entry.sampler(0, sampler),
+            gpu.BindGroup.Entry.buffer(1, blur_params_buffer, 0, 8),
+        },
+    });
+
+    const compute_bind_group_0 = core.device.createBindGroup(&gpu.BindGroup.Descriptor{
+        .layout = blur_pipeline.getBindGroupLayout(1),
+        .entries = &[_]gpu.BindGroup.Entry{
+            gpu.BindGroup.Entry.textureView(1, cube_texture.createView(&gpu.TextureView.Descriptor{})),
+            gpu.BindGroup.Entry.textureView(2, textures[0].createView(&gpu.TextureView.Descriptor{})),
+            gpu.BindGroup.Entry.buffer(3, flip[0], 0, 4),
+        },
+    });
+
+    const compute_bind_group_1 = core.device.createBindGroup(&gpu.BindGroup.Descriptor{
+        .layout = blur_pipeline.getBindGroupLayout(1),
+        .entries = &[_]gpu.BindGroup.Entry{
+            gpu.BindGroup.Entry.textureView(1, textures[0].createView(&gpu.TextureView.Descriptor{})),
+            gpu.BindGroup.Entry.textureView(2, textures[1].createView(&gpu.TextureView.Descriptor{})),
+            gpu.BindGroup.Entry.buffer(3, flip[1], 0, 4),
+        },
+    });
+
+    const compute_bind_group_2 = core.device.createBindGroup(&gpu.BindGroup.Descriptor{
+        .layout = blur_pipeline.getBindGroupLayout(1),
+        .entries = &[_]gpu.BindGroup.Entry{
+            gpu.BindGroup.Entry.textureView(1, textures[1].createView(&gpu.TextureView.Descriptor{})),
+            gpu.BindGroup.Entry.textureView(2, textures[0].createView(&gpu.TextureView.Descriptor{})),
+            gpu.BindGroup.Entry.buffer(3, flip[0], 0, 4),
+        },
+    });
+
+    const show_result_bind_group = core.device.createBindGroup(&gpu.BindGroup.Descriptor{
+        .layout = fullscreen_quad_pipeline.getBindGroupLayout(0),
+        .entries = &[_]gpu.BindGroup.Entry{
+            gpu.BindGroup.Entry.sampler(0, sampler),
+            gpu.BindGroup.Entry.textureView(1, textures[1].createView(&gpu.TextureView.Descriptor{})),
+        },
+    });
+
+    const blur_params_buffer_data = [_]u32{ filter_size, block_dimension };
+    queue.writeBuffer(blur_params_buffer, 0, u32, &blur_params_buffer_data);
+
+    app.queue = queue;
+    app.blur_pipeline = blur_pipeline;
+    app.fullscreen_quad_pipeline = fullscreen_quad_pipeline;
+    app.cube_texture = cube_texture;
+    app.textures = textures;
+    app.blur_params_buffer = blur_params_buffer;
+    app.compute_constants = compute_constants;
+    app.compute_bind_group_0 = compute_bind_group_0;
+    app.compute_bind_group_1 = compute_bind_group_1;
+    app.compute_bind_group_2 = compute_bind_group_2;
+    app.show_result_bind_group = show_result_bind_group;
+    app.img_size = img_size;
+}
+
+pub fn deinit(_: *App, _: *mach.Core) void {}
+
+pub fn update(app: *App, core: *mach.Core) !void {
+    const back_buffer_view = core.swap_chain.?.getCurrentTextureView();
+    const encoder = core.device.createCommandEncoder(null);
+
+    const compute_pass = encoder.beginComputePass(null);
+    compute_pass.setPipeline(app.blur_pipeline);
+    compute_pass.setBindGroup(0, app.compute_constants, &.{});
+
+    const width: u32 = @intCast(u32, app.img_size.width);
+    const height: u32 = @intCast(u32, app.img_size.height);
+    compute_pass.setBindGroup(1, app.compute_bind_group_0, &.{});
+    compute_pass.dispatch(try std.math.divCeil(u32, width, block_dimension), try std.math.divCeil(u32, height, batch[1]), 1);
+
+    compute_pass.setBindGroup(1, app.compute_bind_group_1, &.{});
+    compute_pass.dispatch(try std.math.divCeil(u32, height, block_dimension), try std.math.divCeil(u32, width, batch[1]), 1);
+
+    var i: u32 = 0;
+    while (i < iterations - 1) : (i += 1) {
+        compute_pass.setBindGroup(1, app.compute_bind_group_2, &.{});
+        compute_pass.dispatch(try std.math.divCeil(u32, width, block_dimension), try std.math.divCeil(u32, height, batch[1]), 1);
+
+        compute_pass.setBindGroup(1, app.compute_bind_group_1, &.{});
+        compute_pass.dispatch(try std.math.divCeil(u32, height, block_dimension), try std.math.divCeil(u32, width, batch[1]), 1);
+    }
+    compute_pass.end();
+
+    const color_attachment = gpu.RenderPassColorAttachment{
+        .view = back_buffer_view,
+        .resolve_target = null,
+        .clear_value = std.mem.zeroes(gpu.Color),
+        .load_op = .clear,
+        .store_op = .store,
+    };
+
+    const render_pass_descriptor = gpu.RenderPassEncoder.Descriptor{ .color_attachments = &[_]gpu.RenderPassColorAttachment{
+        color_attachment,
+    } };
+
+    const render_pass = encoder.beginRenderPass(&render_pass_descriptor);
+    render_pass.setPipeline(app.fullscreen_quad_pipeline);
+    render_pass.setBindGroup(0, app.show_result_bind_group, &.{});
+    render_pass.draw(6, 1, 0, 0);
+    render_pass.end();
+
+    var command = encoder.finish(null);
+    encoder.release();
+    app.queue.submit(&.{command});
+    command.release();
+    core.swap_chain.?.present();
+    back_buffer_view.release();
+}
+
+fn rgb24ToRgba32(allocator: std.mem.Allocator, in: []zigimg.color.Rgb24) !zigimg.color.ColorStorage {
+    const out = try zigimg.color.ColorStorage.init(allocator, .Rgba32, in.len);
+    var i: usize = 0;
+    while (i < in.len) : (i += 1) {
+        out.Rgba32[i] = zigimg.color.Rgba32{ .R = in[i].R, .G = in[i].G, .B = in[i].B, .A = 255 };
+    }
+    return out;
+}