examples: add image-blur example

Signed-off-by: Lucas Romanó <9062026+lucasromanosantos@users.noreply.github.com>
This commit is contained in:
Lucas Romanó 2022-07-13 18:14:30 -03:00 committed by Stephen Gutekanst
parent 9ece370059
commit a787265af2
5 changed files with 413 additions and 0 deletions

View file

@ -6,3 +6,4 @@ The following examples have been ported from https://github.com/austinEng/webgpu
* ./instanced-cube
* ./textured-cube
* ./fractal-cube
* ./image-blur

View file

@ -0,0 +1,83 @@
struct Params {
filterDim : u32,
blockDim : u32,
}
@group(0) @binding(0) var samp : sampler;
@group(0) @binding(1) var<uniform> params : Params;
@group(1) @binding(1) var inputTex : texture_2d<f32>;
@group(1) @binding(2) var outputTex : texture_storage_2d<rgba8unorm, write>;
struct Flip {
value : u32,
}
@group(1) @binding(3) var<uniform> flip : Flip;
// This shader blurs the input texture in one direction, depending on whether
// |flip.value| is 0 or 1.
// It does so by running (128 / 4) threads per workgroup to load 128
// texels into 4 rows of shared memory. Each thread loads a
// 4 x 4 block of texels to take advantage of the texture sampling
// hardware.
// Then, each thread computes the blur result by averaging the adjacent texel values
// in shared memory.
// Because we're operating on a subset of the texture, we cannot compute all of the
// results since not all of the neighbors are available in shared memory.
// Specifically, with 128 x 128 tiles, we can only compute and write out
// square blocks of size 128 - (filterSize - 1). We compute the number of blocks
// needed in Javascript and dispatch that amount.
var<workgroup> tile : array<array<vec3<f32>, 128>, 4>;
@stage(compute) @workgroup_size(32, 1, 1)
fn main(
@builtin(workgroup_id) WorkGroupID : vec3<u32>,
@builtin(local_invocation_id) LocalInvocationID : vec3<u32>
) {
let filterOffset : u32 = (params.filterDim - 1u) / 2u;
let dims : vec2<i32> = textureDimensions(inputTex, 0);
let baseIndex = vec2<i32>(
WorkGroupID.xy * vec2<u32>(params.blockDim, 4u) +
LocalInvocationID.xy * vec2<u32>(4u, 1u)
) - vec2<i32>(i32(filterOffset), 0);
for (var r : u32 = 0u; r < 4u; r = r + 1u) {
for (var c : u32 = 0u; c < 4u; c = c + 1u) {
var loadIndex = baseIndex + vec2<i32>(i32(c), i32(r));
if (flip.value != 0u) {
loadIndex = loadIndex.yx;
}
tile[r][4u * LocalInvocationID.x + c] = textureSampleLevel(
inputTex,
samp,
(vec2<f32>(loadIndex) + vec2<f32>(0.25, 0.25)) / vec2<f32>(dims),
0.0
).rgb;
}
}
workgroupBarrier();
for (var r : u32 = 0u; r < 4u; r = r + 1u) {
for (var c : u32 = 0u; c < 4u; c = c + 1u) {
var writeIndex = baseIndex + vec2<i32>(i32(c), i32(r));
if (flip.value != 0u) {
writeIndex = writeIndex.yx;
}
let center : u32 = 4u * LocalInvocationID.x + c;
if (center >= filterOffset &&
center < 128u - filterOffset &&
all(writeIndex < dims)) {
var acc : vec3<f32> = vec3<f32>(0.0, 0.0, 0.0);
for (var f : u32 = 0u; f < params.filterDim; f = f + 1u) {
var i : u32 = center + f - filterOffset;
acc = acc + (1.0 / f32(params.filterDim)) * tile[r][i];
}
textureStore(outputTex, writeIndex, vec4<f32>(acc, 1.0));
}
}
}
}

View file

@ -0,0 +1,38 @@
@group(0) @binding(0) var mySampler : sampler;
@group(0) @binding(1) var myTexture : texture_2d<f32>;
struct VertexOutput {
@builtin(position) Position : vec4<f32>,
@location(0) fragUV : vec2<f32>,
}
@stage(vertex)
fn vert_main(@builtin(vertex_index) VertexIndex : u32) -> VertexOutput {
var pos = array<vec2<f32>, 6>(
vec2<f32>( 1.0, 1.0),
vec2<f32>( 1.0, -1.0),
vec2<f32>(-1.0, -1.0),
vec2<f32>( 1.0, 1.0),
vec2<f32>(-1.0, -1.0),
vec2<f32>(-1.0, 1.0)
);
var uv = array<vec2<f32>, 6>(
vec2<f32>(1.0, 0.0),
vec2<f32>(1.0, 1.0),
vec2<f32>(0.0, 1.0),
vec2<f32>(1.0, 0.0),
vec2<f32>(0.0, 1.0),
vec2<f32>(0.0, 0.0)
);
var output : VertexOutput;
output.Position = vec4<f32>(pos[VertexIndex], 0.0, 1.0);
output.fragUV = uv[VertexIndex];
return output;
}
@stage(fragment)
fn frag_main(@location(0) fragUV : vec2<f32>) -> @location(0) vec4<f32> {
return textureSample(myTexture, mySampler, fragUV);
}

View file

@ -0,0 +1,290 @@
const std = @import("std");
const mach = @import("mach");
const gpu = @import("gpu");
const zigimg = @import("zigimg");
queue: gpu.Queue,
blur_pipeline: gpu.ComputePipeline,
fullscreen_quad_pipeline: gpu.RenderPipeline,
cube_texture: gpu.Texture,
textures: [2]gpu.Texture,
blur_params_buffer: gpu.Buffer,
compute_constants: gpu.BindGroup,
compute_bind_group_0: gpu.BindGroup,
compute_bind_group_1: gpu.BindGroup,
compute_bind_group_2: gpu.BindGroup,
show_result_bind_group: gpu.BindGroup,
img_size: gpu.Extent3D,
pub const App = @This();
// Constants from the blur.wgsl shader
const tile_dimension: u32 = 128;
const batch: [2]u32 = .{ 4, 4 };
// Currently hardcoded
const filter_size: u32 = 15;
const iterations: u32 = 2;
var block_dimension: u32 = tile_dimension - (filter_size - 1);
pub fn init(app: *App, core: *mach.Core) !void {
const queue = core.device.getQueue();
try core.setOptions(.{
.size_min = .{ .width = 20, .height = 20 },
});
const blur_shader_module = core.device.createShaderModule(&.{
.label = "blur shader module",
.code = .{ .wgsl = @embedFile("blur.wgsl") },
});
const blur_pipeline_descriptor = gpu.ComputePipeline.Descriptor{
.compute = gpu.ProgrammableStageDescriptor{
.module = blur_shader_module,
.entry_point = "main",
},
};
const blur_pipeline = core.device.createComputePipeline(&blur_pipeline_descriptor);
const fullscreen_quad_vs_module = core.device.createShaderModule(&.{
.label = "fullscreen quad vertex shader",
.code = .{ .wgsl = @embedFile("fullscreen_textured_quad.wgsl") },
});
const fullscreen_quad_fs_module = core.device.createShaderModule(&.{
.label = "fullscreen quad fragment shader",
.code = .{ .wgsl = @embedFile("fullscreen_textured_quad.wgsl") },
});
const blend = gpu.BlendState{
.color = .{
.operation = .add,
.src_factor = .one,
.dst_factor = .zero,
},
.alpha = .{
.operation = .add,
.src_factor = .one,
.dst_factor = .zero,
},
};
const color_target = gpu.ColorTargetState{
.format = core.swap_chain_format,
.blend = &blend,
.write_mask = gpu.ColorWriteMask.all,
};
const fragment_state = gpu.FragmentState{
.module = fullscreen_quad_fs_module,
.entry_point = "frag_main",
.targets = &.{color_target},
.constants = null,
};
const fullscreen_quad_pipeline_descriptor = gpu.RenderPipeline.Descriptor{
.layout = null,
.fragment = &fragment_state,
.vertex = .{
.module = fullscreen_quad_vs_module,
.entry_point = "vert_main",
.buffers = null,
},
};
const fullscreen_quad_pipeline = core.device.createRenderPipeline(&fullscreen_quad_pipeline_descriptor);
const sampler = core.device.createSampler(&.{
.mag_filter = .linear,
.min_filter = .linear,
});
const img = try zigimg.Image.fromMemory(core.allocator, @embedFile("../assets/gotta-go-fast.png"));
defer img.deinit();
const img_size = gpu.Extent3D{ .width = @intCast(u32, img.width), .height = @intCast(u32, img.height) };
const cube_texture = core.device.createTexture(&.{
.size = img_size,
.format = .rgba8_unorm,
.usage = .{
.texture_binding = true,
.copy_dst = true,
.render_attachment = true,
},
});
const data_layout = gpu.Texture.DataLayout{
.bytes_per_row = @intCast(u32, img.width * 4),
.rows_per_image = @intCast(u32, img.height),
};
switch (img.pixels.?) {
.Rgba32 => |pixels| queue.writeTexture(&.{ .texture = cube_texture }, &data_layout, &img_size, zigimg.color.Rgba32, pixels),
.Rgb24 => |pixels| {
const data = try rgb24ToRgba32(core.allocator, pixels);
defer data.deinit(core.allocator);
queue.writeTexture(&.{ .texture = cube_texture }, &data_layout, &img_size, zigimg.color.Rgba32, data.Rgba32);
},
else => @panic("unsupported image color format"),
}
var textures: [2]gpu.Texture = undefined;
for (textures) |_, i| {
textures[i] = core.device.createTexture(&.{
.size = img_size,
.format = .rgba8_unorm,
.usage = .{
.storage_binding = true,
.texture_binding = true,
.copy_dst = true,
},
});
}
// the shader blurs the input texture in one direction,
// depending on whether flip value is 0 or 1
var flip: [2]gpu.Buffer = undefined;
for (flip) |_, i| {
const buffer = core.device.createBuffer(&.{
.usage = .{ .uniform = true },
.size = @sizeOf(u32),
.mapped_at_creation = true,
});
const buffer_mapped = buffer.getMappedRange(u32, 0, 1);
buffer_mapped[0] = @intCast(u32, i);
buffer.unmap();
flip[i] = buffer;
}
const blur_params_buffer = core.device.createBuffer(&.{
.size = 8,
.usage = .{ .copy_dst = true, .uniform = true },
});
const compute_constants = core.device.createBindGroup(&gpu.BindGroup.Descriptor{
.layout = blur_pipeline.getBindGroupLayout(0),
.entries = &[_]gpu.BindGroup.Entry{
gpu.BindGroup.Entry.sampler(0, sampler),
gpu.BindGroup.Entry.buffer(1, blur_params_buffer, 0, 8),
},
});
const compute_bind_group_0 = core.device.createBindGroup(&gpu.BindGroup.Descriptor{
.layout = blur_pipeline.getBindGroupLayout(1),
.entries = &[_]gpu.BindGroup.Entry{
gpu.BindGroup.Entry.textureView(1, cube_texture.createView(&gpu.TextureView.Descriptor{})),
gpu.BindGroup.Entry.textureView(2, textures[0].createView(&gpu.TextureView.Descriptor{})),
gpu.BindGroup.Entry.buffer(3, flip[0], 0, 4),
},
});
const compute_bind_group_1 = core.device.createBindGroup(&gpu.BindGroup.Descriptor{
.layout = blur_pipeline.getBindGroupLayout(1),
.entries = &[_]gpu.BindGroup.Entry{
gpu.BindGroup.Entry.textureView(1, textures[0].createView(&gpu.TextureView.Descriptor{})),
gpu.BindGroup.Entry.textureView(2, textures[1].createView(&gpu.TextureView.Descriptor{})),
gpu.BindGroup.Entry.buffer(3, flip[1], 0, 4),
},
});
const compute_bind_group_2 = core.device.createBindGroup(&gpu.BindGroup.Descriptor{
.layout = blur_pipeline.getBindGroupLayout(1),
.entries = &[_]gpu.BindGroup.Entry{
gpu.BindGroup.Entry.textureView(1, textures[1].createView(&gpu.TextureView.Descriptor{})),
gpu.BindGroup.Entry.textureView(2, textures[0].createView(&gpu.TextureView.Descriptor{})),
gpu.BindGroup.Entry.buffer(3, flip[0], 0, 4),
},
});
const show_result_bind_group = core.device.createBindGroup(&gpu.BindGroup.Descriptor{
.layout = fullscreen_quad_pipeline.getBindGroupLayout(0),
.entries = &[_]gpu.BindGroup.Entry{
gpu.BindGroup.Entry.sampler(0, sampler),
gpu.BindGroup.Entry.textureView(1, textures[1].createView(&gpu.TextureView.Descriptor{})),
},
});
const blur_params_buffer_data = [_]u32{ filter_size, block_dimension };
queue.writeBuffer(blur_params_buffer, 0, u32, &blur_params_buffer_data);
app.queue = queue;
app.blur_pipeline = blur_pipeline;
app.fullscreen_quad_pipeline = fullscreen_quad_pipeline;
app.cube_texture = cube_texture;
app.textures = textures;
app.blur_params_buffer = blur_params_buffer;
app.compute_constants = compute_constants;
app.compute_bind_group_0 = compute_bind_group_0;
app.compute_bind_group_1 = compute_bind_group_1;
app.compute_bind_group_2 = compute_bind_group_2;
app.show_result_bind_group = show_result_bind_group;
app.img_size = img_size;
}
pub fn deinit(_: *App, _: *mach.Core) void {}
pub fn update(app: *App, core: *mach.Core) !void {
const back_buffer_view = core.swap_chain.?.getCurrentTextureView();
const encoder = core.device.createCommandEncoder(null);
const compute_pass = encoder.beginComputePass(null);
compute_pass.setPipeline(app.blur_pipeline);
compute_pass.setBindGroup(0, app.compute_constants, &.{});
const width: u32 = @intCast(u32, app.img_size.width);
const height: u32 = @intCast(u32, app.img_size.height);
compute_pass.setBindGroup(1, app.compute_bind_group_0, &.{});
compute_pass.dispatch(try std.math.divCeil(u32, width, block_dimension), try std.math.divCeil(u32, height, batch[1]), 1);
compute_pass.setBindGroup(1, app.compute_bind_group_1, &.{});
compute_pass.dispatch(try std.math.divCeil(u32, height, block_dimension), try std.math.divCeil(u32, width, batch[1]), 1);
var i: u32 = 0;
while (i < iterations - 1) : (i += 1) {
compute_pass.setBindGroup(1, app.compute_bind_group_2, &.{});
compute_pass.dispatch(try std.math.divCeil(u32, width, block_dimension), try std.math.divCeil(u32, height, batch[1]), 1);
compute_pass.setBindGroup(1, app.compute_bind_group_1, &.{});
compute_pass.dispatch(try std.math.divCeil(u32, height, block_dimension), try std.math.divCeil(u32, width, batch[1]), 1);
}
compute_pass.end();
const color_attachment = gpu.RenderPassColorAttachment{
.view = back_buffer_view,
.resolve_target = null,
.clear_value = std.mem.zeroes(gpu.Color),
.load_op = .clear,
.store_op = .store,
};
const render_pass_descriptor = gpu.RenderPassEncoder.Descriptor{ .color_attachments = &[_]gpu.RenderPassColorAttachment{
color_attachment,
} };
const render_pass = encoder.beginRenderPass(&render_pass_descriptor);
render_pass.setPipeline(app.fullscreen_quad_pipeline);
render_pass.setBindGroup(0, app.show_result_bind_group, &.{});
render_pass.draw(6, 1, 0, 0);
render_pass.end();
var command = encoder.finish(null);
encoder.release();
app.queue.submit(&.{command});
command.release();
core.swap_chain.?.present();
back_buffer_view.release();
}
fn rgb24ToRgba32(allocator: std.mem.Allocator, in: []zigimg.color.Rgb24) !zigimg.color.ColorStorage {
const out = try zigimg.color.ColorStorage.init(allocator, .Rgba32, in.len);
var i: usize = 0;
while (i < in.len) : (i += 1) {
out.Rgba32[i] = zigimg.color.Rgba32{ .R = in[i].R, .G = in[i].G, .B = in[i].B, .A = 255 };
}
return out;
}