const std = @import("std");
const builtin = @import("builtin");
const mach = @import("main.zig");
const sysaudio = mach.sysaudio;
const testing = mach.testing;

pub const Opus = @import("mach-opus");

const Audio = @This();

/// Identifier of this module; also used as the scope for this file's log output.
pub const mach_module = .mach_audio;

/// Names of this module's systems; they match the `init`, `tick` and `deinit` functions in this
/// file.
pub const mach_systems = .{ .init, .tick, .deinit };

/// The length of a @Vector(len, f32) used for SIMD mixing of audio buffers. Audio buffers must be
/// aligned to simd_vector_length * @sizeOf(f32).
///
/// Falls back to 1 when the standard library has no suggested vector length for f32.
pub const simd_vector_length = std.simd.suggestVectorLength(f32) orelse 1;

/// Required byte alignment of audio sample buffers (one full SIMD vector of f32).
pub const alignment = simd_vector_length * @sizeOf(f32);

const log = std.log.scoped(mach_module);

// The number of milliseconds worth of audio to render ahead of time. The lower this number is, the
// less latency there is in playing new audio. The higher this number is, the less chance there is
// of glitchy audio playback.
//
// By default, we use 16ms, i.e. roughly one 1/60th of a second - about one frame could drop before
// audio would stop playing smoothly assuming a 60hz application render rate.
//
// NOTE(review): this comment previously described "three times 1/60th of a second" (3 frames),
// which does not match the default of 16 below - confirm which of the two reflects the intent.
ms_render_ahead: f32 = 16,

/// The audio buffers known to this module. On every tick, each buffer with `playing == true` is
/// mixed into the output starting at its `index`.
buffers: mach.Objects(
    .{},
    struct {
        /// The actual audio samples
        samples: []align(alignment) const f32,

        /// The number of channels in the samples buffer
        channels: u8,

        /// Volume multiplier
        volume: f32 = 1.0,

        /// Whether the buffer should be playing currently
        playing: bool = true,

        /// The current playhead of the samples
        index: usize = 0,
    },
),

/// Whether to debug audio sync issues
debug: bool = false,

/// Callback which is ran when buffers change state from playing -> not playing
on_state_change: ?mach.FunctionID = null,

/// Audio player (has global volume controls)
player: sysaudio.Player,

// Internal fields
allocator: std.mem.Allocator,
ctx: sysaudio.Context,
/// Rendered samples, already converted to the player's format, waiting to be handed to the driver.
output: SampleBuffer,
/// Scratch f32 buffer that playing buffers are mixed into; allocated lazily by tick().
mixing_buffer: ?std.ArrayListAlignedUnmanaged(f32, alignment) = null,
/// Set by deinit() so that tick() and writeFn() stop doing work.
shutdown: std.atomic.Value(bool) = .init(false),
mod: mach.Mod(Audio),
/// Number of samples the driver asked for in the most recent writeFn() call.
driver_needs_num_samples: usize = 0,

const SampleBuffer = std.fifo.LinearFifo(u8, .Dynamic);

/// Opens the default playback device, creates a player for it whose write callback is writeFn,
/// initializes the module state and starts playback.
///
/// Debug logging is enabled when the MACH_DEBUG_AUDIO environment variable equals "true"
/// (case-insensitive). Returns error.NoDeviceFound if there is no default playback device.
pub fn init(audio: *Audio, audio_mod: mach.Mod(Audio)) !void {
    // TODO(allocator): find a better way for modules to get allocators
    const allocator = std.heap.c_allocator;

    const ctx = try sysaudio.Context.init(null, allocator, .{});
    try ctx.refresh();

    // TODO(audio): let people handle these errors
    // TODO(audio): enable selecting non-default devices
    const device = ctx.defaultDevice(.playback) orelse return error.NoDeviceFound;
    var player = try ctx.createPlayer(device, writeFn, .{ .user_data = audio, .sample_rate = 48000 });
    log.info("opened audio device: channels={} sample_rate={} format={s}", .{
        player.channels().len,
        player.sampleRate(),
        @tagName(player.format()),
    });

    const debug_str = std.process.getEnvVarOwned(
        allocator,
        "MACH_DEBUG_AUDIO",
    ) catch |err| switch (err) {
        error.EnvironmentVariableNotFound => null,
        else => return err,
    };
    const debug = if (debug_str) |s| blk: {
        defer allocator.free(s);
        break :blk std.ascii.eqlIgnoreCase(s, "true");
    } else false;

    // Everything except `buffers` is (re)initialized; fields not listed take their defaults.
    audio.* = .{
        .buffers = audio.buffers,
        .allocator = allocator,
        .ctx = ctx,
        .player = player,
        .output = SampleBuffer.init(allocator),
        .debug = debug,
        .mod = audio_mod,
    };

    try player.start();
}

/// Signals shutdown to tick()/writeFn(), tears down the player and audio context, and releases the
/// buffers owned by this module.
pub fn deinit(audio: *Audio) void {
    audio.shutdown.store(true, .release);
    audio.player.deinit();
    audio.ctx.deinit();
    // The output fifo is heap-backed (.Dynamic) and was previously never released.
    audio.output.deinit();
    if (audio.mixing_buffer) |*b| b.deinit(audio.allocator);
}

/// Audio.tick is called on the high-priority OS audio thread when the audio driver is waiting for
/// more audio samples because the audio.output buffer does not currently have enough to satisfy the
/// driver.
///
/// Its goal is to fill the audio.output buffer with enough samples to satisfy the immediate
/// requirements of the audio driver (audio.driver_needs_num_samples), and prepare some amount of
/// additional samples ahead of time to satisfy the driver in the future.
///
/// Runs the audio.on_state_change callback (if set) when at least one buffer finished playing
/// during this tick. Fails if growing the mixing buffer or the output fifo fails.
pub fn tick(audio: *Audio, audio_mod: mach.Mod(Audio)) !void {
    // If the other thread called deinit(), return.
    if (audio.shutdown.load(.acquire)) return;

    const allocator = audio.allocator;
    const player = &audio.player;
    const player_channels: u8 = @intCast(player.channels().len);
    const driver_needs = audio.driver_needs_num_samples;

    // How many audio samples we will render ahead by
    const samples_per_ms = @as(f32, @floatFromInt(player.sampleRate())) / 1000.0;
    const render_ahead: u32 = @as(u32, @intFromFloat(@trunc(audio.ms_render_ahead * samples_per_ms))) * player_channels;

    // Our goal is to satisfy the driver's immediate needs, plus prepare render_ahead number of
    // samples. The subtraction saturates at zero: when enough samples are already prepared there
    // is simply nothing to render (an unsigned count can never be negative, so no separate check
    // is needed).
    const goal_pre_rendered = driver_needs + render_ahead;
    const already_prepared = audio.output.readableLength() / player.format().size();
    const render_num_samples = goal_pre_rendered -| already_prepared;

    // Ensure our f32 mixing buffer has enough space for the samples we will render right now.
    // This will allocate to grow but never shrink.
    if (audio.mixing_buffer == null) {
        audio.mixing_buffer = try std.ArrayListAlignedUnmanaged(f32, alignment).initCapacity(allocator, render_num_samples);
    }
    const mixing_buffer = &audio.mixing_buffer.?;
    try mixing_buffer.resize(allocator, render_num_samples); // grows, but never shrinks

    // Zero the mixing buffer to silence: if no audio is mixed in below, then we want silence
    // not undefined memory noise.
    @memset(mixing_buffer.items, 0);

    var did_state_change = false;
    {
        audio.buffers.lock();
        defer audio.buffers.unlock();

        var buffers = audio.buffers.slice();
        while (buffers.next()) |buf_id| {
            var buffer = audio.buffers.getValue(buf_id);
            if (!buffer.playing) continue;
            // Write the updated playhead / playing flag back once this buffer has been mixed.
            defer audio.buffers.setValue(buf_id, buffer);

            const new_index = mixSamples(
                mixing_buffer.items,
                player_channels,
                buffer.samples,
                buffer.index,
                buffer.channels,
                buffer.volume,
            );

            // A buffer is finished once less than one whole frame of samples remains. For buffers
            // whose length is a multiple of their channel count this is the same as
            // `new_index >= buffer.samples.len`; it additionally retires buffers with a trailing
            // partial frame, which mixSamples (whole frames only) would never consume and which
            // would otherwise stay "playing" forever.
            if (new_index + buffer.channels > buffer.samples.len) {
                // No longer playing, we've read all samples
                did_state_change = true;
                buffer.playing = false;
                buffer.index = 0;
            } else {
                buffer.index = new_index;
            }
        }
    }
    if (did_state_change) {
        if (audio.on_state_change) |f| audio_mod.run(f);
    }

    // Write our rendered samples to the fifo, expanding its size as needed and converting our f32
    // samples to the format the driver expects.
    const out_buffer_len = render_num_samples * player.format().size();
    const out_buffer = try audio.output.writableWithSize(out_buffer_len); // TODO(audio): handle potential OOM here better
    std.debug.assert(mixing_buffer.items.len == render_num_samples);
    sysaudio.convertTo(
        f32,
        mixing_buffer.items[0..],
        player.format(),
        out_buffer[0..out_buffer_len], // writableWithSize may return a larger slice than needed
    );
    audio.output.update(out_buffer_len);
}

/// Called by the system audio driver when the output buffer needs to be filled. Called on a
/// dedicated OS thread for high-priority audio. Its goal is to fill the output buffer as quickly
/// as possible and return, else audio skips will occur.
fn writeFn(audio_opaque: ?*anyopaque, output: []u8) void {
    const audio: *Audio = @ptrCast(@alignCast(audio_opaque));
    const format_size = audio.player.format().size();

    // If the other thread called deinit(), write zeros to the buffer (no sound) and return.
    if (audio.shutdown.load(.acquire)) {
        @memset(output, 0);
        return;
    }

    // Do we have enough audio samples in our audio.output buffer to fill the output buffer?
    //
    // This is the most common case, because audio.output should have much more data prepared
    // ahead of time than what the audio driver needs.
    if (readPrepared(audio, output)) return;

    // At this point, we don't have enough audio data prepared in our audio.output buffer. so we
    // must prepare it now.
    while (true) {
        // Run the audio tick function, which should fill the audio.output buffer with more audio
        // samples.
        audio.driver_needs_num_samples = @divExact(output.len, format_size);
        audio.mod.call(.tick);

        // Check if we now have enough data in our audio.output buffer. If we do, then we're done.
        if (readPrepared(audio, output)) return;

        // The audio tick didn't produce enough data, this might indicate some subtle mismatch in
        // the audio tick function not producing a multiple of the audio driver's actual buffer
        // size.
        if (audio.debug) log.debug("resync, found {} samples but need {} (nano timestamp {})", .{
            audio.output.readableLength() / format_size,
            @divExact(output.len, format_size),
            std.time.nanoTimestamp(),
        });

        // If the other thread called deinit(), write zeros to the buffer (no sound) and return.
        if (audio.shutdown.load(.acquire)) {
            @memset(output, 0);
            return;
        }
    }
}

/// Fills `output` completely from the front of audio.output and consumes those bytes, returning
/// true. Returns false, leaving both untouched, if fewer than output.len bytes are prepared.
///
/// The amount available is measured with readableLength() (the same measure tick() uses), not
/// with the length of readableSlice(0): the fifo is a ring buffer and readableSlice(0) only covers
/// the contiguous part up to the wrap-around point. Judging by the contiguous part alone could
/// report "not enough data" while tick() sees plenty and renders nothing, so writeFn would spin
/// forever. read() copies across the wrap-around point.
fn readPrepared(audio: *Audio, output: []u8) bool {
    if (audio.output.readableLength() < output.len) return false;
    const copied = audio.output.read(output);
    std.debug.assert(copied == output.len);
    return true;
}

/// Mixes audio samples using SIMD. Returns the src_index progressed by the number of samples
/// consumed.
///
/// Only whole frames are mixed: min(dst.len / dst_channels, (src.len - src_index) / src_channels)
/// frames are consumed. Both channel counts must be non-zero and src_index must not exceed
/// src.len. Each dst channel `c` receives the src channel `c % src_channels`; src channels beyond
/// dst_channels are dropped.
inline fn mixSamples(
    /// The destination where audio buffers should be mixed into. This buffer will be populated with
    /// as many samples from src as possible, until either dst is full or src has no more available.
    dst: []align(alignment) f32,
    /// The number of channels in the dst buffer.
    dst_channels: u8,
    /// The audio buffer whose samples src[src_index..] should be mixed into the dst
    src: []align(alignment) const f32,
    src_index: usize,
    /// The number of channels in the src buffer
    src_channels: u8,
    /// The volume/gain that should be applied to samples in src before mixing them into dst.
    src_volume: f32,
) usize {
    const dst_frames = dst.len / dst_channels;
    const src_frames = (src.len - src_index) / src_channels;
    const frames_to_process = @min(dst_frames, src_frames);
    const samples_to_process = frames_to_process * src_channels;
    if (samples_to_process == 0) return src_index;

    const end_index = src_index + samples_to_process;
    var current_index = src_index;

    // Handle unaligned start if necessary, since src[src_index..] may not be SIMD aligned - so
    // we handle the starting portion with scalars instead.
    const misalignment = (@intFromPtr(src.ptr + current_index) % alignment) / @sizeOf(f32);
    if (misalignment != 0) {
        const head_end = @min(current_index + (simd_vector_length - misalignment), end_index);
        while (current_index < head_end) : (current_index += 1) {
            mixOneSample(dst, dst_channels, src_channels, current_index - src_index, src[current_index] * src_volume);
        }
    }

    // SIMD processing for aligned portion: the volume is applied one vector at a time, then each
    // lane is scattered to its destination channel(s).
    const Vec = @Vector(simd_vector_length, f32);
    const volume_vec: Vec = @splat(src_volume);
    const vec_count = ((end_index - current_index) / simd_vector_length) * simd_vector_length;
    const vec_end = current_index + vec_count;
    while (current_index < vec_end) : (current_index += simd_vector_length) {
        const src_vec: Vec = src[current_index..][0..simd_vector_length].*;
        const scaled_vec = src_vec * volume_vec;

        var lane: usize = 0;
        while (lane < simd_vector_length) : (lane += 1) {
            mixOneSample(dst, dst_channels, src_channels, current_index - src_index + lane, scaled_vec[lane]);
        }
    }

    // Handle remaining samples, similar to how we may need to handle an unaligned start we also
    // need to handle an unaligned end - if dst wants more samples but not a full SIMD vector worth
    // at the end.
    while (current_index < end_index) : (current_index += 1) {
        mixOneSample(dst, dst_channels, src_channels, current_index - src_index, src[current_index] * src_volume);
    }

    return current_index;
}

/// Adds `sample` (already volume-scaled) to every dst channel that maps onto its src channel.
/// `sample_offset` is the sample's position relative to the src_index mixing started at, which
/// determines both its frame and its src channel.
inline fn mixOneSample(
    dst: []f32,
    dst_channels: u8,
    src_channels: u8,
    sample_offset: usize,
    sample: f32,
) void {
    const dst_frame_start = (sample_offset / src_channels) * dst_channels;
    // The dst channels `c` with `c % src_channels == src_channel` are exactly src_channel,
    // src_channel + src_channels, ... below dst_channels. usize avoids u8 overflow while striding.
    var channel: usize = sample_offset % src_channels;
    while (channel < dst_channels) : (channel += src_channels) {
        dst[dst_frame_start + channel] += sample;
    }
}

test "mixSamples - basic mono to mono mixing" {
    var dst_buffer align(alignment) = [_]f32{0} ** 16;
    const src_buffer align(alignment) = [_]f32{ 1.0, 2.0, 3.0, 4.0 } ** 4;

    const new_index = mixSamples(
        &dst_buffer,
        1, // dst_channels
        &src_buffer,
        0, // src_index
        1, // src_channels
        0.5, // src_volume
    );

    try testing.expect(usize, 16).eql(new_index);
    try testing.expect(f32, 0.5).eql(dst_buffer[0]);
    try testing.expect(f32, 1.0).eql(dst_buffer[1]);
    try testing.expect(f32, 1.5).eql(dst_buffer[2]);
    try testing.expect(f32, 2.0).eql(dst_buffer[3]);
}

test "mixSamples - stereo to stereo mixing" {
    var dst_buffer align(alignment) = [_]f32{0} ** 16;
    const src_buffer align(alignment) = [_]f32{ 1.0, -1.0, 2.0, -2.0, 3.0, -3.0, 4.0, -4.0 } ** 2;

    const new_index = mixSamples(
        &dst_buffer,
        2, // dst_channels
        &src_buffer,
        0, // src_index
        2, // src_channels
        1.0, // src_volume
    );

    try testing.expect(usize, 16).eql(new_index);
    try testing.expect(f32, 1.0).eql(dst_buffer[0]); // Left
    try testing.expect(f32, -1.0).eql(dst_buffer[1]); // Right
    try testing.expect(f32, 2.0).eql(dst_buffer[2]); // Left
    try testing.expect(f32, -2.0).eql(dst_buffer[3]); // Right
}

test "mixSamples - mono to stereo mixing (channel duplication)" {
    var dst_buffer align(alignment) = [_]f32{0} ** 16;
    const src_buffer align(alignment) = [_]f32{ 1.0, 2.0, 3.0, 4.0 } ** 2;

    const new_index = mixSamples(
        &dst_buffer,
        2, // dst_channels
        &src_buffer,
        0, // src_index
        1, // src_channels
        1.0, // src_volume
    );

    try testing.expect(usize, 8).eql(new_index);
    try testing.expect(f32, 1.0).eql(dst_buffer[0]); // Left
    try testing.expect(f32, 1.0).eql(dst_buffer[1]); // Right
    try testing.expect(f32, 2.0).eql(dst_buffer[2]); // Left
    try testing.expect(f32, 2.0).eql(dst_buffer[3]); // Right
}

test "mixSamples - partial buffer processing" {
    var dst_buffer align(alignment) = [_]f32{0} ** 8;
    const src_buffer align(alignment) = [_]f32{ 1.0, 2.0, 3.0, 4.0 } ** 4;

    const new_index = mixSamples(
        &dst_buffer,
        1, // dst_channels
        &src_buffer,
        4, // src_index
        1, // src_channels
        1.0, // src_volume
    );

    try testing.expect(usize, 12).eql(new_index);
    try testing.expect(f32, 1.0).eql(dst_buffer[0]);
    try testing.expect(f32, 2.0).eql(dst_buffer[1]);
    try testing.expect(f32, 3.0).eql(dst_buffer[2]);
}

test "mixSamples - unaligned src_index" {
    var dst_buffer align(alignment) = [_]f32{0} ** 8;
    const src_buffer align(alignment) = [_]f32{ 1.0, 2.0, 3.0, 4.0 } ** 4;

    // Starting one sample in exercises the scalar path used before the first aligned vector.
    const new_index = mixSamples(
        &dst_buffer,
        1, // dst_channels
        &src_buffer,
        1, // src_index
        1, // src_channels
        1.0, // src_volume
    );

    try testing.expect(usize, 9).eql(new_index);
    try testing.expect(f32, 2.0).eql(dst_buffer[0]);
    try testing.expect(f32, 3.0).eql(dst_buffer[1]);
    try testing.expect(f32, 4.0).eql(dst_buffer[2]);
    try testing.expect(f32, 1.0).eql(dst_buffer[3]);
}

test "mixSamples - mixing with volume adjustment" {
    var dst_buffer align(alignment) = [_]f32{0} ** 8;
    const src_buffer align(alignment) = [_]f32{ 1.0, 2.0, 3.0, 4.0 } ** 2;

    const new_index = mixSamples(
        &dst_buffer,
        1, // dst_channels
        &src_buffer,
        0, // src_index
        1, // src_channels
        0.5, // src_volume
    );

    try testing.expect(usize, 8).eql(new_index);
    try testing.expect(f32, 0.5).eql(dst_buffer[0]);
    try testing.expect(f32, 1.0).eql(dst_buffer[1]);
    try testing.expect(f32, 1.5).eql(dst_buffer[2]);
}

test "mixSamples - accumulation test" {
    var dst_buffer align(alignment) = [_]f32{1.0} ** 8;
    const src_buffer align(alignment) = [_]f32{ 1.0, 2.0, 3.0, 4.0 } ** 2;

    const new_index = mixSamples(
        &dst_buffer,
        1, // dst_channels
        &src_buffer,
        0, // src_index
        1, // src_channels
        1.0, // src_volume
    );

    try testing.expect(usize, 8).eql(new_index);
    try testing.expect(f32, 2.0).eql(dst_buffer[0]);
    try testing.expect(f32, 3.0).eql(dst_buffer[1]);
    try testing.expect(f32, 4.0).eql(dst_buffer[2]);
}