feat: formatting strings at runtime

This commit is contained in:
Brett Broadhurst 2026-04-05 14:45:10 -06:00
parent 6924e929d8
commit d2cd7fa888
Failed to generate hash of commit
10 changed files with 211 additions and 34 deletions

View file

@ -29,7 +29,6 @@ pub const Node = struct {
number_literal,
string_literal,
string_expr,
empty_string,
identifier,
add_expr,
subtract_expr,

View file

@ -61,7 +61,6 @@ fn nodeTagToString(tag: Ast.Node.Tag) []const u8 {
.true_literal => "TrueLiteral",
.number_literal => "NumberLiteral",
.string_literal => "StringLiteral",
.empty_string => "EmptyString",
.identifier => "Identifier",
.add_expr => "AddExpr",
.subtract_expr => "SubtractExpr",
@ -348,7 +347,6 @@ fn renderAstWalk(
.true_literal,
.number_literal,
.string_literal,
.empty_string,
.identifier,
.parameter_decl,
.ref_parameter_decl,
@ -359,6 +357,7 @@ fn renderAstWalk(
.block_stmt,
.choice_stmt,
.content_stmt,
.string_expr,
=> {
const data = node.data.list;
for (data.items) |child_node| try children.append(gpa, child_node);
@ -416,7 +415,6 @@ fn renderAstWalk(
.knot_prototype,
.function_decl,
.stitch_decl,
.string_expr,
.divert_expr,
.selector_expr,
.call_expr,

View file

@ -739,9 +739,29 @@ fn stringLiteral(gi: *GenIr, node: *const Ast.Node) InnerError!Ir.Inst.Ref {
return gi.addStr(str.index, str.len);
}
fn stringExpr(gen: *GenIr, expr_node: *const Ast.Node) InnerError!Ir.Inst.Ref {
const first_node = expr_node.data.bin.lhs.?;
return stringLiteral(gen, first_node);
/// Lowers a `string_expr` node into a single `str_format` instruction.
///
/// Every child in the node's list is lowered in order: `string_literal`
/// fragments through `stringLiteral`, `inline_logic_expr` fragments through
/// `inlineLogicExpr`. The resulting refs are staged on `astgen.scratch` and
/// then written to `extra` as the trailing operands of a `MultiOp` payload.
fn stringExpr(gi: *GenIr, scope: *Scope, node: *const Ast.Node) InnerError!Ir.Inst.Ref {
    const gpa = gi.astgen.gpa;
    const scratch = &gi.astgen.scratch;
    const scratch_top = scratch.items.len;
    // Drop the staged refs on every exit path. Without this, a string
    // expression nested inside an inline expression leaves its operands on
    // `scratch`, and the enclosing call would emit them as its own operands.
    defer scratch.shrinkRetainingCapacity(scratch_top);

    for (node.data.list.items) |sub_node| {
        const result = switch (sub_node.tag) {
            .inline_logic_expr => try inlineLogicExpr(gi, scope, sub_node),
            .string_literal => try stringLiteral(gi, sub_node),
            // The parser only places the two node kinds above in this list.
            else => unreachable,
        };
        try scratch.append(gpa, @intFromEnum(result));
    }

    const operands = scratch.items[scratch_top..];
    const extra_len = @typeInfo(Ir.Inst.MultiOp).@"struct".fields.len + operands.len;
    try gi.astgen.extra.ensureUnusedCapacity(gpa, extra_len);
    const extra_index = gi.astgen.addExtraAssumeCapacity(Ir.Inst.MultiOp{
        .operands_len = @intCast(operands.len),
    });
    gi.astgen.appendBlockBody(@ptrCast(operands));
    return gi.addPayloadNodeWithIndex(.str_format, node, extra_index);
}
fn identifier(
@ -764,8 +784,7 @@ fn expr(gi: *GenIr, scope: *Scope, optional_node: ?*const Ast.Node) InnerError!I
.false_literal => return .bool_false,
.number_literal => return numberLiteral(gi, node),
.string_literal => return stringLiteral(gi, node),
.string_expr => return stringExpr(gi, node),
.empty_string => return stringLiteral(gi, node),
.string_expr => return stringExpr(gi, scope, node),
.identifier => return identifier(gi, scope, node),
.add_expr => return binaryOp(gi, scope, node, .add),
.subtract_expr => return binaryOp(gi, scope, node, .sub),

View file

@ -55,6 +55,10 @@ pub fn nullTerminatedString(ir: Ir, index: NullTerminatedString) [:0]const u8 {
return slice[0..std.mem.indexOfScalar(u8, slice, 0).? :0];
}
/// Views `len` consecutive entries of `extra`, beginning at `start`, as
/// instruction refs.
pub fn refSlice(code: Ir, start: usize, len: usize) []Inst.Ref {
    const words = code.extra[start..][0..len];
    return @ptrCast(words);
}
/// Views `len` consecutive entries of `extra`, beginning at `start`, as
/// instruction indices.
pub fn bodySlice(ir: Ir, start: usize, len: usize) []Inst.Index {
    const words = ir.extra[start..][0..len];
    return @ptrCast(words);
}
@ -195,6 +199,8 @@ pub const Inst = struct {
float,
/// Uses the `str` union field.
str,
/// Uses the `payload` union field. Payload is `MultiOp`.
str_format,
/// Short-circuiting boolean `and`. `lhs` is a boolean `Ref` and the other operand
/// is a block, which is evaluated if `lhs` is `true`.
/// Uses the `payload` union field. Payload is `BoolBr`.
@ -291,6 +297,10 @@ pub const Inst = struct {
field_name_start: NullTerminatedString,
};
/// Payload header for an instruction with a variable number of operands
/// (the payload of `str_format`).
/// Trailing: `operands_len` operand refs stored directly after this header in `extra`.
pub const MultiOp = struct {
/// Number of operand refs that follow this header.
operands_len: u32,
};
/// Payload header describing the body of a block.
/// NOTE(review): presumably followed in `extra` by `body_len` instruction
/// indices (cf. `bodySlice`) — confirm against the code that emits it.
pub const Block = struct {
/// Length of the block's body.
body_len: u32,
};
@ -383,6 +393,7 @@ pub const Inst = struct {
.int,
.float,
.str,
.str_format,
.block,
.condbr,
.switch_br,

View file

@ -679,22 +679,53 @@ fn parseExpression(p: *Parse) Error!?*Ast.Node {
fn parseStringExpr(p: *Parse) Error!*Ast.Node {
assert(p.token.tag == .double_quote);
const scratch_top = p.scratch.items.len;
const main_token = p.nextToken();
var fragment_start = main_token.loc.end;
while (true) switch (p.token.tag) {
.double_quote, .newline, .eof => break,
else => _ = p.nextToken(),
loop: while (true) switch (p.token.tag) {
.double_quote, .newline, .eof => {
if (fragment_start < p.token.loc.start) {
const node = try Ast.Node.createLeaf(p.arena, .string_literal, .{
.start = fragment_start,
.end = p.token.loc.start,
});
try p.scratch.append(p.gpa, node);
}
break :loop;
},
.left_brace => {
if (fragment_start < p.token.loc.start) {
const node = try Ast.Node.createLeaf(p.arena, .string_literal, .{
.start = fragment_start,
.end = p.token.loc.start,
});
try p.scratch.append(p.gpa, node);
}
const lbrace_token = p.nextToken();
const expr = try p.expectExpr();
const rbrace_token = try p.expectToken(.right_brace, false);
const node = try Ast.Node.createBinary(p.arena, .inline_logic_expr, .{
.start = lbrace_token.loc.start,
.end = rbrace_token.loc.end,
}, expr, null);
try p.scratch.append(p.gpa, node);
fragment_start = rbrace_token.loc.end;
},
else => {
_ = p.nextToken();
},
};
const last_token = try p.expectToken(.double_quote, true);
const expr = try Ast.Node.createLeaf(p.arena, .string_literal, .{
.start = main_token.loc.end,
.end = last_token.loc.start,
});
return .createBinary(p.arena, .string_expr, .{
const list = try p.makeNodeSliceFromScratch(scratch_top);
return .createList(p.arena, .string_expr, .{
.start = main_token.loc.start,
.end = p.token.loc.start,
}, expr, null);
.end = last_token.loc.end,
}, list);
}
fn parseExprStmt(p: *Parse, lhs: ?*Ast.Node) Error!*Ast.Node {

View file

@ -425,6 +425,30 @@ fn irStr(sema: *Sema, inst: Ir.Inst.Index) InnerError!ValueInfo {
return .{ .value = ip_index };
}
/// Emits the bytecode that assembles a string from a `str_format` instruction.
///
/// The emitted sequence is `string_builder`, then for every operand a load of
/// that operand followed by `string_append`, and finally `string_freeze`.
/// The result is reported as living on the stack.
fn irStrFormat(
    sema: *Sema,
    builder: *Builder,
    _: *Block,
    inst: Ir.Inst.Index,
) InnerError!ValueInfo {
    const payload = sema.ir.instructions[@intFromEnum(inst)].data.payload;
    const multi_op = sema.ir.extraData(Ir.Inst.MultiOp, payload.extra_index);
    const operands = sema.ir.refSlice(multi_op.end, multi_op.data.operands_len);

    try builder.addByteOp(.string_builder);
    for (operands) |operand| {
        const resolved = sema.resolveInst(operand);
        try builder.ensureLoad(resolved);
        try builder.addByteOp(.string_append);
    }
    try builder.addByteOp(.string_freeze);

    return .stack;
}
fn irUnaryOp(
sema: *Sema,
builder: *Builder,
@ -1081,6 +1105,7 @@ fn analyzeBodyInner(
.int => try irInt(sema, inst),
.float => try irFloat(sema, inst),
.str => try irStr(sema, inst),
.str_format => try irStrFormat(sema, builder, block, inst),
.add => try irBinaryOp(sema, builder, inst, .add),
.sub => try irBinaryOp(sema, builder, inst, .sub),
.mul => try irBinaryOp(sema, builder, inst, .mul),

View file

@ -31,6 +31,7 @@ stack: []Value = &.{},
call_stack: []CallFrame = &.{},
code_chunks: std.ArrayListUnmanaged(*Object.Code) = .empty,
/// Linked list of all tracked runtime objects.
/// We don't currently have a garbage collector, though one will be necessary.
gc_objects: std.SinglyLinkedList = .{},
/// Global constants pool.
constants_pool: []const Value = &.{},
@ -110,6 +111,9 @@ pub const Opcode = enum(u8) {
br_table,
br_dispatch,
br_select_index,
string_builder,
string_append,
string_freeze,
_,
};
@ -138,6 +142,13 @@ pub const Value = union(enum) {
};
}
/// Returns the concrete runtime object of type `T` that embeds this value's
/// object header.
///
/// `T` must embed its `Object` header in a field named `base`. The caller
/// asserts that `v` is an `.object`; any other variant is illegal behavior.
/// NOTE(review): the object's tag is not checked against `T` here — callers
/// must guarantee that the object really is a `T`.
pub fn castObject(v: Value, comptime T: type) *T {
    switch (v) {
        .object => |object| {
            // Recover the parent through the `base` field rather than
            // reinterpreting the header pointer: the field order of a plain
            // `struct` is not guaranteed, so `base` need not sit at offset 0.
            const parent: *T = @alignCast(@fieldParentPtr("base", object));
            return parent;
        },
        else => unreachable,
    }
}
/// Reports whether the value is a number, i.e. an `.int` or a `.float`.
pub fn isNumeric(v: Value) bool {
    return switch (v) {
        .int, .float => true,
        else => false,
    };
}
@ -230,24 +241,20 @@ pub const Value = union(enum) {
pub fn eql(lhs: Value, rhs: Value) bool {
return switch (lhs) {
.nil => rhs == .nil,
.bool => |l| switch (rhs) {
.bool => |r| l == r,
else => false,
},
.int => |l| switch (rhs) {
.int => |r| l == r,
.float => |r| @as(f64, @floatFromInt(l)) == r,
else => false,
},
.float => |l| switch (rhs) {
.int => |r| l == @as(f64, @floatFromInt(r)),
.float => |r| l == r,
else => false,
},
.object => |lobj| switch (rhs) {
.object => |robj| Object.eql(lobj, robj),
else => false,
@ -771,6 +778,32 @@ fn step(vm: *Story, variables: *VariablesState) !StepSignal {
return error.InvalidArgument;
}
},
.string_builder => {
const builder_object = try Object.StringBuilder.create(vm);
try pushStack(vm, .{ .object = &builder_object.base });
frame.ip += 1;
},
.string_append => {
if (popStack(vm)) |value| {
if (peekStack(vm, 0)) |builder| {
const string_builder = builder.castObject(Object.StringBuilder);
try string_builder.append(value);
frame.ip += 1;
continue;
}
}
return error.InvalidArgument;
},
.string_freeze => {
if (popStack(vm)) |value| {
const string_builder = value.castObject(Object.StringBuilder);
const frozen = try string_builder.freeze(vm);
try pushStack(vm, .{ .object = &frozen.base });
frame.ip += 1;
continue;
}
return error.InvalidArgument;
},
else => return error.InvalidInstruction,
}
}

View file

@ -22,7 +22,6 @@ fn dumpByteInst(
op: Opcode,
) !usize {
const code = knot.code;
assert(code.bytecode.len > offset + 1);
const arg = code.bytecode[offset + 1];
if (op == .load_const) {
@ -162,6 +161,9 @@ pub fn dumpInst(
.br_table => return self.dumpSimpleInst(w, offset, op),
.br_dispatch => return self.dumpSimpleInst(w, offset, op),
.br_select_index => return self.dumpSimpleInst(w, offset, op),
.string_builder => return self.dumpSimpleInst(w, offset, op),
.string_append => return self.dumpSimpleInst(w, offset, op),
.string_freeze => return self.dumpSimpleInst(w, offset, op),
else => |code| {
try w.print("Unknown opcode 0x{x:0>4}\n", .{@intFromEnum(code)});
return offset + 1;

View file

@ -15,12 +15,14 @@ pub const Tag = enum {
string,
code,
knot,
string_builder,
pub fn tagBytes(tag: Tag) []const u8 {
return switch (tag) {
.string => "String",
.code => "Code",
.knot => "Knot",
.string_builder => "StringBuilder",
};
}
@ -29,6 +31,7 @@ pub const Tag = enum {
.string => Object.String,
.code => Object.Code,
.knot => Object.Knot,
.string_builder => Object.StringBuilder,
};
}
};
@ -43,6 +46,7 @@ pub fn eql(lhs: *Object, rhs: *Object) bool {
},
.code => |_| false,
.knot => |_| false,
.string_builder => |_| false,
};
}
@ -71,9 +75,8 @@ pub const String = struct {
};
pub fn create(story: *Story, options: Options) error{OutOfMemory}!*Object.String {
const gpa = story.gpa;
const alloc_len = @sizeOf(Type) + options.bytes.len + 1;
const raw = try gpa.alignedAlloc(u8, .of(Type), alloc_len);
const raw = try story.gpa.alignedAlloc(u8, .of(Type), alloc_len);
const object: *Type = @ptrCast(raw);
object.* = .{
@ -88,16 +91,14 @@ pub const String = struct {
object.bytes = buf.ptr;
@memcpy(buf[0..options.bytes.len], options.bytes);
buf[options.bytes.len] = 0;
story.gc_objects.prepend(&object.base.node);
return object;
}
pub fn destroy(obj: *String, story: *Story) void {
const gpa = story.gpa;
const alloc_len = @sizeOf(Type) + obj.length + 1;
const base: [*]align(@alignOf(Type)) u8 = @ptrCast(obj);
gpa.free(base[0..alloc_len]);
story.gpa.free(base[0..alloc_len]);
}
pub fn toSlice(obj: *const Object.String) []const u8 {
@ -123,10 +124,9 @@ pub const String = struct {
}
pub fn concat(story: *Story, lhs: *String, rhs: *String) !*Object.String {
const gpa = story.gpa;
const length = lhs.length + rhs.length;
const bytes = try gpa.alloc(u8, length + 1);
defer gpa.free(bytes);
const bytes = try story.gpa.alloc(u8, length + 1);
defer story.gpa.free(bytes);
@memcpy(bytes[0..lhs.length], lhs.bytes[0..lhs.length]);
@memcpy(bytes[lhs.length..], rhs.bytes[0..rhs.length]);
@ -230,3 +230,41 @@ pub const Knot = struct {
gpa.free(base[0..alloc_len]);
}
};
/// Runtime object that accumulates formatted values into a growable byte
/// buffer and can be turned into an `Object.String`.
pub const StringBuilder = struct {
    /// Common object header; tagged `.string_builder` by `create`.
    base: Object,
    /// Allocating writer holding the bytes appended so far.
    inner: std.Io.Writer.Allocating,

    const Type = StringBuilder;

    /// Allocates an empty builder with the story's allocator and links it
    /// into `story.gc_objects`.
    pub fn create(story: *Story) error{OutOfMemory}!*Object.StringBuilder {
        const gpa = story.gpa;
        const storage = try gpa.alignedAlloc(u8, .of(Type), @sizeOf(Type));
        const builder: *Type = @ptrCast(storage);
        builder.* = .{
            .base = .{ .tag = .string_builder },
            .inner = .init(gpa),
        };
        story.gc_objects.prepend(&builder.base.node);
        return builder;
    }

    /// Releases the accumulated buffer, then the builder's own storage.
    pub fn destroy(obj: *StringBuilder, story: *Story) void {
        obj.inner.deinit();
        const storage: [*]align(@alignOf(Type)) u8 = @ptrCast(obj);
        story.gpa.free(storage[0..@sizeOf(Type)]);
    }

    /// Formats `value` with the `{f}` specifier and appends the text to the
    /// buffer. A failed write is reported as `error.OutOfMemory`.
    pub fn append(obj: *StringBuilder, value: Story.Value) error{OutOfMemory}!void {
        obj.inner.writer.print("{f}", .{value}) catch |err| switch (err) {
            error.WriteFailed => return error.OutOfMemory,
        };
    }

    /// Takes the accumulated bytes out of the builder and creates a new
    /// `Object.String` from them; the intermediate slice is freed on return.
    pub fn freeze(obj: *StringBuilder, story: *Story) error{OutOfMemory}!*Object.String {
        const bytes = try obj.inner.toOwnedSlice();
        defer story.gpa.free(bytes);
        return Object.String.create(story, .{ .bytes = bytes });
    }
};

View file

@ -1,5 +1,6 @@
const std = @import("std");
const Ir = @import("Ir.zig");
const assert = std.debug.assert;
pub const Writer = struct {
code: Ir,
@ -58,11 +59,30 @@ pub const Writer = struct {
try w.print("{d}", .{data});
}
fn writeStringInst(self: *Writer, w: *std.Io.Writer, inst: Ir.Inst.Index) Error!void {
/// Prints the string referenced by a `str` instruction.
fn writeStrInst(self: *Writer, w: *std.Io.Writer, inst: Ir.Inst.Index) Error!void {
    const str = self.code.instructions[@intFromEnum(inst)].data.str;
    return self.writeStringRef(w, str.start);
}
/// Prints the operand list of a `MultiOp`-payload instruction as `{a, b, ...}`.
///
/// An instruction without operands prints as `{}`. That case is reachable:
/// a string expression with nothing between its quotes is parsed into an
/// empty fragment list and lowered to a `str_format` with zero operands.
fn writeMultiOpInst(self: *Writer, w: *std.Io.Writer, inst: Ir.Inst.Index) Error!void {
    const data = self.code.instructions[@intFromEnum(inst)].data.payload;
    const extra = self.code.extraData(Ir.Inst.MultiOp, data.extra_index);
    const operands = self.code.refSlice(extra.end, extra.data.operands_len);

    try w.writeAll("{");
    for (operands, 0..) |operand, i| {
        // Separator goes before every operand except the first.
        if (i != 0) try w.writeAll(", ");
        try self.writeInstRef(w, operand);
    }
    try w.writeAll("}");
}
fn writeStrTokInst(self: *Writer, w: *std.Io.Writer, inst: Ir.Inst.Index) Error!void {
const data = self.code.instructions[@intFromEnum(inst)].data.str_tok;
try self.writeStringRef(w, data.start);
@ -357,7 +377,8 @@ pub const Writer = struct {
.cmp_lte => try self.writeBinaryInst(w, inst),
.int => try self.writeIntInst(w, inst),
.float => try self.writeFloatInst(w, inst),
.str => try self.writeStringInst(w, inst),
.str => try self.writeStrInst(w, inst),
.str_format => try self.writeMultiOpInst(w, inst),
.content_push => try self.writeUnaryInst(w, inst),
.content_line => try self.writeUnaryInst(w, inst),
.content_glue => try self.writeUnaryInst(w, inst),