feat: interning formatted strings

This commit is contained in:
Brett Broadhurst 2026-04-07 12:36:11 -06:00
parent d2cd7fa888
commit 874fd61a37
Failed to generate hash of commit
5 changed files with 423 additions and 292 deletions

View file

@ -2,8 +2,8 @@ const std = @import("std");
const Ast = @import("Ast.zig");
const Ir = @import("Ir.zig");
const Story = @import("Story.zig");
const InternPool = @import("InternPool.zig");
const compile = @import("compile.zig");
const InternPool = compile.InternPool;
const Module = compile.Module;
const assert = std.debug.assert;
const Sema = @This();
@ -37,35 +37,49 @@ pub const ValueInfo = union(enum) {
pub const Value = struct {
ip_index: InternPool.Index,
/// Wraps an intern-pool index in a `Value`.
/// Asserts that `index` is not `.none`.
pub fn fromInterned(index: InternPool.Index) Value {
    assert(index != .none);
    const wrapped: Value = .{ .ip_index = index };
    return wrapped;
}
/// Returns the intern-pool index stored in `value`.
/// Asserts that the stored index is not `.none`.
pub fn toInterned(value: Value) InternPool.Index {
    const index = value.ip_index;
    assert(index != .none);
    return index;
}
pub const Unwrapped = union(enum) {
nil,
bool: bool,
int: i64,
float: f64,
str: InternPool.NullTerminatedString,
/// Converts an unwrapped value to `f64`.
///
/// `nil` yields 0.0, booleans yield 0.0 or 1.0, integers are converted with
/// `@floatFromInt`, and floats are returned unchanged. Calling this on a
/// `.str` value is a programmer error and reaches `unreachable`.
pub fn toFloat(v: Unwrapped) f64 {
    switch (v) {
        .float => |float_value| return float_value,
        .int => |int_value| return @floatFromInt(int_value),
        .bool => |flag| return @floatFromInt(@intFromBool(flag)),
        .nil => return 0.0,
        .str => unreachable,
    }
}
pub fn isTruthy(v: Unwrapped) bool {
return switch (v) {
//.null => false,
.bool => |boolean| boolean,
.nil => false,
.bool => |b| b,
.int => |int| int != 0,
.float => |float| float != 0.0,
//.str => true,
.str => true,
};
}
pub fn coerce(value: Unwrapped) Unwrapped {
return switch (value) {
.bool => |boolean| .{ .int = if (boolean) 1 else 0 },
.int => value,
.float => value,
.bool => |boolean| .{ .int = if (boolean) 1 else 0 },
//else => null,
else => unreachable,
};
}
};
@ -75,19 +89,9 @@ pub const Value = struct {
.bool => |boolean| return .{ .bool = boolean },
.int => |int| return .{ .int = int },
.float => |float| return .{ .float = @bitCast(float) },
.str => @panic("String unwrapping not implemented!"),
.str => |str| return .{ .str = str },
}
}
pub fn fromInterned(index: InternPool.Index) Value {
assert(index != .none);
return .{ .ip_index = index };
}
pub fn toInterned(value: Value) InternPool.Index {
assert(value.ip_index != .none);
return value.ip_index;
}
};
pub const SrcLoc = struct {
@ -121,10 +125,10 @@ fn resolveInst(sema: *Sema, ref: Ir.Inst.Ref) ValueInfo {
}
fn resolveValue(_: *Sema, info: ValueInfo) ?Value {
switch (info) {
.value => |value| return .fromInterned(value),
else => return null,
}
return switch (info) {
.value => |value| .fromInterned(value),
else => null,
};
}
pub fn lookupIdentifier(
@ -163,7 +167,7 @@ pub const Block = struct {
pub const Builder = struct {
sema: *Sema,
namespace: *Module.Namespace,
code: *Module.CodeChunk,
code: *InternPool.CodeChunk,
constants_map: std.AutoHashMapUnmanaged(InternPool.Index, u8) = .empty,
labels: std.ArrayListUnmanaged(Label) = .empty,
fixups: std.ArrayListUnmanaged(Fixup) = .empty,
@ -209,19 +213,15 @@ pub const Builder = struct {
/// Appends the single byte encoding `op` to the end of the builder's bytecode.
/// Fails with `error.OutOfMemory` if the bytecode list cannot grow.
fn addByteOp(builder: *Builder, op: Story.Opcode) error{OutOfMemory}!void {
    const code_bytes = &builder.code.bytecode;
    try code_bytes.append(builder.sema.gpa, @intFromEnum(op));
}
/// Appends `op` followed by its one-byte operand `arg` to the builder's bytecode.
///
/// Space for both bytes is reserved before either is written, so on
/// `error.OutOfMemory` the bytecode is left without a partially written
/// instruction.
fn addConstOp(builder: *Builder, op: Story.Opcode, arg: u8) error{OutOfMemory}!void {
    const code_bytes = &builder.code.bytecode;
    try code_bytes.ensureUnusedCapacity(builder.sema.gpa, 2);
    code_bytes.appendAssumeCapacity(@intFromEnum(op));
    code_bytes.appendAssumeCapacity(arg);
}
fn addJumpOp(builder: *Builder, op: Story.Opcode) error{OutOfMemory}!u32 {
@ -298,7 +298,7 @@ pub const Builder = struct {
}
}
fn ensureLoad(self: *Builder, info: ValueInfo) InnerError!void {
fn materialize(self: *Builder, info: ValueInfo) InnerError!void {
switch (info) {
.none => unreachable, // caller should never load .none
.stack => {},
@ -319,11 +319,49 @@ pub const Builder = struct {
}
};
fn foldArith(
lhs: Value.Unwrapped,
rhs: Value.Unwrapped,
op: Story.Opcode,
) !Value.Unwrapped {
/// Converts a compile-time-known unwrapped value to its string form and
/// interns the result as a `.str` value in the module's intern pool.
///
/// Formatting rules, as implemented below:
/// - `nil` becomes the empty string.
/// - booleans become "true" / "false".
/// - integers are printed in decimal.
/// - floats print "NaN", "Inf" or "-Inf" when not finite; finite values are
///   printed with 7 fractional digits, then trailing zeros are trimmed while
///   always keeping at least one digit after the decimal point.
/// - strings are reused as-is.
///
/// Returns the intern-pool index of the resulting string value. Errors
/// returned by the intern pool are propagated to the caller.
fn coerceToString(sema: *Sema, value: Value.Unwrapped) !InternPool.Index {
    const gpa = sema.gpa;
    const ip = &sema.module.intern_pool;

    // Large enough for every value formatted here: an i64 needs at most 20
    // bytes, and a finite f64 printed with "{d:.7}" needs at most a sign,
    // 309 integer digits, the decimal point and 7 fractional digits. A
    // smaller buffer would make the `NoSpaceLeft => unreachable` prongs
    // below reachable for large floats.
    var scratch_buffer: [512]u8 = undefined;

    const str_index = switch (value) {
        .nil => try ip.getOrPutString(gpa, ""),
        .bool => |bit| try ip.getOrPutString(gpa, if (bit) "true" else "false"),
        .int => |int| blk: {
            const bytes = std.fmt.bufPrint(&scratch_buffer, "{d}", .{int}) catch |err| switch (err) {
                // Cannot happen: see the sizing note on `scratch_buffer`.
                error.NoSpaceLeft => unreachable,
            };
            break :blk try ip.getOrPutString(gpa, bytes);
        },
        .float => |float| blk: {
            if (std.math.isNan(float))
                break :blk try ip.getOrPutString(gpa, "NaN");
            if (std.math.isInf(float))
                break :blk try ip.getOrPutString(gpa, if (float > 0) "Inf" else "-Inf");

            const printed = std.fmt.bufPrint(&scratch_buffer, "{d:.7}", .{float}) catch |err| switch (err) {
                // Cannot happen: see the sizing note on `scratch_buffer`.
                error.NoSpaceLeft => unreachable,
            };

            // Drop trailing zeros of the fraction, but stop so that one digit
            // always remains after the '.', e.g. "1.5000000" -> "1.5" and
            // "2.0000000" -> "2.0".
            const trimmed = if (std.mem.indexOfScalar(u8, printed, '.')) |dot| trim: {
                var end = printed.len;
                while (end > dot + 2 and printed[end - 1] == '0') end -= 1;
                break :trim printed[0..end];
            } else printed;

            break :blk try ip.getOrPutString(gpa, trimmed);
        },
        .str => |str| str,
    };
    return ip.getOrPutValue(gpa, .{ .str = str_index });
}
fn foldArith(lhs: Value.Unwrapped, rhs: Value.Unwrapped, op: Story.Opcode) !Value.Unwrapped {
const l = lhs.coerce();
const r = rhs.coerce();
if (l == .int and r == .int) {
@ -361,11 +399,7 @@ fn foldArith(
};
}
fn foldCmp(
lhs: Value.Unwrapped,
rhs: Value.Unwrapped,
op: Story.Opcode,
) !Value.Unwrapped {
fn foldCmp(lhs: Value.Unwrapped, rhs: Value.Unwrapped, op: Story.Opcode) !Value.Unwrapped {
switch (op) {
.cmp_eq => return .{ .bool = std.meta.eql(lhs, rhs) },
.cmp_neq => return .{ .bool = !std.meta.eql(lhs, rhs) },
@ -384,27 +418,23 @@ fn foldCmp(
return .{ .bool = result };
}
fn foldConstant(
lhs: Value.Unwrapped,
rhs: Value.Unwrapped,
op: Story.Opcode,
) !Value.Unwrapped {
switch (op) {
fn foldConstant(lhs: Value.Unwrapped, rhs: Value.Unwrapped, op: Story.Opcode) !Value.Unwrapped {
return switch (op) {
.add,
.sub,
.mul,
.div,
.mod,
=> return foldArith(lhs, rhs, op),
=> foldArith(lhs, rhs, op),
.cmp_eq,
.cmp_neq,
.cmp_lt,
.cmp_gt,
.cmp_lte,
.cmp_gte,
=> return foldCmp(lhs, rhs, op),
=> foldCmp(lhs, rhs, op),
else => unreachable,
}
};
}
fn irInt(sema: *Sema, inst: Ir.Inst.Index) InnerError!ValueInfo {
@ -420,29 +450,82 @@ fn irFloat(sema: *Sema, inst: Ir.Inst.Index) InnerError!ValueInfo {
}
fn irStr(sema: *Sema, inst: Ir.Inst.Index) InnerError!ValueInfo {
const data = sema.ir.instructions[@intFromEnum(inst)].data.str;
const ip_index = try sema.module.intern_pool.getOrPutStr(sema.gpa, data.start);
const bytes = sema.ir.instructions[@intFromEnum(inst)].data.str.get(sema.ir);
const ip_index = try sema.addStr(bytes);
return .{ .value = ip_index };
}
/// Interns `bytes` as a string in the module's intern pool and returns the
/// index of the `.str` value that refers to that string.
fn addStr(sema: *Sema, bytes: []const u8) InnerError!InternPool.Index {
    const gpa = sema.gpa;
    const ip = &sema.module.intern_pool;
    const interned_str = try ip.getOrPutString(gpa, bytes);
    return ip.getOrPutValue(gpa, .{ .str = interned_str });
}
fn irStrFormat(
sema: *Sema,
builder: *Builder,
_: *Block,
inst: Ir.Inst.Index,
) InnerError!ValueInfo {
const ip = &sema.module.intern_pool;
const data = sema.ir.instructions[@intFromEnum(inst)].data.payload;
const extra = sema.ir.extraData(Ir.Inst.MultiOp, data.extra_index);
const args_slice = sema.ir.refSlice(extra.end, extra.data.operands_len);
const StrFragment = union(enum) {
interned: InternPool.Index,
unknown: ValueInfo,
};
var all_const = true;
var scratch: std.ArrayList(StrFragment) = .empty;
defer scratch.deinit(sema.gpa);
for (args_slice) |arg| {
const arg_inst = sema.resolveInst(arg);
if (sema.resolveValue(arg_inst)) |arg_info| {
const str = try sema.coerceToString(arg_info.unwrap(ip));
try scratch.append(sema.gpa, .{ .interned = str });
} else {
all_const = false;
try scratch.append(sema.gpa, .{ .unknown = arg_inst });
}
}
if (all_const) {
var buffer: std.ArrayList(u8) = .empty;
defer buffer.deinit(sema.gpa);
for (scratch.items) |frag| {
const t = ip.indexToKey(frag.interned);
try buffer.appendSlice(sema.gpa, ip.nullTerminatedString(t.str));
}
return .{ .value = try sema.addStr(buffer.items) };
}
try builder.addByteOp(.string_builder);
var index: usize = 0;
while (index < args_slice.len) : (index += 1) {
const arg = args_slice[index];
const arg_inst = sema.resolveInst(arg);
try builder.ensureLoad(arg_inst);
try builder.addByteOp(.string_append);
var i: usize = 0;
while (i < scratch.items.len) {
var buffer: std.ArrayList(u8) = .empty;
defer buffer.deinit(sema.gpa);
while (i < scratch.items.len) : (i += 1) {
const frag = scratch.items[i];
if (frag != .interned) break;
const t = ip.indexToKey(frag.interned);
try buffer.appendSlice(sema.gpa, ip.nullTerminatedString(t.str));
}
if (buffer.items.len > 0) {
const val = try sema.addStr(buffer.items);
try builder.materialize(.{ .value = val });
try builder.addByteOp(.string_append);
}
if (i < scratch.items.len) {
const val = scratch.items[i].unknown;
try builder.materialize(val);
try builder.addByteOp(.string_append);
i += 1;
}
}
try builder.addByteOp(.string_freeze);
@ -456,14 +539,15 @@ fn irUnaryOp(
op: Story.Opcode,
) InnerError!ValueInfo {
const gpa = sema.gpa;
const data = sema.ir.instructions[@intFromEnum(inst)].data.un;
const ip = &sema.module.intern_pool;
const data = sema.ir.instructions[@intFromEnum(inst)].data.un;
const lhs = sema.resolveInst(data.lhs);
//const lhs_src: SrcLoc = .{ .src_offset = 0 };
//try sema.analyzeArithmeticArg(builder, lhs, lhs_src);
if (sema.resolveValue(lhs)) |lhs_info| {
switch (lhs_info.unwrap(ip)) {
.nil => unreachable,
.bool => |boolean| {
const new_value = switch (op) {
.not => !boolean,
@ -488,10 +572,11 @@ fn irUnaryOp(
};
return .{ .value = try ip.getOrPutFloat(gpa, new_value) };
},
.str => unreachable,
}
}
try builder.ensureLoad(lhs);
try builder.materialize(lhs);
try builder.addByteOp(op);
return .stack;
}
@ -517,15 +602,17 @@ fn irBinaryOp(
const lhs_coerced = lhs_value.unwrap(ip).coerce();
const rhs_coerced = rhs_value.unwrap(ip).coerce();
return switch (try foldConstant(lhs_coerced, rhs_coerced, op)) {
.nil => unreachable,
.bool => |boolean| .{ .value = ip.getOrPutBool(boolean) },
.int => |int| .{ .value = try ip.getOrPutInt(gpa, int) },
.float => |float| .{ .value = try ip.getOrPutFloat(gpa, float) },
.str => unreachable,
};
}
}
try builder.ensureLoad(lhs);
try builder.ensureLoad(rhs);
try builder.materialize(lhs);
try builder.materialize(rhs);
try builder.addByteOp(op);
return .stack;
}
@ -580,7 +667,7 @@ fn irLogicalOp(
}
const else_label = try builder.addLabel();
try builder.ensureLoad(lhs);
try builder.materialize(lhs);
try builder.addFixup(if (is_logical_or) .jmp_t else .jmp_f, else_label);
try builder.addByteOp(.pop);
@ -591,7 +678,7 @@ fn irLogicalOp(
};
const rhs = try sema.analyzeInlineBody(builder, &block, body);
try builder.ensureLoad(rhs);
try builder.materialize(rhs);
builder.setLabel(else_label);
return .none;
}
@ -604,13 +691,14 @@ fn irDeclRef(
inline_block: bool,
) InnerError!ValueInfo {
const data = sema.ir.instructions[@intFromEnum(inst)].data.str_tok;
const ip_index = try sema.module.intern_pool.getOrPutStr(sema.gpa, data.start);
const decl_name = try sema.addStr(sema.ir.nullTerminatedString(data.start));
const src_loc: SrcLoc = .{ .src_offset = data.src_offset };
const ident = try sema.lookupIdentifier(builder, ip_index, src_loc);
const ident = try sema.lookupIdentifier(builder, decl_name, src_loc);
if (inline_block) {
switch (ident.tag) {
.knot, .stitch, .function => unreachable,
.var_const => return sema.resolveGlobalDecl(builder, block, ip_index, src_loc),
.var_const => return sema.resolveGlobalDecl(builder, block, decl_name, src_loc),
.var_mut => return sema.fail(
src_loc,
"global variable assignments cannot refer to other variables",
@ -619,11 +707,11 @@ fn irDeclRef(
}
} else {
switch (ident.tag) {
.knot => return .{ .knot = ip_index },
.stitch => return .{ .stitch = ip_index },
.function => return .{ .function = ip_index },
.var_mut => return .{ .variable = ip_index },
.var_const => return .{ .variable = ip_index },
.knot => return .{ .knot = decl_name },
.stitch => return .{ .stitch = decl_name },
.function => return .{ .function = decl_name },
.var_mut => return .{ .variable = decl_name },
.var_const => return .{ .variable = decl_name },
}
}
}
@ -641,7 +729,7 @@ fn irStore(sema: *Sema, builder: *Builder, inst: Ir.Inst.Index) InnerError!void
const rhs = sema.resolveInst(data.rhs);
const src: SrcLoc = .{ .src_offset = 0 };
try builder.ensureLoad(rhs);
try builder.materialize(rhs);
switch (lhs) {
.none => unreachable,
@ -665,7 +753,7 @@ fn irLoad(sema: *Sema, builder: *Builder, inst: Ir.Inst.Index) InnerError!ValueI
const lhs = sema.resolveInst(data.lhs);
if (lhs == .value) return lhs;
try builder.ensureLoad(lhs);
try builder.materialize(lhs);
return .stack;
}
@ -682,7 +770,7 @@ fn irCondBr(
const else_label = try builder.addLabel();
const end_label = try builder.addLabel();
const condition = sema.resolveInst(extra.data.condition);
if (condition != .none) try builder.ensureLoad(condition);
if (condition != .none) try builder.materialize(condition);
try builder.addFixup(.jmp_f, else_label);
try builder.addByteOp(.pop);
@ -734,7 +822,7 @@ fn irSwitchBlock(
defer case_labels.deinit(gpa);
const condition = sema.resolveInst(extra.data.operand);
if (condition != .none) try builder.ensureLoad(condition);
if (condition != .none) try builder.materialize(condition);
var switch_block: Block = .{
.parent_block = parent_block,
@ -752,7 +840,7 @@ fn irSwitchBlock(
case_labels.appendAssumeCapacity(case_label_index);
try builder.addConstOp(.load, @intCast(cmp_var));
try builder.ensureLoad(case_expr);
try builder.materialize(case_expr);
try builder.addByteOp(.cmp_eq);
try builder.addFixup(.jmp_t, case_label_index);
try builder.addByteOp(.pop);
@ -805,7 +893,7 @@ fn irContentPush(sema: *Sema, builder: *Builder, inst: Ir.Inst.Index) InnerError
const data = sema.ir.instructions[@intFromEnum(inst)].data.un;
const lhs = sema.resolveInst(data.lhs);
if (lhs == .none) return error.AnalysisFail;
if (lhs != .stack) try builder.ensureLoad(lhs);
if (lhs != .stack) try builder.materialize(lhs);
try builder.addByteOp(.stream_push);
}
@ -881,7 +969,7 @@ fn irRet(sema: *Sema, builder: *Builder, inst: Ir.Inst.Index) InnerError!void {
const data = sema.ir.instructions[@intFromEnum(inst)].data.un;
const lhs = sema.resolveInst(data.lhs);
if (lhs != .none) {
try builder.ensureLoad(lhs);
try builder.materialize(lhs);
} else {
try builder.addByteOp(.stream_glue);
}
@ -924,10 +1012,13 @@ fn irCall(
.field => {
const callee = sema.resolveInst(extra.data.obj_ptr);
const target = try analyzeCallTarget(sema, builder, callee_src, callee);
const ip_index = try sema.module.intern_pool.getOrPutStr(
const str = try sema.module.intern_pool.getOrPutString(
sema.gpa,
extra.data.field_name_start,
extra.data.field_name_start.bytes(sema.ir),
);
const ip_index = try sema.module.intern_pool.getOrPutValue(sema.gpa, .{
.str = str,
});
const e = try sema.lookupInNamespace(target.namespace.?, ip_index, callee_src);
switch (e.tag) {
.function => {
@ -947,7 +1038,7 @@ fn irCall(
defer arg_start = arg_end;
const arg_body = body[arg_start..arg_end];
const arg_value = try sema.analyzeInlineBody(builder, block, @ptrCast(arg_body));
if (arg_value != .none) try builder.ensureLoad(arg_value);
if (arg_value != .none) try builder.materialize(arg_value);
}
try builder.addConstOp(.call, @intCast(args_len));
return .stack;
@ -977,10 +1068,13 @@ fn irDivert(
.field => {
const callee = sema.resolveInst(extra.data.obj_ptr);
const target = try analyzeDivertTarget(sema, builder, callee_src, callee);
const ip_index = try sema.module.intern_pool.getOrPutStr(
const str = try sema.module.intern_pool.getOrPutString(
sema.gpa,
extra.data.field_name_start,
extra.data.field_name_start.bytes(sema.ir),
);
const ip_index = try sema.module.intern_pool.getOrPutValue(sema.gpa, .{
.str = str,
});
const e = try sema.lookupInNamespace(target.namespace.?, ip_index, callee_src);
switch (e.tag) {
.knot => {
@ -1000,7 +1094,7 @@ fn irDivert(
defer arg_start = arg_end;
const arg_body = body[arg_start..arg_end];
const arg_value = try analyzeInlineBody(sema, builder, block, @ptrCast(arg_body));
if (arg_value != .none) try builder.ensureLoad(arg_value);
if (arg_value != .none) try builder.materialize(arg_value);
}
try builder.addConstOp(.divert, @intCast(args_len));
}
@ -1057,7 +1151,7 @@ fn analyzeCallTarget(
) !Module.Namespace.Decl {
switch (callee) {
.function => |ip_index| {
try builder.ensureLoad(callee);
try builder.materialize(callee);
return sema.lookupIdentifier(builder, ip_index, src);
},
else => return sema.fail(src, "invalid call target", .{}),
@ -1072,7 +1166,7 @@ fn analyzeDivertTarget(
) !Module.Namespace.Decl {
switch (callee) {
.knot => |ip_index| {
try builder.ensureLoad(callee);
try builder.materialize(callee);
return sema.lookupIdentifier(builder, ip_index, src);
},
else => return sema.fail(src, "invalid divert target", .{}),
@ -1245,12 +1339,17 @@ fn analyzeNestedDecl(
const data = sema.ir.instructions[@intFromEnum(inst)].data.payload;
const extra = sema.ir.extraData(Ir.Inst.Declaration, data.extra_index).data;
const decl = sema.ir.instructions[@intFromEnum(extra.value)];
const decl_name = try sema.module.intern_pool.getOrPutStr(sema.gpa, extra.name);
const decl_name = try sema.module.intern_pool.getOrPutString(
sema.gpa,
sema.ir.nullTerminatedString(extra.name),
);
const ip_index = try sema.module.intern_pool.getOrPutValue(sema.gpa, .{ .str = decl_name });
switch (decl.tag) {
.decl_stitch => {
const child_namespace = try sema.module.createNamespace(namespace);
try namespace.decls.put(sema.arena, decl_name, .{
try namespace.decls.put(sema.arena, ip_index, .{
.tag = .knot,
.decl_inst = extra.value,
.args_count = 0,
@ -1258,7 +1357,7 @@ fn analyzeNestedDecl(
});
try sema.module.queueWorkItem(.{
.tag = .stitch,
.decl_name = decl_name,
.decl_name = ip_index,
.inst_index = extra.value,
.namespace = child_namespace,
});
@ -1267,15 +1366,16 @@ fn analyzeNestedDecl(
}
}
fn scanTopLevelDecl(
sema: *Sema,
namespace: *Module.Namespace,
inst: Ir.Inst.Index,
) !void {
fn scanTopLevelDecl(sema: *Sema, namespace: *Module.Namespace, inst: Ir.Inst.Index) !void {
const data = sema.ir.instructions[@intFromEnum(inst)].data.payload;
const extra = sema.ir.extraData(Ir.Inst.Declaration, data.extra_index).data;
const decl_inst = sema.ir.instructions[@intFromEnum(extra.value)];
const decl_name = try sema.module.intern_pool.getOrPutStr(sema.gpa, extra.name);
const decl_str = try sema.module.intern_pool.getOrPutString(
sema.gpa,
sema.ir.nullTerminatedString(extra.name),
);
const decl_name = try sema.module.intern_pool.getOrPutValue(sema.gpa, .{ .str = decl_str });
const src_loc: SrcLoc = .{ .src_offset = data.src_offset };
switch (decl_inst.tag) {
@ -1306,6 +1406,7 @@ fn scanTopLevelDecl(
gop.value_ptr.* = .{
.tag = .knot,
.decl_inst = extra.value,
// FIXME: This will be necessary for argument count checks.
.args_count = 0,
.namespace = child_namespace,
};
@ -1331,6 +1432,7 @@ fn scanTopLevelDecl(
gop.value_ptr.* = .{
.tag = .stitch,
.decl_inst = extra.value,
// FIXME: This will be necessary for argument count checks.
.args_count = 0,
.namespace = child_namespace,
};
@ -1351,6 +1453,7 @@ fn scanTopLevelDecl(
gop.value_ptr.* = .{
.tag = .function,
.decl_inst = extra.value,
// FIXME: This will be necessary for argument count checks.
.args_count = 0,
.namespace = child_namespace,
};