diff --git a/src/InternPool.zig b/src/InternPool.zig new file mode 100644 index 0000000..1ae52de --- /dev/null +++ b/src/InternPool.zig @@ -0,0 +1,133 @@ +const std = @import("std"); +const StringIndexAdapter = std.hash_map.StringIndexAdapter; +const StringIndexContext = std.hash_map.StringIndexContext; +const Ir = @import("Ir.zig"); +const Module = @import("compile.zig").Module; +const assert = std.debug.assert; +const InternPool = @This(); + +values: std.ArrayListUnmanaged(Key) = .empty, +values_map: std.AutoHashMapUnmanaged(Key, Index) = .empty, +string_bytes: std.ArrayListUnmanaged(u8) = .empty, +string_table: std.HashMapUnmanaged(u32, void, StringIndexContext, std.hash_map.default_max_load_percentage) = .empty, + +pub const Index = enum(u32) { + bool_true, + bool_false, + none, + _, +}; + +pub const Key = union(enum) { + bool: bool, + int: i64, + float: u64, // We can't hash floating point numbers. + str: NullTerminatedString, +}; + +pub const Global = struct { + key: InternPool.Index, + value: InternPool.Index, +}; + +pub const Knot = struct { + name_index: InternPool.Index, + code_chunk: *CodeChunk, + + pub const Index = enum(u32) { + _, + }; +}; + +pub const Stitch = struct { + knot_index: ?Knot.Index, + code_chunk: *CodeChunk, + name_index: InternPool.Index, + + pub const Index = enum(u32) { + _, + }; +}; + +pub const CodeChunk = struct { + args_count: u32 = 0, + locals_count: u32 = 0, + stack_size: u32 = 0, + constants: std.ArrayListUnmanaged(u8) = .empty, + bytecode: std.ArrayListUnmanaged(u8) = .empty, +}; + +pub fn deinit(ip: *InternPool, gpa: std.mem.Allocator) void { + ip.values.deinit(gpa); + ip.values_map.deinit(gpa); + ip.string_bytes.deinit(gpa); + ip.string_table.deinit(gpa); + ip.* = undefined; +} + +pub const NullTerminatedString = enum(u32) { + empty, + _, +}; + +pub fn indexToKey(ip: *const InternPool, index: Index) Key { + const value = ip.values.items[@intFromEnum(index)]; + return value; +} + +pub fn getOrPutValue(ip: *InternPool, gpa: std.mem.Allocator, key: Key) error{OutOfMemory}!Index { + if (ip.values_map.get(key)) |index| { + return index; + } else { + const new_index: Index = @enumFromInt(ip.values.items.len); + try ip.values.append(gpa, key); + try ip.values_map.put(gpa, key, new_index); + return new_index; + } +} + +pub fn getOrPutBool(_: *InternPool, value: bool) Index { + return if (value) return .bool_true else .bool_false; +} + +pub fn getOrPutInt(ip: *InternPool, gpa: std.mem.Allocator, value: i64) error{OutOfMemory}!Index { + return ip.getOrPutValue(gpa, .{ .int = value }); +} + +pub fn getOrPutFloat(ip: *InternPool, gpa: std.mem.Allocator, value: f64) error{OutOfMemory}!Index { + return ip.getOrPutValue(gpa, .{ .float = @bitCast(value) }); +} + +pub fn getOrPutString( + ip: *InternPool, + gpa: std.mem.Allocator, + bytes: []const u8, +) error{OutOfMemory}!NullTerminatedString { + const str_index: u32 = @intCast(ip.string_bytes.items.len); + try ip.string_bytes.ensureUnusedCapacity(gpa, bytes.len + 1); + ip.string_bytes.appendSliceAssumeCapacity(bytes); + + const key: []const u8 = ip.string_bytes.items[str_index..]; + const gop = try ip.string_table.getOrPutContextAdapted(gpa, key, StringIndexAdapter{ + .bytes = &ip.string_bytes, + }, StringIndexContext{ + .bytes = &ip.string_bytes, + }); + if (gop.found_existing) { + ip.string_bytes.shrinkRetainingCapacity(str_index); + return @enumFromInt(gop.key_ptr.*); + } else { + gop.key_ptr.* = str_index; + try ip.string_bytes.append(gpa, 0); + return @enumFromInt(gop.key_ptr.*); + } +} + +pub fn internedValue(ip: *const InternPool, index: Index) Key { + return ip.values.items[@intFromEnum(index)]; +} + +pub fn nullTerminatedString(ip: *const InternPool, index: NullTerminatedString) [:0]const u8 { + const slice = ip.string_bytes.items[@intFromEnum(index)..]; + return slice[0..std.mem.indexOfScalar(u8, slice, 0).? :0]; +} diff --git a/src/Ir.zig b/src/Ir.zig index d7ac890..d9ba499 100644 --- a/src/Ir.zig +++ b/src/Ir.zig @@ -48,6 +48,10 @@ pub fn extraData(ir: Ir, comptime T: type, index: usize) ExtraData(T) { pub const NullTerminatedString = enum(u32) { empty, _, + + pub fn bytes(str: NullTerminatedString, ir: Ir) [:0]const u8 { + return ir.nullTerminatedString(str); + } }; pub fn nullTerminatedString(ir: Ir, index: NullTerminatedString) [:0]const u8 { diff --git a/src/Sema.zig b/src/Sema.zig index f5427b0..dd06133 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -2,8 +2,8 @@ const std = @import("std"); const Ast = @import("Ast.zig"); const Ir = @import("Ir.zig"); const Story = @import("Story.zig"); +const InternPool = @import("InternPool.zig"); const compile = @import("compile.zig"); -const InternPool = compile.InternPool; const Module = compile.Module; const assert = std.debug.assert; const Sema = @This(); @@ -37,35 +37,49 @@ pub const ValueInfo = union(enum) { pub const Value = struct { ip_index: InternPool.Index, + pub fn fromInterned(index: InternPool.Index) Value { + assert(index != .none); + return .{ .ip_index = index }; + } + + pub fn toInterned(value: Value) InternPool.Index { + assert(value.ip_index != .none); + return value.ip_index; + } + pub const Unwrapped = union(enum) { + nil, bool: bool, int: i64, float: f64, + str: InternPool.NullTerminatedString, pub fn toFloat(v: Unwrapped) f64 { return switch (v) { + .nil => 0.0, .bool => |boolean| @floatFromInt(@intFromBool(boolean)), .int => |int| @floatFromInt(int), .float => |float| float, + .str => unreachable, }; } pub fn isTruthy(v: Unwrapped) bool { return switch (v) { - //.null => false, - .bool => |boolean| boolean, + .nil => false, + .bool => |b| b, .int => |int| int != 0, .float => |float| float != 0.0, - //.str => true, + .str => true, }; } pub fn coerce(value: Unwrapped) Unwrapped { return switch (value) { + .bool => |boolean| .{ .int = if (boolean) 1 else 0 }, .int => value, .float => value, - .bool => |boolean| .{ .int = if (boolean) 1 else 0 }, - //else => null, + else => unreachable, }; } }; @@ -75,19 +89,9 @@ pub const Value = struct { .bool => |boolean| return .{ .bool = boolean }, .int => |int| return .{ .int = int }, .float => |float| return .{ .float = @bitCast(float) }, - .str => @panic("String unwrapping not implemented!"), + .str => |str| return .{ .str = str }, } } - - pub fn fromInterned(index: InternPool.Index) Value { - assert(index != .none); - return .{ .ip_index = index }; - } - - pub fn toInterned(value: Value) InternPool.Index { - assert(value.ip_index != .none); - return value.ip_index; - } }; pub const SrcLoc = struct { @@ -121,10 +125,10 @@ fn resolveInst(sema: *Sema, ref: Ir.Inst.Ref) ValueInfo { } fn resolveValue(_: *Sema, info: ValueInfo) ?Value { - switch (info) { - .value => |value| return .fromInterned(value), - else => return null, - } + return switch (info) { + .value => |value| .fromInterned(value), + else => null, + }; } pub fn lookupIdentifier( @@ -163,7 +167,7 @@ pub const Block = struct { pub const Builder = struct { sema: *Sema, namespace: *Module.Namespace, - code: *Module.CodeChunk, + code: *InternPool.CodeChunk, constants_map: std.AutoHashMapUnmanaged(InternPool.Index, u8) = .empty, labels: std.ArrayListUnmanaged(Label) = .empty, fixups: std.ArrayListUnmanaged(Fixup) = .empty, @@ -209,19 +213,15 @@ pub const Builder = struct { fn addByteOp(builder: *Builder, op: Story.Opcode) error{OutOfMemory}!void { const gpa = builder.sema.gpa; const bytecode = &builder.code.bytecode; - //const byte_index = bytecode.items.len; try bytecode.append(gpa, @intFromEnum(op)); - //return .toIndex(); } fn addConstOp(builder: *Builder, op: Story.Opcode, arg: u8) error{OutOfMemory}!void { const gpa = builder.sema.gpa; const bytecode = &builder.code.bytecode; - //const byte_index = bytecode.items.len; try bytecode.ensureUnusedCapacity(gpa, 2); bytecode.appendAssumeCapacity(@intFromEnum(op)); bytecode.appendAssumeCapacity(arg); - //return .{ .index = @intCast(byte_index) }; } fn addJumpOp(builder: *Builder, op: Story.Opcode) error{OutOfMemory}!u32 { @@ -298,7 +298,7 @@ pub const Builder = struct { } } - fn ensureLoad(self: *Builder, info: ValueInfo) InnerError!void { + fn materialize(self: *Builder, info: ValueInfo) InnerError!void { switch (info) { .none => unreachable, // caller should never load .none .stack => {}, @@ -319,11 +319,49 @@ pub const Builder = struct { } }; -fn foldArith( - lhs: Value.Unwrapped, - rhs: Value.Unwrapped, - op: Story.Opcode, -) !Value.Unwrapped { +fn coerceToString(sema: *Sema, value: Value.Unwrapped) !InternPool.Index { + const ip = &sema.module.intern_pool; + var scratch_buffer: [64]u8 = undefined; + const str_index = switch (value) { + .nil => blk: { + const interned = try ip.getOrPutString(sema.gpa, ""); + break :blk interned; + }, + .bool => |bit| blk: { + const interned = try ip.getOrPutString(sema.gpa, if (bit) "true" else "false"); + break :blk interned; + }, + .int => |int| blk: { + const bytes = std.fmt.bufPrint(&scratch_buffer, "{d}", .{int}) catch |err| switch (err) { + error.NoSpaceLeft => unreachable, + }; + const interned = try ip.getOrPutString(sema.gpa, bytes); + break :blk interned; + }, + .float => |float| blk: { + if (std.math.isNan(float)) + break :blk try ip.getOrPutString(sema.gpa, "NaN"); + + if (std.math.isInf(float)) + break :blk try ip.getOrPutString(sema.gpa, if (float > 0) "Inf" else "-Inf"); + + var bytes = std.fmt.bufPrint(&scratch_buffer, "{d:.7}", .{float}) catch |err| switch (err) { + error.NoSpaceLeft => unreachable, + else => |e| return e, + }; + if (std.mem.indexOfScalar(u8, bytes, '.')) |dot| { + var end = bytes.len; + while (end > dot + 2 and bytes[end - 1] == '0') end -= 1; + bytes = bytes[0..end]; + } + break :blk try ip.getOrPutString(sema.gpa, bytes); + }, + .str => |str| str, + }; + return ip.getOrPutValue(sema.gpa, .{ .str = str_index }); +} + +fn foldArith(lhs: Value.Unwrapped, rhs: Value.Unwrapped, op: Story.Opcode) !Value.Unwrapped { const l = lhs.coerce(); const r = rhs.coerce(); if (l == .int and r == .int) { @@ -361,11 +399,7 @@ fn foldArith( }; } -fn foldCmp( - lhs: Value.Unwrapped, - rhs: Value.Unwrapped, - op: Story.Opcode, -) !Value.Unwrapped { +fn foldCmp(lhs: Value.Unwrapped, rhs: Value.Unwrapped, op: Story.Opcode) !Value.Unwrapped { switch (op) { .cmp_eq => return .{ .bool = std.meta.eql(lhs, rhs) }, .cmp_neq => return .{ .bool = !std.meta.eql(lhs, rhs) }, @@ -384,27 +418,23 @@ fn foldCmp( return .{ .bool = result }; } -fn foldConstant( - lhs: Value.Unwrapped, - rhs: Value.Unwrapped, - op: Story.Opcode, -) !Value.Unwrapped { - switch (op) { +fn foldConstant(lhs: Value.Unwrapped, rhs: Value.Unwrapped, op: Story.Opcode) !Value.Unwrapped { + return switch (op) { .add, .sub, .mul, .div, .mod, - => return foldArith(lhs, rhs, op), + => foldArith(lhs, rhs, op), .cmp_eq, .cmp_neq, .cmp_lt, .cmp_gt, .cmp_lte, .cmp_gte, - => return foldCmp(lhs, rhs, op), + => foldCmp(lhs, rhs, op), else => unreachable, - } + }; } fn irInt(sema: *Sema, inst: Ir.Inst.Index) InnerError!ValueInfo { @@ -420,29 +450,82 @@ fn irFloat(sema: *Sema, inst: Ir.Inst.Index) InnerError!ValueInfo { } fn irStr(sema: *Sema, inst: Ir.Inst.Index) InnerError!ValueInfo { - const data = sema.ir.instructions[@intFromEnum(inst)].data.str; - const ip_index = try sema.module.intern_pool.getOrPutStr(sema.gpa, data.start); + const bytes = sema.ir.instructions[@intFromEnum(inst)].data.str.get(sema.ir); + const ip_index = try sema.addStr(bytes); return .{ .value = ip_index }; } +fn addStr(sema: *Sema, bytes: []const u8) InnerError!InternPool.Index { + const str_value = try sema.module.intern_pool.getOrPutString(sema.gpa, bytes); + return sema.module.intern_pool.getOrPutValue(sema.gpa, .{ + .str = str_value, + }); +} + fn irStrFormat( sema: *Sema, builder: *Builder, _: *Block, inst: Ir.Inst.Index, ) InnerError!ValueInfo { + const ip = &sema.module.intern_pool; const data = sema.ir.instructions[@intFromEnum(inst)].data.payload; const extra = sema.ir.extraData(Ir.Inst.MultiOp, data.extra_index); const args_slice = sema.ir.refSlice(extra.end, extra.data.operands_len); + const StrFragment = union(enum) { + interned: InternPool.Index, + unknown: ValueInfo, + }; + + var all_const = true; + var scratch: std.ArrayList(StrFragment) = .empty; + defer scratch.deinit(sema.gpa); + + for (args_slice) |arg| { + const arg_inst = sema.resolveInst(arg); + if (sema.resolveValue(arg_inst)) |arg_info| { + const str = try sema.coerceToString(arg_info.unwrap(ip)); + try scratch.append(sema.gpa, .{ .interned = str }); + } else { + all_const = false; + try scratch.append(sema.gpa, .{ .unknown = arg_inst }); + } + } + if (all_const) { + var buffer: std.ArrayList(u8) = .empty; + defer buffer.deinit(sema.gpa); + for (scratch.items) |frag| { + const t = ip.indexToKey(frag.interned); + try buffer.appendSlice(sema.gpa, ip.nullTerminatedString(t.str)); + } + return .{ .value = try sema.addStr(buffer.items) }; + } + try builder.addByteOp(.string_builder); - var index: usize = 0; - while (index < args_slice.len) : (index += 1) { - const arg = args_slice[index]; - const arg_inst = sema.resolveInst(arg); - try builder.ensureLoad(arg_inst); - try builder.addByteOp(.string_append); + var i: usize = 0; + while (i < scratch.items.len) { + var buffer: std.ArrayList(u8) = .empty; + defer buffer.deinit(sema.gpa); + + while (i < scratch.items.len) : (i += 1) { + const frag = scratch.items[i]; + if (frag != .interned) break; + const t = ip.indexToKey(frag.interned); + try buffer.appendSlice(sema.gpa, ip.nullTerminatedString(t.str)); + } + if (buffer.items.len > 0) { + const val = try sema.addStr(buffer.items); + try builder.materialize(.{ .value = val }); + try builder.addByteOp(.string_append); + } + if (i < scratch.items.len) { + const val = scratch.items[i].unknown; + try builder.materialize(val); + try builder.addByteOp(.string_append); + i += 1; + } } try builder.addByteOp(.string_freeze); @@ -456,14 +539,15 @@ fn irUnaryOp( op: Story.Opcode, ) InnerError!ValueInfo { const gpa = sema.gpa; - const data = sema.ir.instructions[@intFromEnum(inst)].data.un; const ip = &sema.module.intern_pool; + const data = sema.ir.instructions[@intFromEnum(inst)].data.un; const lhs = sema.resolveInst(data.lhs); //const lhs_src: SrcLoc = .{ .src_offset = 0 }; //try sema.analyzeArithmeticArg(builder, lhs, lhs_src); if (sema.resolveValue(lhs)) |lhs_info| { switch (lhs_info.unwrap(ip)) { + .nil => unreachable, .bool => |boolean| { const new_value = switch (op) { .not => !boolean, @@ -488,10 +572,11 @@ fn irUnaryOp( }; return .{ .value = try ip.getOrPutFloat(gpa, new_value) }; }, + .str => unreachable, } } - try builder.ensureLoad(lhs); + try builder.materialize(lhs); try builder.addByteOp(op); return .stack; } @@ -517,15 +602,17 @@ fn irBinaryOp( const lhs_coerced = lhs_value.unwrap(ip).coerce(); const rhs_coerced = rhs_value.unwrap(ip).coerce(); return switch (try foldConstant(lhs_coerced, rhs_coerced, op)) { + .nil => unreachable, .bool => |boolean| .{ .value = ip.getOrPutBool(boolean) }, .int => |int| .{ .value = try ip.getOrPutInt(gpa, int) }, .float => |float| .{ .value = try ip.getOrPutFloat(gpa, float) }, + .str => unreachable, }; } } - try builder.ensureLoad(lhs); - try builder.ensureLoad(rhs); + try builder.materialize(lhs); + try builder.materialize(rhs); try builder.addByteOp(op); return .stack; } @@ -580,7 +667,7 @@ fn irLogicalOp( } const else_label = try builder.addLabel(); - try builder.ensureLoad(lhs); + try builder.materialize(lhs); try builder.addFixup(if (is_logical_or) .jmp_t else .jmp_f, else_label); try builder.addByteOp(.pop); @@ -591,7 +678,7 @@ fn irLogicalOp( }; const rhs = try sema.analyzeInlineBody(builder, &block, body); - try builder.ensureLoad(rhs); + try builder.materialize(rhs); builder.setLabel(else_label); return .none; } @@ -604,13 +691,14 @@ fn irDeclRef( inline_block: bool, ) InnerError!ValueInfo { const data = sema.ir.instructions[@intFromEnum(inst)].data.str_tok; - const ip_index = try sema.module.intern_pool.getOrPutStr(sema.gpa, data.start); + const decl_name = try sema.addStr(sema.ir.nullTerminatedString(data.start)); const src_loc: SrcLoc = .{ .src_offset = data.src_offset }; - const ident = try sema.lookupIdentifier(builder, ip_index, src_loc); + const ident = try sema.lookupIdentifier(builder, decl_name, src_loc); + if (inline_block) { switch (ident.tag) { .knot, .stitch, .function => unreachable, - .var_const => return sema.resolveGlobalDecl(builder, block, ip_index, src_loc), + .var_const => return sema.resolveGlobalDecl(builder, block, decl_name, src_loc), .var_mut => return sema.fail( src_loc, "global variable assignments cannot refer to other variables", @@ -619,11 +707,11 @@ fn irDeclRef( } } else { switch (ident.tag) { - .knot => return .{ .knot = ip_index }, - .stitch => return .{ .stitch = ip_index }, - .function => return .{ .function = ip_index }, - .var_mut => return .{ .variable = ip_index }, - .var_const => return .{ .variable = ip_index }, + .knot => return .{ .knot = decl_name }, + .stitch => return .{ .stitch = decl_name }, + .function => return .{ .function = decl_name }, + .var_mut => return .{ .variable = decl_name }, + .var_const => return .{ .variable = decl_name }, } } } @@ -641,7 +729,7 @@ fn irStore(sema: *Sema, builder: *Builder, inst: Ir.Inst.Index) InnerError!void const rhs = sema.resolveInst(data.rhs); const src: SrcLoc = .{ .src_offset = 0 }; - try builder.ensureLoad(rhs); + try builder.materialize(rhs); switch (lhs) { .none => unreachable, @@ -665,7 +753,7 @@ fn irLoad(sema: *Sema, builder: *Builder, inst: Ir.Inst.Index) InnerError!ValueI const lhs = sema.resolveInst(data.lhs); if (lhs == .value) return lhs; - try builder.ensureLoad(lhs); + try builder.materialize(lhs); return .stack; } @@ -682,7 +770,7 @@ fn irCondBr( const else_label = try builder.addLabel(); const end_label = try builder.addLabel(); const condition = sema.resolveInst(extra.data.condition); - if (condition != .none) try builder.ensureLoad(condition); + if (condition != .none) try builder.materialize(condition); try builder.addFixup(.jmp_f, else_label); try builder.addByteOp(.pop); @@ -734,7 +822,7 @@ fn irSwitchBlock( defer case_labels.deinit(gpa); const condition = sema.resolveInst(extra.data.operand); - if (condition != .none) try builder.ensureLoad(condition); + if (condition != .none) try builder.materialize(condition); var switch_block: Block = .{ .parent_block = parent_block, @@ -752,7 +840,7 @@ fn irSwitchBlock( case_labels.appendAssumeCapacity(case_label_index); try builder.addConstOp(.load, @intCast(cmp_var)); - try builder.ensureLoad(case_expr); + try builder.materialize(case_expr); try builder.addByteOp(.cmp_eq); try builder.addFixup(.jmp_t, case_label_index); try builder.addByteOp(.pop); @@ -805,7 +893,7 @@ fn irContentPush(sema: *Sema, builder: *Builder, inst: Ir.Inst.Index) InnerError const data = sema.ir.instructions[@intFromEnum(inst)].data.un; const lhs = sema.resolveInst(data.lhs); if (lhs == .none) return error.AnalysisFail; - if (lhs != .stack) try builder.ensureLoad(lhs); + if (lhs != .stack) try builder.materialize(lhs); try builder.addByteOp(.stream_push); } @@ -881,7 +969,7 @@ fn irRet(sema: *Sema, builder: *Builder, inst: Ir.Inst.Index) InnerError!void { const data = sema.ir.instructions[@intFromEnum(inst)].data.un; const lhs = sema.resolveInst(data.lhs); if (lhs != .none) { - try builder.ensureLoad(lhs); + try builder.materialize(lhs); } else { try builder.addByteOp(.stream_glue); } @@ -924,10 +1012,13 @@ fn irCall( .field => { const callee = sema.resolveInst(extra.data.obj_ptr); const target = try analyzeCallTarget(sema, builder, callee_src, callee); - const ip_index = try sema.module.intern_pool.getOrPutStr( + const str = try sema.module.intern_pool.getOrPutString( sema.gpa, - extra.data.field_name_start, + extra.data.field_name_start.bytes(sema.ir), ); + const ip_index = try sema.module.intern_pool.getOrPutValue(sema.gpa, .{ + .str = str, + }); const e = try sema.lookupInNamespace(target.namespace.?, ip_index, callee_src); switch (e.tag) { .function => { @@ -947,7 +1038,7 @@ fn irCall( defer arg_start = arg_end; const arg_body = body[arg_start..arg_end]; const arg_value = try sema.analyzeInlineBody(builder, block, @ptrCast(arg_body)); - if (arg_value != .none) try builder.ensureLoad(arg_value); + if (arg_value != .none) try builder.materialize(arg_value); } try builder.addConstOp(.call, @intCast(args_len)); return .stack; @@ -977,10 +1068,13 @@ fn irDivert( .field => { const callee = sema.resolveInst(extra.data.obj_ptr); const target = try analyzeDivertTarget(sema, builder, callee_src, callee); - const ip_index = try sema.module.intern_pool.getOrPutStr( + const str = try sema.module.intern_pool.getOrPutString( sema.gpa, - extra.data.field_name_start, + extra.data.field_name_start.bytes(sema.ir), ); + const ip_index = try sema.module.intern_pool.getOrPutValue(sema.gpa, .{ + .str = str, + }); const e = try sema.lookupInNamespace(target.namespace.?, ip_index, callee_src); switch (e.tag) { .knot => { @@ -1000,7 +1094,7 @@ fn irDivert( defer arg_start = arg_end; const arg_body = body[arg_start..arg_end]; const arg_value = try analyzeInlineBody(sema, builder, block, @ptrCast(arg_body)); - if (arg_value != .none) try builder.ensureLoad(arg_value); + if (arg_value != .none) try builder.materialize(arg_value); } try builder.addConstOp(.divert, @intCast(args_len)); } @@ -1057,7 +1151,7 @@ fn analyzeCallTarget( ) !Module.Namespace.Decl { switch (callee) { .function => |ip_index| { - try builder.ensureLoad(callee); + try builder.materialize(callee); return sema.lookupIdentifier(builder, ip_index, src); }, else => return sema.fail(src, "invalid call target", .{}), @@ -1072,7 +1166,7 @@ fn analyzeDivertTarget( ) !Module.Namespace.Decl { switch (callee) { .knot => |ip_index| { - try builder.ensureLoad(callee); + try builder.materialize(callee); return sema.lookupIdentifier(builder, ip_index, src); }, else => return sema.fail(src, "invalid divert target", .{}), @@ -1245,12 +1339,17 @@ fn analyzeNestedDecl( const data = sema.ir.instructions[@intFromEnum(inst)].data.payload; const extra = sema.ir.extraData(Ir.Inst.Declaration, data.extra_index).data; const decl = sema.ir.instructions[@intFromEnum(extra.value)]; - const decl_name = try sema.module.intern_pool.getOrPutStr(sema.gpa, extra.name); + + const decl_name = try sema.module.intern_pool.getOrPutString( + sema.gpa, + sema.ir.nullTerminatedString(extra.name), + ); + const ip_index = try sema.module.intern_pool.getOrPutValue(sema.gpa, .{ .str = decl_name }); switch (decl.tag) { .decl_stitch => { const child_namespace = try sema.module.createNamespace(namespace); - try namespace.decls.put(sema.arena, decl_name, .{ + try namespace.decls.put(sema.arena, ip_index, .{ .tag = .knot, .decl_inst = extra.value, .args_count = 0, @@ -1258,7 +1357,7 @@ fn analyzeNestedDecl( }); try sema.module.queueWorkItem(.{ .tag = .stitch, - .decl_name = decl_name, + .decl_name = ip_index, .inst_index = extra.value, .namespace = child_namespace, }); @@ -1267,15 +1366,16 @@ fn analyzeNestedDecl( } } -fn scanTopLevelDecl( - sema: *Sema, - namespace: *Module.Namespace, - inst: Ir.Inst.Index, -) !void { +fn scanTopLevelDecl(sema: *Sema, namespace: *Module.Namespace, inst: Ir.Inst.Index) !void { const data = sema.ir.instructions[@intFromEnum(inst)].data.payload; const extra = sema.ir.extraData(Ir.Inst.Declaration, data.extra_index).data; const decl_inst = sema.ir.instructions[@intFromEnum(extra.value)]; - const decl_name = try sema.module.intern_pool.getOrPutStr(sema.gpa, extra.name); + + const decl_str = try sema.module.intern_pool.getOrPutString( + sema.gpa, + sema.ir.nullTerminatedString(extra.name), + ); + const decl_name = try sema.module.intern_pool.getOrPutValue(sema.gpa, .{ .str = decl_str }); const src_loc: SrcLoc = .{ .src_offset = data.src_offset }; switch (decl_inst.tag) { @@ -1306,6 +1406,7 @@ fn scanTopLevelDecl( gop.value_ptr.* = .{ .tag = .knot, .decl_inst = extra.value, + // FIXME: This will be necessary for argument count checks. .args_count = 0, .namespace = child_namespace, }; @@ -1331,6 +1432,7 @@ fn scanTopLevelDecl( gop.value_ptr.* = .{ .tag = .stitch, .decl_inst = extra.value, + // FIXME: This will be necessary for argument count checks. .args_count = 0, .namespace = child_namespace, }; @@ -1351,6 +1453,7 @@ fn scanTopLevelDecl( gop.value_ptr.* = .{ .tag = .function, .decl_inst = extra.value, + // FIXME: This will be necessary for argument count checks. .args_count = 0, .namespace = child_namespace, }; diff --git a/src/Story.zig b/src/Story.zig index c8c0a48..fbde3ad 100644 --- a/src/Story.zig +++ b/src/Story.zig @@ -35,7 +35,7 @@ code_chunks: std.ArrayListUnmanaged(*Object.Code) = .empty, gc_objects: std.SinglyLinkedList = .{}, /// Global constants pool. constants_pool: []const Value = &.{}, -// FIXME: This was a hack to keep string bytes alive. +/// Interned string bytes. string_bytes: []const u8 = &.{}, dump_writer: ?*std.Io.Writer = null, @@ -304,16 +304,19 @@ pub const Value = union(enum) { } pub fn format(value: Value, writer: *std.Io.Writer) error{WriteFailed}!void { + var scratch_buffer: [64]u8 = undefined; switch (value) { .nil => try writer.writeAll(value.tagBytes()), .bool => |boolean| try writer.writeAll(if (boolean) "true" else "false"), .int => |int| try writer.print("{d}", .{int}), .float => |float| { - var buf: [64]u8 = undefined; - if (std.math.isNan(float)) return writer.writeAll("NaN"); - if (std.math.isInf(float)) return writer.writeAll(if (float > 0) "Inf" else "-Inf"); + if (std.math.isNan(float)) + return writer.writeAll("NaN"); - var str = std.fmt.bufPrint(&buf, "{d:.7}", .{float}) catch |err| switch (err) { + if (std.math.isInf(float)) + return writer.writeAll(if (float > 0) "Inf" else "-Inf"); + + var str = std.fmt.bufPrint(&scratch_buffer, "{d:.7}", .{float}) catch |err| switch (err) { error.NoSpaceLeft => unreachable, else => |e| return e, }; @@ -322,14 +325,14 @@ pub const Value = union(enum) { while (end > dot + 2 and str[end - 1] == '0') end -= 1; str = str[0..end]; } - try writer.writeAll(str); + return writer.writeAll(str); }, .object => |object| switch (object.tag) { .string => { const typed: *const Object.String = @ptrCast(object); - try writer.writeAll(typed.toSlice()); + return writer.writeAll(typed.toSlice()); }, - else => try writer.print("<{s} {*}>", .{ object.tag.tagBytes(), object }), + else => return writer.print("<{s} {*}>", .{ object.tag.tagBytes(), object }), }, } } diff --git a/src/compile.zig b/src/compile.zig index 46d76d9..1cec0e4 100644 --- a/src/compile.zig +++ b/src/compile.zig @@ -4,6 +4,7 @@ const AstGen = @import("AstGen.zig"); const Sema = @import("Sema.zig"); const Ir = @import("Ir.zig"); const Story = @import("Story.zig"); +const InternPool = @import("InternPool.zig"); const Value = Story.Value; const Object = Story.Object; const assert = std.debug.assert; @@ -79,91 +80,6 @@ test IntrusiveQueue { try testing.expect(q.pop() == null); } -pub const InternPool = struct { - values: std.ArrayListUnmanaged(Key) = .empty, - values_map: std.AutoHashMapUnmanaged(Key, Index) = .empty, - code_chunks: std.ArrayListUnmanaged(*Module.CodeChunk) = .empty, - - pub const Index = enum(u32) { - bool_true, - bool_false, - none, - _, - }; - - pub const Key = union(enum) { - bool: bool, - int: i64, - float: u64, // We can't hash floating point numbers. - str: Ir.NullTerminatedString, - }; - - pub fn getOrPutValue( - ip: *InternPool, - gpa: std.mem.Allocator, - key: Key, - ) error{OutOfMemory}!Index { - if (ip.values_map.get(key)) |index| { - return index; - } else { - const new_index: Index = @enumFromInt(ip.values.items.len); - try ip.values.append(gpa, key); - try ip.values_map.put(gpa, key, new_index); - return new_index; - } - } - - pub fn getOrPutBool(_: *InternPool, value: bool) Index { - return if (value) return .bool_true else .bool_false; - } - - pub fn getOrPutInt( - ip: *InternPool, - gpa: std.mem.Allocator, - value: i64, - ) error{OutOfMemory}!Index { - return ip.getOrPutValue(gpa, .{ .int = value }); - } - - pub fn getOrPutFloat( - ip: *InternPool, - gpa: std.mem.Allocator, - value: f64, - ) error{OutOfMemory}!Index { - return ip.getOrPutValue(gpa, .{ .float = @bitCast(value) }); - } - - pub fn getOrPutStr( - ip: *InternPool, - gpa: std.mem.Allocator, - value: Ir.NullTerminatedString, - ) error{OutOfMemory}!Index { - return ip.getOrPutValue(gpa, .{ .str = value }); - } - - pub fn getStrBytes(ip: *InternPool, ir: Ir, index: Index) []const u8 { - assert(ip.values.items.len > @intFromEnum(index)); - const c = ip.values.items[@intFromEnum(index)]; - return ir.nullTerminatedString(c.str); - } - - pub fn getCodeChunk(ip: *InternPool, index: Module.CodeChunk.Index) *Module.CodeChunk { - assert(ip.code_chunks.items.len > @intFromEnum(index)); - return ip.code_chunks.items[@intFromEnum(index)]; - } - - pub fn deinit(ip: *InternPool, gpa: std.mem.Allocator) void { - for (ip.code_chunks.items) |chunk| { - chunk.constants.deinit(gpa); - chunk.bytecode.deinit(gpa); - } - ip.values.deinit(gpa); - ip.values_map.deinit(gpa); - ip.code_chunks.deinit(gpa); - ip.* = undefined; - } -}; - // TODO: Revisit this. We might not need this at all. pub const WorkItem = struct { tag: Tag, @@ -186,37 +102,13 @@ pub const Module = struct { arena: std.mem.Allocator, tree: Ast, ir: Ir, - globals: std.ArrayListUnmanaged(Global) = .empty, - knots: std.ArrayListUnmanaged(Knot) = .empty, - stitches: std.ArrayListUnmanaged(Stitch) = .empty, + globals: std.ArrayListUnmanaged(InternPool.Global) = .empty, + knots: std.ArrayListUnmanaged(InternPool.Knot) = .empty, + stitches: std.ArrayListUnmanaged(InternPool.Stitch) = .empty, errors: std.ArrayListUnmanaged(Error) = .empty, intern_pool: InternPool = .{}, work_queue: WorkQueue = .{}, - pub const Global = struct { - key: InternPool.Index, - value: InternPool.Index, - }; - - pub const Knot = struct { - name_index: InternPool.Index, - code_index: CodeChunk.Index, - - pub const Index = enum(u32) { - _, - }; - }; - - pub const Stitch = struct { - knot_index: ?Knot.Index, - code_index: CodeChunk.Index, - name_index: InternPool.Index, - - pub const Index = enum(u32) { - _, - }; - }; - pub const Namespace = struct { parent: ?*Namespace, decls: std.AutoArrayHashMapUnmanaged(InternPool.Index, Decl), @@ -244,18 +136,6 @@ pub const Module = struct { }; }; - pub const CodeChunk = struct { - args_count: u32 = 0, - locals_count: u32 = 0, - stack_size: u32 = 0, - constants: std.ArrayListUnmanaged(u8) = .empty, - bytecode: std.ArrayListUnmanaged(u8) = .empty, - - pub const Index = enum(u32) { - _, - }; - }; - pub const Error = struct { line: usize, column: usize, @@ -270,7 +150,7 @@ pub const Module = struct { const extra = mod.ir.extraData(Ir.Inst.Block, data.extra_index); const top_level_decls = mod.ir.bodySlice(extra.end, extra.data.body_len); - var knot_index: ?Knot.Index = null; + var knot_index: ?InternPool.Knot.Index = null; var sema: Sema = .{ .module = mod, .gpa = gpa, @@ -284,9 +164,7 @@ pub const Module = struct { try sema.scanTopLevelDecls(file_scope, top_level_decls); while (mod.work_queue.pop()) |work_unit| { - const chunk_index = mod.intern_pool.code_chunks.items.len; const code_chunk = try mod.createCodeChunk(); - try mod.intern_pool.code_chunks.append(gpa, code_chunk); var builder: Sema.Builder = .{ .sema = &sema, @@ -305,7 +183,7 @@ pub const Module = struct { knot_index = @enumFromInt(mod.knots.items.len); try mod.knots.append(gpa, .{ .name_index = work_unit.decl_name, - .code_index = @enumFromInt(chunk_index), + .code_chunk = code_chunk, }); }, .stitch => { @@ -316,7 +194,7 @@ pub const Module = struct { try mod.stitches.append(gpa, .{ .knot_index = knot_index, .name_index = work_unit.decl_name, - .code_index = @enumFromInt(chunk_index), + .code_chunk = code_chunk, }); }, .function => { @@ -327,7 +205,7 @@ pub const Module = struct { try mod.stitches.append(gpa, .{ .knot_index = null, .name_index = work_unit.decl_name, - .code_index = @enumFromInt(chunk_index), + .code_chunk = code_chunk, }); }, } @@ -396,9 +274,10 @@ pub const Module = struct { }); } } else { + try module.intern_pool.string_bytes.append(gpa, 0); try module.intern_pool.values.append(gpa, .{ .bool = true }); try module.intern_pool.values.append(gpa, .{ .bool = false }); - try module.intern_pool.values.append(gpa, .{ .str = @enumFromInt(0) }); + try module.intern_pool.values.append(gpa, .{ .str = .empty }); // TODO: Revisit this. module.generateFile() catch |err| switch (err) { error.OutOfMemory => return error.OutOfMemory, @@ -409,86 +288,90 @@ pub const Module = struct { return module; } - fn makeValueFromInterned( - mod: *Module, - story: *Story, - value: InternPool.Key, - ) !Value { - switch (value) { - .bool => |boolean| return .{ .bool = boolean }, - .int => |int| return .{ .int = @intCast(int) }, - .float => |float| return .{ .float = @bitCast(float) }, - .str => |str| { - const bytes = mod.ir.nullTerminatedString(str); + fn storyStr(story: *Story, index: InternPool.NullTerminatedString) [:0]const u8 { + const slice = story.string_bytes[@intFromEnum(index)..]; + return slice[0..std.mem.indexOfScalar(u8, slice, 0).? :0]; + } + + fn makeValueFromInterned(story: *Story, value: InternPool.Key) !Story.Value { + return switch (value) { + .bool => |boolean| .{ .bool = boolean }, + .int => |int| .{ .int = @intCast(int) }, + .float => |float| .{ .float = @bitCast(float) }, + .str => |index| blk: { const str_object = try Object.String.create(story, .{ - .bytes = bytes, + .bytes = storyStr(story, index), }); - return .{ .object = &str_object.base }; + break :blk .{ .object = &str_object.base }; }, + }; + } + + fn makeKnotObject(story: *Story, name_bytes: []const u8, code: *InternPool.CodeChunk) !*Object.Knot { + return Object.Knot.create(story, .{ + .name = name_bytes, + .code = try Object.Code.create(story, .{ + .args_count = @intCast(code.args_count), + .locals_count = @intCast(code.locals_count), + .stack_size = @intCast(code.stack_size), + .constants = try code.constants.toOwnedSlice(story.gpa), + .code_bytes = try code.bytecode.toOwnedSlice(story.gpa), + }), + }); + } + + fn makeConstantsPool(mod: *Module, story: *Story) ![]const Story.Value { + const ip = &mod.intern_pool; + const gpa = mod.gpa; + var constants_pool: std.ArrayListUnmanaged(Value) = .empty; + defer constants_pool.deinit(mod.gpa); + try constants_pool.ensureUnusedCapacity(mod.gpa, ip.values.items.len); + + for (ip.values.items) |value| { + const obj = try makeValueFromInterned(story, value); + constants_pool.appendAssumeCapacity(obj); } + return constants_pool.toOwnedSlice(gpa); } pub fn setupStoryRuntime(mod: *Module, gpa: std.mem.Allocator, story: *Story) !void { - assert(mod.errors.items.len == 0); - const constants_len = mod.intern_pool.values.items.len; - var constants_pool: std.ArrayListUnmanaged(Value) = .empty; - try constants_pool.ensureUnusedCapacity(gpa, constants_len); - defer constants_pool.deinit(gpa); + const ip = &mod.intern_pool; + story.string_bytes = try ip.string_bytes.toOwnedSlice(mod.gpa); + story.constants_pool = try makeConstantsPool(mod, story); - for (mod.intern_pool.values.items) |value| { - const obj = try mod.makeValueFromInterned(story, value); - constants_pool.appendAssumeCapacity(obj); - } for (mod.globals.items) |global| { - const key_bytes = mod.intern_pool.getStrBytes(mod.ir, global.key); - const value = mod.intern_pool.values.items[@intFromEnum(global.value)]; - const obj = try mod.makeValueFromInterned(story, value); + const ip_key = ip.internedValue(global.key); + const ip_value = ip.internedValue(global.value); + + const key_bytes = storyStr(story, ip_key.str); + const obj = try makeValueFromInterned(story, ip_value); try story.globals.put(gpa, key_bytes, obj); } for (mod.knots.items) |knot| { - const name_bytes = mod.intern_pool.getStrBytes(mod.ir, knot.name_index); - const code_chunk = mod.intern_pool.getCodeChunk(knot.code_index); - const knot_object = try Object.Knot.create(story, .{ - .name = name_bytes, - .code = try Object.Code.create(story, .{ - .args_count = @intCast(code_chunk.args_count), - .locals_count = @intCast(code_chunk.locals_count), - .stack_size = @intCast(code_chunk.stack_size), - .constants = try code_chunk.constants.toOwnedSlice(gpa), - .code_bytes = try code_chunk.bytecode.toOwnedSlice(gpa), - }), - }); - const value: Value = .{ .object = &knot_object.base }; - try story.globals.put(gpa, name_bytes, value); + const ip_key = ip.internedValue(knot.name_index); + const key_bytes = storyStr(story, ip_key.str); + + const knot_object = try makeKnotObject(story, key_bytes, knot.code_chunk); + const value: Story.Value = .{ .object = &knot_object.base }; + try story.globals.put(gpa, key_bytes, value); } for (mod.stitches.items) |stitch| { - const name_bytes = mod.intern_pool.getStrBytes(mod.ir, stitch.name_index); - const code_chunk = mod.intern_pool.getCodeChunk(stitch.code_index); - const stitch_obj = try Object.Knot.create(story, .{ - .name = name_bytes, - .code = try Object.Code.create(story, .{ - .args_count = @intCast(code_chunk.args_count), - .locals_count = @intCast(code_chunk.locals_count), - .stack_size = @intCast(code_chunk.stack_size), - .constants = try code_chunk.constants.toOwnedSlice(gpa), - .code_bytes = try code_chunk.bytecode.toOwnedSlice(gpa), - }), - }); + const ip_key = ip.internedValue(stitch.name_index); + const key_bytes = storyStr(story, ip_key.str); + const stitch_object = try makeKnotObject(story, key_bytes, stitch.code_chunk); + if (stitch.knot_index) |index| { const parent_knot = mod.knots.items[@intFromEnum(index)]; - const parent_knot_name = mod.intern_pool.getStrBytes(mod.ir, parent_knot.name_index); + const s_key_value = ip.internedValue(parent_knot.name_index); + const parent_knot_name = storyStr(story, s_key_value.str); const parent_knot_value = story.globals.get(parent_knot_name).?; const parent_knot_obj: *Object.Knot = @ptrCast(parent_knot_value.object); - try parent_knot_obj.members.put(gpa, name_bytes, &stitch_obj.base); + try parent_knot_obj.members.put(gpa, key_bytes, &stitch_object.base); } else { - const value: Value = .{ .object = &stitch_obj.base }; - try story.globals.put(gpa, name_bytes, value); + const value: Story.Value = .{ .object = &stitch_object.base }; + try story.globals.put(gpa, key_bytes, value); } } - - story.constants_pool = try constants_pool.toOwnedSlice(gpa); - story.string_bytes = mod.ir.string_bytes; - mod.ir.string_bytes = &.{}; } pub fn createNamespace(mod: *Module, parent: ?*Namespace) error{OutOfMemory}!*Namespace { @@ -500,8 +383,8 @@ pub const Module = struct { return ns; } - pub fn createCodeChunk(mod: *Module) error{OutOfMemory}!*CodeChunk { - const chunk = try mod.arena.create(CodeChunk); + pub fn createCodeChunk(mod: *Module) error{OutOfMemory}!*InternPool.CodeChunk { + const chunk = try mod.arena.create(InternPool.CodeChunk); chunk.* = .{}; return chunk; } @@ -589,3 +472,8 @@ pub fn findLineColumn(source: []const u8, byte_offset: usize) Loc { .source_line = source[line_start..i], }; } + +fn hack(bytes: []const u8, index: InternPool.NullTerminatedString) [:0]const u8 { + const slice = bytes[@intFromEnum(index)..]; + return slice[0..std.mem.indexOfScalar(u8, slice, 0).? :0]; +}