//! Semantic analysis pass: walks the instructions of an `Ir` and emits
//! per-knot bytecode together with a story-wide constant pool.
const std = @import("std");
const Ir = @import("Ir.zig");
const Story = @import("Story.zig");
const Object = Story.Object;
const assert = std.debug.assert;
const Sema = @This();

/// Allocator used for every allocation made by this pass.
gpa: std.mem.Allocator,
/// Instruction stream being lowered.
ir: *const Ir,
/// Story-wide constant pool; `Ref.constant` values index into it.
constants: std.ArrayListUnmanaged(CompiledStory.Constant) = .empty,
/// Reverse lookup from a constant value to its index in `constants`.
constant_map: std.AutoHashMapUnmanaged(CompiledStory.Constant, u32) = .empty,
/// Constant-pool indices of the names of the globals declared so far.
globals: std.ArrayListUnmanaged(u32) = .empty,
/// Knots compiled so far.
knots: std.ArrayListUnmanaged(CompiledStory.Knot) = .empty,

/// Errors that the lowering functions in this file may return.
const InnerError = error{
    OutOfMemory,
    TooManyConstants,
    InvalidJump,
};

/// Result of lowering one IR instruction, recorded in `Chunk.inst_map`.
const Ref = union(enum) {
    bool_true,
    bool_false,
    /// The instruction produced nothing that can be loaded.
    none,
    /// Byte offset of an instruction that was already emitted.
    index: u32,
    /// Index into `Sema.constants`.
    constant: u32,
    /// Index into `Sema.constants` of the global's name.
    global: u32,
    /// Stack slot handed out by `irAlloc`.
    local: u32,
};

/// Releases everything still owned by `sema` and invalidates it.
///
/// Knots that are still in `sema.knots` (for example because compilation
/// failed before ownership was handed to the caller) also have their bytecode
/// and constant lists released.
fn deinit(sema: *Sema) void {
    const gpa = sema.gpa;
    for (sema.knots.items) |*knot| {
        knot.constants.deinit(gpa);
        knot.bytecode.deinit(gpa);
    }
    sema.constants.deinit(gpa);
    sema.constant_map.deinit(gpa);
    sema.globals.deinit(gpa);
    sema.knots.deinit(gpa);
    sema.* = undefined;
}

/// Reports an unrecoverable semantic error.
/// Currently this panics with `message` and therefore never returns.
fn fail(_: *Sema, message: []const u8) InnerError {
    @panic(message);
}

/// Interns `data` in the constant pool and returns a `.constant` ref to it.
///
/// An already interned value yields its existing index. Returns
/// `error.TooManyConstants` when the pool index no longer fits in a `u32`.
fn getConstant(sema: *Sema, data: CompiledStory.Constant) !Ref {
    const gpa = sema.gpa;
    if (sema.constant_map.get(data)) |index| {
        return .{ .constant = index };
    }

    const index = std.math.cast(u32, sema.constants.items.len) orelse
        return error.TooManyConstants;

    // Reserve room in both containers before touching either one, so that an
    // allocation failure cannot leave the pool and the lookup map out of sync.
    try sema.constants.ensureUnusedCapacity(gpa, 1);
    try sema.constant_map.ensureUnusedCapacity(gpa, 1);
    sema.constants.appendAssumeCapacity(data);
    sema.constant_map.putAssumeCapacityNoClobber(data, index);
    return .{ .constant = index };
}

/// Declares a global called `name`: interns the name and records its
/// constant index in `sema.globals`.
fn addGlobal(sema: *Sema, name: Ir.NullTerminatedString) !Ref {
    const gpa = sema.gpa;
    const interned = try sema.getConstant(.{ .string = name });
    try sema.globals.append(gpa, interned.constant);
    return .{ .global = interned.constant };
}

/// Resolves `name` against `sema.ir.globals`.
///
/// The name is interned first; when no global matches, `fail` is invoked.
fn getGlobal(sema: *Sema, name: Ir.NullTerminatedString) !Ref {
    const interned = try sema.getConstant(.{ .string = name });
    for (sema.ir.globals) |global| {
        if (name == global.name) {
            return .{ .global = interned.constant };
        }
    }
    return sema.fail("unknown global variable");
}

/// Lowers an integer literal to a constant-pool entry.
fn irInteger(sema: *Sema, inst: Ir.Inst.Index) InnerError!Ref {
    const data = sema.ir.instructions[@intFromEnum(inst)].data.integer;
    return sema.getConstant(.{ .integer = data.value });
}

/// Lowers a string literal to a constant-pool entry.
fn irString(sema: *Sema, inst: Ir.Inst.Index) InnerError!Ref {
    const data = sema.ir.instructions[@intFromEnum(inst)].data.string;
    return sema.getConstant(.{ .string = data.start });
}

/// Lowers a unary instruction: loads the operand, then emits `op`.
fn irUnary(
    sema: *Sema,
    chunk: *Chunk,
    inst: Ir.Inst.Index,
    op: Story.Opcode,
) InnerError!Ref {
    const data = sema.ir.instructions[@intFromEnum(inst)].data.un;
    const lhs = chunk.resolveInst(data.lhs);
    _ = try chunk.doLoad(lhs);
    return chunk.addByteOp(op);
}

/// Lowers a binary instruction: loads `lhs`, loads `rhs`, then emits `op`.
fn irBinary(
    sema: *Sema,
    chunk: *Chunk,
    inst: Ir.Inst.Index,
    op: Story.Opcode,
) InnerError!Ref {
    const data = sema.ir.instructions[@intFromEnum(inst)].data.bin;
    const lhs = chunk.resolveInst(data.lhs);
    const rhs = chunk.resolveInst(data.rhs);
    _ = try chunk.doLoad(lhs);
    _ = try chunk.doLoad(rhs);
    return chunk.addByteOp(op);
}

/// Reserves the next stack slot of the current knot and returns it as a
/// `.local` ref. No bytecode is emitted.
fn irAlloc(_: *Sema, chunk: *Chunk, _: Ir.Inst.Index) InnerError!Ref {
    const local_index = chunk.knot.stack_size;
    // TODO: Add constraints on how many temporaries we can have.
    // max(u8) or max(u16) are most likely appropriate.
    chunk.knot.stack_size += 1;
    return .{ .local = local_index };
}

/// Lowers a store: loads the value (`rhs`), emits the store that matches the
/// kind of the target (`lhs`), then emits a `pop`.
fn irStore(sema: *Sema, chunk: *Chunk, inst: Ir.Inst.Index) InnerError!void {
    const data = sema.ir.instructions[@intFromEnum(inst)].data.bin;
    const lhs = chunk.resolveInst(data.lhs);
    const rhs = chunk.resolveInst(data.rhs);

    _ = try chunk.doLoad(rhs);
    switch (lhs) {
        .bool_true, .bool_false => unreachable, // TODO: "Cannot assign to boolean"
        .none => unreachable,
        .constant => |_| unreachable, // TODO: "Cannot assign to constant"
        .global => |id| _ = try chunk.addConstOp(.store_global, @intCast(id)),
        .local => |id| _ = try chunk.addConstOp(.store, @intCast(id)),
        .index => unreachable,
    }
    _ = try chunk.addByteOp(.pop);
}

// TODO: Check what the target is!
/// Lowers a load: emits whatever is needed to push the operand.
fn irLoad(sema: *Sema, chunk: *Chunk, inst: Ir.Inst.Index) InnerError!Ref {
    const data = sema.ir.instructions[@intFromEnum(inst)].data.un;
    const lhs = chunk.resolveInst(data.lhs);
    return chunk.doLoad(lhs);
}

/// Lowers a two-way conditional branch.
///
/// Emitted layout:
///
///         <load condition>
///         jmp_f else
///         pop
///         <then body>
///         jmp end
///     else:
///         pop
///         <else body>
///     end:
fn irCondBr(sema: *Sema, chunk: *Chunk, inst: Ir.Inst.Index) InnerError!Ref {
    const data = sema.ir.instructions[@intFromEnum(inst)].data.payload;
    const extra = sema.ir.extraData(Ir.Inst.CondBr, data.payload_index);
    const then_body = sema.ir.bodySlice(extra.end, extra.data.then_body_len);
    const else_body = sema.ir.bodySlice(extra.end + then_body.len, extra.data.else_body_len);

    const else_label = try chunk.addLabel();
    const end_label = try chunk.addLabel();

    const condition = chunk.resolveInst(extra.data.condition);
    _ = try chunk.doLoad(condition);

    try chunk.addFixup(.jmp_f, else_label);
    // NOTE(review): a `pop` on both paths suggests the conditional jump leaves
    // the condition on the stack - confirm against the VM.
    _ = try chunk.addByteOp(.pop);
    try blockBodyInner(sema, chunk, then_body);
    try chunk.addFixup(.jmp, end_label);

    chunk.setLabel(else_label);
    _ = try chunk.addByteOp(.pop);
    try blockBodyInner(sema, chunk, else_body);

    chunk.setLabel(end_label);
    return .none;
}

/// Lowers a `break`. Currently emits nothing.
fn irBreak(sema: *Sema, inst: Ir.Inst.Index) InnerError!void {
    _ = sema;
    _ = inst;
}

/// Lowers a block by lowering each instruction of its body in order.
fn irBlock(sema: *Sema, chunk: *Chunk, inst: Ir.Inst.Index) InnerError!void {
    const data = sema.ir.instructions[@intFromEnum(inst)].data.payload;
    const extra = sema.ir.extraData(Ir.Inst.Block, data.payload_index);
    const body = sema.ir.bodySlice(extra.end, extra.data.body_len);
    return blockBodyInner(sema, chunk, body);
}

/// Lowers a multi-way branch.
///
/// The value being switched on is stored into a fresh stack slot. A chain of
/// comparisons is emitted first (one `cmp_eq` + `jmp_t` per case), followed by
/// a jump to the else body; the case bodies come next, each ending with a
/// jump to the common exit label, and the else body comes last.
fn irSwitchBr(sema: *Sema, chunk: *Chunk, inst: Ir.Inst.Index) InnerError!void {
    const data = sema.ir.instructions[@intFromEnum(inst)].data.payload;
    const extra = sema.ir.extraData(Ir.Inst.SwitchBr, data.payload_index);
    const cases_slice = sema.ir.bodySlice(extra.end, extra.data.cases_len);

    var case_labels: std.ArrayListUnmanaged(usize) = .empty;
    defer case_labels.deinit(sema.gpa);
    try case_labels.ensureUnusedCapacity(sema.gpa, cases_slice.len + 1);

    // TODO: Do something with this value?
    //const condition = chunk.resolveInst(extra.data.operand);
    const exit_label = try chunk.addLabel();

    // Temporary slot that holds the value every case is compared against.
    const cmp_var = chunk.knot.stack_size;
    chunk.knot.stack_size += 1;
    _ = try chunk.addConstOp(.store, @intCast(cmp_var));

    // Dispatch chain.
    for (cases_slice) |case_index| {
        const case_extra = sema.ir.extraData(Ir.Inst.SwitchBr.Case, @intFromEnum(case_index));
        const case_expr = chunk.resolveInst(case_extra.data.operand);
        const case_label_index = try chunk.addLabel();
        case_labels.appendAssumeCapacity(case_label_index);

        _ = try chunk.addConstOp(.load, @intCast(cmp_var));
        _ = try chunk.doLoad(case_expr);
        _ = try chunk.addByteOp(.cmp_eq);
        _ = try chunk.addFixup(.jmp_t, case_label_index);
        _ = try chunk.addByteOp(.pop);
    }

    const else_label = try chunk.addLabel();
    try chunk.addFixup(.jmp, else_label);

    // Case bodies.
    for (cases_slice, case_labels.items) |case_index, label_index| {
        const case_extra = sema.ir.extraData(Ir.Inst.SwitchBr.Case, @intFromEnum(case_index));
        const case_body = sema.ir.bodySlice(case_extra.end, case_extra.data.body_len);

        chunk.setLabel(label_index);
        _ = try chunk.addByteOp(.pop);
        try blockBodyInner(sema, chunk, case_body);
        try chunk.addFixup(.jmp, exit_label);
    }

    const else_body = sema.ir.bodySlice(
        extra.end + extra.data.cases_len,
        extra.data.else_body_len,
    );
    chunk.setLabel(else_label);
    try blockBodyInner(sema, chunk, else_body);
    chunk.setLabel(exit_label);
}

/// Lowers a content push: loads the operand and emits `stream_push`.
fn irContentPush(sema: *Sema, chunk: *Chunk, inst: Ir.Inst.Index) InnerError!Ref {
    const data = sema.ir.instructions[@intFromEnum(inst)].data.un;
    const lhs = chunk.resolveInst(data.lhs);
    _ = try chunk.doLoad(lhs);
    return chunk.addByteOp(.stream_push);
}

/// Lowers a content flush: emits `stream_flush`.
fn irContentFlush(_: *Sema, chunk: *Chunk, _: Ir.Inst.Index) InnerError!Ref {
    return chunk.addByteOp(.stream_flush);
}

/// Lowers a choice point.
///
/// For every option, the content of `operand_1` and `operand_2` (when present)
/// is pushed, followed by a `br_push` that carries the absolute address of the
/// option's body. After `br_table` / `br_select_index` / `br_dispatch`, each
/// option body is emitted: it pushes `operand_1` and `operand_3` (when
/// present), flushes the stream and then runs the option's instructions.
fn irChoiceBr(sema: *Sema, chunk: *Chunk, inst: Ir.Inst.Index) InnerError!void {
    const data = sema.ir.instructions[@intFromEnum(inst)].data.payload;
    const choice_extra = sema.ir.extraData(Ir.Inst.ChoiceBr, data.payload_index);
    const options_slice = sema.ir.bodySlice(choice_extra.end, choice_extra.data.cases_len);

    var branch_labels: std.ArrayListUnmanaged(usize) = .empty;
    defer branch_labels.deinit(sema.gpa);
    try branch_labels.ensureUnusedCapacity(sema.gpa, options_slice.len + 1);

    for (options_slice) |option_index| {
        const case_extra = sema.ir.extraData(Ir.Inst.ChoiceBr.Case, @intFromEnum(option_index));
        const case_label = try chunk.addLabel();
        branch_labels.appendAssumeCapacity(case_label);

        switch (case_extra.data.operand_1) {
            .none => {},
            else => |content| {
                const content_inst = chunk.resolveInst(content);
                _ = try chunk.doLoad(content_inst);
                _ = try chunk.addByteOp(.stream_push);
            },
        }
        switch (case_extra.data.operand_2) {
            .none => {},
            else => |content| {
                const content_inst = chunk.resolveInst(content);
                _ = try chunk.doLoad(content_inst);
                _ = try chunk.addByteOp(.stream_push);
            },
        }
        try chunk.addFixupAbsolute(.br_push, case_label);
    }

    _ = try chunk.addByteOp(.br_table);
    _ = try chunk.addByteOp(.br_select_index);
    _ = try chunk.addByteOp(.br_dispatch);

    for (options_slice, branch_labels.items) |option_index, label| {
        const case_extra = sema.ir.extraData(Ir.Inst.ChoiceBr.Case, @intFromEnum(option_index));
        const body_slice = sema.ir.bodySlice(case_extra.end, case_extra.data.body_len);

        chunk.setLabel(label);
        switch (case_extra.data.operand_1) {
            .none => {},
            else => |content| {
                const content_inst = chunk.resolveInst(content);
                _ = try chunk.doLoad(content_inst);
                _ = try chunk.addByteOp(.stream_push);
            },
        }
        switch (case_extra.data.operand_3) {
            .none => {},
            else => |content| {
                const content_inst = chunk.resolveInst(content);
                _ = try chunk.doLoad(content_inst);
                _ = try chunk.addByteOp(.stream_push);
            },
        }
        _ = try chunk.addByteOp(.stream_flush);
        try blockBodyInner(sema, chunk, body_slice);
    }
}

/// Lowers an implicit return: emits `exit`.
fn irImplicitRet(_: *Sema, chunk: *Chunk, _: Ir.Inst.Index) InnerError!Ref {
    return chunk.addByteOp(.exit);
}

/// Lowers a reference to a declaration by resolving it as a global.
fn irDeclRef(sema: *Sema, _: *Chunk, inst: Ir.Inst.Index) InnerError!Ref {
    const data = sema.ir.instructions[@intFromEnum(inst)].data.string;
    return sema.getGlobal(data.start);
}

/// Lowers a global variable declaration: lowers the initializer body, loads
/// the value of its last instruction, registers the global and stores into it.
fn irDeclVar(
    sema: *Sema,
    chunk: *Chunk,
    name: Ir.NullTerminatedString,
    inst: Ir.Inst.Index,
) InnerError!void {
    const data = sema.ir.instructions[@intFromEnum(inst)].data.payload;
    const extra = sema.ir.extraData(Ir.Inst.Block, data.payload_index);
    const body = sema.ir.bodySlice(extra.end, extra.data.body_len);
    try blockBodyInner(sema, chunk, body);

    // FIXME: hack
    // NOTE(review): this indexes `body.len - 1`, so it relies on the
    // initializer body never being empty - confirm that the IR guarantees it.
    {
        const last_inst = body[body.len - 1].toRef();
        const val = chunk.resolveInst(last_inst);
        _ = try chunk.doLoad(val);
    }

    const global = try sema.addGlobal(name);
    _ = try chunk.addConstOp(.store_global, @intCast(global.global));
    _ = try chunk.addByteOp(.pop);
}

/// Lowers a knot declaration into its own `CompiledStory.Knot` and appends it
/// to `sema.knots`.
fn irDeclKnot(
    sema: *Sema,
    name_ref: Ir.NullTerminatedString,
    inst: Ir.Inst.Index,
) InnerError!void {
    const gpa = sema.gpa;
    const data = sema.ir.instructions[@intFromEnum(inst)].data.payload;
    const extra = sema.ir.extraData(Ir.Inst.Knot, data.payload_index);

    var knot: CompiledStory.Knot = .{
        .name = name_ref,
        .arity = 0,
        .stack_size = 0,
    };
    // Until the knot has been handed over to `sema.knots`, its buffers belong
    // to this function and must not leak when lowering fails.
    errdefer {
        knot.constants.deinit(gpa);
        knot.bytecode.deinit(gpa);
    }

    var chunk: Chunk = .{
        .sema = sema,
        .knot = &knot,
    };
    defer chunk.fixups.deinit(gpa);
    defer chunk.labels.deinit(gpa);
    defer chunk.inst_map.deinit(gpa);

    const body = sema.ir.bodySlice(extra.end, extra.data.body_len);
    try blockBodyInner(sema, &chunk, body);
    _ = try chunk.addByteOp(.exit);
    try chunk.resolveLabels();
    try sema.knots.append(gpa, knot);
}

/// Lowers a declaration by dispatching on the tag of its value instruction.
///
/// `parent_chunk` receives the code of variable initializers and must be
/// non-null for `decl_var`; knots are compiled into a chunk of their own.
fn irDeclaration(sema: *Sema, parent_chunk: ?*Chunk, inst: Ir.Inst.Index) InnerError!void {
    const data = sema.ir.instructions[@intFromEnum(inst)].data.payload;
    const extra = sema.ir.extraData(Ir.Inst.Declaration, data.payload_index).data;
    const value_data = sema.ir.instructions[@intFromEnum(extra.value)];
    switch (value_data.tag) {
        .decl_var => try irDeclVar(sema, parent_chunk.?, extra.name, extra.value),
        .decl_knot => try irDeclKnot(sema, extra.name, extra.value),
        else => unreachable,
    }
}

/// Lowers every instruction of `body` in order.
///
/// Instructions that produce a `Ref` are recorded in `chunk.inst_map` so that
/// later instructions can refer to them; statement-like instructions are
/// lowered for their effect only.
fn blockBodyInner(sema: *Sema, chunk: *Chunk, body: []const Ir.Inst.Index) InnerError!void {
    const gpa = sema.gpa;
    for (body) |inst| {
        const data = sema.ir.instructions[@intFromEnum(inst)];
        const ref: Ref = switch (data.tag) {
            .file => unreachable,
            .declaration => {
                try irDeclaration(sema, chunk, inst);
                continue;
            },
            .decl_var => unreachable, // handled in declaration()
            .decl_knot => unreachable, // handled in declaration()
            .switch_br => {
                try irSwitchBr(sema, chunk, inst);
                continue;
            },
            .alloc => try irAlloc(sema, chunk, inst),
            .store => {
                try irStore(sema, chunk, inst);
                continue;
            },
            .load => try irLoad(sema, chunk, inst),
            .add => try irBinary(sema, chunk, inst, .add),
            .sub => try irBinary(sema, chunk, inst, .sub),
            .mul => try irBinary(sema, chunk, inst, .mul),
            .div => try irBinary(sema, chunk, inst, .div),
            .mod => try irBinary(sema, chunk, inst, .mod),
            .neg => try irUnary(sema, chunk, inst, .neg),
            .not => try irUnary(sema, chunk, inst, .not),
            .cmp_eq => try irBinary(sema, chunk, inst, .cmp_eq),
            // "Not equal" is emitted as `cmp_eq` followed by `not`.
            .cmp_neq => blk: {
                _ = try irBinary(sema, chunk, inst, .cmp_eq);
                const tmp = try chunk.addByteOp(.not);
                break :blk tmp;
            },
            .cmp_lt => try irBinary(sema, chunk, inst, .cmp_lt),
            .cmp_lte => try irBinary(sema, chunk, inst, .cmp_lte),
            .cmp_gt => try irBinary(sema, chunk, inst, .cmp_gt),
            .cmp_gte => try irBinary(sema, chunk, inst, .cmp_gte),
            .decl_ref => try irDeclRef(sema, chunk, inst),
            .integer => try irInteger(sema, inst),
            .string => try irString(sema, inst),
            .condbr => try irCondBr(sema, chunk, inst),
            .@"break" => {
                try irBreak(sema, inst);
                continue;
            },
            .block => {
                try irBlock(sema, chunk, inst);
                continue;
            },
            .content_push => try irContentPush(sema, chunk, inst),
            .content_flush => try irContentFlush(sema, chunk, inst),
            .choice_br => {
                try irChoiceBr(sema, chunk, inst);
                continue;
            },
            .implicit_ret => try irImplicitRet(sema, chunk, inst),
        };
        try chunk.inst_map.put(gpa, inst, ref);
    }
}

/// Lowers the root `file` instruction: seeds the constant pool and lowers
/// every top-level declaration.
fn file(sema: *Sema, inst: Ir.Inst.Index) !void {
    const data = sema.ir.instructions[@intFromEnum(inst)].data.payload;
    const extra = sema.ir.extraData(Ir.Inst.Block, data.payload_index);
    const body = sema.ir.bodySlice(extra.end, extra.data.body_len);

    // FIXME: We are going to get burned by this if we don't formalize it.
    // Adding common constants to the constant pool.
    _ = try sema.getConstant(.{ .integer = 0 });
    _ = try sema.getConstant(.{ .integer = 1 });

    for (body) |body_index| try irDeclaration(sema, null, body_index);
}

/// Per-knot code generation state: the bytecode being emitted plus the labels
/// and jump fixups that are patched once the whole knot has been emitted.
const Chunk = struct {
    sema: *Sema,
    /// Knot that receives the emitted bytecode and constants.
    knot: *CompiledStory.Knot,
    labels: std.ArrayListUnmanaged(Label) = .empty,
    fixups: std.ArrayListUnmanaged(Fixup) = .empty,
    /// Result of every value-producing IR instruction lowered so far.
    inst_map: std.AutoHashMapUnmanaged(Ir.Inst.Index, Ref) = .empty,

    /// Offset of a label that has been created but not placed yet.
    const dummy_address = 0xffffffff;

    const Label = struct {
        code_offset: usize,
    };

    /// A two-byte jump operand that still has to be filled in.
    const Fixup = struct {
        mode: enum {
            /// Operand is the distance from the end of the operand to the label.
            relative,
            /// Operand is the label's offset from the start of the bytecode.
            absolute,
        },
        label_index: u32,
        /// Offset of the first operand byte.
        code_offset: u32,
    };

    /// Emits a one-byte instruction and returns its offset as an `.index` ref.
    fn addByteOp(chunk: *Chunk, op: Story.Opcode) error{OutOfMemory}!Ref {
        const gpa = chunk.sema.gpa;
        const bytecode = &chunk.knot.bytecode;
        const byte_index = bytecode.items.len;
        try bytecode.append(gpa, @intFromEnum(op));
        return .{ .index = @intCast(byte_index) };
    }

    /// Emits an instruction with a one-byte operand and returns the offset of
    /// the opcode as an `.index` ref.
    fn addConstOp(chunk: *Chunk, op: Story.Opcode, arg: u8) error{OutOfMemory}!Ref {
        const gpa = chunk.sema.gpa;
        const bytecode = &chunk.knot.bytecode;
        const byte_index = bytecode.items.len;
        try bytecode.ensureUnusedCapacity(gpa, 2);
        bytecode.appendAssumeCapacity(@intFromEnum(op));
        bytecode.appendAssumeCapacity(arg);
        return .{ .index = @intCast(byte_index) };
    }

    /// Emits a jump with a placeholder two-byte operand (`0xffff`) and returns
    /// the offset of the *operand* (not of the opcode) as an `.index` ref.
    fn addJumpOp(chunk: *Chunk, op: Story.Opcode) error{OutOfMemory}!Ref {
        const gpa = chunk.sema.gpa;
        const bytecode = &chunk.knot.bytecode;
        try bytecode.ensureUnusedCapacity(gpa, 3);
        bytecode.appendAssumeCapacity(@intFromEnum(op));
        bytecode.appendAssumeCapacity(0xff);
        bytecode.appendAssumeCapacity(0xff);
        return .{ .index = @intCast(bytecode.items.len - 2) };
    }

    /// Maps an IR operand to the `Ref` produced when it was lowered.
    ///
    /// Operands that name an instruction must already be in `inst_map`.
    /// Any other operand that is not a boolean is turned into a `.constant`
    /// ref carrying the operand's raw enum value.
    fn resolveInst(chunk: *Chunk, ref: Ir.Inst.Ref) Ref {
        if (ref.toIndex()) |index| {
            return chunk.inst_map.get(index).?;
        }
        switch (ref) {
            .bool_true => return .bool_true,
            .bool_false => return .bool_false,
            else => return .{ .constant = @intFromEnum(ref) },
        }
    }

    /// Emits jump `op` whose operand will become the relative distance to
    /// `label` once `resolveLabels` runs.
    fn addFixup(chunk: *Chunk, op: Story.Opcode, label: usize) !void {
        const code_ref = try chunk.addJumpOp(op);
        return chunk.fixups.append(chunk.sema.gpa, .{
            .mode = .relative,
            .label_index = @intCast(label),
            .code_offset = code_ref.index,
        });
    }

    /// Emits jump `op` whose operand will become the absolute offset of
    /// `label` once `resolveLabels` runs.
    fn addFixupAbsolute(chunk: *Chunk, op: Story.Opcode, label: usize) !void {
        const code_ref = try chunk.addJumpOp(op);
        return chunk.fixups.append(chunk.sema.gpa, .{
            .mode = .absolute,
            .label_index = @intCast(label),
            .code_offset = code_ref.index,
        });
    }

    /// Creates a new, not yet placed label and returns its index.
    fn addLabel(chunk: *Chunk) error{OutOfMemory}!usize {
        const label_index = chunk.labels.items.len;
        try chunk.labels.append(chunk.sema.gpa, .{
            .code_offset = dummy_address,
        });
        return label_index;
    }

    /// Places label `label_index` at the current end of the bytecode.
    fn setLabel(chunk: *Chunk, label_index: usize) void {
        const code_offset = chunk.knot.bytecode.items.len;
        // A valid index is strictly below the number of labels.
        assert(label_index < chunk.labels.items.len);
        const label_data = &chunk.labels.items[label_index];
        label_data.code_offset = code_offset;
    }

    /// Patches every recorded jump operand with its final big-endian 16-bit
    /// value.
    ///
    /// Returns `error.InvalidJump` when a relative jump would have to go
    /// backwards or when the value does not fit the operand.
    fn resolveLabels(chunk: *Chunk) !void {
        const bytecode = &chunk.knot.bytecode;
        for (chunk.fixups.items) |fixup| {
            const label = chunk.labels.items[fixup.label_index];
            assert(label.code_offset != dummy_address);

            // Relative distances are measured from the end of the operand.
            const operand_end: usize = @as(usize, fixup.code_offset) + 2;
            const target_offset: usize = switch (fixup.mode) {
                .relative => blk: {
                    if (label.code_offset < operand_end) return error.InvalidJump;
                    break :blk label.code_offset - operand_end;
                },
                .absolute => label.code_offset,
            };
            if (target_offset >= std.math.maxInt(u16)) {
                std.debug.print("Too much code to jump over!\n", .{});
                return error.InvalidJump;
            }

            // The bytes being patched are the placeholder operand itself.
            assert(operand_end <= bytecode.items.len);
            bytecode.items[fixup.code_offset] = @intCast((target_offset >> 8) & 0xff);
            bytecode.items[fixup.code_offset + 1] = @intCast(target_offset & 0xff);
        }
    }

    /// Emits the instruction that pushes the value denoted by `ref`.
    ///
    /// `.none` and `.index` refs emit nothing and are returned unchanged.
    /// Returns `error.TooManyConstants` when the knot's constant table can no
    /// longer be addressed with a one-byte operand.
    fn doLoad(chunk: *Chunk, ref: Ref) InnerError!Ref {
        const gpa = chunk.sema.gpa;
        switch (ref) {
            .bool_true => return chunk.addByteOp(.true),
            .bool_false => return chunk.addByteOp(.false),
            .none => return ref,
            .constant => |id| {
                // TODO: This isn't great. New constant indexes are
                // created each time.
                const ref_const = std.math.cast(u8, chunk.knot.constants.items.len) orelse
                    return error.TooManyConstants;
                try chunk.knot.constants.append(gpa, id);
                return chunk.addConstOp(.load_const, ref_const);
            },
            .global => |id| {
                // TODO: This isn't great. New constant indexes are
                // created each time.
                const ref_const = std.math.cast(u8, chunk.knot.constants.items.len) orelse
                    return error.TooManyConstants;
                try chunk.knot.constants.append(gpa, id);
                return chunk.addConstOp(.load_global, ref_const);
            },
            .local => |id| return chunk.addConstOp(.load, @intCast(id)),
            .index => return ref,
        }
    }
};

/// Output of `compile`: the compiled knots, the story-wide constant pool and
/// the constant indices of the global names. All slices are owned by the
/// `CompiledStory` and released by `deinit`.
pub const CompiledStory = struct {
    knots: []Knot,
    constants: []Constant,
    globals: []u32,

    pub const Knot = struct {
        name: Ir.NullTerminatedString,
        arity: u32,
        stack_size: u32,
        /// Indices into `CompiledStory.constants`, addressed by the one-byte
        /// operands of `load_const` / `load_global`.
        constants: std.ArrayListUnmanaged(u32) = .empty,
        bytecode: std.ArrayListUnmanaged(u8) = .empty,
    };

    pub const Constant = union(enum) {
        integer: u64,
        string: Ir.NullTerminatedString,
    };

    /// Frees every knot's buffers and the three top-level slices.
    /// `gpa` must be the allocator that was passed to `compile`.
    pub fn deinit(self: *CompiledStory, gpa: std.mem.Allocator) void {
        for (self.knots) |*knot| {
            knot.constants.deinit(gpa);
            knot.bytecode.deinit(gpa);
        }
        gpa.free(self.knots);
        gpa.free(self.globals);
        gpa.free(self.constants);
        self.* = undefined;
    }

    /// Materializes the compiled story into `story`: creates one runtime
    /// object per constant, registers every global name with a `null` value
    /// and creates one `Object.ContentPath` per knot.
    ///
    /// Each knot's `constants` and `bytecode` buffers are moved out of `self`
    /// (via `toOwnedSlice`) into the created content path.
    pub fn buildRuntime(
        self: *CompiledStory,
        gpa: std.mem.Allocator,
        ir: Ir,
        story: *Story,
    ) !void {
        const constants_pool = &story.constants_pool;
        try constants_pool.ensureUnusedCapacity(gpa, self.constants.len);
        try story.paths.ensureUnusedCapacity(gpa, self.knots.len);
        try story.globals.ensureUnusedCapacity(gpa, @intCast(self.globals.len));

        for (self.constants) |constant| {
            switch (constant) {
                .integer => |value| {
                    const object: *Object.Number = try .create(story, .{
                        .integer = @intCast(value),
                    });
                    constants_pool.appendAssumeCapacity(&object.base);
                },
                .string => |ref| {
                    const bytes = ir.nullTerminatedString(ref);
                    const object: *Object.String = try .create(story, bytes);
                    constants_pool.appendAssumeCapacity(&object.base);
                },
            }
        }

        for (self.globals) |global_index| {
            const str = self.constants[global_index];
            const name_bytes = ir.nullTerminatedString(str.string);
            story.globals.putAssumeCapacity(name_bytes, null);
        }

        for (self.knots) |*knot| {
            const runtime_chunk: *Object.ContentPath = try .create(story, .{
                .name = try .create(story, ir.nullTerminatedString(knot.name)),
                .arity = @intCast(knot.arity),
                .locals_count = @intCast(knot.stack_size - knot.arity),
                .const_pool = try knot.constants.toOwnedSlice(gpa),
                .bytes = try knot.bytecode.toOwnedSlice(gpa),
            });
            story.paths.appendAssumeCapacity(&runtime_chunk.base);
        }
    }
};

/// Compiles `ir` into a `CompiledStory`.
///
/// The caller owns the result and must release it with
/// `CompiledStory.deinit`, passing the same `gpa`.
pub fn compile(gpa: std.mem.Allocator, ir: *const Ir) !CompiledStory {
    var sema: Sema = .{
        .gpa = gpa,
        .ir = ir,
    };
    defer sema.deinit();
    try file(&sema, .file_inst);

    // Take ownership one slice at a time so that a failure part-way through
    // does not leak the slices that were already detached from `sema`.
    const constants = try sema.constants.toOwnedSlice(gpa);
    errdefer gpa.free(constants);
    const globals = try sema.globals.toOwnedSlice(gpa);
    errdefer gpa.free(globals);
    const knots = try sema.knots.toOwnedSlice(gpa);

    return .{
        .constants = constants,
        .globals = globals,
        .knots = knots,
    };
}