ink/src/Sema.zig

684 lines
24 KiB
Zig

const std = @import("std");
const Ir = @import("Ir.zig");
const Story = @import("Story.zig");
const Object = Story.Object;
const assert = std.debug.assert;
const Sema = @This();
/// Allocator used for every allocation made by this analysis state.
gpa: std.mem.Allocator,
/// The IR being lowered; only read from.
ir: *const Ir,
/// Story-wide constant pool, in insertion order (see `getConstant`).
constants: std.ArrayListUnmanaged(CompiledStory.Constant) = .empty,
/// Maps a constant value to its index in `constants`, so equal values are stored once.
constant_map: std.AutoHashMapUnmanaged(CompiledStory.Constant, u32) = .empty,
/// For each declared global, the `constants` index of its name (see `addGlobal`).
globals: std.ArrayListUnmanaged(u32) = .empty,
/// Knots that have been fully lowered (see `irDeclKnot`).
knots: std.ArrayListUnmanaged(CompiledStory.Knot) = .empty,
/// Error set shared by the lowering functions in this file.
const InnerError = error{
/// An allocation through `gpa` failed.
OutOfMemory,
TooManyConstants,
/// A jump target could not be encoded (returned by `Chunk.resolveLabels`).
InvalidJump,
};
/// What an already-lowered IR instruction refers to. `Chunk.doLoad` turns a
/// `Ref` into the bytecode that pushes its value, where that makes sense.
const Ref = union(enum) {
/// The literal `true`.
bool_true,
/// The literal `false`.
bool_false,
/// No value (for example the result of `irCondBr`).
none,
/// Byte offset of an instruction that was emitted into the knot's bytecode.
index: u32,
/// Index into `Sema.constants`.
constant: u32,
/// Index into `Sema.constants` of the global's name.
global: u32,
/// Stack slot handed out by `irAlloc`.
local: u32,
};
/// Releases everything the analysis state still owns and poisons `sema`.
///
/// Knots that are still in `sema.knots` (because compilation failed before
/// they were handed to the caller) own their bytecode and constant lists, so
/// those are released here as well. After a successful `toOwnedSlice` on
/// `sema.knots` the list is empty and the loop does nothing.
fn deinit(sema: *Sema) void {
    const gpa = sema.gpa;
    for (sema.knots.items) |*knot| {
        knot.constants.deinit(gpa);
        knot.bytecode.deinit(gpa);
    }
    sema.constants.deinit(gpa);
    sema.constant_map.deinit(gpa);
    sema.globals.deinit(gpa);
    sema.knots.deinit(gpa);
    sema.* = undefined;
}
/// Aborts by panicking with `message`.
///
/// The `InnerError` return type only exists so call sites can write
/// `return sema.fail(...)`; this function never actually returns.
fn fail(sema: *Sema, message: []const u8) InnerError {
    _ = sema;
    @panic(message);
}
/// Interns `data` in the story-wide constant pool and returns a `.constant`
/// ref holding its index. Equal values always yield the same index.
///
/// Errors:
/// - `error.OutOfMemory` if the map or the list cannot grow.
/// - `error.TooManyConstants` if the new index would not fit in a `u32`.
///
/// On any error the map and the list are left exactly as they were.
fn getConstant(sema: *Sema, data: CompiledStory.Constant) !Ref {
    const gpa = sema.gpa;

    // One hash lookup covers both the "already interned" and the insert case.
    const entry = try sema.constant_map.getOrPut(gpa, data);
    if (entry.found_existing) return .{ .constant = entry.value_ptr.* };

    // The key is in the map but has no value yet; take it back out if
    // anything below fails so the map never points at a missing list entry.
    errdefer _ = sema.constant_map.remove(data);

    const index = std.math.cast(u32, sema.constants.items.len) orelse
        return error.TooManyConstants;
    try sema.constants.append(gpa, data);
    entry.value_ptr.* = index;
    return .{ .constant = index };
}
/// Declares a global called `name`: interns the name as a string constant,
/// records that constant's index in `sema.globals`, and returns it as a
/// `.global` ref.
fn addGlobal(sema: *Sema, name: Ir.NullTerminatedString) !Ref {
    const name_ref = try sema.getConstant(.{ .string = name });
    const name_index = name_ref.constant;
    try sema.globals.append(sema.gpa, name_index);
    return .{ .global = name_index };
}
/// Looks up the global called `name` among `sema.ir.globals` and returns a
/// `.global` ref carrying the constant-pool index of the name.
///
/// The name is interned before the lookup. If no such global exists this
/// calls `sema.fail`, which panics.
fn getGlobal(sema: *Sema, name: Ir.NullTerminatedString) !Ref {
    const name_ref = try sema.getConstant(.{ .string = name });
    const declared = for (sema.ir.globals) |candidate| {
        if (name == candidate.name) break true;
    } else false;
    if (!declared) return sema.fail("unknown global variable");
    return .{ .global = name_ref.constant };
}
/// Lowers an integer literal by interning its value in the constant pool.
/// Emits no bytecode; returns the resulting `.constant` ref.
fn irInteger(sema: *Sema, inst: Ir.Inst.Index) InnerError!Ref {
    const literal = sema.ir.instructions[@intFromEnum(inst)].data.integer;
    const interned = try sema.getConstant(.{ .integer = literal.value });
    return interned;
}
/// Lowers a string literal by interning it (by its `start` handle) in the
/// constant pool. Emits no bytecode; returns the resulting `.constant` ref.
fn irString(sema: *Sema, inst: Ir.Inst.Index) InnerError!Ref {
    const literal = sema.ir.instructions[@intFromEnum(inst)].data.string;
    const interned = try sema.getConstant(.{ .string = literal.start });
    return interned;
}
/// Lowers a unary instruction: emits the load of its operand followed by the
/// single-byte opcode `op`. Returns the ref of the emitted opcode.
fn irUnary(
    sema: *Sema,
    chunk: *Chunk,
    inst: Ir.Inst.Index,
    op: Story.Opcode,
) InnerError!Ref {
    const operand_ref = sema.ir.instructions[@intFromEnum(inst)].data.un.lhs;
    const operand = chunk.resolveInst(operand_ref);
    _ = try chunk.doLoad(operand);
    const emitted = try chunk.addByteOp(op);
    return emitted;
}
/// Lowers a binary instruction: emits the load of the left operand, then the
/// right operand, then the single-byte opcode `op`. Returns the ref of the
/// emitted opcode.
fn irBinary(
    sema: *Sema,
    chunk: *Chunk,
    inst: Ir.Inst.Index,
    op: Story.Opcode,
) InnerError!Ref {
    const operands = sema.ir.instructions[@intFromEnum(inst)].data.bin;
    // Both operands are resolved before anything is emitted.
    const left = chunk.resolveInst(operands.lhs);
    const right = chunk.resolveInst(operands.rhs);
    for ([_]Ref{ left, right }) |operand| {
        _ = try chunk.doLoad(operand);
    }
    const emitted = try chunk.addByteOp(op);
    return emitted;
}
/// Reserves the next stack slot of the current knot and returns it as a
/// `.local` ref. Emits no bytecode.
fn irAlloc(_: *Sema, chunk: *Chunk, _: Ir.Inst.Index) InnerError!Ref {
    const knot = chunk.knot;
    const slot = knot.stack_size;
    // TODO: Add constraints on how many temporaries we can have.
    // max(u8) or max(u16) are most likely appropriate.
    knot.stack_size = slot + 1;
    return .{ .local = slot };
}
/// Lowers a store: emits the load of the value (`rhs`), a store opcode chosen
/// by the kind of target (`lhs`), and finally a `pop`.
///
/// Only `.global` and `.local` targets are supported; every other kind of
/// target is `unreachable`.
fn irStore(sema: *Sema, chunk: *Chunk, inst: Ir.Inst.Index) InnerError!void {
    const operands = sema.ir.instructions[@intFromEnum(inst)].data.bin;
    const target = chunk.resolveInst(operands.lhs);
    const value = chunk.resolveInst(operands.rhs);

    _ = try chunk.doLoad(value);
    _ = switch (target) {
        .global => |id| try chunk.addConstOp(.store_global, @intCast(id)),
        .local => |id| try chunk.addConstOp(.store, @intCast(id)),
        // TODO: "Cannot assign to boolean"
        .bool_true, .bool_false => unreachable,
        // TODO: "Cannot assign to constant"
        .constant => unreachable,
        .none, .index => unreachable,
    };
    _ = try chunk.addByteOp(.pop);
}
/// Lowers a load by emitting whatever `Chunk.doLoad` produces for the
/// resolved operand, and returns that ref.
// TODO: Check what the target is!
fn irLoad(sema: *Sema, chunk: *Chunk, inst: Ir.Inst.Index) InnerError!Ref {
    const source_ref = sema.ir.instructions[@intFromEnum(inst)].data.un.lhs;
    const source = chunk.resolveInst(source_ref);
    const loaded = try chunk.doLoad(source);
    return loaded;
}
/// Lowers a two-way conditional branch. The emitted layout is:
///
///     <load condition>
///     jmp_f else
///     pop
///     <then body>
///     jmp end
///   else:
///     pop
///     <else body>
///   end:
///
/// Always returns `.none`.
fn irCondBr(sema: *Sema, chunk: *Chunk, inst: Ir.Inst.Index) InnerError!Ref {
    const payload = sema.ir.instructions[@intFromEnum(inst)].data.payload;
    const cond_br = sema.ir.extraData(Ir.Inst.CondBr, payload.payload_index);

    // The else body is stored directly after the then body.
    const then_body = sema.ir.bodySlice(cond_br.end, cond_br.data.then_body_len);
    const else_start = cond_br.end + then_body.len;
    const else_body = sema.ir.bodySlice(else_start, cond_br.data.else_body_len);

    const else_label = try chunk.addLabel();
    const end_label = try chunk.addLabel();

    const condition = chunk.resolveInst(cond_br.data.condition);
    _ = try chunk.doLoad(condition);
    try chunk.addFixup(.jmp_f, else_label);

    // Taken branch.
    _ = try chunk.addByteOp(.pop);
    try sema.blockBodyInner(chunk, then_body);
    try chunk.addFixup(.jmp, end_label);

    // Fallthrough branch.
    chunk.setLabel(else_label);
    _ = try chunk.addByteOp(.pop);
    try sema.blockBodyInner(chunk, else_body);

    chunk.setLabel(end_label);
    return .none;
}
/// Lowers a `break` instruction. Currently a no-op: nothing is emitted and
/// both arguments are ignored.
fn irBreak(_: *Sema, _: Ir.Inst.Index) InnerError!void {}
/// Lowers a block by lowering each instruction of its body in order.
fn irBlock(sema: *Sema, chunk: *Chunk, inst: Ir.Inst.Index) InnerError!void {
    const payload = sema.ir.instructions[@intFromEnum(inst)].data.payload;
    const block = sema.ir.extraData(Ir.Inst.Block, payload.payload_index);
    const instructions = sema.ir.bodySlice(block.end, block.data.body_len);
    try sema.blockBodyInner(chunk, instructions);
}
/// Lowers a multi-way branch.
///
/// Emitted layout:
///
///     store <tmp>
///     for each case:  load <tmp>; <load case value>; cmp_eq; jmp_t case_i; pop
///     jmp else
///     for each case:  case_i: pop; <case body>; jmp exit
///   else:
///     <else body>
///   exit:
///
/// NOTE(review): the switch operand itself is not loaded here (that line is
/// commented out below); the leading `store` presumably relies on the value
/// already being on top of the stack — confirm against the IR producer.
fn irSwitchBr(sema: *Sema, chunk: *Chunk, inst: Ir.Inst.Index) InnerError!void {
const data = sema.ir.instructions[@intFromEnum(inst)].data.payload;
const extra = sema.ir.extraData(Ir.Inst.SwitchBr, data.payload_index);
const cases_slice = sema.ir.bodySlice(extra.end, extra.data.cases_len);
// One label per case, collected in the first pass and bound in the second.
var case_labels: std.ArrayListUnmanaged(usize) = .empty;
try case_labels.ensureUnusedCapacity(sema.gpa, cases_slice.len + 1);
defer case_labels.deinit(sema.gpa);
// TODO: Do something with this value?
//const condition = chunk.resolveInst(extra.data.operand);
const exit_label = try chunk.addLabel();
// Reserve a fresh stack slot that holds the value being switched on.
const cmp_var = chunk.knot.stack_size;
chunk.knot.stack_size += 1;
_ = try chunk.addConstOp(.store, @intCast(cmp_var));
// First pass: the chain of comparisons, one conditional jump per case.
for (cases_slice) |case_index| {
const case_extra = sema.ir.extraData(Ir.Inst.SwitchBr.Case, @intFromEnum(case_index));
const case_expr = chunk.resolveInst(case_extra.data.operand);
const case_label_index = try chunk.addLabel();
case_labels.appendAssumeCapacity(case_label_index);
_ = try chunk.addConstOp(.load, @intCast(cmp_var));
_ = try chunk.doLoad(case_expr);
_ = try chunk.addByteOp(.cmp_eq);
_ = try chunk.addFixup(.jmp_t, case_label_index);
// Reached only when the jump was not taken.
_ = try chunk.addByteOp(.pop);
}
// No case matched: skip over all case bodies.
const else_label = try chunk.addLabel();
try chunk.addFixup(.jmp, else_label);
// Second pass: the case bodies, each starting with a `pop` and ending
// with a jump to the common exit.
for (cases_slice, case_labels.items) |case_index, label_index| {
const case_extra = sema.ir.extraData(Ir.Inst.SwitchBr.Case, @intFromEnum(case_index));
const case_body = sema.ir.bodySlice(case_extra.end, case_extra.data.body_len);
chunk.setLabel(label_index);
_ = try chunk.addByteOp(.pop);
try blockBodyInner(sema, chunk, case_body);
try chunk.addFixup(.jmp, exit_label);
}
// The else body is stored right after the list of cases.
const else_body = sema.ir.bodySlice(
extra.end + extra.data.cases_len,
extra.data.else_body_len,
);
chunk.setLabel(else_label);
try blockBodyInner(sema, chunk, else_body);
chunk.setLabel(exit_label);
}
/// Lowers a content push: emits the load of the operand followed by
/// `stream_push`. Returns the ref of the emitted `stream_push`.
fn irContentPush(sema: *Sema, chunk: *Chunk, inst: Ir.Inst.Index) InnerError!Ref {
    const content_ref = sema.ir.instructions[@intFromEnum(inst)].data.un.lhs;
    const content = chunk.resolveInst(content_ref);
    _ = try chunk.doLoad(content);
    const emitted = try chunk.addByteOp(.stream_push);
    return emitted;
}
/// Lowers a content flush by emitting a single `stream_flush` opcode and
/// returning its ref.
fn irContentFlush(_: *Sema, chunk: *Chunk, _: Ir.Inst.Index) InnerError!Ref {
    const emitted = try chunk.addByteOp(.stream_flush);
    return emitted;
}
/// Emits the code that pushes one piece of choice text onto the output
/// stream: the load of `operand` followed by `stream_push`.
/// Emits nothing when `operand` is `.none`.
fn emitChoiceContent(chunk: *Chunk, operand: Ir.Inst.Ref) InnerError!void {
    if (operand == .none) return;
    const content = chunk.resolveInst(operand);
    _ = try chunk.doLoad(content);
    _ = try chunk.addByteOp(.stream_push);
}

/// Lowers a choice point.
///
/// Emitted layout:
///
///     for each option:  <push operand_1>; <push operand_2>; br_push option_i
///     br_table
///     br_select_index
///     br_dispatch
///     for each option:  option_i: <push operand_1>; <push operand_3>;
///                       stream_flush; <option body>
///
/// `br_push` targets are recorded as absolute fixups and patched later by
/// `Chunk.resolveLabels`.
fn irChoiceBr(sema: *Sema, chunk: *Chunk, inst: Ir.Inst.Index) InnerError!void {
    const gpa = sema.gpa;
    const data = sema.ir.instructions[@intFromEnum(inst)].data.payload;
    const choice_extra = sema.ir.extraData(Ir.Inst.ChoiceBr, data.payload_index);
    const options_slice = sema.ir.bodySlice(choice_extra.end, choice_extra.data.cases_len);

    // One label per option; registered for cleanup before the first allocation.
    var branch_labels: std.ArrayListUnmanaged(usize) = .empty;
    defer branch_labels.deinit(gpa);
    try branch_labels.ensureUnusedCapacity(gpa, options_slice.len);

    // First pass: the text shown while the choice is offered, plus the
    // branch-table entry for each option.
    for (options_slice) |option_index| {
        const case_extra = sema.ir.extraData(Ir.Inst.ChoiceBr.Case, @intFromEnum(option_index));
        const case_label = try chunk.addLabel();
        branch_labels.appendAssumeCapacity(case_label);

        try emitChoiceContent(chunk, case_extra.data.operand_1);
        try emitChoiceContent(chunk, case_extra.data.operand_2);
        try chunk.addFixupAbsolute(.br_push, case_label);
    }

    _ = try chunk.addByteOp(.br_table);
    _ = try chunk.addByteOp(.br_select_index);
    _ = try chunk.addByteOp(.br_dispatch);

    // Second pass: what runs once an option has been picked.
    for (options_slice, branch_labels.items) |option_index, label| {
        const case_extra = sema.ir.extraData(Ir.Inst.ChoiceBr.Case, @intFromEnum(option_index));
        const body_slice = sema.ir.bodySlice(case_extra.end, case_extra.data.body_len);
        chunk.setLabel(label);

        try emitChoiceContent(chunk, case_extra.data.operand_1);
        try emitChoiceContent(chunk, case_extra.data.operand_3);
        _ = try chunk.addByteOp(.stream_flush);
        try blockBodyInner(sema, chunk, body_slice);
    }
}
/// Lowers an implicit return by emitting a single `exit` opcode and
/// returning its ref.
fn irImplicitRet(_: *Sema, chunk: *Chunk, _: Ir.Inst.Index) InnerError!Ref {
    const emitted = try chunk.addByteOp(.exit);
    return emitted;
}
/// Lowers a reference to a declaration by name. Emits no bytecode; the
/// result is whatever `getGlobal` returns for that name.
fn irDeclRef(sema: *Sema, _: *Chunk, inst: Ir.Inst.Index) InnerError!Ref {
    const name = sema.ir.instructions[@intFromEnum(inst)].data.string.start;
    const global = try sema.getGlobal(name);
    return global;
}
/// Lowers a global variable declaration into `chunk`: the initializer body,
/// a load of the initializer's result, `store_global` for the newly declared
/// global, and a `pop`.
///
/// The initializer body must contain at least one instruction, because its
/// last instruction is taken to be the value.
fn irDeclVar(
    sema: *Sema,
    chunk: *Chunk,
    name: Ir.NullTerminatedString,
    inst: Ir.Inst.Index,
) InnerError!void {
    const payload = sema.ir.instructions[@intFromEnum(inst)].data.payload;
    const block = sema.ir.extraData(Ir.Inst.Block, payload.payload_index);
    const init_body = sema.ir.bodySlice(block.end, block.data.body_len);
    try sema.blockBodyInner(chunk, init_body);

    // FIXME: hack
    // Whatever the final instruction of the body produced is the value.
    const result_ref = init_body[init_body.len - 1].toRef();
    const result = chunk.resolveInst(result_ref);
    _ = try chunk.doLoad(result);

    const declared = try sema.addGlobal(name);
    _ = try chunk.addConstOp(.store_global, @intCast(declared.global));
    _ = try chunk.addByteOp(.pop);
}
/// Lowers a knot declaration into a fresh `CompiledStory.Knot` and appends it
/// to `sema.knots`.
///
/// The knot's body is lowered into its own `Chunk`, a trailing `exit` is
/// emitted, and all pending jump fixups are resolved before the knot is
/// stored. If any step fails, the partially built knot's bytecode and
/// constant lists are released before the error is returned.
fn irDeclKnot(
    sema: *Sema,
    name_ref: Ir.NullTerminatedString,
    inst: Ir.Inst.Index,
) InnerError!void {
    const gpa = sema.gpa;
    const data = sema.ir.instructions[@intFromEnum(inst)].data.payload;
    const extra = sema.ir.extraData(Ir.Inst.Knot, data.payload_index);

    var knot: CompiledStory.Knot = .{
        .name = name_ref,
        .arity = 0,
        .stack_size = 0,
    };
    // Until the knot has been appended to `sema.knots`, nothing else owns
    // these two lists.
    errdefer {
        knot.constants.deinit(gpa);
        knot.bytecode.deinit(gpa);
    }

    var chunk: Chunk = .{
        .sema = sema,
        .knot = &knot,
    };
    // The chunk's bookkeeping is only needed while lowering this knot.
    defer chunk.fixups.deinit(gpa);
    defer chunk.labels.deinit(gpa);
    defer chunk.inst_map.deinit(gpa);

    const body = sema.ir.bodySlice(extra.end, extra.data.body_len);
    try blockBodyInner(sema, &chunk, body);
    _ = try chunk.addByteOp(.exit);
    try chunk.resolveLabels();
    try sema.knots.append(gpa, knot);
}
/// Lowers a declaration by dispatching on the tag of its value instruction:
/// `decl_var` goes to `irDeclVar`, `decl_knot` to `irDeclKnot`.
///
/// `parent_chunk` must be non-null for a variable declaration, since its
/// initializer is emitted into that chunk. Any other value tag is
/// `unreachable`.
fn irDeclaration(sema: *Sema, parent_chunk: ?*Chunk, inst: Ir.Inst.Index) !void {
    const payload = sema.ir.instructions[@intFromEnum(inst)].data.payload;
    const decl = sema.ir.extraData(Ir.Inst.Declaration, payload.payload_index).data;
    const value_tag = sema.ir.instructions[@intFromEnum(decl.value)].tag;
    switch (value_tag) {
        .decl_knot => try irDeclKnot(sema, decl.name, decl.value),
        .decl_var => try irDeclVar(sema, parent_chunk.?, decl.name, decl.value),
        else => unreachable,
    }
}
/// Lowers every instruction of `body`, in order, into `chunk`.
///
/// Instructions that yield a `Ref` have it recorded in `chunk.inst_map` so
/// later instructions can find it through `Chunk.resolveInst`. Instructions
/// that yield nothing skip that step.
fn blockBodyInner(sema: *Sema, chunk: *Chunk, body: []const Ir.Inst.Index) InnerError!void {
    for (body) |inst| {
        const tag = sema.ir.instructions[@intFromEnum(inst)].tag;
        const result: Ref = switch (tag) {
            // `file` is the root; the `decl_*` tags are handled in declaration().
            .file, .decl_var, .decl_knot => unreachable,

            // Instructions without a result: nothing is recorded for them.
            .declaration => {
                try irDeclaration(sema, chunk, inst);
                continue;
            },
            .store => {
                try irStore(sema, chunk, inst);
                continue;
            },
            .block => {
                try irBlock(sema, chunk, inst);
                continue;
            },
            .@"break" => {
                try irBreak(sema, inst);
                continue;
            },
            .switch_br => {
                try irSwitchBr(sema, chunk, inst);
                continue;
            },
            .choice_br => {
                try irChoiceBr(sema, chunk, inst);
                continue;
            },

            // Literals and references.
            .integer => try irInteger(sema, inst),
            .string => try irString(sema, inst),
            .decl_ref => try irDeclRef(sema, chunk, inst),

            // Locals.
            .alloc => try irAlloc(sema, chunk, inst),
            .load => try irLoad(sema, chunk, inst),

            // Arithmetic.
            .add => try irBinary(sema, chunk, inst, .add),
            .sub => try irBinary(sema, chunk, inst, .sub),
            .mul => try irBinary(sema, chunk, inst, .mul),
            .div => try irBinary(sema, chunk, inst, .div),
            .mod => try irBinary(sema, chunk, inst, .mod),
            .neg => try irUnary(sema, chunk, inst, .neg),
            .not => try irUnary(sema, chunk, inst, .not),

            // Comparisons. "Not equal" is emitted as `cmp_eq` followed by `not`.
            .cmp_eq => try irBinary(sema, chunk, inst, .cmp_eq),
            .cmp_neq => neq: {
                _ = try irBinary(sema, chunk, inst, .cmp_eq);
                break :neq try chunk.addByteOp(.not);
            },
            .cmp_lt => try irBinary(sema, chunk, inst, .cmp_lt),
            .cmp_lte => try irBinary(sema, chunk, inst, .cmp_lte),
            .cmp_gt => try irBinary(sema, chunk, inst, .cmp_gt),
            .cmp_gte => try irBinary(sema, chunk, inst, .cmp_gte),

            // Control flow and output that still report a ref.
            .condbr => try irCondBr(sema, chunk, inst),
            .content_push => try irContentPush(sema, chunk, inst),
            .content_flush => try irContentFlush(sema, chunk, inst),
            .implicit_ret => try irImplicitRet(sema, chunk, inst),
        };
        try chunk.inst_map.put(sema.gpa, inst, result);
    }
}
/// Lowers the root instruction: seeds the constant pool with the integers
/// 0 and 1, then lowers each top-level declaration with no parent chunk.
fn file(sema: *Sema, inst: Ir.Inst.Index) !void {
    const payload = sema.ir.instructions[@intFromEnum(inst)].data.payload;
    const root = sema.ir.extraData(Ir.Inst.Block, payload.payload_index);
    const declarations = sema.ir.bodySlice(root.end, root.data.body_len);

    // FIXME: We are going to get burned by this if we don't formalize it.
    // Adding common constants to the constant pool.
    for ([_]u64{ 0, 1 }) |value| {
        _ = try sema.getConstant(.{ .integer = value });
    }

    for (declarations) |decl_inst| {
        try irDeclaration(sema, null, decl_inst);
    }
}
/// Bytecode emitter for a single knot.
///
/// Owns the bookkeeping needed while the knot is being lowered: labels,
/// pending jump fixups, and the map from IR instructions to their `Ref`s.
/// The bytecode and the knot-local constant list live in `knot`.
const Chunk = struct {
    sema: *Sema,
    /// The knot being emitted into.
    knot: *CompiledStory.Knot,
    labels: std.ArrayListUnmanaged(Label) = .empty,
    fixups: std.ArrayListUnmanaged(Fixup) = .empty,
    /// Result of every IR instruction lowered so far that produced a `Ref`.
    inst_map: std.AutoHashMapUnmanaged(Ir.Inst.Index, Ref) = .empty,

    /// Offset stored in a label that has been created but not yet bound.
    const dummy_address = 0xffffffff;

    const Label = struct {
        /// Bytecode offset the label is bound to, or `dummy_address`.
        code_offset: usize,
    };

    /// A jump whose 16-bit operand still has to be written.
    const Fixup = struct {
        mode: enum {
            /// Operand is the distance from the end of the operand to the label.
            relative,
            /// Operand is the label's bytecode offset.
            absolute,
        },
        label_index: u32,
        /// Offset of the first operand byte.
        code_offset: u32,
    };

    /// Appends a one-byte instruction. The returned `.index` is its offset.
    fn addByteOp(chunk: *Chunk, op: Story.Opcode) error{OutOfMemory}!Ref {
        const gpa = chunk.sema.gpa;
        const bytecode = &chunk.knot.bytecode;
        const byte_index = bytecode.items.len;
        try bytecode.append(gpa, @intFromEnum(op));
        return .{ .index = @intCast(byte_index) };
    }

    /// Appends an instruction with a one-byte operand. The returned `.index`
    /// is the offset of the opcode byte.
    fn addConstOp(chunk: *Chunk, op: Story.Opcode, arg: u8) error{OutOfMemory}!Ref {
        const gpa = chunk.sema.gpa;
        const bytecode = &chunk.knot.bytecode;
        const byte_index = bytecode.items.len;
        try bytecode.ensureUnusedCapacity(gpa, 2);
        bytecode.appendAssumeCapacity(@intFromEnum(op));
        bytecode.appendAssumeCapacity(arg);
        return .{ .index = @intCast(byte_index) };
    }

    /// Appends a jump with a placeholder 16-bit operand (`0xff 0xff`).
    /// Unlike the other emitters, the returned `.index` is the offset of the
    /// first operand byte, which is what a `Fixup` needs.
    fn addJumpOp(chunk: *Chunk, op: Story.Opcode) error{OutOfMemory}!Ref {
        const gpa = chunk.sema.gpa;
        const bytecode = &chunk.knot.bytecode;
        try bytecode.ensureUnusedCapacity(gpa, 3);
        bytecode.appendAssumeCapacity(@intFromEnum(op));
        bytecode.appendAssumeCapacity(0xff);
        bytecode.appendAssumeCapacity(0xff);
        return .{ .index = @intCast(bytecode.items.len - 2) };
    }

    /// Translates an IR operand into a `Ref`.
    ///
    /// An operand that names an instruction must already be present in
    /// `inst_map`. Of the remaining operands, `bool_true` / `bool_false` map
    /// to the matching `Ref`, and anything else becomes a `.constant` whose
    /// index is the operand's integer value.
    fn resolveInst(chunk: *Chunk, ref: Ir.Inst.Ref) Ref {
        if (ref.toIndex()) |index| {
            return chunk.inst_map.get(index).?;
        }
        switch (ref) {
            .bool_true => return .bool_true,
            .bool_false => return .bool_false,
            else => return .{ .constant = @intFromEnum(ref) },
        }
    }

    /// Emits jump `op` and records that its operand must later be patched
    /// with the relative distance to `label`.
    fn addFixup(chunk: *Chunk, op: Story.Opcode, label: usize) !void {
        const code_ref = try chunk.addJumpOp(op);
        return chunk.fixups.append(chunk.sema.gpa, .{
            .mode = .relative,
            .label_index = @intCast(label),
            .code_offset = code_ref.index,
        });
    }

    /// Emits jump `op` and records that its operand must later be patched
    /// with the absolute offset of `label`.
    fn addFixupAbsolute(chunk: *Chunk, op: Story.Opcode, label: usize) !void {
        const code_ref = try chunk.addJumpOp(op);
        return chunk.fixups.append(chunk.sema.gpa, .{
            .mode = .absolute,
            .label_index = @intCast(label),
            .code_offset = code_ref.index,
        });
    }

    /// Creates a new, unbound label and returns its index.
    fn addLabel(chunk: *Chunk) error{OutOfMemory}!usize {
        const label_index = chunk.labels.items.len;
        try chunk.labels.append(chunk.sema.gpa, .{
            .code_offset = dummy_address,
        });
        return label_index;
    }

    /// Binds `label_index` to the current end of the bytecode.
    fn setLabel(chunk: *Chunk, label_index: usize) void {
        const code_offset = chunk.knot.bytecode.items.len;
        // Strictly less: `labels.items.len` itself is not a valid index.
        assert(label_index < chunk.labels.items.len);
        const label_data = &chunk.labels.items[label_index];
        label_data.code_offset = code_offset;
    }

    /// Writes the operand of every recorded jump, big-endian.
    ///
    /// Every label referenced by a fixup must have been bound with
    /// `setLabel`. Returns `error.InvalidJump` when a relative jump would
    /// have to go backwards or when the operand does not fit the encoding.
    fn resolveLabels(chunk: *Chunk) !void {
        const bytecode = &chunk.knot.bytecode;
        for (chunk.fixups.items) |fixup| {
            const label = chunk.labels.items[fixup.label_index];
            assert(label.code_offset != dummy_address);

            // Both operand bytes must lie inside the emitted code.
            const after_operand = @as(usize, fixup.code_offset) + 2;
            assert(after_operand <= bytecode.items.len);

            const target_offset: usize = switch (fixup.mode) {
                .relative => distance: {
                    // The operand is unsigned, so only forward jumps can be
                    // expressed; refuse instead of underflowing.
                    if (label.code_offset < after_operand) return error.InvalidJump;
                    break :distance label.code_offset - after_operand;
                },
                .absolute => label.code_offset,
            };
            if (target_offset >= std.math.maxInt(u16)) {
                std.debug.print("Too much code to jump over!\n", .{});
                return error.InvalidJump;
            }
            bytecode.items[fixup.code_offset] = @intCast((target_offset >> 8) & 0xff);
            bytecode.items[fixup.code_offset + 1] = @intCast(target_offset & 0xff);
        }
    }

    /// Returns the slot of `id` in the knot-local constant list, adding it
    /// when it is not there yet. Reusing slots keeps the list within reach of
    /// the one-byte operand for as long as possible.
    ///
    /// Returns `error.TooManyConstants` when a new slot would not fit in a
    /// `u8`; in that case the list is left unchanged.
    fn internKnotConstant(chunk: *Chunk, id: u32) InnerError!u8 {
        const pool = &chunk.knot.constants;
        const slot = std.mem.indexOfScalar(u32, pool.items, id) orelse pool.items.len;
        const operand = std.math.cast(u8, slot) orelse return error.TooManyConstants;
        if (slot == pool.items.len) try pool.append(chunk.sema.gpa, id);
        return operand;
    }

    /// Emits the instruction that pushes the value of `ref`, where one is
    /// needed, and returns the ref of what was emitted.
    ///
    /// `.none` and `.index` refs emit nothing and are returned unchanged.
    fn doLoad(chunk: *Chunk, ref: Ref) InnerError!Ref {
        switch (ref) {
            .bool_true => return chunk.addByteOp(.true),
            .bool_false => return chunk.addByteOp(.false),
            .none, .index => return ref,
            .constant => |id| {
                const slot = try chunk.internKnotConstant(id);
                return chunk.addConstOp(.load_const, slot);
            },
            .global => |id| {
                const slot = try chunk.internKnotConstant(id);
                return chunk.addConstOp(.load_global, slot);
            },
            .local => |id| return chunk.addConstOp(.load, @intCast(id)),
        }
    }
};
/// Output of `compile`: everything needed to populate a runtime `Story`.
/// All three slices are owned by this value; release them with `deinit`.
pub const CompiledStory = struct {
    knots: []Knot,
    /// Story-wide constant pool.
    constants: []Constant,
    /// For each global, the index in `constants` of its name.
    globals: []u32,

    /// One compiled knot.
    pub const Knot = struct {
        name: Ir.NullTerminatedString,
        arity: u32,
        stack_size: u32,
        /// Knot-local list of indices into the story-wide constant pool.
        constants: std.ArrayListUnmanaged(u32) = .empty,
        bytecode: std.ArrayListUnmanaged(u8) = .empty,
    };

    pub const Constant = union(enum) {
        integer: u64,
        string: Ir.NullTerminatedString,
    };

    /// Frees every knot's lists and the three top-level slices, then poisons
    /// `self`. `gpa` must be the allocator that was passed to `compile`.
    pub fn deinit(self: *CompiledStory, gpa: std.mem.Allocator) void {
        for (self.knots) |*knot| {
            knot.bytecode.deinit(gpa);
            knot.constants.deinit(gpa);
        }
        gpa.free(self.constants);
        gpa.free(self.globals);
        gpa.free(self.knots);
        self.* = undefined;
    }

    /// Populates `story` from this compiled story: one runtime object per
    /// constant, one entry (initially `null`) per global, and one content
    /// path per knot.
    ///
    /// Each knot's bytecode and constant list are moved out of the knot, so
    /// the knots are left with empty lists afterwards.
    ///
    /// NOTE(review): if moving the bytecode or creating the content path
    /// fails, slices already moved out of the knot do not appear to be freed
    /// on that path — confirm against `Object.ContentPath.create`.
    pub fn buildRuntime(
        self: *CompiledStory,
        gpa: std.mem.Allocator,
        ir: Ir,
        story: *Story,
    ) !void {
        const pool = &story.constants_pool;

        // Reserve everything up front so the loops below can append freely.
        try pool.ensureUnusedCapacity(gpa, self.constants.len);
        try story.paths.ensureUnusedCapacity(gpa, self.knots.len);
        try story.globals.ensureUnusedCapacity(gpa, @intCast(self.globals.len));

        for (self.constants) |constant| switch (constant) {
            .integer => |value| {
                const number: *Object.Number = try .create(story, .{
                    .integer = @intCast(value),
                });
                pool.appendAssumeCapacity(&number.base);
            },
            .string => |ref| {
                const text: *Object.String = try .create(story, ir.nullTerminatedString(ref));
                pool.appendAssumeCapacity(&text.base);
            },
        };

        for (self.globals) |name_index| {
            const name = ir.nullTerminatedString(self.constants[name_index].string);
            story.globals.putAssumeCapacity(name, null);
        }

        for (self.knots) |*knot| {
            const path: *Object.ContentPath = try .create(story, .{
                .name = try .create(story, ir.nullTerminatedString(knot.name)),
                .arity = @intCast(knot.arity),
                .locals_count = @intCast(knot.stack_size - knot.arity),
                .const_pool = try knot.constants.toOwnedSlice(gpa),
                .bytes = try knot.bytecode.toOwnedSlice(gpa),
            });
            story.paths.appendAssumeCapacity(&path.base);
        }
    }
};
/// Lowers `ir` into a `CompiledStory`.
///
/// The caller owns the result and must release it with
/// `CompiledStory.deinit`, passing the same `gpa`. On error nothing is
/// handed to the caller, and slices already taken out of the analysis state
/// are freed again.
pub fn compile(gpa: std.mem.Allocator, ir: *const Ir) !CompiledStory {
    var sema: Sema = .{
        .gpa = gpa,
        .ir = ir,
    };
    defer sema.deinit();
    try file(&sema, .file_inst);

    // Once a list has been turned into an owned slice, `sema.deinit` no
    // longer knows about that memory, so each slice is guarded separately
    // until the whole result can be returned.
    const constants = try sema.constants.toOwnedSlice(gpa);
    errdefer gpa.free(constants);
    const globals = try sema.globals.toOwnedSlice(gpa);
    errdefer gpa.free(globals);
    const knots = try sema.knots.toOwnedSlice(gpa);

    return .{
        .constants = constants,
        .globals = globals,
        .knots = knots,
    };
}