feat: basic code generation

This commit is contained in:
Brett Broadhurst 2026-03-02 13:20:02 -07:00
parent 3ab279de0f
commit 55346fcd85
Failed to generate hash of commit
4 changed files with 579 additions and 16 deletions

View file

@ -1,8 +1,554 @@
const std = @import("std");
const Ast = @import("./Ast.zig");
const Story = @import("./Story.zig");
const Ast = @import("Ast.zig");
const Story = @import("Story.zig");
const StringIndexAdapter = std.hash_map.StringIndexAdapter;
const StringIndexContext = std.hash_map.StringIndexContext;
const assert = std.debug.assert;
const AstGen = @This();
/// Allocator backing every list and table owned by the generator.
gpa: std.mem.Allocator,
/// Syntax tree being compiled; lexemes are sliced out of `tree.source`.
tree: *const Ast,
/// Story that receives the generated content paths and global names.
story: *Story,
// TODO: Output string_bytes.
// NOTE: String bytes have a 4GiB limit.
/// Interned strings stored back to back, each followed by a 0 byte
/// (see `Scope.makeString`).
string_bytes: std.ArrayListUnmanaged(u8) = .empty,
/// Set of offsets into `string_bytes`, hashed and compared by the string
/// found at that offset.
string_table: std.HashMapUnmanaged(u32, void, StringIndexContext, std.hash_map.default_max_load_percentage) = .empty,
/// Labels handed out by `Scope.makeLabel`; a label id is an index into this list.
label_stack: std.ArrayListUnmanaged(Label) = .empty,
/// Pending jumps. Nothing in this file pushes to it yet.
jump_stack: std.ArrayListUnmanaged(Jump) = .empty,
/// Semantic errors recorded by `Scope.fail`.
errors: std.ArrayListUnmanaged(Ast.Error) = .empty,
/// Name given to the chunk generated for the file's leading block.
default_knot_name: [:0]const u8 = "@main@",
/// Error set used by the `check*` functions. Currently the global error set.
// NOTE(review): presumably spelled out explicitly because the check functions
// recurse into each other; consider narrowing it to a named set.
pub const CheckError = anyerror;
/// What an identifier resolves to inside a `Scope`.
pub const Symbol = union(enum) {
/// Declared by a `var_decl` or `const_decl` node.
global_variable: struct {
/// True when declared by a `const_decl`; assignment is then rejected.
is_constant: bool,
/// Constant-pool index of the string object holding the variable's name.
constant_slot: usize,
/// The declaration node.
node: *const Ast.Node,
},
/// Declared by a `temp_decl` node.
local_variable: struct {
/// Slot computed as `arity + locals_count` of the chunk at declaration time.
stack_slot: usize,
/// The declaration node.
node: *const Ast.Node,
},
/// Nothing in this file creates parameter symbols yet; identifier loads
/// read them through `stack_slot`.
parameter: struct {
constant_slot: usize,
stack_slot: usize,
},
};
/// Entry of `jump_stack`: associates a label with a position in the bytecode.
// NOTE(review): nothing in this file constructs a Jump yet; presumably
// `code_offset` is the location of the jump operand to patch — confirm.
pub const Jump = struct {
/// Index into `label_stack`.
label_index: usize,
code_offset: usize,
};
/// Entry of `label_stack`.
pub const Label = struct {
/// Bytecode length of the chunk at the time `Scope.setLabel` was called;
/// `Scope.makeLabel` initialises it to 0xffffffff.
code_offset: usize,
};
/// Bytecode and constant pool being assembled for a single content path.
pub const Chunk = struct {
    /// Name of the content path this chunk becomes.
    name: [:0]const u8,
    /// Number of parameters; local stack slots are numbered after them.
    arity: usize,
    /// Number of `temp` declarations seen so far.
    locals_count: usize,
    /// Objects referenced by index from the bytecode.
    const_pool: std.ArrayListUnmanaged(*Story.Object),
    /// Emitted instruction stream.
    bytes: std.ArrayListUnmanaged(u8),

    /// Turn the chunk into a `ContentPath` object owned by the story.
    ///
    /// Runs `scope.backpatch()` first, then moves the constant pool and the
    /// bytecode out of the chunk (both lists are left empty) and hands them to
    /// `Story.Object.ContentPath.create`.
    ///
    /// Returns whatever error backpatching, allocation or object creation
    /// produces. On error the slices taken out of the chunk are freed here.
    pub fn finalize(chunk: *Chunk, scope: *Scope) !*Story.Object.ContentPath {
        const gpa = scope.global.gpa;
        const story = scope.global.story;
        try scope.backpatch();

        const const_pool = try chunk.const_pool.toOwnedSlice(gpa);
        // After toOwnedSlice the list no longer owns the memory, so a later
        // failure would leak it without this.
        errdefer gpa.free(const_pool);
        const bytes = try chunk.bytes.toOwnedSlice(gpa);
        errdefer gpa.free(bytes);

        const knot_name = try Story.Object.String.create(story, chunk.name);
        // NOTE(review): assumes ContentPath.create does not free the slices
        // itself when it fails — confirm against Story.zig.
        const content_path = try Story.Object.ContentPath.create(
            story,
            knot_name,
            chunk.arity,
            chunk.locals_count,
            const_pool,
            bytes,
        );
        return content_path;
    }

    /// Free both lists and poison the chunk.
    pub fn deinit(chunk: *Chunk, gpa: std.mem.Allocator) void {
        chunk.const_pool.deinit(gpa);
        chunk.bytes.deinit(gpa);
        chunk.* = undefined;
    }
};
/// Lexical scope used while walking the tree. Scopes form a chain through
/// `parent`; all scopes of one content path write into the same `Chunk`.
pub const Scope = struct {
    /// Enclosing scope, or null for the outermost one.
    parent: ?*Scope,
    /// Shared generator state.
    global: *AstGen,
    /// Chunk that instructions and constants are written to.
    chunk: *Chunk,
    /// Identifiers declared directly in this scope, keyed by lexeme.
    symbol_table: std.StringHashMapUnmanaged(Symbol),
    /// Length of `global.jump_stack` when the scope was created.
    jump_stack_top: usize,
    /// Length of `global.label_stack` when the scope was created.
    label_stack_top: usize,
    /// Label id assigned by `checkAnonymousKnot`; null otherwise.
    exit_label: ?usize,

    /// Free the symbol table. Labels, jumps and the chunk are not owned here.
    pub fn deinit(scope: *Scope) void {
        const gpa = scope.global.gpa;
        scope.symbol_table.deinit(gpa);
    }

    /// Record a semantic error located at `node` and return
    /// `error.SemanticError`, or `error.OutOfMemory` if the error could not be
    /// recorded. Intended to be used as `return scope.fail(...)`.
    pub fn fail(
        scope: *Scope,
        tag: Ast.Error.Tag,
        node: *const Ast.Node,
    ) error{ SemanticError, OutOfMemory } {
        const gpa = scope.global.gpa;
        const err: Ast.Error = .{
            .tag = tag,
            .loc = .{
                .start = node.loc.start,
                .end = node.loc.end,
            },
        };
        try scope.global.errors.append(gpa, err);
        return error.SemanticError;
    }

    /// Resolve the lexeme of `node`, searching this scope first and then each
    /// parent in turn. Returns null when no scope declares it.
    pub fn lookupIdentifier(self: *Scope, node: *const Ast.Node) ?Symbol {
        const token_bytes = self.global.getLexemeFromNode(node);
        var current_scope: ?*Scope = self;
        while (current_scope) |scope| : (current_scope = scope.parent) {
            if (scope.symbol_table.get(token_bytes)) |symbol| return symbol;
        }
        return null;
    }

    /// Bind the lexeme of `node` to `symbol` in this scope, replacing any
    /// binding of the same name already present in this scope.
    pub fn insertIdentifier(scope: *Scope, node: *const Ast.Node, symbol: Symbol) !void {
        const gpa = scope.global.gpa;
        const token_bytes = scope.global.getLexemeFromNode(node);
        // The key is a slice of the source text; it is not copied.
        return scope.symbol_table.put(gpa, token_bytes, symbol);
    }

    /// Create a child scope that shares this scope's chunk and generator
    /// state and starts with an empty symbol table.
    pub fn makeSubBlock(parent_scope: *Scope) Scope {
        const global = parent_scope.global;
        return .{
            .parent = parent_scope,
            .global = global,
            .chunk = parent_scope.chunk,
            .symbol_table = .empty,
            .jump_stack_top = global.jump_stack.items.len,
            .label_stack_top = global.label_stack.items.len,
            .exit_label = null,
        };
    }

    /// Allocate a new, not yet placed label and return its id (its index in
    /// `global.label_stack`).
    pub fn makeLabel(scope: *Scope) !usize {
        const gpa = scope.global.gpa;
        const label_stack = &scope.global.label_stack;
        const label_id = label_stack.items.len;
        // 0xffffffff marks a label whose position has not been set.
        const label_data: Label = .{ .code_offset = 0xffffffff };
        try label_stack.append(gpa, label_data);
        return label_id;
    }

    /// Place `label_id` at the current end of the chunk's bytecode.
    /// `label_id` must have been returned by `makeLabel`.
    pub fn setLabel(scope: *Scope, label_id: usize) void {
        const code_offset = scope.chunk.bytes.items.len;
        const label_stack = &scope.global.label_stack;
        // Strictly less than: an id equal to the length would index past the end.
        assert(label_id < label_stack.items.len);
        label_stack.items[label_id].code_offset = code_offset;
    }

    /// Append `object` to the chunk's constant pool and return its index.
    /// No deduplication is performed.
    pub fn makeConstant(scope: *Scope, object: *Story.Object) !usize {
        const gpa = scope.global.gpa;
        const chunk = scope.chunk;
        const const_id = chunk.const_pool.items.len;
        try chunk.const_pool.append(gpa, object);
        return const_id;
    }

    /// Location of an interned string inside `AstGen.string_bytes`.
    pub const IndexSlice = struct {
        /// Offset of the first byte.
        index: u32,
        /// Length in bytes, not counting the trailing 0.
        len: u32,
    };

    /// Intern `bytes` in the generator's string table.
    ///
    /// The bytes are appended to `string_bytes` first and then looked up; if
    /// an equal string already exists the appended copy is dropped again and
    /// the existing offset is returned. A new string is terminated with 0.
    // NOTE(review): the table compares up to the first 0 byte, so `bytes`
    // presumably must not contain 0 — confirm.
    pub fn makeString(scope: *Scope, bytes: []const u8) !IndexSlice {
        const global = scope.global;
        const gpa = global.gpa;
        const string_bytes = &global.string_bytes;
        const str_index: u32 = @intCast(string_bytes.items.len);
        try string_bytes.appendSlice(gpa, bytes);
        const key: []const u8 = string_bytes.items[str_index..];
        const gop = try global.string_table.getOrPutContextAdapted(gpa, key, StringIndexAdapter{
            .bytes = string_bytes,
        }, StringIndexContext{
            .bytes = string_bytes,
        });
        if (gop.found_existing) {
            // Already interned: discard the copy appended above.
            string_bytes.shrinkRetainingCapacity(str_index);
            return .{ .index = gop.key_ptr.*, .len = @intCast(key.len) };
        } else {
            gop.key_ptr.* = str_index;
            try string_bytes.append(gpa, 0);
            return .{ .index = str_index, .len = @intCast(key.len) };
        }
    }

    /// Register the lexeme of `node` in `story.globals` (with a null value)
    /// and bind it to `symbol` in this scope.
    pub fn makeGlobal(scope: *Scope, node: *const Ast.Node, symbol: Symbol) !void {
        const global_data = scope.global;
        const gpa = global_data.gpa;
        const token_bytes = global_data.getLexemeFromNode(node);
        try global_data.story.globals.put(gpa, token_bytes, null);
        return scope.insertIdentifier(node, symbol);
    }

    /// Append one raw byte to the chunk's bytecode.
    pub fn emitByte(scope: *Scope, byte: u8) !void {
        return scope.chunk.bytes.append(scope.global.gpa, byte);
    }

    /// Emit an instruction that has no operand.
    pub fn emitSimpleInst(scope: *Scope, op: Story.Opcode) !void {
        return scope.emitByte(@intFromEnum(op));
    }

    /// Emit an instruction followed by a one-byte operand.
    /// Returns `error.TooManyConstants` when `arg` does not fit in a byte.
    pub fn emitConstInst(scope: *Scope, op: Story.Opcode, arg: usize) !void {
        if (arg >= 256) return error.TooManyConstants;
        try scope.emitSimpleInst(op);
        try scope.emitByte(@intCast(arg));
    }

    /// Emit a jump instruction with a two-byte 0xff 0xff placeholder operand
    /// and return the bytecode offset of the first placeholder byte so the
    /// operand can be patched later.
    pub fn emitJumpInst(scope: *Scope, op: Story.Opcode) !usize {
        const bytes = &scope.chunk.bytes;
        try scope.emitSimpleInst(op);
        try scope.emitByte(0xff);
        try scope.emitByte(0xff);
        return bytes.items.len - 2;
    }

    /// Resolve pending jumps. Not implemented yet; currently does nothing.
    pub fn backpatch(_: *Scope) !void {}
};
/// Release every list and table owned by the generator.
/// The story and the syntax tree are left untouched.
pub fn deinit(astgen: *AstGen) void {
    const allocator = astgen.gpa;
    astgen.errors.deinit(allocator);
    astgen.jump_stack.deinit(allocator);
    astgen.label_stack.deinit(allocator);
    astgen.string_table.deinit(allocator);
    astgen.string_bytes.deinit(allocator);
}
/// Return the slice of source text covered by `node`.
/// The result borrows from `tree.source`; nothing is copied.
fn getLexemeFromNode(astgen: *const AstGen, node: *const Ast.Node) []const u8 {
    const loc = node.loc;
    assert(loc.start <= loc.end);
    return astgen.tree.source[loc.start..loc.end];
}
/// Generate code for a unary expression: the operand (stored in `bin.lhs`)
/// followed by `op`.
fn checkUnaryOp(scope: *Scope, node: *const Ast.Node, op: Story.Opcode) CheckError!void {
    const operand = node.data.bin.lhs;
    try checkExpr(scope, operand);
    return scope.emitSimpleInst(op);
}
/// Generate code for a binary expression: left operand, right operand,
/// then `op`. Both operands must be present.
fn checkBinaryOp(scope: *Scope, node: *const Ast.Node, op: Story.Opcode) CheckError!void {
    const lhs = node.data.bin.lhs;
    const rhs = node.data.bin.rhs;
    assert(lhs != null and rhs != null);
    try checkExpr(scope, lhs);
    try checkExpr(scope, rhs);
    return scope.emitSimpleInst(op);
}
/// Generate short-circuit code for a logical expression.
///
/// Emits the left operand, then a conditional jump (`jmp_t` when
/// `binary_or` is true, `jmp_f` otherwise), a `pop`, the right operand, and
/// finally patches the jump.
// NOTE(review): `Scope.patchJump` is not defined in the visible `Scope`, and
// `checkExpr` does not dispatch to this function, so it is currently unused.
// The jump operand encoding is not visible here — confirm before enabling.
fn checkLogicalOp(scope: *Scope, node: *const Ast.Node, binary_or: bool) CheckError!void {
const data = node.data.bin;
assert(data.lhs != null and data.rhs != null);
try checkExpr(scope, data.lhs);
// Offset of the jump operand, to be patched once the right operand is emitted.
const else_branch = try scope.emitJumpInst(if (binary_or) .jmp_t else .jmp_f);
try scope.emitSimpleInst(.pop);
try checkExpr(scope, data.rhs);
try scope.patchJump(else_branch);
}
/// Emit the `true` instruction. The node carries no further information.
fn checkTrueLiteral(scope: *Scope, _: *const Ast.Node) CheckError!void {
    return scope.emitSimpleInst(.true);
}
/// Emit the `false` instruction. The node carries no further information.
fn checkFalseLiteral(scope: *Scope, _: *const Ast.Node) CheckError!void {
    return scope.emitSimpleInst(.false);
}
/// Parse the node's lexeme as an `f64`, store it as a number object in the
/// constant pool and emit a `load_const` for it.
/// Fails with the parser's error when the lexeme is not a valid float.
fn checkNumberLiteral(scope: *Scope, node: *const Ast.Node) CheckError!void {
    const astgen = scope.global;
    const value = try std.fmt.parseFloat(f64, astgen.getLexemeFromNode(node));
    const object = try Story.Object.Number.create(astgen.story, .{ .floating = value });
    const slot = try scope.makeConstant(@ptrCast(object));
    return scope.emitConstInst(.load_const, slot);
}
/// Intern the node's lexeme, store it as a string object in the constant
/// pool and emit a `load_const` for it.
fn checkStringLiteral(scope: *Scope, node: *const Ast.Node) CheckError!void {
    const astgen = scope.global;
    const text = astgen.getLexemeFromNode(node);
    // The interned location is not used yet; only the side effect matters.
    _ = try scope.makeString(text);
    const object = try Story.Object.String.create(astgen.story, text);
    const slot = try scope.makeConstant(@ptrCast(object));
    return scope.emitConstInst(.load_const, slot);
}
/// Generate code for a string expression by treating its child (`bin.lhs`)
/// as a string literal. The child must be present.
fn checkStringExpr(scope: *Scope, node: *const Ast.Node) CheckError!void {
    const child = node.data.bin.lhs;
    assert(child != null);
    if (child) |expr_node| {
        return checkStringLiteral(scope, expr_node);
    }
}
/// Emit the load instruction for an identifier.
///
/// Globals are loaded with `load_global` and their constant slot; locals and
/// parameters with `load` and their stack slot. An identifier that no scope
/// declares is reported as `unknown_identifier`.
fn checkIdentifier(scope: *Scope, node: *const Ast.Node) !void {
    const symbol = scope.lookupIdentifier(node) orelse
        return scope.fail(.unknown_identifier, node);
    return switch (symbol) {
        .global_variable => |global| scope.emitConstInst(.load_global, global.constant_slot),
        .local_variable => |local| scope.emitConstInst(.load, local.stack_slot),
        .parameter => |param| scope.emitConstInst(.load, param.stack_slot),
    };
}
/// Generate code for an expression by dispatching on the node tag.
/// A null expression generates nothing; unsupported tags yield
/// `error.NotImplemented`.
fn checkExpr(scope: *Scope, expr: ?*const Ast.Node) CheckError!void {
    const node = expr orelse return;
    return switch (node.tag) {
        // Literals and names.
        .true_literal => checkTrueLiteral(scope, node),
        .false_literal => checkFalseLiteral(scope, node),
        .number_literal => checkNumberLiteral(scope, node),
        .string_literal => checkStringLiteral(scope, node),
        .string_expr => checkStringExpr(scope, node),
        .identifier => checkIdentifier(scope, node),
        // Arithmetic.
        .add_expr => checkBinaryOp(scope, node, .add),
        .subtract_expr => checkBinaryOp(scope, node, .sub),
        .multiply_expr => checkBinaryOp(scope, node, .mul),
        .divide_expr => checkBinaryOp(scope, node, .div),
        .mod_expr => checkBinaryOp(scope, node, .mod),
        .negate_expr => checkUnaryOp(scope, node, .neg),
        else => error.NotImplemented,
    };
}
/// Generate code for an expression statement: the expression followed by a
/// `pop` that discards its value. A statement without an expression
/// generates nothing.
fn checkExprStmt(scope: *Scope, stmt: *const Ast.Node) CheckError!void {
    if (stmt.data.bin.lhs) |expr_node| {
        try checkExpr(scope, expr_node);
        try scope.emitSimpleInst(.pop);
    }
}
/// Generate code for the expression wrapped by an inline-logic node
/// (`bin.lhs`). The wrapped expression must be present.
fn checkInlineLogicExpr(scope: *Scope, expr: *const Ast.Node) CheckError!void {
    const inner = expr.data.bin.lhs;
    assert(inner != null);
    try checkExpr(scope, inner);
}
/// Generate code for a content expression: every child is evaluated and
/// followed by a `content` instruction. Only string literals and inline
/// logic are supported; any other child yields `error.NotImplemented`.
fn checkContentExpr(scope: *Scope, expr: *const Ast.Node) CheckError!void {
    const children = expr.data.list.items orelse return;
    for (children) |child| {
        switch (child.tag) {
            .string_literal => try checkStringLiteral(scope, child),
            .inline_logic_expr => try checkInlineLogicExpr(scope, child),
            else => return error.NotImplemented,
        }
        // Each supported child is followed by `content`.
        try scope.emitSimpleInst(.content);
    }
}
/// Generate code for a content statement by delegating to its content
/// expression (`bin.lhs`), if there is one.
fn checkContentStmt(scope: *Scope, stmt: *const Ast.Node) CheckError!void {
    if (stmt.data.bin.lhs) |content_expr| {
        try checkContentExpr(scope, content_expr);
    }
}
/// Generate code for a block: its statements are checked in order inside a
/// fresh child scope, which is torn down when the block ends.
fn checkBlockStmt(parent_scope: *Scope, block_stmt: *const Ast.Node) CheckError!void {
    var inner_scope = parent_scope.makeSubBlock();
    defer inner_scope.deinit();
    if (block_stmt.data.list.items) |statements| {
        for (statements) |statement| {
            try checkStmt(&inner_scope, statement);
        }
    }
}
/// Generate code for an assignment statement.
///
/// The right-hand side is evaluated, stored into the target (`store_global`
/// with the constant slot for globals, `store` with the stack slot for
/// locals) and then popped.
///
/// Errors:
/// - `error.CompilerBug` when either side of the statement is missing.
/// - `unknown_identifier` (semantic error) when the target is not declared.
/// - `assignment_to_const` (semantic error) when the target is a constant.
/// - `error.NotImplemented` when the target is a parameter.
fn checkAssignStmt(scope: *Scope, stmt: *const Ast.Node) CheckError!void {
    const lhs = stmt.data.bin.lhs orelse return error.CompilerBug;
    const rhs = stmt.data.bin.rhs orelse return error.CompilerBug;
    const symbol = scope.lookupIdentifier(lhs) orelse
        return scope.fail(.unknown_identifier, lhs);

    switch (symbol) {
        .global_variable => |data| {
            // Reject before emitting anything for the right-hand side.
            if (data.is_constant) return scope.fail(.assignment_to_const, lhs);
            try checkExpr(scope, rhs);
            try scope.emitConstInst(.store_global, data.constant_slot);
        },
        .local_variable => |data| {
            try checkExpr(scope, rhs);
            try scope.emitConstInst(.store, data.stack_slot);
        },
        // A parameter is a valid lookup result, so this must not be
        // `unreachable`: report it as unsupported instead.
        .parameter => return error.NotImplemented,
    }
    try scope.emitSimpleInst(.pop);
}
/// Generate code for a `temp`, `var` or `const` declaration.
///
/// The initializer is evaluated first. A `temp` gets the next stack slot
/// (`arity + locals_count`) and is stored with `store`; a `var`/`const`
/// gets its name placed in the constant pool, is registered as a global and
/// stored with `store_global`. The stored value is popped afterwards.
/// Must only be called with one of those three node tags.
fn checkVarDecl(scope: *Scope, decl_node: *const Ast.Node) !void {
    const operands = decl_node.data.bin;
    const name_node = operands.lhs orelse return error.Fucked;
    const init_node = operands.rhs orelse return error.Fucked;
    try checkExpr(scope, init_node);

    switch (decl_node.tag) {
        .temp_decl => {
            const chunk = scope.chunk;
            // Locals are numbered after the parameters.
            const slot = chunk.arity + chunk.locals_count;
            chunk.locals_count += 1;
            try scope.insertIdentifier(name_node, .{
                .local_variable = .{
                    .node = decl_node,
                    .stack_slot = slot,
                },
            });
            try scope.emitConstInst(.store, slot);
        },
        .var_decl, .const_decl => |tag| {
            const astgen = scope.global;
            const name = astgen.getLexemeFromNode(name_node);
            _ = try scope.makeString(name);
            const name_object = try Story.Object.String.create(astgen.story, name);
            // Globals are addressed through their name in the constant pool.
            const slot = try scope.makeConstant(@ptrCast(name_object));
            try scope.makeGlobal(name_node, .{
                .global_variable = .{
                    .is_constant = tag == .const_decl,
                    .node = decl_node,
                    .constant_slot = slot,
                },
            });
            try scope.emitConstInst(.store_global, slot);
        },
        else => unreachable,
    }
    try scope.emitSimpleInst(.pop);
}
/// Generate code for a statement by dispatching on the node tag.
/// Unsupported statement kinds yield `error.NotImplemented`.
fn checkStmt(scope: *Scope, stmt: *const Ast.Node) CheckError!void {
    switch (stmt.tag) {
        .var_decl, .const_decl, .temp_decl => try checkVarDecl(scope, stmt),
        .assign_stmt => try checkAssignStmt(scope, stmt),
        .content_stmt => try checkContentStmt(scope, stmt),
        .expr_stmt => try checkExprStmt(scope, stmt),
        else => return error.NotImplemented,
    }
}
/// Generate code for the unnamed leading block of a file.
///
/// Creates an exit label, records it on `parent_scope`, generates the body,
/// places the label after it and emits `exit`.
fn checkAnonymousKnot(parent_scope: *Scope, body: *const Ast.Node) CheckError!void {
    const label = try parent_scope.makeLabel();
    parent_scope.exit_label = label;
    try checkBlockStmt(parent_scope, body);
    parent_scope.setLabel(label);
    return parent_scope.emitSimpleInst(.exit);
}
/// Generate code for a file node.
///
/// Sets up the default chunk and the file-level scope and interns the
/// default knot name. If the file's first child is a block statement it is
/// compiled as the anonymous knot and the resulting content path is appended
/// to `story.paths`. Any other children are currently ignored.
fn checkFile(astgen: *AstGen, file: *const Ast.Node) CheckError!void {
    var main_chunk: Chunk = .{
        .name = astgen.default_knot_name,
        .arity = 0,
        .locals_count = 0,
        .const_pool = .empty,
        .bytes = .empty,
    };
    defer main_chunk.deinit(astgen.gpa);

    var file_scope: Scope = .{
        .parent = null,
        .global = astgen,
        .chunk = &main_chunk,
        .symbol_table = .empty,
        .jump_stack_top = astgen.jump_stack.items.len,
        .label_stack_top = astgen.label_stack.items.len,
        .exit_label = null,
    };
    defer file_scope.deinit();

    _ = try file_scope.makeString(astgen.default_knot_name);

    const children = file.data.list.items orelse return;
    if (children.len == 0) return;

    // TODO: intern paths
    const first_child = children[0];
    if (first_child.tag != .block_stmt) return;

    try checkAnonymousKnot(&file_scope, first_child);
    const content_path = try main_chunk.finalize(&file_scope);
    try astgen.story.paths.append(astgen.gpa, @ptrCast(content_path));
}
/// Debug helper: print every 0-terminated string in `string_bytes` to
/// stderr as its offset, its bytes in hex and its text. Trailing bytes
/// without a terminator are not printed.
fn dumpStringsWithHex(astgen: *const AstGen) void {
    const bytes = astgen.string_bytes.items;
    var offset: usize = 0;
    while (std.mem.indexOfScalarPos(u8, bytes, offset, 0)) |terminator| : (offset = terminator + 1) {
        const text = bytes[offset..terminator];
        std.debug.print("[{d:04}] ", .{offset});
        for (text) |byte| {
            std.debug.print("{x:02} ", .{byte});
        }
        std.debug.print("00 {s}\n", .{text});
    }
}
/// Perform code generation via tree-walk.
///
/// Builds a fresh `Story` from `tree`, using `gpa` for every allocation, and
/// returns it by value. Fails with `error.Fucked` when the tree has no root,
/// or with whatever error code generation produces.
// NOTE(review): the story is populated through a pointer to a local and then
// returned by value; confirm no created object keeps that pointer.
// NOTE(review): recorded semantic errors are discarded together with the
// generator; callers only see `error.SemanticError`.
pub fn generate(gpa: std.mem.Allocator, tree: *const Ast) !Story {
    const root_node = tree.root orelse return error.Fucked;
    var story: Story = .{
        .allocator = gpa,
        .is_exited = false,
        .can_advance = false,
        .gc_objects = .{},
        .globals = .empty,
        .paths = .empty,
        .stack = .empty,
        .call_stack = .empty,
    };
    var astgen: AstGen = .{
        .gpa = gpa,
        .tree = tree,
        .story = &story,
    };
    defer astgen.deinit();

    // Start the string storage with a single 0 byte before anything is interned.
    try astgen.string_bytes.append(gpa, 0);
    try checkFile(&astgen, root_node);
    // Debug output of the interned strings; printed on every successful run.
    dumpStringsWithHex(&astgen);
    return story;
}