feat: basic code generation

This commit is contained in:
Brett Broadhurst 2026-03-02 13:20:02 -07:00
parent 3ab279de0f
commit 55346fcd85
Failed to generate hash of commit
4 changed files with 579 additions and 16 deletions

View file

@ -1,8 +1,554 @@
const std = @import("std");
const Ast = @import("./Ast.zig");
const Story = @import("./Story.zig");
const Ast = @import("Ast.zig");
const Story = @import("Story.zig");
const StringIndexAdapter = std.hash_map.StringIndexAdapter;
const StringIndexContext = std.hash_map.StringIndexContext;
const assert = std.debug.assert;
const AstGen = @This();
/// Allocator used for every list and table owned by the generator.
gpa: std.mem.Allocator,
/// Syntax tree being compiled; lexemes are sliced out of `tree.source`.
tree: *const Ast,
/// Story that receives the generated globals and content paths.
story: *Story,
// TODO: Output string_bytes.
// NOTE: String bytes have a 4GiB limit.
/// Backing bytes for interned strings; `Scope.makeString` appends a 0 byte
/// after every newly stored string.
string_bytes: std.ArrayListUnmanaged(u8) = .empty,
/// Set of `u32` start offsets into `string_bytes`, used by `Scope.makeString`
/// to detect strings that were already stored.
string_table: std.HashMapUnmanaged(u32, void, StringIndexContext, std.hash_map.default_max_load_percentage) = .empty,
/// Labels created by `Scope.makeLabel`; a label id is an index into this list.
label_stack: std.ArrayListUnmanaged(Label) = .empty,
/// NOTE(review): nothing in the visible code appends to this list yet;
/// presumably consumed by `Scope.backpatch` once that is implemented.
jump_stack: std.ArrayListUnmanaged(Jump) = .empty,
/// Diagnostics recorded by `Scope.fail`.
errors: std.ArrayListUnmanaged(Ast.Error) = .empty,
/// Name given to the chunk built for the file's leading block (see `checkFile`).
default_knot_name: [:0]const u8 = "@main@",
/// Error set returned by the `check*` functions; currently unconstrained.
pub const CheckError = anyerror;
/// What an identifier resolves to, as stored in a `Scope` symbol table.
pub const Symbol = union(enum) {
/// Variable introduced by a `var_decl` or `const_decl` node.
global_variable: struct {
/// True when declared with `const_decl`; `checkAssignStmt` rejects
/// assignments to such a variable.
is_constant: bool,
/// Constant-pool index of the string object holding the variable name.
constant_slot: usize,
/// Declaration node that introduced the variable.
node: *const Ast.Node,
},
/// Variable introduced by a `temp_decl` node.
local_variable: struct {
/// Slot computed as `arity + locals_count` of the chunk at declaration time.
stack_slot: usize,
/// Declaration node that introduced the variable.
node: *const Ast.Node,
},
/// NOTE(review): no visible code creates this variant yet; `checkIdentifier`
/// loads it from `stack_slot`. The role of `constant_slot` is not shown here.
parameter: struct {
constant_slot: usize,
stack_slot: usize,
},
};
/// Element type of `AstGen.jump_stack`.
/// NOTE(review): no visible code constructs a `Jump`; presumably it pairs a
/// jump site (`code_offset`) with the label it targets (`label_index`) so that
/// `Scope.backpatch` can resolve it later — confirm once backpatching exists.
pub const Jump = struct {
label_index: usize,
code_offset: usize,
};
/// Element type of `AstGen.label_stack`.
pub const Label = struct {
/// Byte offset into the chunk's code recorded by `Scope.setLabel`; holds the
/// placeholder `0xffffffff` from `Scope.makeLabel` until then.
code_offset: usize,
};
/// A unit of bytecode under construction together with its constant pool.
pub const Chunk = struct {
    /// Name stored on the content path object produced by `finalize`.
    name: [:0]const u8,
    /// Added to `locals_count` to pick the stack slot of the next temp.
    arity: usize,
    /// Number of temp variables declared in this chunk so far.
    locals_count: usize,
    /// Objects referenced from the bytecode by constant index.
    const_pool: std.ArrayListUnmanaged(*Story.Object),
    /// Emitted bytecode.
    bytes: std.ArrayListUnmanaged(u8),

    /// Runs `scope.backpatch()`, then moves the constant pool and bytecode out
    /// of the chunk into a newly created `Story.Object.ContentPath`.
    ///
    /// On success the returned object holds the two slices. On failure the
    /// slices already taken out of the chunk are freed here, so nothing is
    /// leaked on the error path.
    pub fn finalize(chunk: *Chunk, scope: *Scope) !*Story.Object.ContentPath {
        const gpa = scope.global.gpa;
        const story = scope.global.story;

        try scope.backpatch();

        // After `toOwnedSlice` the list is empty, so the slice must be released
        // explicitly if any later step fails.
        const const_pool = try chunk.const_pool.toOwnedSlice(gpa);
        errdefer gpa.free(const_pool);
        const bytes = try chunk.bytes.toOwnedSlice(gpa);
        errdefer gpa.free(bytes);

        const knot_name = try Story.Object.String.create(story, chunk.name);
        // NOTE(review): assumes `ContentPath.create` does not free its slice
        // arguments itself when it fails — confirm against Story.zig.
        const content_path = try Story.Object.ContentPath.create(
            story,
            knot_name,
            chunk.arity,
            chunk.locals_count,
            const_pool,
            bytes,
        );
        return content_path;
    }

    /// Frees whatever is still held by the two lists and poisons the chunk.
    pub fn deinit(chunk: *Chunk, gpa: std.mem.Allocator) void {
        chunk.const_pool.deinit(gpa);
        chunk.bytes.deinit(gpa);
        chunk.* = undefined;
    }
};
/// Lexical scope used while walking the tree. Scopes are chained through
/// `parent`; a scope made by `makeSubBlock` shares its parent's chunk.
pub const Scope = struct {
    /// Enclosing scope, or null for the outermost scope.
    parent: ?*Scope,
    /// Generator state shared by every scope.
    global: *AstGen,
    /// Chunk that instructions and constants are written to.
    chunk: *Chunk,
    /// Identifiers declared directly in this scope. Keys are not copied.
    symbol_table: std.StringHashMapUnmanaged(Symbol),
    /// Length of `global.jump_stack` when the scope was created.
    jump_stack_top: usize,
    /// Length of `global.label_stack` when the scope was created.
    label_stack_top: usize,
    /// Label id set by `checkAnonymousKnot`, otherwise null.
    exit_label: ?usize,

    /// Releases the symbol table.
    pub fn deinit(scope: *Scope) void {
        scope.symbol_table.deinit(scope.global.gpa);
    }

    /// Records a diagnostic of kind `tag` covering `node` and returns
    /// `error.SemanticError` (or `error.OutOfMemory` if recording fails).
    /// Intended to be used as `return scope.fail(...)`.
    pub fn fail(
        scope: *Scope,
        tag: Ast.Error.Tag,
        node: *const Ast.Node,
    ) error{ SemanticError, OutOfMemory } {
        const gpa = scope.global.gpa;
        const err: Ast.Error = .{
            .tag = tag,
            .loc = .{
                .start = node.loc.start,
                .end = node.loc.end,
            },
        };
        try scope.global.errors.append(gpa, err);
        return error.SemanticError;
    }

    /// Resolves the lexeme of `node`, searching this scope first and then each
    /// enclosing scope. Returns null when no scope declares it.
    pub fn lookupIdentifier(self: *Scope, node: *const Ast.Node) ?Symbol {
        const token_bytes = self.global.getLexemeFromNode(node);
        var current_scope: ?*Scope = self;
        while (current_scope) |scope| : (current_scope = scope.parent) {
            if (scope.symbol_table.get(token_bytes)) |symbol| return symbol;
        }
        return null;
    }

    /// Binds the lexeme of `node` to `symbol` in this scope, replacing any
    /// binding of the same name that this scope already holds.
    pub fn insertIdentifier(scope: *Scope, node: *const Ast.Node, symbol: Symbol) !void {
        const gpa = scope.global.gpa;
        const token_bytes = scope.global.getLexemeFromNode(node);
        return scope.symbol_table.put(gpa, token_bytes, symbol);
    }

    /// Creates a child scope that writes to the same chunk and starts with an
    /// empty symbol table.
    pub fn makeSubBlock(parent_scope: *Scope) Scope {
        const global = parent_scope.global;
        return .{
            .parent = parent_scope,
            .global = global,
            .chunk = parent_scope.chunk,
            .symbol_table = .empty,
            .jump_stack_top = global.jump_stack.items.len,
            .label_stack_top = global.label_stack.items.len,
            .exit_label = null,
        };
    }

    /// Allocates a new, not yet placed label and returns its id.
    pub fn makeLabel(scope: *Scope) !usize {
        const gpa = scope.global.gpa;
        const label_stack = &scope.global.label_stack;
        const label_id = label_stack.items.len;
        // 0xffffffff is the placeholder offset until `setLabel` is called.
        try label_stack.append(gpa, .{ .code_offset = 0xffffffff });
        return label_id;
    }

    /// Places label `label_id` at the current end of the chunk's bytecode.
    pub fn setLabel(scope: *Scope, label_id: usize) void {
        const label_stack = &scope.global.label_stack;
        // A valid id is strictly below the length; `<=` would let an
        // out-of-range index reach the element access below.
        assert(label_id < label_stack.items.len);
        label_stack.items[label_id].code_offset = scope.chunk.bytes.items.len;
    }

    /// Appends `object` to the chunk's constant pool and returns its index.
    pub fn makeConstant(scope: *Scope, object: *Story.Object) !usize {
        const chunk = scope.chunk;
        const const_id = chunk.const_pool.items.len;
        try chunk.const_pool.append(scope.global.gpa, object);
        return const_id;
    }

    /// Position and length of a string stored in `AstGen.string_bytes`.
    pub const IndexSlice = struct {
        index: u32,
        len: u32,
    };

    /// Stores `bytes` in `AstGen.string_bytes` unless an equal string is
    /// already there, and returns where the string lives.
    pub fn makeString(scope: *Scope, bytes: []const u8) !IndexSlice {
        const global = scope.global;
        const gpa = global.gpa;
        const string_bytes = &global.string_bytes;
        const str_index: u32 = @intCast(string_bytes.items.len);

        // Append first so the candidate can be hashed and compared in place.
        try string_bytes.appendSlice(gpa, bytes);
        const key: []const u8 = string_bytes.items[str_index..];
        const gop = try global.string_table.getOrPutContextAdapted(
            gpa,
            key,
            StringIndexAdapter{ .bytes = string_bytes },
            StringIndexContext{ .bytes = string_bytes },
        );
        if (gop.found_existing) {
            // Duplicate: drop the bytes that were just appended.
            string_bytes.shrinkRetainingCapacity(str_index);
            return .{ .index = gop.key_ptr.*, .len = @intCast(key.len) };
        }
        gop.key_ptr.* = str_index;
        // Only `key.len` is read after this append, so a reallocation is fine.
        try string_bytes.append(gpa, 0);
        return .{ .index = str_index, .len = @intCast(key.len) };
    }

    /// Registers the lexeme of `node` in `story.globals` with a null value and
    /// binds it to `symbol` in this scope.
    pub fn makeGlobal(scope: *Scope, node: *const Ast.Node, symbol: Symbol) !void {
        const global_data = scope.global;
        const token_bytes = global_data.getLexemeFromNode(node);
        try global_data.story.globals.put(global_data.gpa, token_bytes, null);
        return scope.insertIdentifier(node, symbol);
    }

    /// Appends one raw byte to the chunk's bytecode.
    pub fn emitByte(scope: *Scope, byte: u8) !void {
        return scope.chunk.bytes.append(scope.global.gpa, byte);
    }

    /// Emits an instruction that has no operand.
    pub fn emitSimpleInst(scope: *Scope, op: Story.Opcode) !void {
        return scope.emitByte(@intFromEnum(op));
    }

    /// Emits `op` followed by a one-byte operand. Fails with
    /// `error.TooManyConstants` when `arg` does not fit in a byte.
    pub fn emitConstInst(scope: *Scope, op: Story.Opcode, arg: usize) !void {
        if (arg >= 256) return error.TooManyConstants;
        try scope.emitSimpleInst(op);
        try scope.emitByte(@intCast(arg));
    }

    /// Emits `op` followed by two 0xff placeholder bytes and returns the
    /// offset of the first placeholder byte so the operand can be patched.
    pub fn emitJumpInst(scope: *Scope, op: Story.Opcode) !usize {
        const bytes = &scope.chunk.bytes;
        try scope.emitSimpleInst(op);
        try scope.emitByte(0xff);
        try scope.emitByte(0xff);
        return bytes.items.len - 2;
    }

    /// Placeholder: currently does nothing.
    pub fn backpatch(_: *Scope) !void {}
};
/// Frees every list and table owned by the generator.
pub fn deinit(astgen: *AstGen) void {
    const allocator = astgen.gpa;
    astgen.errors.deinit(allocator);
    astgen.jump_stack.deinit(allocator);
    astgen.label_stack.deinit(allocator);
    astgen.string_table.deinit(allocator);
    astgen.string_bytes.deinit(allocator);
}
/// Returns the slice of the tree's source text covered by `node`.
fn getLexemeFromNode(astgen: *const AstGen, node: *const Ast.Node) []const u8 {
    const loc = node.loc;
    assert(loc.start <= loc.end);
    return astgen.tree.source[loc.start..loc.end];
}
/// Generates code for the operand stored in `lhs`, then emits `op`.
fn checkUnaryOp(scope: *Scope, node: *const Ast.Node, op: Story.Opcode) CheckError!void {
    const operand = node.data.bin.lhs;
    try checkExpr(scope, operand);
    return scope.emitSimpleInst(op);
}
/// Generates code for the left operand, then the right operand, then emits
/// `op`. Both operands must be present.
fn checkBinaryOp(scope: *Scope, node: *const Ast.Node, op: Story.Opcode) CheckError!void {
    const left = node.data.bin.lhs;
    const right = node.data.bin.rhs;
    assert(left != null);
    assert(right != null);

    try checkExpr(scope, left);
    try checkExpr(scope, right);
    return scope.emitSimpleInst(op);
}
/// Generates code for a two-operand logical expression: the left operand,
/// then a `jmp_t` (when `binary_or` is true) or `jmp_f` (otherwise) that skips
/// over a `pop` and the code for the right operand.
///
/// NOTE(review): `Scope.patchJump` is not defined in the `Scope` visible in
/// this file, and no `checkExpr` arm dispatches here yet.
fn checkLogicalOp(scope: *Scope, node: *const Ast.Node, binary_or: bool) CheckError!void {
const data = node.data.bin;
assert(data.lhs != null and data.rhs != null);
try checkExpr(scope, data.lhs);
// Jump emitted right after the left operand; its target is fixed up below.
const else_branch = try scope.emitJumpInst(if (binary_or) .jmp_t else .jmp_f);
try scope.emitSimpleInst(.pop);
try checkExpr(scope, data.rhs);
// The jump lands after the right operand's code.
try scope.patchJump(else_branch);
}
/// Emits the `true` instruction; the node itself carries no extra data.
fn checkTrueLiteral(scope: *Scope, _: *const Ast.Node) CheckError!void {
    return scope.emitSimpleInst(.true);
}
/// Emits the `false` instruction; the node itself carries no extra data.
fn checkFalseLiteral(scope: *Scope, _: *const Ast.Node) CheckError!void {
    return scope.emitSimpleInst(.false);
}
/// Parses the node's lexeme as an `f64`, wraps it in a number object, adds the
/// object to the constant pool and emits a `load_const` for it.
fn checkNumberLiteral(scope: *Scope, node: *const Ast.Node) CheckError!void {
    const global = scope.global;
    const text = global.getLexemeFromNode(node);
    const value = try std.fmt.parseFloat(f64, text);
    const object = try Story.Object.Number.create(global.story, .{ .floating = value });
    const slot = try scope.makeConstant(@ptrCast(object));
    return scope.emitConstInst(.load_const, slot);
}
/// Stores the node's lexeme in the string table, creates a string object for
/// it, adds the object to the constant pool and emits a `load_const`.
fn checkStringLiteral(scope: *Scope, node: *const Ast.Node) CheckError!void {
    const global = scope.global;
    const text = global.getLexemeFromNode(node);
    _ = try scope.makeString(text);
    const object = try Story.Object.String.create(global.story, text);
    const slot = try scope.makeConstant(@ptrCast(object));
    return scope.emitConstInst(.load_const, slot);
}
/// Generates code for the child stored in `lhs`, treating it as a string
/// literal. The child is expected to be present.
fn checkStringExpr(scope: *Scope, node: *const Ast.Node) CheckError!void {
    const inner = node.data.bin.lhs;
    assert(inner != null);
    if (inner) |literal| return checkStringLiteral(scope, literal);
}
/// Emits the load instruction matching what the identifier resolves to, or
/// records an `unknown_identifier` diagnostic when it resolves to nothing.
fn checkIdentifier(scope: *Scope, node: *const Ast.Node) !void {
    const symbol = scope.lookupIdentifier(node) orelse
        return scope.fail(.unknown_identifier, node);

    return switch (symbol) {
        .global_variable => |global| scope.emitConstInst(.load_global, global.constant_slot),
        .local_variable => |local| scope.emitConstInst(.load, local.stack_slot),
        .parameter => |param| scope.emitConstInst(.load, param.stack_slot),
    };
}
/// Dispatches on the expression's tag. A null expression generates nothing;
/// a tag without a handler yields `error.NotImplemented`.
fn checkExpr(scope: *Scope, expr: ?*const Ast.Node) CheckError!void {
    const node = expr orelse return;
    return switch (node.tag) {
        // Literals and names.
        .true_literal => checkTrueLiteral(scope, node),
        .false_literal => checkFalseLiteral(scope, node),
        .number_literal => checkNumberLiteral(scope, node),
        .string_literal => checkStringLiteral(scope, node),
        .string_expr => checkStringExpr(scope, node),
        .identifier => checkIdentifier(scope, node),
        // Arithmetic.
        .add_expr => checkBinaryOp(scope, node, .add),
        .subtract_expr => checkBinaryOp(scope, node, .sub),
        .multiply_expr => checkBinaryOp(scope, node, .mul),
        .divide_expr => checkBinaryOp(scope, node, .div),
        .mod_expr => checkBinaryOp(scope, node, .mod),
        .negate_expr => checkUnaryOp(scope, node, .neg),
        else => return error.NotImplemented,
    };
}
/// Generates code for the statement's expression and pops its result. A
/// statement without an expression generates nothing.
fn checkExprStmt(scope: *Scope, stmt: *const Ast.Node) CheckError!void {
    if (stmt.data.bin.lhs) |expr| {
        try checkExpr(scope, expr);
        try scope.emitSimpleInst(.pop);
    }
}
/// Generates code for the expression wrapped by an inline-logic node. The
/// wrapped expression is expected to be present.
fn checkInlineLogicExpr(scope: *Scope, expr: *const Ast.Node) CheckError!void {
    const inner = expr.data.bin.lhs;
    assert(inner != null);
    return checkExpr(scope, inner);
}
/// Generates code for each piece of a content expression, emitting a
/// `content` instruction after every piece. Pieces other than string literals
/// and inline-logic expressions yield `error.NotImplemented`.
fn checkContentExpr(scope: *Scope, expr: *const Ast.Node) CheckError!void {
    const parts = expr.data.list.items orelse return;
    for (parts) |part| {
        switch (part.tag) {
            .string_literal => try checkStringLiteral(scope, part),
            .inline_logic_expr => try checkInlineLogicExpr(scope, part),
            else => return error.NotImplemented,
        }
        try scope.emitSimpleInst(.content);
    }
}
/// Generates code for the content expression held by the statement, if any.
fn checkContentStmt(scope: *Scope, stmt: *const Ast.Node) CheckError!void {
    if (stmt.data.bin.lhs) |content| try checkContentExpr(scope, content);
}
/// Generates code for every statement of a block inside a fresh child scope,
/// which is released when the block is done.
fn checkBlockStmt(parent_scope: *Scope, block_stmt: *const Ast.Node) CheckError!void {
    var inner = parent_scope.makeSubBlock();
    defer inner.deinit();

    if (block_stmt.data.list.items) |stmts| {
        for (stmts) |stmt| try checkStmt(&inner, stmt);
    }
}
/// Generates code for `lhs = rhs`: the value expression, a store to the
/// resolved target, then a `pop`.
///
/// Records `unknown_identifier` when the target does not resolve and
/// `assignment_to_const` when it resolves to a constant. Returns
/// `error.CompilerBug` when either operand node is missing.
fn checkAssignStmt(scope: *Scope, stmt: *const Ast.Node) CheckError!void {
    const lhs = stmt.data.bin.lhs orelse return error.CompilerBug;
    const rhs = stmt.data.bin.rhs orelse return error.CompilerBug;

    const symbol = scope.lookupIdentifier(lhs) orelse
        return scope.fail(.unknown_identifier, lhs);

    switch (symbol) {
        .global_variable => |data| {
            // Reject before generating any code for the value.
            if (data.is_constant) return scope.fail(.assignment_to_const, lhs);
            try checkExpr(scope, rhs);
            try scope.emitConstInst(.store_global, data.constant_slot);
        },
        .local_variable => |data| {
            try checkExpr(scope, rhs);
            try scope.emitConstInst(.store, data.stack_slot);
        },
        // Which target is reached depends on the source text, so this case
        // must not be `unreachable`; report it as unsupported instead.
        .parameter => return error.NotImplemented,
    }
    try scope.emitSimpleInst(.pop);
}
/// Generates code for a `temp_decl`, `var_decl` or `const_decl`: the
/// initializer first, then the symbol is registered, then a store and a `pop`.
/// Registering after the initializer means the initializer cannot see the
/// name being declared.
fn checkVarDecl(scope: *Scope, decl_node: *const Ast.Node) !void {
    const name_node = decl_node.data.bin.lhs orelse return error.Fucked;
    const init_node = decl_node.data.bin.rhs orelse return error.Fucked;
    try checkExpr(scope, init_node);

    switch (decl_node.tag) {
        .temp_decl => {
            const chunk = scope.chunk;
            // Temps are placed after the chunk's `arity` slots.
            const slot = chunk.arity + chunk.locals_count;
            chunk.locals_count += 1;
            try scope.insertIdentifier(name_node, .{
                .local_variable = .{
                    .node = decl_node,
                    .stack_slot = slot,
                },
            });
            try scope.emitConstInst(.store, slot);
        },
        .var_decl, .const_decl => |tag| {
            const global = scope.global;
            const name = global.getLexemeFromNode(name_node);
            _ = try scope.makeString(name);
            // The variable is addressed through a string constant of its name.
            const name_object = try Story.Object.String.create(global.story, name);
            const slot = try scope.makeConstant(@ptrCast(name_object));
            try scope.makeGlobal(name_node, .{
                .global_variable = .{
                    .is_constant = tag == .const_decl,
                    .node = decl_node,
                    .constant_slot = slot,
                },
            });
            try scope.emitConstInst(.store_global, slot);
        },
        else => unreachable,
    }
    try scope.emitSimpleInst(.pop);
}
/// Dispatches on the statement's tag; a tag without a handler yields
/// `error.NotImplemented`.
fn checkStmt(scope: *Scope, stmt: *const Ast.Node) CheckError!void {
    switch (stmt.tag) {
        .var_decl, .const_decl, .temp_decl => try checkVarDecl(scope, stmt),
        .assign_stmt => try checkAssignStmt(scope, stmt),
        .content_stmt => try checkContentStmt(scope, stmt),
        .expr_stmt => try checkExprStmt(scope, stmt),
        else => return error.NotImplemented,
    }
}
/// Generates code for `body`, places the scope's exit label right after it
/// and emits an `exit` instruction at that position.
fn checkAnonymousKnot(parent_scope: *Scope, body: *const Ast.Node) CheckError!void {
    const label = try parent_scope.makeLabel();
    parent_scope.exit_label = label;

    try checkBlockStmt(parent_scope, body);

    parent_scope.setLabel(label);
    return parent_scope.emitSimpleInst(.exit);
}
/// Builds the chunk named `default_knot_name` from the file's first child
/// when that child is a block, and appends the resulting content path to
/// `story.paths`. Files with no children, or whose first child is not a
/// block, produce no path.
fn checkFile(astgen: *AstGen, file: *const Ast.Node) CheckError!void {
    const gpa = astgen.gpa;

    var main_chunk: Chunk = .{
        .name = astgen.default_knot_name,
        .arity = 0,
        .locals_count = 0,
        .bytes = .empty,
        .const_pool = .empty,
    };
    defer main_chunk.deinit(gpa);

    var file_scope: Scope = .{
        .parent = null,
        .global = astgen,
        .chunk = &main_chunk,
        .symbol_table = .empty,
        .jump_stack_top = astgen.jump_stack.items.len,
        .label_stack_top = astgen.label_stack.items.len,
        .exit_label = null,
    };
    defer file_scope.deinit();

    _ = try file_scope.makeString(astgen.default_knot_name);

    const children = file.data.list.items orelse return;
    if (children.len == 0) return;

    // TODO: intern paths
    const first = children[0];
    if (first.tag != .block_stmt) return;

    try checkAnonymousKnot(&file_scope, first);
    const content_path = try main_chunk.finalize(&file_scope);
    try astgen.story.paths.append(gpa, @ptrCast(content_path));
}
/// Debug helper: prints every 0-terminated entry of `string_bytes` as its
/// start offset, its bytes in hex, and its text. Trailing bytes that have no
/// terminator are not printed.
fn dumpStringsWithHex(astgen: *const AstGen) void {
    const bytes = astgen.string_bytes.items;
    var start: usize = 0;
    // The search yields null once `start` reaches the end, which ends the loop.
    while (std.mem.indexOfScalarPos(u8, bytes, start, 0)) |end| : (start = end + 1) {
        const entry = bytes[start..end];
        std.debug.print("[{d:04}] ", .{start});
        for (entry) |byte| std.debug.print("{x:02} ", .{byte});
        std.debug.print("00 {s}\n", .{entry});
    }
}
/// Perform code generation via tree-walk.
pub fn generate(_: std.mem.Allocator, _: *const Ast) !Story {
return error.OutOfMemory;
/// Walks `tree` and returns a `Story` holding the generated content paths.
///
/// The caller owns the returned story and must call `deinit` on it. When
/// generation fails, everything already stored in the partially built story
/// is released before the error is returned.
pub fn generate(gpa: std.mem.Allocator, tree: *const Ast) !Story {
    const root_node = tree.root orelse return error.Fucked;

    var story: Story = .{
        .allocator = gpa,
        .is_exited = false,
        .can_advance = false,
        .gc_objects = .{},
        .globals = .empty,
        .paths = .empty,
        .stack = .empty,
        .call_stack = .empty,
    };
    // Without this, a failure below would leak whatever was already stored.
    errdefer story.deinit();

    var astgen: AstGen = .{
        .gpa = gpa,
        .tree = tree,
        .story = &story,
    };
    defer astgen.deinit();

    // Offset 0 of the string bytes holds a lone 0 byte before any real entry.
    try astgen.string_bytes.append(gpa, 0);
    try checkFile(&astgen, root_node);
    dumpStringsWithHex(&astgen);
    return story;
}

View file

@ -77,31 +77,38 @@ pub fn deinit(story: *Story) void {
story.call_stack.deinit(gpa);
}
/// Writes a dump of every content path in the story to `writer`.
pub fn dump(story: *Story, writer: *std.Io.Writer) !void {
    const dumper: Dumper = .{ .story = story, .writer = writer };
    for (story.paths.items) |path| try dumper.dump(@ptrCast(path));
}
/// Writes one trace record for `frame`: the stack slots from `frame.sp` to
/// the top of the stack as a comma-separated list (`NULL` for empty slots),
/// then the instruction at `frame.ip`, and finally flushes `writer`.
pub fn trace(story: *Story, writer: *std.Io.Writer, frame: *CallFrame) !void {
    try writer.print("\tStack => stack_pointer={d}, objects=[", .{frame.sp});
    const story_dumper: Dumper = .{ .story = story, .writer = writer };

    const slots = story.stack.items;
    // Slice only when the frame has slots of its own; slicing up to
    // `len - 1` unconditionally goes out of bounds when `frame.sp == len`.
    if (frame.sp < slots.len) {
        for (slots[frame.sp..], 0..) |slot, i| {
            if (i != 0) try writer.writeAll(", ");
            if (slot) |object| {
                try story_dumper.dumpObject(object);
            } else {
                try writer.writeAll("NULL");
            }
        }
    }
    try writer.writeAll("]\n");

    _ = try story_dumper.dumpInst(frame.callee, frame.ip, true);
    return writer.flush();
}
@ -177,7 +184,7 @@ fn execute(vm: *Story, writer: *std.Io.Writer) !void {
const code = std.mem.bytesAsSlice(Opcode, frame.callee.bytes);
while (true) {
if (vm.trace_writer) |w| {
if (vm.dump_writer) |w| {
vm.trace(w, frame) catch {};
}
switch (code[frame.ip]) {

View file

@ -16,7 +16,7 @@ fn dumpByteInst(d: Dumper, context: *const Object.ContentPath, offset: usize, op
const arg = context.bytes[offset + 1];
if (op == .load_const) {
try d.writer.print("{s} {d} (", .{ @tagName(op), arg });
try printObject(d.writer, context.const_pool[arg]);
try d.dumpObject(context.const_pool[arg]);
try d.writer.print(")\n", .{});
} else {
try d.writer.print("{s} {x}\n", .{ @tagName(op), arg });
@ -141,28 +141,28 @@ fn getObjectType(object: *const Object) []const u8 {
}
}
pub fn printObject(writer: *std.Io.Writer, object: *const Object) !void {
pub fn dumpObject(d: Dumper, object: *const Object) !void {
const type_string = getObjectType(object);
switch (object.tag) {
.number => {
const typed_object: *const Object.Number = @ptrCast(object);
switch (typed_object.data) {
.boolean => |value| {
try writer.print("<type={s} value={s}, address={*}>", .{
try d.writer.print("<type={s} value={s}, address={*}>", .{
type_string,
if (value) "true" else "false",
object,
});
},
.floating => |value| {
try writer.print("<type={s} value={d}, address={*}>", .{
try d.writer.print("<type={s} value={d}, address={*}>", .{
type_string,
value,
object,
});
},
.integer => |value| {
try writer.print("<type={s} value={d}, address={*}>", .{
try d.writer.print("<type={s} value={d}, address={*}>", .{
type_string,
value,
object,
@ -173,14 +173,14 @@ pub fn printObject(writer: *std.Io.Writer, object: *const Object) !void {
.string => {
const typed_object: *const Object.String = @ptrCast(object);
const string_bytes = typed_object.bytes[0..typed_object.length];
try writer.print("<type={s} value=\"{s}\", address={*}>", .{
try d.writer.print("<type={s} value=\"{s}\", address={*}>", .{
type_string,
string_bytes,
object,
});
},
.content_path => {
try writer.print("<type={s} address={*}>", .{ type_string, object });
try d.writer.print("<type={s} address={*}>", .{ type_string, object });
},
}
}

View file

@ -85,6 +85,16 @@ fn mainArgs(
.use_color = use_color,
});
defer story.deinit();
try story.dump(&stderr_writer.interface);
if (compile_only) return;
while (story.can_advance) {
const content = try story.advance();
defer gpa.free(content);
std.debug.print("{s}\n", .{content});
}
}
fn readSourceFile(gpa: std.mem.Allocator, file_reader: *std.fs.File.Reader) ![:0]u8 {