diff --git a/src/AstGen.zig b/src/AstGen.zig
index 73a98ce..d7488bf 100644
--- a/src/AstGen.zig
+++ b/src/AstGen.zig
@@ -1,8 +1,624 @@
 const std = @import("std");
-const Ast = @import("./Ast.zig");
-const Story = @import("./Story.zig");
+const Ast = @import("Ast.zig");
+const Story = @import("Story.zig");
+const StringIndexAdapter = std.hash_map.StringIndexAdapter;
+const StringIndexContext = std.hash_map.StringIndexContext;
+const assert = std.debug.assert;
+const AstGen = @This();
+
+gpa: std.mem.Allocator,
+tree: *const Ast,
+story: *Story,
+// TODO: Output string_bytes.
+// NOTE: String bytes have a 4GiB limit.
+string_bytes: std.ArrayListUnmanaged(u8) = .empty,
+string_table: std.HashMapUnmanaged(u32, void, StringIndexContext, std.hash_map.default_max_load_percentage) = .empty,
+label_stack: std.ArrayListUnmanaged(Label) = .empty,
+jump_stack: std.ArrayListUnmanaged(Jump) = .empty,
+errors: std.ArrayListUnmanaged(Ast.Error) = .empty,
+default_knot_name: [:0]const u8 = "@main@",
+
+/// Error type shared by the `check*` functions.
+pub const CheckError = anyerror;
+
+/// What an identifier resolves to when looked up through `Scope.lookupIdentifier`.
+pub const Symbol = union(enum) {
+    global_variable: struct {
+        is_constant: bool,
+        constant_slot: usize,
+        node: *const Ast.Node,
+    },
+    local_variable: struct {
+        stack_slot: usize,
+        node: *const Ast.Node,
+    },
+    parameter: struct {
+        constant_slot: usize,
+        stack_slot: usize,
+    },
+};
+
+/// Entry of `jump_stack`; nothing in this file appends to it yet.
+pub const Jump = struct {
+    label_index: usize,
+    code_offset: usize,
+};
+
+/// Jump target; `code_offset` stays `0xffffffff` until `Scope.setLabel` assigns it.
+pub const Label = struct {
+    code_offset: usize,
+};
+
+/// Bytecode, constant pool and slot counts being assembled for one knot.
+pub const Chunk = struct {
+    name: [:0]const u8,
+    arity: usize,
+    locals_count: usize,
+    const_pool: std.ArrayListUnmanaged(*Story.Object),
+    bytes: std.ArrayListUnmanaged(u8),
+
+    /// Backpatches `scope`, then hands the constant pool and bytecode over
+    /// to a new `Story.Object.ContentPath`, leaving both lists empty.
+    pub fn finalize(chunk: *Chunk, scope: *Scope) !*Story.Object.ContentPath {
+        const gpa = scope.global.gpa;
+        const story = scope.global.story;
+
+        try scope.backpatch();
+
+        // Create the name while the lists still belong to `chunk`, so a
+        // failure here leaves everything for `Chunk.deinit` to release.
+        const knot_name = try Story.Object.String.create(story, chunk.name);
+        const const_pool = try chunk.const_pool.toOwnedSlice(gpa);
+        const bytes = chunk.bytes.toOwnedSlice(gpa) catch |err| {
+            // `const_pool` is already detached from `chunk`; free it here.
+            gpa.free(const_pool);
+            return err;
+        };
+        return Story.Object.ContentPath.create(
+            story,
+            knot_name,
+            chunk.arity,
+            chunk.locals_count,
+            const_pool,
+            bytes,
+        );
+    }
+
+    /// Frees the lists still owned by `chunk` and invalidates it.
+    pub fn deinit(chunk: *Chunk, gpa: std.mem.Allocator) void {
+        chunk.const_pool.deinit(gpa);
+        chunk.bytes.deinit(gpa);
+        chunk.* = undefined;
+    }
+};
+
+/// A lexical scope; lookups that miss fall back to `parent`.
+pub const Scope = struct {
+    parent: ?*Scope,
+    global: *AstGen,
+    chunk: *Chunk,
+    symbol_table: std.StringHashMapUnmanaged(Symbol),
+    jump_stack_top: usize,
+    label_stack_top: usize,
+    exit_label: ?usize,
+
+    /// Frees this scope's symbol table.
+    pub fn deinit(scope: *Scope) void {
+        const gpa = scope.global.gpa;
+        scope.symbol_table.deinit(gpa);
+    }
+
+    /// Records an error of kind `tag` spanning `node` and returns
+    /// `error.SemanticError` for the caller to propagate.
+    pub fn fail(
+        scope: *Scope,
+        tag: Ast.Error.Tag,
+        node: *const Ast.Node,
+    ) error{ SemanticError, OutOfMemory } {
+        const gpa = scope.global.gpa;
+        const err: Ast.Error = .{
+            .tag = tag,
+            .loc = .{
+                .start = node.loc.start,
+                .end = node.loc.end,
+            },
+        };
+
+        try scope.global.errors.append(gpa, err);
+        return error.SemanticError;
+    }
+
+    /// Looks up the lexeme of `node`, innermost scope first.
+    pub fn lookupIdentifier(self: *Scope, node: *const Ast.Node) ?Symbol {
+        const token_bytes = self.global.getLexemeFromNode(node);
+        var current_scope: ?*Scope = self;
+        while (current_scope) |scope| : (current_scope = scope.parent) {
+            const symbol_table = &scope.symbol_table;
+            const result = symbol_table.get(token_bytes);
+            if (result) |symbol| return symbol;
+        }
+        return null;
+    }
+
+    /// Binds the lexeme of `node` to `symbol` in this scope, replacing any
+    /// binding this scope already holds for that name.
+    pub fn insertIdentifier(scope: *Scope, node: *const Ast.Node, symbol: Symbol) !void {
+        const gpa = scope.global.gpa;
+        const symbol_table = &scope.symbol_table;
+        const token_bytes = scope.global.getLexemeFromNode(node);
+        return symbol_table.put(gpa, token_bytes, symbol);
+    }
+
+    /// Creates a child scope that shares this scope's chunk and starts with no symbols.
+    pub fn makeSubBlock(parent_scope: *Scope) Scope {
+        const global = parent_scope.global;
+        const current_chunk = parent_scope.chunk;
+
+        return .{
+            .parent = parent_scope,
+            .global = global,
+            .chunk = current_chunk,
+            .symbol_table = .empty,
+            .jump_stack_top = global.jump_stack.items.len,
+            .label_stack_top = global.label_stack.items.len,
+            .exit_label = null,
+        };
+    }
+
+    /// Reserves a label whose offset is not yet known and returns its id.
+    pub fn makeLabel(scope: *Scope) !usize {
+        const gpa = scope.global.gpa;
+        const label_stack = &scope.global.label_stack;
+        const label_id = label_stack.items.len;
+        const label_data: Label = .{ .code_offset = 0xffffffff };
+
+        try label_stack.append(gpa, label_data);
+        return label_id;
+    }
+
+    /// Points label `label_id` at the current end of the chunk's bytecode.
+    pub fn setLabel(scope: *Scope, label_id: usize) void {
+        const chunk = scope.chunk;
+        const code_offset = chunk.bytes.items.len;
+        const label_stack = &scope.global.label_stack;
+        // Strictly less than: `label_id` is used as an index just below.
+        assert(label_id < label_stack.items.len);
+
+        const label_data = &label_stack.items[label_id];
+        label_data.code_offset = code_offset;
+    }
+
+    /// Appends `object` to the chunk's constant pool and returns its index.
+    pub fn makeConstant(scope: *Scope, object: *Story.Object) !usize {
+        const gpa = scope.global.gpa;
+        const chunk = scope.chunk;
+        const const_id = chunk.const_pool.items.len;
+
+        try chunk.const_pool.append(gpa, object);
+        return const_id;
+    }
+
+    /// Location of an interned string inside `AstGen.string_bytes`.
+    pub const IndexSlice = struct {
+        index: u32,
+        len: u32,
+    };
+
+    /// Interns `bytes` in `string_bytes` and returns where they live;
+    /// equal strings share a single zero-terminated entry.
+    pub fn makeString(scope: *Scope, bytes: []const u8) !IndexSlice {
+        const global = scope.global;
+        const gpa = scope.global.gpa;
+        const str_index: u32 = @intCast(global.string_bytes.items.len);
+        const string_bytes = &global.string_bytes;
+        try global.string_bytes.appendSlice(gpa, bytes);
+
+        const key: []const u8 = global.string_bytes.items[str_index..];
+        const gop = try global.string_table.getOrPutContextAdapted(gpa, key, StringIndexAdapter{
+            .bytes = string_bytes,
+        }, StringIndexContext{
+            .bytes = string_bytes,
+        });
+        if (gop.found_existing) {
+            string_bytes.shrinkRetainingCapacity(str_index);
+            return .{ .index = gop.key_ptr.*, .len = @intCast(key.len) };
+        } else {
+            gop.key_ptr.* = str_index;
+            try string_bytes.append(gpa, 0);
+            return .{ .index = str_index, .len = @intCast(key.len) };
+        }
+    }
+
+    /// Registers the lexeme of `node` in `story.globals` (value `null`) and
+    /// binds it to `symbol` in this scope.
+    pub fn makeGlobal(scope: *Scope, node: *const Ast.Node, symbol: Symbol) !void {
+        const gpa = scope.global.gpa;
+        const global_data = scope.global;
+        const token_bytes = global_data.getLexemeFromNode(node);
+        try global_data.story.globals.put(gpa, token_bytes, null);
+        return scope.insertIdentifier(node, symbol);
+    }
+
+    /// Appends one raw byte to the chunk's bytecode.
+    pub fn emitByte(scope: *Scope, byte: u8) !void {
+        return scope.chunk.bytes.append(scope.global.gpa, byte);
+    }
+
+    /// Emits an opcode that takes no operand.
+    pub fn emitSimpleInst(scope: *Scope, op: Story.Opcode) !void {
+        return scope.emitByte(@intFromEnum(op));
+    }
+
+    /// Emits `op` followed by `arg` as a one-byte operand.
+    /// Returns `error.TooManyConstants` when `arg` does not fit in a byte.
+    pub fn emitConstInst(scope: *Scope, op: Story.Opcode, arg: usize) !void {
+        if (arg >= 256) return error.TooManyConstants;
+
+        try scope.emitSimpleInst(op);
+        try scope.emitByte(@intCast(arg));
+    }
+
+    /// Emits `op` with a two-byte `0xff 0xff` placeholder operand and
+    /// returns the offset of that placeholder.
+    pub fn emitJumpInst(scope: *Scope, op: Story.Opcode) !usize {
+        const bytes = &scope.chunk.bytes;
+        try scope.emitSimpleInst(op);
+        try scope.emitByte(0xff);
+        try scope.emitByte(0xff);
+        return bytes.items.len - 2;
+    }
+
+    /// Placeholder: does nothing yet.
+    pub fn backpatch(_: *Scope) !void {}
+};
+
+/// Releases every buffer owned by `astgen`; the `Story` it points at is not touched.
+pub fn deinit(astgen: *AstGen) void {
+    const gpa = astgen.gpa;
+    astgen.string_bytes.deinit(gpa);
+    astgen.string_table.deinit(gpa);
+    astgen.label_stack.deinit(gpa);
+    astgen.jump_stack.deinit(gpa);
+    astgen.errors.deinit(gpa);
+}
+
+/// Returns the source text covered by `node`.
+fn getLexemeFromNode(astgen: *const AstGen, node: *const Ast.Node) []const u8 {
+    assert(node.loc.start <= node.loc.end);
+    const source_bytes = astgen.tree.source;
+    return source_bytes[node.loc.start..node.loc.end];
+}
+
+/// Emits the operand found in `lhs`, then `op`.
+fn checkUnaryOp(scope: *Scope, node: *const Ast.Node, op: Story.Opcode) CheckError!void {
+    try checkExpr(scope, node.data.bin.lhs);
+    try scope.emitSimpleInst(op);
+}
+
+/// Emits both operands, left first, then `op`.
+fn checkBinaryOp(scope: *Scope, node: *const Ast.Node, op: Story.Opcode) CheckError!void {
+    const bin_data = node.data.bin;
+    assert(bin_data.lhs != null and bin_data.rhs != null);
+    try checkExpr(scope, bin_data.lhs);
+    try checkExpr(scope, bin_data.rhs);
+    try scope.emitSimpleInst(op);
+}
+
+/// Emits `lhs`, a conditional jump over `rhs` (`jmp_t` when `binary_or`, else `jmp_f`), then `rhs`.
+fn checkLogicalOp(scope: *Scope, node: *const Ast.Node, binary_or: bool) CheckError!void {
+    const data = node.data.bin;
+    assert(data.lhs != null and data.rhs != null);
+
+    try checkExpr(scope, data.lhs);
+    const else_branch = try scope.emitJumpInst(if (binary_or) .jmp_t else .jmp_f);
+    try scope.emitSimpleInst(.pop);
+    try checkExpr(scope, data.rhs);
+    // NOTE(review): `Scope` declares no `patchJump`, and nothing dispatches to
+    // this function yet, so this call is still unresolved.
+    try scope.patchJump(else_branch);
+}
+
+/// Emits the `true` opcode.
+fn checkTrueLiteral(scope: *Scope, _: *const Ast.Node) CheckError!void {
+    try scope.emitSimpleInst(.true);
+}
+
+/// Emits the `false` opcode.
+fn checkFalseLiteral(scope: *Scope, _: *const Ast.Node) CheckError!void {
+    try scope.emitSimpleInst(.false);
+}
+
+/// Parses the lexeme as an `f64` and emits a `load_const` of the resulting number object.
+fn checkNumberLiteral(scope: *Scope, node: *const Ast.Node) CheckError!void {
+    const lexeme = getLexemeFromNode(scope.global, node);
+    const number_value = try std.fmt.parseFloat(f64, lexeme);
+    const number_object = try Story.Object.Number.create(
+        scope.global.story,
+        .{ .floating = number_value },
+    );
+    const constant_id = try scope.makeConstant(@ptrCast(number_object));
+
+    try scope.emitConstInst(.load_const, constant_id);
+}
+
+/// Interns the lexeme and emits a `load_const` of a new string object.
+fn checkStringLiteral(scope: *Scope, node: *const Ast.Node) CheckError!void {
+    const string_bytes = getLexemeFromNode(scope.global, node);
+    _ = try scope.makeString(string_bytes);
+
+    const string_object = try Story.Object.String.create(scope.global.story, string_bytes);
+    const constant_id = try scope.makeConstant(@ptrCast(string_object));
+
+    try scope.emitConstInst(.load_const, constant_id);
+}
+
+/// Emits the string literal held in `lhs`.
+fn checkStringExpr(scope: *Scope, node: *const Ast.Node) CheckError!void {
+    assert(node.data.bin.lhs != null);
+    const expr_node = node.data.bin.lhs orelse return;
+    return checkStringLiteral(scope, expr_node);
+}
+
+/// Emits a load of the symbol named by `node`, or reports `unknown_identifier`.
+fn checkIdentifier(scope: *Scope, node: *const Ast.Node) !void {
+    if (scope.lookupIdentifier(node)) |symbol| {
+        switch (symbol) {
+            .global_variable => |data| {
+                return scope.emitConstInst(.load_global, data.constant_slot);
+            },
+            .local_variable => |data| {
+                return scope.emitConstInst(.load, data.stack_slot);
+            },
+            .parameter => |data| {
+                return scope.emitConstInst(.load, data.stack_slot);
+            },
+        }
+    }
+    return scope.fail(.unknown_identifier, node);
+}
+
+/// Emits bytecode for `expr`; a null `expr` emits nothing.
+fn checkExpr(scope: *Scope, expr: ?*const Ast.Node) CheckError!void {
+    const expr_node = expr orelse return;
+    switch (expr_node.tag) {
+        .true_literal => try checkTrueLiteral(scope, expr_node),
+        .false_literal => try checkFalseLiteral(scope, expr_node),
+        .number_literal => try checkNumberLiteral(scope, expr_node),
+        .string_literal => try checkStringLiteral(scope, expr_node),
+        .string_expr => try checkStringExpr(scope, expr_node),
+        .identifier => try checkIdentifier(scope, expr_node),
+        .add_expr => try checkBinaryOp(scope, expr_node, .add),
+        .subtract_expr => try checkBinaryOp(scope, expr_node, .sub),
+        .multiply_expr => try checkBinaryOp(scope, expr_node, .mul),
+        .divide_expr => try checkBinaryOp(scope, expr_node, .div),
+        .mod_expr => try checkBinaryOp(scope, expr_node, .mod),
+        .negate_expr => try checkUnaryOp(scope, expr_node, .neg),
+        else => return error.NotImplemented,
+    }
+}
+
+/// Emits the expression and pops its result.
+fn checkExprStmt(scope: *Scope, stmt: *const Ast.Node) CheckError!void {
+    const expr_node = stmt.data.bin.lhs orelse return;
+    try checkExpr(scope, expr_node);
+    try scope.emitSimpleInst(.pop);
+}
+
+/// Emits the expression wrapped by an inline-logic node.
+fn checkInlineLogicExpr(scope: *Scope, expr: *const Ast.Node) CheckError!void {
+    assert(expr.data.bin.lhs != null);
+    return checkExpr(scope, expr.data.bin.lhs);
+}
+
+/// Emits each child followed by a `content` opcode.
+fn checkContentExpr(scope: *Scope, expr: *const Ast.Node) CheckError!void {
+    const node_list = expr.data.list.items orelse return;
+    for (node_list) |child_node| {
+        switch (child_node.tag) {
+            .string_literal => {
+                try checkStringLiteral(scope, child_node);
+                try scope.emitSimpleInst(.content);
+            },
+            .inline_logic_expr => {
+                try checkInlineLogicExpr(scope, child_node);
+                try scope.emitSimpleInst(.content);
+            },
+            else => return error.NotImplemented,
+        }
+    }
+}
+
+/// Emits the content expression held in `lhs`, if any.
+fn checkContentStmt(scope: *Scope, stmt: *const Ast.Node) CheckError!void {
+    const expr_node = stmt.data.bin.lhs orelse return;
+    return checkContentExpr(scope, expr_node);
+}
+
+/// Checks every child statement inside a fresh sub-scope.
+fn checkBlockStmt(parent_scope: *Scope, block_stmt: *const Ast.Node) CheckError!void {
+    var block_scope = parent_scope.makeSubBlock();
+    defer block_scope.deinit();
+    const children = block_stmt.data.list.items orelse return;
+    for (children) |child_stmt| try checkStmt(&block_scope, child_stmt);
+}
+
+/// Emits a store of `rhs` into the variable named by `lhs`.
+/// Reports `assignment_to_const` or `unknown_identifier` on bad targets.
+fn checkAssignStmt(scope: *Scope, stmt: *const Ast.Node) CheckError!void {
+    const lhs = stmt.data.bin.lhs orelse return error.CompilerBug;
+    const rhs = stmt.data.bin.rhs orelse return error.CompilerBug;
+
+    if (scope.lookupIdentifier(lhs)) |symbol| {
+        switch (symbol) {
+            .global_variable => |data| {
+                if (data.is_constant) {
+                    return scope.fail(.assignment_to_const, lhs);
+                }
+                try checkExpr(scope, rhs);
+                try scope.emitConstInst(.store_global, data.constant_slot);
+                try scope.emitSimpleInst(.pop);
+                return;
+            },
+            .local_variable => |data| {
+                try checkExpr(scope, rhs);
+                try scope.emitConstInst(.store, data.stack_slot);
+                try scope.emitSimpleInst(.pop);
+                return;
+            },
+            // Nothing inserts a `.parameter` symbol yet, but once something does,
+            // user source can reach this prong: fail cleanly, not `unreachable`.
+            .parameter => return error.NotImplemented,
+        }
+    }
+    return scope.fail(.unknown_identifier, lhs);
+}
+
+/// Emits the initializer, then declares and stores into a new local
+/// (`temp_decl`) or global (`var_decl`, `const_decl`).
+fn checkVarDecl(scope: *Scope, decl_node: *const Ast.Node) !void {
+    const identifier_node = decl_node.data.bin.lhs orelse return error.CompilerBug;
+    const expr_node = decl_node.data.bin.rhs orelse return error.CompilerBug;
+
+    try checkExpr(scope, expr_node);
+    switch (decl_node.tag) {
+        .temp_decl => {
+            const stack_slot = scope.chunk.arity + scope.chunk.locals_count;
+            const symbol: Symbol = .{
+                .local_variable = .{
+                    .node = decl_node,
+                    .stack_slot = stack_slot,
+                },
+            };
+            scope.chunk.locals_count += 1;
+
+            try scope.insertIdentifier(identifier_node, symbol);
+            try scope.emitConstInst(.store, stack_slot);
+        },
+        .var_decl, .const_decl => |tag| {
+            const string_bytes = getLexemeFromNode(scope.global, identifier_node);
+            _ = try scope.makeString(string_bytes);
+            const string_object = try Story.Object.String.create(scope.global.story, string_bytes);
+            const constant_slot = try scope.makeConstant(@ptrCast(string_object));
+            const symbol: Symbol = .{
+                .global_variable = .{
+                    .is_constant = tag == .const_decl,
+                    .node = decl_node,
+                    .constant_slot = constant_slot,
+                },
+            };
+
+            try scope.makeGlobal(identifier_node, symbol);
+            try scope.emitConstInst(.store_global, constant_slot);
+        },
+        else => unreachable,
+    }
+
+    try scope.emitSimpleInst(.pop);
+}
+
+/// Dispatches `stmt` to the checker for its tag.
+fn checkStmt(scope: *Scope, stmt: *const Ast.Node) CheckError!void {
+    switch (stmt.tag) {
+        .var_decl => try checkVarDecl(scope, stmt),
+        .const_decl => try checkVarDecl(scope, stmt),
+        .temp_decl => try checkVarDecl(scope, stmt),
+        .assign_stmt => try checkAssignStmt(scope, stmt),
+        .content_stmt => try checkContentStmt(scope, stmt),
+        .expr_stmt => try checkExprStmt(scope, stmt),
+        else => return error.NotImplemented,
+    }
+}
+
+/// Checks `body`, then places the scope's exit label and emits `exit`.
+fn checkAnonymousKnot(parent_scope: *Scope, body: *const Ast.Node) CheckError!void {
+    const exit_label = try parent_scope.makeLabel();
+    parent_scope.exit_label = exit_label;
+
+    try checkBlockStmt(parent_scope, body);
+    parent_scope.setLabel(exit_label);
+    try parent_scope.emitSimpleInst(.exit);
+}
+
+/// Compiles a leading `block_stmt` of `file` into the default knot and
+/// appends it to `story.paths`; any other children are ignored for now.
+fn checkFile(astgen: *AstGen, file: *const Ast.Node) CheckError!void {
+    const gpa = astgen.gpa;
+    var main_chunk: Chunk = .{
+        .name = astgen.default_knot_name,
+        .arity = 0,
+        .locals_count = 0,
+        .bytes = .empty,
+        .const_pool = .empty,
+    };
+    defer main_chunk.deinit(gpa);
+
+    var file_scope: Scope = .{
+        .parent = null,
+        .global = astgen,
+        .chunk = &main_chunk,
+        .symbol_table = .empty,
+        .jump_stack_top = astgen.jump_stack.items.len,
+        .label_stack_top = astgen.label_stack.items.len,
+        .exit_label = null,
+    };
+    defer file_scope.deinit();
+
+    _ = try file_scope.makeString(astgen.default_knot_name);
+    const children = file.data.list.items orelse return;
+    if (children.len == 0) return;
+
+    // TODO: intern paths
+
+    const first_child = children[0];
+    if (first_child.tag == .block_stmt) {
+        const chunk = file_scope.chunk;
+        try checkAnonymousKnot(&file_scope, first_child);
+        const content_path = try chunk.finalize(&file_scope);
+        try astgen.story.paths.append(gpa, @ptrCast(content_path));
+    }
+}
+
+/// Prints each zero-terminated entry of `string_bytes` to stderr as hex and text.
+fn dumpStringsWithHex(astgen: *const AstGen) void {
+    var start: usize = 0;
+    const bytes = astgen.string_bytes.items;
+
+    while (start < bytes.len) {
+        const end = std.mem.indexOfScalarPos(u8, bytes, start, 0) orelse break;
+        const s = bytes[start..end];
+
+        std.debug.print("[{d:04}] ", .{start});
+        for (s) |b| std.debug.print("{x:02} ", .{b});
+        std.debug.print("00 {s}\n", .{s});
+        start = end + 1;
+    }
+}
 
 /// Perform code generation via tree-walk.
-pub fn generate(_: std.mem.Allocator, _: *const Ast) !Story {
-    return error.OutOfMemory;
+/// On success the caller owns the returned `Story` (release it with `Story.deinit`).
+pub fn generate(gpa: std.mem.Allocator, tree: *const Ast) !Story {
+    const root_node = tree.root orelse return error.MissingRootNode;
+    var story: Story = .{
+        .allocator = gpa,
+        .is_exited = false,
+        .can_advance = false,
+        .gc_objects = .{},
+        .globals = .empty,
+        .paths = .empty,
+        .stack = .empty,
+        .call_stack = .empty,
+    };
+    // `checkFile` grows `story.globals`/`story.paths` and creates objects through `story`; free them on failure.
+    errdefer story.deinit();
+    var astgen: AstGen = .{
+        .gpa = gpa,
+        .tree = tree,
+        .story = &story,
+    };
+    defer astgen.deinit();
+
+    try astgen.string_bytes.append(gpa, 0);
+    try checkFile(&astgen, root_node);
+    dumpStringsWithHex(&astgen);
+    return story;
 }
diff --git a/src/Story.zig b/src/Story.zig
index 88db9cd..4539e8d 100644
--- a/src/Story.zig
+++ b/src/Story.zig
@@ -77,31 +77,38 @@ pub fn deinit(story: *Story) void {
     story.call_stack.deinit(gpa);
 }
 
+pub fn dump(story: *Story, writer: *std.Io.Writer) !void {
+    const story_dumper: Dumper = .{ .story = story, .writer = writer };
+    for (story.paths.items) |path_object| {
+        try story_dumper.dump(@ptrCast(path_object));
+    }
+}
+
 pub fn trace(story: *Story, writer: *std.Io.Writer, frame: *CallFrame) !void {
     try writer.print("\tStack => stack_pointer={d}, objects=[", .{frame.sp});
 
-    const dumper = Dumper{ .story = story, .writer = writer };
+    const story_dumper: Dumper = .{ .story = story, .writer = writer };
     const stack = &story.stack;
     const stack_top = story.stack.items.len;
     if (stack_top > 0) {
         const last_slot = stack.items[stack.items.len - 1];
         for (stack.items[frame.sp .. stack.items.len - 1]) |slot| {
             if (slot) |object| {
-                try dumper.printObject(object);
+                try story_dumper.dumpObject(object);
             } else {
                 try writer.writeAll("NULL");
             }
             try writer.writeAll(", ");
         }
         if (last_slot) |object| {
-            try dumper.printObject(object);
+            try story_dumper.dumpObject(object);
         } else {
             try writer.writeAll("NULL");
         }
     }
     try writer.writeAll("]\n");
 
-    _ = try dumper.dumpInst(frame.callee, frame.ip, true);
+    _ = try story_dumper.dumpInst(frame.callee, frame.ip, true);
     return writer.flush();
 }
 
@@ -177,7 +184,7 @@ fn execute(vm: *Story, writer: *std.Io.Writer) !void {
     const code = std.mem.bytesAsSlice(Opcode, frame.callee.bytes);
 
     while (true) {
-        if (vm.trace_writer) |w| {
+        if (vm.dump_writer) |w| {
             vm.trace(w, frame) catch {};
         }
         switch (code[frame.ip]) {
diff --git a/src/Story/Dumper.zig b/src/Story/Dumper.zig
index 41b8bc2..137c968 100644
--- a/src/Story/Dumper.zig
+++ b/src/Story/Dumper.zig
@@ -16,7 +16,7 @@ fn dumpByteInst(d: Dumper, context: *const Object.ContentPath, offset: usize, op
     const arg = context.bytes[offset + 1];
     if (op == .load_const) {
         try d.writer.print("{s} {d} (", .{ @tagName(op), arg });
-        try printObject(d.writer, context.const_pool[arg]);
+        try d.dumpObject(context.const_pool[arg]);
         try d.writer.print(")\n", .{});
     } else {
         try d.writer.print("{s} {x}\n", .{ @tagName(op), arg });
@@ -141,28 +141,28 @@ fn getObjectType(object: *const Object) []const u8 {
     }
 }
 
-pub fn printObject(writer: *std.Io.Writer, object: *const Object) !void {
+pub fn dumpObject(d: Dumper, object: *const Object) !void {
     const type_string = getObjectType(object);
     switch (object.tag) {
         .number => {
             const typed_object: *const Object.Number = @ptrCast(object);
             switch (typed_object.data) {
                 .boolean => |value| {
-                    try writer.print("", .{
+                    try d.writer.print("", .{
                         type_string,
                         if (value) "true" else "false",
                         object,
                     });
                 },
                 .floating => |value| {
-                    try writer.print("", .{
+                    try d.writer.print("", .{
                         type_string,
                         value,
                         object,
                     });
                 },
                 .integer => |value| {
-                    try writer.print("", .{
+                    try d.writer.print("", .{
                         type_string,
                         value,
                         object,
@@ -173,14 +173,14 @@ pub fn printObject(writer: *std.Io.Writer, object: *const Object) !void {
         .string => {
             const typed_object: *const Object.String = @ptrCast(object);
             const string_bytes = typed_object.bytes[0..typed_object.length];
-            try writer.print("", .{
+            try d.writer.print("", .{
                 type_string,
                 string_bytes,
                 object,
             });
         },
         .content_path => {
-            try writer.print("", .{ type_string, object });
+            try d.writer.print("", .{ type_string, object });
         },
     }
 }
diff --git a/src/main.zig b/src/main.zig
index 4d5842f..5666f81 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -85,6 +85,16 @@ fn mainArgs(
         .use_color = use_color,
     });
     defer story.deinit();
+
+    try story.dump(&stderr_writer.interface);
+    if (compile_only) return;
+
+    while (story.can_advance) {
+        const content = try story.advance();
+        defer gpa.free(content);
+
+        std.debug.print("{s}\n", .{content});
+    }
 }
 
 fn readSourceFile(gpa: std.mem.Allocator, file_reader: *std.fs.File.Reader) ![:0]u8 {