diff --git a/src/AstGen.zig b/src/AstGen.zig index e31fa7d..646bc2b 100644 --- a/src/AstGen.zig +++ b/src/AstGen.zig @@ -128,7 +128,12 @@ fn nullTerminatedString(astgen: *AstGen, str: Ir.NullTerminatedString) [:0]const return slice[0..std.mem.indexOfScalar(u8, slice, 0).? :0]; } -fn stringFromBytes(astgen: *AstGen, bytes: []const u8) error{OutOfMemory}!Ir.NullTerminatedString { +const IndexSlice = struct { + index: Ir.NullTerminatedString, + len: u32, +}; + +fn strFromSlice(astgen: *AstGen, bytes: []const u8) error{OutOfMemory}!IndexSlice { const gpa = astgen.gpa; const string_bytes = &astgen.string_bytes; const str_index: u32 = @intCast(string_bytes.items.len); @@ -142,17 +147,23 @@ fn stringFromBytes(astgen: *AstGen, bytes: []const u8) error{OutOfMemory}!Ir.Nul }); if (gop.found_existing) { string_bytes.shrinkRetainingCapacity(str_index); - return @enumFromInt(gop.key_ptr.*); + return .{ + .index = @enumFromInt(gop.key_ptr.*), + .len = @intCast(key.len), + }; } else { gop.key_ptr.* = str_index; try string_bytes.append(gpa, 0); - return @enumFromInt(str_index); + return .{ + .index = @enumFromInt(str_index), + .len = @intCast(key.len), + }; } } -fn stringFromNode(astgen: *AstGen, node: *const Ast.Node) !Ir.NullTerminatedString { +fn strFromNode(astgen: *AstGen, node: *const Ast.Node) !IndexSlice { const name_bytes = astgen.tree.nodeSlice(node); - return astgen.stringFromBytes(name_bytes); + return astgen.strFromSlice(name_bytes); } fn qualifiedString( @@ -332,14 +343,8 @@ const GenIr = struct { } fn addInt(gi: *GenIr, value: u64) !Ir.Inst.Ref { - return add(gi, .{ .tag = .integer, .data = .{ - .integer = .{ .value = value }, - } }); - } - - fn addStr(gi: *GenIr, tag: Ir.Inst.Tag, str: Ir.NullTerminatedString) !Ir.Inst.Ref { - return add(gi, .{ .tag = tag, .data = .{ - .string = .{ .start = str }, + return add(gi, .{ .tag = .int, .data = .{ + .int = value, } }); } @@ -360,14 +365,33 @@ const GenIr = struct { } }); } - fn addDeclRef(gi: *GenIr, decl_ref: Ir.NullTerminatedString) !Ir.Inst.Ref { - return add(gi, .{ .tag = .decl_ref, .data = .{ - .string = .{ - .start = decl_ref, - }, + fn addStr( + gi: *GenIr, + str: Ir.NullTerminatedString, + str_len: usize, + ) !Ir.Inst.Ref { + assert(str_len <= std.math.maxInt(u32)); + return add(gi, .{ .tag = .str, .data = .{ + .str = .{ .start = str, .len = @intCast(str_len) }, } }); } + fn addStrTok( + block: *GenIr, + tag: Ir.Inst.Tag, + str_index: Ir.NullTerminatedString, + byte_offset: usize, + ) !Ir.Inst.Ref { + assert(byte_offset <= std.math.maxInt(u32)); + return block.add(.{ + .tag = tag, + .data = .{ .str_tok = .{ + .start = str_index, + .src_offset = @intCast(byte_offset), + } }, + }); + } + fn addPayloadNode(gi: *GenIr, tag: Ir.Inst.Tag, extra: anytype) !Ir.Inst.Ref { const gpa = gi.astgen.gpa; try gi.instructions.ensureUnusedCapacity(gpa, 1); @@ -675,13 +699,8 @@ fn numberLiteral(block: *GenIr, node: *const Ast.Node) InnerError!Ir.Inst.Ref { } fn stringLiteral(gi: *GenIr, node: *const Ast.Node) InnerError!Ir.Inst.Ref { - const str = try gi.astgen.stringFromNode(node); - return gi.add(.{ - .tag = .string, - .data = .{ .string = .{ - .start = str, - } }, - }); + const str = try gi.astgen.strFromNode(node); + return gi.addStr(str.index, str.len); } fn stringExpr(gen: *GenIr, expr_node: *const Ast.Node) InnerError!Ir.Inst.Ref { @@ -689,13 +708,16 @@ fn stringExpr(gen: *GenIr, expr_node: *const Ast.Node) InnerError!Ir.Inst.Ref { return stringLiteral(gen, first_node); } -fn identifier(gi: *GenIr, scope: *Scope, node: *const Ast.Node) InnerError!Ir.Inst.Ref { - const astgen = gi.astgen; - const str = try astgen.stringFromNode(node); - if (scope.lookup(str)) |decl| { - return gi.addUnaryNode(.load, decl.inst_index.toRef()); +fn identifier( + block: *GenIr, + scope: *Scope, + node: *const Ast.Node, +) InnerError!Ir.Inst.Ref { + const str = try block.astgen.strFromNode(node); + if (scope.lookup(str.index)) |decl| { + return block.addUnaryNode(.load, decl.inst_index.toRef()); } - return gi.addDeclRef(str); + return block.addStrTok(.decl_ref, str.index, node.loc.start); } fn expr(gi: *GenIr, scope: *Scope, optional_expr: ?*const Ast.Node) InnerError!Ir.Inst.Ref { @@ -1014,10 +1036,10 @@ fn assignStmt(gi: *GenIr, scope: *Scope, node: *const Ast.Node) InnerError!void const astgen = gi.astgen; const identifier_node = node.data.bin.lhs.?; const expr_node = node.data.bin.rhs.?; - const name_ref = try astgen.stringFromNode(identifier_node); + const name_str = try astgen.strFromNode(identifier_node); // TODO: Support globals as well - if (scope.lookup(name_ref)) |decl| { + if (scope.lookup(name_str.index)) |decl| { const expr_result = try expr(gi, scope, expr_node); _ = try gi.addBinaryNode(.store, decl.inst_index.toRef(), expr_result); return; @@ -1112,12 +1134,12 @@ fn fieldAccess(gi: *GenIr, scope: *Scope, node: *const Ast.Node) InnerError!Ir.I const data = node.data.bin; assert(data.rhs.?.tag == .identifier); - const field_str = try gi.astgen.stringFromNode(data.rhs.?); + const field_str = try gi.astgen.strFromNode(data.rhs.?); const lhs = try expr(gi, scope, data.lhs.?); return gi.addPayloadNode(.field_ptr, Ir.Inst.Field{ .lhs = lhs, - .field_name_start = field_str, + .field_name_start = field_str.index, }); } @@ -1133,10 +1155,10 @@ fn calleeExpr(gi: *GenIr, scope: *Scope, node: *const Ast.Node) InnerError!Calle const call_target = data.rhs.?; assert(call_target.tag == .identifier); - const field_str = try gi.astgen.stringFromNode(call_target); + const field_str = try gi.astgen.strFromNode(call_target); const lhs = try expr(gi, scope, data.lhs.?); return .{ - .field = .{ .obj_ptr = lhs, .field_name_start = field_str }, + .field = .{ .obj_ptr = lhs, .field_name_start = field_str.index }, }; }, .identifier => { @@ -1265,9 +1287,9 @@ fn tempDecl(gi: *GenIr, scope: *Scope, decl_node: *const Ast.Node) !void { const astgen = gi.astgen; const identifier_node = decl_node.data.bin.lhs.?; const expr_node = decl_node.data.bin.rhs.?; - const name_ref = try astgen.stringFromNode(identifier_node); + const name_str = try astgen.strFromNode(identifier_node); - if (scope.lookup(name_ref)) |_| { + if (scope.lookup(name_str.index)) |_| { return fail(astgen, decl_node, "duplicate identifier", .{}); } @@ -1275,7 +1297,7 @@ fn tempDecl(gi: *GenIr, scope: *Scope, decl_node: *const Ast.Node) !void { const expr_result = try expr(gi, scope, expr_node); _ = try gi.addBinaryNode(.store, alloc_inst, expr_result); - return scope.insert(name_ref, .{ + return scope.insert(name_str.index, .{ .decl_node = decl_node, .inst_index = alloc_inst.toIndex().?, }); @@ -1297,9 +1319,11 @@ fn varDecl(gi: *GenIr, scope: *Scope, decl_node: *const Ast.Node) !void { _ = try expr(&decl_block, scope, expr_node); const var_inst = try decl_block.addVar(); + + const name_str = try astgen.strFromNode(identifier_node); try setDeclaration(decl_inst.toIndex().?, .{ .tag = .variable, - .name = try astgen.stringFromNode(identifier_node), + .name = name_str.index, .ref = var_inst, .decl_node = decl_node, .body_block = &decl_block, @@ -1353,10 +1377,11 @@ fn defaultBlock( const block_stmts = body_node.data.list.items.?; try blockInner(&decl_scope, scope, block_stmts); + const name_str = try astgen.strFromSlice(Story.default_knot_name); const knot_inst = try decl_scope.addKnot(); try setDeclaration(decl_inst, .{ .tag = .knot, - .name = try astgen.stringFromBytes(Story.default_knot_name), + .name = name_str.index, .ref = knot_inst, .decl_node = body_node, .body_block = &decl_scope, @@ -1387,11 +1412,11 @@ fn stitchDeclInner( const args_list = args_node.data.list.items.?; for (args_list) |arg| { assert(arg.tag == .parameter_decl); - const arg_str = try astgen.stringFromNode(arg); - const arg_inst = try decl_block.addStr(.param, arg_str); + const arg_str = try astgen.strFromNode(arg); + const arg_inst = try decl_block.addStrTok(.param, arg_str.index, arg.loc.start); // TODO: Maybe make decl accept a ref? - try scope.insert(arg_str, .{ + try scope.insert(arg_str.index, .{ .decl_node = arg, .inst_index = arg_inst.toIndex().?, }); @@ -1403,12 +1428,11 @@ fn stitchDeclInner( _ = try decl_block.addUnaryNode(.implicit_ret, .none); } - const knot_inst = try decl_block.addKnot(); - const name_str = try astgen.stringFromNode(identifier_node); + const name_str = try astgen.strFromNode(identifier_node); try setDeclaration(decl_inst, .{ .tag = .knot, - .name = try astgen.qualifiedString(scope.namespace_prefix, name_str), - .ref = knot_inst, + .name = try astgen.qualifiedString(scope.namespace_prefix, name_str.index), + .ref = try decl_block.addKnot(), .decl_node = decl_node, .body_block = &decl_block, }); @@ -1444,8 +1468,8 @@ fn knotDecl(gi: *GenIr, parent_scope: *Scope, decl_node: *const Ast.Node) InnerE try stitchDeclInner(gi, &decl_scope, decl_node, prototype_node, null); } - const name_str = try gi.astgen.stringFromNode(identifier_node); - try decl_scope.setNamespacePrefix(name_str); + const name_str = try gi.astgen.strFromNode(identifier_node); + try decl_scope.setNamespacePrefix(name_str.index); for (nested_decls_list[start_index..]) |nested_decl_node| { switch (nested_decl_node.tag) { diff --git a/src/Ir.zig b/src/Ir.zig index 06cb5cf..3f47ea0 100644 --- a/src/Ir.zig +++ b/src/Ir.zig @@ -166,16 +166,24 @@ pub const Inst = struct { declaration, decl_knot, decl_var, + /// Uses the `str_tok` union field. decl_ref, alloc, load, store, + /// Uses the `bin` union field. add, + /// Uses the `bin` union field. sub, + /// Uses the `bin` union field. mul, + /// Uses the `bin` union field. div, + /// Uses the `bin` union field. mod, + /// Uses the `un` union field. neg, + /// Uses the `un` union field. not, cmp_eq, cmp_neq, @@ -183,8 +191,10 @@ pub const Inst = struct { cmp_gte, cmp_lt, cmp_lte, - integer, - string, + /// Uses the `int` union field. + int, + /// Uses the `str` union field. + str, block, condbr, @"break", @@ -212,17 +222,22 @@ pub const Inst = struct { lhs: Ref, rhs: Ref, }, - integer: struct { - value: u64, - }, - string: struct { + int: u64, + str: struct { /// Offset into `string_bytes`. start: NullTerminatedString, + /// Number of bytes in the string. + len: u32, - pub fn get(self: @This(), ir: Ir) []const u8 { - return nullTerminatedString(ir, self.start); + pub fn get(self: @This(), code: Ir) []const u8 { + return code.string_bytes[@intFromEnum(self.start)..][0..self.len]; } }, + str_tok: struct { + /// Offset into `string_bytes`. + start: NullTerminatedString, + src_offset: u32, + }, }; pub const Declaration = struct { diff --git a/src/Sema.zig b/src/Sema.zig index 9dde3f4..b615284 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -1,19 +1,25 @@ const std = @import("std"); +const Ast = @import("Ast.zig"); const Ir = @import("Ir.zig"); const Story = @import("Story.zig"); -const Compilation = @import("compile.zig").Compilation; +const compile = @import("compile.zig"); +const Compilation = compile.Compilation; const assert = std.debug.assert; const Sema = @This(); gpa: std.mem.Allocator, -ir: *const Ir, +arena: std.mem.Allocator, +tree: Ast, +ir: Ir, constants: std.ArrayListUnmanaged(Compilation.Constant) = .empty, constant_map: std.AutoHashMapUnmanaged(Compilation.Constant, u32) = .empty, knots: std.ArrayListUnmanaged(Compilation.Knot) = .empty, globals: std.ArrayListUnmanaged(u32) = .empty, +errors: *std.ArrayListUnmanaged(Compilation.Error), const InnerError = error{ OutOfMemory, + AnalysisFail, TooManyConstants, InvalidJump, }; @@ -37,8 +43,26 @@ pub fn deinit(sema: *Sema) void { sema.* = undefined; } -fn fail(_: *Sema, message: []const u8) InnerError { - @panic(message); +pub const SrcLoc = struct { + byte_offset: u32, +}; + +fn fail( + sema: *Sema, + src: SrcLoc, + comptime format: []const u8, + args: anytype, +) error{ OutOfMemory, AnalysisFail } { + // TODO: Revisit this + const message = try std.fmt.allocPrint(sema.arena, format, args); + const loc = compile.findLineColumn(sema.tree.source, src.byte_offset); + try sema.errors.append(sema.gpa, .{ + .line = loc.line, + .column = loc.column, + .snippet = loc.source_line, + .message = message, + }); + return error.AnalysisFail; } fn getConstant(sema: *Sema, data: Compilation.Constant) !Ref { @@ -61,23 +85,23 @@ fn addGlobal(sema: *Sema, name: Ir.NullTerminatedString) !Ref { return .{ .global = interned.constant }; } -fn getGlobal(sema: *Sema, name: Ir.NullTerminatedString) !Ref { +fn getGlobal(sema: *Sema, name: Ir.NullTerminatedString) !?Ref { const interned = try sema.getConstant(.{ .string = name }); for (sema.ir.globals) |global| { if (global.name == name) { return .{ .global = interned.constant }; } } - return fail(sema, "unknown global variable"); + return null; } fn irInteger(sema: *Sema, inst: Ir.Inst.Index) InnerError!Ref { - const data = sema.ir.instructions[@intFromEnum(inst)].data.integer; - return sema.getConstant(.{ .integer = data.value }); + const value = sema.ir.instructions[@intFromEnum(inst)].data.int; + return sema.getConstant(.{ .integer = value }); } fn irString(sema: *Sema, inst: Ir.Inst.Index) InnerError!Ref { - const data = sema.ir.instructions[@intFromEnum(inst)].data.string; + const data = sema.ir.instructions[@intFromEnum(inst)].data.str; return sema.getConstant(.{ .string = data.start }); } @@ -308,9 +332,20 @@ fn irImplicitRet(_: *Sema, chunk: *Chunk, _: Ir.Inst.Index) InnerError!Ref { return chunk.addByteOp(.exit); } +fn resolveGlobal( + sema: *Sema, + byte_offset: u32, + global_name: Ir.NullTerminatedString, +) !Ref { + if (try sema.getGlobal(global_name)) |global| { + return global; + } + return fail(sema, .{ .byte_offset = byte_offset }, "unknown global variable", .{}); +} + fn irDeclRef(sema: *Sema, _: *Chunk, inst: Ir.Inst.Index) InnerError!Ref { - const data = sema.ir.instructions[@intFromEnum(inst)].data.string; - return sema.getGlobal(data.start); + const data = sema.ir.instructions[@intFromEnum(inst)].data.str_tok; + return resolveGlobal(sema, data.src_offset, data.start); } fn irDeclVar( @@ -470,8 +505,8 @@ fn blockBodyInner(sema: *Sema, chunk: *Chunk, body: []const Ir.Inst.Index) Inner .cmp_gt => try irBinary(sema, chunk, inst, .cmp_gt), .cmp_gte => try irBinary(sema, chunk, inst, .cmp_gte), .decl_ref => try irDeclRef(sema, chunk, inst), - .integer => try irInteger(sema, inst), - .string => try irString(sema, inst), + .int => try irInteger(sema, inst), + .str => try irString(sema, inst), .condbr => try irCondBr(sema, chunk, inst), .@"break" => { try irBreak(sema, inst); @@ -505,7 +540,7 @@ fn blockBodyInner(sema: *Sema, chunk: *Chunk, body: []const Ir.Inst.Index) Inner } } -pub fn analyzeFile(sema: *Sema, inst: Ir.Inst.Index) !void { +pub fn analyzeFile(sema: *Sema, inst: Ir.Inst.Index) InnerError!void { const data = sema.ir.instructions[@intFromEnum(inst)].data.payload; const extra = sema.ir.extraData(Ir.Inst.Block, data.payload_index); const body = sema.ir.bodySlice(extra.end, extra.data.body_len); diff --git a/src/compile.zig b/src/compile.zig index d99b99b..a9cfe7f 100644 --- a/src/compile.zig +++ b/src/compile.zig @@ -53,6 +53,7 @@ pub const Compilation = struct { try w.splatByteAll(' ', column - 1); } try w.writeAll("^\n"); + return w.flush(); } pub const CompileOptions = struct { @@ -78,7 +79,10 @@ pub const Compilation = struct { var sema: Sema = .{ .gpa = gpa, - .ir = &ir, + .arena = arena, + .tree = ast, + .ir = ir, + .errors = &errors, }; defer sema.deinit(); @@ -119,7 +123,12 @@ pub const Compilation = struct { } break :fatal true; } else fatal: { - try sema.analyzeFile(.file_inst); + sema.analyzeFile(.file_inst) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.AnalysisFail => break :fatal true, + // TODO: These errors should be handled... + else => |e| return e, + }; break :fatal false; }; return .{ @@ -195,13 +204,13 @@ pub const Compilation = struct { } }; -const Loc = struct { +pub const Loc = struct { line: usize, column: usize, source_line: []const u8, }; -fn findLineColumn(source: []const u8, byte_offset: usize) Loc { +pub fn findLineColumn(source: []const u8, byte_offset: usize) Loc { var line: usize = 0; var column: usize = 0; var line_start: usize = 0; diff --git a/src/error_tests.zig b/src/error_tests.zig index 411fcea..d1abaac 100644 --- a/src/error_tests.zig +++ b/src/error_tests.zig @@ -1,6 +1,18 @@ const std = @import("std"); const Compilation = @import("compile.zig").Compilation; +test "compiler: unknown global variable" { + try testEqual( + \\{a} + , + \\:1:2: error: unknown global variable + \\1 | {a} + \\ | ^ + \\ + , + ); +} + test "compiler: global variable shadowing" { try testEqual( \\VAR a = 0 diff --git a/src/print_ir.zig b/src/print_ir.zig index 467f7f1..ce9dc0c 100644 --- a/src/print_ir.zig +++ b/src/print_ir.zig @@ -49,12 +49,17 @@ pub const Writer = struct { } fn writeIntegerInst(self: *Writer, w: *std.Io.Writer, inst: Ir.Inst.Index) Error!void { - const data = self.code.instructions[@intFromEnum(inst)].data.integer; - try w.print("{d}", .{data.value}); + const data = self.code.instructions[@intFromEnum(inst)].data.int; + try w.print("{d}", .{data}); } fn writeStringInst(self: *Writer, w: *std.Io.Writer, inst: Ir.Inst.Index) Error!void { - const data = self.code.instructions[@intFromEnum(inst)].data.string; + const data = self.code.instructions[@intFromEnum(inst)].data.str; + try self.writeStringRef(w, data.start); + } + + fn writeStrTokInst(self: *Writer, w: *std.Io.Writer, inst: Ir.Inst.Index) Error!void { + const data = self.code.instructions[@intFromEnum(inst)].data.str_tok; try self.writeStringRef(w, data.start); } @@ -123,11 +128,6 @@ pub const Writer = struct { try w.writeAll("]"); } - fn writeParamInst(self: *Writer, w: *std.Io.Writer, inst: Ir.Inst.Index) Error!void { - const data = self.code.instructions[@intFromEnum(inst)].data.string; - try self.writeStringRef(w, data.start); - } - fn writeBreakInst(self: *Writer, w: *std.Io.Writer, inst: Ir.Inst.Index) Error!void { const data = self.code.instructions[@intFromEnum(inst)].data.payload; const extra = self.code.extraData(Ir.Inst.Break, data.payload_index); @@ -274,7 +274,7 @@ pub const Writer = struct { .declaration => try self.writeDeclarationInst(w, inst), .decl_knot => try self.writeKnotDeclInst(w, inst), .decl_var => try self.writeVarDeclInst(w, inst), - .decl_ref => try self.writeStringInst(w, inst), + .decl_ref => try self.writeStrTokInst(w, inst), .condbr => try self.writeCondbrInst(w, inst), .@"break" => try self.writeBreakInst(w, inst), .switch_br => try self.writeSwitchBrInst(w, inst), @@ -295,8 +295,8 @@ pub const Writer = struct { .cmp_gte => try self.writeBinaryInst(w, inst), .cmp_lt => try self.writeBinaryInst(w, inst), .cmp_lte => try self.writeBinaryInst(w, inst), - .integer => try self.writeIntegerInst(w, inst), - .string => try self.writeStringInst(w, inst), + .int => try self.writeIntegerInst(w, inst), + .str => try self.writeStringInst(w, inst), .content_push => try self.writeUnaryInst(w, inst), .content_flush => try self.writeUnaryInst(w, inst), .choice_br => try self.writeChoiceBrInst(w, inst), @@ -306,7 +306,7 @@ pub const Writer = struct { .field_call => try self.writeCallInst(w, inst, .field), .field_divert => try self.writeCallInst(w, inst, .field), .field_ptr => try self.writeFieldPtrInst(w, inst), - .param => try self.writeParamInst(w, inst), + .param => try self.writeStrTokInst(w, inst), } try w.writeAll(")"); try w.writeAll("\n");