summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMitchell Hashimoto <m@mitchellh.com>2025-09-20 19:49:29 -0700
committerMitchell Hashimoto <m@mitchellh.com>2025-09-20 20:28:25 -0700
commit10dc9353b7b5d85179ef7b1305fe9d5a73ff392d (patch)
treefec32318bccc6a55023b00ba23a60b35cf26a4b4 /src
parent511314e1a1ecfa5ed21dbc8eabfd0aeecb27b792 (diff)
unicode: delete props.zig and clean up symbols deps too
Follow up to #8810. Same reasoning.
Diffstat (limited to 'src')
-rw-r--r--src/benchmark/IsSymbol.zig5
-rw-r--r--src/build/UnicodeTables.zig4
-rw-r--r--src/main_ghostty.zig2
-rw-r--r--src/renderer/cell.zig2
-rw-r--r--src/unicode/grapheme.zig5
-rw-r--r--src/unicode/main.zig2
-rw-r--r--src/unicode/props.zig181
-rw-r--r--src/unicode/props_ziglyph.zig2
-rw-r--r--src/unicode/symbols_table.zig17
-rw-r--r--src/unicode/symbols_ziglyph.zig (renamed from src/unicode/symbols.zig)14
10 files changed, 29 insertions, 205 deletions
diff --git a/src/benchmark/IsSymbol.zig b/src/benchmark/IsSymbol.zig
index 5b2ffd425..ce635626a 100644
--- a/src/benchmark/IsSymbol.zig
+++ b/src/benchmark/IsSymbol.zig
@@ -10,7 +10,8 @@ const Allocator = std.mem.Allocator;
const Benchmark = @import("Benchmark.zig");
const options = @import("options.zig");
const UTF8Decoder = @import("../terminal/UTF8Decoder.zig");
-const symbols = @import("../unicode/symbols.zig");
+const symbols = @import("../unicode/symbols_ziglyph.zig");
+const symbols_table = @import("../unicode/symbols_table.zig").table;
const log = std.log.scoped(.@"is-symbol-bench");
@@ -127,7 +128,7 @@ fn stepTable(ptr: *anyopaque) Benchmark.Error!void {
const cp_, const consumed = d.next(c);
assert(consumed);
if (cp_) |cp| {
- std.mem.doNotOptimizeAway(symbols.table.get(cp));
+ std.mem.doNotOptimizeAway(symbols_table.get(cp));
}
}
}
diff --git a/src/build/UnicodeTables.zig b/src/build/UnicodeTables.zig
index 7c1229f7f..6733b5315 100644
--- a/src/build/UnicodeTables.zig
+++ b/src/build/UnicodeTables.zig
@@ -15,7 +15,7 @@ pub fn init(b: *std.Build) !UnicodeTables {
const props_exe = b.addExecutable(.{
.name = "props-unigen",
.root_module = b.createModule(.{
- .root_source_file = b.path("src/unicode/props.zig"),
+ .root_source_file = b.path("src/unicode/props_ziglyph.zig"),
.target = b.graph.host,
.strip = false,
.omit_frame_pointer = false,
@@ -26,7 +26,7 @@ pub fn init(b: *std.Build) !UnicodeTables {
const symbols_exe = b.addExecutable(.{
.name = "symbols-unigen",
.root_module = b.createModule(.{
- .root_source_file = b.path("src/unicode/symbols.zig"),
+ .root_source_file = b.path("src/unicode/symbols_ziglyph.zig"),
.target = b.graph.host,
.strip = false,
.omit_frame_pointer = false,
diff --git a/src/main_ghostty.zig b/src/main_ghostty.zig
index aca33a510..555dd16bf 100644
--- a/src/main_ghostty.zig
+++ b/src/main_ghostty.zig
@@ -191,6 +191,8 @@ test {
_ = @import("simd/main.zig");
_ = @import("synthetic/main.zig");
_ = @import("unicode/main.zig");
+ _ = @import("unicode/props_ziglyph.zig");
+ _ = @import("unicode/symbols_ziglyph.zig");
// Extra
_ = @import("extra/bash.zig");
diff --git a/src/renderer/cell.zig b/src/renderer/cell.zig
index 6ada849ed..3cf306f91 100644
--- a/src/renderer/cell.zig
+++ b/src/renderer/cell.zig
@@ -6,7 +6,7 @@ const terminal = @import("../terminal/main.zig");
const renderer = @import("../renderer.zig");
const shaderpkg = renderer.Renderer.API.shaders;
const ArrayListCollection = @import("../datastruct/array_list_collection.zig").ArrayListCollection;
-const symbols = @import("../unicode/symbols.zig").table;
+const symbols = @import("../unicode/symbols_table.zig").table;
/// The possible cell content keys that exist.
pub const Key = enum {
diff --git a/src/unicode/grapheme.zig b/src/unicode/grapheme.zig
index 7847ef6f5..bfc09b854 100644
--- a/src/unicode/grapheme.zig
+++ b/src/unicode/grapheme.zig
@@ -1,7 +1,6 @@
const std = @import("std");
-const props = @import("props.zig");
-const GraphemeBoundaryClass = props.GraphemeBoundaryClass;
-const table = props.table;
+const table = @import("props_table.zig").table;
+const GraphemeBoundaryClass = @import("Properties.zig").GraphemeBoundaryClass;
/// Determines if there is a grapheme break between two codepoints. This
/// must be called sequentially maintaining the state between calls.
diff --git a/src/unicode/main.zig b/src/unicode/main.zig
index ae50075ff..cb2fb567f 100644
--- a/src/unicode/main.zig
+++ b/src/unicode/main.zig
@@ -7,7 +7,5 @@ pub const graphemeBreak = grapheme.graphemeBreak;
pub const GraphemeBreakState = grapheme.BreakState;
test {
- _ = @import("props_ziglyph.zig");
- _ = @import("symbols.zig");
@import("std").testing.refAllDecls(@This());
}
diff --git a/src/unicode/props.zig b/src/unicode/props.zig
deleted file mode 100644
index 7edb3761c..000000000
--- a/src/unicode/props.zig
+++ /dev/null
@@ -1,181 +0,0 @@
-const props = @This();
-const std = @import("std");
-const assert = std.debug.assert;
-const ziglyph = @import("ziglyph");
-const lut = @import("lut.zig");
-
-/// The lookup tables for Ghostty.
-pub const table = table: {
- // This is only available after running main() below as part of the Ghostty
- // build.zig, but due to Zig's lazy analysis we can still reference it here.
- const generated = @import("unicode_tables").Tables(Properties);
- const Tables = lut.Tables(Properties);
- break :table Tables{
- .stage1 = &generated.stage1,
- .stage2 = &generated.stage2,
- .stage3 = &generated.stage3,
- };
-};
-
-/// Property set per codepoint that Ghostty cares about.
-///
-/// Adding to this lets you find new properties but also potentially makes
-/// our lookup tables less efficient. Any changes to this should run the
-/// benchmarks in src/bench to verify that we haven't regressed.
-pub const Properties = struct {
- /// Codepoint width. We clamp to [0, 2] since Ghostty handles control
- /// characters and we max out at 2 for wide characters (i.e. 3-em dash
- /// becomes a 2-em dash).
- width: u2 = 0,
-
- /// Grapheme boundary class.
- grapheme_boundary_class: GraphemeBoundaryClass = .invalid,
-
- // Needed for lut.Generator
- pub fn eql(a: Properties, b: Properties) bool {
- return a.width == b.width and
- a.grapheme_boundary_class == b.grapheme_boundary_class;
- }
-
- // Needed for lut.Generator
- pub fn format(
- self: Properties,
- comptime layout: []const u8,
- opts: std.fmt.FormatOptions,
- writer: anytype,
- ) !void {
- _ = layout;
- _ = opts;
- try std.fmt.format(writer,
- \\.{{
- \\ .width= {},
- \\ .grapheme_boundary_class= .{s},
- \\}}
- , .{
- self.width,
- @tagName(self.grapheme_boundary_class),
- });
- }
-};
-
-/// Possible grapheme boundary classes. This isn't an exhaustive list:
-/// we omit control, CR, LF, etc. because in Ghostty's usage that are
-/// impossible because they're handled by the terminal.
-pub const GraphemeBoundaryClass = enum(u4) {
- invalid,
- L,
- V,
- T,
- LV,
- LVT,
- prepend,
- extend,
- zwj,
- spacing_mark,
- regional_indicator,
- extended_pictographic,
- extended_pictographic_base, // \p{Extended_Pictographic} & \p{Emoji_Modifier_Base}
- emoji_modifier, // \p{Emoji_Modifier}
-
- /// Gets the grapheme boundary class for a codepoint. This is VERY
- /// SLOW. The use case for this is only in generating lookup tables.
- pub fn init(cp: u21) GraphemeBoundaryClass {
- // We special-case modifier bases because we should not break
- // if a modifier isn't next to a base.
- if (ziglyph.emoji.isEmojiModifierBase(cp)) {
- assert(ziglyph.emoji.isExtendedPictographic(cp));
- return .extended_pictographic_base;
- }
-
- if (ziglyph.emoji.isEmojiModifier(cp)) return .emoji_modifier;
- if (ziglyph.emoji.isExtendedPictographic(cp)) return .extended_pictographic;
- if (ziglyph.grapheme_break.isL(cp)) return .L;
- if (ziglyph.grapheme_break.isV(cp)) return .V;
- if (ziglyph.grapheme_break.isT(cp)) return .T;
- if (ziglyph.grapheme_break.isLv(cp)) return .LV;
- if (ziglyph.grapheme_break.isLvt(cp)) return .LVT;
- if (ziglyph.grapheme_break.isPrepend(cp)) return .prepend;
- if (ziglyph.grapheme_break.isExtend(cp)) return .extend;
- if (ziglyph.grapheme_break.isZwj(cp)) return .zwj;
- if (ziglyph.grapheme_break.isSpacingmark(cp)) return .spacing_mark;
- if (ziglyph.grapheme_break.isRegionalIndicator(cp)) return .regional_indicator;
-
- // This is obviously not INVALID invalid, there is SOME grapheme
- // boundary class for every codepoint. But we don't care about
- // anything that doesn't fit into the above categories.
- return .invalid;
- }
-
- /// Returns true if this is an extended pictographic type. This
- /// should be used instead of comparing the enum value directly
- /// because we classify multiple.
- pub fn isExtendedPictographic(self: GraphemeBoundaryClass) bool {
- return switch (self) {
- .extended_pictographic,
- .extended_pictographic_base,
- => true,
-
- else => false,
- };
- }
-};
-
-pub fn get(cp: u21) Properties {
- const zg_width = ziglyph.display_width.codePointWidth(cp, .half);
-
- return .{
- .width = @intCast(@min(2, @max(0, zg_width))),
- .grapheme_boundary_class = .init(cp),
- };
-}
-
-/// Runnable binary to generate the lookup tables and output to stdout.
-pub fn main() !void {
- var arena_state = std.heap.ArenaAllocator.init(std.heap.page_allocator);
- defer arena_state.deinit();
- const alloc = arena_state.allocator();
-
- const gen: lut.Generator(
- Properties,
- struct {
- pub fn get(ctx: @This(), cp: u21) !Properties {
- _ = ctx;
- return props.get(cp);
- }
-
- pub fn eql(ctx: @This(), a: Properties, b: Properties) bool {
- _ = ctx;
- return a.eql(b);
- }
- },
- ) = .{};
-
- const t = try gen.generate(alloc);
- defer alloc.free(t.stage1);
- defer alloc.free(t.stage2);
- defer alloc.free(t.stage3);
- try t.writeZig(std.io.getStdOut().writer());
-
- // Uncomment when manually debugging to see our table sizes.
- // std.log.warn("stage1={} stage2={} stage3={}", .{
- // t.stage1.len,
- // t.stage2.len,
- // t.stage3.len,
- // });
-}
-
-// This is not very fast in debug modes, so its commented by default.
-// IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CODEPOINTWIDTH CHANGES.
-// test "unicode props: tables match ziglyph" {
-// const testing = std.testing;
-//
-// const min = 0xFF + 1; // start outside ascii
-// for (min..std.math.maxInt(u21)) |cp| {
-// const t = table.get(@intCast(cp));
-// const zg = @min(2, @max(0, ziglyph.display_width.codePointWidth(@intCast(cp), .half)));
-// if (t.width != zg) {
-// std.log.warn("mismatch cp=U+{x} t={} zg={}", .{ cp, t, zg });
-// try testing.expect(false);
-// }
-// }
-// }
diff --git a/src/unicode/props_ziglyph.zig b/src/unicode/props_ziglyph.zig
index fd123f3b5..9af60e337 100644
--- a/src/unicode/props_ziglyph.zig
+++ b/src/unicode/props_ziglyph.zig
@@ -40,7 +40,7 @@ pub fn get(cp: u21) Properties {
const zg_width = ziglyph.display_width.codePointWidth(cp, .half);
return .{
.width = @intCast(@min(2, @max(0, zg_width))),
- .grapheme_boundary_class = .init(cp),
+ .grapheme_boundary_class = graphemeBoundaryClass(cp),
};
}
diff --git a/src/unicode/symbols_table.zig b/src/unicode/symbols_table.zig
new file mode 100644
index 000000000..28263b9be
--- /dev/null
+++ b/src/unicode/symbols_table.zig
@@ -0,0 +1,17 @@
+const lut = @import("lut.zig");
+
+/// The lookup tables for Ghostty.
+pub const table = table: {
+ // This is only available after running a generator as part of the Ghostty
+ // build.zig process, but due to Zig's lazy analysis we can still reference
+ // it here.
+ //
+ // An example process is the `main` in `symbols_ziglyph.zig`
+ const generated = @import("symbols_tables").Tables(bool);
+ const Tables = lut.Tables(bool);
+ break :table Tables{
+ .stage1 = &generated.stage1,
+ .stage2 = &generated.stage2,
+ .stage3 = &generated.stage3,
+ };
+};
diff --git a/src/unicode/symbols.zig b/src/unicode/symbols_ziglyph.zig
index 3c2a84e76..0b01e5398 100644
--- a/src/unicode/symbols.zig
+++ b/src/unicode/symbols_ziglyph.zig
@@ -4,19 +4,6 @@ const assert = std.debug.assert;
const ziglyph = @import("ziglyph");
const lut = @import("lut.zig");
-/// The lookup tables for Ghostty.
-pub const table = table: {
- // This is only available after running main() below as part of the Ghostty
- // build.zig, but due to Zig's lazy analysis we can still reference it here.
- const generated = @import("symbols_tables").Tables(bool);
- const Tables = lut.Tables(bool);
- break :table Tables{
- .stage1 = &generated.stage1,
- .stage2 = &generated.stage2,
- .stage3 = &generated.stage3,
- };
-};
-
/// Returns true if the codepoint is a "symbol-like" character, which
/// for now we define as anything in a private use area and anything
/// in several unicode blocks:
@@ -82,6 +69,7 @@ test "unicode symbols: tables match ziglyph" {
if (std.valgrind.runningOnValgrind() > 0) return error.SkipZigTest;
const testing = std.testing;
+ const table = @import("symbols_table.zig").table;
for (0..std.math.maxInt(u21)) |cp| {
const t = table.get(@intCast(cp));