summaryrefslogtreecommitdiff
path: root/src/unicode
diff options
context:
space:
mode:
authorJacob Sandlund <jacob@jacobsandlund.com>2025-09-23 09:36:41 -0400
committerJacob Sandlund <jacob@jacobsandlund.com>2025-09-23 09:36:41 -0400
commitb01770c21c9b393d6dcfacae75d9d1159be33f9a (patch)
tree7bbfbdad2377f29aa11f9f90cb97d7c6e2bd306f /src/unicode
parent7b0722bf16043fe7ee099e2fd8ca11c78c976bc5 (diff)
parent3eb646ea6baf5eb45bbd39cd7ad624cda0264554 (diff)
Merge remote-tracking branch 'upstream/main' into jacob/uucode
Diffstat (limited to 'src/unicode')
-rw-r--r--src/unicode/Properties.zig75
-rw-r--r--src/unicode/grapheme.zig5
-rw-r--r--src/unicode/main.zig7
-rw-r--r--src/unicode/props.zig251
-rw-r--r--src/unicode/props_table.zig96
-rw-r--r--src/unicode/props_uucode.zig95
-rw-r--r--src/unicode/symbols_table.zig (renamed from src/unicode/symbols.zig)49
7 files changed, 276 insertions, 302 deletions
diff --git a/src/unicode/Properties.zig b/src/unicode/Properties.zig
new file mode 100644
index 000000000..b7840743a
--- /dev/null
+++ b/src/unicode/Properties.zig
@@ -0,0 +1,75 @@
+//! Property set per codepoint that Ghostty cares about.
+//!
+//! Adding to this lets you find new properties but also potentially makes
+//! our lookup tables less efficient. Any changes to this should run the
+//! benchmarks in src/bench to verify that we haven't regressed.
+const Properties = @This();
+
+const std = @import("std");
+
+/// Codepoint width. We clamp to [0, 2] since Ghostty handles control
+/// characters and we max out at 2 for wide characters (i.e. 3-em dash
+/// becomes a 2-em dash).
+width: u2 = 0,
+
+/// Grapheme boundary class.
+grapheme_boundary_class: GraphemeBoundaryClass = .invalid,
+
+// Needed for lut.Generator
+pub fn eql(a: Properties, b: Properties) bool {
+ return a.width == b.width and
+ a.grapheme_boundary_class == b.grapheme_boundary_class;
+}
+
+// Needed for lut.Generator
+pub fn format(
+ self: Properties,
+ comptime layout: []const u8,
+ opts: std.fmt.FormatOptions,
+ writer: anytype,
+) !void {
+ _ = layout;
+ _ = opts;
+ try std.fmt.format(writer,
+ \\.{{
+ \\ .width= {},
+ \\ .grapheme_boundary_class= .{s},
+ \\}}
+ , .{
+ self.width,
+ @tagName(self.grapheme_boundary_class),
+ });
+}
+
+/// Possible grapheme boundary classes. This isn't an exhaustive list:
+/// we omit control, CR, LF, etc. because in Ghostty's usage they are
+/// impossible, since the terminal handles them.
+pub const GraphemeBoundaryClass = enum(u4) {
+ invalid,
+ L,
+ V,
+ T,
+ LV,
+ LVT,
+ prepend,
+ extend,
+ zwj,
+ spacing_mark,
+ regional_indicator,
+ extended_pictographic,
+ extended_pictographic_base, // \p{Extended_Pictographic} & \p{Emoji_Modifier_Base}
+ emoji_modifier, // \p{Emoji_Modifier}
+
+ /// Returns true if this is an extended pictographic type. This
+ /// should be used instead of comparing the enum value directly
+ /// because we classify multiple values as extended pictographic.
+ pub fn isExtendedPictographic(self: GraphemeBoundaryClass) bool {
+ return switch (self) {
+ .extended_pictographic,
+ .extended_pictographic_base,
+ => true,
+
+ else => false,
+ };
+ }
+};
diff --git a/src/unicode/grapheme.zig b/src/unicode/grapheme.zig
index a028e5690..2311bbeec 100644
--- a/src/unicode/grapheme.zig
+++ b/src/unicode/grapheme.zig
@@ -1,7 +1,6 @@
const std = @import("std");
-const props = @import("props.zig");
-const GraphemeBoundaryClass = props.GraphemeBoundaryClass;
-const table = props.table;
+const table = @import("props_table.zig").table;
+const GraphemeBoundaryClass = @import("Properties.zig").GraphemeBoundaryClass;
/// Determines if there is a grapheme break between two codepoints. This
/// must be called sequentially maintaining the state between calls.
diff --git a/src/unicode/main.zig b/src/unicode/main.zig
index 17c86deca..cb2fb567f 100644
--- a/src/unicode/main.zig
+++ b/src/unicode/main.zig
@@ -1,14 +1,11 @@
pub const lut = @import("lut.zig");
const grapheme = @import("grapheme.zig");
-const props = @import("props.zig");
-pub const table = props.table;
-pub const Properties = props.Properties;
-pub const getProperties = props.get;
+pub const table = @import("props_table.zig").table;
+pub const Properties = @import("Properties.zig");
pub const graphemeBreak = grapheme.graphemeBreak;
pub const GraphemeBreakState = grapheme.BreakState;
test {
- _ = @import("symbols.zig");
@import("std").testing.refAllDecls(@This());
}
diff --git a/src/unicode/props.zig b/src/unicode/props.zig
deleted file mode 100644
index 7f3a3ece5..000000000
--- a/src/unicode/props.zig
+++ /dev/null
@@ -1,251 +0,0 @@
-const props = @This();
-const std = @import("std");
-const assert = std.debug.assert;
-const uucode = @import("uucode");
-const lut = @import("lut.zig");
-
-/// The lookup tables for Ghostty.
-pub const table = table: {
- // This is only available after running main() below as part of the Ghostty
- // build.zig, but due to Zig's lazy analysis we can still reference it here.
- const generated = @import("unicode_tables").Tables(Properties);
- const Tables = lut.Tables(Properties);
- break :table Tables{
- .stage1 = &generated.stage1,
- .stage2 = &generated.stage2,
- .stage3 = &generated.stage3,
- };
-};
-
-/// Property set per codepoint that Ghostty cares about.
-///
-/// Adding to this lets you find new properties but also potentially makes
-/// our lookup tables less efficient. Any changes to this should run the
-/// benchmarks in src/bench to verify that we haven't regressed.
-pub const Properties = struct {
- /// Codepoint width. We clamp to [0, 2] since Ghostty handles control
- /// characters and we max out at 2 for wide characters (i.e. 3-em dash
- /// becomes a 2-em dash).
- width: u2 = 0,
-
- /// Grapheme boundary class.
- grapheme_boundary_class: GraphemeBoundaryClass = .invalid,
-
- // Needed for lut.Generator
- pub fn eql(a: Properties, b: Properties) bool {
- return a.width == b.width and
- a.grapheme_boundary_class == b.grapheme_boundary_class;
- }
-
- // Needed for lut.Generator
- pub fn format(
- self: Properties,
- comptime layout: []const u8,
- opts: std.fmt.FormatOptions,
- writer: anytype,
- ) !void {
- _ = layout;
- _ = opts;
- try std.fmt.format(writer,
- \\.{{
- \\ .width= {},
- \\ .grapheme_boundary_class= .{s},
- \\}}
- , .{
- self.width,
- @tagName(self.grapheme_boundary_class),
- });
- }
-};
-
-/// Possible grapheme boundary classes. This isn't an exhaustive list:
-/// we omit control, CR, LF, etc. because in Ghostty's usage that are
-/// impossible because they're handled by the terminal.
-pub const GraphemeBoundaryClass = enum(u4) {
- invalid,
- L,
- V,
- T,
- LV,
- LVT,
- prepend,
- extend,
- zwj,
- spacing_mark,
- regional_indicator,
- extended_pictographic,
- extended_pictographic_base, // \p{Extended_Pictographic} & \p{Emoji_Modifier_Base}
- emoji_modifier, // \p{Emoji_Modifier}
-
- /// Gets the grapheme boundary class for a codepoint.
- /// The use case for this is only in generating lookup tables.
- pub fn init(cp: u21) GraphemeBoundaryClass {
- if (cp > uucode.config.max_code_point) return .invalid;
- if (uucode.get(.is_emoji_modifier, cp)) return .emoji_modifier;
- if (uucode.get(.is_emoji_modifier_base, cp)) return .extended_pictographic_base;
-
- return switch (uucode.get(.grapheme_break, cp)) {
- .extended_pictographic => .extended_pictographic,
- .l => .L,
- .v => .V,
- .t => .T,
- .lv => .LV,
- .lvt => .LVT,
- .prepend => .prepend,
- .zwj => .zwj,
- .spacing_mark => .spacing_mark,
- .regional_indicator => .regional_indicator,
-
- .zwnj,
- .indic_conjunct_break_extend,
- .indic_conjunct_break_linker,
- => .extend,
-
- // This is obviously not INVALID invalid, there is SOME grapheme
- // boundary class for every codepoint. But we don't care about
- // anything that doesn't fit into the above categories.
- .other,
- .indic_conjunct_break_consonant,
- .cr,
- .lf,
- .control,
- => .invalid,
- };
- }
-
- /// Returns true if this is an extended pictographic type. This
- /// should be used instead of comparing the enum value directly
- /// because we classify multiple.
- pub fn isExtendedPictographic(self: GraphemeBoundaryClass) bool {
- return switch (self) {
- .extended_pictographic,
- .extended_pictographic_base,
- => true,
-
- else => false,
- };
- }
-};
-
-pub fn get(cp: u21) Properties {
- const width = if (cp > uucode.config.max_code_point)
- 1
- else
- uucode.get(.width, cp);
-
- return .{
- .width = width,
- .grapheme_boundary_class = .init(cp),
- };
-}
-
-/// Runnable binary to generate the lookup tables and output to stdout.
-pub fn main() !void {
- var arena_state = std.heap.ArenaAllocator.init(std.heap.page_allocator);
- defer arena_state.deinit();
- const alloc = arena_state.allocator();
-
- const gen: lut.Generator(
- Properties,
- struct {
- pub fn get(ctx: @This(), cp: u21) !Properties {
- _ = ctx;
- return props.get(cp);
- }
-
- pub fn eql(ctx: @This(), a: Properties, b: Properties) bool {
- _ = ctx;
- return a.eql(b);
- }
- },
- ) = .{};
-
- const t = try gen.generate(alloc);
- defer alloc.free(t.stage1);
- defer alloc.free(t.stage2);
- defer alloc.free(t.stage3);
- try t.writeZig(std.io.getStdOut().writer());
-
- // Uncomment when manually debugging to see our table sizes.
- // std.log.warn("stage1={} stage2={} stage3={}", .{
- // t.stage1.len,
- // t.stage2.len,
- // t.stage3.len,
- // });
-}
-
-test "unicode props: tables match uucode" {
- if (std.valgrind.runningOnValgrind() > 0) return error.SkipZigTest;
-
- const testing = std.testing;
-
- const min = 0xFF + 1; // start outside ascii
- const max = std.math.maxInt(u21) + 1;
- for (min..max) |cp| {
- const t = table.get(@intCast(cp));
- const uu = if (cp > uucode.config.max_code_point)
- 1
- else
- uucode.get(.width, @intCast(cp));
- if (t.width != uu) {
- std.log.warn("mismatch cp=U+{x} t={} uu={}", .{ cp, t.width, uu });
- try testing.expect(false);
- }
- }
-}
-
-test "unicode props: tables match ziglyph" {
- if (std.valgrind.runningOnValgrind() > 0) return error.SkipZigTest;
-
- const ziglyph = @import("ziglyph");
- const testing = std.testing;
-
- const min = 0xFF + 1; // start outside ascii
- const max = std.math.maxInt(u21) + 1;
- for (min..max) |cp| {
- const t = table.get(@intCast(cp));
- const zg = @min(2, @max(0, ziglyph.display_width.codePointWidth(@intCast(cp), .half)));
- if (t.width != zg) {
-
- // Known exceptions
- if (cp == 0x0897) continue; // non-spacing mark (t = 0)
- if (cp == 0x2065) continue; // unassigned (t = 1)
- if (cp >= 0x2630 and cp <= 0x2637) continue; // east asian width is wide (t = 2)
- if (cp >= 0x268A and cp <= 0x268F) continue; // east asian width is wide (t = 2)
- if (cp >= 0x2FFC and cp <= 0x2FFF) continue; // east asian width is wide (t = 2)
- if (cp == 0x31E4 or cp == 0x31E5) continue; // east asian width is wide (t = 2)
- if (cp == 0x31EF) continue; // east asian width is wide (t = 2)
- if (cp >= 0x4DC0 and cp <= 0x4DFF) continue; // east asian width is wide (t = 2)
- if (cp >= 0xFFF0 and cp <= 0xFFF8) continue; // unassigned (t = 1)
- if (cp >= 0xFFF0 and cp <= 0xFFF8) continue; // unassigned (t = 1)
- if (cp >= 0x10D69 and cp <= 0x10D6D) continue; // non-spacing mark, despite being east asian width normal (t = 0)
- if (cp >= 0x10EFC and cp <= 0x10EFF) continue; // non-spacing mark, despite being east asian width normal (t = 0)
- if (cp >= 0x113BB and cp <= 0x113C0) continue; // non-spacing mark, despite being east asian width normal (t = 0)
- if (cp == 0x113CE) continue; // non-spacing mark, despite being east asian width normal (t = 0)
- if (cp == 0x113D0) continue; // non-spacing mark, despite being east asian width normal (t = 0)
- if (cp == 0x113D2) continue; // non-spacing mark, despite being east asian width normal (t = 0)
- if (cp == 0x113E1) continue; // non-spacing mark, despite being east asian width normal (t = 0)
- if (cp == 0x113E2) continue; // non-spacing mark, despite being east asian width normal (t = 0)
- if (cp == 0x1171E) continue; // mark spacing combining (t = 1)
- if (cp == 0x11F5A) continue; // non-spacing mark, despite being east asian width normal (t = 0)
- if (cp == 0x1611E) continue; // non-spacing mark, despite being east asian width normal (t = 0)
- if (cp == 0x1611F) continue; // non-spacing mark, despite being east asian width normal (t = 0)
- if (cp >= 0x16120 and cp <= 0x1612F) continue; // non-spacing mark, despite being east asian width normal (t = 0)
- if (cp >= 0xE0000 and cp <= 0xE0FFF) continue; // ziglyph ignores these with 0, but many are unassigned (t = 1)
- if (cp == 0x18CFF) continue; // east asian width is wide (t = 2)
- if (cp >= 0x1D300 and cp <= 0x1D376) continue; // east asian width is wide (t = 2)
- if (cp == 0x1E5EE) continue; // non-spacing mark, despite being east asian width normal (t = 0)
- if (cp == 0x1E5EF) continue; // non-spacing mark, despite being east asian width normal (t = 0)
- if (cp == 0x1FA89) continue; // east asian width is wide (t = 2)
- if (cp == 0x1FA8F) continue; // east asian width is wide (t = 2)
- if (cp == 0x1FABE) continue; // east asian width is wide (t = 2)
- if (cp == 0x1FAC6) continue; // east asian width is wide (t = 2)
- if (cp == 0x1FADC) continue; // east asian width is wide (t = 2)
- if (cp == 0x1FADF) continue; // east asian width is wide (t = 2)
- if (cp == 0x1FAE9) continue; // east asian width is wide (t = 2)
-
- std.log.warn("mismatch cp=U+{x} t={} zg={}", .{ cp, t.width, zg });
- try testing.expect(false);
- }
- }
-}
diff --git a/src/unicode/props_table.zig b/src/unicode/props_table.zig
new file mode 100644
index 000000000..d4ddfebbb
--- /dev/null
+++ b/src/unicode/props_table.zig
@@ -0,0 +1,96 @@
+const std = @import("std");
+const Properties = @import("Properties.zig");
+const lut = @import("lut.zig");
+
+/// The lookup tables for Ghostty.
+pub const table = table: {
+ // This is only available after running a generator as part of the Ghostty
+ // build.zig process, but due to Zig's lazy analysis we can still reference
+ // it here.
+ //
+ // An example process is the `main` in `props_uucode.zig`
+ const generated = @import("unicode_tables").Tables(Properties);
+ const Tables = lut.Tables(Properties);
+ break :table Tables{
+ .stage1 = &generated.stage1,
+ .stage2 = &generated.stage2,
+ .stage3 = &generated.stage3,
+ };
+};
+
+test "unicode props: tables match uucode" {
+ if (std.valgrind.runningOnValgrind() > 0) return error.SkipZigTest;
+
+ const uucode = @import("uucode");
+ const testing = std.testing;
+
+ const min = 0xFF + 1; // start outside ascii
+ const max = std.math.maxInt(u21) + 1;
+ for (min..max) |cp| {
+ const t = table.get(@intCast(cp));
+ const uu = if (cp > uucode.config.max_code_point)
+ 1
+ else
+ uucode.get(.width, @intCast(cp));
+ if (t.width != uu) {
+ std.log.warn("mismatch cp=U+{x} t={} uu={}", .{ cp, t.width, uu });
+ try testing.expect(false);
+ }
+ }
+}
+
+test "unicode props: tables match ziglyph" {
+ if (std.valgrind.runningOnValgrind() > 0) return error.SkipZigTest;
+
+ const ziglyph = @import("ziglyph");
+ const testing = std.testing;
+
+ const min = 0xFF + 1; // start outside ascii
+ const max = std.math.maxInt(u21) + 1;
+ for (min..max) |cp| {
+ const t = table.get(@intCast(cp));
+ const zg = @min(2, @max(0, ziglyph.display_width.codePointWidth(@intCast(cp), .half)));
+ if (t.width != zg) {
+
+ // Known exceptions
+ if (cp == 0x0897) continue; // non-spacing mark (t = 0)
+ if (cp == 0x2065) continue; // unassigned (t = 1)
+ if (cp >= 0x2630 and cp <= 0x2637) continue; // east asian width is wide (t = 2)
+ if (cp >= 0x268A and cp <= 0x268F) continue; // east asian width is wide (t = 2)
+ if (cp >= 0x2FFC and cp <= 0x2FFF) continue; // east asian width is wide (t = 2)
+ if (cp == 0x31E4 or cp == 0x31E5) continue; // east asian width is wide (t = 2)
+ if (cp == 0x31EF) continue; // east asian width is wide (t = 2)
+ if (cp >= 0x4DC0 and cp <= 0x4DFF) continue; // east asian width is wide (t = 2)
+ if (cp >= 0xFFF0 and cp <= 0xFFF8) continue; // unassigned (t = 1)
+ if (cp >= 0xFFF0 and cp <= 0xFFF8) continue; // unassigned (t = 1)
+ if (cp >= 0x10D69 and cp <= 0x10D6D) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp >= 0x10EFC and cp <= 0x10EFF) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp >= 0x113BB and cp <= 0x113C0) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp == 0x113CE) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp == 0x113D0) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp == 0x113D2) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp == 0x113E1) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp == 0x113E2) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp == 0x1171E) continue; // mark spacing combining (t = 1)
+ if (cp == 0x11F5A) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp == 0x1611E) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp == 0x1611F) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp >= 0x16120 and cp <= 0x1612F) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp >= 0xE0000 and cp <= 0xE0FFF) continue; // ziglyph ignores these with 0, but many are unassigned (t = 1)
+ if (cp == 0x18CFF) continue; // east asian width is wide (t = 2)
+ if (cp >= 0x1D300 and cp <= 0x1D376) continue; // east asian width is wide (t = 2)
+ if (cp == 0x1E5EE) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp == 0x1E5EF) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp == 0x1FA89) continue; // east asian width is wide (t = 2)
+ if (cp == 0x1FA8F) continue; // east asian width is wide (t = 2)
+ if (cp == 0x1FABE) continue; // east asian width is wide (t = 2)
+ if (cp == 0x1FAC6) continue; // east asian width is wide (t = 2)
+ if (cp == 0x1FADC) continue; // east asian width is wide (t = 2)
+ if (cp == 0x1FADF) continue; // east asian width is wide (t = 2)
+ if (cp == 0x1FAE9) continue; // east asian width is wide (t = 2)
+
+ std.log.warn("mismatch cp=U+{x} t={} zg={}", .{ cp, t.width, zg });
+ try testing.expect(false);
+ }
+ }
+}
diff --git a/src/unicode/props_uucode.zig b/src/unicode/props_uucode.zig
new file mode 100644
index 000000000..449c04ddf
--- /dev/null
+++ b/src/unicode/props_uucode.zig
@@ -0,0 +1,95 @@
+const props = @This();
+const std = @import("std");
+const assert = std.debug.assert;
+const uucode = @import("uucode");
+const lut = @import("lut.zig");
+const Properties = @import("Properties.zig");
+const GraphemeBoundaryClass = Properties.GraphemeBoundaryClass;
+
+/// Gets the grapheme boundary class for a codepoint.
+/// The use case for this is only in generating lookup tables.
+fn graphemeBoundaryClass(cp: u21) GraphemeBoundaryClass {
+ if (cp > uucode.config.max_code_point) return .invalid;
+
+ // We special-case modifier bases because we should not break
+ // if a modifier isn't next to a base.
+ if (uucode.get(.is_emoji_modifier, cp)) return .emoji_modifier;
+ if (uucode.get(.is_emoji_modifier_base, cp)) return .extended_pictographic_base;
+
+ return switch (uucode.get(.grapheme_break, cp)) {
+ .extended_pictographic => .extended_pictographic,
+ .l => .L,
+ .v => .V,
+ .t => .T,
+ .lv => .LV,
+ .lvt => .LVT,
+ .prepend => .prepend,
+ .zwj => .zwj,
+ .spacing_mark => .spacing_mark,
+ .regional_indicator => .regional_indicator,
+
+ .zwnj,
+ .indic_conjunct_break_extend,
+ .indic_conjunct_break_linker,
+ => .extend,
+
+ // This is obviously not INVALID invalid, there is SOME grapheme
+ // boundary class for every codepoint. But we don't care about
+ // anything that doesn't fit into the above categories. Also note
+ // that `indic_conjunct_break_consonant` is `other` in
+ // 'GraphemeBreakProperty.txt' (it's missing).
+ .other,
+ .indic_conjunct_break_consonant,
+ .cr,
+ .lf,
+ .control,
+ => .invalid,
+ };
+}
+
+/// Builds the Ghostty property set for a single codepoint from uucode data.
+///
+/// Codepoints above `uucode.config.max_code_point` are given a width of 1
+/// instead of being looked up in uucode.
+pub fn get(cp: u21) Properties {
+    const width = if (cp > uucode.config.max_code_point)
+        1
+    else
+        uucode.get(.width, cp);
+
+    return .{
+        .width = width,
+        // The class is computed by `graphemeBoundaryClass` in this file;
+        // the `GraphemeBoundaryClass` enum has no `init` declaration.
+        .grapheme_boundary_class = graphemeBoundaryClass(cp),
+    };
+}
+
+/// Runnable binary to generate the lookup tables and output to stdout.
+pub fn main() !void {
+ var arena_state = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+ defer arena_state.deinit();
+ const alloc = arena_state.allocator();
+
+ const gen: lut.Generator(
+ Properties,
+ struct {
+ pub fn get(ctx: @This(), cp: u21) !Properties {
+ _ = ctx;
+ return props.get(cp);
+ }
+
+ pub fn eql(ctx: @This(), a: Properties, b: Properties) bool {
+ _ = ctx;
+ return a.eql(b);
+ }
+ },
+ ) = .{};
+
+ const t = try gen.generate(alloc);
+ defer alloc.free(t.stage1);
+ defer alloc.free(t.stage2);
+ defer alloc.free(t.stage3);
+ try t.writeZig(std.io.getStdOut().writer());
+
+ // Uncomment when manually debugging to see our table sizes.
+ // std.log.warn("stage1={} stage2={} stage3={}", .{
+ // t.stage1.len,
+ // t.stage2.len,
+ // t.stage3.len,
+ // });
+}
diff --git a/src/unicode/symbols.zig b/src/unicode/symbols_table.zig
index 8ac0edcd3..af77d88fd 100644
--- a/src/unicode/symbols.zig
+++ b/src/unicode/symbols_table.zig
@@ -1,13 +1,13 @@
-const props = @This();
const std = @import("std");
-const assert = std.debug.assert;
-const uucode = @import("uucode");
const lut = @import("lut.zig");
/// The lookup tables for Ghostty.
pub const table = table: {
- // This is only available after running main() below as part of the Ghostty
- // build.zig, but due to Zig's lazy analysis we can still reference it here.
+ // This is only available after running a generator as part of the Ghostty
+ // build.zig process, but due to Zig's lazy analysis we can still reference
+ // it here.
+ //
+ // An example process is the `main` in `symbols_ziglyph.zig`
const generated = @import("symbols_tables").Tables(bool);
const Tables = lut.Tables(bool);
break :table Tables{
@@ -17,47 +17,10 @@ pub const table = table: {
};
};
-/// Runnable binary to generate the lookup tables and output to stdout.
-pub fn main() !void {
- var arena_state = std.heap.ArenaAllocator.init(std.heap.page_allocator);
- defer arena_state.deinit();
- const alloc = arena_state.allocator();
-
- const gen: lut.Generator(
- bool,
- struct {
- pub fn get(ctx: @This(), cp: u21) !bool {
- _ = ctx;
- return if (cp > uucode.config.max_code_point)
- false
- else
- uucode.get(.is_symbol, @intCast(cp));
- }
-
- pub fn eql(ctx: @This(), a: bool, b: bool) bool {
- _ = ctx;
- return a == b;
- }
- },
- ) = .{};
-
- const t = try gen.generate(alloc);
- defer alloc.free(t.stage1);
- defer alloc.free(t.stage2);
- defer alloc.free(t.stage3);
- try t.writeZig(std.io.getStdOut().writer());
-
- // Uncomment when manually debugging to see our table sizes.
- // std.log.warn("stage1={} stage2={} stage3={}", .{
- // t.stage1.len,
- // t.stage2.len,
- // t.stage3.len,
- // });
-}
-
test "unicode symbols: tables match uucode" {
if (std.valgrind.runningOnValgrind() > 0) return error.SkipZigTest;
+ const uucode = @import("uucode");
const testing = std.testing;
for (0..std.math.maxInt(u21)) |cp| {