summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJacob Sandlund <jacob@jacobsandlund.com>2025-09-18 11:46:05 -0400
committerJacob Sandlund <jacob@jacobsandlund.com>2025-09-18 11:46:05 -0400
commit69594119c320920d7795214ef4cc4afa3699d3fa (patch)
tree591d193a33f884a02b879e7ae3fafa06c06d3c3e
parent285a33fbc0cdbefd250fe6448e4c9b41e14ba7b9 (diff)
fix up diff from benchmarks, and add tests against ziglyph
-rw-r--r--build.zig8
-rw-r--r--build.zig.zon9
-rw-r--r--build.zig.zon.json11
-rw-r--r--build.zig.zon.nix14
-rw-r--r--build.zig.zon.txt3
-rw-r--r--flatpak/zig-packages.json12
-rw-r--r--src/benchmark/GraphemeBreak.zig2
-rw-r--r--src/benchmark/IsSymbol.zig9
-rw-r--r--src/build/SharedDeps.zig10
-rw-r--r--src/build/UnicodeTables.zig10
-rw-r--r--src/build/uucode_config.zig2
-rw-r--r--src/input/Binding.zig4
-rw-r--r--src/terminal/Terminal.zig2
-rw-r--r--src/unicode/grapheme.zig27
-rw-r--r--src/unicode/main.zig1
-rw-r--r--src/unicode/props.zig201
-rw-r--r--src/unicode/symbols.zig61
17 files changed, 226 insertions, 160 deletions
diff --git a/build.zig b/build.zig
index 38cfd0e56..61bcd575b 100644
--- a/build.zig
+++ b/build.zig
@@ -234,6 +234,14 @@ pub fn build(b: *std.Build) !void {
if (config.emit_test_exe) b.installArtifact(test_exe);
_ = try deps.add(test_exe);
+ // Only need ziglyph for tests
+ if (b.lazyDependency("ziglyph", .{
+ .target = test_exe.root_module.resolved_target.?,
+ .optimize = test_exe.root_module.optimize.?,
+ })) |dep| {
+ test_exe.root_module.addImport("ziglyph", dep.module("ziglyph"));
+ }
+
// Normal test running
const test_run = b.addRunArtifact(test_exe);
test_step.dependOn(&test_run.step);
diff --git a/build.zig.zon b/build.zig.zon
index 2c8a8fd68..953ec2f79 100644
--- a/build.zig.zon
+++ b/build.zig.zon
@@ -36,9 +36,14 @@
.hash = "N-V-__8AAB9YCQBaZtQjJZVndk-g_GDIK-NTZcIa63bFp9yZ",
.lazy = true,
},
+ .ziglyph = .{
+ .url = "https://deps.files.ghostty.org/ziglyph-b89d43d1e3fb01b6074bc1f7fc980324b04d26a5.tar.gz",
+ .hash = "ziglyph-0.11.2-AAAAAHPtHwB4Mbzn1KvOV7Wpjo82NYEc_v0WC8oCLrkf",
+ .lazy = true,
+ },
.uucode = .{
- .url = "https://github.com/jacobsandlund/uucode/archive/ef173d765bd756eeecf7ce89f93c4f70c9038ab6.tar.gz",
- .hash = "uucode-0.0.0-ZZjBPtMqQABaVqHdy8MX_XwChpQyZBAGchp-1cPuiQ6J",
+ .url = "https://github.com/jacobsandlund/uucode/archive/3512203ca991c02b2500392d1d51226c48131c99.tar.gz",
+ .hash = "uucode-0.0.0-ZZjBPgErQADBJsnLdcZKdRk94lB28CbKC4OrUDPOnSeV",
},
.zig_wayland = .{
// codeberg ifreund/zig-wayland
diff --git a/build.zig.zon.json b/build.zig.zon.json
index 7af90834f..1b2ccebe1 100644
--- a/build.zig.zon.json
+++ b/build.zig.zon.json
@@ -109,10 +109,10 @@
"url": "https://deps.files.ghostty.org/utfcpp-1220d4d18426ca72fc2b7e56ce47273149815501d0d2395c2a98c726b31ba931e641.tar.gz",
"hash": "sha256-/8ZooxDndgfTk/PBizJxXyI9oerExNbgV5oR345rWc8="
},
- "uucode-0.0.0-ZZjBPtMqQABaVqHdy8MX_XwChpQyZBAGchp-1cPuiQ6J": {
+ "uucode-0.0.0-ZZjBPgErQADBJsnLdcZKdRk94lB28CbKC4OrUDPOnSeV": {
"name": "uucode",
- "url": "https://github.com/jacobsandlund/uucode/archive/ef173d765bd756eeecf7ce89f93c4f70c9038ab6.tar.gz",
- "hash": "sha256-NFBH94kHmaxsFLBEePgdLjOt3JfbPn8cTQ1ZHiH6xBg="
+ "url": "https://github.com/jacobsandlund/uucode/archive/3512203ca991c02b2500392d1d51226c48131c99.tar.gz",
+ "hash": "sha256-nbbeHgvkoMmr5DJN0qRF776hu3waTL85d8dGpvYsZBw="
},
"vaxis-0.1.0-BWNV_FUICQAFZnTCL11TUvnUr1Y0_ZdqtXHhd51d76Rn": {
"name": "vaxis",
@@ -169,6 +169,11 @@
"url": "git+https://github.com/TUSF/zigimg#31268548fe3276c0e95f318a6c0d2ab10565b58d",
"hash": "sha256-oblfr2FIzuqq0FLo/RrzCwUX1NJJuT53EwD3nP3KwN0="
},
+ "ziglyph-0.11.2-AAAAAHPtHwB4Mbzn1KvOV7Wpjo82NYEc_v0WC8oCLrkf": {
+ "name": "ziglyph",
+ "url": "https://deps.files.ghostty.org/ziglyph-b89d43d1e3fb01b6074bc1f7fc980324b04d26a5.tar.gz",
+ "hash": "sha256-cse98+Ft8QUjX+P88yyYfaxJOJGQ9M7Ymw7jFxDz89k="
+ },
"N-V-__8AAB0eQwD-0MdOEBmz7intriBReIsIDNlukNVoNu6o": {
"name": "zlib",
"url": "https://deps.files.ghostty.org/zlib-1220fed0c74e1019b3ee29edae2051788b080cd96e90d56836eea857b0b966742efb.tar.gz",
diff --git a/build.zig.zon.nix b/build.zig.zon.nix
index aff14c289..2cedd8fba 100644
--- a/build.zig.zon.nix
+++ b/build.zig.zon.nix
@@ -259,11 +259,11 @@ in
};
}
{
- name = "uucode-0.0.0-ZZjBPtMqQABaVqHdy8MX_XwChpQyZBAGchp-1cPuiQ6J";
+ name = "uucode-0.0.0-ZZjBPgErQADBJsnLdcZKdRk94lB28CbKC4OrUDPOnSeV";
path = fetchZigArtifact {
name = "uucode";
- url = "https://github.com/jacobsandlund/uucode/archive/ef173d765bd756eeecf7ce89f93c4f70c9038ab6.tar.gz";
- hash = "sha256-NFBH94kHmaxsFLBEePgdLjOt3JfbPn8cTQ1ZHiH6xBg=";
+ url = "https://github.com/jacobsandlund/uucode/archive/3512203ca991c02b2500392d1d51226c48131c99.tar.gz";
+ hash = "sha256-nbbeHgvkoMmr5DJN0qRF776hu3waTL85d8dGpvYsZBw=";
};
}
{
@@ -355,6 +355,14 @@ in
};
}
{
+ name = "ziglyph-0.11.2-AAAAAHPtHwB4Mbzn1KvOV7Wpjo82NYEc_v0WC8oCLrkf";
+ path = fetchZigArtifact {
+ name = "ziglyph";
+ url = "https://deps.files.ghostty.org/ziglyph-b89d43d1e3fb01b6074bc1f7fc980324b04d26a5.tar.gz";
+ hash = "sha256-cse98+Ft8QUjX+P88yyYfaxJOJGQ9M7Ymw7jFxDz89k=";
+ };
+ }
+ {
name = "N-V-__8AAB0eQwD-0MdOEBmz7intriBReIsIDNlukNVoNu6o";
path = fetchZigArtifact {
name = "zlib";
diff --git a/build.zig.zon.txt b/build.zig.zon.txt
index 1ee2923e3..9a7dd59ba 100644
--- a/build.zig.zon.txt
+++ b/build.zig.zon.txt
@@ -26,8 +26,9 @@ https://deps.files.ghostty.org/wayland-9cb3d7aa9dc995ffafdbdef7ab86a949d0fb0e7d.
https://deps.files.ghostty.org/wayland-protocols-258d8f88f2c8c25a830c6316f87d23ce1a0f12d9.tar.gz
https://deps.files.ghostty.org/wuffs-122037b39d577ec2db3fd7b2130e7b69ef6cc1807d68607a7c232c958315d381b5cd.tar.gz
https://deps.files.ghostty.org/zig_js-12205a66d423259567764fa0fc60c82be35365c21aeb76c5a7dc99698401f4f6fefc.tar.gz
+https://deps.files.ghostty.org/ziglyph-b89d43d1e3fb01b6074bc1f7fc980324b04d26a5.tar.gz
https://deps.files.ghostty.org/zlib-1220fed0c74e1019b3ee29edae2051788b080cd96e90d56836eea857b0b966742efb.tar.gz
-https://github.com/jacobsandlund/uucode/archive/ef173d765bd756eeecf7ce89f93c4f70c9038ab6.tar.gz
+https://github.com/jacobsandlund/uucode/archive/3512203ca991c02b2500392d1d51226c48131c99.tar.gz
https://github.com/jcollie/ghostty-gobject/releases/download/0.15.1-2025-09-04-48-1/ghostty-gobject-0.15.1-2025-09-04-48-1.tar.zst
https://github.com/mitchellh/libxev/archive/7f803181b158a10fec8619f793e3b4df515566cb.tar.gz
https://github.com/mitchellh/zig-objc/archive/c9e917a4e15a983b672ca779c7985d738a2d517c.tar.gz
diff --git a/flatpak/zig-packages.json b/flatpak/zig-packages.json
index ec2e72b9e..f43d2e9f7 100644
--- a/flatpak/zig-packages.json
+++ b/flatpak/zig-packages.json
@@ -133,9 +133,9 @@
},
{
"type": "archive",
- "url": "https://github.com/jacobsandlund/uucode/archive/ef173d765bd756eeecf7ce89f93c4f70c9038ab6.tar.gz",
- "dest": "vendor/p/uucode-0.0.0-ZZjBPtMqQABaVqHdy8MX_XwChpQyZBAGchp-1cPuiQ6J",
- "sha256": "345047f7890799ac6c14b04478f81d2e33addc97db3e7f1c4d0d591e21fac418"
+ "url": "https://github.com/jacobsandlund/uucode/archive/3512203ca991c02b2500392d1d51226c48131c99.tar.gz",
+ "dest": "vendor/p/uucode-0.0.0-ZZjBPgErQADBJsnLdcZKdRk94lB28CbKC4OrUDPOnSeV",
+ "sha256": "9db6de1e0be4a0c9abe4324dd2a445efbea1bb7c1a4cbf3977c746a6f62c641c"
},
{
"type": "git",
@@ -205,6 +205,12 @@
},
{
"type": "archive",
+ "url": "https://deps.files.ghostty.org/ziglyph-b89d43d1e3fb01b6074bc1f7fc980324b04d26a5.tar.gz",
+ "dest": "vendor/p/ziglyph-0.11.2-AAAAAHPtHwB4Mbzn1KvOV7Wpjo82NYEc_v0WC8oCLrkf",
+ "sha256": "72c7bdf3e16df105235fe3fcf32c987dac49389190f4ced89b0ee31710f3f3d9"
+ },
+ {
+ "type": "archive",
"url": "https://deps.files.ghostty.org/zlib-1220fed0c74e1019b3ee29edae2051788b080cd96e90d56836eea857b0b966742efb.tar.gz",
"dest": "vendor/p/N-V-__8AAB0eQwD-0MdOEBmz7intriBReIsIDNlukNVoNu6o",
"sha256": "17e88863f3600672ab49182f217281b6fc4d3c762bde361935e436a95214d05c"
diff --git a/src/benchmark/GraphemeBreak.zig b/src/benchmark/GraphemeBreak.zig
index b3b169909..28de82593 100644
--- a/src/benchmark/GraphemeBreak.zig
+++ b/src/benchmark/GraphemeBreak.zig
@@ -21,7 +21,7 @@ data_f: ?std.fs.File = null,
pub const Options = struct {
/// The type of codepoint width calculation to use.
- mode: Mode = .noop,
+ mode: Mode = .table,
/// The data to read as a filepath. If this is "-" then
/// we will read stdin. If this is unset, then we will
diff --git a/src/benchmark/IsSymbol.zig b/src/benchmark/IsSymbol.zig
index 0997da41d..09b61fceb 100644
--- a/src/benchmark/IsSymbol.zig
+++ b/src/benchmark/IsSymbol.zig
@@ -128,14 +128,7 @@ fn stepTable(ptr: *anyopaque) Benchmark.Error!void {
const cp_, const consumed = d.next(c);
assert(consumed);
if (cp_) |cp| {
- if (uucode.getX(.is_symbol, cp) != symbols.table.get(cp)) {
- std.debug.panic("uucode and table disagree on codepoint {d}: uucode={}, table={}", .{
- cp,
- uucode.getX(.is_symbol, cp),
- symbols.table.get(cp),
- });
- }
- //std.mem.doNotOptimizeAway(symbols.table.get(cp));
+ std.mem.doNotOptimizeAway(symbols.table.get(cp));
}
}
}
diff --git a/src/build/SharedDeps.zig b/src/build/SharedDeps.zig
index fd3f91d89..68f0fb64f 100644
--- a/src/build/SharedDeps.zig
+++ b/src/build/SharedDeps.zig
@@ -15,13 +15,13 @@ help_strings: HelpStrings,
metallib: ?*MetallibStep,
unicode_tables: UnicodeTables,
framedata: GhosttyFrameData,
-uucode_tables_zig: std.Build.LazyPath,
+uucode_tables: std.Build.LazyPath,
/// Used to keep track of a list of file sources.
pub const LazyPathList = std.ArrayList(std.Build.LazyPath);
pub fn init(b: *std.Build, cfg: *const Config) !SharedDeps {
- const uucode_tables_zig = blk: {
+ const uucode_tables = blk: {
const uucode = b.dependency("uucode", .{
.build_config_path = b.path("src/build/uucode_config.zig"),
});
@@ -32,9 +32,9 @@ pub fn init(b: *std.Build, cfg: *const Config) !SharedDeps {
var result: SharedDeps = .{
.config = cfg,
.help_strings = try .init(b, cfg),
- .unicode_tables = try .init(b, uucode_tables_zig),
+ .unicode_tables = try .init(b, uucode_tables),
.framedata = try .init(b),
- .uucode_tables_zig = uucode_tables_zig,
+ .uucode_tables = uucode_tables,
// Setup by retarget
.options = undefined,
@@ -423,7 +423,7 @@ pub fn add(
if (b.lazyDependency("uucode", .{
.target = target,
.optimize = optimize,
- .@"tables.zig" = self.uucode_tables_zig,
+ .tables_path = self.uucode_tables,
.build_config_path = b.path("src/build/uucode_config.zig"),
})) |dep| {
step.root_module.addImport("uucode", dep.module("uucode"));
diff --git a/src/build/UnicodeTables.zig b/src/build/UnicodeTables.zig
index a947ce137..4b5f6db99 100644
--- a/src/build/UnicodeTables.zig
+++ b/src/build/UnicodeTables.zig
@@ -11,7 +11,7 @@ symbols_exe: *std.Build.Step.Compile,
props_output: std.Build.LazyPath,
symbols_output: std.Build.LazyPath,
-pub fn init(b: *std.Build, uucode_tables_zig: std.Build.LazyPath) !UnicodeTables {
+pub fn init(b: *std.Build, uucode_tables: std.Build.LazyPath) !UnicodeTables {
const props_exe = b.addExecutable(.{
.name = "props-unigen",
.root_module = b.createModule(.{
@@ -36,7 +36,7 @@ pub fn init(b: *std.Build, uucode_tables_zig: std.Build.LazyPath) !UnicodeTables
if (b.lazyDependency("uucode", .{
.target = b.graph.host,
- .@"tables.zig" = uucode_tables_zig,
+ .tables_path = uucode_tables,
.build_config_path = b.path("src/build/uucode_config.zig"),
})) |dep| {
inline for (&.{ props_exe, symbols_exe }) |exe| {
@@ -46,14 +46,12 @@ pub fn init(b: *std.Build, uucode_tables_zig: std.Build.LazyPath) !UnicodeTables
const props_run = b.addRunArtifact(props_exe);
const symbols_run = b.addRunArtifact(symbols_exe);
- const props_output = props_run.addOutputFileArg("props_table.zig");
- const symbols_output = symbols_run.addOutputFileArg("symbols_table.zig");
return .{
.props_exe = props_exe,
.symbols_exe = symbols_exe,
- .props_output = props_output,
- .symbols_output = symbols_output,
+ .props_output = props_run.captureStdOut(),
+ .symbols_output = symbols_run.captureStdOut(),
};
}
diff --git a/src/build/uucode_config.zig b/src/build/uucode_config.zig
index fcc50057e..6e2e263bd 100644
--- a/src/build/uucode_config.zig
+++ b/src/build/uucode_config.zig
@@ -65,7 +65,7 @@ pub const tables = [_]config.Table{
.fields = &.{
d.field("is_emoji_presentation"),
d.field("case_folding_full"),
- // Alternative:
+ // TODO: Alternatively, use:
// d.field("case_folding_simple"),
d.field("is_emoji_modifier"),
d.field("is_emoji_modifier_base"),
diff --git a/src/input/Binding.zig b/src/input/Binding.zig
index 039a6ac89..467dd5949 100644
--- a/src/input/Binding.zig
+++ b/src/input/Binding.zig
@@ -1609,8 +1609,8 @@ pub const Trigger = struct {
.unicode => |cp| std.hash.autoHash(
hasher,
foldedCodepoint(cp),
- // Alternative, just use simple case folding, and delete
- // `foldedCodepoint` below:
+ // TODO: Alternatively, just use simple case folding, and
+ // delete `foldedCodepoint` below:
// uucode.get(.case_folding_simple, cp),
),
}
diff --git a/src/terminal/Terminal.zig b/src/terminal/Terminal.zig
index 229b6e100..2d191077a 100644
--- a/src/terminal/Terminal.zig
+++ b/src/terminal/Terminal.zig
@@ -345,7 +345,7 @@ pub fn print(self: *Terminal, c: u21) !void {
if (c == 0xFE0F or c == 0xFE0E) {
// This only applies to emoji
const prev_props = unicode.getProperties(prev.cell.content.codepoint);
- const emoji = unicode.isExtendedPictographic(prev_props.grapheme_boundary_class);
+ const emoji = prev_props.grapheme_boundary_class.isExtendedPictographic();
if (!emoji) return;
switch (c) {
diff --git a/src/unicode/grapheme.zig b/src/unicode/grapheme.zig
index b0cb4ead9..f3edb58b2 100644
--- a/src/unicode/grapheme.zig
+++ b/src/unicode/grapheme.zig
@@ -2,7 +2,6 @@ const std = @import("std");
const props = @import("props.zig");
const GraphemeBoundaryClass = props.GraphemeBoundaryClass;
const table = props.table;
-const isExtendedPictographic = props.isExtendedPictographic;
/// Determines if there is a grapheme break between two codepoints. This
/// must be called sequentially maintaining the state between calls.
@@ -81,7 +80,7 @@ fn graphemeBreakClass(
state: *BreakState,
) bool {
// GB11: Emoji Extend* ZWJ x Emoji
- if (!state.extended_pictographic and isExtendedPictographic(gbc1)) {
+ if (!state.extended_pictographic and gbc1.isExtendedPictographic()) {
state.extended_pictographic = true;
}
@@ -132,7 +131,7 @@ fn graphemeBreakClass(
// GB11: Emoji Extend* ZWJ x Emoji
if (state.extended_pictographic and
gbc1 == .zwj and
- isExtendedPictographic(gbc2))
+ gbc2.isExtendedPictographic())
{
state.extended_pictographic = false;
return false;
@@ -156,38 +155,36 @@ fn graphemeBreakClass(
/// TODO: this is hard to build with newer zig build, so
/// https://github.com/ghostty-org/ghostty/pull/7806 took the approach of
/// adding a `-Demit-unicode-test` option for `zig build`, but that
-/// hasn't been done here yet.
-/// TODO: this also still uses `ziglyph`, but could be switched to use
-/// `uucode`'s grapheme break once that is implemented.
+/// hasn't been done here.
pub fn main() !void {
- const ziglyph = @import("ziglyph");
+ const uucode = @import("uucode");
// Set the min and max to control the test range.
const min = 0;
const max = std.math.maxInt(u21) + 1;
var state: BreakState = .{};
- var zg_state: u3 = 0;
+ var uu_state: uucode.grapheme.BreakState = .default;
for (min..max) |cp1| {
if (cp1 % 1000 == 0) std.log.warn("progress cp1={}", .{cp1});
if (cp1 == '\r' or cp1 == '\n' or
- ziglyph.grapheme_break.isControl(@intCast(cp1))) continue;
+ uucode.get(.grapheme_break, @intCast(cp1)) == .control) continue;
for (min..max) |cp2| {
if (cp2 == '\r' or cp2 == '\n' or
- ziglyph.grapheme_break.isControl(@intCast(cp2))) continue;
+ uucode.get(.grapheme_break, @intCast(cp2)) == .control) continue;
const gb = graphemeBreak(@intCast(cp1), @intCast(cp2), &state);
- const zg_gb = ziglyph.graphemeBreak(@intCast(cp1), @intCast(cp2), &zg_state);
- if (gb != zg_gb) {
- std.log.warn("cp1={x} cp2={x} gb={} state={} zg_gb={} zg_state={}", .{
+ const uu_gb = uucode.grapheme.isBreak(@intCast(cp1), @intCast(cp2), &uu_state);
+ if (gb != uu_gb) {
+ std.log.warn("cp1={x} cp2={x} gb={} state={} uu_gb={} uu_state={}", .{
cp1,
cp2,
gb,
state,
- zg_gb,
- zg_state,
+ uu_gb,
+ uu_state,
});
}
}
diff --git a/src/unicode/main.zig b/src/unicode/main.zig
index e053976bc..17c86deca 100644
--- a/src/unicode/main.zig
+++ b/src/unicode/main.zig
@@ -7,7 +7,6 @@ pub const Properties = props.Properties;
pub const getProperties = props.get;
pub const graphemeBreak = grapheme.graphemeBreak;
pub const GraphemeBreakState = grapheme.BreakState;
-pub const isExtendedPictographic = props.isExtendedPictographic;
test {
_ = @import("symbols.zig");
diff --git a/src/unicode/props.zig b/src/unicode/props.zig
index 0c11f3dc9..53493b2ff 100644
--- a/src/unicode/props.zig
+++ b/src/unicode/props.zig
@@ -76,66 +76,66 @@ pub const GraphemeBoundaryClass = enum(u4) {
extended_pictographic,
extended_pictographic_base, // \p{Extended_Pictographic} & \p{Emoji_Modifier_Base}
emoji_modifier, // \p{Emoji_Modifier}
-};
-
-/// Gets the grapheme boundary class for a codepoint.
-/// The use case for this is only in generating lookup tables.
-fn computeGraphemeBoundaryClass(cp: u21) GraphemeBoundaryClass {
- if (cp > uucode.config.max_code_point) return .invalid;
- if (uucode.get(.is_emoji_modifier, cp)) return .emoji_modifier;
- if (uucode.get(.is_emoji_modifier_base, cp)) return .extended_pictographic_base;
-
- return switch (uucode.get(.grapheme_break, cp)) {
- .extended_pictographic => .extended_pictographic,
- .l => .L,
- .v => .V,
- .t => .T,
- .lv => .LV,
- .lvt => .LVT,
- .prepend => .prepend,
- .zwj => .zwj,
- .spacing_mark => .spacing_mark,
- .regional_indicator => .regional_indicator,
-
- .zwnj,
- .indic_conjunct_break_extend,
- .indic_conjunct_break_linker,
- => .extend,
-
- // This is obviously not INVALID invalid, there is SOME grapheme
- // boundary class for every codepoint. But we don't care about
- // anything that doesn't fit into the above categories.
- .other,
- .indic_conjunct_break_consonant,
- .cr,
- .lf,
- .control,
- => .invalid,
- };
-}
-/// Returns true if this is an extended pictographic type. This
-/// should be used instead of comparing the enum value directly
-/// because we classify multiple.
-pub fn isExtendedPictographic(self: GraphemeBoundaryClass) bool {
- return switch (self) {
- .extended_pictographic,
- .extended_pictographic_base,
- => true,
+ /// Gets the grapheme boundary class for a codepoint.
+ /// The use case for this is only in generating lookup tables.
+ pub fn init(cp: u21) GraphemeBoundaryClass {
+ if (cp > uucode.config.max_code_point) return .invalid;
+ if (uucode.get(.is_emoji_modifier, cp)) return .emoji_modifier;
+ if (uucode.get(.is_emoji_modifier_base, cp)) return .extended_pictographic_base;
+
+ return switch (uucode.get(.grapheme_break, cp)) {
+ .extended_pictographic => .extended_pictographic,
+ .l => .L,
+ .v => .V,
+ .t => .T,
+ .lv => .LV,
+ .lvt => .LVT,
+ .prepend => .prepend,
+ .zwj => .zwj,
+ .spacing_mark => .spacing_mark,
+ .regional_indicator => .regional_indicator,
+
+ .zwnj,
+ .indic_conjunct_break_extend,
+ .indic_conjunct_break_linker,
+ => .extend,
+
+ // This is obviously not INVALID invalid, there is SOME grapheme
+ // boundary class for every codepoint. But we don't care about
+ // anything that doesn't fit into the above categories.
+ .other,
+ .indic_conjunct_break_consonant,
+ .cr,
+ .lf,
+ .control,
+ => .invalid,
+ };
+ }
- else => false,
- };
-}
+ /// Returns true if this is an extended pictographic type. This
+ /// should be used instead of comparing the enum value directly
+ /// because more than one class counts as extended pictographic.
+ pub fn isExtendedPictographic(self: GraphemeBoundaryClass) bool {
+ return switch (self) {
+ .extended_pictographic,
+ .extended_pictographic_base,
+ => true,
+
+ else => false,
+ };
+ }
+};
pub fn get(cp: u21) Properties {
const width = if (cp > uucode.config.max_code_point)
- 0
+ 1
else
uucode.getX(.width, cp);
return .{
.width = width,
- .grapheme_boundary_class = computeGraphemeBoundaryClass(cp),
+ .grapheme_boundary_class = .init(cp),
};
}
@@ -145,13 +145,6 @@ pub fn main() !void {
defer arena_state.deinit();
const alloc = arena_state.allocator();
- var args_iter = try std.process.argsWithAllocator(alloc);
- defer args_iter.deinit();
- _ = args_iter.skip(); // Skip program name
-
- const output_path = args_iter.next() orelse std.debug.panic("No output file arg for props exe!", .{});
- std.debug.print("Unicode props_table output_path = {s}\n", .{output_path});
-
const gen: lut.Generator(
Properties,
struct {
@@ -171,10 +164,7 @@ pub fn main() !void {
defer alloc.free(t.stage1);
defer alloc.free(t.stage2);
defer alloc.free(t.stage3);
- var out_file = try std.fs.cwd().createFile(output_path, .{});
- defer out_file.close();
- const writer = out_file.writer();
- try t.writeZig(writer);
+ try t.writeZig(std.io.getStdOut().writer());
// Uncomment when manually debugging to see our table sizes.
// std.log.warn("stage1={} stage2={} stage3={}", .{
@@ -186,17 +176,78 @@ pub fn main() !void {
// This is not very fast in debug modes, so its commented by default.
// IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CODEPOINTWIDTH CHANGES.
-// test "unicode props: tables match uucode" {
-// const testing = std.testing;
-//
-// const min = 0xFF + 1; // start outside ascii
-// const max = std.math.maxInt(u21) + 1;
-// for (min..max) |cp| {
-// const t = table.get(@intCast(cp));
-// const uu = @min(2, @max(0, uucode.get(.wcwidth, @intCast(cp))));
-// if (t.width != uu) {
-// std.log.warn("mismatch cp=U+{x} t={} uucode={}", .{ cp, t, uu });
-// try testing.expect(false);
-// }
-// }
-//}
+test "unicode props: tables match uucode" {
+ if (std.valgrind.runningOnValgrind() > 0) return error.SkipZigTest;
+
+ const testing = std.testing;
+
+ const min = 0xFF + 1; // start outside ascii
+ const max = std.math.maxInt(u21) + 1;
+ for (min..max) |cp| {
+ const t = table.get(@intCast(cp));
+ const uu = if (cp > uucode.config.max_code_point)
+ 1
+ else
+ uucode.getX(.width, @intCast(cp));
+ if (t.width != uu) {
+ std.log.warn("mismatch cp=U+{x} t={} uu={}", .{ cp, t.width, uu });
+ try testing.expect(false);
+ }
+ }
+}
+
+test "unicode props: tables match ziglyph" {
+ if (std.valgrind.runningOnValgrind() > 0) return error.SkipZigTest;
+
+ const ziglyph = @import("ziglyph");
+ const testing = std.testing;
+
+ const min = 0xFF + 1; // start outside ascii
+ const max = std.math.maxInt(u21) + 1;
+ for (min..max) |cp| {
+ const t = table.get(@intCast(cp));
+ const zg = @min(2, @max(0, ziglyph.display_width.codePointWidth(@intCast(cp), .half)));
+ if (t.width != zg) {
+
+ // Known exceptions
+ if (cp == 0x0897) continue; // non-spacing mark (t = 0)
+ if (cp == 0x2065) continue; // unassigned (t = 1)
+ if (cp >= 0x2630 and cp <= 0x2637) continue; // east asian width is wide (t = 2)
+ if (cp >= 0x268A and cp <= 0x268F) continue; // east asian width is wide (t = 2)
+ if (cp >= 0x2FFC and cp <= 0x2FFF) continue; // east asian width is wide (t = 2)
+ if (cp == 0x31E4 or cp == 0x31E5) continue; // east asian width is wide (t = 2)
+ if (cp == 0x31EF) continue; // east asian width is wide (t = 2)
+ if (cp >= 0x4DC0 and cp <= 0x4DFF) continue; // east asian width is wide (t = 2)
+ if (cp >= 0xFFF0 and cp <= 0xFFF8) continue; // unassigned (t = 1)
+ if (cp >= 0xFFF0 and cp <= 0xFFF8) continue; // unassigned (t = 1)
+ if (cp >= 0x10D69 and cp <= 0x10D6D) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp >= 0x10EFC and cp <= 0x10EFF) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp >= 0x113BB and cp <= 0x113C0) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp == 0x113CE) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp == 0x113D0) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp == 0x113D2) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp == 0x113E1) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp == 0x113E2) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp == 0x1171E) continue; // mark spacing combining (t = 1)
+ if (cp == 0x11F5A) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp == 0x1611E) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp == 0x1611F) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp >= 0x16120 and cp <= 0x1612F) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp >= 0xE0000 and cp <= 0xE0FFF) continue; // ziglyph ignores these with 0, but many are unassigned (t = 1)
+ if (cp == 0x18CFF) continue; // east asian width is wide (t = 2)
+ if (cp >= 0x1D300 and cp <= 0x1D376) continue; // east asian width is wide (t = 2)
+ if (cp == 0x1E5EE) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp == 0x1E5EF) continue; // non-spacing mark, despite being east asian width normal (t = 0)
+ if (cp == 0x1FA89) continue; // east asian width is wide (t = 2)
+ if (cp == 0x1FA8F) continue; // east asian width is wide (t = 2)
+ if (cp == 0x1FABE) continue; // east asian width is wide (t = 2)
+ if (cp == 0x1FAC6) continue; // east asian width is wide (t = 2)
+ if (cp == 0x1FADC) continue; // east asian width is wide (t = 2)
+ if (cp == 0x1FADF) continue; // east asian width is wide (t = 2)
+ if (cp == 0x1FAE9) continue; // east asian width is wide (t = 2)
+
+ std.log.warn("mismatch cp=U+{x} t={} zg={}", .{ cp, t.width, zg });
+ try testing.expect(false);
+ }
+ }
+}
diff --git a/src/unicode/symbols.zig b/src/unicode/symbols.zig
index 8150d279f..e5c09a7b0 100644
--- a/src/unicode/symbols.zig
+++ b/src/unicode/symbols.zig
@@ -17,37 +17,12 @@ pub const table = table: {
};
};
-/// Returns true of the codepoint is a "symbol-like" character, which
-/// for now we define as anything in a private use area and anything
-/// in several unicode blocks:
-/// - Dingbats
-/// - Emoticons
-/// - Miscellaneous Symbols
-/// - Enclosed Alphanumerics
-/// - Enclosed Alphanumeric Supplement
-/// - Miscellaneous Symbols and Pictographs
-/// - Transport and Map Symbols
-///
-/// In the future it may be prudent to expand this to encompass more
-/// symbol-like characters, and/or exclude some PUA sections.
-pub fn isSymbol(cp: u21) bool {
- // TODO: probably can remove this method and just call uucode directly
- return uucode.getX(.is_symbol, cp);
-}
-
/// Runnable binary to generate the lookup tables and output to stdout.
pub fn main() !void {
var arena_state = std.heap.ArenaAllocator.init(std.heap.page_allocator);
defer arena_state.deinit();
const alloc = arena_state.allocator();
- var args_iter = try std.process.argsWithAllocator(alloc);
- defer args_iter.deinit();
- _ = args_iter.skip(); // Skip program name
-
- const output_path = args_iter.next() orelse std.debug.panic("No output file arg for symbols exe!", .{});
- std.debug.print("Unicode symbols_table output_path = {s}\n", .{output_path});
-
const gen: lut.Generator(
bool,
struct {
@@ -56,7 +31,7 @@ pub fn main() !void {
return if (cp > uucode.config.max_code_point)
false
else
- isSymbol(@intCast(cp));
+ uucode.getX(.is_symbol, @intCast(cp));
}
pub fn eql(ctx: @This(), a: bool, b: bool) bool {
@@ -70,10 +45,7 @@ pub fn main() !void {
defer alloc.free(t.stage1);
defer alloc.free(t.stage2);
defer alloc.free(t.stage3);
- var out_file = try std.fs.cwd().createFile(output_path, .{});
- defer out_file.close();
- const writer = out_file.writer();
- try t.writeZig(writer);
+ try t.writeZig(std.io.getStdOut().writer());
// Uncomment when manually debugging to see our table sizes.
// std.log.warn("stage1={} stage2={} stage3={}", .{
@@ -83,8 +55,6 @@ pub fn main() !void {
// });
}
-// This is not very fast in debug modes, so its commented by default.
-// IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CHANGES.
test "unicode symbols: tables match uucode" {
if (std.valgrind.runningOnValgrind() > 0) return error.SkipZigTest;
@@ -95,7 +65,7 @@ test "unicode symbols: tables match uucode" {
const uu = if (cp > uucode.config.max_code_point)
false
else
- isSymbol(@intCast(cp));
+ uucode.getX(.is_symbol, @intCast(cp));
if (t != uu) {
std.log.warn("mismatch cp=U+{x} t={} uu={}", .{ cp, t, uu });
@@ -103,3 +73,28 @@ test "unicode symbols: tables match uucode" {
}
}
}
+
+test "unicode symbols: tables match ziglyph" {
+ if (std.valgrind.runningOnValgrind() > 0) return error.SkipZigTest;
+
+ const ziglyph = @import("ziglyph");
+ const testing = std.testing;
+
+ for (0..std.math.maxInt(u21) + 1) |cp_usize| {
+ const cp: u21 = @intCast(cp_usize);
+ const t = table.get(cp);
+ const zg = ziglyph.general_category.isPrivateUse(cp) or
+ ziglyph.blocks.isDingbats(cp) or
+ ziglyph.blocks.isEmoticons(cp) or
+ ziglyph.blocks.isMiscellaneousSymbols(cp) or
+ ziglyph.blocks.isEnclosedAlphanumerics(cp) or
+ ziglyph.blocks.isEnclosedAlphanumericSupplement(cp) or
+ ziglyph.blocks.isMiscellaneousSymbolsAndPictographs(cp) or
+ ziglyph.blocks.isTransportAndMapSymbols(cp);
+
+ if (t != zg) {
+ std.log.warn("mismatch cp=U+{x} t={} zg={}", .{ cp, t, zg });
+ try testing.expect(false);
+ }
+ }
+}