diff options
| author | Mitchell Hashimoto <mitchell.hashimoto@gmail.com> | 2024-02-09 20:29:36 -0800 |
|---|---|---|
| committer | Mitchell Hashimoto <mitchell.hashimoto@gmail.com> | 2024-02-09 20:29:36 -0800 |
| commit | 132fbb3a4695b09d8674914e8d68a660fb28df6d (patch) | |
| tree | ca2b57d7e429d84953dc962ae16d815157b483ce /src/unicode/grapheme.zig | |
| parent | c47ad97f62ca1f5e6132d46839b7cda999af461b (diff) | |
unicode: use packed struct for break state
Diffstat (limited to 'src/unicode/grapheme.zig')
| -rw-r--r-- | src/unicode/grapheme.zig | 55 |
1 files changed, 17 insertions, 38 deletions
diff --git a/src/unicode/grapheme.zig b/src/unicode/grapheme.zig index 19437844c..d4c146e49 100644 --- a/src/unicode/grapheme.zig +++ b/src/unicode/grapheme.zig @@ -18,19 +18,27 @@ const table = props.table; /// line feeds, and carriage returns are expected to be filtered out before /// calling this function. This is because this function is tuned for /// Ghostty. -pub fn graphemeBreak(cp1: u21, cp2: u21, state: *u3) bool { +pub fn graphemeBreak(cp1: u21, cp2: u21, state: *BreakState) bool { const gbc1 = table.get(cp1).grapheme_boundary_class; const gbc2 = table.get(cp2).grapheme_boundary_class; return graphemeBreakClass(gbc1, gbc2, state); } +/// The state that must be maintained between calls to `graphemeBreak`. +pub const BreakState = packed struct(u2) { + extended_pictographic: bool = false, + regional_indicator: bool = false, +}; + fn graphemeBreakClass( gbc1: GraphemeBoundaryClass, gbc2: GraphemeBoundaryClass, - state: *u3, + state: *BreakState, ) bool { // GB11: Emoji Extend* ZWJ x Emoji - if (!hasXpic(state) and gbc1 == .extended_pictographic) setXpic(state); + if (!state.extended_pictographic and gbc1 == .extended_pictographic) { + state.extended_pictographic = true; + } // These two properties are ignored because they're not relevant to // Ghostty -- they're filtered out before checking grapheme boundaries. @@ -67,56 +75,27 @@ fn graphemeBreakClass( // GB12, GB13: RI x RI if (gbc1 == .regional_indicator and gbc2 == .regional_indicator) { - if (hasRegional(state)) { - unsetRegional(state); + if (state.regional_indicator) { + state.regional_indicator = false; return true; } else { - setRegional(state); + state.regional_indicator = true; return false; } } // GB11: Emoji Extend* ZWJ x Emoji - if (hasXpic(state) and + if (state.extended_pictographic and gbc1 == .zwj and gbc2 == .extended_pictographic) { - unsetXpic(state); + state.extended_pictographic = false; return false; } return true; } -const State = packed struct(u2) { - extended_pictographic: bool = false, - regional_indicator: bool = false, -}; - -fn hasXpic(state: *const u3) bool { - return state.* & 1 == 1; -} - -fn setXpic(state: *u3) void { - state.* |= 1; -} - -fn unsetXpic(state: *u3) void { - state.* ^= 1; -} - -fn hasRegional(state: *const u3) bool { - return state.* & 2 == 2; -} - -fn setRegional(state: *u3) void { - state.* |= 2; -} - -fn unsetRegional(state: *u3) void { - state.* ^= 2; -} - /// If you build this file as a binary, we will verify the grapheme break /// implementation. This iterates over billions of codepoints so it is /// SLOW. It's not meant to be run in CI, but it's useful for debugging. @@ -127,7 +106,7 @@ pub fn main() !void { const min = 0; const max = std.math.maxInt(u21) + 1; - var state: u3 = 0; + var state: BreakState = .{}; var zg_state: u3 = 0; for (min..max) |cp1| { if (cp1 % 1000 == 0) std.log.warn("progress cp1={}", .{cp1}); |
