summaryrefslogtreecommitdiff
path: root/src/unicode/grapheme.zig
diff options
context:
space:
mode:
authorMitchell Hashimoto <mitchell.hashimoto@gmail.com>2024-02-09 20:29:36 -0800
committerMitchell Hashimoto <mitchell.hashimoto@gmail.com>2024-02-09 20:29:36 -0800
commit132fbb3a4695b09d8674914e8d68a660fb28df6d (patch)
treeca2b57d7e429d84953dc962ae16d815157b483ce /src/unicode/grapheme.zig
parentc47ad97f62ca1f5e6132d46839b7cda999af461b (diff)
unicode: use packed struct for break state
Diffstat (limited to 'src/unicode/grapheme.zig')
-rw-r--r--src/unicode/grapheme.zig55
1 files changed, 17 insertions, 38 deletions
diff --git a/src/unicode/grapheme.zig b/src/unicode/grapheme.zig
index 19437844c..d4c146e49 100644
--- a/src/unicode/grapheme.zig
+++ b/src/unicode/grapheme.zig
@@ -18,19 +18,27 @@ const table = props.table;
/// line feeds, and carriage returns are expected to be filtered out before
/// calling this function. This is because this function is tuned for
/// Ghostty.
-pub fn graphemeBreak(cp1: u21, cp2: u21, state: *u3) bool {
+pub fn graphemeBreak(cp1: u21, cp2: u21, state: *BreakState) bool {
const gbc1 = table.get(cp1).grapheme_boundary_class;
const gbc2 = table.get(cp2).grapheme_boundary_class;
return graphemeBreakClass(gbc1, gbc2, state);
}
+/// The state that must be maintained between calls to `graphemeBreak`.
+pub const BreakState = packed struct(u2) {
+ extended_pictographic: bool = false,
+ regional_indicator: bool = false,
+};
+
fn graphemeBreakClass(
gbc1: GraphemeBoundaryClass,
gbc2: GraphemeBoundaryClass,
- state: *u3,
+ state: *BreakState,
) bool {
// GB11: Emoji Extend* ZWJ x Emoji
- if (!hasXpic(state) and gbc1 == .extended_pictographic) setXpic(state);
+ if (!state.extended_pictographic and gbc1 == .extended_pictographic) {
+ state.extended_pictographic = true;
+ }
// These two properties are ignored because they're not relevant to
// Ghostty -- they're filtered out before checking grapheme boundaries.
@@ -67,56 +75,27 @@ fn graphemeBreakClass(
// GB12, GB13: RI x RI
if (gbc1 == .regional_indicator and gbc2 == .regional_indicator) {
- if (hasRegional(state)) {
- unsetRegional(state);
+ if (state.regional_indicator) {
+ state.regional_indicator = false;
return true;
} else {
- setRegional(state);
+ state.regional_indicator = true;
return false;
}
}
// GB11: Emoji Extend* ZWJ x Emoji
- if (hasXpic(state) and
+ if (state.extended_pictographic and
gbc1 == .zwj and
gbc2 == .extended_pictographic)
{
- unsetXpic(state);
+ state.extended_pictographic = false;
return false;
}
return true;
}
-const State = packed struct(u2) {
- extended_pictographic: bool = false,
- regional_indicator: bool = false,
-};
-
-fn hasXpic(state: *const u3) bool {
- return state.* & 1 == 1;
-}
-
-fn setXpic(state: *u3) void {
- state.* |= 1;
-}
-
-fn unsetXpic(state: *u3) void {
- state.* ^= 1;
-}
-
-fn hasRegional(state: *const u3) bool {
- return state.* & 2 == 2;
-}
-
-fn setRegional(state: *u3) void {
- state.* |= 2;
-}
-
-fn unsetRegional(state: *u3) void {
- state.* ^= 2;
-}
-
/// If you build this file as a binary, we will verify the grapheme break
/// implementation. This iterates over billions of codepoints so it is
/// SLOW. It's not meant to be run in CI, but it's useful for debugging.
@@ -127,7 +106,7 @@ pub fn main() !void {
const min = 0;
const max = std.math.maxInt(u21) + 1;
- var state: u3 = 0;
+ var state: BreakState = .{};
var zg_state: u3 = 0;
for (min..max) |cp1| {
if (cp1 % 1000 == 0) std.log.warn("progress cp1={}", .{cp1});