summaryrefslogtreecommitdiff
path: root/src/unicode/Properties.zig
blob: c8c4a581c5b6c4e5b1b74ede99346ea31213054a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
//! Property set per codepoint that Ghostty cares about.
//!
//! Adding to this lets you find new properties but also potentially makes
//! our lookup tables less efficient. Any changes to this should run the
//! benchmarks in src/bench to verify that we haven't regressed.
const Properties = @This();

const std = @import("std");

/// Codepoint width, stored as a u2 and defaulting to 0. We clamp to
/// [0, 2] since Ghostty handles control characters and we max out at 2
/// for wide characters (e.g. a 3-em dash becomes a 2-em dash).
width: u2 = 0,

/// Grapheme boundary class of the codepoint. Defaults to `.invalid`.
grapheme_boundary_class: GraphemeBoundaryClass = .invalid,

/// Reports whether two property sets are identical, i.e. both the width
/// and the grapheme boundary class match.
///
/// Needed for lut.Generator.
pub fn eql(a: Properties, b: Properties) bool {
    if (a.width != b.width) return false;
    return a.grapheme_boundary_class == b.grapheme_boundary_class;
}

/// Writes this property set to `writer` as an anonymous struct literal
/// with a `.width` and a `.grapheme_boundary_class` entry, the latter
/// spelled as an enum literal built from the tag name.
///
/// Any error reported by the writer is passed on to the caller.
///
/// Needed for lut.Generator.
pub fn format(
    self: Properties,
    writer: *std.Io.Writer,
) !void {
    // Resolve the enum tag to its name up front so the argument tuple
    // below stays on one line.
    const class_name = @tagName(self.grapheme_boundary_class);

    return writer.print(
        \\.{{
        \\    .width= {},
        \\    .grapheme_boundary_class= .{s},
        \\}}
    , .{ self.width, class_name });
}

/// Possible grapheme boundary classes. This isn't an exhaustive list:
/// we omit control, CR, LF, etc. because in Ghostty's usage they are
/// impossible: they're handled by the terminal.
pub const GraphemeBoundaryClass = enum(u4) {
    invalid,

    // NOTE(review): L/V/T/LV/LVT look like the Hangul syllable classes
    // from UAX #29 -- confirm against the table generator.
    L,
    V,
    T,
    LV,
    LVT,

    prepend,
    extend,
    zwj,
    spacing_mark,
    regional_indicator,
    extended_pictographic,

    /// \p{Extended_Pictographic} & \p{Emoji_Modifier_Base}
    extended_pictographic_base,

    /// \p{Emoji_Modifier}
    emoji_modifier,

    /// Returns true if this class is one of the extended pictographic
    /// classes. Use this instead of comparing against a single enum
    /// value, because more than one class counts as extended
    /// pictographic (the plain one and the emoji-modifier-base one).
    pub fn isExtendedPictographic(self: GraphemeBoundaryClass) bool {
        return self == .extended_pictographic or
            self == .extended_pictographic_base;
    }
};