summaryrefslogtreecommitdiff
path: root/src/font
diff options
context:
space:
mode:
authorMitchell Hashimoto <mitchell.hashimoto@gmail.com>2024-04-02 14:59:54 -0700
committerMitchell Hashimoto <mitchell.hashimoto@gmail.com>2024-04-05 09:29:41 -0700
commit4eccd42f6b281e06038ac3d412586a239d27209e (patch)
tree039b0922966aab4fd98a7ef362102329c734120b /src/font
parentbd479db09f2929ea3e0cb02803ecc18b03513b91 (diff)
font: CodepointResolver beginnings
Diffstat (limited to 'src/font')
-rw-r--r--src/font/CodepointResolver.zig387
-rw-r--r--src/font/Collection.zig26
-rw-r--r--src/font/main.zig2
3 files changed, 413 insertions, 2 deletions
diff --git a/src/font/CodepointResolver.zig b/src/font/CodepointResolver.zig
new file mode 100644
index 000000000..4efeafbbc
--- /dev/null
+++ b/src/font/CodepointResolver.zig
@@ -0,0 +1,387 @@
+//! CodepointResolver maps a codepoint to a font. It is more dynamic
+//! than "Collection" since it supports mapping codepoint ranges to
+//! specific fonts, searching for fallback fonts, and more.
+//!
+//! To initialize the codepoint resolver, manually initialize using
+//! Zig initialization syntax: .{}-style. Set the fields you want set,
+//! and begin using the resolver.
+//!
+//! Deinit must still be called on the resolver to free any memory
+//! allocated during use. All functions that take allocators should use
+//! the same allocator.
+const CodepointResolver = @This();
+
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+const ziglyph = @import("ziglyph");
+const font = @import("main.zig");
+const CodepointMap = font.CodepointMap;
+const Collection = font.Collection;
+const Discover = font.Discover;
+const DiscoveryDescriptor = font.discovery.Descriptor;
+const Face = font.Face;
+const Library = font.Library;
+const Presentation = font.Presentation;
+const SpriteFace = font.SpriteFace;
+const Style = font.Style;
+
+const log = std.log.scoped(.font_codepoint_resolver);
+
+/// The underlying collection of fonts. This will be modified as
+/// new fonts are found via the resolver. The resolver takes ownership
+/// of the collection and will deinit it when it is deinitialized.
+collection: Collection,
+
+/// The set of font styles and whether each one is enabled or not. Every
+/// style defaults to enabled. This can be changed at runtime with no ill effect.
+styles: StyleStatus = StyleStatus.initFill(true),
+
+/// If discovery is available, we'll look up fonts where we can't find
+/// the codepoint. This can be set after initialization.
+discover: ?*Discover = null,
+
+/// A map of codepoints to font requests for codepoint-level overrides.
+/// The memory associated with the map is owned by the caller and is not
+/// modified or freed by the resolver.
+codepoint_map: ?CodepointMap = null,
+
+/// The descriptor cache is used to cache the descriptor to font face
+/// mapping for codepoint maps.
+descriptor_cache: DescriptorCache = .{},
+
+/// Set this to a non-null value to enable sprite glyph drawing. If this
+/// isn't enabled we'll just fall through to trying to use regular fonts
+/// to render sprite glyphs. But more than likely, if this isn't set then
+/// terminal rendering will look wrong.
+sprite: ?SpriteFace = null,
+
+/// Releases the memory held by the resolver: the descriptor cache and the
+/// font collection it took ownership of. `alloc` should be the same
+/// allocator that was passed to every other function on this resolver.
+pub fn deinit(self: *CodepointResolver, alloc: Allocator) void {
+    self.descriptor_cache.deinit(alloc);
+    self.collection.deinit(alloc);
+}
+
+/// Looks up the font that should be used for a specific codepoint.
+/// The font index is valid as long as font faces aren't removed. The
+/// result isn't cached; it is expected that downstream users handle
+/// caching if that is important.
+///
+/// Optionally, a presentation format can be specified. This presentation
+/// format will be preferred but if it can't be found in this format,
+/// any format will be accepted. If presentation is null, the UCD
+/// (Unicode Character Database) will be used to determine the default
+/// presentation for the codepoint. A value that does not fit in 21 bits
+/// is not a Unicode codepoint and is treated as text presentation.
+///
+/// This logic is relatively complex so the exact algorithm is documented
+/// here. If this gets out of sync with the code, ask questions.
+///
+/// 1. If a font style is requested that is disabled (e.g. bold),
+/// we start over with the regular font style. The regular font style
+/// cannot be disabled, but it can be replaced with a stylized font
+/// face.
+///
+/// 2. If there is a codepoint override for the codepoint, we satisfy
+/// that requirement if we can, no matter what style or presentation.
+///
+/// 3. If this is a sprite codepoint (such as an underline), then the
+/// sprite font always is the result.
+///
+/// 4. If the exact style and presentation request can be satisfied by
+/// one of our loaded fonts, we return that value. We search loaded
+/// fonts in the order they're added to the collection, so the caller
+/// must set the priority order.
+///
+/// 5. If the style isn't regular, we restart this process at this point
+/// but with the regular style. This lets us fall back to regular with
+/// our loaded fonts before trying a fallback. We'd rather show a regular
+/// version of a codepoint from a loaded font than find a new font in
+/// the correct style because styles in other fonts often change
+/// metrics like glyph widths.
+///
+/// 6. If the style is regular, and font discovery is enabled, we look
+/// for a fallback font to satisfy our request.
+///
+/// 7. Finally, as a last resort (and unless the request was already for
+/// the regular style with no explicit presentation), we search the
+/// loaded regular fonts for a face satisfying ANY presentation for
+/// the codepoint. If this fails, we return null.
+///
+pub fn getIndex(
+    self: *CodepointResolver,
+    alloc: Allocator,
+    cp: u32,
+    style: Style,
+    p: ?Presentation,
+) ?Collection.Index {
+    // If we've disabled a font style, then fall back to regular.
+    if (style != .regular and !self.styles.get(style)) {
+        return self.getIndex(alloc, cp, .regular, p);
+    }
+
+    // Codepoint overrides. A failing override lookup is only logged; we
+    // then continue with the normal resolution below.
+    if (self.indexForCodepointOverride(alloc, cp)) |idx_| {
+        if (idx_) |idx| return idx;
+    } else |err| {
+        log.warn("codepoint override failed codepoint={} err={}", .{ cp, err });
+    }
+
+    // If we have sprite drawing enabled, check if our sprite face can
+    // handle this.
+    if (self.sprite) |sprite| {
+        if (sprite.hasCodepoint(cp, p)) {
+            return Collection.Index.initSpecial(.sprite);
+        }
+    }
+
+    // Build our presentation mode. If we don't have an explicit presentation
+    // given then we use the UCD (Unicode Character Database) to determine
+    // the default presentation. Note there is some inefficiency here because
+    // we'll do this multiple times if we recurse, but callers are expected
+    // to cache the result of this function so this should be rare.
+    const p_mode: Collection.PresentationMode = if (p) |v|
+        .{ .explicit = v }
+    else mode: {
+        // The UCD lookup takes a u21. Use a checked cast so an out-of-range
+        // value degrades to text presentation instead of tripping the
+        // safety check that a bare @intCast would.
+        const cp_u21 = std.math.cast(u21, cp) orelse
+            break :mode .{ .default = .text };
+        break :mode .{
+            .default = if (ziglyph.emoji.isEmojiPresentation(cp_u21))
+                .emoji
+            else
+                .text,
+        };
+    };
+
+    // If we can find the exact value, then return that.
+    if (self.collection.getIndex(cp, style, p_mode)) |value| return value;
+
+    // If we're not a regular font style, try looking for a regular font
+    // that will satisfy this request. Blindly looking for unmatched styled
+    // fonts to satisfy one codepoint results in some ugly rendering.
+    if (style != .regular) {
+        if (self.getIndex(alloc, cp, .regular, p)) |value| return value;
+    }
+
+    // If we are regular, try looking for a fallback using discovery.
+    if (style == .regular and font.Discover != void) {
+        log.debug("searching for a fallback font for cp={X}", .{cp});
+        if (self.discover) |disco| discover: {
+            // Without load options we don't know the size to search for.
+            const load_opts = self.collection.load_options orelse
+                break :discover;
+            var disco_it = disco.discover(alloc, .{
+                .codepoint = cp,
+                .size = load_opts.size.points,
+                // Style is always regular inside this branch, so both of
+                // these evaluate to false today.
+                .bold = style == .bold or style == .bold_italic,
+                .italic = style == .italic or style == .bold_italic,
+                .monospace = false,
+            }) catch |err| {
+                log.warn("fallback discovery failed cp={X} err={}", .{ cp, err });
+                break :discover;
+            };
+            defer disco_it.deinit();
+
+            while (true) {
+                var deferred_face = (disco_it.next() catch |err| {
+                    log.warn("fallback search failed with error err={}", .{err});
+                    break;
+                }) orelse break;
+
+                // Discovery is supposed to only return faces that have our
+                // codepoint but we can't search presentation in discovery so
+                // we have to check it here.
+                const face: Collection.Entry = .{ .fallback_deferred = deferred_face };
+                if (!face.hasCodepoint(cp, p_mode)) {
+                    deferred_face.deinit();
+                    continue;
+                }
+
+                var buf: [256]u8 = undefined;
+                log.info("found codepoint 0x{X} in fallback face={s}", .{
+                    cp,
+                    deferred_face.name(&buf) catch "<error>",
+                });
+                return self.collection.add(alloc, style, face) catch |err| {
+                    // The face was not added, so we still own it and must
+                    // release it ourselves.
+                    log.warn("failed to add fallback face cp={X} err={}", .{ cp, err });
+                    deferred_face.deinit();
+                    break :discover;
+                };
+            }
+
+            log.debug("no fallback face found for cp={X}", .{cp});
+        }
+    }
+
+    // If this is already regular with the default presentation, we're
+    // done falling back.
+    if (style == .regular and p == null) return null;
+
+    // Otherwise, fall back to regular with any presentation.
+    return self.collection.getIndex(cp, .regular, .{ .any = {} });
+}
+
+/// Checks if the codepoint is in the map of codepoint overrides,
+/// finds the override font, and returns it.
+///
+/// Returns null when there is no override for the codepoint, when font
+/// discovery is unavailable, when no font matches the override descriptor,
+/// or when the matched font does not contain the codepoint. Errors from
+/// discovery, from adding the face to the collection, or from growing the
+/// descriptor cache are returned to the caller.
+fn indexForCodepointOverride(
+    self: *CodepointResolver,
+    alloc: Allocator,
+    cp: u32,
+) !?Collection.Index {
+    // If discovery is disabled then we can't do codepoint overrides
+    // since the override is based on discovery to find the font.
+    if (comptime font.Discover == void) return null;
+
+    // Get our codepoint map. If we have no map set then we have no
+    // codepoint overrides and we're done.
+    const map = self.codepoint_map orelse return null;
+
+    // If we have a codepoint too large or isn't in the map, then we
+    // don't have an override. The map returns a descriptor that can be
+    // used with font discovery to search for a matching font.
+    const cp_u21 = std.math.cast(u21, cp) orelse return null;
+    const desc = map.get(cp_u21) orelse return null;
+
+    // Fast path: the descriptor is already loaded. This means that we
+    // already did the search before and we have an exact font for this
+    // codepoint.
+    const idx_: ?Collection.Index = self.descriptor_cache.get(desc) orelse idx: {
+        // Slow path: we have to find this descriptor and load the font
+        const discover = self.discover orelse return null;
+        var disco_it = try discover.discover(alloc, desc);
+        defer disco_it.deinit();
+
+        var face = (try disco_it.next()) orelse {
+            log.warn(
+                "font lookup for codepoint map failed codepoint={} err=FontNotFound",
+                .{cp},
+            );
+
+            // Add null to the cache so we don't do a lookup again later.
+            try self.descriptor_cache.put(alloc, desc, null);
+            return null;
+        };
+
+        // Until the collection accepts the face we own it, so release it
+        // if anything below fails. Nothing after the successful add can
+        // return an error, so this never frees a face the collection owns.
+        errdefer face.deinit();
+
+        // Reserve the cache slot before adding the face. That way we can
+        // never end up with a face in the collection that is missing from
+        // the cache (which would cause a duplicate to be added next time).
+        try self.descriptor_cache.ensureUnusedCapacity(alloc, 1);
+
+        // Add the font to our list of fonts so we can get an index for it,
+        // and ensure the index is stored in the descriptor cache for next time.
+        const idx = try self.collection.add(
+            alloc,
+            .regular,
+            .{ .deferred = face },
+        );
+        self.descriptor_cache.putAssumeCapacity(desc, idx);
+
+        break :idx idx;
+    };
+
+    // The descriptor cache will populate null if the descriptor is not found
+    // to avoid expensive discoveries later, so if it is null then we already
+    // searched and found nothing.
+    const idx = idx_ orelse return null;
+
+    // We need to verify that this index has the codepoint we want.
+    if (self.collection.hasCodepoint(idx, cp, null)) {
+        log.debug("codepoint override based on config codepoint={} family={s}", .{
+            cp,
+            desc.family orelse "",
+        });
+
+        return idx;
+    }
+
+    return null;
+}
+
+/// Array of booleans, indexed by style, indicating if a style is enabled or not.
+pub const StyleStatus = std.EnumArray(Style, bool);
+
+/// Cache from discovery descriptor to the collection index of the face
+/// loaded for it (or null if a previous search found nothing). This is
+/// used with manual codepoint maps to ensure that we don't load the same
+/// font multiple times.
+///
+/// Note that the current implementation will load the same font multiple
+/// times if the font used for a codepoint map is identical to a font used
+/// for a regular style. That's just an inefficient choice made now because
+/// the implementation is simpler and codepoint maps matching a regular
+/// font is a rare case.
+const DescriptorCache = std.HashMapUnmanaged(
+    DiscoveryDescriptor,
+    ?Collection.Index,
+    struct {
+        const Context = @This();
+
+        /// Hashes a descriptor using the descriptor's own hashcode.
+        pub fn hash(_: Context, key: DiscoveryDescriptor) u64 {
+            return key.hashcode();
+        }
+
+        /// Two descriptors are considered equal when their hashcodes match.
+        pub fn eql(
+            _: Context,
+            lhs: DiscoveryDescriptor,
+            rhs: DiscoveryDescriptor,
+        ) bool {
+            // Note that this means it's possible to have two different
+            // descriptors match when there is a hash collision so we
+            // should button this up later.
+            return lhs.hashcode() == rhs.hashcode();
+        }
+    },
+    std.hash_map.default_max_load_percentage,
+);
+
+// Exercises getIndex against a collection holding a regular font and
+// emoji fonts: ASCII must resolve to the first face, emoji to the emoji
+// face, explicit presentations must be honored, and a codepoint no loaded
+// face provides must resolve to null.
+test getIndex {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+    const testFont = @import("test.zig").fontRegular;
+    const testEmoji = @import("test.zig").fontEmoji;
+    const testEmojiText = @import("test.zig").fontEmojiText;
+
+    var lib = try Library.init();
+    defer lib.deinit();
+
+    // Hand the collection to the resolver right away so the deferred
+    // deinit below also releases it if any of the following `try`
+    // expressions fail while the faces are being added.
+    var r: CodepointResolver = .{ .collection = try Collection.init(alloc) };
+    defer r.deinit(alloc);
+
+    const c = &r.collection;
+    c.load_options = .{ .library = lib };
+
+    _ = try c.add(alloc, .regular, .{ .loaded = try Face.init(
+        lib,
+        testFont,
+        .{ .size = .{ .points = 12, .xdpi = 96, .ydpi = 96 } },
+    ) });
+    if (font.options.backend != .coretext) {
+        // Coretext doesn't support Noto's format
+        _ = try c.add(
+            alloc,
+            .regular,
+            .{ .loaded = try Face.init(
+                lib,
+                testEmoji,
+                .{ .size = .{ .points = 12 } },
+            ) },
+        );
+    }
+    _ = try c.add(
+        alloc,
+        .regular,
+        .{ .loaded = try Face.init(
+            lib,
+            testEmojiText,
+            .{ .size = .{ .points = 12 } },
+        ) },
+    );
+
+    // Should find all visible ASCII
+    var i: u32 = 32;
+    while (i < 127) : (i += 1) {
+        const idx = r.getIndex(alloc, i, .regular, null).?;
+        try testing.expectEqual(Style.regular, idx.style);
+        try testing.expectEqual(@as(Collection.Index.IndexInt, 0), idx.idx);
+    }
+
+    // Try emoji
+    {
+        const idx = r.getIndex(alloc, '🥸', .regular, null).?;
+        try testing.expectEqual(Style.regular, idx.style);
+        try testing.expectEqual(@as(Collection.Index.IndexInt, 1), idx.idx);
+    }
+
+    // Try text emoji
+    {
+        const idx = r.getIndex(alloc, 0x270C, .regular, .text).?;
+        try testing.expectEqual(Style.regular, idx.style);
+        // The emoji face is skipped on coretext, which shifts the index
+        // of the text-emoji face down by one.
+        const text_idx = if (font.options.backend == .coretext) 1 else 2;
+        try testing.expectEqual(@as(Collection.Index.IndexInt, text_idx), idx.idx);
+    }
+    {
+        const idx = r.getIndex(alloc, 0x270C, .regular, .emoji).?;
+        try testing.expectEqual(Style.regular, idx.style);
+        try testing.expectEqual(@as(Collection.Index.IndexInt, 1), idx.idx);
+    }
+
+    // Box glyph should be null since we didn't set a box font
+    {
+        try testing.expect(r.getIndex(alloc, 0x1FB00, .regular, null) == null);
+    }
+}
diff --git a/src/font/Collection.zig b/src/font/Collection.zig
index e3ad969bc..4d623967a 100644
--- a/src/font/Collection.zig
+++ b/src/font/Collection.zig
@@ -5,7 +5,7 @@
//!
//! The purpose of a collection is to store a list of fonts by style
//! and priority order. A collection does not handle searching for font
-//! callbacks, rasterization, etc.
+//! callbacks, rasterization, etc. For this, see CodepointResolver.
//!
//! The collection can contain both loaded and deferred faces. Deferred faces
//! typically use less memory while still providing some necessary information
@@ -152,6 +152,24 @@ pub fn getIndex(
return null;
}
+/// Reports whether the face stored at `index` provides the codepoint `cp`.
+/// This does not necessarily force the font to load.
+///
+/// When `p` is non-null the face must also match that presentation; when
+/// `p` is null any presentation is accepted. An index that is out of range
+/// for its style's face list yields false.
+pub fn hasCodepoint(
+    self: *const Collection,
+    index: Index,
+    cp: u32,
+    p: ?Presentation,
+) bool {
+    const entries = self.faces.get(index.style).items;
+    if (index.idx >= entries.len) return false;
+
+    const mode: PresentationMode = if (p) |v|
+        .{ .explicit = v }
+    else
+        .{ .any = {} };
+    return entries[index.idx].hasCodepoint(cp, mode);
+}
+
/// Automatically create an italicized font from the regular
/// font face if we don't have one already. If we already have
/// an italicized font face, this does nothing.
@@ -315,7 +333,11 @@ pub const Entry = union(enum) {
}
/// True if this face satisfies the given codepoint and presentation.
- fn hasCodepoint(self: Entry, cp: u32, p_mode: PresentationMode) bool {
+ pub fn hasCodepoint(
+ self: Entry,
+ cp: u32,
+ p_mode: PresentationMode,
+ ) bool {
return switch (self) {
// Non-fallback fonts require explicit presentation matching but
// otherwise don't care about presentation
diff --git a/src/font/main.zig b/src/font/main.zig
index 91a620053..0798bae05 100644
--- a/src/font/main.zig
+++ b/src/font/main.zig
@@ -6,6 +6,7 @@ pub const Atlas = @import("Atlas.zig");
pub const discovery = @import("discovery.zig");
pub const face = @import("face.zig");
pub const CodepointMap = @import("CodepointMap.zig");
+pub const CodepointResolver = @import("CodepointResolver.zig");
pub const Collection = @import("Collection.zig");
pub const DeferredFace = @import("DeferredFace.zig");
pub const Face = face.Face;
@@ -17,6 +18,7 @@ pub const shape = @import("shape.zig");
pub const Shaper = shape.Shaper;
pub const sprite = @import("sprite.zig");
pub const Sprite = sprite.Sprite;
+pub const SpriteFace = sprite.Face;
pub const Descriptor = discovery.Descriptor;
pub const Discover = discovery.Discover;
pub usingnamespace @import("library.zig");