diff options
| author | Mitchell Hashimoto <m@mitchellh.com> | 2024-11-07 14:38:54 -0800 |
|---|---|---|
| committer | Mitchell Hashimoto <m@mitchellh.com> | 2024-11-07 14:39:10 -0800 |
| commit | a436bd0af62a4bdc5af14774b955f7b46ccd9deb (patch) | |
| tree | 561636d47031fd8a33e5a2be1ee2fcee1858123c /src/datastruct/cache_table.zig | |
| parent | aed51fd0b00018eeea71ba1afcb48d18792ae46c (diff) | |
move datastructures to dedicated "datastruct" package
Diffstat (limited to 'src/datastruct/cache_table.zig')
| -rw-r--r-- | src/datastruct/cache_table.zig | 181 |
1 files changed, 181 insertions, 0 deletions
diff --git a/src/datastruct/cache_table.zig b/src/datastruct/cache_table.zig new file mode 100644 index 000000000..40d36cc24 --- /dev/null +++ b/src/datastruct/cache_table.zig @@ -0,0 +1,181 @@ +const fastmem = @import("../fastmem.zig"); + +const std = @import("std"); +const assert = std.debug.assert; + +/// An associative data structure used for efficiently storing and +/// retrieving values which are able to be recomputed if necessary. +/// +/// This structure is effectively a hash table with fixed-sized buckets. +/// +/// When inserting an item in to a full bucket, the least recently used +/// item is replaced. +/// +/// To achieve this, when an item is accessed, it's moved to the end of +/// the bucket, and the rest of the items are moved over to fill the gap. +/// +/// This should provide very good query performance and keep frequently +/// accessed items cached indefinitely. +/// +/// Parameters: +/// +/// `Context` +/// A type containing methods to define CacheTable behaviors. +/// - `fn hash(*Context, K) u64` - Return a hash for a key. +/// - `fn eql(*Context, K, K) bool` - Check two keys for equality. +/// +/// - `fn evicted(*Context, K, V) void` - [OPTIONAL] Eviction callback. +/// If present, called whenever an item is evicted from the cache. +/// +/// `bucket_count` +/// Should ideally be close to the median number of important items that +/// you expect to be cached at any given point. This is required to be a +/// power of 2 since performance suffers if it's not and there's no good +/// reason to allow it to be anything else. +/// +/// `bucket_size` +/// should be larger if you expect a large number of unimportant items to +/// enter the cache at a time. Having larger buckets will avoid important +/// items being dropped from the cache prematurely. +/// +pub fn CacheTable( + comptime K: type, + comptime V: type, + comptime Context: type, + comptime bucket_count: usize, + comptime bucket_size: u8, +) type { + return struct { + const Self = @This(); + + const KV = struct { + key: K, + value: V, + }; + + comptime { + assert(std.math.isPowerOfTwo(bucket_count)); + assert(bucket_count <= std.math.maxInt(usize)); + } + + /// `bucket_count` buckets containing `bucket_size` KV pairs each. + /// + /// We don't need to initialize this memory because we don't use it + /// unless it's within a bucket's stored length, which will guarantee + /// that we put actual items there. + buckets: [bucket_count][bucket_size]KV = undefined, + + /// We use this array to keep track of how many slots in each bucket + /// have actual items in them. Once all the buckets fill up this will + /// become a pointless check, but hopefully branch prediction picks + /// up on it at that point. The memory cost isn't too bad since it's + /// just bytes, so should be a fraction the size of the main table. + lengths: [bucket_count]u8 = [_]u8{0} ** bucket_count, + + /// An instance of the context structure. + /// Must be initialized before calling any operations. + context: Context, + + /// Adds an item to the cache table. If an old value was removed to + /// make room then it is returned in a struct with its key and value. + pub fn put(self: *Self, key: K, value: V) ?KV { + const kv: KV = .{ .key = key, .value = value }; + const idx: usize = @intCast(self.context.hash(key) % bucket_count); + + // If we have space available in the bucket then we just append + if (self.lengths[idx] < bucket_size) { + self.buckets[idx][self.lengths[idx]] = kv; + self.lengths[idx] += 1; + return null; + } + assert(self.lengths[idx] == bucket_size); + + // Append our new item and return the oldest + const evicted = fastmem.rotateIn(KV, &self.buckets[idx], kv); + + // The Context is allowed to register an eviction hook. + if (comptime @hasDecl(Context, "evicted")) self.context.evicted( + evicted.key, + evicted.value, + ); + + return evicted; + } + + /// Retrieves an item from the cache table. + /// + /// Returns null if no item is found with the provided key. + pub fn get(self: *Self, key: K) ?V { + const idx: usize = @intCast(self.context.hash(key) % bucket_count); + const len = self.lengths[idx]; + var i: usize = len; + while (i > 0) { + i -= 1; + if (self.context.eql(key, self.buckets[idx][i].key)) { + defer fastmem.rotateOnce(KV, self.buckets[idx][i..len]); + return self.buckets[idx][i].value; + } + } + + return null; + } + + /// Removes all items from the cache table. + /// + /// If your `Context` has an `evicted` method, + /// it will be called with all removed items. + pub fn clear(self: *Self) void { + if (comptime @hasDecl(Context, "evicted")) { + for (self.buckets, self.lengths) |b, l| { + for (b[0..l]) |kv| { + self.context.evicted(kv.key, kv.value); + } + } + } + + @memset(&self.lengths, 0); + } + }; +} + +/// Creates a Context automatically for the given key type. This uses the +/// same logic as std.hash_map.AutoContext today since the API matches. +fn AutoContext(comptime K: type) type { + return std.hash_map.AutoContext(K); +} + +test CacheTable { + const testing = std.testing; + + // Construct a table that purposely has a predictable hash so we can + // test all edge cases. + const T = CacheTable(u32, u32, struct { + pub fn hash(self: *const @This(), key: u32) u64 { + _ = self; + return @intCast(key); + } + + pub fn eql(self: *const @This(), a: u32, b: u32) bool { + _ = self; + return a == b; + } + }, 2, 2); + var t: T = .{ .context = .{} }; + + // Fill the table + try testing.expect(t.put(0, 0) == null); + try testing.expect(t.put(1, 0) == null); + try testing.expect(t.put(2, 0) == null); + try testing.expect(t.put(3, 0) == null); + + // It should now be full, so any insert should evict the oldest item. + // NOTE: For the sake of this test, we're assuming that the first item + // is evicted but we don't need to promise this. + try testing.expectEqual(T.KV{ + .key = 0, + .value = 0, + }, t.put(4, 0).?); + + // The first item should now be gone + try testing.expect(t.get(0) == null); +} |
