summaryrefslogtreecommitdiff
path: root/src/datastruct/cache_table.zig
diff options
context:
space:
mode:
authorMitchell Hashimoto <m@mitchellh.com>2024-11-07 14:38:54 -0800
committerMitchell Hashimoto <m@mitchellh.com>2024-11-07 14:39:10 -0800
commita436bd0af62a4bdc5af14774b955f7b46ccd9deb (patch)
tree561636d47031fd8a33e5a2be1ee2fcee1858123c /src/datastruct/cache_table.zig
parentaed51fd0b00018eeea71ba1afcb48d18792ae46c (diff)
move datastructures to dedicated "datastruct" package
Diffstat (limited to 'src/datastruct/cache_table.zig')
-rw-r--r--src/datastruct/cache_table.zig181
1 files changed, 181 insertions, 0 deletions
diff --git a/src/datastruct/cache_table.zig b/src/datastruct/cache_table.zig
new file mode 100644
index 000000000..40d36cc24
--- /dev/null
+++ b/src/datastruct/cache_table.zig
@@ -0,0 +1,181 @@
+const fastmem = @import("../fastmem.zig");
+
+const std = @import("std");
+const assert = std.debug.assert;
+
+/// An associative data structure used for efficiently storing and
+/// retrieving values which are able to be recomputed if necessary.
+///
+/// This structure is effectively a hash table with fixed-sized buckets.
+///
+/// When inserting an item in to a full bucket, the least recently used
+/// item is replaced.
+///
+/// To achieve this, when an item is accessed, it's moved to the end of
+/// the bucket, and the rest of the items are moved over to fill the gap.
+///
+/// This should provide very good query performance and keep frequently
+/// accessed items cached indefinitely.
+///
+/// Parameters:
+///
+/// `Context`
+/// A type containing methods to define CacheTable behaviors.
+/// - `fn hash(*Context, K) u64` - Return a hash for a key.
+/// - `fn eql(*Context, K, K) bool` - Check two keys for equality.
+///
+/// - `fn evicted(*Context, K, V) void` - [OPTIONAL] Eviction callback.
+/// If present, called whenever an item is evicted from the cache.
+///
+/// `bucket_count`
+/// Should ideally be close to the median number of important items that
+/// you expect to be cached at any given point. This is required to be a
+/// power of 2 since performance suffers if it's not and there's no good
+/// reason to allow it to be anything else.
+///
+/// `bucket_size`
+/// should be larger if you expect a large number of unimportant items to
+/// enter the cache at a time. Having larger buckets will avoid important
+/// items being dropped from the cache prematurely.
+///
+pub fn CacheTable(
+ comptime K: type,
+ comptime V: type,
+ comptime Context: type,
+ comptime bucket_count: usize,
+ comptime bucket_size: u8,
+) type {
+ return struct {
+ const Self = @This();
+
+ const KV = struct {
+ key: K,
+ value: V,
+ };
+
+ comptime {
+ assert(std.math.isPowerOfTwo(bucket_count));
+ assert(bucket_count <= std.math.maxInt(usize));
+ }
+
+ /// `bucket_count` buckets containing `bucket_size` KV pairs each.
+ ///
+ /// We don't need to initialize this memory because we don't use it
+ /// unless it's within a bucket's stored length, which will guarantee
+ /// that we put actual items there.
+ buckets: [bucket_count][bucket_size]KV = undefined,
+
+ /// We use this array to keep track of how many slots in each bucket
+ /// have actual items in them. Once all the buckets fill up this will
+ /// become a pointless check, but hopefully branch prediction picks
+ /// up on it at that point. The memory cost isn't too bad since it's
+ /// just bytes, so should be a fraction the size of the main table.
+ lengths: [bucket_count]u8 = [_]u8{0} ** bucket_count,
+
+ /// An instance of the context structure.
+ /// Must be initialized before calling any operations.
+ context: Context,
+
+ /// Adds an item to the cache table. If an old value was removed to
+ /// make room then it is returned in a struct with its key and value.
+ pub fn put(self: *Self, key: K, value: V) ?KV {
+ const kv: KV = .{ .key = key, .value = value };
+ const idx: usize = @intCast(self.context.hash(key) % bucket_count);
+
+ // If we have space available in the bucket then we just append
+ if (self.lengths[idx] < bucket_size) {
+ self.buckets[idx][self.lengths[idx]] = kv;
+ self.lengths[idx] += 1;
+ return null;
+ }
+ assert(self.lengths[idx] == bucket_size);
+
+ // Append our new item and return the oldest
+ const evicted = fastmem.rotateIn(KV, &self.buckets[idx], kv);
+
+ // The Context is allowed to register an eviction hook.
+ if (comptime @hasDecl(Context, "evicted")) self.context.evicted(
+ evicted.key,
+ evicted.value,
+ );
+
+ return evicted;
+ }
+
+ /// Retrieves an item from the cache table.
+ ///
+ /// Returns null if no item is found with the provided key.
+ pub fn get(self: *Self, key: K) ?V {
+ const idx: usize = @intCast(self.context.hash(key) % bucket_count);
+ const len = self.lengths[idx];
+ var i: usize = len;
+ while (i > 0) {
+ i -= 1;
+ if (self.context.eql(key, self.buckets[idx][i].key)) {
+ defer fastmem.rotateOnce(KV, self.buckets[idx][i..len]);
+ return self.buckets[idx][i].value;
+ }
+ }
+
+ return null;
+ }
+
+ /// Removes all items from the cache table.
+ ///
+ /// If your `Context` has an `evicted` method,
+ /// it will be called with all removed items.
+ pub fn clear(self: *Self) void {
+ if (comptime @hasDecl(Context, "evicted")) {
+ for (self.buckets, self.lengths) |b, l| {
+ for (b[0..l]) |kv| {
+ self.context.evicted(kv.key, kv.value);
+ }
+ }
+ }
+
+ @memset(&self.lengths, 0);
+ }
+ };
+}
+
+/// Creates a Context automatically for the given key type. This uses the
+/// same logic as std.hash_map.AutoContext today since the API matches.
+fn AutoContext(comptime K: type) type {
+ return std.hash_map.AutoContext(K);
+}
+
+test CacheTable {
+ const testing = std.testing;
+
+ // Construct a table that purposely has a predictable hash so we can
+ // test all edge cases.
+ const T = CacheTable(u32, u32, struct {
+ pub fn hash(self: *const @This(), key: u32) u64 {
+ _ = self;
+ return @intCast(key);
+ }
+
+ pub fn eql(self: *const @This(), a: u32, b: u32) bool {
+ _ = self;
+ return a == b;
+ }
+ }, 2, 2);
+ var t: T = .{ .context = .{} };
+
+ // Fill the table
+ try testing.expect(t.put(0, 0) == null);
+ try testing.expect(t.put(1, 0) == null);
+ try testing.expect(t.put(2, 0) == null);
+ try testing.expect(t.put(3, 0) == null);
+
+ // It should now be full, so any insert should evict the oldest item.
+ // NOTE: For the sake of this test, we're assuming that the first item
+ // is evicted but we don't need to promise this.
+ try testing.expectEqual(T.KV{
+ .key = 0,
+ .value = 0,
+ }, t.put(4, 0).?);
+
+ // The first item should now be gone
+ try testing.expect(t.get(0) == null);
+}