summaryrefslogtreecommitdiff
path: root/src/cli/CommaSplitter.zig
diff options
context:
space:
mode:
authorJeffrey C. Ollie <jeff@ocjtech.us>2025-09-23 21:53:52 -0500
committerJeffrey C. Ollie <jeff@ocjtech.us>2025-09-23 21:53:52 -0500
commit5f3fd9742fb5eb68de945a37069e8b813ea6d667 (patch)
tree8722143ac55858bf679d6a9aa309730770523589 /src/cli/CommaSplitter.zig
parent5265414a36d9b5ad942ca997adf7348b8d0bd5d4 (diff)
rename Splitter-CommaSplitter
Diffstat (limited to 'src/cli/CommaSplitter.zig')
-rw-r--r--src/cli/CommaSplitter.zig424
1 files changed, 424 insertions, 0 deletions
diff --git a/src/cli/CommaSplitter.zig b/src/cli/CommaSplitter.zig
new file mode 100644
index 000000000..3168c1ffa
--- /dev/null
+++ b/src/cli/CommaSplitter.zig
@@ -0,0 +1,424 @@
+//! Iterator to split a string into fields by commas, taking into account
+//! quotes and escapes.
+//!
+//! Supports the same escapes as in Zig literal strings.
+//!
+//! Quotes must begin and end with a double quote (`"`). It is an error to not
+//! end a quote that was begun. To include a double quote inside a quote (or to
+//! not have a double quote start a quoted section) escape it with a backslash.
+//!
+//! Single quotes (`'`) are not special, they do not begin a quoted block.
+//!
+//! Zig multiline string literals are NOT supported.
+//!
+//! Quotes and escapes are not stripped or decoded, that must be handled as a
+//! separate step!
const CommaSplitter = @This();

/// Errors that `next` reports for malformed input.
pub const Error = error{
    /// A double-quoted section was opened but the input ended before the
    /// closing double quote.
    UnclosedQuote,
    /// The input ended in the middle of an escape sequence.
    UnfinishedEscape,
    /// A backslash was followed by something that is not a valid escape.
    IllegalEscape,
};

/// The string that is being split.
str: []const u8,
/// Offset into `str` of the first byte that has not been consumed yet.
index: usize,

/// Create a splitter positioned at the start of `str`.
///
/// The splitter only stores the slice; every field it returns is a sub-slice
/// of `str`.
pub fn init(str: []const u8) CommaSplitter {
    return .{
        .str = str,
        .index = 0,
    };
}

/// Return the next comma-separated field, or null once the whole string has
/// been consumed.
///
/// Commas inside a double-quoted section do not end a field. Escape sequences
/// (`\n`, `\r`, `\t`, `\\`, `\'`, `\"`, `\xNN`, `\u{N...}`) are validated but
/// returned verbatim, as are the quotes themselves.
///
/// Note that a trailing comma does not produce a final empty field: after
/// `"a,"` has yielded `"a"` the input is exhausted and null is returned.
///
/// Errors:
///  - `error.UnclosedQuote`: input ended inside a quoted section.
///  - `error.UnfinishedEscape`: input ended inside an escape sequence.
///  - `error.IllegalEscape`: unknown escape, non-hex digit in a hex/unicode
///    escape, empty `\u{}`, or a unicode escape above 0x10ffff.
///
/// When an error is returned `index` is left where the problem was detected;
/// it is not rewound to the start of the field.
pub fn next(self: *CommaSplitter) Error!?[]const u8 {
    if (self.index >= self.str.len) return null;

    // Offset at which the field currently being scanned begins.
    const start = self.index;

    // States of the scanner.
    const State = enum {
        normal,
        quoted,
        escape,
        hexescape,
        unicodeescape,
    };

    // State to return to once the current escape sequence is complete
    // (escapes can occur both inside and outside of quotes).
    var last: State = .normal;
    // Number of digits seen so far in a `\xNN` escape.
    var hexescape_digits: usize = 0;
    // Sub-state of parsing a unicode escape: waiting for `{`, or reading
    // digits up to the closing `}`.
    var unicodeescape_state: enum {
        start,
        digits,
    } = .start;
    // Number of digits seen so far in a unicode escape.
    var unicodeescape_digits: usize = 0;
    // Accumulated value of the unicode escape, used for the range check.
    var unicodeescape_value: usize = 0;

    loop: switch (State.normal) {
        .normal => {
            // End of input outside of quotes: everything left is the field.
            if (self.index >= self.str.len) return self.str[start..];
            switch (self.str[self.index]) {
                ',' => {
                    // Consume the comma but do not include it in the field.
                    self.index += 1;
                    return self.str[start .. self.index - 1];
                },
                '"' => {
                    self.index += 1;
                    continue :loop .quoted;
                },
                '\\' => {
                    self.index += 1;
                    last = .normal;
                    continue :loop .escape;
                },
                else => {
                    self.index += 1;
                    continue :loop .normal;
                },
            }
        },
        .quoted => {
            if (self.index >= self.str.len) return error.UnclosedQuote;
            switch (self.str[self.index]) {
                '"' => {
                    self.index += 1;
                    continue :loop .normal;
                },
                '\\' => {
                    self.index += 1;
                    last = .quoted;
                    continue :loop .escape;
                },
                else => {
                    // Includes ',' which is not a separator inside quotes.
                    self.index += 1;
                    continue :loop .quoted;
                },
            }
        },
        .escape => {
            if (self.index >= self.str.len) return error.UnfinishedEscape;
            switch (self.str[self.index]) {
                'n', 'r', 't', '\\', '\'', '"' => {
                    self.index += 1;
                    continue :loop last;
                },
                'x' => {
                    self.index += 1;
                    hexescape_digits = 0;
                    continue :loop .hexescape;
                },
                'u' => {
                    self.index += 1;
                    unicodeescape_state = .start;
                    unicodeescape_digits = 0;
                    unicodeescape_value = 0;
                    continue :loop .unicodeescape;
                },
                else => return error.IllegalEscape,
            }
        },
        .hexescape => {
            if (self.index >= self.str.len) return error.UnfinishedEscape;
            switch (self.str[self.index]) {
                '0'...'9', 'a'...'f', 'A'...'F' => {
                    self.index += 1;
                    hexescape_digits += 1;
                    // A hex escape is exactly two digits long.
                    if (hexescape_digits == 2) continue :loop last;
                    continue :loop .hexescape;
                },
                else => return error.IllegalEscape,
            }
        },
        .unicodeescape => {
            if (self.index >= self.str.len) return error.UnfinishedEscape;
            switch (unicodeescape_state) {
                .start => {
                    switch (self.str[self.index]) {
                        '{' => {
                            self.index += 1;
                            unicodeescape_value = 0;
                            unicodeescape_state = .digits;
                            continue :loop .unicodeescape;
                        },
                        else => return error.IllegalEscape,
                    }
                },
                .digits => {
                    const c = self.str[self.index];
                    // Numeric value of the hex digit. The letters map to
                    // 10..15; without the `+ 10` the range check below
                    // would be performed on the wrong value.
                    const digit: usize = switch (c) {
                        '}' => {
                            self.index += 1;
                            if (unicodeescape_digits == 0) return error.IllegalEscape;
                            continue :loop last;
                        },
                        '0'...'9' => c - '0',
                        'a'...'f' => c - 'a' + 10,
                        'A'...'F' => c - 'A' + 10,
                        else => return error.IllegalEscape,
                    };
                    self.index += 1;
                    unicodeescape_digits += 1;
                    // The value is checked after every digit, so it never
                    // exceeds 0x10ffff before the shift and cannot overflow.
                    unicodeescape_value = (unicodeescape_value << 4) | digit;
                    if (unicodeescape_value > 0x10ffff) return error.IllegalEscape;
                    continue :loop .unicodeescape;
                },
            }
        },
    }
}

/// Return all remaining string data as a single slice, whether it contains
/// commas or not, and mark the input as fully consumed. Returns null if
/// nothing is left. No quote or escape validation is performed.
pub fn rest(self: *CommaSplitter) ?[]const u8 {
    if (self.index >= self.str.len) return null;
    const remaining = self.str[self.index..];
    self.index = self.str.len;
    return remaining;
}
+
// Plain fields separated by commas.
test "splitter 1" {
    const testing = @import("std").testing;

    var splitter: CommaSplitter = .init("a,b,c");
    try testing.expectEqualStrings("a", (try splitter.next()).?);
    try testing.expectEqualStrings("b", (try splitter.next()).?);
    try testing.expectEqualStrings("c", (try splitter.next()).?);
    try testing.expect((try splitter.next()) == null);
}

// Empty input yields no fields at all.
test "splitter 2" {
    const testing = @import("std").testing;

    var splitter: CommaSplitter = .init("");
    try testing.expect((try splitter.next()) == null);
}

// A single field without any comma.
test "splitter 3" {
    const testing = @import("std").testing;

    var splitter: CommaSplitter = .init("a");
    try testing.expectEqualStrings("a", (try splitter.next()).?);
    try testing.expect((try splitter.next()) == null);
}

// A complete two-digit hex escape is passed through undecoded.
test "splitter 4" {
    const testing = @import("std").testing;

    var splitter: CommaSplitter = .init("\\x5a");
    try testing.expectEqualStrings("\\x5a", (try splitter.next()).?);
    try testing.expect((try splitter.next()) == null);
}

// Single quotes are ordinary characters.
test "splitter 5" {
    const testing = @import("std").testing;

    var splitter: CommaSplitter = .init("'a',b");
    try testing.expectEqualStrings("'a'", (try splitter.next()).?);
    try testing.expectEqualStrings("b", (try splitter.next()).?);
    try testing.expect((try splitter.next()) == null);
}

// Single quotes do not protect a comma.
test "splitter 6" {
    const testing = @import("std").testing;

    var splitter: CommaSplitter = .init("'a,b',c");
    try testing.expectEqualStrings("'a", (try splitter.next()).?);
    try testing.expectEqualStrings("b'", (try splitter.next()).?);
    try testing.expectEqualStrings("c", (try splitter.next()).?);
    try testing.expect((try splitter.next()) == null);
}

// Double quotes protect a comma; the quotes stay in the field.
test "splitter 7" {
    const testing = @import("std").testing;

    var splitter: CommaSplitter = .init("\"a,b\",c");
    try testing.expectEqualStrings("\"a,b\"", (try splitter.next()).?);
    try testing.expectEqualStrings("c", (try splitter.next()).?);
    try testing.expect((try splitter.next()) == null);
}

// Whitespace around fields is preserved.
test "splitter 8" {
    const testing = @import("std").testing;

    var splitter: CommaSplitter = .init(" a , b ");
    try testing.expectEqualStrings(" a ", (try splitter.next()).?);
    try testing.expectEqualStrings(" b ", (try splitter.next()).?);
    try testing.expect((try splitter.next()) == null);
}

// Hex escape with no digits before end of input.
test "splitter 9" {
    const testing = @import("std").testing;

    var splitter: CommaSplitter = .init("\\x");
    try testing.expectError(error.UnfinishedEscape, splitter.next());
}

// Hex escape with only one digit before end of input.
test "splitter 10" {
    const testing = @import("std").testing;

    var splitter: CommaSplitter = .init("\\x5");
    try testing.expectError(error.UnfinishedEscape, splitter.next());
}

// Unicode escape cut off before the opening brace.
test "splitter 11" {
    const testing = @import("std").testing;

    var splitter: CommaSplitter = .init("\\u");
    try testing.expectError(error.UnfinishedEscape, splitter.next());
}

// Unicode escape cut off right after the opening brace.
test "splitter 12" {
    const testing = @import("std").testing;

    var splitter: CommaSplitter = .init("\\u{");
    try testing.expectError(error.UnfinishedEscape, splitter.next());
}

// Unicode escape without any digits.
test "splitter 13" {
    const testing = @import("std").testing;

    var splitter: CommaSplitter = .init("\\u{}");
    try testing.expectError(error.IllegalEscape, splitter.next());
}

// Unicode escape containing a non-hex character.
test "splitter 14" {
    const testing = @import("std").testing;

    var splitter: CommaSplitter = .init("\\u{h1}");
    try testing.expectError(error.IllegalEscape, splitter.next());
}

// Largest value accepted in a unicode escape.
test "splitter 15" {
    const testing = @import("std").testing;

    var splitter: CommaSplitter = .init("\\u{10ffff}");
    try testing.expectEqualStrings("\\u{10ffff}", (try splitter.next()).?);
    try testing.expect((try splitter.next()) == null);
}

// One past the largest accepted unicode escape value.
test "splitter 16" {
    const testing = @import("std").testing;

    var splitter: CommaSplitter = .init("\\u{110000}");
    try testing.expectError(error.IllegalEscape, splitter.next());
}

// Unknown escape character.
test "splitter 17" {
    const testing = @import("std").testing;

    var splitter: CommaSplitter = .init("\\d");
    try testing.expectError(error.IllegalEscape, splitter.next());
}

// All single-character escapes are accepted and left undecoded.
test "splitter 18" {
    const testing = @import("std").testing;

    var splitter: CommaSplitter = .init("\\n\\r\\t\\\"\\'\\\\");
    try testing.expectEqualStrings("\\n\\r\\t\\\"\\'\\\\", (try splitter.next()).?);
    try testing.expect((try splitter.next()) == null);
}

// Single quotes inside a double-quoted section.
test "splitter 19" {
    const testing = @import("std").testing;

    var splitter: CommaSplitter = .init("\"abc'def'ghi\"");
    try testing.expectEqualStrings("\"abc'def'ghi\"", (try splitter.next()).?);
    try testing.expect((try splitter.next()) == null);
}

// A quoted field consisting only of a comma.
test "splitter 20" {
    const testing = @import("std").testing;

    var splitter: CommaSplitter = .init("\",\",abc");
    try testing.expectEqualStrings("\",\"", (try splitter.next()).?);
    try testing.expectEqualStrings("abc", (try splitter.next()).?);
    try testing.expect((try splitter.next()) == null);
}

// Single-quoted fields, one with leading whitespace.
test "splitter 21" {
    const testing = @import("std").testing;

    var splitter: CommaSplitter = .init("'a','b', 'c'");
    try testing.expectEqualStrings("'a'", (try splitter.next()).?);
    try testing.expectEqualStrings("'b'", (try splitter.next()).?);
    try testing.expectEqualStrings(" 'c'", (try splitter.next()).?);
    try testing.expect((try splitter.next()) == null);
}

// A double quote opened mid-field and never closed.
test "splitter 22" {
    const testing = @import("std").testing;

    var splitter: CommaSplitter = .init("abc\"def");
    try testing.expectError(error.UnclosedQuote, splitter.next());
}

// Key/value style input with quoted values that contain commas.
test "splitter 23" {
    const testing = @import("std").testing;

    var splitter: CommaSplitter = .init("title:\"Focus Split: Up\",description:\"Focus the split above, if it exists.\",action:goto_split:up");
    try testing.expectEqualStrings("title:\"Focus Split: Up\"", (try splitter.next()).?);
    try testing.expectEqualStrings("description:\"Focus the split above, if it exists.\"", (try splitter.next()).?);
    try testing.expectEqualStrings("action:goto_split:up", (try splitter.next()).?);
    try testing.expect((try splitter.next()) == null);
}

// rest() returns the unsplit remainder and exhausts the splitter.
test "splitter 24" {
    const testing = @import("std").testing;

    var splitter: CommaSplitter = .init("a,b,c,def");
    try testing.expectEqualStrings("a", (try splitter.next()).?);
    try testing.expectEqualStrings("b", (try splitter.next()).?);
    try testing.expectEqualStrings("c,def", splitter.rest().?);
    try testing.expect((try splitter.next()) == null);
}

// A comma inside a unicode escape is an illegal escape, not a separator.
test "splitter 25" {
    const testing = @import("std").testing;

    var splitter: CommaSplitter = .init("a,\\u{10,df}");
    try testing.expectEqualStrings("a", (try splitter.next()).?);
    try testing.expectError(error.IllegalEscape, splitter.next());
}