feat: reliabletxt and wsv

dev
LeRoyce Pearson 2024-01-27 20:15:14 -07:00
commit 151470e5d3
11 changed files with 791 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
zig-out/
zig-cache/

91
build.zig Normal file
View File

@ -0,0 +1,91 @@
const std = @import("std");
/// Declares the build graph for the `stenway-formats` package. Nothing is
/// compiled here; the build runner executes the declared steps afterwards.
///
/// Top-level steps:
/// - `install` (the default): installs the static library and the executable.
/// - `run`: runs the installed executable, forwarding `zig build run -- <args>`.
/// - `test`: runs the unit tests rooted at `src/root.zig` and `src/main.zig`.
pub fn build(b: *std.Build) void {
    // Neither option is restricted, so whoever invokes `zig build` chooses the
    // target (native by default) and the optimization mode.
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    // Static library built from the package root.
    const library = b.addStaticLibrary(.{
        .name = "stenway-formats",
        .root_source_file = .{ .path = "src/root.zig" },
        .target = target,
        .optimize = optimize,
    });
    b.installArtifact(library);

    // Command-line executable.
    const executable = b.addExecutable(.{
        .name = "stenway-formats",
        .root_source_file = .{ .path = "src/main.zig" },
        .target = target,
        .optimize = optimize,
    });
    b.installArtifact(executable);

    // `zig build run`: depends on the install step so the program is started
    // from the installation directory rather than from the cache.
    const run_executable = b.addRunArtifact(executable);
    run_executable.step.dependOn(b.getInstallStep());
    if (b.args) |forwarded_args| run_executable.addArgs(forwarded_args);
    b.step("run", "Run the app").dependOn(&run_executable.step);

    // `zig build test`: one test binary per root file, library first.
    const test_step = b.step("test", "Run unit tests");
    const test_roots = [_][]const u8{ "src/root.zig", "src/main.zig" };
    for (test_roots) |root_path| {
        const unit_tests = b.addTest(.{
            .root_source_file = .{ .path = root_path },
            .target = target,
            .optimize = optimize,
        });
        test_step.dependOn(&b.addRunArtifact(unit_tests).step);
    }
}

62
build.zig.zon Normal file
View File

@ -0,0 +1,62 @@
.{
.name = "stenway-formats",
// This is a [Semantic Version](https://semver.org/).
// In a future version of Zig it will be used for package deduplication.
.version = "0.0.0",
// This field is optional.
// This is currently advisory only; Zig does not yet do anything
// with this value.
//.minimum_zig_version = "0.11.0",
// This field is optional.
// Each dependency must either provide a `url` and `hash`, or a `path`.
// `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
// Once all dependencies are fetched, `zig build` no longer requires
// internet connectivity.
.dependencies = .{
// See `zig fetch --save <url>` for a command-line interface for adding dependencies.
//.example = .{
// // When updating this field to a new URL, be sure to delete the corresponding
// // `hash`, otherwise you are communicating that you expect to find the old hash at
// // the new URL.
// .url = "https://example.com/foo.tar.gz",
//
// // This is computed from the file contents of the directory of files that is
// // obtained after fetching `url` and applying the inclusion rules given by
// // `paths`.
// //
// // This field is the source of truth; packages do not come from a `url`; they
// // come from a `hash`. `url` is just one of many possible mirrors for how to
// // obtain a package matching this `hash`.
// //
// // Uses the [multihash](https://multiformats.io/multihash/) format.
// .hash = "...",
//
// // When this is provided, the package is found in a directory relative to the
// // build root. In this case the package's hash is irrelevant and therefore not
// // computed. This field and `url` are mutually exclusive.
// .path = "foo",
//},
},
// Specifies the set of files and directories that are included in this package.
// Only files and directories listed here are included in the `hash` that
// is computed for this package.
// Paths are relative to the build root. Use the empty string (`""`) to refer to
// the build root itself.
// A directory listed here means that all files within, recursively, are included.
.paths = .{
// This makes *all* files, recursively, included in this package. It is generally
// better to explicitly list the files and directories instead, to ensure that
// fetching from tarballs, file system paths, and version control all result
// in the same contents hash.
"",
// For example...
//"build.zig",
//"build.zig.zon",
//"src",
//"LICENSE",
//"README.md",
},
}

24
src/main.zig Normal file
View File

@ -0,0 +1,24 @@
const std = @import("std");
/// Program entry point: writes a greeting to stderr and a hint about running
/// the tests to stdout. Fails only if writing to stdout fails.
pub fn main() !void {
    // `std.debug.print` goes to stderr, which keeps diagnostics out of stdout.
    std.debug.print("All your {s} are belong to us.\n", .{"codebase"});

    // Real program output goes to stdout through a buffered writer.
    var buffered_stdout = std.io.bufferedWriter(std.io.getStdOut().writer());
    try buffered_stdout.writer().print("Run `zig build test` to run the tests.\n", .{});

    // Nothing reaches the terminal until the buffer is flushed.
    try buffered_stdout.flush();
}
// Smoke test: appends one value to an ArrayList backed by the testing
// allocator and checks that popping returns that same value.
test "simple test" {
var list = std.ArrayList(i32).init(std.testing.allocator);
defer list.deinit(); // try commenting this out and see if zig detects the memory leak!
try list.append(42);
try std.testing.expectEqual(@as(i32, 42), list.pop());
}

70
src/reliabletxt.zig Normal file
View File

@ -0,0 +1,70 @@
//! https://dev.stenway.com/ReliableTXT/Specification.html
/// Text encodings distinguished by this module. `detectEncoding` picks one of
/// them by looking at the byte order mark at the start of the data.
pub const Encoding = enum {
/// UTF-8; detected from the leading bytes EF BB BF.
utf8,
/// UTF-16, Big Endian; detected from the leading bytes FE FF.
utf16,
/// UTF-16, Little Endian; detected from the leading bytes FF FE.
utf16_reverse,
/// UTF-32, Big Endian; detected from the leading bytes 00 00 FE FF.
utf32,
};
/// Determines the encoding of `contents` from its leading byte order mark.
///
/// The marks are tried in a fixed order (UTF-8, UTF-16, UTF-16 reverse,
/// UTF-32) and the first one that prefixes `contents` wins. Returns
/// `error.InvalidEncoding` when none of them matches, which includes empty
/// input.
pub fn detectEncoding(contents: []const u8) !Encoding {
    const Signature = struct {
        bom: []const u8,
        encoding: Encoding,
    };
    const signatures = [_]Signature{
        .{ .bom = "\xEF\xBB\xBF", .encoding = Encoding.utf8 },
        .{ .bom = "\xFE\xFF", .encoding = Encoding.utf16 },
        .{ .bom = "\xFF\xFE", .encoding = Encoding.utf16_reverse },
        .{ .bom = "\x00\x00\xFE\xFF", .encoding = Encoding.utf32 },
    };
    for (signatures) |signature| {
        if (std.mem.startsWith(u8, contents, signature.bom)) return signature.encoding;
    }
    return error.InvalidEncoding;
}
// Checks that each supported byte order mark maps to its encoding. The UTF-16
// and UTF-32 inputs are built from integer arrays whose elements are converted
// to the wanted byte order and then viewed as raw bytes.
test detectEncoding {
// UTF-8: the mark is the three bytes EF BB BF.
try testing.expectEqual(Encoding.utf8, detectEncoding("\xEF\xBB\xBFaaa!"));
// 0xFEFF stored little endian yields the bytes FF FE, i.e. the reversed UTF-16 mark.
try testing.expectEqual(Encoding.utf16_reverse, detectEncoding(std.mem.sliceAsBytes(&[_]u16{
std.mem.nativeToLittle(u16, 0xFE_FF),
std.mem.nativeToLittle(u16, 'a'),
std.mem.nativeToLittle(u16, 'a'),
std.mem.nativeToLittle(u16, 'a'),
std.mem.nativeToLittle(u16, '!'),
})));
// 0xFEFF stored big endian yields the bytes FE FF.
try testing.expectEqual(Encoding.utf16, detectEncoding(std.mem.sliceAsBytes(&[_]u16{
std.mem.nativeToBig(u16, 0xFE_FF),
std.mem.nativeToBig(u16, 'a'),
std.mem.nativeToBig(u16, 'a'),
std.mem.nativeToBig(u16, 'a'),
std.mem.nativeToBig(u16, '!'),
})));
// 0x0000FEFF stored big endian yields the bytes 00 00 FE FF.
try testing.expectEqual(Encoding.utf32, detectEncoding(std.mem.sliceAsBytes(&[_]u32{
std.mem.nativeToBig(u32, 0x00_00_FE_FF),
std.mem.nativeToBig(u32, 'a'),
std.mem.nativeToBig(u32, 'a'),
std.mem.nativeToBig(u32, 'a'),
std.mem.nativeToBig(u32, '!'),
})));
}
/// The text of a document with its byte order mark removed, tagged with the
/// detected encoding. Each payload is a view into the buffer that was given to
/// `parse`; nothing is copied, and the 16/32-bit code units are the input bytes
/// reinterpreted in place (no byte swapping is performed by `parse`).
pub const File = union(Encoding) {
/// Bytes following the three-byte UTF-8 mark.
utf8: []const u8,
/// 16-bit units following the two-byte mark of big endian UTF-16.
utf16: []const u16,
/// 16-bit units following the two-byte mark of little endian UTF-16.
utf16_reverse: []const u16,
/// 32-bit units following the four-byte mark of big endian UTF-32.
utf32: []const u32,
};
/// Detects the encoding of `contents` and returns a view of the text that
/// follows the byte order mark.
///
/// The result borrows from `contents`; nothing is allocated or copied. For the
/// 16- and 32-bit encodings the bytes are reinterpreted in place, so the code
/// units keep the byte order of the input, and trailing bytes that do not form
/// a whole code unit are left out of the view.
///
/// Errors:
/// - `error.InvalidEncoding` when no supported byte order mark is present.
/// - `error.MisalignedContents` when the bytes after the mark do not start at
///   an address suitably aligned for the code unit type. Reinterpreting such
///   memory would otherwise be a safety panic (or undefined behavior in
///   unchecked builds) that depends on where the caller's buffer happens to
///   live.
pub fn parse(contents: []const u8) !File {
    switch (try detectEncoding(contents)) {
        .utf8 => return .{ .utf8 = contents[3..] },
        .utf16 => return .{ .utf16 = try codeUnits(u16, contents[2..]) },
        .utf16_reverse => return .{ .utf16_reverse = try codeUnits(u16, contents[2..]) },
        .utf32 => return .{ .utf32 = try codeUnits(u32, contents[4..]) },
    }
}

/// Views `payload` as a slice of `T` after verifying its alignment.
fn codeUnits(comptime T: type, payload: []const u8) error{MisalignedContents}![]const T {
    // `payload` only guarantees byte alignment, so check before casting.
    if (!std.mem.isAligned(@intFromPtr(payload.ptr), @alignOf(T))) return error.MisalignedContents;
    const units: [*]const T = @ptrCast(@alignCast(payload.ptr));
    return units[0 .. payload.len / @sizeOf(T)];
}
const testing = std.testing;
const std = @import("std");

7
src/root.zig Normal file
View File

@ -0,0 +1,7 @@
pub const reliabletxt = @import("./reliabletxt.zig");
pub const wsv = @import("./wsv.zig");
// References both imported modules from a test block so that the test blocks
// declared inside them are picked up when this file is the test root.
test {
_ = reliabletxt;
_ = wsv;
}

BIN
src/testdata/Example01_Table_UTF16.txt vendored Normal file

Binary file not shown.

BIN
src/testdata/Example01_Table_UTF16R.txt vendored Normal file

Binary file not shown.

BIN
src/testdata/Example01_Table_UTF32.txt vendored Normal file

Binary file not shown.

14
src/testdata/Example01_Table_UTF8.txt vendored Normal file
View File

@ -0,0 +1,14 @@
a U+0061 61 0061 "Latin Small Letter A"
~ U+007E 7E 007E Tilde
¥ U+00A5 C2_A5 00A5 "Yen Sign"
» U+00BB C2_BB 00BB "Right-Pointing Double Angle Quotation Mark"
½ U+00BD C2_BD 00BD "Vulgar Fraction One Half"
¿ U+00BF C2_BF 00BF "Inverted Question Mark"
ß U+00DF C3_9F 00DF "Latin Small Letter Sharp S"
ä U+00E4 C3_A4 00E4 "Latin Small Letter A with Diaeresis"
ï U+00EF C3_AF 00EF "Latin Small Letter I with Diaeresis"
œ U+0153 C5_93 0153 "Latin Small Ligature Oe"
€ U+20AC E2_82_AC 20AC "Euro Sign"
東 U+6771 E6_9D_B1 6771 "CJK Unified Ideograph-6771"
𝄞 U+1D11E F0_9D_84_9E D834_DD1E "Musical Symbol G Clef"
𠀇 U+20007 F0_A0_80_87 D840_DC07 "CJK Unified Ideograph-20007"

521
src/wsv.zig Normal file
View File

@ -0,0 +1,521 @@
/// A fully parsed WSV table, tagged with the encoding of its values.
///
/// Layout of every variant: a slice of rows, each row a slice of optional
/// values, where `null` stands for a null value. All three levels (table,
/// rows, values) are individually allocated and owned by the table; release
/// them with `free`, passing the allocator that was used to build the table.
///
/// The type is public because the public `parseAlloc` returns it.
pub const Table = union(reliabletxt.Encoding) {
    utf8: [][]?[]u8,
    utf16: [][]?[]u16,
    utf16_reverse: [][]?[]u16,
    utf32: [][]?[]u32,

    /// Frees every value, every row and the table slice itself using `gpa`.
    /// Works for all encodings: the nesting is identical and only the code
    /// unit type differs, so a single `inline else` prong covers every variant.
    pub fn free(this: @This(), gpa: std.mem.Allocator) void {
        switch (this) {
            inline else => |table| {
                for (table) |row| {
                    for (row) |value_opt| {
                        if (value_opt) |value| gpa.free(value);
                    }
                    gpa.free(row);
                }
                gpa.free(table);
            },
        }
    }
};
/// States of the character-level state machine in `parseAlloc`:
/// - `default`: outside a quoted string (between values or inside an unquoted value).
/// - `string`: inside a quoted string.
/// - `string_double_quote`: a `"` was just read inside a string; what follows decides
///   whether it closed the string or began an escape (`""` or `"/"`).
/// - `string_line_break_escape`: `"/` was read inside a string; a `"` must follow.
/// - `comment`: after `#`, skipping characters up to the next line feed.
const ParseState = enum { default, string, string_double_quote, string_line_break_escape, comment };
/// Parses a ReliableTXT-encoded WSV document into an allocated `Table`.
///
/// Only UTF-8 input is supported; other encodings yield `error.Unimplemented`.
/// The caller owns the returned table and releases it with `Table.free(gpa)`.
/// If an error is returned, everything allocated so far has been freed.
///
/// Parsing rules:
/// - Values are separated by spaces and tabs; rows are separated by `\n`.
/// - Every line produces a row, including empty lines and comment-only lines,
///   so input ending in `\n` yields a final empty row.
/// - `#` outside a string starts a comment that runs to the end of the line.
/// - An unquoted `-` is a null value; a quoted `"-"` is the text `-`.
/// - Inside a quoted string, `""` is a literal `"` and `"/"` is a line feed.
///   A quoted empty string `""` is kept as an empty, non-null value.
///
/// Errors (besides allocation, encoding and UTF-8 validation failures):
/// - `error.StringNotClosed`: a line or the input ends inside a quoted string.
/// - `error.InvalidStringLineBreak`: `"/` is not followed by `"`.
/// - `error.DoubleQuoteInValue`: a `"` appears inside an unquoted value.
pub fn parseAlloc(gpa: std.mem.Allocator, contents_any: []const u8) !Table {
    switch (try reliabletxt.parse(contents_any)) {
        .utf8 => |contents_utf8| {
            var table = std.ArrayList([]?[]u8).init(gpa);
            defer table.deinit();
            // `deinit` only releases the list's own buffer. On failure, the
            // rows already handed to the list must be released as well. This
            // runs before the `defer` above because it is declared later.
            errdefer {
                for (table.items) |row| {
                    for (row) |value_opt| {
                        if (value_opt) |value| gpa.free(value);
                    }
                    gpa.free(row);
                }
            }

            const utf8_view = try std.unicode.Utf8View.init(contents_utf8);
            var utf8_iter = utf8_view.iterator();

            var line_buf = std.ArrayList(?[]u8).init(gpa);
            defer line_buf.deinit();
            // Same reasoning for the values of the row under construction.
            errdefer {
                for (line_buf.items) |value_opt| {
                    if (value_opt) |value| gpa.free(value);
                }
            }

            var value_buf = std.ArrayList(u8).init(gpa);
            defer value_buf.deinit();

            var state = ParseState.default;
            while (utf8_iter.nextCodepoint()) |codepoint| {
                switch (state) {
                    .default => switch (codepoint) {
                        '\n' => {
                            try flushUtf8Value(&line_buf, &value_buf, false);
                            try flushUtf8Line(&table, &line_buf);
                        },
                        '"' => {
                            // A string may only start a value, never continue one.
                            if (value_buf.items.len > 0) return error.DoubleQuoteInValue;
                            state = .string;
                        },
                        ' ', '\t' => try flushUtf8Value(&line_buf, &value_buf, false),
                        '#' => {
                            // The row is completed here; the comment text is skipped.
                            try flushUtf8Value(&line_buf, &value_buf, false);
                            try flushUtf8Line(&table, &line_buf);
                            state = .comment;
                        },
                        else => try appendUtf8Codepoint(&value_buf, codepoint),
                    },
                    .string => switch (codepoint) {
                        '\n' => {
                            // TODO: diagnostic: string not closed
                            return error.StringNotClosed;
                        },
                        '"' => state = .string_double_quote,
                        else => try appendUtf8Codepoint(&value_buf, codepoint),
                    },
                    .string_double_quote => switch (codepoint) {
                        '"' => {
                            try value_buf.append('"');
                            state = .string;
                        },
                        '/' => state = .string_line_break_escape,
                        '\n' => {
                            // The quote closed the string. Flush it even when
                            // empty, and leave the string state so the next
                            // line starts fresh.
                            try flushUtf8Value(&line_buf, &value_buf, true);
                            try flushUtf8Line(&table, &line_buf);
                            state = .default;
                        },
                        '#' => {
                            try flushUtf8Value(&line_buf, &value_buf, true);
                            try flushUtf8Line(&table, &line_buf);
                            state = .comment;
                        },
                        ' ', '\t' => {
                            try flushUtf8Value(&line_buf, &value_buf, true);
                            state = .default;
                        },
                        // Any other character directly after the closing quote
                        // is leniently kept as part of the value.
                        else => try appendUtf8Codepoint(&value_buf, codepoint),
                    },
                    .string_line_break_escape => switch (codepoint) {
                        '"' => {
                            try value_buf.append('\n');
                            state = .string;
                        },
                        else => {
                            // TODO: diagnostic: invalid string line break
                            return error.InvalidStringLineBreak;
                        },
                    },
                    .comment => switch (codepoint) {
                        '\n' => state = .default,
                        else => {},
                    },
                }
            }

            // End of input: finish whatever the last line left open.
            switch (state) {
                .default => {
                    try flushUtf8Value(&line_buf, &value_buf, false);
                    try flushUtf8Line(&table, &line_buf);
                },
                .string_double_quote => {
                    try flushUtf8Value(&line_buf, &value_buf, true);
                    try flushUtf8Line(&table, &line_buf);
                },
                .string, .string_line_break_escape => return error.StringNotClosed,
                // The row of a comment line was already emitted at the `#`.
                .comment => {},
            }

            return .{ .utf8 = try table.toOwnedSlice() };
        },
        else => return error.Unimplemented,
    }
}

/// Moves the pending value from `value_buf` into `line_buf`.
/// Unquoted values are skipped when empty and become `null` when they are
/// exactly `-`; quoted values are always stored, even when empty.
fn flushUtf8Value(line_buf: *std.ArrayList(?[]u8), value_buf: *std.ArrayList(u8), quoted: bool) !void {
    if (!quoted) {
        if (value_buf.items.len == 0) return;
        if (std.mem.eql(u8, value_buf.items, "-")) {
            try line_buf.append(null);
            value_buf.clearRetainingCapacity();
            return;
        }
    }
    // Reserve the slot first so the owned slice cannot be lost to a failed append.
    try line_buf.ensureUnusedCapacity(1);
    const value = try value_buf.toOwnedSlice();
    line_buf.appendAssumeCapacity(value);
}

/// Moves the values collected in `line_buf` into `table` as one finished row.
fn flushUtf8Line(table: *std.ArrayList([]?[]u8), line_buf: *std.ArrayList(?[]u8)) !void {
    // Reserve the slot first so the owned slice cannot be lost to a failed append.
    try table.ensureUnusedCapacity(1);
    const line = try line_buf.toOwnedSlice();
    table.appendAssumeCapacity(line);
}

/// Appends `codepoint` to `value_buf` in its UTF-8 encoding.
fn appendUtf8Codepoint(value_buf: *std.ArrayList(u8), codepoint: u21) !void {
    var encoded: [4]u8 = undefined;
    const encoded_len = try std.unicode.utf8Encode(codepoint, &encoded);
    try value_buf.appendSlice(encoded[0..encoded_len]);
}
/// Test helper: compares two UTF-8 tables cell by cell.
///
/// Every difference is printed to stderr, then `error.TestExpectedEqual` is
/// returned if any was found. A differing row count is reported immediately.
/// A differing number of values within a row is reported, and the cells both
/// rows have in common are still compared. `null` cells equal only other
/// `null` cells.
fn expectEqualUTF8Tables(expected_table: []const []const ?[]const u8, actual_table: []const []const ?[]const u8) !void {
    if (expected_table.len != actual_table.len) {
        std.debug.print("Expected table to have {} rows, found {} rows\n", .{ expected_table.len, actual_table.len });
        return error.TestExpectedEqual;
    }

    var is_errors = false;
    for (expected_table, actual_table, 0..) |expected_row, actual_row, row| {
        if (expected_row.len != actual_row.len) {
            std.debug.print("at row {}: expected {} values, found {} values\n", .{ row, expected_row.len, actual_row.len });
            is_errors = true;
        }
        // A multi-object `for` requires equal lengths, so compare only the
        // part both rows share.
        const shared_len = @min(expected_row.len, actual_row.len);
        for (expected_row[0..shared_len], actual_row[0..shared_len], 0..) |expected_value, actual_value, col| {
            if (expected_value) |expected_str| {
                if (actual_value) |actual_str| {
                    if (!std.mem.eql(u8, expected_str, actual_str)) {
                        std.debug.print(
                            \\at row {}, column {}
                            \\ expected "{}"
                            \\ found "{}"
                            \\
                        , .{ row, col, std.zig.fmtEscapes(expected_str), std.zig.fmtEscapes(actual_str) });
                        is_errors = true;
                    }
                } else {
                    std.debug.print(
                        \\at row {}, column {}
                        \\ expected "{}"
                        \\ found null
                        \\
                    , .{ row, col, std.zig.fmtEscapes(expected_str) });
                    is_errors = true;
                }
            } else if (actual_value) |actual_str| {
                std.debug.print(
                    \\at row {}, column {}
                    \\ expected null
                    \\ found "{}"
                    \\
                , .{ row, col, std.zig.fmtEscapes(actual_str) });
                is_errors = true;
            }
        }
    }

    if (is_errors) {
        return error.TestExpectedEqual;
    }
}
// Parses the embedded UTF-8 example table and compares it, cell by cell, with
// the expected rows. The first column of each expected row is the character
// that the row describes in the data file.
test parseAlloc {
    const table = try parseAlloc(testing.allocator, @embedFile("./testdata/Example01_Table_UTF8.txt"));
    defer table.free(testing.allocator);
    try testing.expectEqual(reliabletxt.Encoding.utf8, @as(reliabletxt.Encoding, table));
    const utf8_table = table.utf8;
    try expectEqualUTF8Tables(
        &.{
            &.{ "a", "U+0061", "61", "0061", "Latin Small Letter A" },
            &.{ "~", "U+007E", "7E", "007E", "Tilde" },
            &.{ "¥", "U+00A5", "C2_A5", "00A5", "Yen Sign" },
            &.{ "»", "U+00BB", "C2_BB", "00BB", "Right-Pointing Double Angle Quotation Mark" },
            &.{ "½", "U+00BD", "C2_BD", "00BD", "Vulgar Fraction One Half" },
            &.{ "¿", "U+00BF", "C2_BF", "00BF", "Inverted Question Mark" },
            &.{ "ß", "U+00DF", "C3_9F", "00DF", "Latin Small Letter Sharp S" },
            &.{ "ä", "U+00E4", "C3_A4", "00E4", "Latin Small Letter A with Diaeresis" },
            &.{ "ï", "U+00EF", "C3_AF", "00EF", "Latin Small Letter I with Diaeresis" },
            &.{ "œ", "U+0153", "C5_93", "0153", "Latin Small Ligature Oe" },
            // The Euro sign and the CJK ideograph must be spelled out here; an
            // empty string would never match the first column of the data file.
            &.{ "€", "U+20AC", "E2_82_AC", "20AC", "Euro Sign" },
            &.{ "東", "U+6771", "E6_9D_B1", "6771", "CJK Unified Ideograph-6771" },
            &.{ "𝄞", "U+1D11E", "F0_9D_84_9E", "D834_DD1E", "Musical Symbol G Clef" },
            &.{ "𠀇", "U+20007", "F0_A0_80_87", "D840_DC07", "CJK Unified Ideograph-20007" },
        },
        utf8_table,
    );
}
/// Decodes a quoted WSV string into `buffer` and returns the decoded part of
/// `buffer`.
///
/// `encoded_string` must include its surrounding double quotes. Between them,
/// `""` decodes to a single `"` and `"/"` decodes to a line feed; every other
/// byte is copied unchanged. The decoded text is never longer than the input.
///
/// Errors:
/// - `error.InvalidFormat`: the input is not wrapped in two distinct quote
///   characters, contains a raw line feed, contains a `"` that does not begin
///   one of the two escapes, or ends in the middle of an escape.
/// - `error.OutOfMemory`: `buffer` is too small for the decoded text.
pub fn decodeString(encoded_string: []const u8, buffer: []u8) ![]const u8 {
    const State = enum {
        default,
        double_quote,
        double_quote_slash,
    };

    // Two bytes are the minimum: a lone `"` must not count as both the opening
    // and the closing quote, otherwise the slice below would be [1..0].
    if (encoded_string.len < 2 or encoded_string[0] != '"' or encoded_string[encoded_string.len - 1] != '"') return error.InvalidFormat;

    var state = State.default;
    var write_pos: usize = 0;
    for (encoded_string[1 .. encoded_string.len - 1]) |encoded_character| {
        switch (state) {
            .default => switch (encoded_character) {
                '\n' => return error.InvalidFormat,
                '"' => state = .double_quote,
                else => {
                    if (write_pos >= buffer.len) return error.OutOfMemory;
                    buffer[write_pos] = encoded_character;
                    write_pos += 1;
                },
            },
            .double_quote => switch (encoded_character) {
                '"' => {
                    if (write_pos >= buffer.len) return error.OutOfMemory;
                    buffer[write_pos] = encoded_character;
                    write_pos += 1;
                    state = .default;
                },
                '/' => state = .double_quote_slash,
                else => return error.InvalidFormat,
            },
            .double_quote_slash => switch (encoded_character) {
                '"' => {
                    if (write_pos >= buffer.len) return error.OutOfMemory;
                    buffer[write_pos] = '\n';
                    write_pos += 1;
                    state = .default;
                },
                else => return error.InvalidFormat,
            },
        }
    }

    // An escape that was started but not completed (`"a""` or `"a"/"`) is
    // malformed.
    if (state != .default) return error.InvalidFormat;

    return buffer[0..write_pos];
}
// Covers the empty string, plain text, the `""` escape for a literal double
// quote, and the `"/"` escape for a line feed.
test decodeString {
var buffer: [128]u8 = undefined;
try testing.expectEqualStrings("", try decodeString("\"\"", &buffer));
try testing.expectEqualStrings("Latin Small Letter A", try decodeString("\"Latin Small Letter A\"", &buffer));
try testing.expectEqualStrings("See these \"quotes\" I'm making with my claw hands? It means I don't belive you.", try decodeString("\"See these \"\"quotes\"\" I'm making with my claw hands? It means I don't belive you.\"", &buffer));
try testing.expectEqualStrings("Line 1\nLine 2", try decodeString("\"Line 1\"/\"Line 2\"", &buffer));
}
/// Creates a non-allocating iterator over the WSV items of a ReliableTXT
/// document. The iterator borrows `contents_any`, which must stay valid for as
/// long as the iterator and the items it yields are in use.
///
/// Only UTF-8 documents are supported; any other detected encoding yields
/// `error.Unimplemented`. Encoding detection and UTF-8 validation errors are
/// passed through.
pub fn parseIter(contents_any: []const u8) !Iterator {
    const file = try reliabletxt.parse(contents_any);
    const text = switch (file) {
        .utf8 => |bytes| bytes,
        else => return error.Unimplemented,
    };
    const view = try std.unicode.Utf8View.init(text);
    return Iterator{ .utf8 = .{ .utf8_iter = view.iterator() } };
}
/// Iterator returned by `parseIter`, tagged by the encoding it walks. Only the
/// UTF-8 variant exists so far.
pub const Iterator = union(enum) {
/// Iterator over a UTF-8 encoded document.
utf8: Utf8Iterator,
// NOTE(review): presumably a placeholder for the encodings that are not
// implemented yet. `_` marks non-exhaustive enums; confirm that it is
// accepted as a union member by the targeted compiler version.
_,
};
/// Non-allocating tokenizer for UTF-8 encoded WSV text.
///
/// Each call to `next` yields one item: a value, a quoted string, a null or a
/// newline. Comments are skipped. Slices in the items point into the bytes the
/// iterator was created from.
pub const Utf8Iterator = struct {
    utf8_iter: std.unicode.Utf8Iterator,

    pub const Item = union(enum) {
        /// End of a line. Also produced by the line feed that ends a comment.
        newline,
        /// A value not surrounded by quotes. Can't include any whitespace.
        value: []const u8,
        /// A value surrounded by quotes. May include escaped double quotes or escaped newlines.
        /// The slice still contains the surrounding quotes and the escapes;
        /// use `decodeString` to obtain the text.
        string: []const u8,
        /// The null value, written as an unquoted `-`.
        null,
    };

    const ParseState = enum { default, value, string, string_double_quote, string_line_break_escape, comment };

    /// Returns the next item, or `null` once the input is exhausted.
    ///
    /// Errors:
    /// - `error.DoubleQuoteInValue`: a `"` inside an unquoted value.
    /// - `error.StringNotClosed`: a line or the input ends inside a string.
    /// - `error.InvalidStringLineBreak`: `"/` is not followed by `"`.
    pub fn next(this: *@This()) !?Item {
        var state = Utf8Iterator.ParseState.default;
        var value_start: usize = this.utf8_iter.i;
        while (this.utf8_iter.nextCodepoint()) |codepoint| {
            switch (state) {
                .default => switch (codepoint) {
                    '\n' => return Item.newline,
                    '"' => state = .string,
                    ' ',
                    '\t',
                    => value_start = this.utf8_iter.i,
                    '#' => state = .comment,
                    else => state = .value,
                },
                .value => switch (codepoint) {
                    // TODO: Add other whitespace characters
                    '\n',
                    ' ',
                    '\t',
                    '#',
                    => {
                        // Roll back so the terminator (line feed or start of a
                        // comment) is handled by the next call.
                        this.utf8_iter.i -= std.unicode.utf8CodepointSequenceLength(codepoint) catch unreachable;
                        return valueItem(this.utf8_iter.bytes[value_start..this.utf8_iter.i]);
                    },
                    '"' => return error.DoubleQuoteInValue,
                    else => {},
                },
                .string => switch (codepoint) {
                    '\n' => {
                        // TODO: diagnostic: string not closed
                        return error.StringNotClosed;
                    },
                    '"' => state = .string_double_quote,
                    else => {},
                },
                .string_double_quote => switch (codepoint) {
                    '"' => state = .string,
                    '/' => state = .string_line_break_escape,
                    // TODO: Add other whitespace characters
                    '\n',
                    '#',
                    ' ',
                    '\t',
                    => {
                        // we roll back here so it can be handled in the next iteration of the loop
                        this.utf8_iter.i -= std.unicode.utf8CodepointSequenceLength(codepoint) catch unreachable;
                        return Item{ .string = this.utf8_iter.bytes[value_start..this.utf8_iter.i] };
                    },
                    else => {},
                },
                .string_line_break_escape => switch (codepoint) {
                    '"' => state = .string,
                    else => {
                        // TODO: diagnostic: invalid string line break
                        return error.InvalidStringLineBreak;
                    },
                },
                .comment => switch (codepoint) {
                    // The line feed that ends a comment also ends the row, so
                    // report it instead of silently continuing on the next line.
                    '\n' => return Item.newline,
                    else => {},
                },
            }
        }

        // End of input: emit whatever was still being read, so the last item
        // of a document without a trailing line feed is not lost.
        switch (state) {
            .default, .comment => return null,
            .value => return valueItem(this.utf8_iter.bytes[value_start..this.utf8_iter.i]),
            .string_double_quote => return Item{ .string = this.utf8_iter.bytes[value_start..this.utf8_iter.i] },
            .string, .string_line_break_escape => return error.StringNotClosed,
        }
    }

    /// Classifies an unquoted token: `-` is the null value, anything else a value.
    fn valueItem(text: []const u8) Item {
        if (std.mem.eql(u8, text, "-")) return .null;
        return .{ .value = text };
    }
};
/// Test helper: drains `actual_table` and compares every value, string and
/// null it yields with the cell of `expected_table` at the current position.
/// A newline item moves to the next expected row. Quoted strings are decoded
/// with `decodeString` (up to 128 bytes) before comparison.
///
/// Differences are printed to stderr, then `error.TestExpectedEqual` is
/// returned if any was found. An item that falls outside the expected table
/// (too many rows, or too many values in a row) stops the comparison. Errors
/// from the iterator and from `decodeString` are passed through.
///
/// Only the items the iterator actually yields are checked; the helper does
/// not verify that every expected cell was produced.
fn expectEqualUTF8TablesIter(expected_table: []const []const ?[]const u8, actual_table: Utf8Iterator) !void {
    var actual_table_iter = actual_table;
    var is_errors = false;
    var expected_row_index: usize = 0;
    var expected_value_index: usize = 0;
    while (try actual_table_iter.next()) |actual_parse_event| {
        if (actual_parse_event == .newline) {
            expected_row_index += 1;
            expected_value_index = 0;
            continue;
        }

        // Bounds are checked only for items that carry a cell, so trailing
        // newline items do not count as extra rows.
        if (expected_row_index >= expected_table.len) {
            std.debug.print("Expected table to have at most {} rows, found more rows\n", .{expected_table.len});
            is_errors = true;
            break;
        }
        const expected_row = expected_table[expected_row_index];
        if (expected_value_index >= expected_row.len) {
            std.debug.print("at row {}: expected at most {} values, found more values\n", .{ expected_row_index, expected_row.len });
            is_errors = true;
            break;
        }
        const expected_value = expected_row[expected_value_index];

        var decode_buf: [128]u8 = undefined;
        const actual_value: ?[]const u8 = switch (actual_parse_event) {
            .newline => unreachable, // handled above
            .value => |actual_value_str| actual_value_str,
            .string => |actual_string_encoded| try decodeString(actual_string_encoded, &decode_buf),
            .null => null,
        };

        if (expected_value) |expected_str| {
            if (actual_value) |actual_str| {
                if (!std.mem.eql(u8, expected_str, actual_str)) {
                    std.debug.print(
                        \\at row {}, column {}
                        \\ expected "{}"
                        \\ found "{}"
                        \\
                    , .{ expected_row_index, expected_value_index, std.zig.fmtEscapes(expected_str), std.zig.fmtEscapes(actual_str) });
                    is_errors = true;
                }
            } else {
                std.debug.print(
                    \\at row {}, column {}
                    \\ expected "{}"
                    \\ found null
                    \\
                , .{ expected_row_index, expected_value_index, std.zig.fmtEscapes(expected_str) });
                is_errors = true;
            }
        } else if (actual_value) |actual_str| {
            std.debug.print(
                \\at row {}, column {}
                \\ expected null
                \\ found "{}"
                \\
            , .{ expected_row_index, expected_value_index, std.zig.fmtEscapes(actual_str) });
            is_errors = true;
        }

        // Every cell-carrying item, null included, occupies one column.
        expected_value_index += 1;
    }

    if (is_errors) {
        return error.TestExpectedEqual;
    }
}
// Walks the embedded UTF-8 example table with the iterator API and compares
// every yielded item with the expected rows. The first column of each expected
// row is the character that the row describes in the data file.
test parseIter {
    try expectEqualUTF8TablesIter(
        &.{
            &.{ "a", "U+0061", "61", "0061", "Latin Small Letter A" },
            &.{ "~", "U+007E", "7E", "007E", "Tilde" },
            &.{ "¥", "U+00A5", "C2_A5", "00A5", "Yen Sign" },
            &.{ "»", "U+00BB", "C2_BB", "00BB", "Right-Pointing Double Angle Quotation Mark" },
            &.{ "½", "U+00BD", "C2_BD", "00BD", "Vulgar Fraction One Half" },
            &.{ "¿", "U+00BF", "C2_BF", "00BF", "Inverted Question Mark" },
            &.{ "ß", "U+00DF", "C3_9F", "00DF", "Latin Small Letter Sharp S" },
            &.{ "ä", "U+00E4", "C3_A4", "00E4", "Latin Small Letter A with Diaeresis" },
            &.{ "ï", "U+00EF", "C3_AF", "00EF", "Latin Small Letter I with Diaeresis" },
            &.{ "œ", "U+0153", "C5_93", "0153", "Latin Small Ligature Oe" },
            // The Euro sign and the CJK ideograph must be spelled out here; an
            // empty string would never match the first column of the data file.
            &.{ "€", "U+20AC", "E2_82_AC", "20AC", "Euro Sign" },
            &.{ "東", "U+6771", "E6_9D_B1", "6771", "CJK Unified Ideograph-6771" },
            &.{ "𝄞", "U+1D11E", "F0_9D_84_9E", "D834_DD1E", "Musical Symbol G Clef" },
            &.{ "𠀇", "U+20007", "F0_A0_80_87", "D840_DC07", "CJK Unified Ideograph-20007" },
        },
        (try parseIter(@embedFile("./testdata/Example01_Table_UTF8.txt"))).utf8,
    );
}
const reliabletxt = @import("./reliabletxt.zig");
const testing = std.testing;
const std = @import("std");