From 9e88718080907f8d54843451a3afbd461c34cca3 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Sun, 29 Oct 2023 07:44:39 +0100 Subject: [PATCH] rules: skip heading UTF-8 encoded BOM (U+FEFF) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Leading BOM is legal and is used as a signature — an indication that an otherwise unmarked text file is in UTF-8. See: https://www.unicode.org/faq/utf_bom.html#bom5 for further details. --- src/xkbcomp/rules.c | 4 ++++ test/data/rules/utf-8_with_bom | 22 ++++++++++++++++++++++ test/rules-file.c | 12 ++++++++++++ 3 files changed, 38 insertions(+) create mode 100644 test/data/rules/utf-8_with_bom diff --git a/src/xkbcomp/rules.c b/src/xkbcomp/rules.c index f5d9c49..daa4f3d 100644 --- a/src/xkbcomp/rules.c +++ b/src/xkbcomp/rules.c @@ -1099,6 +1099,10 @@ read_rules_file(struct xkb_context *ctx, scanner_init(&scanner, matcher->ctx, string, size, path, NULL); + /* Skip UTF-8 encoded BOM (U+FEFF) */ + /* See: https://www.unicode.org/faq/utf_bom.html#bom5 */ + scanner_str(&scanner, "\xef\xbb\xbf", 3); + ret = matcher_match(matcher, &scanner, include_depth, string, size, path); unmap_file(string, size); diff --git a/test/data/rules/utf-8_with_bom b/test/data/rules/utf-8_with_bom new file mode 100644 index 0000000..a3c3a73 --- /dev/null +++ b/test/data/rules/utf-8_with_bom @@ -0,0 +1,22 @@ +// NOTE: this file is encoded in UTF-8 with a leading BOM (U+FEFF) +! model = keycodes + my_model = my_keycodes + * = default_keycodes + +! layout variant = symbols + my_layout my_variant = my_symbols+extra_variant + +! layout = symbols + my_layout = my_symbols + * = default_symbols + +! model = types + my_model = my_types + * = default_types + +! model = compat + my_model = my_compat + * = default_compat + +! option = compat + my_option = |some:compat diff --git a/test/rules-file.c b/test/rules-file.c index d217ba9..302aa68 100644 --- a/test/rules-file.c +++ b/test/rules-file.c @@ -94,6 +94,18 @@ main(int argc, char *argv[]) ctx = test_get_context(0); assert(ctx); + struct test_data test_utf_8_with_bom = { + .rules = "utf-8_with_bom", + + .model = "my_model", .layout = "my_layout", .variant = "my_variant", + .options = "my_option", + + .keycodes = "my_keycodes", .types = "my_types", + .compat = "my_compat|some:compat", + .symbols = "my_symbols+extra_variant", + }; + assert(test_rules(ctx, &test_utf_8_with_bom)); + struct test_data test1 = { .rules = "simple",