rules: skip leading UTF-8 encoded BOM (U+FEFF)

A leading BOM is legal in UTF-8 and is used as a signature — an indication
that an otherwise unmarked text file is encoded in UTF-8.
See https://www.unicode.org/faq/utf_bom.html#bom5 for further details.
master
Pierre Le Marre 2023-10-29 07:44:39 +01:00 committed by Wismill
parent f937c30820
commit 9e88718080
3 changed files with 38 additions and 0 deletions

View File

@ -1099,6 +1099,10 @@ read_rules_file(struct xkb_context *ctx,
scanner_init(&scanner, matcher->ctx, string, size, path, NULL);
/* Skip UTF-8 encoded BOM (U+FEFF) */
/* See: https://www.unicode.org/faq/utf_bom.html#bom5 */
scanner_str(&scanner, "\xef\xbb\xbf", 3);
ret = matcher_match(matcher, &scanner, include_depth, string, size, path);
unmap_file(string, size);

View File

@ -0,0 +1,22 @@
// NOTE: this file is encoded in UTF-8 with a leading BOM (U+FEFF)
! model = keycodes
my_model = my_keycodes
* = default_keycodes
! layout variant = symbols
my_layout my_variant = my_symbols+extra_variant
! layout = symbols
my_layout = my_symbols
* = default_symbols
! model = types
my_model = my_types
* = default_types
! model = compat
my_model = my_compat
* = default_compat
! option = compat
my_option = |some:compat

View File

@ -94,6 +94,18 @@ main(int argc, char *argv[])
ctx = test_get_context(0);
assert(ctx);
struct test_data test_utf_8_with_bom = {
.rules = "utf-8_with_bom",
.model = "my_model", .layout = "my_layout", .variant = "my_variant",
.options = "my_option",
.keycodes = "my_keycodes", .types = "my_types",
.compat = "my_compat|some:compat",
.symbols = "my_symbols+extra_variant",
};
assert(test_rules(ctx, &test_utf_8_with_bom));
struct test_data test1 = {
.rules = "simple",