xkbcomp: skip leading UTF-8 encoded BOM (U+FEFF)

A leading BOM is legal and is used as a signature — an indication that
an otherwise unmarked text file is encoded in UTF-8.
See https://www.unicode.org/faq/utf_bom.html#bom5 for further
details.
master
Pierre Le Marre 2023-10-29 07:31:34 +01:00 committed by Wismill
parent 59886e4183
commit f937c30820
2 changed files with 17 additions and 0 deletions

View File

@ -199,6 +199,11 @@ XkbParseString(struct xkb_context *ctx, const char *string, size_t len,
{
struct scanner scanner;
scanner_init(&scanner, ctx, string, len, file_name, NULL);
/* Skip UTF-8 encoded BOM (U+FEFF) */
/* See: https://www.unicode.org/faq/utf_bom.html#bom5 */
scanner_str(&scanner, "\xef\xbb\xbf", 3);
return parse(ctx, &scanner, map);
}

View File

@ -78,6 +78,18 @@ main(int argc, char *argv[])
keymap = test_compile_buffer(ctx, "", 0);
assert(!keymap);
/* Accept UTF-8 encoded BOM (U+FEFF) */
const char *bom =
"\xef\xbb\xbfxkb_keymap {"
" xkb_keycodes { include \"evdev\" };"
" xkb_types { include \"complete\" };"
" xkb_compat { include \"complete\" };"
" xkb_symbols { include \"pc\" };"
"};";
keymap = test_compile_buffer(ctx, bom, strlen(bom));
assert(keymap);
xkb_keymap_unref(keymap);
/* Make sure we can recompile our output for a normal keymap from rules. */
keymap = test_compile_rules(ctx, NULL, NULL,
"ru,ca,de,us", ",multix,neo,intl", NULL);