From f937c30820766e22c2ba5ad905eaa8cb4878294c Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Sun, 29 Oct 2023 07:31:34 +0100 Subject: [PATCH] xkbcomp: skip leading UTF-8 encoded BOM (U+FEFF) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Leading BOM is legal and is used as a signature — an indication that an otherwise unmarked text file is in UTF-8. See: https://www.unicode.org/faq/utf_bom.html#bom5 for further details. --- src/xkbcomp/scanner.c | 5 +++++ test/buffercomp.c | 12 ++++++++++++ 2 files changed, 17 insertions(+) diff --git a/src/xkbcomp/scanner.c b/src/xkbcomp/scanner.c index 57babbb..8bff375 100644 --- a/src/xkbcomp/scanner.c +++ b/src/xkbcomp/scanner.c @@ -199,6 +199,11 @@ XkbParseString(struct xkb_context *ctx, const char *string, size_t len, { struct scanner scanner; scanner_init(&scanner, ctx, string, len, file_name, NULL); + + /* Skip UTF-8 encoded BOM (U+FEFF) */ + /* See: https://www.unicode.org/faq/utf_bom.html#bom5 */ + scanner_str(&scanner, "\xef\xbb\xbf", 3); + return parse(ctx, &scanner, map); } diff --git a/test/buffercomp.c b/test/buffercomp.c index 9a76036..b9b5e9d 100644 --- a/test/buffercomp.c +++ b/test/buffercomp.c @@ -78,6 +78,18 @@ main(int argc, char *argv[]) keymap = test_compile_buffer(ctx, "", 0); assert(!keymap); + /* Accept UTF-8 encoded BOM (U+FEFF) */ + const char *bom = + "\xef\xbb\xbfxkb_keymap {" + " xkb_keycodes { include \"evdev\" };" + " xkb_types { include \"complete\" };" + " xkb_compat { include \"complete\" };" + " xkb_symbols { include \"pc\" };" + "};"; + keymap = test_compile_buffer(ctx, bom, strlen(bom)); + assert(keymap); + xkb_keymap_unref(keymap); + /* Make sure we can recompile our output for a normal keymap from rules. */ keymap = test_compile_rules(ctx, NULL, NULL, "ru,ca,de,us", ",multix,neo,intl", NULL);