From 82e9293e12a8a566352d19a808f897308524b606 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Mon, 30 Oct 2023 15:28:10 +0100 Subject: [PATCH] xkbcomp: early detection of invalid encoding --- src/xkbcomp/scanner.c | 15 ++++++++-- test/buffercomp.c | 65 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 66 insertions(+), 14 deletions(-) diff --git a/src/xkbcomp/scanner.c b/src/xkbcomp/scanner.c index 8bff375..7db9a7f 100644 --- a/src/xkbcomp/scanner.c +++ b/src/xkbcomp/scanner.c @@ -200,9 +200,18 @@ XkbParseString(struct xkb_context *ctx, const char *string, size_t len, struct scanner scanner; scanner_init(&scanner, ctx, string, len, file_name, NULL); - /* Skip UTF-8 encoded BOM (U+FEFF) */ - /* See: https://www.unicode.org/faq/utf_bom.html#bom5 */ - scanner_str(&scanner, "\xef\xbb\xbf", 3); + /* Basic detection of wrong character encoding. + The first character relevant to the grammar must be ASCII: + whitespace, section, comment */ + if (!scanner_check_supported_char_encoding(&scanner)) { + scanner_err(&scanner, + "This could be a file encoding issue. " + "Supported encodings must be backward compatible with ASCII."); + scanner_err(&scanner, + "E.g. 
ISO/IEC 8859 and UTF-8 are supported " + "but UTF-16, UTF-32 and CP1026 are not."); + return NULL; + } return parse(ctx, &scanner, map); } diff --git a/test/buffercomp.c b/test/buffercomp.c index b9b5e9d..091a876 100644 --- a/test/buffercomp.c +++ b/test/buffercomp.c @@ -31,6 +31,59 @@ #define DATA_PATH "keymaps/stringcomp.data" +static bool +test_encodings(struct xkb_context *ctx) +{ + struct xkb_keymap *keymap; + + /* Accept UTF-8 encoded BOM (U+FEFF) */ + const char utf8_with_bom[] = + "\xef\xbb\xbfxkb_keymap {" + " xkb_keycodes { include \"evdev\" };" + " xkb_types { include \"complete\" };" + " xkb_compat { include \"complete\" };" + " xkb_symbols { include \"pc\" };" + "};"; + keymap = test_compile_buffer(ctx, utf8_with_bom, sizeof(utf8_with_bom)); + assert(keymap); + xkb_keymap_unref(keymap); + + /* Reject UTF-16LE encoded string */ + const char utf16_le[] = + "x\0k\0b\0_\0k\0e\0y\0m\0a\0p\0 \0{\0\n\0" + " \0 \0x\0k\0b\0_\0k\0e\0y\0c\0o\0d\0e\0s\0 \0{\0 \0i\0n\0c\0l\0u\0d\0e\0 \0\"\0e\0v\0d\0e\0v\0\"\0 \0}\0;\0\n\0" + " \0 \0x\0k\0b\0_\0t\0y\0p\0e\0s\0 \0{\0 \0i\0n\0c\0l\0u\0d\0e\0 \0\"\0c\0o\0m\0p\0l\0e\0t\0e\0\"\0 \0}\0;\0\n\0" + " \0 \0x\0k\0b\0_\0c\0o\0m\0p\0a\0t\0 \0{\0 \0i\0n\0c\0l\0u\0d\0e\0 \0\"\0c\0o\0m\0p\0l\0e\0t\0e\0\"\0 \0}\0;\0\n\0" + " \0 \0x\0k\0b\0_\0s\0y\0m\0b\0o\0l\0s\0 \0{\0 \0i\0n\0c\0l\0u\0d\0e\0 \0\"\0p\0c\0\"\0 \0}\0;\0\n\0" + "}\0;\0"; + keymap = test_compile_buffer(ctx, utf16_le, sizeof(utf16_le)); + assert(!keymap); + + /* Reject UTF-16LE with BOM encoded string */ + const char utf16_le_with_bom[] = + "\xff\xfex\0k\0b\0_\0k\0e\0y\0m\0a\0p\0 \0{\0\n\0" + " \0 \0x\0k\0b\0_\0k\0e\0y\0c\0o\0d\0e\0s\0 \0{\0 \0i\0n\0c\0l\0u\0d\0e\0 \0\"\0e\0v\0d\0e\0v\0\"\0 \0}\0;\0\n\0" + " \0 \0x\0k\0b\0_\0t\0y\0p\0e\0s\0 \0{\0 \0i\0n\0c\0l\0u\0d\0e\0 \0\"\0c\0o\0m\0p\0l\0e\0t\0e\0\"\0 \0}\0;\0\n\0" + " \0 \0x\0k\0b\0_\0c\0o\0m\0p\0a\0t\0 \0{\0 \0i\0n\0c\0l\0u\0d\0e\0 \0\"\0c\0o\0m\0p\0l\0e\0t\0e\0\"\0 \0}\0;\0\n\0" + " \0 
\0x\0k\0b\0_\0s\0y\0m\0b\0o\0l\0s\0 \0{\0 \0i\0n\0c\0l\0u\0d\0e\0 \0\"\0p\0c\0\"\0 \0}\0;\0\n\0" + "}\0;\0"; + keymap = test_compile_buffer(ctx, utf16_le_with_bom, sizeof(utf16_le_with_bom)); + assert(!keymap); + + /* Reject UTF-16BE encoded string */ + const char utf16_be[] = + "\0x\0k\0b\0_\0k\0e\0y\0m\0a\0p\0 \0{\0\n\0" + " \0 \0x\0k\0b\0_\0k\0e\0y\0c\0o\0d\0e\0s\0 \0{\0 \0i\0n\0c\0l\0u\0d\0e\0 \0\"\0e\0v\0d\0e\0v\0\"\0 \0}\0;\0\n\0" + " \0 \0x\0k\0b\0_\0t\0y\0p\0e\0s\0 \0{\0 \0i\0n\0c\0l\0u\0d\0e\0 \0\"\0c\0o\0m\0p\0l\0e\0t\0e\0\"\0 \0}\0;\0\n\0" + " \0 \0x\0k\0b\0_\0c\0o\0m\0p\0a\0t\0 \0{\0 \0i\0n\0c\0l\0u\0d\0e\0 \0\"\0c\0o\0m\0p\0l\0e\0t\0e\0\"\0 \0}\0;\0\n\0" + " \0 \0x\0k\0b\0_\0s\0y\0m\0b\0o\0l\0s\0 \0{\0 \0i\0n\0c\0l\0u\0d\0e\0 \0\"\0p\0c\0\"\0 \0}\0;\0\n\0" + "}\0;"; + keymap = test_compile_buffer(ctx, utf16_be, sizeof(utf16_be)); + assert(!keymap); + + return true; +} + int main(int argc, char *argv[]) { @@ -78,17 +131,7 @@ main(int argc, char *argv[]) keymap = test_compile_buffer(ctx, "", 0); assert(!keymap); - /* Accept UTF-8 encoded BOM (U+FEFF) */ - const char *bom = - "\xef\xbb\xbfxkb_keymap {" - " xkb_keycodes { include \"evdev\" };" - " xkb_types { include \"complete\" };" - " xkb_compat { include \"complete\" };" - " xkb_symbols { include \"pc\" };" - "};"; - keymap = test_compile_buffer(ctx, bom, strlen(bom)); - assert(keymap); - xkb_keymap_unref(keymap); + assert(test_encodings(ctx)); /* Make sure we can recompile our output for a normal keymap from rules. */ keymap = test_compile_rules(ctx, NULL, NULL,