From 3aaa4e2a534267dda2a22e97db3efcb8e4757536 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Mon, 30 Oct 2023 15:51:34 +0100 Subject: [PATCH] rules: early detection of invalid encoding --- src/xkbcomp/rules.c | 25 +++++++++++++-------- test/data/rules/utf-16be_with_bom | Bin 0 -> 1146 bytes test/data/rules/utf-16le_with_bom | Bin 0 -> 1146 bytes test/data/rules/utf-32be | Bin 0 -> 2288 bytes test/rules-file.c | 36 ++++++++++++++++++++++++++++++ 5 files changed, 52 insertions(+), 9 deletions(-) create mode 100644 test/data/rules/utf-16be_with_bom create mode 100644 test/data/rules/utf-16le_with_bom create mode 100644 test/data/rules/utf-32be diff --git a/src/xkbcomp/rules.c b/src/xkbcomp/rules.c index daa4f3d..918fe31 100644 --- a/src/xkbcomp/rules.c +++ b/src/xkbcomp/rules.c @@ -1084,29 +1084,36 @@ read_rules_file(struct xkb_context *ctx, FILE *file, const char *path) { - bool ret = false; + bool ret; char *string; size_t size; struct scanner scanner; - ret = map_file(file, &string, &size); - if (!ret) { + if (!map_file(file, &string, &size)) { log_err(ctx, XKB_LOG_MESSAGE_NO_ID, "Couldn't read rules file \"%s\": %s\n", path, strerror(errno)); - goto out; + return false; } scanner_init(&scanner, matcher->ctx, string, size, path, NULL); - /* Skip UTF-8 encoded BOM (U+FEFF) */ - /* See: https://www.unicode.org/faq/utf_bom.html#bom5 */ - scanner_str(&scanner, "\xef\xbb\xbf", 3); + /* Basic detection of wrong character encoding. + The first character relevant to the grammar must be ASCII: + whitespace, !, / (for comment) */ + if (!scanner_check_supported_char_encoding(&scanner)) { + scanner_err(&scanner, + "This could be a file encoding issue. " + "Supported encodings must be backward compatible with ASCII."); + scanner_err(&scanner, + "E.g. ISO/CEI 8859 and UTF-8 are supported " + "but UTF-16, UTF-32 and CP1026 are not."); + unmap_file(string, size); + return false; + } ret = matcher_match(matcher, &scanner, include_depth, string, size, path); - unmap_file(string, size); -out: return ret; } diff --git a/test/data/rules/utf-16be_with_bom b/test/data/rules/utf-16be_with_bom new file mode 100644 index 0000000000000000000000000000000000000000..ea44babc56209518ae6860a59719fc3a70f6ee8c GIT binary patch literal 1146 zcmb7^Pfx-?5XIk_Pf>3wF~*Ar6E7xk^3O%jq)HKDOARe5iJx8l4NGiySprSdc6N5& zn>Vx5{`~5kE783k^r(TZuxm{r8XKsMHwWc3BR(Q@_u+U*Rq-4%xNaVfMqOq^7Iy)b4jQK3k36 z5Y<&{8-JKi`sqgxsbV$do-1SJo-@t$tWp#9ChOff-;a%eIPho)fde{gS9q?Sr!g9Z2axb86}pOra;ZoOQdJ zxSp#d)N5+=_4;`KBIAP0WBF8X)A4worhZ>>zr$T#9ND$oVfMqOq^7Iy)b4jQK3k37 zkm~BS%^&8|eoX~VQ{GX#v-_NGb6vgq3eIhblbR_lnbL5AtxG;*5pEjeoDtu=0jn_M IpT(#D0I|cSrT_o{ literal 0 HcmV?d00001 diff --git a/test/data/rules/utf-32be b/test/data/rules/utf-32be new file mode 100644 index 0000000000000000000000000000000000000000..588e32e07ff105a4446bdf57ea97e51d97782c47 GIT binary patch literal 2288 zcmcJRNlU~)5Jq$MSCls$5%J|cB91FX@y zty$08wDfu&erLXIIjR#iWnDu&Or0{mujZuC&pi5!soB-rU3ru~oBw~lC7!Xp`Usx8 z+D-lr(aKs&f4j8C_O(fWo_^-{*t+%9cSqf^Jnf@5wy$kDl-}B>dZHhGOTEtD>tNnQ d_EMWp#+LmyL@zbI$isKQSoiXs@IOnV_BW|Tq^SS^ literal 0 HcmV?d00001 diff --git a/test/rules-file.c b/test/rules-file.c index 302aa68..726ec89 100644 --- a/test/rules-file.c +++ b/test/rules-file.c @@ -106,6 +106,42 @@ main(int argc, char *argv[]) }; assert(test_rules(ctx, &test_utf_8_with_bom)); + struct test_data test_utf_16le_with_bom = { + .rules = "utf-16le_with_bom", + + .model = "my_model", .layout = "my_layout", .variant = "my_variant", + .options = "my_option", + + .keycodes = "my_keycodes", .types = "my_types", + .compat = "my_compat|some:compat", + .symbols = "my_symbols+extra_variant", + }; + assert(!test_rules(ctx, &test_utf_16le_with_bom)); + + struct test_data test_utf_16be_with_bom = { + .rules = "utf-16be_with_bom", + + .model = "my_model", .layout = "my_layout", .variant = "my_variant", + .options = "my_option", + + .keycodes = "my_keycodes", .types = "my_types", + .compat = "my_compat|some:compat", + .symbols = "my_symbols+extra_variant", + }; + assert(!test_rules(ctx, &test_utf_16be_with_bom)); + + struct test_data test_utf_32be = { + .rules = "utf-32be", + + .model = "my_model", .layout = "my_layout", .variant = "my_variant", + .options = "my_option", + + .keycodes = "my_keycodes", .types = "my_types", + .compat = "my_compat|some:compat", + .symbols = "my_symbols+extra_variant", + }; + assert(!test_rules(ctx, &test_utf_32be)); + struct test_data test1 = { .rules = "simple",