diff --git a/src/xkbcomp/rules.c b/src/xkbcomp/rules.c index daa4f3d..918fe31 100644 --- a/src/xkbcomp/rules.c +++ b/src/xkbcomp/rules.c @@ -1084,29 +1084,36 @@ read_rules_file(struct xkb_context *ctx, FILE *file, const char *path) { - bool ret = false; + bool ret; char *string; size_t size; struct scanner scanner; - ret = map_file(file, &string, &size); - if (!ret) { + if (!map_file(file, &string, &size)) { log_err(ctx, XKB_LOG_MESSAGE_NO_ID, "Couldn't read rules file \"%s\": %s\n", path, strerror(errno)); - goto out; + return false; } scanner_init(&scanner, matcher->ctx, string, size, path, NULL); - /* Skip UTF-8 encoded BOM (U+FEFF) */ - /* See: https://www.unicode.org/faq/utf_bom.html#bom5 */ - scanner_str(&scanner, "\xef\xbb\xbf", 3); + /* Basic detection of wrong character encoding. + The first character relevant to the grammar must be ASCII: + whitespace, !, / (for comment) */ + if (!scanner_check_supported_char_encoding(&scanner)) { + scanner_err(&scanner, + "This could be a file encoding issue. " + "Supported encodings must be backward compatible with ASCII."); + scanner_err(&scanner, + "E.g. ISO/CEI 8859 and UTF-8 are supported " + "but UTF-16, UTF-32 and CP1026 are not."); + unmap_file(string, size); + return false; + } ret = matcher_match(matcher, &scanner, include_depth, string, size, path); - unmap_file(string, size); -out: return ret; } diff --git a/test/data/rules/utf-16be_with_bom b/test/data/rules/utf-16be_with_bom new file mode 100644 index 0000000..ea44bab Binary files /dev/null and b/test/data/rules/utf-16be_with_bom differ diff --git a/test/data/rules/utf-16le_with_bom b/test/data/rules/utf-16le_with_bom new file mode 100644 index 0000000..9faf37e Binary files /dev/null and b/test/data/rules/utf-16le_with_bom differ diff --git a/test/data/rules/utf-32be b/test/data/rules/utf-32be new file mode 100644 index 0000000..588e32e Binary files /dev/null and b/test/data/rules/utf-32be differ diff --git a/test/rules-file.c b/test/rules-file.c index 302aa68..726ec89 100644 --- a/test/rules-file.c +++ b/test/rules-file.c @@ -106,6 +106,42 @@ main(int argc, char *argv[]) }; assert(test_rules(ctx, &test_utf_8_with_bom)); + struct test_data test_utf_16le_with_bom = { + .rules = "utf-16le_with_bom", + + .model = "my_model", .layout = "my_layout", .variant = "my_variant", + .options = "my_option", + + .keycodes = "my_keycodes", .types = "my_types", + .compat = "my_compat|some:compat", + .symbols = "my_symbols+extra_variant", + }; + assert(!test_rules(ctx, &test_utf_16le_with_bom)); + + struct test_data test_utf_16be_with_bom = { + .rules = "utf-16be_with_bom", + + .model = "my_model", .layout = "my_layout", .variant = "my_variant", + .options = "my_option", + + .keycodes = "my_keycodes", .types = "my_types", + .compat = "my_compat|some:compat", + .symbols = "my_symbols+extra_variant", + }; + assert(!test_rules(ctx, &test_utf_16be_with_bom)); + + struct test_data test_utf_32be = { + .rules = "utf-32be", + + .model = "my_model", .layout = "my_layout", .variant = "my_variant", + .options = "my_option", + + .keycodes = "my_keycodes", .types = "my_types", + .compat = "my_compat|some:compat", + .symbols = "my_symbols+extra_variant", + }; + assert(!test_rules(ctx, &test_utf_32be)); + struct test_data test1 = { .rules = "simple",