rules: early detection of invalid encoding

master
Pierre Le Marre 2023-10-30 15:51:34 +01:00 committed by Wismill
parent 82e9293e12
commit 3aaa4e2a53
5 changed files with 52 additions and 9 deletions

View File

@ -1084,29 +1084,36 @@ read_rules_file(struct xkb_context *ctx,
FILE *file, FILE *file,
const char *path) const char *path)
{ {
bool ret = false; bool ret;
char *string; char *string;
size_t size; size_t size;
struct scanner scanner; struct scanner scanner;
ret = map_file(file, &string, &size); if (!map_file(file, &string, &size)) {
if (!ret) {
log_err(ctx, XKB_LOG_MESSAGE_NO_ID, log_err(ctx, XKB_LOG_MESSAGE_NO_ID,
"Couldn't read rules file \"%s\": %s\n", "Couldn't read rules file \"%s\": %s\n",
path, strerror(errno)); path, strerror(errno));
goto out; return false;
} }
scanner_init(&scanner, matcher->ctx, string, size, path, NULL); scanner_init(&scanner, matcher->ctx, string, size, path, NULL);
/* Skip UTF-8 encoded BOM (U+FEFF) */ /* Basic detection of wrong character encoding.
/* See: https://www.unicode.org/faq/utf_bom.html#bom5 */ The first character relevant to the grammar must be ASCII:
scanner_str(&scanner, "\xef\xbb\xbf", 3); whitespace, !, / (for comment) */
if (!scanner_check_supported_char_encoding(&scanner)) {
scanner_err(&scanner,
"This could be a file encoding issue. "
"Supported encodings must be backward compatible with ASCII.");
scanner_err(&scanner,
"E.g. ISO/CEI 8859 and UTF-8 are supported "
"but UTF-16, UTF-32 and CP1026 are not.");
unmap_file(string, size);
return false;
}
ret = matcher_match(matcher, &scanner, include_depth, string, size, path); ret = matcher_match(matcher, &scanner, include_depth, string, size, path);
unmap_file(string, size); unmap_file(string, size);
out:
return ret; return ret;
} }

Binary file not shown.

Binary file not shown.

BIN
test/data/rules/utf-32be Normal file

Binary file not shown.

View File

@ -106,6 +106,42 @@ main(int argc, char *argv[])
}; };
assert(test_rules(ctx, &test_utf_8_with_bom)); assert(test_rules(ctx, &test_utf_8_with_bom));
struct test_data test_utf_16le_with_bom = {
.rules = "utf-16le_with_bom",
.model = "my_model", .layout = "my_layout", .variant = "my_variant",
.options = "my_option",
.keycodes = "my_keycodes", .types = "my_types",
.compat = "my_compat|some:compat",
.symbols = "my_symbols+extra_variant",
};
assert(!test_rules(ctx, &test_utf_16le_with_bom));
struct test_data test_utf_16be_with_bom = {
.rules = "utf-16be_with_bom",
.model = "my_model", .layout = "my_layout", .variant = "my_variant",
.options = "my_option",
.keycodes = "my_keycodes", .types = "my_types",
.compat = "my_compat|some:compat",
.symbols = "my_symbols+extra_variant",
};
assert(!test_rules(ctx, &test_utf_16be_with_bom));
struct test_data test_utf_32be = {
.rules = "utf-32be",
.model = "my_model", .layout = "my_layout", .variant = "my_variant",
.options = "my_option",
.keycodes = "my_keycodes", .types = "my_types",
.compat = "my_compat|some:compat",
.symbols = "my_symbols+extra_variant",
};
assert(!test_rules(ctx, &test_utf_32be));
struct test_data test1 = { struct test_data test1 = {
.rules = "simple", .rules = "simple",