Compose: skip leading UTF-8 encoded BOM (U+FEFF)

A leading BOM is legal and is used as a signature — an indication that
an otherwise unmarked text file is in UTF-8.
See https://www.unicode.org/faq/utf_bom.html#bom5 for further details.
master
Pierre Le Marre 2023-10-29 07:20:29 +01:00 committed by Wismill
parent 6073565903
commit 59886e4183
2 changed files with 15 additions and 0 deletions

View File

@ -534,6 +534,10 @@ initial:
production.mods = 0;
production.modmask = 0;
/* Skip UTF-8 encoded BOM (U+FEFF) */
/* See: https://www.unicode.org/faq/utf_bom.html#bom5 */
scanner_str(s, "\xef\xbb\xbf", 3);
/* fallthrough */
initial_eol:

View File

@ -172,6 +172,16 @@ test_compose_seq_buffer(struct xkb_context *ctx, const char *buffer, ...)
return ok;
}
/*
 * Regression test: a Compose buffer that begins with the UTF-8 encoded
 * byte order mark (bytes EF BB BF, i.e. U+FEFF) directly followed by a rule.
 *
 * The check is delegated to test_compose_seq_buffer(); the test fails (via
 * assert) if that helper returns false.
 *
 * NOTE(review): the variadic arguments are presumably one
 * (keysym, feed result, status, expected string, expected keysym) tuple
 * terminated by XKB_KEY_NoSymbol -- confirm against the helper's definition.
 */
static void
test_compose_utf8_bom(struct xkb_context *ctx)
{
    /* BOM bytes immediately followed by a single "<A> : X" rule. */
    static const char bom_then_rule[] = "\xef\xbb\xbf<A> : X";

    assert(test_compose_seq_buffer(ctx, bom_then_rule,
                                   XKB_KEY_A,
                                   XKB_COMPOSE_FEED_ACCEPTED,
                                   XKB_COMPOSE_COMPOSED,
                                   "X",
                                   XKB_KEY_X,
                                   XKB_KEY_NoSymbol));
}
static void
test_seqs(struct xkb_context *ctx)
{
@ -723,6 +733,7 @@ main(int argc, char *argv[])
unsetenv("XLOCALEDIR");
#endif
test_compose_utf8_bom(ctx);
test_seqs(ctx);
test_conflicting(ctx);
test_XCOMPOSEFILE(ctx);