From 59886e4183f07a90b485d88603cc34a398fa618f Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Sun, 29 Oct 2023 07:20:29 +0100 Subject: [PATCH] Compose: skip leading UTF-8 encoded BOM (U+FEFF) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A leading BOM is legal and is used as a signature — an indication that an otherwise unmarked text file is in UTF-8. See: https://www.unicode.org/faq/utf_bom.html#bom5 for further details. --- src/compose/parser.c | 4 ++++ test/compose.c | 11 +++++++++++ 2 files changed, 15 insertions(+) diff --git a/src/compose/parser.c b/src/compose/parser.c index 5545a33..6740f21 100644 --- a/src/compose/parser.c +++ b/src/compose/parser.c @@ -534,6 +534,10 @@ initial: production.mods = 0; production.modmask = 0; + /* Skip UTF-8 encoded BOM (U+FEFF) */ + /* See: https://www.unicode.org/faq/utf_bom.html#bom5 */ + scanner_str(s, "\xef\xbb\xbf", 3); + /* fallthrough */ initial_eol: diff --git a/test/compose.c b/test/compose.c index 8c633d7..d7192f6 100644 --- a/test/compose.c +++ b/test/compose.c @@ -172,6 +172,16 @@ test_compose_seq_buffer(struct xkb_context *ctx, const char *buffer, ...) return ok; } +static void +test_compose_utf8_bom(struct xkb_context *ctx) +{ + const char *buffer = "\xef\xbb\xbf : X"; + assert(test_compose_seq_buffer(ctx, buffer, + XKB_KEY_A, XKB_COMPOSE_FEED_ACCEPTED, XKB_COMPOSE_COMPOSED, "X", XKB_KEY_X, + XKB_KEY_NoSymbol)); +} + + static void test_seqs(struct xkb_context *ctx) { @@ -723,6 +733,7 @@ main(int argc, char *argv[]) unsetenv("XLOCALEDIR"); #endif + test_compose_utf8_bom(ctx); test_seqs(ctx); test_conflicting(ctx); test_XCOMPOSEFILE(ctx);