xkbcli-compile-compose: Fix string result escaping
Currently the result string is not escaped, so the tool may produce an invalid Compose file. This is fixed by introducing an ad-hoc escape function and the related tests.
master
parent
d826d70b9b
commit
0a577a0998
|
@ -0,0 +1,95 @@
|
||||||
|
/*
|
||||||
|
* Copyright © 2023 Pierre Le Marre <dev@wismill.eu>
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
* DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef COMPOSE_DUMP_H
|
||||||
|
#define COMPOSE_DUMP_H
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include "src/utils.h"
|
||||||
|
|
||||||
|
/* Ad-hoc escaping of a UTF-8 string, to embed it in a Compose string literal.
 *
 * Note that it only escapes the strict minimum to get a valid Compose file:
 *  - ASCII control characters (U+0001..U+001F and U+007F) become "\xNN";
 *  - the quote and the backslash are prefixed with a backslash;
 *  - everything else, including multi-byte sequences, is copied verbatim.
 *
 * It also escapes hexadecimal digits after an hexadecimal escape. This is not
 * strictly needed by the current implementation: "\x0abcg" parses as "␊bcg",
 * but better be cautious than sorry and produce "\x0a\x62\x63g" instead.
 * In the latter string there is no ambiguity and no need to know the maximum
 * number of digits supported by the escape sequence.
 *
 * The input is not validated as UTF-8. A multi-byte sequence truncated by the
 * end of the string is copied only up to the terminating NUL.
 *
 * Returns a newly allocated NUL-terminated string that the caller must free(),
 * or NULL if the allocation fails.
 */
static inline char *
escape_utf8_string_literal(const char *from)
{
    const size_t length = strlen(from);

    /* Longest escape is converting ASCII character to "\xNN" */
    if (length > (SIZE_MAX - 1) / 4)
        return NULL;
    char *to = calloc(4 * length + 1, sizeof(*to));
    if (!to)
        return NULL;

    size_t t = 0;
    bool previous_is_hex_escape = false;
    for (size_t f = 0; f < length;) {
        const unsigned char c = (unsigned char) from[f];

        if (c < 0x80) {
            /* ASCII */
            if (c < 0x20 || c == 0x7f ||
                (previous_is_hex_escape && is_xdigit(from[f])))
            {
                /* Control character or
                   hexadecimal digit following an hexadecimal escape */
                snprintf_safe(&to[t], 5, "\\x%02x", (unsigned int) c);
                t += 4;
                previous_is_hex_escape = true;
            } else if (c == '"' || c == '\\') {
                /* Quote and backslash */
                snprintf_safe(&to[t], 3, "\\%c", from[f]);
                t += 2;
                previous_is_hex_escape = false;
            } else {
                /* Other characters */
                to[t++] = from[f];
                previous_is_hex_escape = false;
            }
            f++;
            continue;
        }

        /* Non-ASCII: derive the sequence length from the lead byte */
        size_t nbytes;
        if (c < 0xe0)
            nbytes = 2;
        else if (c < 0xf0)
            nbytes = 3;
        else
            nbytes = 4;

        /* Never read past the end of a truncated (invalid) sequence */
        const size_t remaining = length - f;
        if (nbytes > remaining)
            nbytes = remaining;

        memcpy(&to[t], &from[f], nbytes);
        t += nbytes;
        f += nbytes;
        previous_is_hex_escape = false;
    }
    to[t++] = '\0';

    /* Shrink to fit; if that fails the original (larger) buffer stays valid */
    char *shrunk = realloc(to, t);
    return shrunk ? shrunk : to;
}
|
||||||
|
|
||||||
|
#endif
|
|
@ -468,6 +468,24 @@ resolve_modifier(const char *name)
|
||||||
return XKB_MOD_INVALID;
|
return XKB_MOD_INVALID;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Parse a string literal ("...") and return the corresponding unescaped string,
|
||||||
|
* or NULL if it fails.
|
||||||
|
* This is aimed only for testing (un)escaping characters. */
|
||||||
|
char *
|
||||||
|
parse_string_literal(struct xkb_context *ctx, const char *string)
|
||||||
|
{
|
||||||
|
struct scanner s;
|
||||||
|
union lvalue val;
|
||||||
|
scanner_init(&s, ctx, string, strlen(string), "(unamed)", NULL);
|
||||||
|
switch (lex(&s, &val)) {
|
||||||
|
case TOK_STRING:
|
||||||
|
return strdup(val.string.str);
|
||||||
|
default:
|
||||||
|
fprintf(stderr, "ERROR: %s\n", s.s);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
parse(struct xkb_compose_table *table, struct scanner *s,
|
parse(struct xkb_compose_table *table, struct scanner *s,
|
||||||
unsigned include_depth);
|
unsigned include_depth);
|
||||||
|
|
|
@ -27,6 +27,9 @@
|
||||||
#define MAX_LHS_LEN 10
|
#define MAX_LHS_LEN 10
|
||||||
#define MAX_INCLUDE_DEPTH 5
|
#define MAX_INCLUDE_DEPTH 5
|
||||||
|
|
||||||
|
char *
|
||||||
|
parse_string_literal(struct xkb_context *ctx, const char *string);
|
||||||
|
|
||||||
bool
|
bool
|
||||||
parse_string(struct xkb_compose_table *table,
|
parse_string(struct xkb_compose_table *table,
|
||||||
const char *string, size_t len,
|
const char *string, size_t len,
|
||||||
|
|
128
test/compose.c
128
test/compose.c
|
@ -22,10 +22,14 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
#include <time.h>
|
||||||
|
|
||||||
#include "xkbcommon/xkbcommon-compose.h"
|
#include "xkbcommon/xkbcommon-compose.h"
|
||||||
|
|
||||||
#include "test.h"
|
#include "test.h"
|
||||||
|
#include "src/utf8.h"
|
||||||
|
#include "src/compose/parser.h"
|
||||||
|
#include "src/compose/dump.h"
|
||||||
|
|
||||||
static const char *
|
static const char *
|
||||||
compose_status_string(enum xkb_compose_status status)
|
compose_status_string(enum xkb_compose_status status)
|
||||||
|
@ -769,18 +773,121 @@ test_traverse(struct xkb_context *ctx)
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
test_escape_sequences(struct xkb_context *ctx)
|
test_decode_escape_sequences(struct xkb_context *ctx)
|
||||||
{
|
{
|
||||||
/* The following escape sequences should be ignored:
|
/* The following escape sequences should be ignored:
|
||||||
* • \401 overflows
|
* • \401 overflows
|
||||||
* • \0 and \x0 produce NULL
|
* • \0 and \x0 produce NULL
|
||||||
*/
|
*/
|
||||||
const char *table_string = "<o> <e> : \"\\401f\\x0o\\0o\" X\n";
|
const char table_string_1[] = "<o> <e> : \"\\401f\\x0o\\0o\" X\n";
|
||||||
|
|
||||||
assert(test_compose_seq_buffer(ctx, table_string,
|
assert(test_compose_seq_buffer(ctx, table_string_1,
|
||||||
XKB_KEY_o, XKB_COMPOSE_FEED_ACCEPTED, XKB_COMPOSE_COMPOSING, "", XKB_KEY_NoSymbol,
|
XKB_KEY_o, XKB_COMPOSE_FEED_ACCEPTED, XKB_COMPOSE_COMPOSING, "", XKB_KEY_NoSymbol,
|
||||||
XKB_KEY_e, XKB_COMPOSE_FEED_ACCEPTED, XKB_COMPOSE_COMPOSED, "foo", XKB_KEY_X,
|
XKB_KEY_e, XKB_COMPOSE_FEED_ACCEPTED, XKB_COMPOSE_COMPOSED, "foo", XKB_KEY_X,
|
||||||
XKB_KEY_NoSymbol));
|
XKB_KEY_NoSymbol));
|
||||||
|
|
||||||
|
/* Test various cases */
|
||||||
|
const char table_string_2[] =
|
||||||
|
"<a> : \"\\x0abcg\\\"x\" A\n" /* hexadecimal sequence has max 2 chars */
|
||||||
|
"<b> : \"éxyz\" B\n" /* non-ASCII (2 bytes) */
|
||||||
|
"<c> : \"€xyz\" C\n" /* non-ASCII (3 bytes) */
|
||||||
|
"<d> : \"✨xyz\" D\n" /* non-ASCII (4 bytes) */
|
||||||
|
"<e> : \"✨\\x0aé\\x0a€x\\\"\" E\n"
|
||||||
|
"<f> : \"\" F\n";
|
||||||
|
|
||||||
|
assert(test_compose_seq_buffer(ctx, table_string_2,
|
||||||
|
XKB_KEY_a, XKB_COMPOSE_FEED_ACCEPTED, XKB_COMPOSE_COMPOSED, "\x0a""bcg\"x", XKB_KEY_A,
|
||||||
|
XKB_KEY_b, XKB_COMPOSE_FEED_ACCEPTED, XKB_COMPOSE_COMPOSED, "éxyz", XKB_KEY_B,
|
||||||
|
XKB_KEY_c, XKB_COMPOSE_FEED_ACCEPTED, XKB_COMPOSE_COMPOSED, "€xyz", XKB_KEY_C,
|
||||||
|
XKB_KEY_d, XKB_COMPOSE_FEED_ACCEPTED, XKB_COMPOSE_COMPOSED, "✨xyz", XKB_KEY_D,
|
||||||
|
XKB_KEY_e, XKB_COMPOSE_FEED_ACCEPTED, XKB_COMPOSE_COMPOSED, "✨\x0aé\x0a€x\"", XKB_KEY_E,
|
||||||
|
XKB_KEY_f, XKB_COMPOSE_FEED_ACCEPTED, XKB_COMPOSE_COMPOSED, "", XKB_KEY_F,
|
||||||
|
XKB_KEY_NoSymbol));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Return a pseudo-random, non-zero code point in U+0001..U+10FFFF using rand().
 *
 * ascii: when true, the result is restricted to U+0001..U+007F.
 *
 * Otherwise one of the UTF-8 length classes is picked first, with a higher
 * probability for ASCII, then a code point is drawn inside that class.
 * Surrogates (U+D800..U+DFFF) are not filtered out: callers must reject them.
 */
static uint32_t
random_non_null_unicode_char(bool ascii)
{
    if (ascii)
        /* U+0001..U+007F: 1 byte in UTF-8 */
        return 0x01 + (rand() % (0x80 - 0x01));
    switch (rand() % 5) {
    case 0:
        /* U+0080..U+07FF: 2 bytes in UTF-8 */
        return 0x80 + (rand() % (0x800 - 0x80));
    case 1:
        /* U+0800..U+FFFF: 3 bytes in UTF-8 */
        return 0x800 + (rand() % (0x10000 - 0x800));
    case 2:
        /* U+10000..U+10FFFF: 4 bytes in UTF-8 */
        return 0x10000 + (rand() % (0x110000 - 0x10000));
    default:
        /* NOTE: Higher probability for ASCII */
        /* U+0001..U+007F: 1 byte in UTF-8 */
        return 0x01 + (rand() % (0x80 - 0x01));
    }
}
|
||||||
|
|
||||||
|
/* Check escape_utf8_string_literal(): two fixed cases first, then a round-trip
 * (escape, wrap in quotes, parse back with parse_string_literal()) of random
 * strings, ASCII-only in the first pass and mixed in the second one. */
static void
test_encode_escape_sequences(struct xkb_context *ctx)
{
    char *escaped;

    /* Test empty string */
    escaped = escape_utf8_string_literal("");
    assert_streq_not_null("Empty string", "", escaped);
    free(escaped);

    /* Test specific ASCII characters: ", \ */
    escaped = escape_utf8_string_literal("\"\\");
    assert_streq_not_null("Quote and backslash", "\\\"\\\\", escaped);
    free(escaped);

    /* Test round-trip of random strings */
#   define SAMPLE_SIZE 1000
#   define MAX_CODE_POINTS_COUNT 15
    /* Room for the longest string (4 bytes per code point) and its NUL */
    char buf[1 + MAX_CODE_POINTS_COUNT * 4];
    for (int ascii = 1; ascii >= 0; ascii--) {
        for (size_t s = 0; s < SAMPLE_SIZE; s++) {
            /* Create the string */
            const size_t length = 1 + (rand() % MAX_CODE_POINTS_COUNT);
            size_t c = 0;
            for (size_t idx = 0; idx < length; idx++) {
                int nbytes;
                /* Get a random Unicode code point and encode it in UTF-8 */
                do {
                    const uint32_t cp = random_non_null_unicode_char(ascii);
                    nbytes = utf32_to_utf8(cp, &buf[c]);
                } while (!nbytes); /* Handle invalid code point in UTF-8 */
                /* NOTE(review): the count presumably includes a trailing NUL,
                 * which the next code point overwrites — confirm */
                c += nbytes - 1;
                assert(c <= sizeof(buf) - 1);
            }
            assert_printf(buf[c] == '\0', "NULL-terminated string\n");
            assert_printf(strlen(buf) == c, "Contains no NULL char\n");
            assert_printf(is_valid_utf8(buf, c),
                          "Invalid input UTF-8 string: \"%s\"\n", buf);

            /* Escape the string; give up quietly on allocation failure */
            escaped = escape_utf8_string_literal(buf);
            if (!escaped)
                break;
            assert_printf(is_valid_utf8(escaped, strlen(escaped)),
                          "Invalid escaped UTF-8 string: %s\n", escaped);
            char *string_literal = asprintf_safe("\"%s\"", escaped);
            if (!string_literal) {
                free(escaped);
                break;
            }

            /* Unescape the string */
            char *unescaped = parse_string_literal(ctx, string_literal);
            assert_streq_not_null("Escaped string", buf, unescaped);
            free(unescaped);
            free(string_literal);
            free(escaped);
        }
    }
#   undef SAMPLE_SIZE
#   undef MAX_CODE_POINTS_COUNT
}
|
||||||
|
|
||||||
int
|
int
|
||||||
|
@ -791,10 +898,20 @@ main(int argc, char *argv[])
|
||||||
ctx = test_get_context(CONTEXT_NO_FLAG);
|
ctx = test_get_context(CONTEXT_NO_FLAG);
|
||||||
assert(ctx);
|
assert(ctx);
|
||||||
|
|
||||||
|
/* Initialize pseudo-random generator with program arg or current time */
|
||||||
|
int seed;
|
||||||
|
if (argc == 2) {
|
||||||
|
seed = atoi(argv[1]);
|
||||||
|
} else {
|
||||||
|
seed = time(NULL);
|
||||||
|
}
|
||||||
|
fprintf(stderr, "Seed for the pseudo-random generator: %d\n", seed);
|
||||||
|
srand(seed);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Ensure no environment variables but “top_srcdir” is set. This ensures
|
* Ensure no environment variables but “top_srcdir” is set. This ensures
|
||||||
* that user Compose file paths are unset before the tests and set
|
* that user Compose file paths are unset before the tests and set
|
||||||
* explicitely when necessary.
|
* explicitly when necessary.
|
||||||
*/
|
*/
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
const char *srcdir = getenv("top_srcdir");
|
const char *srcdir = getenv("top_srcdir");
|
||||||
|
@ -818,7 +935,8 @@ main(int argc, char *argv[])
|
||||||
test_include(ctx);
|
test_include(ctx);
|
||||||
test_override(ctx);
|
test_override(ctx);
|
||||||
test_traverse(ctx);
|
test_traverse(ctx);
|
||||||
test_escape_sequences(ctx);
|
test_decode_escape_sequences(ctx);
|
||||||
|
test_encode_escape_sequences(ctx);
|
||||||
|
|
||||||
xkb_context_unref(ctx);
|
xkb_context_unref(ctx);
|
||||||
return 0;
|
return 0;
|
||||||
|
|
10
test/test.h
10
test/test.h
|
@ -34,6 +34,16 @@
|
||||||
/* Automake test exit code to signify SKIP (à la PASS, FAIL, etc). */
|
/* Automake test exit code to signify SKIP (à la PASS, FAIL, etc). */
|
||||||
#define SKIP_TEST 77
|
#define SKIP_TEST 77
|
||||||
|
|
||||||
|
/* Like assert(cond), but first print a printf-style message on stderr when the
 * condition does not hold. The first variadic argument must be a string
 * literal: it is concatenated to the "Assertion failure: " prefix.
 *
 * The condition is evaluated exactly once. The do/while (0) wrapper makes the
 * macro a single statement, so it is safe in an unbraced if/else. */
#define assert_printf(cond, ...) \
    do { \
        if (!(cond)) { \
            fprintf(stderr, "Assertion failure: " __VA_ARGS__); \
            assert(0 && #cond); \
        } \
    } while (0)
|
||||||
|
|
||||||
|
/* Assert that streq_not_null(expected, got) holds, printing both strings
 * otherwise. test_name must be a string literal; it prefixes the message.
 *
 * A NULL `got` is printed as "(null)" rather than passed to "%s", which would
 * be undefined behaviour. `got` is evaluated more than once: pass an
 * expression without side effects.
 * NOTE(review): streq_not_null() is presumably false when `got` is NULL —
 * confirm against its definition. */
#define assert_streq_not_null(test_name, expected, got) \
    assert_printf(streq_not_null(expected, got), \
                  test_name ". Expected \"%s\", got: \"%s\"\n", \
                  expected, (got) ? (got) : "(null)")
|
||||||
|
|
||||||
/* The offset between KEY_* numbering, and keycodes in the XKB evdev
|
/* The offset between KEY_* numbering, and keycodes in the XKB evdev
|
||||||
* dataset. */
|
* dataset. */
|
||||||
#define EVDEV_OFFSET 8
|
#define EVDEV_OFFSET 8
|
||||||
|
|
|
@ -25,12 +25,12 @@
|
||||||
|
|
||||||
#include <getopt.h>
|
#include <getopt.h>
|
||||||
#include <locale.h>
|
#include <locale.h>
|
||||||
#include <stdio.h>
|
#include <stdbool.h>
|
||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
#include "xkbcommon/xkbcommon.h"
|
#include "xkbcommon/xkbcommon.h"
|
||||||
#include "xkbcommon/xkbcommon-keysyms.h"
|
#include "xkbcommon/xkbcommon-keysyms.h"
|
||||||
#include "xkbcommon/xkbcommon-compose.h"
|
#include "xkbcommon/xkbcommon-compose.h"
|
||||||
|
#include "src/compose/dump.h"
|
||||||
|
|
||||||
static void
|
static void
|
||||||
usage(FILE *fp, char *progname)
|
usage(FILE *fp, char *progname)
|
||||||
|
@ -56,7 +56,7 @@ usage(FILE *fp, char *progname)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static bool
|
||||||
print_compose_table_entry(struct xkb_compose_table_entry *entry)
|
print_compose_table_entry(struct xkb_compose_table_entry *entry)
|
||||||
{
|
{
|
||||||
size_t nsyms;
|
size_t nsyms;
|
||||||
|
@ -69,10 +69,17 @@ print_compose_table_entry(struct xkb_compose_table_entry *entry)
|
||||||
printf(" ");
|
printf(" ");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
printf(":");
|
printf(" : ");
|
||||||
const char *utf8 = xkb_compose_table_entry_utf8(entry);
|
const char *utf8 = xkb_compose_table_entry_utf8(entry);
|
||||||
if (*utf8 != '\0') {
|
if (*utf8 != '\0') {
|
||||||
printf(" \"%s\"", utf8);
|
char *escaped = escape_utf8_string_literal(utf8);
|
||||||
|
if (!escaped) {
|
||||||
|
fprintf(stderr, "ERROR: Cannot escape the string: allocation error\n");
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
printf(" \"%s\"", escaped);
|
||||||
|
free(escaped);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
const xkb_keysym_t keysym = xkb_compose_table_entry_keysym(entry);
|
const xkb_keysym_t keysym = xkb_compose_table_entry_keysym(entry);
|
||||||
if (keysym != XKB_KEY_NoSymbol) {
|
if (keysym != XKB_KEY_NoSymbol) {
|
||||||
|
@ -80,6 +87,7 @@ print_compose_table_entry(struct xkb_compose_table_entry *entry)
|
||||||
printf(" %s", buf);
|
printf(" %s", buf);
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
|
@ -182,10 +190,15 @@ main(int argc, char *argv[])
|
||||||
struct xkb_compose_table_iterator *iter = xkb_compose_table_iterator_new(compose_table);
|
struct xkb_compose_table_iterator *iter = xkb_compose_table_iterator_new(compose_table);
|
||||||
struct xkb_compose_table_entry *entry;
|
struct xkb_compose_table_entry *entry;
|
||||||
while ((entry = xkb_compose_table_iterator_next(iter))) {
|
while ((entry = xkb_compose_table_iterator_next(iter))) {
|
||||||
print_compose_table_entry(entry);
|
if (!print_compose_table_entry(entry)) {
|
||||||
|
ret = EXIT_FAILURE;
|
||||||
|
goto entry_error;
|
||||||
}
|
}
|
||||||
xkb_compose_table_iterator_free(iter);
|
}
|
||||||
|
ret = EXIT_SUCCESS;
|
||||||
|
|
||||||
|
entry_error:
|
||||||
|
xkb_compose_table_iterator_free(iter);
|
||||||
out:
|
out:
|
||||||
xkb_compose_table_unref(compose_table);
|
xkb_compose_table_unref(compose_table);
|
||||||
file_error:
|
file_error:
|
||||||
|
|
Loading…
Reference in New Issue