xkbcli-compile-compose: Fix string result escaping
Currently the result string is not escaped and may produce invalid results. Fixed by introducing an ad-hoc escape function and relative tests.master
parent
d826d70b9b
commit
0a577a0998
|
@ -0,0 +1,95 @@
|
|||
/*
|
||||
* Copyright © 2023 Pierre Le Marre <dev@wismill.eu>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef COMPOSE_DUMP_H
|
||||
#define COMPOSE_DUMP_H
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "src/utils.h"
|
||||
|
||||
/* Ad-hoc escaping for UTF-8 string
|
||||
*
|
||||
* Note that it only escapes the strict minimum to get a valid Compose file.
|
||||
* It also escapes hexadecimal digits after an hexadecimal escape. This is not
|
||||
* strictly needed by the current implementation: "\x0abcg" parses as "␊bcg",
|
||||
* but better be cautious than sorry and produce "\x0a\x62\x63g" instead.
|
||||
* In the latter string there is no ambiguity and no need to know the maximum
|
||||
* number of digits supported by the escape sequence.
|
||||
*/
|
||||
static inline char*
|
||||
escape_utf8_string_literal(const char *from)
|
||||
{
|
||||
const size_t length = strlen(from);
|
||||
/* Longest escape is converting ASCII character to "\xNN" */
|
||||
char* to = calloc(4 * length + 1, sizeof(to));
|
||||
if (!to)
|
||||
return NULL;
|
||||
|
||||
size_t t = 0;
|
||||
bool previous_is_hex_escape = false;
|
||||
uint8_t nbytes = 0;
|
||||
for (size_t f = 0; f < length;) {
|
||||
if ((unsigned char) from[f] < 0x80) {
|
||||
/* ASCII */
|
||||
if (from[f] <= 0x10 || from[f] == 0x7f ||
|
||||
(is_xdigit(from[f]) && previous_is_hex_escape))
|
||||
{
|
||||
/* Control character or
|
||||
hexadecimal digit following an hexadecimal escape */
|
||||
snprintf_safe(&to[t], 5, "\\x%02x", from[f]);
|
||||
t += 4;
|
||||
previous_is_hex_escape = true;
|
||||
} else if (from[f] == '"' || from[f] == '\\') {
|
||||
/* Quote and backslash */
|
||||
snprintf_safe(&to[t], 3, "\\%c", from[f]);
|
||||
t += 2;
|
||||
previous_is_hex_escape = false;
|
||||
} else {
|
||||
/* Other characters */
|
||||
to[t++] = from[f];
|
||||
previous_is_hex_escape = false;
|
||||
}
|
||||
f++;
|
||||
continue;
|
||||
}
|
||||
/* Test next byte for the next Unicode codepoint’s bytes count */
|
||||
else if ((unsigned char) from[f] < 0xe0)
|
||||
nbytes = 2;
|
||||
else if ((unsigned char) from[f] < 0xf0)
|
||||
nbytes = 3;
|
||||
else
|
||||
nbytes = 4;
|
||||
memcpy(&to[t], &from[f], nbytes);
|
||||
t += nbytes;
|
||||
f += nbytes;
|
||||
previous_is_hex_escape = false;
|
||||
}
|
||||
to[t++] = '\0';
|
||||
return realloc(to, t);
|
||||
}
|
||||
|
||||
#endif
|
|
@ -468,6 +468,24 @@ resolve_modifier(const char *name)
|
|||
return XKB_MOD_INVALID;
|
||||
}
|
||||
|
||||
/* Parse a string literal ("...") and return the corresponding unescaped string,
|
||||
* or NULL if it fails.
|
||||
* This is aimed only for testing (un)escaping characters. */
|
||||
char *
|
||||
parse_string_literal(struct xkb_context *ctx, const char *string)
|
||||
{
|
||||
struct scanner s;
|
||||
union lvalue val;
|
||||
scanner_init(&s, ctx, string, strlen(string), "(unamed)", NULL);
|
||||
switch (lex(&s, &val)) {
|
||||
case TOK_STRING:
|
||||
return strdup(val.string.str);
|
||||
default:
|
||||
fprintf(stderr, "ERROR: %s\n", s.s);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
parse(struct xkb_compose_table *table, struct scanner *s,
|
||||
unsigned include_depth);
|
||||
|
|
|
@ -27,6 +27,9 @@
|
|||
#define MAX_LHS_LEN 10
|
||||
#define MAX_INCLUDE_DEPTH 5
|
||||
|
||||
char *
|
||||
parse_string_literal(struct xkb_context *ctx, const char *string);
|
||||
|
||||
bool
|
||||
parse_string(struct xkb_compose_table *table,
|
||||
const char *string, size_t len,
|
||||
|
|
128
test/compose.c
128
test/compose.c
|
@ -22,10 +22,14 @@
|
|||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include <time.h>
|
||||
|
||||
#include "xkbcommon/xkbcommon-compose.h"
|
||||
|
||||
#include "test.h"
|
||||
#include "src/utf8.h"
|
||||
#include "src/compose/parser.h"
|
||||
#include "src/compose/dump.h"
|
||||
|
||||
static const char *
|
||||
compose_status_string(enum xkb_compose_status status)
|
||||
|
@ -769,18 +773,121 @@ test_traverse(struct xkb_context *ctx)
|
|||
}
|
||||
|
||||
static void
|
||||
test_escape_sequences(struct xkb_context *ctx)
|
||||
test_decode_escape_sequences(struct xkb_context *ctx)
|
||||
{
|
||||
/* The following escape sequences should be ignored:
|
||||
* • \401 overflows
|
||||
* • \0 and \x0 produce NULL
|
||||
*/
|
||||
const char *table_string = "<o> <e> : \"\\401f\\x0o\\0o\" X\n";
|
||||
const char table_string_1[] = "<o> <e> : \"\\401f\\x0o\\0o\" X\n";
|
||||
|
||||
assert(test_compose_seq_buffer(ctx, table_string,
|
||||
assert(test_compose_seq_buffer(ctx, table_string_1,
|
||||
XKB_KEY_o, XKB_COMPOSE_FEED_ACCEPTED, XKB_COMPOSE_COMPOSING, "", XKB_KEY_NoSymbol,
|
||||
XKB_KEY_e, XKB_COMPOSE_FEED_ACCEPTED, XKB_COMPOSE_COMPOSED, "foo", XKB_KEY_X,
|
||||
XKB_KEY_NoSymbol));
|
||||
|
||||
/* Test various cases */
|
||||
const char table_string_2[] =
|
||||
"<a> : \"\\x0abcg\\\"x\" A\n" /* hexadecimal sequence has max 2 chars */
|
||||
"<b> : \"éxyz\" B\n" /* non-ASCII (2 bytes) */
|
||||
"<c> : \"€xyz\" C\n" /* non-ASCII (3 bytes) */
|
||||
"<d> : \"✨xyz\" D\n" /* non-ASCII (4 bytes) */
|
||||
"<e> : \"✨\\x0aé\\x0a€x\\\"\" E\n"
|
||||
"<f> : \"\" F\n";
|
||||
|
||||
assert(test_compose_seq_buffer(ctx, table_string_2,
|
||||
XKB_KEY_a, XKB_COMPOSE_FEED_ACCEPTED, XKB_COMPOSE_COMPOSED, "\x0a""bcg\"x", XKB_KEY_A,
|
||||
XKB_KEY_b, XKB_COMPOSE_FEED_ACCEPTED, XKB_COMPOSE_COMPOSED, "éxyz", XKB_KEY_B,
|
||||
XKB_KEY_c, XKB_COMPOSE_FEED_ACCEPTED, XKB_COMPOSE_COMPOSED, "€xyz", XKB_KEY_C,
|
||||
XKB_KEY_d, XKB_COMPOSE_FEED_ACCEPTED, XKB_COMPOSE_COMPOSED, "✨xyz", XKB_KEY_D,
|
||||
XKB_KEY_e, XKB_COMPOSE_FEED_ACCEPTED, XKB_COMPOSE_COMPOSED, "✨\x0aé\x0a€x\"", XKB_KEY_E,
|
||||
XKB_KEY_f, XKB_COMPOSE_FEED_ACCEPTED, XKB_COMPOSE_COMPOSED, "", XKB_KEY_F,
|
||||
XKB_KEY_NoSymbol));
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
random_non_null_unicode_char(bool ascii)
|
||||
{
|
||||
if (ascii)
|
||||
return 0x01 + (rand() % 0x80);
|
||||
switch (rand() % 5) {
|
||||
case 0:
|
||||
/* U+0080..U+07FF: 2 bytes in UTF-8 */
|
||||
return 0x80 + (rand() % 0x800);
|
||||
case 1:
|
||||
/* U+0800..U+FFFF: 3 bytes in UTF-8 */
|
||||
return 0x800 + (rand() % 0x10000);
|
||||
case 2:
|
||||
/* U+10000..U+10FFFF: 4 bytes in UTF-8 */
|
||||
return 0x10000 + (rand() % 0x110000);
|
||||
default:
|
||||
/* NOTE: Higher probability for ASCII */
|
||||
/* U+0001..U+007F: 1 byte in UTF-8 */
|
||||
return 0x01 + (rand() % 0x80);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
test_encode_escape_sequences(struct xkb_context *ctx)
|
||||
{
|
||||
char *escaped;
|
||||
|
||||
/* Test empty string */
|
||||
escaped = escape_utf8_string_literal("");
|
||||
assert_streq_not_null("Empty string", "", escaped);
|
||||
free(escaped);
|
||||
|
||||
/* Test specific ASCII characters: ", \ */
|
||||
escaped = escape_utf8_string_literal("\"\\");
|
||||
assert_streq_not_null("Quote and backslash", "\\\"\\\\", escaped);
|
||||
free(escaped);
|
||||
|
||||
/* Test round-trip of random strings */
|
||||
# define SAMPLE_SIZE 1000
|
||||
# define MIN_CODE_POINT 0x0001
|
||||
# define MAX_CODE_POINTS_COUNT 15
|
||||
char buf[1 + MAX_CODE_POINTS_COUNT * 4];
|
||||
for (int ascii = 1; ascii >= 0; ascii--) {
|
||||
for (size_t s = 0; s < SAMPLE_SIZE; s++) {
|
||||
/* Create the string */
|
||||
size_t length = 1 + (rand() % MAX_CODE_POINTS_COUNT);
|
||||
size_t c = 0;
|
||||
for (size_t idx = 0; idx < length; idx++) {
|
||||
int nbytes;
|
||||
/* Get a random Unicode code point and encode it in UTF-8 */
|
||||
do {
|
||||
const uint32_t cp = random_non_null_unicode_char(ascii);
|
||||
nbytes = utf32_to_utf8(cp, &buf[c]);
|
||||
} while (!nbytes); /* Handle invalid code point in UTF-8 */
|
||||
c += nbytes - 1;
|
||||
assert(c <= sizeof(buf) - 1);
|
||||
}
|
||||
assert_printf(buf[c] == '\0', "NULL-terminated string\n");
|
||||
assert_printf(strlen(buf) == c, "Contains no NULL char\n");
|
||||
assert_printf(is_valid_utf8(buf, c),
|
||||
"Invalid input UTF-8 string: \"%s\"\n", buf);
|
||||
/* Escape the string */
|
||||
escaped = escape_utf8_string_literal(buf);
|
||||
if (!escaped)
|
||||
break;
|
||||
assert_printf(is_valid_utf8(escaped, strlen(escaped)),
|
||||
"Invalid input UTF-8 string: %s\n", escaped);
|
||||
char *string_literal = asprintf_safe("\"%s\"", escaped);
|
||||
if (!string_literal) {
|
||||
free(escaped);
|
||||
break;
|
||||
}
|
||||
/* Unescape the string */
|
||||
char *unescaped = parse_string_literal(ctx, string_literal);
|
||||
assert_streq_not_null("Escaped string", buf, unescaped);
|
||||
free(unescaped);
|
||||
free(string_literal);
|
||||
free(escaped);
|
||||
}
|
||||
}
|
||||
# undef SAMPLE_SIZE
|
||||
# undef MIN_CODE_POINT
|
||||
# undef MAX_CODE_POINTS_COUNT
|
||||
}
|
||||
|
||||
int
|
||||
|
@ -791,10 +898,20 @@ main(int argc, char *argv[])
|
|||
ctx = test_get_context(CONTEXT_NO_FLAG);
|
||||
assert(ctx);
|
||||
|
||||
/* Initialize pseudo-random generator with program arg or current time */
|
||||
int seed;
|
||||
if (argc == 2) {
|
||||
seed = atoi(argv[1]);
|
||||
} else {
|
||||
seed = time(NULL);
|
||||
}
|
||||
fprintf(stderr, "Seed for the pseudo-random generator: %d\n", seed);
|
||||
srand(seed);
|
||||
|
||||
/*
|
||||
* Ensure no environment variables but “top_srcdir” is set. This ensures
|
||||
* that user Compose file paths are unset before the tests and set
|
||||
* explicitely when necessary.
|
||||
* explicitly when necessary.
|
||||
*/
|
||||
#ifdef __linux__
|
||||
const char *srcdir = getenv("top_srcdir");
|
||||
|
@ -818,7 +935,8 @@ main(int argc, char *argv[])
|
|||
test_include(ctx);
|
||||
test_override(ctx);
|
||||
test_traverse(ctx);
|
||||
test_escape_sequences(ctx);
|
||||
test_decode_escape_sequences(ctx);
|
||||
test_encode_escape_sequences(ctx);
|
||||
|
||||
xkb_context_unref(ctx);
|
||||
return 0;
|
||||
|
|
10
test/test.h
10
test/test.h
|
@ -34,6 +34,16 @@
|
|||
/* Automake test exit code to signify SKIP (à la PASS, FAIL, etc). */
|
||||
#define SKIP_TEST 77
|
||||
|
||||
#define assert_printf(cond, ...) \
|
||||
if (!(cond)) { \
|
||||
fprintf(stderr, "Assertion failure: " __VA_ARGS__); \
|
||||
assert(cond); \
|
||||
}
|
||||
|
||||
#define assert_streq_not_null(test_name, expected, got) \
|
||||
assert_printf(streq_not_null(expected, got), \
|
||||
test_name ". Expected \"%s\", got: \"%s\"\n", expected, got)
|
||||
|
||||
/* The offset between KEY_* numbering, and keycodes in the XKB evdev
|
||||
* dataset. */
|
||||
#define EVDEV_OFFSET 8
|
||||
|
|
|
@ -25,12 +25,12 @@
|
|||
|
||||
#include <getopt.h>
|
||||
#include <locale.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "xkbcommon/xkbcommon.h"
|
||||
#include "xkbcommon/xkbcommon-keysyms.h"
|
||||
#include "xkbcommon/xkbcommon-compose.h"
|
||||
#include "src/compose/dump.h"
|
||||
|
||||
static void
|
||||
usage(FILE *fp, char *progname)
|
||||
|
@ -56,7 +56,7 @@ usage(FILE *fp, char *progname)
|
|||
);
|
||||
}
|
||||
|
||||
static void
|
||||
static bool
|
||||
print_compose_table_entry(struct xkb_compose_table_entry *entry)
|
||||
{
|
||||
size_t nsyms;
|
||||
|
@ -69,10 +69,17 @@ print_compose_table_entry(struct xkb_compose_table_entry *entry)
|
|||
printf(" ");
|
||||
}
|
||||
}
|
||||
printf(":");
|
||||
printf(" : ");
|
||||
const char *utf8 = xkb_compose_table_entry_utf8(entry);
|
||||
if (*utf8 != '\0') {
|
||||
printf(" \"%s\"", utf8);
|
||||
char *escaped = escape_utf8_string_literal(utf8);
|
||||
if (!escaped) {
|
||||
fprintf(stderr, "ERROR: Cannot escape the string: allocation error\n");
|
||||
return false;
|
||||
} else {
|
||||
printf(" \"%s\"", escaped);
|
||||
free(escaped);
|
||||
}
|
||||
}
|
||||
const xkb_keysym_t keysym = xkb_compose_table_entry_keysym(entry);
|
||||
if (keysym != XKB_KEY_NoSymbol) {
|
||||
|
@ -80,6 +87,7 @@ print_compose_table_entry(struct xkb_compose_table_entry *entry)
|
|||
printf(" %s", buf);
|
||||
}
|
||||
printf("\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
int
|
||||
|
@ -182,10 +190,15 @@ main(int argc, char *argv[])
|
|||
struct xkb_compose_table_iterator *iter = xkb_compose_table_iterator_new(compose_table);
|
||||
struct xkb_compose_table_entry *entry;
|
||||
while ((entry = xkb_compose_table_iterator_next(iter))) {
|
||||
print_compose_table_entry(entry);
|
||||
if (!print_compose_table_entry(entry)) {
|
||||
ret = EXIT_FAILURE;
|
||||
goto entry_error;
|
||||
}
|
||||
xkb_compose_table_iterator_free(iter);
|
||||
}
|
||||
ret = EXIT_SUCCESS;
|
||||
|
||||
entry_error:
|
||||
xkb_compose_table_iterator_free(iter);
|
||||
out:
|
||||
xkb_compose_table_unref(compose_table);
|
||||
file_error:
|
||||
|
|
Loading…
Reference in New Issue