diff --git a/Makefile.am b/Makefile.am index 0bae4ea..0f713ee 100644 --- a/Makefile.am +++ b/Makefile.am @@ -14,6 +14,7 @@ EXTRA_DIST = \ AM_CPPFLAGS = \ -DDFLT_XKB_CONFIG_ROOT='"$(XKBCONFIGROOT)"' \ + -DXLOCALEDIR='"$(XLOCALEDIR)"' \ -I$(top_srcdir)/src \ -I$(top_srcdir)/src/xkbcomp \ -I$(top_builddir)/src/xkbcomp \ @@ -32,11 +33,19 @@ xkbcommonincludedir = $(includedir)/xkbcommon xkbcommoninclude_HEADERS = \ xkbcommon/xkbcommon.h \ xkbcommon/xkbcommon-compat.h \ + xkbcommon/xkbcommon-compose.h \ xkbcommon/xkbcommon-keysyms.h \ xkbcommon/xkbcommon-names.h lib_LTLIBRARIES = libxkbcommon.la libxkbcommon_la_SOURCES = \ + src/compose/parser.c \ + src/compose/parser.h \ + src/compose/paths.c \ + src/compose/paths.h \ + src/compose/state.c \ + src/compose/table.c \ + src/compose/table.h \ src/xkbcomp/action.c \ src/xkbcomp/action.h \ src/xkbcomp/ast.h \ diff --git a/configure.ac b/configure.ac index 791e158..7449954 100644 --- a/configure.ac +++ b/configure.ac @@ -107,6 +107,14 @@ AC_ARG_WITH([xkb_config_root], [XKBCONFIGROOT="$xkb_base"]) AC_SUBST([XKBCONFIGROOT]) +# Define a configuration option for the X locale directory for compose +AC_ARG_WITH([x_locale_root], + [AS_HELP_STRING([--with-x-locale-root=], + [Set X locale root (default: $datadir/X11/locale)])], + [XLOCALEDIR="$withval"], + [XLOCALEDIR="$datadir/X11/locale"]) +AC_SUBST([XLOCALEDIR]) + AC_ARG_WITH([default_rules], [AS_HELP_STRING([--with-default-rules=], [Default XKB ruleset (default: evdev)])], @@ -189,4 +197,5 @@ AC_MSG_RESULT([ includedir: ${includedir} lib dir: ${libdir} XKB config root: ${XKBCONFIGROOT} + X11 locale root: ${XLOCALEDIR} ]) diff --git a/src/compose/parser.c b/src/compose/parser.c new file mode 100644 index 0000000..29229f3 --- /dev/null +++ b/src/compose/parser.c @@ -0,0 +1,634 @@ +/* + * Copyright © 2013 Ran Benita + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal 
in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/* errno is reported (via strerror) when a Compose file cannot be read. */
+#include <errno.h>
+
+#include "utils.h"
+#include "scanner-utils.h"
+#include "table.h"
+#include "paths.h"
+#include "utf8.h"
+#include "parser.h"
+
+/* Largest number of keysyms accepted on the left-hand side of a sequence. */
+#define MAX_LHS_LEN 10
+/* Deepest allowed nesting of "include" statements. */
+#define MAX_INCLUDE_DEPTH 5
+
+/* Number of slots in the keysym name cache below. */
+#define KEYSYM_FROM_NAME_CACHE_SIZE 8
+
+/*
+ * xkb_keysym_from_name() is fairly slow, because for internal reasons
+ * it must use strcasecmp().
+ * A small cache reduces about 20% from the compilation time of
+ * en_US.UTF-8/Compose.
+ *
+ * The slots are reused round-robin: @next is the slot overwritten by the
+ * next cache miss.
+ */
+struct keysym_from_name_cache {
+    struct {
+        char name[64];
+        xkb_keysym_t keysym;
+    } cache[KEYSYM_FROM_NAME_CACHE_SIZE];
+    unsigned next;
+};
+
+/*
+ * Look up the keysym called @name, consulting @cache first.
+ *
+ * @len is the size of @name as passed by the lexer (its buffer position
+ * after the terminating NUL was appended).  Names which do not fit in a
+ * cache slot are rejected with XKB_KEY_NoSymbol; this bound is also what
+ * makes the strcpy() below safe.
+ *
+ * On a miss the result of xkb_keysym_from_name() is stored in the slot
+ * at @cache->next (whatever that result is) and then returned.
+ */
+static xkb_keysym_t
+cached_keysym_from_name(struct keysym_from_name_cache *cache,
+                        const char *name, size_t len)
+{
+    xkb_keysym_t keysym;
+
+    if (len >= sizeof(cache->cache[0].name))
+        return XKB_KEY_NoSymbol;
+
+    for (unsigned i = 0; i < KEYSYM_FROM_NAME_CACHE_SIZE; i++)
+        if (streq(cache->cache[i].name, name))
+            return cache->cache[i].keysym;
+
+    keysym = xkb_keysym_from_name(name, XKB_KEYSYM_NO_FLAGS);
+    strcpy(cache->cache[cache->next].name, name);
+    cache->cache[cache->next].keysym = keysym;
+    cache->next = (cache->next + 1) % KEYSYM_FROM_NAME_CACHE_SIZE;
+    return keysym;
+}
+
+/*
+ * Grammar adapted from libX11/modules/im/ximcp/imLcPrs.c.
+ * See also the XCompose(5) manpage.
+ *
+ * We don't support the MODIFIER rules, which are commented out.
+ *
+ * FILE          ::= { [PRODUCTION] [COMMENT] "\n" | INCLUDE }
+ * INCLUDE       ::= "include" '"' INCLUDE_STRING '"'
+ * PRODUCTION    ::= LHS ":" RHS [ COMMENT ]
+ * COMMENT       ::= "#" {<any character except null or newline>}
+ * LHS           ::= EVENT { EVENT }
+ * EVENT         ::= "<" keysym ">"
+ * # EVENT         ::= [MODIFIER_LIST] "<" keysym ">"
+ * # MODIFIER_LIST ::= ("!" {MODIFIER} ) | "None"
+ * # MODIFIER      ::= ["~"] modifier_name
+ * RHS           ::= ( STRING | keysym | STRING keysym )
+ * STRING        ::= '"' { CHAR } '"'
+ * CHAR          ::= GRAPHIC_CHAR | ESCAPED_CHAR
+ * GRAPHIC_CHAR  ::= locale (codeset) dependent code
+ * ESCAPED_CHAR  ::= ('\\' | '\"' | OCTAL | HEX )
+ * OCTAL         ::= '\' OCTAL_CHAR [OCTAL_CHAR [OCTAL_CHAR]]
+ * OCTAL_CHAR    ::= (0|1|2|3|4|5|6|7)
+ * HEX           ::= '\' (x|X) HEX_CHAR [HEX_CHAR]
+ * HEX_CHAR      ::= (0|1|2|3|4|5|6|7|8|9|A|B|C|D|E|F|a|b|c|d|e|f)
+ *
+ * INCLUDE_STRING is a filesystem path, with the following %-expansions:
+ *     %% - '%'.
+ *     %H - The user's home directory (the $HOME environment variable).
+ *     %L - The name of the locale specific Compose file (e.g.,
+ *          "/usr/share/X11/locale/<localename>/Compose").
+ *     %S - The name of the system directory for Compose files (e.g.,
+ *          "/usr/share/X11/locale").
+ */
+
+/* Token kinds produced by lex() and lex_include_string(). */
+enum rules_token {
+    TOK_END_OF_FILE = 0,
+    TOK_END_OF_LINE,
+    TOK_INCLUDE,
+    TOK_INCLUDE_STRING,
+    TOK_LHS_KEYSYM,
+    TOK_COLON,
+    TOK_STRING,
+    TOK_RHS_KEYSYM,
+    TOK_ERROR
+};
+
+/* Values returned with some tokens, like yylval. */
+union lvalue {
+    const char *string;
+    xkb_keysym_t keysym;
+};
+
+/*
+ * Scan the next token of a Compose file.
+ *
+ * For TOK_LHS_KEYSYM and TOK_RHS_KEYSYM the keysym is stored in @val; for
+ * TOK_STRING @val->string points into the scanner's own buffer, which is
+ * reset when the next non-trivial token starts.  TOK_END_OF_LINE and
+ * TOK_END_OF_FILE are returned before that reset and leave @val alone.
+ * Lexical errors are reported on the scanner and yield TOK_ERROR.
+ *
+ * @s->priv must point to a struct keysym_from_name_cache.
+ */
+static enum rules_token
+lex(struct scanner *s, union lvalue *val)
+{
+    struct keysym_from_name_cache *cache = s->priv;
+
+skip_more_whitespace_and_comments:
+    /* Skip spaces. */
+    while (is_space(peek(s)))
+        if (next(s) == '\n')
+            return TOK_END_OF_LINE;
+
+    /* Skip comments. */
+    if (chr(s, '#')) {
+        while (!eof(s) && !eol(s)) next(s);
+        goto skip_more_whitespace_and_comments;
+    }
+
+    /* See if we're done. */
+    if (eof(s)) return TOK_END_OF_FILE;
+
+    /* New token. */
+    s->token_line = s->line;
+    s->token_column = s->column;
+    s->buf_pos = 0;
+
+    /* LHS Keysym. */
+    if (chr(s, '<')) {
+        /*
+         * Stop at end of input as well as at end of line, like every
+         * other scanning loop in this function, so that an unterminated
+         * '<' on the last line reaches the error below instead of
+         * looping.
+         */
+        while (peek(s) != '>' && !eol(s) && !eof(s))
+            buf_append(s, next(s));
+        if (!chr(s, '>')) {
+            scanner_err(s, "unterminated keysym literal");
+            return TOK_ERROR;
+        }
+        /* A failed append here also covers overflow in the loop above. */
+        if (!buf_append(s, '\0')) {
+            scanner_err(s, "keysym literal is too long");
+            return TOK_ERROR;
+        }
+        val->keysym = cached_keysym_from_name(cache, s->buf, s->buf_pos);
+        if (val->keysym == XKB_KEY_NoSymbol) {
+            scanner_err(s, "unrecognized keysym \"%s\" on left-hand side", s->buf);
+            return TOK_ERROR;
+        }
+        return TOK_LHS_KEYSYM;
+    }
+
+    /* Colon. */
+    if (chr(s, ':'))
+        return TOK_COLON;
+
+    /* String literal. */
+    if (chr(s, '\"')) {
+        while (!eof(s) && !eol(s) && peek(s) != '\"') {
+            if (chr(s, '\\')) {
+                uint8_t o;
+                if (chr(s, '\\')) {
+                    buf_append(s, '\\');
+                }
+                else if (chr(s, '"')) {
+                    buf_append(s, '"');
+                }
+                else if (chr(s, 'x') || chr(s, 'X')) {
+                    if (hex(s, &o))
+                        buf_append(s, (char) o);
+                    else
+                        scanner_warn(s, "illegal hexadecimal escape sequence in string literal");
+                }
+                else if (oct(s, &o)) {
+                    buf_append(s, (char) o);
+                }
+                else {
+                    scanner_warn(s, "unknown escape sequence (%c) in string literal", peek(s));
+                    /* Ignore. */
+                }
+            } else {
+                buf_append(s, next(s));
+            }
+        }
+        if (!chr(s, '\"')) {
+            scanner_err(s, "unterminated string literal");
+            return TOK_ERROR;
+        }
+        if (!buf_append(s, '\0')) {
+            scanner_err(s, "string literal is too long");
+            return TOK_ERROR;
+        }
+        /* buf_pos counts the terminating NUL; validate without it. */
+        if (!is_valid_utf8(s->buf, s->buf_pos - 1)) {
+            scanner_err(s, "string literal is not a valid UTF-8 string");
+            return TOK_ERROR;
+        }
+        val->string = s->buf;
+        return TOK_STRING;
+    }
+
+    /* RHS keysym or include. */
+    if (is_alpha(peek(s)) || peek(s) == '_') {
+        s->buf_pos = 0;
+        while (is_alnum(peek(s)) || peek(s) == '_')
+            buf_append(s, next(s));
+        if (!buf_append(s, '\0')) {
+            scanner_err(s, "identifier is too long");
+            return TOK_ERROR;
+        }
+
+        if (streq(s->buf, "include"))
+            return TOK_INCLUDE;
+
+        val->keysym = cached_keysym_from_name(cache, s->buf, s->buf_pos);
+        if (val->keysym == XKB_KEY_NoSymbol) {
+            scanner_err(s, "unrecognized keysym \"%s\" on right-hand side", s->buf);
+            return TOK_ERROR;
+        }
+        return TOK_RHS_KEYSYM;
+    }
+
+    /* Skip line. */
+    while (!eof(s) && !eol(s))
+        next(s);
+
+    scanner_err(s, "unrecognized token");
+    return TOK_ERROR;
+}
+
+/*
+ * Scan the quoted path which follows the "include" keyword, performing
+ * the %-expansions listed in the grammar comment above.
+ *
+ * On success returns TOK_INCLUDE_STRING with @val_out->string pointing at
+ * the expanded path in the scanner's buffer.  Returns TOK_END_OF_LINE if
+ * the line ends before a path is seen, and TOK_ERROR (after reporting)
+ * on malformed input or when an expansion fails or does not fit.
+ */
+static enum rules_token
+lex_include_string(struct scanner *s, struct xkb_compose_table *table,
+                   union lvalue *val_out)
+{
+    while (is_space(peek(s)))
+        if (next(s) == '\n')
+            return TOK_END_OF_LINE;
+
+    s->token_line = s->line;
+    s->token_column = s->column;
+    s->buf_pos = 0;
+
+    if (!chr(s, '\"')) {
+        scanner_err(s, "include statement must be followed by a path");
+        return TOK_ERROR;
+    }
+
+    while (!eof(s) && !eol(s) && peek(s) != '\"') {
+        if (chr(s, '%')) {
+            if (chr(s, '%')) {
+                buf_append(s, '%');
+            }
+            else if (chr(s, 'H')) {
+                const char *home = secure_getenv("HOME");
+                if (!home) {
+                    scanner_err(s, "%%H was used in an include statement, but the HOME environment variable is not set");
+                    return TOK_ERROR;
+                }
+                if (!buf_appends(s, home)) {
+                    scanner_err(s, "include path after expanding %%H is too long");
+                    return TOK_ERROR;
+                }
+            }
+            else if (chr(s, 'L')) {
+                /* The returned path is heap-allocated; free it on all paths. */
+                char *path = get_locale_compose_file_path(table->locale);
+                if (!path) {
+                    scanner_err(s, "failed to expand %%L to the locale Compose file");
+                    return TOK_ERROR;
+                }
+                if (!buf_appends(s, path)) {
+                    free(path);
+                    scanner_err(s, "include path after expanding %%L is too long");
+                    return TOK_ERROR;
+                }
+                free(path);
+            }
+            else if (chr(s, 'S')) {
+                const char *xlocaledir = get_xlocaledir_path();
+                if (!buf_appends(s, xlocaledir)) {
+                    scanner_err(s, "include path after expanding %%S is too long");
+                    return TOK_ERROR;
+                }
+            }
+            else {
+                scanner_err(s, "unknown %% format (%c) in include statement", peek(s));
+                return TOK_ERROR;
+            }
+        } else {
+            buf_append(s, next(s));
+        }
+    }
+    if (!chr(s, '\"')) {
+        scanner_err(s, "unterminated include statement");
+        return TOK_ERROR;
+    }
+    if (!buf_append(s, '\0')) {
+        scanner_err(s, "include path is too long");
+        return TOK_ERROR;
+    }
+    val_out->string = s->buf;
+    return TOK_INCLUDE_STRING;
+}
+
+/*
+ * One parsed line "LHS : RHS".
+ *
+ * @keysym is only meaningful when @has_keysym is set and @string only
+ * when @has_string is set; parse() leaves the other one uninitialized.
+ */
+struct production {
+    xkb_keysym_t lhs[MAX_LHS_LEN];
+    unsigned int len;
+    xkb_keysym_t keysym;
+    char string[256];
+    bool has_keysym;
+    bool has_string;
+};
+
+/*
+ * Append a fresh leaf node for @keysym to the table and return its index.
+ * The append may reallocate table->nodes, so callers must refetch any
+ * node pointers they hold.
+ */
+static uint32_t
+add_node(struct xkb_compose_table *table, xkb_keysym_t keysym)
+{
+    struct compose_node new = {
+        .keysym = keysym,
+        .next = 0,
+        .is_leaf = true,
+    };
+    darray_append(table->nodes, new);
+    return darray_size(table->nodes) - 1;
+}
+
+/*
+ * Insert @production into the trie of @table.
+ *
+ * Conflicts with sequences already in the table (prefix of / prefixed by /
+ * identical to an existing sequence) are reported as warnings on @s; see
+ * the individual messages for which side wins.
+ */
+static void
+add_production(struct xkb_compose_table *table, struct scanner *s,
+               const struct production *production)
+{
+    unsigned lhs_pos;
+    uint32_t curr;
+    struct compose_node *node;
+
+    curr = 0;
+    node = &darray_item(table->nodes, curr);
+
+    /*
+     * Insert the sequence to the trie, creating new nodes as needed.
+     *
+     * TODO: This can be sped up a bit by first trying the path that the
+     * previous production took, and only then doing the linear search
+     * through the trie levels.  This will work because sequences in the
+     * Compose files are often clustered by a common prefix; especially
+     * in the 1st and 2nd keysyms, which is where the largest variation
+     * (thus, longest search) is.
+     */
+    for (lhs_pos = 0; lhs_pos < production->len; lhs_pos++) {
+        /* Walk the sibling list of this level, extending it on a miss. */
+        while (production->lhs[lhs_pos] != node->keysym) {
+            if (node->next == 0) {
+                uint32_t next = add_node(table, production->lhs[lhs_pos]);
+                /* Refetch since add_node could have realloc()ed. */
+                node = &darray_item(table->nodes, curr);
+                node->next = next;
+            }
+
+            curr = node->next;
+            node = &darray_item(table->nodes, curr);
+        }
+
+        if (lhs_pos + 1 == production->len)
+            break;
+
+        /* More keysyms follow: this node must become an inner node. */
+        if (node->is_leaf) {
+            if (node->u.leaf.utf8 != 0 ||
+                node->u.leaf.keysym != XKB_KEY_NoSymbol) {
+                scanner_warn(s, "a sequence already exists which is a prefix of this sequence; overriding");
+                node->u.leaf.utf8 = 0;
+                node->u.leaf.keysym = XKB_KEY_NoSymbol;
+            }
+
+            {
+                uint32_t successor = add_node(table, production->lhs[lhs_pos + 1]);
+                /* Refetch since add_node could have realloc()ed. */
+                node = &darray_item(table->nodes, curr);
+                node->is_leaf = false;
+                node->u.successor = successor;
+            }
+        }
+
+        curr = node->u.successor;
+        node = &darray_item(table->nodes, curr);
+    }
+
+    if (!node->is_leaf) {
+        scanner_warn(s, "this compose sequence is a prefix of another; skipping line");
+        return;
+    }
+
+    if (node->u.leaf.utf8 != 0 || node->u.leaf.keysym != XKB_KEY_NoSymbol) {
+        bool same_string, same_keysym;
+
+        /*
+         * Only look at production->string / production->keysym when the
+         * matching has_* flag says they were filled in; otherwise they
+         * are uninitialized.  An absent part matches an absent part.
+         */
+        if (production->has_string)
+            same_string = node->u.leaf.utf8 != 0 &&
+                          streq(&darray_item(table->utf8, node->u.leaf.utf8),
+                                production->string);
+        else
+            same_string = node->u.leaf.utf8 == 0;
+
+        if (production->has_keysym)
+            same_keysym = node->u.leaf.keysym == production->keysym;
+        else
+            same_keysym = node->u.leaf.keysym == XKB_KEY_NoSymbol;
+
+        if (same_string && same_keysym) {
+            scanner_warn(s, "this compose sequence is a duplicate of another; skipping line");
+            return;
+        }
+        scanner_warn(s, "this compose sequence already exists; overriding");
+        /*
+         * NOTE(review): only the parts present in the new production are
+         * replaced below; a string or keysym from the older definition
+         * survives if the new one omits it.  Confirm this is intended.
+         */
+    }
+
+    if (production->has_string) {
+        /* Strings live back to back in table->utf8; store the offset. */
+        node->u.leaf.utf8 = darray_size(table->utf8);
+        darray_append_items(table->utf8, production->string,
+                            strlen(production->string) + 1);
+    }
+    if (production->has_keysym) {
+        node->u.leaf.keysym = production->keysym;
+    }
+}
+
+static bool
+parse(struct xkb_compose_table *table, struct scanner *s,
+      unsigned include_depth);
+
+/*
+ * Parse the Compose file at @path into @table on behalf of an "include"
+ * statement met by scanner @s (which is used for error reporting only).
+ *
+ * @include_depth is the nesting level of the including file; the include
+ * is refused once MAX_INCLUDE_DEPTH is reached.  Returns false if the
+ * file cannot be opened, read or parsed.
+ */
+static bool
+do_include(struct xkb_compose_table *table, struct scanner *s,
+           const char *path, unsigned include_depth)
+{
+    FILE *file;
+    bool ok;
+    const char *string;
+    size_t size;
+    struct scanner new_s;
+
+    if (include_depth >= MAX_INCLUDE_DEPTH) {
+        scanner_err(s, "maximum include depth (%d) exceeded; maybe there is an include loop?",
+                    MAX_INCLUDE_DEPTH);
+        return false;
+    }
+
+    file = fopen(path, "r");
+    if (!file) {
+        scanner_err(s, "failed to open included Compose file \"%s\": %s",
+                    path, strerror(errno));
+        return false;
+    }
+
+    ok = map_file(file, &string, &size);
+    if (!ok) {
+        scanner_err(s, "failed to read included Compose file \"%s\": %s",
+                    path, strerror(errno));
+        goto err_file;
+    }
+
+    scanner_init(&new_s, table->ctx, string, size, path);
+    /*
+     * lex() takes the keysym name cache from the scanner's priv pointer,
+     * so the nested scanner must share the including scanner's cache.
+     */
+    new_s.priv = s->priv;
+
+    ok = parse(table, &new_s, include_depth + 1);
+
+    unmap_file(string, size);
+err_file:
+    fclose(file);
+    return ok;
+}
+
+/*
+ * Parse the Compose text behind scanner @s into @table.
+ *
+ * The parser is a small goto-driven state machine; each label below is a
+ * state which reads one token and jumps to the next state.  A malformed
+ * line is skipped, and parsing only fails once more than MAX_ERRORS such
+ * lines were seen or an include fails.
+ *
+ * @include_depth is 0 for the top-level file and grows by one for every
+ * nested include (see do_include()).
+ */
+static bool
+parse(struct xkb_compose_table *table, struct scanner *s,
+      unsigned include_depth)
+{
+    enum rules_token tok;
+    union lvalue val;
+    struct production production;
+    enum { MAX_ERRORS = 10 };
+    int num_errors = 0;
+
+/* Start of a line: forget the previous production. */
+initial:
+    production.len = 0;
+    production.has_keysym = false;
+    production.has_string = false;
+
+    /* fallthrough */
+
+/* Start of a line, after any number of empty lines. */
+initial_eol:
+    switch (tok = lex(s, &val)) {
+    case TOK_END_OF_LINE:
+        goto initial_eol;
+    case TOK_END_OF_FILE:
+        goto finished;
+    case TOK_INCLUDE:
+        goto include;
+    case TOK_LHS_KEYSYM:
+        production.lhs[production.len++] = val.keysym;
+        goto lhs;
+    default:
+        goto unexpected;
+    }
+
+/* After the "include" keyword: expect the quoted path. */
+include:
+    switch (tok = lex_include_string(s, table, &val)) {
+    case TOK_INCLUDE_STRING:
+        goto include_eol;
+    default:
+        goto unexpected;
+    }
+
+/* After the include path: nothing else may follow on the line. */
+include_eol:
+    switch (tok = lex(s, &val)) {
+    case TOK_END_OF_LINE:
+        /*
+         * val.string still refers to the include path: lex() returns
+         * TOK_END_OF_LINE before it resets the scanner buffer or
+         * writes to val.
+         */
+        if (!do_include(table, s, val.string, include_depth))
+            goto fail;
+        goto initial;
+    default:
+        goto unexpected;
+    }
+
+/* Inside the left-hand side: more keysyms, or the colon. */
+lhs:
+    switch (tok = lex(s, &val)) {
+    case TOK_LHS_KEYSYM:
+        if (production.len + 1 > MAX_LHS_LEN) {
+            scanner_warn(s, "too many keysyms (%d) on left-hand side; skipping line",
+                         MAX_LHS_LEN + 1);
+            goto skip;
+        }
+        production.lhs[production.len++] = val.keysym;
+        goto lhs;
+    case TOK_COLON:
+        if (production.len <= 0) {
+            scanner_warn(s, "expected at least one keysym on left-hand side; skipping line");
+            goto skip;
+        }
+        goto rhs;
+    default:
+        goto unexpected;
+    }
+
+/* Inside the right-hand side: an optional string, then an optional keysym. */
+rhs:
+    switch (tok = lex(s, &val)) {
+    case TOK_STRING:
+        if (production.has_string) {
+            scanner_warn(s, "right-hand side can have at most one string; skipping line");
+            goto skip;
+        }
+        if (*val.string == '\0') {
+            scanner_warn(s, "right-hand side string must not be empty; skipping line");
+            goto skip;
+        }
+        /* The length check is what makes the strcpy() below safe. */
+        if (strlen(val.string) >= sizeof(production.string)) {
+            scanner_warn(s, "right-hand side string is too long; skipping line");
+            goto skip;
+        }
+        strcpy(production.string, val.string);
+        production.has_string = true;
+        goto rhs;
+    case TOK_RHS_KEYSYM:
+        if (production.has_keysym) {
+            scanner_warn(s, "right-hand side can have at most one keysym; skipping line");
+            goto skip;
+        }
+        production.keysym = val.keysym;
+        production.has_keysym = true;
+        /* fallthrough: the keysym ends the production, like end of line. */
+    case TOK_END_OF_LINE:
+        if (!production.has_string && !production.has_keysym) {
+            scanner_warn(s, "right-hand side must have at least one of string or keysym; skipping line");
+            goto skip;
+        }
+        add_production(table, s, &production);
+        goto initial;
+    default:
+        goto unexpected;
+    }
+
+/* A token which the current state cannot accept (or a lexer error). */
+unexpected:
+    /* For TOK_ERROR the lexer has already reported the problem. */
+    if (tok != TOK_ERROR)
+        scanner_err(s, "unexpected token");
+
+    num_errors++;
+    if (num_errors <= MAX_ERRORS)
+        goto skip;
+
+    scanner_err(s, "too many errors");
+    goto fail;
+
+fail:
+    scanner_err(s, "failed to parse file");
+    return false;
+
+/* Discard the rest of the current line and start over. */
+skip:
+    while (tok != TOK_END_OF_LINE && tok != TOK_END_OF_FILE)
+        tok = lex(s, &val);
+    goto initial;
+
+finished:
+    return true;
+}
+
+/*
+ * Parse @len bytes of Compose text at @string into @table.  @file_name is
+ * handed to the scanner together with the text.
+ *
+ * Sets up the keysym name cache which lex() expects in the scanner's priv
+ * pointer.  Returns false if parsing failed.
+ */
+bool
+parse_string(struct xkb_compose_table *table, const char *string, size_t len,
+             const char *file_name)
+{
+    struct scanner s;
+    struct keysym_from_name_cache cache;
+    scanner_init(&s, table->ctx, string, len, file_name);
+    memset(&cache, 0, sizeof(cache));
+    s.priv = &cache;
+    if (!parse(table, &s, 0))
+        return false;
+    /* Maybe the allocator can use the excess space. */
+    darray_shrink(table->nodes);
+    darray_shrink(table->utf8);
+    return true;
+}
+
+/*
+ * Map the contents of @file and parse them into @table; see
+ * parse_string().  @file is not closed here.
+ */
+bool
+parse_file(struct xkb_compose_table *table, FILE *file, const char *file_name)
+{
+    bool ok;
+    const char *string;
+    size_t size;
+
+    ok = map_file(file, &string, &size);
+    if (!ok) {
+        log_err(table->ctx, "Couldn't read Compose file %s: %s\n",
+                file_name, strerror(errno));
+        return false;
+    }
+
+    ok = parse_string(table, string, size, file_name);
+    unmap_file(string, size);
+    return ok;
+}
diff --git a/src/compose/parser.h b/src/compose/parser.h
new file mode 100644
index 0000000..3f64a07
--- /dev/null
+++ b/src/compose/parser.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright © 2013 Ran Benita
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef COMPOSE_PARSER_H
+#define COMPOSE_PARSER_H
+
+bool
+parse_string(struct xkb_compose_table *table,
+             const char *string, size_t len,
+             const char *file_name);
+
+bool
+parse_file(struct xkb_compose_table *table,
+           FILE *file, const char *file_name);
+
+#endif
diff --git a/src/compose/paths.c b/src/compose/paths.c
new file mode 100644
index 0000000..c96d7d2
--- /dev/null
+++ b/src/compose/paths.c
@@ -0,0 +1,204 @@
+/*
+ * Copyright © 2014 Ran Benita
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "utils.h"
+#include "paths.h"
+
+/* Which column of a "LEFT: RIGHT" file is the key; see resolve_name(). */
+enum resolve_name_direction {
+    LEFT_TO_RIGHT,
+    RIGHT_TO_LEFT,
+};
+
+/*
+ * Return the X locale directory: the XLOCALEDIR environment variable if
+ * it is available, otherwise the build-time XLOCALEDIR default.  The
+ * result is not owned by the caller.
+ */
+const char *
+get_xlocaledir_path(void)
+{
+    const char *dir = secure_getenv("XLOCALEDIR");
+    if (!dir)
+        dir = XLOCALEDIR;
+    return dir;
+}
+
+/*
+ * Files like compose.dir have the format LEFT: RIGHT.  Lookup @name in
+ * such a file and return its matching value, according to @direction.
+ * @filename is relative to the xlocaledir.
+ *
+ * Returns a newly allocated string which the caller must free(), or NULL
+ * if the file cannot be read, @name is not listed, or allocation fails.
+ */
+static char *
+resolve_name(const char *filename, enum resolve_name_direction direction,
+             const char *name)
+{
+    int ret;
+    bool ok;
+    const char *xlocaledir;
+    char path[512];
+    FILE *file;
+    const char *string, *end;
+    size_t string_size;
+    const char *s, *left, *right;
+    char *match;
+    size_t left_len, right_len, name_len;
+
+    xlocaledir = get_xlocaledir_path();
+
+    /* Treat a truncated path as a failure rather than opening it. */
+    ret = snprintf(path, sizeof(path), "%s/%s", xlocaledir, filename);
+    if (ret < 0 || (size_t) ret >= sizeof(path))
+        return NULL;
+
+    file = fopen(path, "r");
+    if (!file)
+        return NULL;
+
+    /* The mapping is used after the stream has been closed. */
+    ok = map_file(file, &string, &string_size);
+    fclose(file);
+    if (!ok)
+        return NULL;
+
+    s = string;
+    end = string + string_size;
+    name_len = strlen(name);
+    match = NULL;
+
+    while (s < end) {
+        /* Skip spaces. */
+        while (s < end && is_space(*s))
+            s++;
+
+        /* Skip comments. */
+        if (s < end && *s == '#') {
+            while (s < end && *s != '\n')
+                s++;
+            continue;
+        }
+
+        /* Get the left value. */
+        left = s;
+        while (s < end && !is_space(*s) && *s != ':')
+            s++;
+        left_len = s - left;
+
+        /* There's an optional colon between left and right. */
+        if (s < end && *s == ':')
+            s++;
+
+        /* Skip spaces. */
+        while (s < end && is_space(*s))
+            s++;
+
+        /* Get the right value. */
+        right = s;
+        while (s < end && !is_space(*s))
+            s++;
+        right_len = s - right;
+
+        /* Discard rest of line. */
+        while (s < end && *s != '\n')
+            s++;
+
+        /* The first matching line wins. */
+        if (direction == LEFT_TO_RIGHT) {
+            if (left_len == name_len && strncmp(left, name, left_len) == 0) {
+                match = strndup(right, right_len);
+                break;
+            }
+        }
+        else if (direction == RIGHT_TO_LEFT) {
+            if (right_len == name_len && strncmp(right, name, right_len) == 0) {
+                match = strndup(left, left_len);
+                break;
+            }
+        }
+    }
+
+    unmap_file(string, string_size);
+    return match;
+}
+
+/*
+ * Translate @locale through the locale.alias file.  If there is no alias
+ * (or the file cannot be read) a copy of @locale itself is returned.  The
+ * caller frees the result; NULL means allocation failed.
+ */
+char *
+resolve_locale(const char *locale)
+{
+    char *alias = resolve_name("locale.alias", LEFT_TO_RIGHT, locale);
+    return alias ? alias : strdup(locale);
+}
+
+/*
+ * Return the value of the XCOMPOSEFILE environment variable as given by
+ * secure_getenv(), or NULL if it is not available.
+ */
+const char *
+get_xcomposefile_path(void)
+{
+    return secure_getenv("XCOMPOSEFILE");
+}
+
+/*
+ * Return a newly allocated "$HOME/.XCompose" path, or NULL if HOME is not
+ * available or allocation fails.  The caller frees the result.
+ */
+char *
+get_home_xcompose_file_path(void)
+{
+    int ret;
+    const char *home;
+    char *path;
+
+    home = secure_getenv("HOME");
+    if (!home)
+        return NULL;
+
+    ret = asprintf(&path, "%s/.XCompose", home);
+    if (ret < 0)
+        return NULL;
+
+    return path;
+}
+
+/*
+ * Return a newly allocated path of the Compose file which compose.dir
+ * assigns to @locale, or NULL if there is none or allocation fails.  An
+ * entry which is not absolute is taken relative to the xlocaledir.  The
+ * caller frees the result.
+ */
+char *
+get_locale_compose_file_path(const char *locale)
+{
+    int ret;
+    const char *xlocaledir;
+    char *resolved;
+    char *path;
+
+    /*
+     * WARNING: Random workaround ahead.
+     *
+     * We currently do not support non-UTF-8 Compose files.  The C/POSIX
+     * locale is specified to be the default fallback locale with an
+     * ASCII charset.  But for some reason the compose.dir points the C
+     * locale to the iso8859-1/Compose file, which is not ASCII but
+     * ISO8859-1.  Since this is bound to happen a lot, and since our API
+     * is UTF-8 based, and since 99% of the time a C locale is really just
+     * a misconfiguration for UTF-8, let's do the most helpful thing.
+     */
+    if (streq(locale, "C"))
+        locale = "en_US.UTF-8";
+
+    resolved = resolve_name("compose.dir", RIGHT_TO_LEFT, locale);
+    if (!resolved)
+        return NULL;
+
+    if (resolved[0] == '/') {
+        /* Already absolute: hand the allocation over as is. */
+        path = resolved;
+    }
+    else {
+        xlocaledir = get_xlocaledir_path();
+        ret = asprintf(&path, "%s/%s", xlocaledir, resolved);
+        free(resolved);
+        if (ret < 0)
+            return NULL;
+    }
+
+    return path;
+}
diff --git a/src/compose/paths.h b/src/compose/paths.h
new file mode 100644
index 0000000..1d719af
--- /dev/null
+++ b/src/compose/paths.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright © 2014 Ran Benita
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef COMPOSE_RESOLVE_H
+#define COMPOSE_RESOLVE_H
+
+char *
+resolve_locale(const char *locale);
+
+const char *
+get_xlocaledir_path(void);
+
+const char *
+get_xcomposefile_path(void);
+
+char *
+get_home_xcompose_file_path(void);
+
+char *
+get_locale_compose_file_path(const char *locale);
+
+#endif
diff --git a/src/compose/state.c b/src/compose/state.c
new file mode 100644
index 0000000..8657ff7
--- /dev/null
+++ b/src/compose/state.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright © 2013 Ran Benita
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "table.h"
+#include "utils.h"
+#include "keysym.h"
+
+struct xkb_compose_state {
+    int refcnt;
+    enum xkb_compose_state_flags flags;
+    struct xkb_compose_table *table;
+
+    /*
+     * Indices into xkb_compose_table::nodes: where the walk through the
+     * trie stands now (@context) and where it stood before the last
+     * accepted keysym (@prev_context).  Index 0 is the root.
+     *
+     * xkb_compose_state_feed() moves them and
+     * xkb_compose_state_get_status() derives the status from them alone.
+     */
+    uint32_t prev_context;
+    uint32_t context;
+};
+
+/*
+ * Allocate a state positioned at the root of @table.  The state takes its
+ * own reference on @table.  Returns NULL if allocation fails.
+ */
+XKB_EXPORT struct xkb_compose_state *
+xkb_compose_state_new(struct xkb_compose_table *table,
+                      enum xkb_compose_state_flags flags)
+{
+    struct xkb_compose_state *fresh = calloc(1, sizeof(*fresh));
+
+    if (fresh == NULL)
+        return NULL;
+
+    fresh->context = 0;
+    fresh->prev_context = 0;
+    fresh->flags = flags;
+    fresh->table = xkb_compose_table_ref(table);
+    fresh->refcnt = 1;
+
+    return fresh;
+}
+
+/* Take another reference on @state and return it. */
+XKB_EXPORT struct xkb_compose_state *
+xkb_compose_state_ref(struct xkb_compose_state *state)
+{
+    state->refcnt += 1;
+    return state;
+}
+
+/*
+ * Drop one reference on @state (NULL is accepted).  Dropping the last one
+ * releases the table reference and frees the state.
+ */
+XKB_EXPORT void
+xkb_compose_state_unref(struct xkb_compose_state *state)
+{
+    if (state == NULL)
+        return;
+
+    state->refcnt -= 1;
+    if (state->refcnt > 0)
+        return;
+
+    xkb_compose_table_unref(state->table);
+    free(state);
+}
+
+/* Return the table behind @state, without taking a new reference. */
+XKB_EXPORT struct xkb_compose_table *
+xkb_compose_state_get_compose_table(struct xkb_compose_state *state)
+{
+    return state->table;
+}
+
+/*
+ * Advance @state by one keysym.
+ *
+ * Candidates are the successors of the current node, or the top level
+ * (starting at the root) if the current node is a leaf.  If none of them
+ * carries @keysym the state returns to the root.
+ */
+XKB_EXPORT enum xkb_compose_feed_result
+xkb_compose_state_feed(struct xkb_compose_state *state, xkb_keysym_t keysym)
+{
+    const struct compose_node *cursor;
+    uint32_t pos = 0;
+
+    /*
+     * Modifier keysyms are dropped instead of cancelling the sequence;
+     * otherwise a sequence needing Shift in the middle could never be
+     * typed.  Deciding this by keysym alone is an approximation (a real
+     * answer needs the keymap, which is not available here), but it is
+     * approximately what libX11 does, too.
+     */
+    if (xkb_keysym_is_modifier(keysym))
+        return XKB_COMPOSE_FEED_IGNORED;
+
+    cursor = &darray_item(state->table->nodes, state->context);
+    if (!cursor->is_leaf)
+        pos = cursor->u.successor;
+
+    /* Linear search through the sibling list starting at pos. */
+    for (;;) {
+        cursor = &darray_item(state->table->nodes, pos);
+        if (cursor->keysym == keysym)
+            break;
+        if (cursor->next == 0) {
+            /* End of the list without a match: back to the root. */
+            pos = 0;
+            break;
+        }
+        pos = cursor->next;
+    }
+
+    state->prev_context = state->context;
+    state->context = pos;
+    return XKB_COMPOSE_FEED_ACCEPTED;
+}
+
+/* Put @state back at the root, forgetting any sequence in progress. */
+XKB_EXPORT void
+xkb_compose_state_reset(struct xkb_compose_state *state)
+{
+    state->context = 0;
+    state->prev_context = 0;
+}
+
+/*
+ * Derive the status from the current and previous positions:
+ * at the root it is CANCELLED if the previous node was an inner node and
+ * NOTHING otherwise; away from the root it is COMPOSED on a leaf and
+ * COMPOSING on an inner node.
+ */
+XKB_EXPORT enum xkb_compose_status
+xkb_compose_state_get_status(struct xkb_compose_state *state)
+{
+    const struct compose_node *before =
+        &darray_item(state->table->nodes, state->prev_context);
+    const struct compose_node *now =
+        &darray_item(state->table->nodes, state->context);
+
+    if (state->context == 0)
+        return before->is_leaf ? XKB_COMPOSE_NOTHING : XKB_COMPOSE_CANCELLED;
+
+    return now->is_leaf ? XKB_COMPOSE_COMPOSED : XKB_COMPOSE_COMPOSING;
+}
+
+/*
+ * Write the string of the current leaf to @buffer (at most @size bytes,
+ * snprintf-style) and return what snprintf() returns.  If the current
+ * node is not a leaf, or has only a keysym which has no string form, the
+ * buffer is set to the empty string and 0 is returned.
+ */
+XKB_EXPORT int
+xkb_compose_state_get_utf8(struct xkb_compose_state *state,
+                           char *buffer, size_t size)
+{
+    const struct compose_node *leaf =
+        &darray_item(state->table->nodes, state->context);
+    char from_keysym[64];
+    const char *text;
+
+    if (!leaf->is_leaf)
+        goto empty;
+
+    if (leaf->u.leaf.utf8 != 0 || leaf->u.leaf.keysym == XKB_KEY_NoSymbol) {
+        text = &darray_item(state->table->utf8, leaf->u.leaf.utf8);
+    }
+    else {
+        /* Only a keysym was given: fall back to the keysym's own string. */
+        if (xkb_keysym_to_utf8(leaf->u.leaf.keysym, from_keysym,
+                               sizeof(from_keysym)) <= 0)
+            goto empty;
+        text = from_keysym;
+    }
+
+    return snprintf(buffer, size, "%s", text);
+
+empty:
+    if (size > 0)
+        buffer[0] = '\0';
+    return 0;
+}
+
+/*
+ * Return the keysym stored in the current leaf, or XKB_KEY_NoSymbol if
+ * the current node is not a leaf.
+ */
+XKB_EXPORT xkb_keysym_t
+xkb_compose_state_get_one_sym(struct xkb_compose_state *state)
+{
+    const struct compose_node *leaf =
+        &darray_item(state->table->nodes, state->context);
+
+    return leaf->is_leaf ? leaf->u.leaf.keysym : XKB_KEY_NoSymbol;
+}
diff --git a/src/compose/table.c b/src/compose/table.c
new file mode 100644
index 0000000..5cd8415
--- /dev/null
+++ b/src/compose/table.c
@@ -0,0 +1,219 @@
+/*
+ * Copyright © 2013 Ran Benita
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */ + +#include "utils.h" +#include "table.h" +#include "parser.h" +#include "paths.h" + +static struct xkb_compose_table * +xkb_compose_table_new(struct xkb_context *ctx, + const char *locale, + enum xkb_compose_format format, + enum xkb_compose_compile_flags flags) +{ + char *resolved_locale; + struct xkb_compose_table *table; + struct compose_node root; + + resolved_locale = resolve_locale(locale); + if (!resolved_locale) + return NULL; + + table = calloc(1, sizeof(*table)); + if (!table) { + free(resolved_locale); + return NULL; + } + + table->refcnt = 1; + table->ctx = xkb_context_ref(ctx); + + table->locale = resolved_locale; + table->format = format; + table->flags = flags; + + darray_init(table->nodes); + darray_init(table->utf8); + + root.keysym = XKB_KEY_NoSymbol; + root.next = 0; + root.is_leaf = true; + root.u.leaf.utf8 = 0; + root.u.leaf.keysym = XKB_KEY_NoSymbol; + darray_append(table->nodes, root); + + darray_append(table->utf8, '\0'); + + return table; +} + +XKB_EXPORT struct xkb_compose_table * +xkb_compose_table_ref(struct xkb_compose_table *table) +{ + table->refcnt++; + return table; +} + +XKB_EXPORT void +xkb_compose_table_unref(struct xkb_compose_table *table) +{ + if (!table || --table->refcnt > 0) + return; + free(table->locale); + darray_free(table->nodes); + darray_free(table->utf8); + xkb_context_unref(table->ctx); + free(table); +} + +XKB_EXPORT struct xkb_compose_table * +xkb_compose_table_new_from_file(struct xkb_context *ctx, + FILE *file, + const char *locale, + enum xkb_compose_format format, + enum xkb_compose_compile_flags flags) +{ + struct xkb_compose_table *table; + bool ok; + + if (flags & ~(XKB_COMPOSE_COMPILE_NO_FLAGS)) { + log_err_func(ctx, "unrecognized flags: %#x\n", flags); + return NULL; + } + + if (format != XKB_COMPOSE_FORMAT_TEXT_V1) { + log_err_func(ctx, "unsupported compose format: %d\n", format); + return NULL; + } + + table = xkb_compose_table_new(ctx, locale, format, flags); + if (!table) + return NULL; + + ok 
= parse_file(table, file, "(unknown file)"); + if (!ok) { + xkb_compose_table_unref(table); + return NULL; + } + + return table; +} + +XKB_EXPORT struct xkb_compose_table * +xkb_compose_table_new_from_buffer(struct xkb_context *ctx, + const char *buffer, size_t length, + const char *locale, + enum xkb_compose_format format, + enum xkb_compose_compile_flags flags) +{ + struct xkb_compose_table *table; + bool ok; + + if (flags & ~(XKB_COMPOSE_COMPILE_NO_FLAGS)) { + log_err_func(ctx, "unrecognized flags: %#x\n", flags); + return NULL; + } + + if (format != XKB_COMPOSE_FORMAT_TEXT_V1) { + log_err_func(ctx, "unsupported compose format: %d\n", format); + return NULL; + } + + table = xkb_compose_table_new(ctx, locale, format, flags); + if (!table) + return NULL; + + ok = parse_string(table, buffer, length, "(input string)"); + if (!ok) { + xkb_compose_table_unref(table); + return NULL; + } + + return table; +} + +XKB_EXPORT struct xkb_compose_table * +xkb_compose_table_new_from_locale(struct xkb_context *ctx, + const char *locale, + enum xkb_compose_compile_flags flags) +{ + struct xkb_compose_table *table; + char *path = NULL; + const char *cpath; + FILE *file; + bool ok; + + if (flags & ~(XKB_COMPOSE_COMPILE_NO_FLAGS)) { + log_err_func(ctx, "unrecognized flags: %#x\n", flags); + return NULL; + } + + table = xkb_compose_table_new(ctx, locale, XKB_COMPOSE_FORMAT_TEXT_V1, + flags); + if (!table) + return NULL; + + cpath = get_xcomposefile_path(); + if (cpath) { + file = fopen(cpath, "r"); + if (file) + goto found_path; + } + + cpath = path = get_home_xcompose_file_path(); + if (path) { + file = fopen(path, "r"); + if (file) + goto found_path; + } + free(path); + path = NULL; + + cpath = path = get_locale_compose_file_path(table->locale); + if (path) { + file = fopen(path, "r"); + if (file) + goto found_path; + } + free(path); + path = NULL; + + log_err(ctx, "couldn't find a Compose file for locale \"%s\"\n", locale); + xkb_compose_table_unref(table); + return NULL; + 
+found_path: + ok = parse_file(table, file, cpath); + fclose(file); + if (!ok) { + xkb_compose_table_unref(table); + return NULL; + } + + log_dbg(ctx, "created compose table from locale %s with path %s\n", + table->locale, path); + + free(path); + return table; +} diff --git a/src/compose/table.h b/src/compose/table.h new file mode 100644 index 0000000..05a415f --- /dev/null +++ b/src/compose/table.h @@ -0,0 +1,100 @@ +/* + * Copyright © 2013 Ran Benita + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef COMPOSE_COMPOSE_H +#define COMPOSE_COMPOSE_H + +#include "xkbcommon/xkbcommon-compose.h" +#include "utils.h" +#include "context.h" + +/* + * The compose table data structure is a simple trie. An example will + * help. 
Given these sequences:
+ *
+ *      <A> <B>        : "first"  dead_a
+ *      <A> <C> <D>    : "second" dead_b
+ *      <E> <F>        : "third"  dead_c
+ *
+ * the trie would look like:
+ *
+ * [root] ---> [<A>] -----------------> [<E>] -#
+ *   |           |                        |
+ *   #           v                        v
+ *             [<B>] ---> [<C>] -#      [<F>] -#
+ *               |          |             |
+ *               #          v             #
+ *                        [<D>] -#
+ *                          |
+ *                          #
+ * where:
+ * - [root] is a special empty root node.
+ * - [<X>] is a node for a sequence keysym <X>.
+ * - right arrows are `next` pointers.
+ * - down arrows are `successor` pointers.
+ * - # is a nil pointer.
+ *
+ * The nodes are all kept in a contiguous array. Pointers are represented
+ * as integer offsets into this array. A nil pointer is represented as 0
+ * (which, helpfully, is the offset of the empty root node).
+ *
+ * Nodes without a successor are leaf nodes. Since a sequence cannot be a
+ * prefix of another, these are exactly the nodes which terminate the
+ * sequences (in a bijective manner).
+ *
+ * A leaf contains the result data of its sequence. The result keysym is
+ * contained in the node struct itself; the result UTF-8 string is a byte
+ * offset into an array of the form "\0first\0second\0third" (the initial
+ * \0 is so offset 0 points to an empty string).
+ */
+
+struct compose_node {
+    xkb_keysym_t keysym;
+    /* Offset into xkb_compose_table::nodes. */
+    unsigned int next:31;
+    bool is_leaf:1;
+
+    union {
+        /* Offset into xkb_compose_table::nodes. */
+        uint32_t successor;
+        struct {
+            /* Offset into xkb_compose_table::utf8. 
*/
+            uint32_t utf8;
+            xkb_keysym_t keysym; /* Result keysym of the sequence. */
+        } leaf;
+    } u;
+};
+/*
+ * A compiled compose table: the trie nodes plus the storage for the result
+ * strings.  Reference-counted; see xkb_compose_table_ref() and
+ * xkb_compose_table_unref().
+ */
+struct xkb_compose_table {
+    int refcnt; /* Number of outstanding references. */
+    enum xkb_compose_format format; /* Format given at creation. */
+    enum xkb_compose_compile_flags flags; /* Flags given at creation. */
+    struct xkb_context *ctx; /* Context this table was created with. */
+
+    char *locale; /* Heap-allocated locale string. */
+
+    darray_char utf8; /* Result strings; compose_node leaf.utf8 indexes it. */
+    darray(struct compose_node) nodes; /* Trie nodes; index 0 is the root. */
+};
+
+#endif
diff --git a/xkbcommon.map b/xkbcommon.map
index 28122b4..3bbb13e 100644
--- a/xkbcommon.map
+++ b/xkbcommon.map
@@ -68,6 +68,20 @@ global:
 	xkb_state_layout_index_is_active;
 	xkb_state_led_name_is_active;
 	xkb_state_led_index_is_active;
+	xkb_compose_table_new_from_locale;
+	xkb_compose_table_new_from_file;
+	xkb_compose_table_new_from_buffer;
+	xkb_compose_table_ref;
+	xkb_compose_table_unref;
+	xkb_compose_state_new;
+	xkb_compose_state_ref;
+	xkb_compose_state_unref;
+	xkb_compose_state_get_compose_table;
+	xkb_compose_state_feed;
+	xkb_compose_state_reset;
+	xkb_compose_state_get_status;
+	xkb_compose_state_get_utf8;
+	xkb_compose_state_get_one_sym;
 local:
 	*;
 };