compose: add xkbcommon-compose - implementation

Signed-off-by: Ran Benita <ran234@gmail.com>
master
Ran Benita 2014-09-12 18:44:30 +03:00
parent 046c802e96
commit edc98b5403
10 changed files with 1463 additions and 0 deletions

View File

@ -14,6 +14,7 @@ EXTRA_DIST = \
AM_CPPFLAGS = \
-DDFLT_XKB_CONFIG_ROOT='"$(XKBCONFIGROOT)"' \
-DXLOCALEDIR='"$(XLOCALEDIR)"' \
-I$(top_srcdir)/src \
-I$(top_srcdir)/src/xkbcomp \
-I$(top_builddir)/src/xkbcomp \
@ -32,11 +33,19 @@ xkbcommonincludedir = $(includedir)/xkbcommon
xkbcommoninclude_HEADERS = \
xkbcommon/xkbcommon.h \
xkbcommon/xkbcommon-compat.h \
xkbcommon/xkbcommon-compose.h \
xkbcommon/xkbcommon-keysyms.h \
xkbcommon/xkbcommon-names.h
lib_LTLIBRARIES = libxkbcommon.la
libxkbcommon_la_SOURCES = \
src/compose/parser.c \
src/compose/parser.h \
src/compose/paths.c \
src/compose/paths.h \
src/compose/state.c \
src/compose/table.c \
src/compose/table.h \
src/xkbcomp/action.c \
src/xkbcomp/action.h \
src/xkbcomp/ast.h \

View File

@ -107,6 +107,14 @@ AC_ARG_WITH([xkb_config_root],
[XKBCONFIGROOT="$xkb_base"])
AC_SUBST([XKBCONFIGROOT])
# Define a configuration option for the X locale directory for compose
AC_ARG_WITH([x_locale_root],
[AS_HELP_STRING([--with-x-locale-root=<path>],
[Set X locale root (default: $datadir/X11/locale)])],
[XLOCALEDIR="$withval"],
[XLOCALEDIR="$datadir/X11/locale"])
AC_SUBST([XLOCALEDIR])
AC_ARG_WITH([default_rules],
[AS_HELP_STRING([--with-default-rules=<path>],
[Default XKB ruleset (default: evdev)])],
@ -189,4 +197,5 @@ AC_MSG_RESULT([
includedir: ${includedir}
lib dir: ${libdir}
XKB config root: ${XKBCONFIGROOT}
X11 locale root: ${XLOCALEDIR}
])

634
src/compose/parser.c Normal file
View File

@ -0,0 +1,634 @@
/*
* Copyright © 2013 Ran Benita <ran234@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <errno.h>
#include "utils.h"
#include "scanner-utils.h"
#include "table.h"
#include "paths.h"
#include "utf8.h"
#include "parser.h"
/* Maximum number of keysyms accepted on the left-hand side of a production. */
#define MAX_LHS_LEN 10
/* Include nesting depth at which do_include() refuses to recurse further. */
#define MAX_INCLUDE_DEPTH 5
/* Number of slots in struct keysym_from_name_cache. */
#define KEYSYM_FROM_NAME_CACHE_SIZE 8
/*
 * xkb_keysym_from_name() is fairly slow, because for internal reasons
 * it must use strcasecmp().
 * A small cache reduces about 20% from the compilation time of
 * en_US.UTF-8/Compose.
 *
 * The cache is a fixed-size array used as a ring: on a miss,
 * cached_keysym_from_name() overwrites the slot at index `next` and then
 * advances `next`, wrapping around at KEYSYM_FROM_NAME_CACHE_SIZE.
 */
struct keysym_from_name_cache {
struct {
/* NUL-terminated keysym name used as the lookup key. */
char name[64];
/* Value xkb_keysym_from_name() returned for `name`. */
xkb_keysym_t keysym;
} cache[KEYSYM_FROM_NAME_CACHE_SIZE];
/* Index of the slot that the next cache miss will overwrite. */
unsigned next;
};
/*
 * Look up a keysym by name, going through the small name cache first.
 *
 * @cache: cache to consult and update.
 * @name:  NUL-terminated keysym name.
 * @len:   length supplied by the caller; names for which
 *         len >= sizeof(cache slot name) are rejected without a lookup.
 *
 * Returns the cached keysym on a hit.  On a miss, the result of
 * xkb_keysym_from_name() is stored in the slot at cache->next (evicting
 * whatever was there) and returned.  Returns XKB_KEY_NoSymbol for names
 * that are too long for a slot.
 */
static xkb_keysym_t
cached_keysym_from_name(struct keysym_from_name_cache *cache,
                        const char *name, size_t len)
{
    unsigned slot;
    xkb_keysym_t resolved;

    /* The strcpy() below must never overflow a slot. */
    if (len >= sizeof(cache->cache[0].name))
        return XKB_KEY_NoSymbol;

    for (slot = 0; slot < KEYSYM_FROM_NAME_CACHE_SIZE; slot++) {
        if (streq(cache->cache[slot].name, name))
            return cache->cache[slot].keysym;
    }

    /* Miss: do the slow lookup and remember it in the next ring slot. */
    slot = cache->next;
    resolved = xkb_keysym_from_name(name, XKB_KEYSYM_NO_FLAGS);
    strcpy(cache->cache[slot].name, name);
    cache->cache[slot].keysym = resolved;
    cache->next = (slot + 1) % KEYSYM_FROM_NAME_CACHE_SIZE;

    return resolved;
}
/*
* Grammar adapted from libX11/modules/im/ximcp/imLcPrs.c.
* See also the XCompose(5) manpage.
*
* We don't support the MODIFIER rules, which are commented out.
*
* FILE ::= { [PRODUCTION] [COMMENT] "\n" | INCLUDE }
* INCLUDE ::= "include" '"' INCLUDE_STRING '"'
* PRODUCTION ::= LHS ":" RHS [ COMMENT ]
* COMMENT ::= "#" {<any character except null or newline>}
* LHS ::= EVENT { EVENT }
* EVENT ::= "<" keysym ">"
* # EVENT ::= [MODIFIER_LIST] "<" keysym ">"
* # MODIFIER_LIST ::= ("!" {MODIFIER} ) | "None"
* # MODIFIER ::= ["~"] modifier_name
* RHS ::= ( STRING | keysym | STRING keysym )
* STRING ::= '"' { CHAR } '"'
* CHAR ::= GRAPHIC_CHAR | ESCAPED_CHAR
* GRAPHIC_CHAR ::= locale (codeset) dependent code
* ESCAPED_CHAR ::= ('\\' | '\"' | OCTAL | HEX )
* OCTAL ::= '\' OCTAL_CHAR [OCTAL_CHAR [OCTAL_CHAR]]
* OCTAL_CHAR ::= (0|1|2|3|4|5|6|7)
* HEX ::= '\' (x|X) HEX_CHAR [HEX_CHAR]
* HEX_CHAR ::= (0|1|2|3|4|5|6|7|8|9|A|B|C|D|E|F|a|b|c|d|e|f)
*
* INCLUDE_STRING is a filesystem path, with the following %-expansions:
* %% - '%'.
* %H - The user's home directory (the $HOME environment variable).
* %L - The name of the locale specific Compose file (e.g.,
* "/usr/share/X11/locale/<localename>/Compose").
* %S - The name of the system directory for Compose files (e.g.,
* "/usr/share/X11/locale").
*/
enum rules_token {
TOK_END_OF_FILE = 0,
TOK_END_OF_LINE,
TOK_INCLUDE,
TOK_INCLUDE_STRING,
TOK_LHS_KEYSYM,
TOK_COLON,
TOK_STRING,
TOK_RHS_KEYSYM,
TOK_ERROR
};
/*
 * Values returned with some tokens, like yylval.
 * Only the member matching the returned token kind is meaningful.
 */
union lvalue {
/* Set for TOK_STRING and TOK_INCLUDE_STRING; points into the scanner's buffer. */
const char *string;
/* Set for TOK_LHS_KEYSYM and TOK_RHS_KEYSYM. */
xkb_keysym_t keysym;
};
/*
 * Read the next token of a Compose file.
 *
 * @s:   scanner positioned in the input; s->priv must point to the
 *       struct keysym_from_name_cache used for keysym name lookups.
 * @val: receives the token's value for TOK_LHS_KEYSYM / TOK_RHS_KEYSYM
 *       (val->keysym) and TOK_STRING (val->string, which points into the
 *       scanner's own buffer and is overwritten by the next token that
 *       uses the buffer).
 *
 * Whitespace and '#' comments are skipped; a consumed newline is reported
 * as TOK_END_OF_LINE.  On any lexical error a diagnostic is emitted through
 * scanner_err() and TOK_ERROR is returned.
 */
static enum rules_token
lex(struct scanner *s, union lvalue *val)
{
    struct keysym_from_name_cache *cache = s->priv;

skip_more_whitespace_and_comments:
    /* Skip spaces. */
    while (is_space(peek(s)))
        if (next(s) == '\n')
            return TOK_END_OF_LINE;

    /* Skip comments. */
    if (chr(s, '#')) {
        while (!eof(s) && !eol(s))
            next(s);
        goto skip_more_whitespace_and_comments;
    }

    /* See if we're done. */
    if (eof(s))
        return TOK_END_OF_FILE;

    /* New token. */
    s->token_line = s->line;
    s->token_column = s->column;
    s->buf_pos = 0;

    /* LHS Keysym. */
    if (chr(s, '<')) {
        /*
         * Stop at end of input as well as at end of line: the string and
         * include-path loops in this file already guard with eof(), and
         * without it an unterminated '<' at the very end of the input
         * would presumably spin here, since neither '>' nor a newline
         * can ever show up once the input is exhausted.
         */
        while (peek(s) != '>' && !eol(s) && !eof(s))
            buf_append(s, next(s));
        if (!chr(s, '>')) {
            scanner_err(s, "unterminated keysym literal");
            return TOK_ERROR;
        }
        if (!buf_append(s, '\0')) {
            scanner_err(s, "keysym literal is too long");
            return TOK_ERROR;
        }
        val->keysym = cached_keysym_from_name(cache, s->buf, s->buf_pos);
        if (val->keysym == XKB_KEY_NoSymbol) {
            scanner_err(s, "unrecognized keysym \"%s\" on left-hand side", s->buf);
            return TOK_ERROR;
        }
        return TOK_LHS_KEYSYM;
    }

    /* Colon. */
    if (chr(s, ':'))
        return TOK_COLON;

    /* String literal. */
    if (chr(s, '\"')) {
        while (!eof(s) && !eol(s) && peek(s) != '\"') {
            if (chr(s, '\\')) {
                uint8_t o;
                if (chr(s, '\\')) {
                    buf_append(s, '\\');
                }
                else if (chr(s, '"')) {
                    buf_append(s, '"');
                }
                else if (chr(s, 'x') || chr(s, 'X')) {
                    if (hex(s, &o))
                        buf_append(s, (char) o);
                    else
                        scanner_warn(s, "illegal hexadecimal escape sequence in string literal");
                }
                else if (oct(s, &o)) {
                    buf_append(s, (char) o);
                }
                else {
                    scanner_warn(s, "unknown escape sequence (%c) in string literal", peek(s));
                    /* Ignore. */
                }
            } else {
                buf_append(s, next(s));
            }
        }
        if (!chr(s, '\"')) {
            scanner_err(s, "unterminated string literal");
            return TOK_ERROR;
        }
        /* A failed append here means the buffer filled up at some point. */
        if (!buf_append(s, '\0')) {
            scanner_err(s, "string literal is too long");
            return TOK_ERROR;
        }
        /* Validate the contents only, not the terminating NUL. */
        if (!is_valid_utf8(s->buf, s->buf_pos - 1)) {
            scanner_err(s, "string literal is not a valid UTF-8 string");
            return TOK_ERROR;
        }
        val->string = s->buf;
        return TOK_STRING;
    }

    /* RHS keysym or include. */
    if (is_alpha(peek(s)) || peek(s) == '_') {
        while (is_alnum(peek(s)) || peek(s) == '_')
            buf_append(s, next(s));
        if (!buf_append(s, '\0')) {
            scanner_err(s, "identifier is too long");
            return TOK_ERROR;
        }

        if (streq(s->buf, "include"))
            return TOK_INCLUDE;

        val->keysym = cached_keysym_from_name(cache, s->buf, s->buf_pos);
        if (val->keysym == XKB_KEY_NoSymbol) {
            scanner_err(s, "unrecognized keysym \"%s\" on right-hand side", s->buf);
            return TOK_ERROR;
        }
        return TOK_RHS_KEYSYM;
    }

    /* Skip line. */
    while (!eof(s) && !eol(s))
        next(s);

    scanner_err(s, "unrecognized token");
    return TOK_ERROR;
}
/*
 * Read the quoted path that follows an "include" keyword, expanding the
 * %-sequences as it goes:
 *   %% - a literal '%'
 *   %H - the HOME environment variable
 *   %L - the Compose file path resolved for table->locale
 *   %S - the X locale directory
 *
 * @s:       scanner positioned right after the "include" identifier.
 * @table:   compose table being built; only its locale is read.
 * @val_out: on success, val_out->string points at the expanded path held
 *           in the scanner's buffer.
 *
 * Returns TOK_INCLUDE_STRING on success, TOK_END_OF_LINE if a newline is
 * consumed before any path, or TOK_ERROR after reporting a diagnostic.
 */
static enum rules_token
lex_include_string(struct scanner *s, struct xkb_compose_table *table,
                   union lvalue *val_out)
{
    while (is_space(peek(s))) {
        if (next(s) == '\n')
            return TOK_END_OF_LINE;
    }

    s->token_line = s->line;
    s->token_column = s->column;
    s->buf_pos = 0;

    if (!chr(s, '\"')) {
        scanner_err(s, "include statement must be followed by a path");
        return TOK_ERROR;
    }

    for (;;) {
        if (eof(s) || eol(s) || peek(s) == '\"')
            break;

        /* Ordinary character: copy it through unchanged. */
        if (!chr(s, '%')) {
            buf_append(s, next(s));
            continue;
        }

        if (chr(s, '%')) {
            buf_append(s, '%');
            continue;
        }

        if (chr(s, 'H')) {
            const char *home_dir = secure_getenv("HOME");
            if (!home_dir) {
                scanner_err(s, "%%H was used in an include statement, but the HOME environment variable is not set");
                return TOK_ERROR;
            }
            if (!buf_appends(s, home_dir)) {
                scanner_err(s, "include path after expanding %%H is too long");
                return TOK_ERROR;
            }
            continue;
        }

        if (chr(s, 'L')) {
            bool appended;
            char *compose_path = get_locale_compose_file_path(table->locale);
            if (!compose_path) {
                scanner_err(s, "failed to expand %%L to the locale Compose file");
                return TOK_ERROR;
            }
            /* The path is heap-allocated; release it on both outcomes. */
            appended = buf_appends(s, compose_path);
            free(compose_path);
            if (!appended) {
                scanner_err(s, "include path after expanding %%L is too long");
                return TOK_ERROR;
            }
            continue;
        }

        if (chr(s, 'S')) {
            if (!buf_appends(s, get_xlocaledir_path())) {
                scanner_err(s, "include path after expanding %%S is too long");
                return TOK_ERROR;
            }
            continue;
        }

        scanner_err(s, "unknown %% format (%c) in include statement", peek(s));
        return TOK_ERROR;
    }

    if (!chr(s, '\"')) {
        scanner_err(s, "unterminated include statement");
        return TOK_ERROR;
    }

    if (!buf_append(s, '\0')) {
        scanner_err(s, "include path is too long");
        return TOK_ERROR;
    }

    val_out->string = s->buf;
    return TOK_INCLUDE_STRING;
}
/* One parsed "LHS : RHS" line, filled in by parse() and consumed by add_production(). */
struct production {
/* Left-hand side keysym sequence; only the first `len` entries are valid. */
xkb_keysym_t lhs[MAX_LHS_LEN];
/* Number of keysyms stored in `lhs`. */
unsigned int len;
/* Right-hand side keysym; meaningful only when `has_keysym` is true. */
xkb_keysym_t keysym;
/* Right-hand side string (NUL-terminated); meaningful only when `has_string` is true. */
char string[256];
/* Whether the right-hand side specified a keysym. */
bool has_keysym;
/* Whether the right-hand side specified a string. */
bool has_string;
};
/*
 * Append a fresh leaf node for @keysym to table->nodes.
 *
 * The new node has no sibling (next == 0).  Appending may reallocate the
 * node array, so callers must refetch any node pointers they hold.
 *
 * Returns the index of the new node within table->nodes.
 */
static uint32_t
add_node(struct xkb_compose_table *table, xkb_keysym_t keysym)
{
    struct compose_node fresh = {
        .keysym = keysym,
        .next = 0,
        .is_leaf = true,
    };
    /* The node lands at the current end of the array. */
    uint32_t index = darray_size(table->nodes);

    darray_append(table->nodes, fresh);
    return index;
}
static void
add_production(struct xkb_compose_table *table, struct scanner *s,
const struct production *production)
{
unsigned lhs_pos;
uint32_t curr;
struct compose_node *node;
curr = 0;
node = &darray_item(table->nodes, curr);
/*
* Insert the sequence to the trie, creating new nodes as needed.
*
* TODO: This can be sped up a bit by first trying the path that the
* previous production took, and only then doing the linear search
* through the trie levels. This will work because sequences in the
* Compose files are often clustered by a common prefix; especially
* in the 1st and 2nd keysyms, which is where the largest variation
* (thus, longest search) is.
*/
for (lhs_pos = 0; lhs_pos < production->len; lhs_pos++) {
while (production->lhs[lhs_pos] != node->keysym) {
if (node->next == 0) {
uint32_t next = add_node(table, production->lhs[lhs_pos]);
/* Refetch since add_node could have realloc()ed. */
node = &darray_item(table->nodes, curr);
node->next = next;
}
curr = node->next;
node = &darray_item(table->nodes, curr);
}
if (lhs_pos + 1 == production->len)
break;
if (node->is_leaf) {
if (node->u.leaf.utf8 != 0 ||
node->u.leaf.keysym != XKB_KEY_NoSymbol) {
scanner_warn(s, "a sequence already exists which is a prefix of this sequence; overriding");
node->u.leaf.utf8 = 0;
node->u.leaf.keysym = XKB_KEY_NoSymbol;
}
{
uint32_t successor = add_node(table, production->lhs[lhs_pos + 1]);
/* Refetch since add_node could have realloc()ed. */
node = &darray_item(table->nodes, curr);
node->is_leaf = false;
node->u.successor = successor;
}
}
curr = node->u.successor;
node = &darray_item(table->nodes, curr);
}
if (!node->is_leaf) {
scanner_warn(s, "this compose sequence is a prefix of another; skipping line");
return;
}
if (node->u.leaf.utf8 != 0 || node->u.leaf.keysym != XKB_KEY_NoSymbol) {
if (streq(&darray_item(table->utf8, node->u.leaf.utf8),
production->string) &&
node->u.leaf.keysym == production->keysym) {
scanner_warn(s, "this compose sequence is a duplicate of another; skipping line");
return;
}
scanner_warn(s, "this compose sequence already exists; overriding");
}
if (production->has_string) {
node->u.leaf.utf8 = darray_size(table->utf8);
darray_append_items(table->utf8, production->string,
strlen(production->string) + 1);
}
if (production->has_keysym) {
node->u.leaf.keysym = production->keysym;
}
}
/* Forward declaration: do_include() and parse() call each other. */
static bool
parse(struct xkb_compose_table *table, struct scanner *s,
unsigned include_depth);
/*
 * Parse an included Compose file into @table.
 *
 * @table:         compose table being built.
 * @s:             scanner of the including file; used for diagnostics and
 *                 as the source of the shared keysym cache.
 * @path:          already %-expanded path of the file to include.
 * @include_depth: nesting depth of the including file.
 *
 * Returns true if the file was opened, read and parsed successfully;
 * false (after reporting an error) otherwise, including when
 * MAX_INCLUDE_DEPTH would be exceeded.
 */
static bool
do_include(struct xkb_compose_table *table, struct scanner *s,
           const char *path, unsigned include_depth)
{
    FILE *file;
    bool ok;
    const char *string;
    size_t size;
    struct scanner new_s;

    if (include_depth >= MAX_INCLUDE_DEPTH) {
        scanner_err(s, "maximum include depth (%d) exceeded; maybe there is an include loop?",
                    MAX_INCLUDE_DEPTH);
        return false;
    }

    /* Open the path we were given, which is also what the errors report. */
    file = fopen(path, "r");
    if (!file) {
        scanner_err(s, "failed to open included Compose file \"%s\": %s",
                    path, strerror(errno));
        return false;
    }

    ok = map_file(file, &string, &size);
    if (!ok) {
        scanner_err(s, "failed to read included Compose file \"%s\": %s",
                    path, strerror(errno));
        goto err_file;
    }

    scanner_init(&new_s, table->ctx, string, size, path);
    /* lex() takes its keysym cache from scanner->priv; share the parent's. */
    new_s.priv = s->priv;

    ok = parse(table, &new_s, include_depth + 1);

    unmap_file(string, size);
err_file:
    fclose(file);
    return ok;
}
/*
 * Parse a whole Compose file from scanner @s into @table.
 *
 * The parser is a hand-written state machine; each label below is a state
 * and each lex() call fetches the token that selects the next state.
 * Malformed lines are skipped with a diagnostic; parsing is abandoned once
 * more than MAX_ERRORS unexpected-token errors have been seen, or when an
 * include fails.
 *
 * @include_depth is the current include nesting level and is passed on to
 * do_include().
 *
 * Returns true on reaching end of file, false if parsing was abandoned.
 */
static bool
parse(struct xkb_compose_table *table, struct scanner *s,
unsigned include_depth)
{
enum rules_token tok;
union lvalue val;
struct production production;
enum { MAX_ERRORS = 10 };
int num_errors = 0;
/* Start of a new line: forget the previous production. */
initial:
production.len = 0;
production.has_keysym = false;
production.has_string = false;
/* fallthrough */
/* Re-entry point for blank lines; the production is still empty. */
initial_eol:
switch (tok = lex(s, &val)) {
case TOK_END_OF_LINE:
goto initial_eol;
case TOK_END_OF_FILE:
goto finished;
case TOK_INCLUDE:
goto include;
case TOK_LHS_KEYSYM:
production.lhs[production.len++] = val.keysym;
goto lhs;
default:
goto unexpected;
}
/* Seen "include"; the path needs its own lexer for %-expansion. */
include:
switch (tok = lex_include_string(s, table, &val)) {
case TOK_INCLUDE_STRING:
goto include_eol;
default:
goto unexpected;
}
/* Seen the include path; only an end of line may follow it. */
include_eol:
switch (tok = lex(s, &val)) {
case TOK_END_OF_LINE:
/* val.string is still the path set by lex_include_string(). */
if (!do_include(table, s, val.string, include_depth))
goto fail;
goto initial;
default:
goto unexpected;
}
/* Collecting left-hand side keysyms until the colon. */
lhs:
switch (tok = lex(s, &val)) {
case TOK_LHS_KEYSYM:
if (production.len + 1 > MAX_LHS_LEN) {
scanner_warn(s, "too many keysyms (%d) on left-hand side; skipping line",
MAX_LHS_LEN + 1);
goto skip;
}
production.lhs[production.len++] = val.keysym;
goto lhs;
case TOK_COLON:
if (production.len <= 0) {
scanner_warn(s, "expected at least one keysym on left-hand side; skipping line");
goto skip;
}
goto rhs;
default:
goto unexpected;
}
/* Collecting the right-hand side: a string, a keysym, or a string then a keysym. */
rhs:
switch (tok = lex(s, &val)) {
case TOK_STRING:
if (production.has_string) {
scanner_warn(s, "right-hand side can have at most one string; skipping line");
goto skip;
}
if (*val.string == '\0') {
scanner_warn(s, "right-hand side string must not be empty; skipping line");
goto skip;
}
/* Guard the strcpy() below against overflowing production.string. */
if (strlen(val.string) >= sizeof(production.string)) {
scanner_warn(s, "right-hand side string is too long; skipping line");
goto skip;
}
strcpy(production.string, val.string);
production.has_string = true;
goto rhs;
case TOK_RHS_KEYSYM:
if (production.has_keysym) {
scanner_warn(s, "right-hand side can have at most one keysym; skipping line");
goto skip;
}
production.keysym = val.keysym;
production.has_keysym = true;
/* fallthrough: a keysym ends the right-hand side, so commit the production now. */
case TOK_END_OF_LINE:
if (!production.has_string && !production.has_keysym) {
scanner_warn(s, "right-hand side must have at least one of string or keysym; skipping line");
goto skip;
}
add_production(table, s, &production);
goto initial;
default:
goto unexpected;
}
/* Error accounting; lex() has already reported TOK_ERROR itself. */
unexpected:
if (tok != TOK_ERROR)
scanner_err(s, "unexpected token");
num_errors++;
if (num_errors <= MAX_ERRORS)
goto skip;
scanner_err(s, "too many errors");
goto fail;
fail:
scanner_err(s, "failed to parse file");
return false;
/* Discard the remainder of the current line, then start afresh. */
skip:
while (tok != TOK_END_OF_LINE && tok != TOK_END_OF_FILE)
tok = lex(s, &val);
goto initial;
finished:
return true;
}
/*
 * Parse Compose data held in memory into @table.
 *
 * @table:     compose table to fill.
 * @string:    the Compose text; @len is its length in bytes.
 * @file_name: name used in diagnostics.
 *
 * Sets up a scanner over the buffer together with a zeroed keysym name
 * cache for the lexer.  On success the table's arrays are shrunk to fit.
 *
 * Returns true on success, false if parsing failed.
 */
bool
parse_string(struct xkb_compose_table *table, const char *string, size_t len,
             const char *file_name)
{
    struct keysym_from_name_cache cache;
    struct scanner s;

    memset(&cache, 0, sizeof(cache));
    scanner_init(&s, table->ctx, string, len, file_name);
    s.priv = &cache;

    if (parse(table, &s, 0)) {
        /* Maybe the allocator can use the excess space. */
        darray_shrink(table->nodes);
        darray_shrink(table->utf8);
        return true;
    }

    return false;
}
/*
 * Parse the Compose file open as @file into @table.
 *
 * @file_name is used only for diagnostics.  The file's contents are
 * obtained with map_file(), handed to parse_string(), and released again
 * before returning.
 *
 * Returns true on success; false if the file could not be read (an error
 * is logged) or if parsing failed.
 */
bool
parse_file(struct xkb_compose_table *table, FILE *file, const char *file_name)
{
    const char *contents;
    size_t length;
    bool parsed;

    if (!map_file(file, &contents, &length)) {
        log_err(table->ctx, "Couldn't read Compose file %s: %s\n",
                file_name, strerror(errno));
        return false;
    }

    parsed = parse_string(table, contents, length, file_name);
    unmap_file(contents, length);
    return parsed;
}

36
src/compose/parser.h Normal file
View File

@ -0,0 +1,36 @@
/*
* Copyright © 2013 Ran Benita <ran234@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef COMPOSE_PARSER_H
#define COMPOSE_PARSER_H
/*
 * Parse the Compose text in @string (of @len bytes) into @table.
 * @file_name is the name to use in diagnostics.
 * Returns true on success, false on failure.
 */
bool
parse_string(struct xkb_compose_table *table,
const char *string, size_t len,
const char *file_name);
/*
 * Parse the Compose file open as @file into @table.
 * @file_name is the name to use in diagnostics.
 * Returns true on success, false on failure.
 */
bool
parse_file(struct xkb_compose_table *table,
FILE *file, const char *file_name);
#endif

204
src/compose/paths.c Normal file
View File

@ -0,0 +1,204 @@
/*
* Copyright © 2014 Ran Benita <ran234@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "utils.h"
#include "paths.h"
enum resolve_name_direction {
LEFT_TO_RIGHT,
RIGHT_TO_LEFT,
};
/*
 * Return the X locale directory: the XLOCALEDIR environment variable if
 * secure_getenv() yields it, otherwise the build-time XLOCALEDIR default.
 * The returned string is not owned by the caller.
 */
const char *
get_xlocaledir_path(void)
{
    const char *from_env = secure_getenv("XLOCALEDIR");

    return from_env ? from_env : XLOCALEDIR;
}
/*
 * Files like compose.dir have the format LEFT: RIGHT. Lookup @name in
 * such a file and return its matching value, according to @direction.
 * @filename is relative to the xlocaledir.
 *
 * Returns a newly allocated string which the caller must free(), or NULL
 * if the file cannot be opened or read, if the resulting path is too long,
 * if no line matches, or if allocation fails.
 */
static char *
resolve_name(const char *filename, enum resolve_name_direction direction,
             const char *name)
{
    int ret;
    bool ok;
    const char *xlocaledir;
    char path[512];
    FILE *file;
    const char *string, *end;
    size_t string_size;
    const char *s, *left, *right;
    char *match;
    size_t left_len, right_len, name_len;

    xlocaledir = get_xlocaledir_path();

    /* Reject errors and truncation alike. */
    ret = snprintf(path, sizeof(path), "%s/%s", xlocaledir, filename);
    if (ret < 0 || (size_t) ret >= sizeof(path))
        return NULL;

    file = fopen(path, "r");
    if (!file)
        return NULL;

    /* The contents are kept until unmap_file(); the stream is closed right away. */
    ok = map_file(file, &string, &string_size);
    fclose(file);
    if (!ok)
        return NULL;

    s = string;
    end = string + string_size;
    name_len = strlen(name);
    match = NULL;

    while (s < end) {
        /* Skip spaces. */
        while (s < end && is_space(*s))
            s++;

        /* Skip comments. */
        if (s < end && *s == '#') {
            while (s < end && *s != '\n')
                s++;
            continue;
        }

        /* Get the left value. */
        left = s;
        while (s < end && !is_space(*s) && *s != ':')
            s++;
        left_len = s - left;

        /* There's an optional colon between left and right. */
        if (s < end && *s == ':')
            s++;

        /* Skip spaces. */
        while (s < end && is_space(*s))
            s++;

        /* Get the right value. */
        right = s;
        while (s < end && !is_space(*s))
            s++;
        right_len = s - right;

        /* Discard rest of line. */
        while (s < end && *s != '\n')
            s++;

        /* The values are not NUL-terminated, so compare lengths first. */
        if (direction == LEFT_TO_RIGHT) {
            if (left_len == name_len && strncmp(left, name, left_len) == 0) {
                match = strndup(right, right_len);
                break;
            }
        }
        else if (direction == RIGHT_TO_LEFT) {
            if (right_len == name_len && strncmp(right, name, right_len) == 0) {
                match = strndup(left, left_len);
                break;
            }
        }
    }

    unmap_file(string, string_size);
    return match;
}
/*
 * Resolve @locale through the "locale.alias" file.
 *
 * Returns the aliased name if one is found, otherwise a copy of @locale
 * itself.  The result is heap-allocated and must be freed by the caller;
 * it is NULL only if the copy could not be allocated.
 */
char *
resolve_locale(const char *locale)
{
    char *aliased = resolve_name("locale.alias", LEFT_TO_RIGHT, locale);

    if (aliased)
        return aliased;

    return strdup(locale);
}
/*
 * Return the value of the XCOMPOSEFILE environment variable as obtained
 * through secure_getenv(), or NULL if it yields nothing.
 * The returned string is not owned by the caller.
 */
const char *
get_xcomposefile_path(void)
{
    const char *from_env = secure_getenv("XCOMPOSEFILE");

    return from_env;
}
/*
 * Build the path "$HOME/.XCompose".
 *
 * Returns a newly allocated string which the caller must free(), or NULL
 * if HOME is not available through secure_getenv() or if allocation fails.
 */
char *
get_home_xcompose_file_path(void)
{
    char *result;
    const char *home_dir = secure_getenv("HOME");

    if (home_dir == NULL)
        return NULL;

    /* On failure the contents of `result` are not to be used. */
    if (asprintf(&result, "%s/.XCompose", home_dir) < 0)
        return NULL;

    return result;
}
/*
 * Find the Compose file for @locale by looking it up in "compose.dir".
 *
 * An entry starting with '/' is returned as is; any other entry is taken
 * relative to the X locale directory.
 *
 * Returns a newly allocated path which the caller must free(), or NULL if
 * the locale has no entry or allocation fails.
 */
char *
get_locale_compose_file_path(const char *locale)
{
    char *entry;
    char *full_path;
    int written;

    /*
     * WARNING: Random workaround ahead.
     *
     * Non-UTF-8 Compose files are currently not supported.  The C/POSIX
     * locale is specified as the default fallback locale with an ASCII
     * charset, yet compose.dir maps it to iso8859-1/Compose, which is
     * ISO8859-1 rather than ASCII.  This is bound to come up often, our
     * API is UTF-8 based, and 99% of the time a C locale is really just a
     * misconfiguration for UTF-8, so do the most helpful thing and treat
     * it as en_US.UTF-8.
     */
    if (streq(locale, "C"))
        locale = "en_US.UTF-8";

    entry = resolve_name("compose.dir", RIGHT_TO_LEFT, locale);
    if (!entry)
        return NULL;

    /* Already absolute: hand the allocation straight to the caller. */
    if (entry[0] == '/')
        return entry;

    written = asprintf(&full_path, "%s/%s", get_xlocaledir_path(), entry);
    free(entry);

    return written < 0 ? NULL : full_path;
}

42
src/compose/paths.h Normal file
View File

@ -0,0 +1,42 @@
/*
* Copyright © 2014 Ran Benita <ran234@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef COMPOSE_RESOLVE_H
#define COMPOSE_RESOLVE_H
/*
 * Resolve @locale through the locale.alias file.
 * Returns a newly allocated string (the alias, or a copy of @locale when
 * there is none) which the caller must free().
 */
char *
resolve_locale(const char *locale);
/*
 * Return the X locale directory: the XLOCALEDIR environment variable if
 * set, otherwise the compiled-in default.  Not owned by the caller.
 */
const char *
get_xlocaledir_path(void);
/*
 * Return the XCOMPOSEFILE environment variable, or NULL if it is unset.
 * Not owned by the caller.
 */
const char *
get_xcomposefile_path(void);
/*
 * Return a newly allocated "$HOME/.XCompose" path which the caller must
 * free(), or NULL if HOME is unavailable or allocation fails.
 */
char *
get_home_xcompose_file_path(void);
/*
 * Return a newly allocated path of the Compose file for @locale according
 * to compose.dir, which the caller must free(), or NULL if none is found.
 */
char *
get_locale_compose_file_path(const char *locale);
#endif

196
src/compose/state.c Normal file
View File

@ -0,0 +1,196 @@
/*
* Copyright © 2013 Ran Benita <ran234@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "table.h"
#include "utils.h"
#include "keysym.h"
/* Per-sequence compose state: a position within a compose table's trie. */
struct xkb_compose_state {
/* Reference count; the state is freed when it drops to zero. */
int refcnt;
/* Flags given to xkb_compose_state_new(). */
enum xkb_compose_state_flags flags;
/* The compose table being walked; the state holds a reference to it. */
struct xkb_compose_table *table;
/*
 * Offsets into xkb_compose_table::nodes.
 *
 * They maintain the current and previous position in the trie; see
 * xkb_compose_state_feed().
 *
 * This is also sufficient for inferring the current status; see
 * xkb_compose_state_get_status().
 *
 * Offset 0 is the root node and stands for "no sequence in progress".
 */
uint32_t prev_context;
uint32_t context;
};
/*
 * Create a compose state for @table, positioned at the root of the trie.
 *
 * The new state starts with a reference count of 1 and takes its own
 * reference on @table.  @flags is stored as given.
 *
 * Returns the new state, or NULL if allocation fails.
 */
XKB_EXPORT struct xkb_compose_state *
xkb_compose_state_new(struct xkb_compose_table *table,
                      enum xkb_compose_state_flags flags)
{
    struct xkb_compose_state *state = calloc(1, sizeof(*state));

    if (state == NULL)
        return NULL;

    state->refcnt = 1;
    state->flags = flags;
    state->table = xkb_compose_table_ref(table);

    /* Both positions start at the root node. */
    state->context = 0;
    state->prev_context = 0;

    return state;
}
/*
 * Take a new reference on @state.
 * Returns @state itself, for convenience.
 */
XKB_EXPORT struct xkb_compose_state *
xkb_compose_state_ref(struct xkb_compose_state *state)
{
    state->refcnt += 1;

    return state;
}
/*
 * Drop a reference on @state.  When the last reference goes away, the
 * state's reference on its table is released and the state is freed.
 * Passing NULL is allowed and does nothing.
 */
XKB_EXPORT void
xkb_compose_state_unref(struct xkb_compose_state *state)
{
    if (state == NULL)
        return;

    state->refcnt--;
    if (state->refcnt > 0)
        return;

    xkb_compose_table_unref(state->table);
    free(state);
}
/*
 * Return the compose table @state was created with.
 * No new reference is taken on the table.
 */
XKB_EXPORT struct xkb_compose_table *
xkb_compose_state_get_compose_table(struct xkb_compose_state *state)
{
    struct xkb_compose_table *owner = state->table;

    return owner;
}
/*
 * Advance the compose state by one keysym.
 *
 * Modifier keysyms are ignored and leave the state untouched.  For any
 * other keysym the search starts at the level following the current node
 * (or at the root if the current node is a leaf) and scans that level's
 * siblings.  The matching node becomes the new context, or the root
 * (offset 0) if none matches; the old context is remembered in
 * prev_context.
 *
 * Returns XKB_COMPOSE_FEED_IGNORED for modifier keysyms and
 * XKB_COMPOSE_FEED_ACCEPTED otherwise.
 */
XKB_EXPORT enum xkb_compose_feed_result
xkb_compose_state_feed(struct xkb_compose_state *state, xkb_keysym_t keysym)
{
    const struct compose_node *candidate;
    uint32_t pos;

    /*
     * Modifiers do not affect the sequence directly.  In particular they
     * must not cancel it; otherwise a sequence like <dead_acute><A>,
     * which needs Shift pressed in the middle, would be impossible.
     *
     * This test is only approximate: deciding whether a key is a
     * "modifier key" really requires the keymap, which is not available
     * here.  It is, however, (approximately) what libX11 does as well.
     */
    if (xkb_keysym_is_modifier(keysym))
        return XKB_COMPOSE_FEED_IGNORED;

    /* Pick the first node of the level to search. */
    candidate = &darray_item(state->table->nodes, state->context);
    pos = 0;
    if (!candidate->is_leaf)
        pos = candidate->u.successor;

    /* Scan the sibling list until a match or the end of the list. */
    for (;;) {
        candidate = &darray_item(state->table->nodes, pos);
        if (candidate->keysym == keysym || candidate->next == 0)
            break;
        pos = candidate->next;
    }

    /* No sibling matched: fall back to the root. */
    if (candidate->keysym != keysym)
        pos = 0;

    state->prev_context = state->context;
    state->context = pos;

    return XKB_COMPOSE_FEED_ACCEPTED;
}
/*
 * Return @state to its initial position: both the current and the
 * previous context point at the root node again.
 */
XKB_EXPORT void
xkb_compose_state_reset(struct xkb_compose_state *state)
{
    state->context = 0;
    state->prev_context = 0;
}
/*
 * Derive the status of the sequence from the current and previous
 * trie positions:
 *  - at the root, coming from a non-leaf node: XKB_COMPOSE_CANCELLED;
 *  - at the root otherwise:                    XKB_COMPOSE_NOTHING;
 *  - at a non-leaf node:                       XKB_COMPOSE_COMPOSING;
 *  - at a leaf node:                           XKB_COMPOSE_COMPOSED.
 */
XKB_EXPORT enum xkb_compose_status
xkb_compose_state_get_status(struct xkb_compose_state *state)
{
    const struct compose_node *previous, *current;

    if (state->context == 0) {
        previous = &darray_item(state->table->nodes, state->prev_context);
        return previous->is_leaf ? XKB_COMPOSE_NOTHING
                                 : XKB_COMPOSE_CANCELLED;
    }

    current = &darray_item(state->table->nodes, state->context);
    return current->is_leaf ? XKB_COMPOSE_COMPOSED
                            : XKB_COMPOSE_COMPOSING;
}
/*
 * Write the string produced by the current position into @buffer.
 *
 * @buffer: destination, of @size bytes.
 *
 * If the current node is a leaf with a string, that string is written.
 * If it has only a keysym, the keysym's own UTF-8 representation is used
 * instead.  Otherwise (not a leaf, or the keysym has no representation)
 * @buffer is set to the empty string when @size > 0.
 *
 * Returns what snprintf() returns for the written string, or 0 in the
 * fallback case.
 */
XKB_EXPORT int
xkb_compose_state_get_utf8(struct xkb_compose_state *state,
                           char *buffer, size_t size)
{
    const struct compose_node *node =
        &darray_item(state->table->nodes, state->context);

    if (node->is_leaf) {
        char name[64];

        /* A stored string (or nothing at all to derive one from). */
        if (node->u.leaf.utf8 != 0 ||
            node->u.leaf.keysym == XKB_KEY_NoSymbol)
            return snprintf(buffer, size, "%s",
                            &darray_item(state->table->utf8,
                                         node->u.leaf.utf8));

        /*
         * Only a keysym was specified; try to do the most helpful thing.
         * A negative result is impossible, and 0 means the keysym has no
         * string representation.
         */
        if (xkb_keysym_to_utf8(node->u.leaf.keysym, name, sizeof(name)) > 0)
            return snprintf(buffer, size, "%s", name);
    }

    if (size > 0)
        buffer[0] = '\0';
    return 0;
}
/*
 * Return the keysym stored at the current position, or XKB_KEY_NoSymbol
 * if the current node is not a leaf.
 */
XKB_EXPORT xkb_keysym_t
xkb_compose_state_get_one_sym(struct xkb_compose_state *state)
{
    const struct compose_node *current =
        &darray_item(state->table->nodes, state->context);

    return current->is_leaf ? current->u.leaf.keysym : XKB_KEY_NoSymbol;
}

219
src/compose/table.c Normal file
View File

@ -0,0 +1,219 @@
/*
* Copyright © 2013 Ran Benita <ran234@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "utils.h"
#include "table.h"
#include "parser.h"
#include "paths.h"
static struct xkb_compose_table *
xkb_compose_table_new(struct xkb_context *ctx,
const char *locale,
enum xkb_compose_format format,
enum xkb_compose_compile_flags flags)
{
char *resolved_locale;
struct xkb_compose_table *table;
struct compose_node root;
resolved_locale = resolve_locale(locale);
if (!resolved_locale)
return NULL;
table = calloc(1, sizeof(*table));
if (!table) {
free(resolved_locale);
return NULL;
}
table->refcnt = 1;
table->ctx = xkb_context_ref(ctx);
table->locale = resolved_locale;
table->format = format;
table->flags = flags;
darray_init(table->nodes);
darray_init(table->utf8);
root.keysym = XKB_KEY_NoSymbol;
root.next = 0;
root.is_leaf = true;
root.u.leaf.utf8 = 0;
root.u.leaf.keysym = XKB_KEY_NoSymbol;
darray_append(table->nodes, root);
darray_append(table->utf8, '\0');
return table;
}
/*
 * Take a new reference on @table.
 * Returns @table itself, for convenience.
 */
XKB_EXPORT struct xkb_compose_table *
xkb_compose_table_ref(struct xkb_compose_table *table)
{
    table->refcnt += 1;

    return table;
}
/*
 * Drop a reference on @table.  When the last reference goes away, the
 * locale string, both arrays and the context reference are released and
 * the table is freed.  Passing NULL is allowed and does nothing.
 */
XKB_EXPORT void
xkb_compose_table_unref(struct xkb_compose_table *table)
{
    if (table == NULL)
        return;

    table->refcnt--;
    if (table->refcnt > 0)
        return;

    free(table->locale);
    darray_free(table->nodes);
    darray_free(table->utf8);
    xkb_context_unref(table->ctx);
    free(table);
}
/*
 * Build a compose table by parsing the open Compose file @file.
 *
 * @ctx:    context used for logging and referenced by the table.
 * @locale: locale the table is created for.
 * @format: must be XKB_COMPOSE_FORMAT_TEXT_V1.
 * @flags:  must not contain any bits beyond XKB_COMPOSE_COMPILE_NO_FLAGS.
 *
 * Returns the new table, or NULL (after logging where applicable) if the
 * arguments are rejected, the table cannot be created, or parsing fails.
 */
XKB_EXPORT struct xkb_compose_table *
xkb_compose_table_new_from_file(struct xkb_context *ctx,
                                FILE *file,
                                const char *locale,
                                enum xkb_compose_format format,
                                enum xkb_compose_compile_flags flags)
{
    struct xkb_compose_table *table;

    if (flags & ~(XKB_COMPOSE_COMPILE_NO_FLAGS)) {
        log_err_func(ctx, "unrecognized flags: %#x\n", flags);
        return NULL;
    }

    if (format != XKB_COMPOSE_FORMAT_TEXT_V1) {
        log_err_func(ctx, "unsupported compose format: %d\n", format);
        return NULL;
    }

    table = xkb_compose_table_new(ctx, locale, format, flags);
    if (table == NULL)
        return NULL;

    if (parse_file(table, file, "(unknown file)"))
        return table;

    /* Parsing failed: give up the only reference to the table. */
    xkb_compose_table_unref(table);
    return NULL;
}
/*
 * Build a compose table by parsing Compose text held in memory.
 *
 * `buffer` and `length` are handed to parse_string() unchanged.  Only
 * XKB_COMPOSE_FORMAT_TEXT_V1 and flags without any bits outside
 * XKB_COMPOSE_COMPILE_NO_FLAGS are accepted; anything else is logged and
 * rejected.
 *
 * Returns the new table, or NULL on invalid arguments, allocation failure
 * or parse failure.
 */
XKB_EXPORT struct xkb_compose_table *
xkb_compose_table_new_from_buffer(struct xkb_context *ctx,
                                  const char *buffer, size_t length,
                                  const char *locale,
                                  enum xkb_compose_format format,
                                  enum xkb_compose_compile_flags flags)
{
    struct xkb_compose_table *result;

    if ((flags & ~(XKB_COMPOSE_COMPILE_NO_FLAGS)) != 0) {
        log_err_func(ctx, "unrecognized flags: %#x\n", flags);
        return NULL;
    }

    if (format != XKB_COMPOSE_FORMAT_TEXT_V1) {
        log_err_func(ctx, "unsupported compose format: %d\n", format);
        return NULL;
    }

    result = xkb_compose_table_new(ctx, locale, format, flags);
    if (result == NULL)
        return NULL;

    if (parse_string(result, buffer, length, "(input string)"))
        return result;

    xkb_compose_table_unref(result);
    return NULL;
}
/*
 * Build a compose table for a locale by locating and parsing a Compose
 * file.
 *
 * Candidate files are tried in this order, and the first one that can be
 * opened for reading is used:
 *   1. the path returned by get_xcomposefile_path(),
 *   2. the path returned by get_home_xcompose_file_path(),
 *   3. the path returned by get_locale_compose_file_path() for the
 *      table's resolved locale.
 *
 * Flags with any bits outside XKB_COMPOSE_COMPILE_NO_FLAGS are logged and
 * rejected.
 *
 * Returns the new table, or NULL on invalid flags, allocation failure,
 * when no candidate file can be opened, or when parsing fails.
 */
XKB_EXPORT struct xkb_compose_table *
xkb_compose_table_new_from_locale(struct xkb_context *ctx,
                                  const char *locale,
                                  enum xkb_compose_compile_flags flags)
{
    struct xkb_compose_table *table;
    /* Heap-allocated candidate path owned by this function, if any. */
    char *path = NULL;
    /* Path of the file actually being tried; may alias `path`. */
    const char *cpath;
    FILE *file;
    bool ok;

    if (flags & ~(XKB_COMPOSE_COMPILE_NO_FLAGS)) {
        log_err_func(ctx, "unrecognized flags: %#x\n", flags);
        return NULL;
    }

    table = xkb_compose_table_new(ctx, locale, XKB_COMPOSE_FORMAT_TEXT_V1,
                                  flags);
    if (!table)
        return NULL;

    /*
     * This path is received as a const string and is never freed here,
     * so `path` deliberately stays NULL for this candidate.
     */
    cpath = get_xcomposefile_path();
    if (cpath) {
        file = fopen(cpath, "r");
        if (file)
            goto found_path;
    }

    cpath = path = get_home_xcompose_file_path();
    if (path) {
        file = fopen(path, "r");
        if (file)
            goto found_path;
    }
    free(path);
    path = NULL;

    cpath = path = get_locale_compose_file_path(table->locale);
    if (path) {
        file = fopen(path, "r");
        if (file)
            goto found_path;
    }
    free(path);
    path = NULL;

    log_err(ctx, "couldn't find a Compose file for locale \"%s\"\n", locale);
    xkb_compose_table_unref(table);
    return NULL;

found_path:
    ok = parse_file(table, file, cpath);
    fclose(file);
    if (!ok) {
        /* Release the candidate path too; it used to leak on this branch. */
        free(path);
        xkb_compose_table_unref(table);
        return NULL;
    }

    /*
     * Log `cpath`, not `path`: `path` is NULL when the file came from
     * get_xcomposefile_path(), and NULL must not be passed to "%s".
     */
    log_dbg(ctx, "created compose table from locale %s with path %s\n",
            table->locale, cpath);

    free(path);
    return table;
}

100
src/compose/table.h Normal file
View File

@ -0,0 +1,100 @@
/*
* Copyright © 2013 Ran Benita <ran234@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef COMPOSE_COMPOSE_H
#define COMPOSE_COMPOSE_H
#include "xkbcommon/xkbcommon-compose.h"
#include "utils.h"
#include "context.h"
/*
* The compose table data structure is a simple trie. An example will
* help. Given these sequences:
*
* <A> <B> : "first" dead_a
* <A> <C> <D> : "second" dead_b
* <E> <F> : "third" dead_c
*
* the trie would look like:
*
* [root] ---> [<A>] -----------------> [<E>] -#
*   |           |                        |
*   #           v                        v
*             [<B>] ---> [<C>] -#      [<F>] -#
*               |          |             |
*               #          v             #
*                        [<D>] -#
*                          |
*                          #
* where:
* - [root] is a special empty root node.
* - [<X>] is a node for a sequence keysym <X>.
* - right arrows are `next` pointers.
* - down arrows are `successor` pointers.
* - # is a nil pointer.
*
* The nodes are all kept in a contiguous array. Pointers are represented
* as integer offsets into this array. A nil pointer is represented as 0
* (which, helpfully, is the offset of the empty root node).
*
* Nodes without a successor are leaf nodes. Since a sequence cannot be a
* prefix of another, these are exactly the nodes which terminate the
* sequences (in a bijective manner).
*
* A leaf contains the result data of its sequence. The result keysym is
* contained in the node struct itself; the result UTF-8 string is a byte
* offset into an array of the form "\0first\0second\0third" (the initial
* \0 is so offset 0 points to an empty string).
*/
/*
 * One node of the compose trie.  Nodes are stored in
 * xkb_compose_table::nodes and refer to each other by array offset, with
 * offset 0 standing for a nil pointer.
 */
struct compose_node {
/* Sequence keysym this node stands for. */
xkb_keysym_t keysym;
/*
 * Offset into xkb_compose_table::nodes of the `next` node (the right
 * arrows in the trie diagram); 0 means there is none.
 */
unsigned int next:31;
/*
 * Set for nodes that terminate a sequence; such nodes carry their result
 * in u.leaf.  Other nodes presumably use u.successor instead.
 */
bool is_leaf:1;
union {
/*
 * Offset into xkb_compose_table::nodes of the `successor` node (the down
 * arrows in the trie diagram).
 */
uint32_t successor;
struct {
/* Byte offset into xkb_compose_table::utf8 of the result string. */
uint32_t utf8;
/* Result keysym of the sequence ending at this node. */
xkb_keysym_t keysym;
} leaf;
} u;
};
/*
 * A compiled compose table: the trie nodes plus the storage for the
 * result strings they refer to.  Reference counted.
 */
struct xkb_compose_table {
/* Number of outstanding references; the table is freed when it drops to 0. */
int refcnt;
/* Format the table was requested with. */
enum xkb_compose_format format;
/* Compile flags the table was requested with. */
enum xkb_compose_compile_flags flags;
/* Context this table holds a reference on. */
struct xkb_context *ctx;
/* Resolved locale string, owned by the table. */
char *locale;
/*
 * Result strings, NUL-separated; offset 0 holds a NUL so that it denotes
 * the empty string.
 */
darray_char utf8;
/* Trie nodes; offset 0 holds the empty root node and doubles as nil. */
darray(struct compose_node) nodes;
};
#endif

View File

@ -68,6 +68,20 @@ global:
xkb_state_layout_index_is_active;
xkb_state_led_name_is_active;
xkb_state_led_index_is_active;
xkb_compose_table_new_from_locale;
xkb_compose_table_new_from_file;
xkb_compose_table_new_from_buffer;
xkb_compose_table_ref;
xkb_compose_table_unref;
xkb_compose_state_new;
xkb_compose_state_ref;
xkb_compose_state_unref;
xkb_compose_state_get_compose_table;
xkb_compose_state_feed;
xkb_compose_state_reset;
xkb_compose_state_get_status;
xkb_compose_state_get_utf8;
xkb_compose_state_get_one_sym;
local:
*;
};