keysym: use a perfect hash function for case sensitive xkb_keysym_from_name
In 7d84809fdc
I added a fast path for the
case-sensitive case, but it is still slowing down Compose parsing.
Instead of the binary search, use a perfect hash function, computed with
a simple python module I found (vendored).
It is faster -- perf diff is:
Baseline Delta Abs Shared Object Symbol
........ ......... ................. ...................................
22.35% -14.04% libc-2.33.so [.] __strcmp_avx2
16.75% +10.28% bench-compose [.] xkb_keysym_from_name
20.72% +2.40% bench-compose [.] parse.constprop.0
2.29% -1.97% bench-compose [.] strcmp@plt
2.56% +1.81% bench-compose [.] resolve_name
2.37% +0.92% libc-2.33.so [.] __GI_____strtoull_l_internal
26.19% -0.63% bench-compose [.] lex
1.45% +0.56% libc-2.33.so [.] __memchr_avx2
1.13% -0.31% libc-2.33.so [.] __strcpy_avx2
Also reduces the binary size:
Before:
text data bss dec hex filename
341111 5064 8 346183 54847 build/libxkbcommon.so.0.0.0
After:
text data bss dec hex filename
330215 5064 8 335287 51db7 build/libxkbcommon.so.0.0.0
Note however that it's still larger than before 7d84809fdccbb5898d08388:
text data bss dec hex filename
320617 5168 8 325793 4f8a1 build/libxkbcommon.so.0.0.0
Signed-off-by: Ran Benita <ran@unusedvar.com>
master
parent
02b9cabf98
commit
68e69b7deb
|
@ -2,10 +2,15 @@
|
|||
|
||||
import re, sys, itertools
|
||||
|
||||
import perfect_hash
|
||||
|
||||
pattern = re.compile(r'^#define\s+XKB_KEY_(?P<name>\w+)\s+(?P<value>0x[0-9a-fA-F]+)\s')
|
||||
matches = [pattern.match(line) for line in open(sys.argv[1])]
|
||||
entries = [(m.group("name"), int(m.group("value"), 16)) for m in matches if m]
|
||||
|
||||
entries_isorted = sorted(entries, key=lambda e: e[0].lower())
|
||||
entries_kssorted = sorted(entries, key=lambda e: e[1])
|
||||
|
||||
print('''
|
||||
/**
|
||||
* This file comes from libxkbcommon and was generated by makekeys.py
|
||||
|
@ -24,7 +29,7 @@ print('''
|
|||
static const char *keysym_names =
|
||||
'''.strip())
|
||||
offs = 0
|
||||
for (name, _) in sorted(entries, key=lambda e: e[0].lower()):
|
||||
for (name, _) in entries_isorted:
|
||||
entry_offsets[name] = offs
|
||||
print(' "{name}\\0"'.format(name=name))
|
||||
offs += len(name) + 1
|
||||
|
@ -35,6 +40,35 @@ print('''
|
|||
#endif
|
||||
'''.strip())
|
||||
|
||||
|
||||
template = r'''
|
||||
static const uint16_t keysym_name_G[] = {
|
||||
$G
|
||||
};
|
||||
|
||||
static size_t
|
||||
keysym_name_hash_f(const char *key, const char *T)
|
||||
{
|
||||
size_t sum = 0;
|
||||
for (size_t i = 0; key[i] != '\0'; i++)
|
||||
sum += T[i % $NS] * key[i];
|
||||
return sum % $NG;
|
||||
}
|
||||
|
||||
static size_t
|
||||
keysym_name_perfect_hash(const char *key)
|
||||
{
|
||||
return (
|
||||
keysym_name_G[keysym_name_hash_f(key, "$S1")] +
|
||||
keysym_name_G[keysym_name_hash_f(key, "$S2")]
|
||||
) % $NG;
|
||||
}
|
||||
'''
|
||||
print(perfect_hash.generate_code(
|
||||
keys=[name for name, value in entries_isorted],
|
||||
template=template,
|
||||
))
|
||||
|
||||
print('''
|
||||
struct name_keysym {
|
||||
xkb_keysym_t keysym;
|
||||
|
@ -46,14 +80,10 @@ def print_entries(x):
|
|||
print(' {{ 0x{value:08x}, {offs} }}, /* {name} */'.format(offs=entry_offsets[name], value=value, name=name))
|
||||
|
||||
print('static const struct name_keysym name_to_keysym[] = {')
|
||||
print_entries(sorted(entries, key=lambda e: e[0]))
|
||||
print('};\n')
|
||||
|
||||
print('static const struct name_keysym name_to_keysym_icase[] = {')
|
||||
print_entries(sorted(entries, key=lambda e: e[0].lower()))
|
||||
print_entries(entries_isorted)
|
||||
print('};\n')
|
||||
|
||||
# *.sort() is stable so we always get the first keysym for duplicate
|
||||
print('static const struct name_keysym keysym_to_name[] = {')
|
||||
print_entries(next(g[1]) for g in itertools.groupby(sorted(entries, key=lambda e: e[1]), key=lambda e: e[1]))
|
||||
print_entries(next(g[1]) for g in itertools.groupby(entries_kssorted, key=lambda e: e[1]))
|
||||
print('};')
|
||||
|
|
|
@ -0,0 +1,672 @@
|
|||
# Derived from: https://github.com/ilanschnell/perfect-hash
|
||||
# Commit: e376138af70db9f668de7e23cf84671872a676d8
|
||||
|
||||
# BSD 3-Clause License
|
||||
#
|
||||
# Copyright (c) 2019, Ilan Schnell
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of the <organization> nor the
|
||||
# names of its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
|
||||
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
"""
|
||||
Generate a minimal perfect hash function for the keys in a file,
|
||||
desired hash values may be specified within this file as well.
|
||||
A given code template is filled with parameters, such that the
|
||||
output is code which implements the hash function.
|
||||
Templates can easily be constructed for any programming language.
|
||||
|
||||
The code is based on a program A.M. Kuchling wrote:
|
||||
http://www.amk.ca/python/code/perfect-hash
|
||||
|
||||
The algorithm the program uses is described in the paper
|
||||
'Optimal algorithms for minimal perfect hashing',
|
||||
Z. J. Czech, G. Havas and B.S. Majewski.
|
||||
http://citeseer.ist.psu.edu/122364.html
|
||||
|
||||
The algorithm works like this:
|
||||
|
||||
1. You have K keys, that you want to perfectly hash against some
|
||||
desired hash values.
|
||||
|
||||
2. Choose a number N larger than K. This is the number of
|
||||
vertices in a graph G, and also the size of the resulting table G.
|
||||
|
||||
3. Pick two random hash functions f1, f2, that return values from 0..N-1.
|
||||
|
||||
4. Now, for all keys, you draw an edge between vertices f1(key) and f2(key)
|
||||
of the graph G, and associate the desired hash value with that edge.
|
||||
|
||||
5. If G is cyclic, go back to step 2.
|
||||
|
||||
6. Assign values to each vertex such that, for each edge, you can add
|
||||
the values for the two vertices and get the desired (hash) value
|
||||
for that edge. This task is easy, because the graph is acyclic.
|
||||
This is done by picking a vertex, and assigning it a value of 0.
|
||||
Then do a depth-first search, assigning values to new vertices so that
|
||||
they sum up properly.
|
||||
|
||||
7. f1, f2, and vertex values of G now make up a perfect hash function.
|
||||
|
||||
|
||||
For simplicity, the implementation of the algorithm combines steps 5 and 6.
|
||||
That is, we check for loops in G and assign the vertex values in one procedure.
|
||||
If this procedure succeeds, G is acyclic and the vertex values are assigned.
|
||||
If the procedure fails, G is cyclic, and we go back to step 2, replacing G
|
||||
with a new graph, and thereby discarding the vertex values from the failed
|
||||
attempt.
|
||||
"""
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
import sys
|
||||
import random
|
||||
import string
|
||||
import subprocess
|
||||
import shutil
|
||||
import tempfile
|
||||
from collections import defaultdict
|
||||
from os.path import join
|
||||
|
||||
if sys.version_info[0] == 2:
|
||||
from cStringIO import StringIO
|
||||
else:
|
||||
from io import StringIO
|
||||
|
||||
|
||||
__version__ = '0.4.2'
|
||||
|
||||
|
||||
# Module-wide settings.  'verbose' enables progress output on stdout;
# 'trials' is the number of random-graph attempts made before the vertex
# table size NG is increased (see generate_hash).  Both are overridden by
# command-line options when this module is run as a script (see main).
verbose = False
trials = 150
|
||||
|
||||
|
||||
class Graph(object):
    """
    Implements a graph with 'N' vertices. First, you connect the graph with
    edges, which have a desired value associated. Then the vertex values
    are assigned, which will fail if the graph is cyclic. The vertex values
    are assigned such that the two values corresponding to an edge add up to
    the desired edge value (mod N).
    """
    def __init__(self, N):
        self.N = N                  # number of vertices

        # maps a vertex number to the list of tuples (vertex, edge value)
        # to which it is connected by edges.
        self.adjacent = defaultdict(list)

    def connect(self, vertex1, vertex2, edge_value):
        """
        Connect 'vertex1' and 'vertex2' with an edge, with associated
        value 'value'
        """
        # Add vertices to each other's adjacent list
        self.adjacent[vertex1].append((vertex2, edge_value))
        self.adjacent[vertex2].append((vertex1, edge_value))

    def assign_vertex_values(self):
        """
        Try to assign the vertex values, such that, for each edge, you can
        add the values for the two vertices involved and get the desired
        value for that edge, i.e. the desired hash key.
        This will fail when the graph is cyclic.

        This is done by a Depth-First Search of the graph.  If the search
        finds a vertex that was visited before, there's a loop and False is
        returned immediately, i.e. the assignment is terminated.
        On success (when the graph is acyclic) True is returned.
        """
        self.vertex_values = self.N * [-1]  # -1 means unassigned

        visited = self.N * [False]

        # Loop over all vertices, taking unvisited ones as roots.
        # The graph need not be connected, so each component is explored
        # from its own root.
        for root in range(self.N):
            if visited[root]:
                continue

            # explore tree starting at 'root'
            self.vertex_values[root] = 0    # set arbitrarily to zero

            # Stack of vertices to visit, a list of tuples (parent, vertex).
            # Iterative DFS: 'parent' is the vertex we arrived from, needed
            # below to avoid treating the incoming edge as a cycle.
            tovisit = [(None, root)]
            while tovisit:
                parent, vertex = tovisit.pop()
                visited[vertex] = True

                # Loop over adjacent vertices, but skip the vertex we arrived
                # here from the first time it is encountered.
                # (Only the FIRST occurrence is skipped: a second edge back
                # to the parent is a genuine multi-edge cycle and must be
                # detected below.)
                skip = True
                for neighbor, edge_value in self.adjacent[vertex]:
                    if skip and neighbor == parent:
                        skip = False
                        continue

                    if visited[neighbor]:
                        # We visited here before, so the graph is cyclic.
                        return False

                    tovisit.append((vertex, neighbor))

                    # Set new vertex's value to the desired edge value,
                    # minus the value of the vertex we came here from.
                    self.vertex_values[neighbor] = (
                        edge_value - self.vertex_values[vertex]) % self.N

        # check if all vertices have a valid value
        for vertex in range(self.N):
            assert self.vertex_values[vertex] >= 0

        # We got through, so the graph is acyclic,
        # and all values are now assigned.
        return True
|
||||
|
||||
|
||||
class StrSaltHash(object):
    """
    Random hash function generator.
    Simple byte-level hashing: each byte of the key is multiplied by the
    byte value of a character drawn from a random salt string, the products
    are summed up, and finally the result is taken modulo NG.
    """
    chars = string.ascii_letters + string.digits

    def __init__(self, N):
        # Hash values fall in range(N); the salt string grows on demand.
        self.N = N
        self.salt = ''

    def __call__(self, key):
        # Extend the salt one random character at a time until it covers
        # every character of 'key'.  Existing salt is never regenerated,
        # so this instance stays consistent across calls.
        while len(self.salt) < len(key):
            self.salt += random.choice(self.chars)

        acc = 0
        for salt_ch, key_ch in zip(self.salt, key):
            acc += ord(salt_ch) * ord(key_ch)
        return acc % self.N

    template = """
def hash_f(key, T):
    return sum(ord(T[i % $NS]) * ord(c) for i, c in enumerate(key)) % $NG

def perfect_hash(key):
    return (G[hash_f(key, "$S1")] +
            G[hash_f(key, "$S2")]) % $NG
"""
|
||||
|
||||
class IntSaltHash(object):
    """
    Random hash function generator.
    Simple byte-level hashing: each byte of the key is multiplied in
    sequence by an entry from a table of random integers, the products are
    summed up, and finally the result is taken modulo NG.
    """

    def __init__(self, N):
        # Hash values fall in range(N); the salt table grows on demand.
        self.N = N
        self.salt = []

    def __call__(self, key):
        # Extend the salt table until it covers every character of 'key'.
        # Existing entries are never regenerated, so this instance stays
        # consistent across calls.
        while len(self.salt) < len(key):
            self.salt.append(random.randint(1, self.N - 1))

        acc = 0
        for factor, ch in zip(self.salt, key):
            acc += factor * ord(ch)
        return acc % self.N

    template = """
S1 = [$S1]
S2 = [$S2]
assert len(S1) == len(S2) == $NS

def hash_f(key, T):
    return sum(T[i % $NS] * ord(c) for i, c in enumerate(key)) % $NG

def perfect_hash(key):
    return (G[hash_f(key, S1)] + G[hash_f(key, S2)]) % $NG
"""
|
||||
|
||||
def builtin_template(Hash):
    """Return the default Python code template for hash class 'Hash'.

    The result is a banner header declaring G, followed by the
    hash-class-specific middle section (Hash.template), followed by a
    sanity-check section that verifies every key hashes to its index.
    """
    header = """\
# =======================================================================
# ================= Python code for perfect hash function ===============
# =======================================================================

G = [$G]
"""
    footer = """
# ============================ Sanity check =============================

K = [$K]
assert len(K) == $NK

for h, k in enumerate(K):
    assert perfect_hash(k) == h
"""
    return header + Hash.template + footer
|
||||
|
||||
|
||||
# NOTE(review): "Interations" is a misspelling of "Iterations"; it is kept
# as-is because renaming would break any caller catching this exception by
# name (it is part of the module's public interface).
class TooManyInterationsError(Exception):
    """Raised when generate_hash gives up searching for an acyclic graph."""
    pass
|
||||
|
||||
|
||||
def generate_hash(keys, Hash=StrSaltHash):
    """
    Return hash functions f1 and f2, and G for a perfect minimal hash.
    Input is an iterable of 'keys', whose indices are the desired hash
    values.  'Hash' is a random hash function generator: Hash(N) returns a
    random hash function which returns hash values from 0..N-1.

    Raises TypeError for non-list/tuple input or non-string keys,
    ValueError for duplicate keys, and TooManyInterationsError when no
    acyclic graph is found before NG grows past 100 * (NK + 1).
    """
    if not isinstance(keys, (list, tuple)):
        raise TypeError("list or tuple expected")
    NK = len(keys)
    if NK != len(set(keys)):
        raise ValueError("duplicate keys")
    for key in keys:
        if not isinstance(key, str):
            # Fixed grammar of the error message ("key a not string").
            raise TypeError("key is not a string: %r" % key)
    if NK > 10000 and Hash == StrSaltHash:
        print("""\
WARNING: You have %d keys.
         Using --hft=1 is likely to fail for so many keys.
         Please use --hft=2 instead.
""" % NK)

    # the number of vertices in the graph G
    NG = NK + 1
    if verbose:
        print('NG = %d' % NG)

    trial = 0  # Number of trial graphs so far
    while True:
        if (trial % trials) == 0:   # trials failures, increase NG slightly
            if trial > 0:
                NG = max(NG + 1, int(1.05 * NG))
            if verbose:
                sys.stdout.write('\nGenerating graphs NG = %d ' % NG)
        trial += 1

        # Give up before the table becomes absurdly large.
        if NG > 100 * (NK + 1):
            raise TooManyInterationsError("%d keys" % NK)

        if verbose:
            sys.stdout.write('.')
            sys.stdout.flush()

        G = Graph(NG)   # Create graph with NG vertices
        f1 = Hash(NG)   # Create 2 random hash functions
        f2 = Hash(NG)

        # Connect vertices given by the values of the two hash functions
        # for each key.  Associate the desired hash value with each edge.
        for hashval, key in enumerate(keys):
            G.connect(f1(key), f2(key), hashval)

        # Try to assign the vertex values.  This will fail when the graph
        # is cyclic.  But when the graph is acyclic it will succeed and we
        # break out, because we're done.
        if G.assign_vertex_values():
            break

    if verbose:
        print('\nAcyclic graph found after %d trials.' % trial)
        print('NG = %d' % NG)

    # Sanity check the result by actually verifying that all the keys
    # hash to the right value.
    for hashval, key in enumerate(keys):
        assert hashval == (
            G.vertex_values[f1(key)] + G.vertex_values[f2(key)]
        ) % NG

    if verbose:
        print('OK')

    return f1, f2, G.vertex_values
|
||||
|
||||
|
||||
class Format(object):
    """Formats scalars and sequences as delimiter-separated, width-wrapped
    text suitable for embedding in generated source code."""

    def __init__(self, width=76, indent=4, delimiter=', '):
        self.width = width
        self.indent = indent
        self.delimiter = delimiter

    def print_format(self):
        """Print the current formatting options (for verbose mode)."""
        print("Format options:")
        for name in 'width', 'indent', 'delimiter':
            print('  %s: %r' % (name, getattr(self, name)))

    def __call__(self, data, quote=False):
        """Render 'data' as a wrapped, delimited string.

        Scalars are simply str()-ed; lists/tuples are joined with the
        configured delimiter and wrapped at the configured width.  With
        quote=True each element is wrapped in double quotes.
        """
        if not isinstance(data, (list, tuple)):
            return str(data)

        delim_len = len(self.delimiter)
        out = StringIO()
        column = 20  # assume ~20 columns of caller text precede line one
        n = len(data)
        for idx, item in enumerate(data):
            text = '"%s"' % item if quote else '%s' % item

            # Wrap before this item would overflow the configured width.
            if column + len(text) + delim_len > self.width:
                out.write('\n' + ' ' * self.indent)
                column = self.indent

            out.write(text)
            column += len(text)
            if idx != n - 1:
                out.write(self.delimiter)
                column += delim_len

        # Strip trailing whitespace left by delimiters at line ends.
        return '\n'.join(line.rstrip() for line in out.getvalue().split('\n'))
|
||||
|
||||
|
||||
def generate_code(keys, Hash=StrSaltHash, template=None, options=None):
    """
    Compute a perfect hash for 'keys' and substitute its parameter lists
    into the 'template' string.  'Hash' is the random hash function
    generator; 'options' optionally carries formatting attributes
    (width, indent, delimiter).  Returns the substituted code template.
    """
    f1, f2, G = generate_hash(keys, Hash)
    assert f1.N == f2.N == len(G)

    try:
        salt_len = len(f1.salt)
        assert salt_len == len(f2.salt)
    except TypeError:
        # Salt has no length; signalled to the template as None.
        salt_len = None

    if template is None:
        template = builtin_template(Hash)

    fmt = (Format() if options is None
           else Format(width=options.width, indent=options.indent,
                       delimiter=options.delimiter))

    if verbose:
        fmt.print_format()

    params = {
        'NS': salt_len,
        'S1': fmt(f1.salt),
        'S2': fmt(f2.salt),
        'NG': len(G),
        'G': fmt(G),
        'NK': len(keys),
        'K': fmt(list(keys), quote=True),
    }
    return string.Template(template).substitute(params)
|
||||
|
||||
|
||||
def read_table(filename, options):
    """
    Read keys from the file 'filename'.

    'options' carries the reader settings: 'comment' (comment marker),
    'splitby' (column separator) and 'keycol' (1-based key column index).
    The desired hash values are implicit: 0..N-1 in file order, where N is
    the number of data rows found.

    Exits the process with an error message on unreadable files, missing
    key columns, or an empty key set.
    """
    if verbose:
        print("Reading table from file `%s' to extract keys." % filename)
    try:
        fi = open(filename)
    except IOError:
        sys.exit("Error: Could not open `%s' for reading." % filename)

    keys = []

    if verbose:
        print("Reader options:")
        for name in 'comment', 'splitby', 'keycol':
            print('  %s: %r' % (name, getattr(options, name)))

    for n, line in enumerate(fi):
        line = line.strip()
        if not line or line.startswith(options.comment):
            continue

        if line.count(options.comment):  # strip content after comment
            line = line.split(options.comment)[0].strip()

        row = [col.strip() for col in line.split(options.splitby)]

        try:
            key = row[options.keycol - 1]
        except IndexError:
            sys.exit("%s:%d: Error: Cannot read key, not enough columns." %
                     (filename, n + 1))

        keys.append(key)

    fi.close()

    if not keys:
        # Was bare builtin exit(); sys.exit is the reliable, consistent
        # form (exit() comes from the site module and may be absent).
        sys.exit("Error: no keys found in file `%s'." % filename)

    return keys
|
||||
|
||||
|
||||
def read_template(filename):
    """Return the full contents of the template file 'filename'.

    Exits the process with an error message when the file cannot be read.
    """
    if verbose:
        print("Reading template from file `%s'" % filename)
    try:
        fi = open(filename, 'r')
        try:
            return fi.read()
        finally:
            fi.close()
    except IOError:
        sys.exit("Error: Could not open `%s' for reading." % filename)
|
||||
|
||||
|
||||
def run_code(code):
    """Execute 'code' in a fresh Python subprocess.

    The code is written to a temporary script which is run with the
    current interpreter.  A non-zero exit status is converted into an
    AssertionError.  The temporary directory is always cleaned up.
    """
    workdir = tempfile.mkdtemp()
    script = join(workdir, 't.py')
    with open(script, 'w') as handle:
        handle.write(code)
    try:
        subprocess.check_call([sys.executable, script])
    except subprocess.CalledProcessError as exc:
        raise AssertionError(exc)
    finally:
        shutil.rmtree(workdir)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point.

    Parses options, reads the keys file (and optionally a template file),
    generates the perfect-hash code and writes it to the selected output.
    """
    from optparse import OptionParser

    usage = "usage: %prog [options] KEYS_FILE [TMPL_FILE]"

    description = """\
Generates code for perfect hash functions from
a file with keywords and a code template.
If no template file is provided, a small built-in Python template
is processed and the output code is written to stdout.
"""

    parser = OptionParser(usage = usage,
                          description = description,
                          prog = sys.argv[0],
                          version = "%prog: " + __version__)

    parser.add_option("--delimiter",
                      action = "store",
                      default = ", ",
                      help = "Delimiter for list items used in output, "
                             "the default delimiter is '%default'",
                      metavar = "STR")

    parser.add_option("--indent",
                      action = "store",
                      default = 4,
                      type = "int",
                      help = "Make INT spaces at the beginning of a "
                             "new line when generated list is wrapped. "
                             "Default is %default",
                      metavar = "INT")

    parser.add_option("--width",
                      action = "store",
                      default = 76,
                      type = "int",
                      help = "Maximal width of generated list when "
                             "wrapped. Default width is %default",
                      metavar = "INT")

    parser.add_option("--comment",
                      action = "store",
                      default = "#",
                      help = "STR is the character, or sequence of "
                             "characters, which marks the beginning "
                             "of a comment (which runs till "
                             "the end of the line), in the input "
                             "KEYS_FILE. "
                             "Default is '%default'",
                      metavar = "STR")

    parser.add_option("--splitby",
                      action = "store",
                      default = ",",
                      help = "STR is the character by which the columns "
                             "in the input KEYS_FILE are split. "
                             "Default is '%default'",
                      metavar = "STR")

    parser.add_option("--keycol",
                      action = "store",
                      default = 1,
                      type = "int",
                      help = "Specifies the column INT in the input "
                             "KEYS_FILE which contains the keys. "
                             "Default is %default, i.e. the first column.",
                      metavar = "INT")

    parser.add_option("--trials",
                      action = "store",
                      default = 5,
                      type = "int",
                      help = "Specifies the number of trials before "
                             "NG is increased. A small INT will give "
                             "compute faster, but the array G will be "
                             "large. A large INT will take longer to "
                             "compute but G will be smaller. "
                             "Default is %default",
                      metavar = "INT")

    parser.add_option("--hft",
                      action = "store",
                      default = 1,
                      type = "int",
                      help = "Hash function type INT. Possible values "
                             "are 1 (StrSaltHash) and 2 (IntSaltHash). "
                             "The default is %default",
                      metavar = "INT")

    parser.add_option("-e", "--execute",
                      action = "store_true",
                      help = "Execute the generated code within "
                             "the Python interpreter.")

    parser.add_option("-o", "--output",
                      action = "store",
                      help = "Specify output FILE explicitly. "
                             "`-o std' means standard output. "
                             "`-o no' means no output. "
                             "By default, the file name is obtained "
                             "from the name of the template file by "
                             "substituting `tmpl' to `code'.",
                      metavar = "FILE")

    parser.add_option("-v", "--verbose",
                      action = "store_true",
                      help = "verbosity")

    options, args = parser.parse_args()

    if options.trials <= 0:
        parser.error("trials before increasing N has to be larger than zero")

    # Propagate the CLI settings to the module-level knobs used by
    # generate_hash and friends.
    global trials
    trials = options.trials

    global verbose
    verbose = options.verbose

    if len(args) not in (1, 2):
        parser.error("incorrect number of arguments")

    if len(args) == 2 and not args[1].count('tmpl'):
        parser.error("template filename does not contain 'tmpl'")

    if options.hft == 1:
        Hash = StrSaltHash
    elif options.hft == 2:
        Hash = IntSaltHash
    else:
        parser.error("Hash function %s not implemented." % options.hft)

    # --------------------- end parsing and checking --------------

    keys_file = args[0]

    if verbose:
        print("keys_file = %r" % keys_file)

    keys = read_table(keys_file, options)

    if verbose:
        # Fixed typo: was "Number os keys".
        print("Number of keys: %d" % len(keys))

    tmpl_file = args[1] if len(args) == 2 else None

    if verbose:
        print("tmpl_file = %r" % tmpl_file)

    template = read_template(tmpl_file) if tmpl_file else None

    if options.output:
        outname = options.output
    else:
        if tmpl_file:
            # NOTE: redundant with the parser.error check above, but kept
            # as a defensive guard for the default-output code path.
            if 'tmpl' not in tmpl_file:
                sys.exit("Hmm, template filename does not contain 'tmpl'")
            outname = tmpl_file.replace('tmpl', 'code')
        else:
            outname = 'std'

    if verbose:
        print("outname = %r\n" % outname)

    if outname == 'std':
        outstream = sys.stdout
    elif outname == 'no':
        outstream = None
    else:
        try:
            outstream = open(outname, 'w')
        except IOError:
            sys.exit("Error: Could not open `%s' for writing." % outname)

    code = generate_code(keys, Hash, template, options)

    # Self-test the generated code when asked to, or when the built-in
    # (pure Python) template was used and is therefore executable.
    if options.execute or template == builtin_template(Hash):
        if verbose:
            print('Executing code...\n')
        run_code(code)

    if outstream:
        outstream.write(code)
        if not outname == 'std':
            outstream.close()
|
||||
|
||||
|
||||
# Allow use both as an importable library (import perfect_hash) and as a
# standalone command-line tool.
if __name__ == '__main__':
    main()
|
27
src/keysym.c
27
src/keysym.c
|
@ -107,22 +107,17 @@ xkb_keysym_from_name(const char *name, enum xkb_keysym_flags flags)
|
|||
* So do it in a fast path.
|
||||
*/
|
||||
if (!icase) {
|
||||
size_t lo = 0, hi = ARRAY_SIZE(name_to_keysym) - 1;
|
||||
while (hi >= lo) {
|
||||
size_t mid = (lo + hi) / 2;
|
||||
int cmp = strcmp(name, get_name(&name_to_keysym[mid]));
|
||||
if (cmp > 0)
|
||||
lo = mid + 1;
|
||||
else if (cmp < 0)
|
||||
hi = mid - 1;
|
||||
else
|
||||
return name_to_keysym[mid].keysym;
|
||||
size_t pos = keysym_name_perfect_hash(name);
|
||||
if (pos < ARRAY_SIZE(name_to_keysym)) {
|
||||
const char *s = get_name(&name_to_keysym[pos]);
|
||||
if (strcmp(name, s) == 0)
|
||||
return name_to_keysym[pos].keysym;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Find the correct keysym for case-insensitive match.
|
||||
*
|
||||
* The name_to_keysym_icase table is sorted by istrcmp(). So the binary
|
||||
* The name_to_keysym table is sorted by istrcmp(). So the binary
|
||||
* search may return _any_ of all possible case-insensitive duplicates. This
|
||||
* code searches the entry, all previous and all next entries that match by
|
||||
* case-insensitive comparison and returns the "best" case-insensitive
|
||||
|
@ -135,16 +130,16 @@ xkb_keysym_from_name(const char *name, enum xkb_keysym_flags flags)
|
|||
* lower-case match is enough in this case.
|
||||
*/
|
||||
else {
|
||||
size_t lo = 0, hi = ARRAY_SIZE(name_to_keysym_icase) - 1;
|
||||
size_t lo = 0, hi = ARRAY_SIZE(name_to_keysym) - 1;
|
||||
while (hi >= lo) {
|
||||
size_t mid = (lo + hi) / 2;
|
||||
int cmp = istrcmp(name, get_name(&name_to_keysym_icase[mid]));
|
||||
int cmp = istrcmp(name, get_name(&name_to_keysym[mid]));
|
||||
if (cmp > 0) {
|
||||
lo = mid + 1;
|
||||
} else if (cmp < 0) {
|
||||
hi = mid - 1;
|
||||
} else {
|
||||
entry = &name_to_keysym_icase[mid];
|
||||
entry = &name_to_keysym[mid];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -154,14 +149,14 @@ xkb_keysym_from_name(const char *name, enum xkb_keysym_flags flags)
|
|||
if (icase && xkb_keysym_is_lower(entry->keysym))
|
||||
return entry->keysym;
|
||||
|
||||
for (iter = entry - 1; iter >= name_to_keysym_icase; --iter) {
|
||||
for (iter = entry - 1; iter >= name_to_keysym; --iter) {
|
||||
if (istrcmp(get_name(iter), get_name(entry)) != 0)
|
||||
break;
|
||||
if (xkb_keysym_is_lower(iter->keysym))
|
||||
return iter->keysym;
|
||||
}
|
||||
|
||||
last = name_to_keysym_icase + ARRAY_SIZE(name_to_keysym_icase);
|
||||
last = name_to_keysym + ARRAY_SIZE(name_to_keysym);
|
||||
for (iter = entry + 1; iter < last; ++iter) {
|
||||
if (istrcmp(get_name(iter), get_name(entry)) != 0)
|
||||
break;
|
||||
|
|
2870
src/ks_tables.h
2870
src/ks_tables.h
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue