2 #@ make-uniuni.sh -- script for creating the file uniuni.cpp
4 # $ make-uniuni.sh <version-string> < UnicodeData.txt > uniuni.cpp
5 # `UnicodeData.txt' is the central database file from the Unicode standard.
6 # Unfortunately, it doesn't contain a version number, which must thus be
7 # provided manually as a parameter to the filter.
8 # This program needs a C preprocessor.
10 # Copyright (c) 2014 - 2017 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
12 # Copyright (C) 2005, 2006
13 # Free Software Foundation, Inc.
14 # Written by Werner Lemberg <wl@gnu.org>
16 # This is free software; you can redistribute it and/or modify it under
17 # the terms of the GNU General Public License as published by the Free
18 # Software Foundation; either version 2, or (at your option) any later
21 # This is distributed in the hope that it will be useful, but WITHOUT ANY
22 # WARRANTY; without even the implied warranty of MERCHANTABILITY or
23 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
26 # You should have received a copy of the GNU General Public License along
27 # with groff; see the file COPYING. If not, write to the Free Software
28 # Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA.
34 if test $# -ne 1; then
35 echo "usage: $0 <version-string> < UnicodeData.txt > uniuni.cpp"
41 # Remove ranges and control characters,
42 # then extract the decomposition field,
43 # then remove lines without decomposition,
44 # then remove all compatibility decompositions.
45 sed -e '/^[^;]*;</d' \
46 |
sed -e 's/;[^;]*;[^;]*;[^;]*;[^;]*;\([^;]*\);.*$/;\1/' \
47 |
sed -e '/^[^;]*;$/d' \
48 |
sed -e '/^[^;]*;</d' > $
$1
50 # Prepare input for running cpp.
52 |
sed -e 's/^\([^;]*\);/#define \1 /' \
55 |
sed -e 's/^\([^;]*\);.*$/\1 u\1/' >> $
$2
57 # Run the C preprocessor to decompose recursively.
60 # Convert the preprocessor output back to the original format.
67 -e 's/^\([^ ]*\) /\1;/' > $
$4
71 /* Copyright (C) 2002 - 2005
72 * Free Software Foundation, Inc.
73 * Written by Werner Lemberg <wl@gnu.org> */
75 // This code has been algorithmically derived from the file
76 // UnicodeData.txt, version $version_string, available from unicode.org,
77 // on `date '+%Y-%m-%d'`.
83 #include "stringclass.h"
86 struct unicode_decompose {
90 declare_ptable(unicode_decompose)
91 implement_ptable(unicode_decompose)
93 PTABLE(unicode_decompose) unicode_decompose_table;
95 // the first digit in the composite string gives the number of composites
100 } unicode_decompose_list[] = { // FIXME const
106 -e 's/\(.*\);\(.*_.*_.*_.*\)$/ { "\1", "4\2" },/' \
107 -e 's/\(.*\);\(.*_.*_.*\)$/ { "\1", "3\2" },/' \
108 -e 's/\(.*\);\(.*_.*\)$/ { "\1", "2\2" },/' \
109 -e 's/\(.*\);\(.*\)$/ { "\1", "1\2" },/'
115 // global constructor
117 static struct unicode_decompose_init { // FIXME UGH, static ctor init
118 unicode_decompose_init();
119 } _unicode_decompose_init;
121 unicode_decompose_init::unicode_decompose_init()
123 for (unsigned int i = 0;
124 i < sizeof(unicode_decompose_list)/sizeof(unicode_decompose_list[0]);
126 unicode_decompose *dec = new unicode_decompose[1];
127 dec->value = (char *)unicode_decompose_list[i].value;
128 unicode_decompose_table.define(unicode_decompose_list[i].key, dec);
132 const char *decompose_unicode(const char *s)
134 unicode_decompose *result = unicode_decompose_table.lookup(s);
135 return result ? result->value : 0;
141 # Remove temporary files.