Merge branch 'topic/sync-to-go-2'
[s-roff.git] / src / lib-roff / make-uniuni.sh
blobf7d71c8309d26afbea9e9854f2d642a319128182
1 #!/bin/sh
2 #@ make-uniuni.sh -- script for creating the file uniuni.cpp
3 #@ Synopsis:
4 # $ make-uniuni.sh <version-string> < UnicodeData.txt > uniuni.cpp
5 # `UnicodeData.txt' is the central database file from the Unicode standard.
6 # Unfortunately, it doesn't contain a version number which must be thus
7 # provided manually as a parameter to the filter.
8 # This program needs a C preprocessor.
10 # Copyright (c) 2014 - 2017 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
12 # Copyright (C) 2005, 2006
13 # Free Software Foundation, Inc.
14 # Written by Werner Lemberg <wl@gnu.org>
16 # This is free software; you can redistribute it and/or modify it under
17 # the terms of the GNU General Public License as published by the Free
18 # Software Foundation; either version 2, or (at your option) any later
19 # version.
21 # This is distributed in the hope that it will be useful, but WITHOUT ANY
22 # WARRANTY; without even the implied warranty of MERCHANTABILITY or
23 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
24 # for more details.
26 # You should have received a copy of the GNU General Public License along
27 # with groff; see the file COPYING. If not, write to the Free Software
28 # Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA.
# C preprocessor command used for the recursive expansion step below.
CPP=cpp

prog="$0"

# Exactly one argument, the Unicode version string, is required.
# The usage text is written to stderr: per the synopsis, stdout is
# redirected into the generated uniuni.cpp and must not receive it.
if test $# -ne 1; then
  echo "usage: $prog <version-string> < UnicodeData.txt > uniuni.cpp" >&2
  exit 1
fi

version_string="$1"
# Reduce UnicodeData.txt (read from stdin) to `code;decomposition' records
# and store them in the first temporary file.  The commands run in order
# on every input line:
#   1. drop records whose second field starts with `<' (ranges and
#      control characters),
#   2. keep only the first field and the decomposition field,
#   3. drop records whose decomposition field is empty,
#   4. drop records whose decomposition starts with `<', i.e. all
#      compatibility decompositions.
sed -e '/^[^;]*;</d' \
    -e 's/;[^;]*;[^;]*;[^;]*;[^;]*;\([^;]*\);.*$/;\1/' \
    -e '/^[^;]*;$/d' \
    -e '/^[^;]*;</d' > $$1
# Build the preprocessor input in the second temporary file.
# Each record `code;a b ...' first becomes a macro definition
#   #define ucode ua ub ...
# (every space gets a `u' appended, so each code point turns into an
# identifier starting with `u').
sed -e 's/^\([^;]*\);/#define \1 /' \
    -e 's/ / u/g' < $$1 > $$2

# After the definitions, append one line `code ucode' per record; the
# macro reference on it is what the preprocessor will expand.
sed -e 's/^\([^;]*\);.*$/\1 u\1/' < $$1 >> $$2

# Let the C preprocessor do the recursive decomposition: it reads the
# second temporary file and writes the expanded text to the third.
$CPP $$2 $$3
# Convert the preprocessor output back to `code;decomposition' records
# in the fourth temporary file:
#   - delete every line containing `#',
#   - delete empty lines,
#   - squeeze runs of spaces into a single space (written as `  *',
#     two spaces and a star, because `\+' is not defined for POSIX
#     basic regular expressions),
#   - strip trailing spaces,
#   - remove the `u' markers again,
#   - turn the first space back into the `;' field separator.
sed -e '/#/d' \
    -e '/^$/d' \
    -e 's/  */ /g' \
    -e 's/ *$//' \
    -e 's/u//g' \
    -e 's/^\([^ ]*\) /\1;/' < $$3 > $$4
# Write preamble.
# The here-document delimiter is unquoted on purpose: $version_string and
# the date command substitution are expanded while the text is emitted.
# The text ends with the opening of the unicode_decompose_list[]
# initializer; the table entries and the closing brace are written by
# the following steps.
cat <<END
/* Copyright (C) 2002 - 2005
 * Free Software Foundation, Inc.
 * Written by Werner Lemberg <wl@gnu.org> */

// This code has been algorithmically derived from the file
// UnicodeData.txt, version $version_string, available from unicode.org,
// on `date '+%Y-%m-%d'`.

#include "config.h"

#include "lib.h"
#include "ptable.h"
#include "stringclass.h"
#include "unicode.h"

struct unicode_decompose {
  char *value;
};

declare_ptable(unicode_decompose)
implement_ptable(unicode_decompose)

PTABLE(unicode_decompose) unicode_decompose_table;

// the first digit in the composite string gives the number of composites

struct S {
  const char *key;
  const char *value;
} unicode_decompose_list[] = { // FIXME const
END
# Emit Unicode data: one initializer line per record of the fourth
# temporary file.  Spaces between the components are replaced by `_';
# the substitutions are then tried from four components down to one, and
# the matching one prefixes the value string with that component count.
# Once a record has been rewritten it no longer contains a `;', so the
# later substitutions leave it alone.
sed -e 's/ /_/g' \
    -e 's/\(.*\);\(.*_.*_.*_.*\)$/ { "\1", "4\2" },/' \
    -e 's/\(.*\);\(.*_.*_.*\)$/ { "\1", "3\2" },/' \
    -e 's/\(.*\);\(.*_.*\)$/ { "\1", "2\2" },/' \
    -e 's/\(.*\);\(.*\)$/ { "\1", "1\2" },/' < $$4
# Write postamble.
# Closes the unicode_decompose_list[] initializer opened by the preamble,
# then emits the static initializer object that fills
# unicode_decompose_table from the list, and the lookup function
# decompose_unicode().
cat <<END
};

// global constructor

static struct unicode_decompose_init { // FIXME UGH, static ctor init
  unicode_decompose_init();
} _unicode_decompose_init;

unicode_decompose_init::unicode_decompose_init()
{
  for (unsigned int i = 0;
       i < sizeof(unicode_decompose_list)/sizeof(unicode_decompose_list[0]);
       i++) {
    unicode_decompose *dec = new unicode_decompose[1];
    dec->value = (char *)unicode_decompose_list[i].value;
    unicode_decompose_table.define(unicode_decompose_list[i].key, dec);
  }
}

const char *decompose_unicode(const char *s)
{
  unicode_decompose *result = unicode_decompose_table.lookup(s);
  return result ? result->value : 0;
}

// s-it2-mode
END
# Remove temporary files: the four intermediate files, each named by the
# shell's process id followed by a digit from 1 to 4.
rm "$$1" "$$2" "$$3" "$$4"
144 # s-sh-mode