config: use utf8_bom[] from utf.[ch] in git_parse_source()
[alt-git.git] / update_unicode.sh
blob000b937e6828d378829cfe8a25e93ce07c531574
#!/bin/sh
#
# Regenerate the zero_width[] and double_width[] interval tables in
# unicode_width.h from the Unicode Character Database.
#
# Everything is done inside a scratch directory "unicode" created in the
# current directory: the UCD data files are downloaded there (only if
# they are not already present), the "uniset" tool is cloned and built
# there, and the resulting header is written one level up, i.e. next to
# the "unicode" directory.
#
# Requires: wget, git, autoreconf, make and a C toolchain.
#
#See http://www.unicode.org/reports/tr44/
#
# General categories emitted into zero_width[]:
#Me Enclosing_Mark  an enclosing combining mark
#Mn Nonspacing_Mark a nonspacing combining mark (zero advance width)
#Cf Format          a format control character
#
# Path of the generated header, relative to the "unicode" directory.
UNICODEWIDTH_H=../unicode_width.h
if ! test -d unicode; then
	mkdir unicode
fi &&
(
	# Outer subshell: every step below runs inside unicode/, which is
	# what makes ../unicode_width.h, ./uniset/uniset and UNICODE_DIR=.
	# resolve to the intended locations.
	cd unicode &&
	if ! test -f UnicodeData.txt; then
		wget http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
	fi &&
	if ! test -f EastAsianWidth.txt; then
		wget http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
	fi &&
	if ! test -d uniset; then
		git clone https://github.com/depp/uniset.git
	fi &&
	(
		# Inner subshell: build uniset without changing the working
		# directory of the generation step that follows.
		cd uniset &&
		if ! test -x uniset; then
			# No executable yet, so the tree has not been configured.
			autoreconf -i &&
			./configure --enable-warnings=-Werror CFLAGS='-O0 -ggdb'
		fi &&
		make
	) &&
	{
		# UNICODE_DIR=. points uniset at the current directory, where
		# the data files were downloaded above (presumably that is
		# where it looks for them -- see the uniset documentation).
		echo "static const struct interval zero_width[] = {" &&
		# Me/Mn/Cf plus U+1160..U+11FF (Hangul Jamo medial vowels and
		# final consonants), minus U+00AD (SOFT HYPHEN).  Lines of the
		# uniset output containing "plane" are filtered out.
		UNICODE_DIR=. ./uniset/uniset --32 cat:Me,Mn,Cf + U+1160..U+11FF - U+00AD |
		grep -v plane &&
		echo "};" &&
		echo "static const struct interval double_width[] = {" &&
		# East Asian Width classes F (Fullwidth) and W (Wide).
		UNICODE_DIR=. ./uniset/uniset --32 eaw:F,W &&
		echo "};"
	} >"$UNICODEWIDTH_H"
)