3 # Copyright (C) 2015-2024 Free Software Foundation, Inc.
5 # Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
6 # National Institute of Advanced Industrial Science and Technology (AIST)
7 # Registration Number H13PRO009
9 # This file is part of GNU Emacs.
11 # GNU Emacs is free software: you can redistribute it and/or modify
12 # it under the terms of the GNU General Public License as published by
13 # the Free Software Foundation, either version 3 of the License, or
14 # (at your option) any later version.
16 # GNU Emacs is distributed in the hope that it will be useful,
17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 # GNU General Public License for more details.
21 # You should have received a copy of the GNU General Public License
22 # along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
26 # Generate charset maps in etc/charsets.
# Roots of the configured source and build trees (filled in by configure).
top_srcdir   = @top_srcdir@
top_builddir = @top_builddir@

# Where generated charset maps and Lisp translation tables are written,
# and where the map sources shipped with this directory live.
charsetdir = $(top_srcdir)/etc/charsets
lispintdir = $(top_srcdir)/lisp/international
mapfiledir = $(srcdir)/mapfiles

# Directory holding the gzip-compressed glibc charmaps used as rule inputs.
GLIBC_CHARMAPS = $(srcdir)/glibc

# Optional include (leading '-': silently skipped when the file is absent).
# NOTE(review): presumably defines AM_V_GEN / AM_V_at used by the recipes
# below -- confirm against src/verbose.mk.
-include $(top_builddir)/src/verbose.mk
# Note: We cannot prepend "ISO-" to these map files because of the
# file name limit on DOS.
# ISO8859 is one of the lists that make up ${CHARSETS}; each entry is
# produced by the ${charsetdir}/8859-%.map pattern rule.
ISO8859 = \
  8859-2.map 8859-3.map 8859-4.map 8859-5.map 8859-6.map 8859-7.map \
  8859-8.map 8859-9.map 8859-10.map 8859-11.map 8859-13.map 8859-14.map \
  8859-15.map 8859-16.map
# IBM code pages; one of the lists that make up ${CHARSETS}.
IBM = \
  IBM037.map IBM038.map \
  IBM256.map IBM273.map IBM274.map IBM275.map IBM277.map IBM278.map \
  IBM280.map IBM281.map IBM284.map IBM285.map IBM290.map IBM297.map \
  IBM420.map IBM423.map IBM424.map IBM437.map IBM500.map IBM850.map \
  IBM851.map IBM852.map IBM855.map IBM856.map IBM857.map IBM860.map \
  IBM861.map IBM862.map IBM863.map IBM864.map IBM865.map IBM866.map \
  IBM868.map IBM869.map IBM870.map IBM871.map IBM874.map IBM875.map \
  IBM880.map IBM891.map IBM903.map IBM904.map IBM905.map IBM918.map \
  IBM1004.map IBM1026.map IBM1047.map
# Code-page maps; one of the lists that make up ${CHARSETS}.
# CP720.map and CP858.map have their own explicit rules further down and
# are listed here so that `all' actually builds them.
# The list must not end in a backslash, or the following assignment
# would be folded into this value.
CODEPAGE = \
  CP737.map CP775.map CP1125.map \
  CP1250.map CP1251.map CP1252.map CP1253.map CP1254.map \
  CP1255.map CP1256.map CP1257.map CP1258.map \
  CP720.map CP858.map
# Chinese, Japanese and Korean maps; one of the lists that make up
# ${CHARSETS}.  CNS-F.map is included because it has an explicit rule
# below and would otherwise never be built by `all'.
CJK = \
  GB2312.map GBK.map GB180302.map GB180304.map \
  BIG5.map BIG5-HKSCS.map \
  CNS-1.map CNS-2.map CNS-3.map CNS-4.map CNS-5.map CNS-6.map CNS-7.map \
  CNS-F.map \
  JISX0201.map JISX0208.map JISX0212.map JISX2131.map JISX2132.map \
  JISC6226.map CP932-2BYTE.map JISX213A.map \
  KSC5601.map KSC5636.map JOHAB.map
# Miscellaneous maps; one of the lists that make up ${CHARSETS}.
# MIK, PTCP154, CP949-2BYTE, BIG5-1 and BIG5-2 each have an explicit rule
# below; they are listed here so that `all' builds them.
# The list must not end in a backslash, or the next line of the file
# would be folded into this value.
MISC = \
  KOI-8.map KOI8-R.map KOI8-U.map KOI8-T.map ALTERNATIVNYJ.map \
  MIK.map PTCP154.map \
  TIS-620.map VISCII.map VSCII.map VSCII-2.map \
  KA-PS.map KA-ACADEMY.map \
  HP-ROMAN8.map NEXTSTEP.map MACINTOSH.map EBCDICUK.map EBCDICUS.map \
  stdenc.map symbol.map \
  CP949-2BYTE.map \
  BIG5-1.map BIG5-2.map
# Emacs-mule charsets.
# Each entry is produced by the ${charsetdir}/MULE-%.map pattern rule.
MULE = \
  MULE-ethiopic.map \
  MULE-ipa.map \
  MULE-is13194.map \
  MULE-sisheng.map \
  MULE-tibetan.map \
  MULE-lviscii.map \
  MULE-uviscii.map
# Generated sed script; it is both a target (built from JISX213A.map) and
# an input of the JISX2131.map recipe.
SED_SCRIPT = $(srcdir)/jisx2131-filter

# Lisp translation tables, as full paths under ${lispintdir}.
TRANS_TABLE := $(addprefix ${lispintdir}/,cp51932.el eucjp-ms.el)

# Every charset map to generate, as full paths under ${charsetdir}.
CHARSETS := $(addprefix ${charsetdir}/,\
  ${ISO8859} ${IBM} ${CODEPAGE} ${CJK} ${MISC} ${MULE})
# Default goal: bring every generated map and translation table up to date.
all: ${srcdir}/charsets.stamp

# `all' names an action, not a file; declare it phony so that a stray
# file called `all' cannot make the goal look up to date.
.PHONY: all

# The stamp is rewritten whenever any map or translation table had to be
# regenerated.
${srcdir}/charsets.stamp: ${CHARSETS} ${TRANS_TABLE}
	${AM_V_GEN}echo timestamp > $@
## Rules for each charset.

# The conversion driver script, and the command prefix that runs it with
# AWK set in its environment.
mapconv     = ${srcdir}/mapconv
run_mapconv = AWK=${AWK} ${mapconv}

# Helper AWK scripts referenced by the recipes below (as prerequisites,
# as arguments to mapconv, or via `$(AWK) -f').
big5     = ${srcdir}/big5.awk
compact  = ${srcdir}/compact.awk
cp51932  = ${srcdir}/cp51932.awk
cp932    = ${srcdir}/cp932.awk
eucjp_ms = ${srcdir}/eucjp-ms.awk
gb180302 = ${srcdir}/gb180302.awk
gb180304 = ${srcdir}/gb180304.awk
kuten    = ${srcdir}/kuten.awk
## Short aliases, eg VSCII.map = ${charsetdir}/VSCII.map
# map_template takes the full path of a generated file as ${1} and
# expands to a phony target named after its basename that depends on
# the full path, so `make VSCII.map' rebuilds ${charsetdir}/VSCII.map.
define map_template
  .PHONY: $(notdir ${1})
  $(notdir ${1}): ${1}
endef

# Instantiate the alias for every generated map and translation table.
$(foreach mfile,${CHARSETS} ${TRANS_TABLE},$(eval $(call map_template,$(mfile))))
# VSCII is generated from the glibc TCVN5712-1 charmap.
${charsetdir}/VSCII.map: \
  ${GLIBC_CHARMAPS}/TCVN5712-1.gz ${mapconv} ${compact}
	${AM_V_GEN}${run_mapconv} $< \
	  '/^<.*[ ]\/x[0-9a-f].[ ]/' GLIBC-1 ${compact} > $@
# VSCII-2 uses the same charmap as VSCII with a narrower line filter; the
# trailing sed rewrites the 0x20-0x7F range line to 0x00-0x7F 0x0000.
${charsetdir}/VSCII-2.map: \
  ${GLIBC_CHARMAPS}/TCVN5712-1.gz ${mapconv} ${compact}
	${AM_V_GEN}${run_mapconv} $< \
	  '/^<.*[ ]\/x[2-7a-f].[ ]/' GLIBC-1 ${compact} | \
	  sed 's/0x20-0x7F.*/0x00-0x7F 0x0000/' > $@
# ALTERNATIVNYJ is IBM866.map with a three-line header prepended and the
# Unicode column replaced for selected code points.  The substitutions
# are performed by sed reading the IBM866 map on stdin.
# NOTE(review): only 0xF2-0xFB are remapped here; confirm against the
# referenced chart whether 0xF0/0xF1 need entries as well.
${charsetdir}/ALTERNATIVNYJ.map: ${charsetdir}/IBM866.map
	${AM_V_GEN}(echo "# Modified from $(notdir $<) according to the chart at" && \
	  echo "# https://web.archive.org/web/20100131045151/http://www.cyrillic.com/ref/cyrillic/koi-8alt.html" && \
	  echo "# with guesses for the Unicodes of the glyphs." && \
	  sed \
	    -e '/0xF2/ s/ .*/ 0x2019/' \
	    -e '/0xF3/ s/ .*/ 0x2018/' \
	    -e '/0xF4/ s/ .*/ 0x0301/' \
	    -e '/0xF5/ s/ .*/ 0x0300/' \
	    -e '/0xF6/ s/ .*/ 0x203A/' \
	    -e '/0xF7/ s/ .*/ 0x2039/' \
	    -e '/0xF8/ s/ .*/ 0x2191/' \
	    -e '/0xF9/ s/ .*/ 0x2193/' \
	    -e '/0xFA/ s/ .*/ 0x00B1/' \
	    -e '/0xFB/ s/ .*/ 0x00F7/' < $< ) > $@
# MIK is generated from mapfiles/bulgarian-mik.txt; '1,$$' reaches the
# shell as the address 1,$ (every line).
${charsetdir}/MIK.map: \
  ${mapfiledir}/bulgarian-mik.txt ${mapconv} ${compact}
	${AM_V_GEN}${run_mapconv} $< \
	  '1,$$' CZYBORRA ${compact} > $@
# PTCP154 is generated from mapfiles/PTCP154, keeping lines that start
# with "0x".
${charsetdir}/PTCP154.map: \
  ${mapfiledir}/PTCP154 ${mapconv} ${compact}
	${AM_V_GEN}${run_mapconv} $< \
	  '/^0x/' IANA ${compact} > $@
# stdenc.map and symbol.map are built the same way from the like-named
# .txt file in ${mapfiledir}; one static pattern rule covers both.
${charsetdir}/stdenc.map ${charsetdir}/symbol.map: \
  ${charsetdir}/%.map: ${mapfiledir}/%.txt ${mapconv} ${compact}
	${AM_V_GEN}${run_mapconv} $< \
	  '/^[0-9A-Fa-f]/' UNICODE ${compact} > $@
# CP720.map and CP858.map already exist in final form under
# ${mapfiledir}; copy them into ${charsetdir}.  Without a recipe these
# rules would fall through to the generic %.map rule, which expects a
# glibc charmap instead.
${charsetdir}/CP720.map ${charsetdir}/CP858.map: \
  ${charsetdir}/%.map: ${mapfiledir}/%.map
	${AM_V_GEN}cp $< $@
# CP949-2BYTE is generated from the glibc CP949 charmap.
${charsetdir}/CP949-2BYTE.map: \
  ${GLIBC_CHARMAPS}/CP949.gz ${mapconv} ${compact}
	${AM_V_GEN}${run_mapconv} $< \
	  '/^<.*[ ]\/x[89a-f]/' GLIBC-2 ${compact} > $@
# GB2312 is generated from the glibc GB2312 charmap.
${charsetdir}/GB2312.map: \
  ${GLIBC_CHARMAPS}/GB2312.gz ${mapconv} ${compact}
	${AM_V_GEN}${run_mapconv} $< \
	  '/^<.*[ ]\/x[a-f]/' GLIBC-2-7 ${compact} > $@
# GBK is generated from the glibc GBK charmap.
${charsetdir}/GBK.map: \
  ${GLIBC_CHARMAPS}/GBK.gz ${mapconv} ${compact}
	${AM_V_GEN}${run_mapconv} $< \
	  '/^<.*[ ]\/x[89a-f]/' GLIBC-2 ${compact} > $@
# GB180302 is generated from the glibc GB18030 charmap; the filter keeps
# lines with exactly two /x.. byte groups, and gb180302.awk is handed to
# mapconv in place of the usual compact.awk.
${charsetdir}/GB180302.map: \
  ${GLIBC_CHARMAPS}/GB18030.gz ${mapconv} ${gb180302}
	${AM_V_GEN}${run_mapconv} $< \
	  '/^<.*[ ]\/x..\/x..[ ]/' GLIBC-2 ${gb180302} > $@
# GB180304 is derived from the already generated GB180302.map by running
# it through gb180304.awk.
${charsetdir}/GB180304.map: \
  ${charsetdir}/GB180302.map ${gb180304}
	${AM_V_GEN}${AWK} -f ${gb180304} < $< > $@
# JISX0201: the mapconv output covers bytes whose first hex digit is 0-9;
# the 0xA1-0xDF range line is appended literally after a marker comment.
${charsetdir}/JISX0201.map: \
  ${GLIBC_CHARMAPS}/JIS_X0201.gz ${mapconv} ${compact}
	${AM_V_GEN}( \
	  ${run_mapconv} $< '/^<.*[ ]\/x[0-9]/' GLIBC-1 ${compact} && \
	  echo "# Generated by hand" && \
	  echo "0xA1-0xDF 0xFF61" \
	) > $@
# JISX0208 is generated from the glibc EUC-JP charmap (no compaction
# script is passed); sed then replaces 0x2015 with 0x2014.
${charsetdir}/JISX0208.map: \
  ${GLIBC_CHARMAPS}/EUC-JP.gz ${mapconv}
	${AM_V_GEN}${run_mapconv} $< '/^<.*[ ]\/x[a-f]/' GLIBC-2-7 | \
	  sed 's/0x2015/0x2014/' > $@
# JISX0212 is taken from the /x8f-prefixed entries of the glibc EUC-JP
# charmap; the sed expression also strips that prefix.
${charsetdir}/JISX0212.map: \
  ${GLIBC_CHARMAPS}/EUC-JP.gz ${mapconv} ${compact}
	${AM_V_GEN}${run_mapconv} $< \
	  '/^<.*[ ]\/x8f/ s,/x8f,,' GLIBC-2-7 ${compact} > $@
# Build the sed filter from JISX213A.map: comment lines are dropped, and
# every other line is turned into a `/0x0*<hex>$/d' delete command keyed
# on its trailing hex value.  `$$' reaches the shell as a single `$'.
${SED_SCRIPT}: ${mapfiledir}/JISX213A.map
	${AM_V_at}sed -n \
	  -e '/^#/d' \
	  -e 's,.*0x\([0-9A-Z]*\)$$,/0x0*\1$$/d,p' \
	  < $< > $@
# JISX2131 is generated from the glibc EUC-JISX0213 charmap, filtered
# through the generated $(SED_SCRIPT), then patched for two code points.
# $(SED_SCRIPT) is a prerequisite because the recipe reads it with
# `sed -f'; it has its own rule and must exist (and be current) first.
${charsetdir}/JISX2131.map: ${GLIBC_CHARMAPS}/EUC-JISX0213.gz ${mapconv} \
  $(SED_SCRIPT)
	${AM_V_GEN}${run_mapconv} $< '/^<.*[ ]\/x[a-f]/' GLIBC-2-7 \
	  | sed -f $(SED_SCRIPT) \
	  | sed -e 's/0x2015/0x2014/' -e 's/0x2299/0x29BF/' > $@
# JISX2132 is taken from the /x8f-prefixed entries of the glibc
# EUC-JISX0213 charmap, with the prefix stripped; no compaction script
# is passed.
${charsetdir}/JISX2132.map: \
  ${GLIBC_CHARMAPS}/EUC-JISX0213.gz ${mapconv}
	${AM_V_GEN}${run_mapconv} $< \
	  '/^<.*[ ]\/x8f/ s,/x8f,,' GLIBC-2-7 > $@
# JISX213A.map already exists in final form under ${mapfiledir}; copy it
# into ${charsetdir}.  Without a recipe this rule would fall through to
# the generic %.map rule, which expects a glibc charmap instead.
${charsetdir}/JISX213A.map: ${mapfiledir}/JISX213A.map
	${AM_V_GEN}cp $< $@
# CP932-2BYTE is generated from mapfiles/CP932.TXT; the filter keeps
# lines starting with 0x, a hex digit in 8-F, and two more hex digits.
${charsetdir}/CP932-2BYTE.map: \
  ${mapfiledir}/CP932.TXT ${mapconv} ${cp932}
	${AM_V_GEN}${run_mapconv} $< \
	  '/^0x[89A-F][0-9A-F][0-9A-F]/' UNICODE2 ${cp932} > $@
# Lisp translation table produced by running cp51932.awk over the
# generated CP932-2BYTE.map.
${lispintdir}/cp51932.el: \
  ${charsetdir}/CP932-2BYTE.map ${cp51932}
	${AM_V_GEN}${AWK} -f ${cp51932} < $< > $@
# Lisp translation table produced by decompressing the glibc EUC-JP-MS
# charmap and piping it through eucjp-ms.awk.
${lispintdir}/eucjp-ms.el: \
  ${GLIBC_CHARMAPS}/EUC-JP-MS.gz ${eucjp_ms}
	${AM_V_GEN}gunzip -c $< | \
	  ${AWK} -f ${eucjp_ms} > $@
# Uni2JIS lacks mappings for characters that were added to Unicode
# recently, so they are appended by hand below; the sed step applies one
# correction (U+005C becomes U+FF3C on the 0x2140 line).  These changes
# are based on bogytech's blog at:
# https://bogytech.blogspot.com/search/label/emacs
${charsetdir}/JISC6226.map: \
  ${mapfiledir}/Uni2JIS ${mapconv} ${kuten}
	${AM_V_GEN}( \
	  ${run_mapconv} $< '/^[^#].*0-/' YASUOKA ${kuten} \
	    | sed -e '/0x2140/s/005C/FF3C/' && \
	  echo '0x3442 0x3D4E' && \
	  echo '0x374E 0x25874' && \
	  echo '0x3764 0x28EF6' && \
	  echo '0x513D 0x2F80F' && \
	  echo '0x7045 0x9724' \
	) > $@
# KSC5601 is generated from the glibc EUC-KR charmap.
${charsetdir}/KSC5601.map: \
  ${GLIBC_CHARMAPS}/EUC-KR.gz ${mapconv} ${compact}
	${AM_V_GEN}${run_mapconv} $< \
	  '/^<.*[ ]\/x[a-f]/' GLIBC-2-7 ${compact} > $@
# BIG5 is generated from the glibc BIG5 charmap.
# NOTE(review): ${compact} is listed as a prerequisite but is not passed
# to mapconv here, unlike most sibling rules -- confirm which is intended.
${charsetdir}/BIG5.map: \
  ${GLIBC_CHARMAPS}/BIG5.gz ${mapconv} ${compact}
	${AM_V_GEN}${run_mapconv} $< \
	  '/^<.*[ ]\/x[a-f]/' GLIBC-2 > $@
# BIG5-1: a header line naming the source file, followed by the lines of
# BIG5.map from the first match of 0xa140 through the first match of
# 0xc8fe, run through big5.awk.
${charsetdir}/BIG5-1.map: \
  ${charsetdir}/BIG5.map ${mapconv} ${big5}
	${AM_V_GEN}( \
	  echo "# Generated from $(<F)" && \
	  sed -n -e '/0xa140/,/0xc8fe/p' < $< | ${AWK} -f ${big5} \
	) > $@
# BIG5-2: a header line naming the source file, followed by the lines of
# BIG5.map from the first match of 0xc940 to the end of the file, run
# through big5.awk.  `$$' reaches the shell as a single `$'.
${charsetdir}/BIG5-2.map: \
  ${charsetdir}/BIG5.map ${mapconv} ${big5}
	${AM_V_GEN}( \
	  echo "# Generated from $(<F)" && \
	  sed -n -e '/0xc940/,$$ p' < $< | ${AWK} -f ${big5} \
	) > $@
# BIG5-HKSCS is generated from the glibc BIG5-HKSCS charmap.
${charsetdir}/BIG5-HKSCS.map: \
  ${GLIBC_CHARMAPS}/BIG5-HKSCS.gz ${mapconv} ${compact}
	${AM_V_GEN}${run_mapconv} $< \
	  '/^<.*[ ]\/x[89a-f].\//' GLIBC-2 ${compact} > $@
# JOHAB is generated from the glibc JOHAB charmap.
${charsetdir}/JOHAB.map: \
  ${GLIBC_CHARMAPS}/JOHAB.gz ${mapconv} ${compact}
	${AM_V_GEN}${run_mapconv} $< \
	  '/^<.*[ ]\/x[89a-f]/' GLIBC-2 ${compact} > $@
# CNS-1 is generated from the glibc EUC-TW charmap, unlike CNS-2..CNS-7,
# which use mapfiles/cns2ucsdkw.txt.
${charsetdir}/CNS-1.map: \
  ${GLIBC_CHARMAPS}/EUC-TW.gz ${mapconv} ${compact}
	${AM_V_GEN}${run_mapconv} $< \
	  '/^<.*[ ]\/x[a-f]/' GLIBC-2-7 ${compact} > $@

# Disabled alternative that would build CNS-1 from cns2ucsdkw.txt instead:
# ${charsetdir}/CNS-1.map: ${mapfiledir}/cns2ucsdkw.txt ${mapconv} ${compact}
#	${AM_V_GEN}${run_mapconv} $< '/^C1/' KANJI-DATABASE ${compact} > $@
# CNS-2 .. CNS-7 are all built from mapfiles/cns2ucsdkw.txt with the
# same command; only the digit in the '/^C<digit>/' filter differs.  A
# static pattern rule covers all six: the stem ($*) is the digit taken
# from the target name.  CNS-1 and CNS-F are deliberately not in this
# list because they have their own rules with different inputs.
cns_mapfile_planes = 2 3 4 5 6 7

$(patsubst %,${charsetdir}/CNS-%.map,${cns_mapfile_planes}): \
  ${charsetdir}/CNS-%.map: ${mapfiledir}/cns2ucsdkw.txt ${mapconv} ${compact}
	${AM_V_GEN}${run_mapconv} $< '/^C$*/' KANJI-DATABASE ${compact} > $@
# CNS-F is taken from the /x8e/xaf-prefixed entries of the glibc EUC-TW
# charmap; the sed expression also strips that prefix.
${charsetdir}/CNS-F.map: \
  ${GLIBC_CHARMAPS}/EUC-TW.gz ${mapconv} ${compact}
	${AM_V_GEN}${run_mapconv} $< \
	  '/^<.*\/x8e\/xaf/ s,/x8e/xaf,,' GLIBC-2-7 ${compact} > $@
# General target to produce map files for mule charsets.
# The sources under ${mapfiledir} are already in map format, so they are
# copied unchanged.  The recipe is required: a pattern rule without one
# does not define how to build anything.
${charsetdir}/MULE-%.map: ${mapfiledir}/MULE-%.map
	${AM_V_GEN}cp $< $@
# General targets to produce map files for ISO-8859, GEORGIAN, and
# EBCDIC charsets.  We cannot use the original file name because of the
# file name limit on DOS.  "KA" is the ISO 639 language code for Georgian.
# The three rules share one recipe and differ only in how the target
# stem maps onto the glibc charmap name.

${charsetdir}/8859-%.map: \
  ${GLIBC_CHARMAPS}/ISO-8859-%.gz ${mapconv} ${compact}
	${AM_V_GEN}${run_mapconv} $< \
	  '/^<.*[ ]\/x/' GLIBC-1 ${compact} > $@

${charsetdir}/KA-%.map: \
  ${GLIBC_CHARMAPS}/GEORGIAN-%.gz ${mapconv} ${compact}
	${AM_V_GEN}${run_mapconv} $< \
	  '/^<.*[ ]\/x/' GLIBC-1 ${compact} > $@

${charsetdir}/EBCDIC%.map: \
  ${GLIBC_CHARMAPS}/EBCDIC-%.gz ${mapconv} ${compact}
	${AM_V_GEN}${run_mapconv} $< \
	  '/^<.*[ ]\/x/' GLIBC-1 ${compact} > $@
# General target to produce map files for single-byte charsets.
# Fallback for any map whose glibc charmap has the same base name and
# which has no more specific rule above.
${charsetdir}/%.map: \
  ${GLIBC_CHARMAPS}/%.gz ${mapconv} ${compact}
	${AM_V_GEN}${run_mapconv} $< \
	  '/^<.*[ ]\/x/' GLIBC-1 ${compact} > $@
# None of the cleaning targets names a real file.
.PHONY: clean bootstrap-clean distclean maintainer-clean gen-clean

## IMO this should also run gen-clean.
bootstrap-clean: clean

# Remove everything this makefile generates: the charset maps, the
# generated sed filter, the Lisp translation tables and the stamp file.
# This recipe belongs to gen-clean only, so the generated files are not
# deleted by bootstrap-clean.
gen-clean:
	rm -f ${CHARSETS} ${SED_SCRIPT} ${TRANS_TABLE} ${srcdir}/charsets.stamp

maintainer-clean: gen-clean distclean