From 57939ff41e57b4e24c00aa875046d103b46e066c Mon Sep 17 00:00:00 2001 From: Eli Zaretskii Date: Sat, 18 Feb 2012 13:36:54 +0200 Subject: [PATCH] Update char-script-table from the latest Unicode Database. lisp/international/characters.el (script-list): Sync with the latest Unicode Character Database. --- lisp/ChangeLog | 5 ++ lisp/international/characters.el | 109 ++++++++++++++++++++++++++++++++++----- 2 files changed, 100 insertions(+), 14 deletions(-) diff --git a/lisp/ChangeLog b/lisp/ChangeLog index 14e2596e43d..1980d18d3b4 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog @@ -1,3 +1,8 @@ +2012-02-18 Eli Zaretskii + + * international/characters.el (script-list): Sync with the latest + Unicode Character Database. + 2012-02-18 Andreas Schwab * international/titdic-cnv.el: Remove duplicate coding tag. diff --git a/lisp/international/characters.el b/lisp/international/characters.el index 4b8ed3e21e2..ba8b8955fba 100644 --- a/lisp/international/characters.el +++ b/lisp/international/characters.el @@ -1114,6 +1114,12 @@ Setup char-width-table appropriate for non-CJK language environment." ;; Setting char-script-table. +;; The data is compiled from Blocks.txt and Scripts.txt in the +;; "Unicode Character Database", simplified to lump together all the +;; blocks belonging to the same language. E.g., "Basic Latin", +;; "Latin-1 Supplement", "Latin Extended-A", etc. are all lumped +;; together under "latin". +;; ;; The Unicode blocks actually extend past some of these ranges with ;; undefined codepoints. (let ((script-list nil)) @@ -1126,13 +1132,17 @@ Setup char-width-table appropriate for non-CJK language environment." 
(#x0370 #x03E1 greek) (#x03E2 #x03EF coptic) (#x03F0 #x03F3 greek) - (#x0400 #x04FF cyrillic) + (#x0400 #x052F cyrillic) (#x0530 #x058F armenian) (#x0590 #x05FF hebrew) (#x0600 #x06FF arabic) (#x0700 #x074F syriac) - (#x07C0 #x07FA nko) + (#x0750 #x077F arabic) (#x0780 #x07BF thaana) + (#x07C0 #x07FF nko) + (#x0800 #x083F samaritan) + (#x0840 #x085F mandaic) + (#x08A0 #x08FF arabic) (#x0900 #x097F devanagari) (#x0980 #x09FF bengali) (#x0A00 #x0A7F gurmukhi) @@ -1143,10 +1153,10 @@ Setup char-width-table appropriate for non-CJK language environment." (#x0C80 #x0CFF kannada) (#x0D00 #x0D7F malayalam) (#x0D80 #x0DFF sinhala) - (#x0E00 #x0E5F thai) - (#x0E80 #x0EDF lao) + (#x0E00 #x0E7F thai) + (#x0E80 #x0EFF lao) (#x0F00 #x0FFF tibetan) - (#x1000 #x109F burmese) + (#x1000 #x109F burmese) ; according to Unicode 6.1, should be "myanmar" (#x10A0 #x10FF georgian) (#x1100 #x11FF hangul) (#x1200 #x139F ethiopic) @@ -1154,14 +1164,40 @@ Setup char-width-table appropriate for non-CJK language environment." 
(#x1400 #x167F canadian-aboriginal) (#x1680 #x169F ogham) (#x16A0 #x16FF runic) + (#x1700 #x171F tagalog) + (#x1720 #x173F hanunoo) + (#x1740 #x175F buhid) + (#x1760 #x177F tagbanwa) (#x1780 #x17FF khmer) (#x1800 #x18AF mongolian) - (#x1D00 #x1DFF phonetic) - (#x1E00 #x1EFF latin) + (#x18B0 #x18FF canadian-aboriginal) + (#x1900 #x194F limbu) + (#x1950 #x197F tai-le) + (#x1980 #x19DF tai-lue) + (#x19E0 #x19FF khmer) + (#x1A00 #x1A1F buginese) + (#x1A20 #x1AAF tai-tham) + (#x1B00 #x1B7F balinese) + (#x1B80 #x1BBF sundanese) + (#x1BC0 #x1BFF batak) + (#x1C00 #x1C4F lepcha) + (#x1C50 #x1C7F ol-chiki) + (#x1CC0 #x1CCF sundanese) + (#x1CD0 #x1CFF vedic) + (#x1D00 #x1DBF phonetic) + (#x1DC0 #x1EFF latin) (#x1F00 #x1FFF greek) (#x2000 #x27FF symbol) (#x2800 #x28FF braille) + (#x2900 #x2BFF symbol) + (#x2C00 #x2C5F glagolitic) + (#x2C60 #x2C7F latin) + (#x2C80 #x2CFF coptic) + (#x2D00 #x2D2F georgian) + (#x2D30 #x2D7F tifinagh) (#x2D80 #x2DDF ethiopic) + (#x2DE0 #x2DFF cyrillic) + (#x2E00 #x2E7F symbol) (#x2E80 #x2FDF han) (#x2FF0 #x2FFF ideographic-description) (#x3000 #x303F cjk-misc) @@ -1170,47 +1206,92 @@ Setup char-width-table appropriate for non-CJK language environment." 
(#x3130 #x318F hangul) (#x3190 #x319F kanbun) (#x31A0 #x31BF bopomofo) - (#x3400 #x9FAF han) + (#x31C0 #x31EF cjk-misc) + (#x31F0 #x31FF kana) + (#x3200 #x9FAF han) (#xA000 #xA4CF yi) + (#xA4D0 #xA4FF lisu) + (#xA500 #xA63F vai) + (#xA640 #xA69F cyrillic) + (#xA6A0 #xA6FF bamum) + (#xA700 #xA7FF latin) + (#xA800 #xA82F syloti-nagri) + (#xA830 #xA83F north-indic-number) + (#xA840 #xA87F phags-pa) + (#xA880 #xA8DF saurashtra) + (#xA8E0 #xA8FF devanagari) + (#xA900 #xA92F kayah-li) + (#xA930 #xA95F rejang) + (#xA960 #xA97F hangul) + (#xA980 #xA9DF javanese) (#xAA00 #xAA5F cham) - (#xAA60 #xAA7B burmese) + (#xAA60 #xAA7B burmese) ; Unicode 6.1: "myanmar" (#xAA80 #xAADF tai-viet) - (#xAC00 #xD7AF hangul) + (#xAAE0 #xAAFF meetei-mayek) + (#xAB00 #xAB2F ethiopic) + (#xABC0 #xABFF meetei-mayek) + (#xAC00 #xD7FF hangul) (#xF900 #xFAFF han) (#xFB1D #xFB4F hebrew) (#xFB50 #xFDFF arabic) - (#xFE70 #xFEFC arabic) + (#xFE30 #xFE4F han) + (#xFE70 #xFEFF arabic) (#xFF00 #xFF5F cjk-misc) (#xFF61 #xFF9F kana) (#xFFE0 #xFFE6 cjk-misc) (#x10000 #x100FF linear-b) (#x10100 #x1013F aegean-number) - (#x10140 #x1018A ancient-greek-number) - (#x10190 #x1019B ancient-symbol) + (#x10140 #x1018F ancient-greek-number) + (#x10190 #x101CF ancient-symbol) (#x101D0 #x101FF phaistos-disc) (#x10280 #x1029F lycian) (#x102A0 #x102DF carian) (#x10300 #x1032F olt-italic) + (#x10330 #x1034F gothic) (#x10380 #x1039F ugaritic) (#x103A0 #x103DF old-persian) (#x10400 #x1044F deseret) (#x10450 #x1047F shavian) (#x10480 #x104AF osmanya) (#x10800 #x1083F cypriot-syllabary) + (#x10840 #x1085F aramaic) (#x10900 #x1091F phoenician) (#x10920 #x1093F lydian) + (#x10980 #x109FF meroitic) (#x10A00 #x10A5F kharoshthi) + (#x10A60 #x10A7F old-south-arabian) + (#x10B00 #x10B3F avestan) + (#x10B40 #x10B5F inscriptional-parthian) + (#x10B60 #x10B7F inscriptional-pahlavi) + (#x10C00 #x10C4F old-turkic) + (#x10E60 #x10E7F rumi-number) + (#x11000 #x1107F brahmi) + (#x11080 #x110CF kaithi) + (#x110D0 #x110FF sora-sompeng) + 
(#x11100 #x1114F chakma) + (#x11180 #x111DF sharada) + (#x11680 #x116CF takri) (#x12000 #x123FF cuneiform) (#x12400 #x1247F cuneiform-numbers-and-punctuation) + (#x13000 #x1342F egyptian) + (#x16800 #x16A3F bamum) + (#x16F00 #x16F9F miao) + (#x1B000 #x1B0FF kana) (#x1D000 #x1D0FF byzantine-musical-symbol) (#x1D100 #x1D1FF musical-symbol) (#x1D200 #x1D24F ancient-greek-musical-notation) (#x1D300 #x1D35F tai-xuan-jing-symbol) (#x1D360 #x1D37F counting-rod-numeral) (#x1D400 #x1D7FF mathematical) + (#x1EE00 #x1EEFF arabic) (#x1F000 #x1F02F mahjong-tile) (#x1F030 #x1F09F domino-tile) - (#x20000 #x2AFFF han) + (#x1F0A0 #x1F0FF playing-cards) + (#x1F100 #x1F1FF symbol) + (#x1F200 #x1F2FF han) + (#x1F300 #x1F64F symbol) + (#x1F680 #x1F77F symbol) + (#x20000 #x2B81F han) (#x2F800 #x2FFFF han))) (set-char-table-range char-script-table (cons (car elt) (nth 1 elt)) (nth 2 elt)) -- 2.11.4.GIT