From 2849dde8ba8af34b1e3dda2bbf0df328cc03baff Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 11 Mar 2010 12:38:06 +0100 Subject: [PATCH] Add back a few Perl files In c3b46a4, it seems a few too many files were removed. Add a few back, in the hope to fix issue 433. Signed-off-by: Johannes Schindelin --- lib/perl5/5.8.8/Unicode/Collate.pm | 1877 +++++++++++++++++++++++++++++ lib/perl5/5.8.8/Unicode/Collate/keys.txt | 864 +++++++++++++ lib/perl5/5.8.8/Unicode/UCD.pm | 820 +++++++++++++ lib/perl5/5.8.8/abbrev.pl | 43 + lib/perl5/5.8.8/assert.pl | 55 + lib/perl5/5.8.8/bigfloat.pl | 254 ++++ lib/perl5/5.8.8/bigint.pl | 320 +++++ lib/perl5/5.8.8/bigrat.pl | 155 +++ lib/perl5/5.8.8/bytes_heavy.pl | 40 + lib/perl5/5.8.8/cacheout.pl | 55 + lib/perl5/5.8.8/charnames.pm | 544 +++++++++ lib/perl5/5.8.8/complete.pl | 120 ++ lib/perl5/5.8.8/msys/Config_heavy.pl | 1200 ++++++++++++++++++ lib/perl5/5.8.8/msys/Unicode/Normalize.pm | 479 ++++++++ 14 files changed, 6826 insertions(+) create mode 100644 lib/perl5/5.8.8/Unicode/Collate.pm create mode 100644 lib/perl5/5.8.8/Unicode/Collate/keys.txt create mode 100644 lib/perl5/5.8.8/Unicode/UCD.pm create mode 100644 lib/perl5/5.8.8/abbrev.pl create mode 100644 lib/perl5/5.8.8/assert.pl create mode 100644 lib/perl5/5.8.8/bigfloat.pl create mode 100644 lib/perl5/5.8.8/bigint.pl create mode 100644 lib/perl5/5.8.8/bigrat.pl create mode 100644 lib/perl5/5.8.8/bytes_heavy.pl create mode 100644 lib/perl5/5.8.8/cacheout.pl create mode 100644 lib/perl5/5.8.8/charnames.pm create mode 100644 lib/perl5/5.8.8/complete.pl create mode 100644 lib/perl5/5.8.8/msys/Config_heavy.pl create mode 100644 lib/perl5/5.8.8/msys/Unicode/Normalize.pm diff --git a/lib/perl5/5.8.8/Unicode/Collate.pm b/lib/perl5/5.8.8/Unicode/Collate.pm new file mode 100644 index 00000000..cd9b8e70 --- /dev/null +++ b/lib/perl5/5.8.8/Unicode/Collate.pm @@ -0,0 +1,1877 @@ +package Unicode::Collate; + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + die "Unicode::Collate 
cannot stringify a Unicode code point\n"; + } +} + +use 5.006; +use strict; +use warnings; +use Carp; +use File::Spec; + +no warnings 'utf8'; + +our $VERSION = '0.52'; +our $PACKAGE = __PACKAGE__; + +my @Path = qw(Unicode Collate); +my $KeyFile = "allkeys.txt"; + +# Perl's boolean +use constant TRUE => 1; +use constant FALSE => ""; +use constant NOMATCHPOS => -1; + +# A coderef to get combining class imported from Unicode::Normalize +# (i.e. \&Unicode::Normalize::getCombinClass). +# This is also used as a HAS_UNICODE_NORMALIZE flag. +my $CVgetCombinClass; + +# Supported Levels +use constant MinLevel => 1; +use constant MaxLevel => 4; + +# Minimum weights at level 2 and 3, respectively +use constant Min2Wt => 0x20; +use constant Min3Wt => 0x02; + +# Shifted weight at 4th level +use constant Shift4Wt => 0xFFFF; + +# A boolean for Variable and 16-bit weights at 4 levels of Collation Element +# PROBLEM: The Default Unicode Collation Element Table +# has weights over 0xFFFF at the 4th level. +# The tie-breaking in the variable weights +# other than "shift" (as well as "shift-trimmed") is unreliable. +use constant VCE_TEMPLATE => 'Cn4'; + +# A sort key: 16-bit weights +# See also the PROBLEM on VCE_TEMPLATE above. +use constant KEY_TEMPLATE => 'n*'; + +# Level separator in a sort key: +# i.e. pack(KEY_TEMPLATE, 0) +use constant LEVEL_SEP => "\0\0"; + +# As Unicode code point separator for hash keys. +# A joined code point string (denoted by JCPS below) +# like "65;768" is used for internal processing +# instead of Perl's Unicode string like "\x41\x{300}", +# as the native code point is different from the Unicode code point +# on EBCDIC platform. +# This character must not be included in any stringified +# representation of an integer. 
+use constant CODE_SEP => ';'; + +# boolean values of variable weights +use constant NON_VAR => 0; # Non-Variable character +use constant VAR => 1; # Variable character + +# specific code points +use constant Hangul_LBase => 0x1100; +use constant Hangul_LIni => 0x1100; +use constant Hangul_LFin => 0x1159; +use constant Hangul_LFill => 0x115F; +use constant Hangul_VBase => 0x1161; +use constant Hangul_VIni => 0x1160; # from Vowel Filler +use constant Hangul_VFin => 0x11A2; +use constant Hangul_TBase => 0x11A7; # from "no-final" codepoint +use constant Hangul_TIni => 0x11A8; +use constant Hangul_TFin => 0x11F9; +use constant Hangul_TCount => 28; +use constant Hangul_NCount => 588; +use constant Hangul_SBase => 0xAC00; +use constant Hangul_SIni => 0xAC00; +use constant Hangul_SFin => 0xD7A3; +use constant CJK_UidIni => 0x4E00; +use constant CJK_UidFin => 0x9FA5; +use constant CJK_UidF41 => 0x9FBB; +use constant CJK_ExtAIni => 0x3400; +use constant CJK_ExtAFin => 0x4DB5; +use constant CJK_ExtBIni => 0x20000; +use constant CJK_ExtBFin => 0x2A6D6; +use constant BMP_Max => 0xFFFF; + +# Logical_Order_Exception in PropList.txt +my $DefaultRearrange = [ 0x0E40..0x0E44, 0x0EC0..0x0EC4 ]; + +sub UCA_Version { "14" } + +sub Base_Unicode_Version { "4.1.0" } + +###### + +sub pack_U { + return pack('U*', @_); +} + +sub unpack_U { + return unpack('U*', shift(@_).pack('U*')); +} + +###### + +my (%VariableOK); +@VariableOK{ qw/ + blanked non-ignorable shifted shift-trimmed + / } = (); # keys lowercased + +our @ChangeOK = qw/ + alternate backwards level normalization rearrange + katakana_before_hiragana upper_before_lower + overrideHangul overrideCJK preprocess UCA_Version + hangul_terminator variable + /; + +our @ChangeNG = qw/ + entry mapping table maxlength + ignoreChar ignoreName undefChar undefName variableTable + versionTable alternateTable backwardsTable forwardsTable rearrangeTable + derivCode normCode rearrangeHash + backwardsFlag + /; +# The hash key 'ignored' is deleted at 
v 0.21. +# The hash key 'isShift' is deleted at v 0.23. +# The hash key 'combining' is deleted at v 0.24. +# The hash key 'entries' is deleted at v 0.30. +# The hash key 'L3_ignorable' is deleted at v 0.40. + +sub version { + my $self = shift; + return $self->{versionTable} || 'unknown'; +} + +my (%ChangeOK, %ChangeNG); +@ChangeOK{ @ChangeOK } = (); +@ChangeNG{ @ChangeNG } = (); + +sub change { + my $self = shift; + my %hash = @_; + my %old; + if (exists $hash{variable} && exists $hash{alternate}) { + delete $hash{alternate}; + } + elsif (!exists $hash{variable} && exists $hash{alternate}) { + $hash{variable} = $hash{alternate}; + } + foreach my $k (keys %hash) { + if (exists $ChangeOK{$k}) { + $old{$k} = $self->{$k}; + $self->{$k} = $hash{$k}; + } + elsif (exists $ChangeNG{$k}) { + croak "change of $k via change() is not allowed!"; + } + # else => ignored + } + $self->checkCollator(); + return wantarray ? %old : $self; +} + +sub _checkLevel { + my $level = shift; + my $key = shift; # 'level' or 'backwards' + MinLevel <= $level or croak sprintf + "Illegal level %d (in value for key '%s') lower than %d.", + $level, $key, MinLevel; + $level <= MaxLevel or croak sprintf + "Unsupported level %d (in value for key '%s') higher than %d.", + $level, $key, MaxLevel; +} + +my %DerivCode = ( + 8 => \&_derivCE_8, + 9 => \&_derivCE_9, + 11 => \&_derivCE_9, # 11 == 9 + 14 => \&_derivCE_14, +); + +sub checkCollator { + my $self = shift; + _checkLevel($self->{level}, "level"); + + $self->{derivCode} = $DerivCode{ $self->{UCA_Version} } + or croak "Illegal UCA version (passed $self->{UCA_Version})."; + + $self->{variable} ||= $self->{alternate} || $self->{variableTable} || + $self->{alternateTable} || 'shifted'; + $self->{variable} = $self->{alternate} = lc($self->{variable}); + exists $VariableOK{ $self->{variable} } + or croak "$PACKAGE unknown variable parameter name: $self->{variable}"; + + if (! defined $self->{backwards}) { + $self->{backwardsFlag} = 0; + } + elsif (! 
ref $self->{backwards}) { + _checkLevel($self->{backwards}, "backwards"); + $self->{backwardsFlag} = 1 << $self->{backwards}; + } + else { + my %level; + $self->{backwardsFlag} = 0; + for my $b (@{ $self->{backwards} }) { + _checkLevel($b, "backwards"); + $level{$b} = 1; + } + for my $v (sort keys %level) { + $self->{backwardsFlag} += 1 << $v; + } + } + + defined $self->{rearrange} or $self->{rearrange} = []; + ref $self->{rearrange} + or croak "$PACKAGE: list for rearrangement must be store in ARRAYREF"; + + # keys of $self->{rearrangeHash} are $self->{rearrange}. + $self->{rearrangeHash} = undef; + + if (@{ $self->{rearrange} }) { + @{ $self->{rearrangeHash} }{ @{ $self->{rearrange} } } = (); + } + + $self->{normCode} = undef; + + if (defined $self->{normalization}) { + eval { require Unicode::Normalize }; + $@ and croak "Unicode::Normalize is required to normalize strings"; + + $CVgetCombinClass ||= \&Unicode::Normalize::getCombinClass; + + if ($self->{normalization} =~ /^(?:NF)D\z/) { # tweak for default + $self->{normCode} = \&Unicode::Normalize::NFD; + } + elsif ($self->{normalization} ne 'prenormalized') { + my $norm = $self->{normalization}; + $self->{normCode} = sub { + Unicode::Normalize::normalize($norm, shift); + }; + eval { $self->{normCode}->("") }; # try + $@ and croak "$PACKAGE unknown normalization form name: $norm"; + } + } + return; +} + +sub new +{ + my $class = shift; + my $self = bless { @_ }, $class; + + # If undef is passed explicitly, no file is read. + $self->{table} = $KeyFile if ! exists $self->{table}; + $self->read_table() if defined $self->{table}; + + if ($self->{entry}) { + while ($self->{entry} =~ /([^\n]+)/g) { + $self->parseEntry($1); + } + } + + $self->{level} ||= MaxLevel; + $self->{UCA_Version} ||= UCA_Version(); + + $self->{overrideHangul} = FALSE + if ! exists $self->{overrideHangul}; + $self->{overrideCJK} = FALSE + if ! exists $self->{overrideCJK}; + $self->{normalization} = 'NFD' + if ! 
exists $self->{normalization}; + $self->{rearrange} = $self->{rearrangeTable} || + ($self->{UCA_Version} <= 11 ? $DefaultRearrange : []) + if ! exists $self->{rearrange}; + $self->{backwards} = $self->{backwardsTable} + if ! exists $self->{backwards}; + + $self->checkCollator(); + + return $self; +} + +sub read_table { + my $self = shift; + + my($f, $fh); + foreach my $d (@INC) { + $f = File::Spec->catfile($d, @Path, $self->{table}); + last if open($fh, $f); + $f = undef; + } + if (!defined $f) { + $f = File::Spec->catfile(@Path, $self->{table}); + croak("$PACKAGE: Can't locate $f in \@INC (\@INC contains: @INC)"); + } + + while (my $line = <$fh>) { + next if $line =~ /^\s*#/; + unless ($line =~ s/^\s*\@//) { + $self->parseEntry($line); + next; + } + + # matched ^\s*\@ + if ($line =~ /^version\s*(\S*)/) { + $self->{versionTable} ||= $1; + } + elsif ($line =~ /^variable\s+(\S*)/) { # since UTS #10-9 + $self->{variableTable} ||= $1; + } + elsif ($line =~ /^alternate\s+(\S*)/) { # till UTS #10-8 + $self->{alternateTable} ||= $1; + } + elsif ($line =~ /^backwards\s+(\S*)/) { + push @{ $self->{backwardsTable} }, $1; + } + elsif ($line =~ /^forwards\s+(\S*)/) { # parhaps no use + push @{ $self->{forwardsTable} }, $1; + } + elsif ($line =~ /^rearrange\s+(.*)/) { # (\S*) is NG + push @{ $self->{rearrangeTable} }, _getHexArray($1); + } + } + close $fh; +} + + +## +## get $line, parse it, and write an entry in $self +## +sub parseEntry +{ + my $self = shift; + my $line = shift; + my($name, $entry, @uv, @key); + + return if $line !~ /^\s*[0-9A-Fa-f]/; + + # removes comment and gets name + $name = $1 + if $line =~ s/[#%]\s*(.*)//; + return if defined $self->{undefName} && $name =~ /$self->{undefName}/; + + # gets element + my($e, $k) = split /;/, $line; + croak "Wrong Entry: must be separated by ';' from " + if ! 
$k; + + @uv = _getHexArray($e); + return if !@uv; + + $entry = join(CODE_SEP, @uv); # in JCPS + + if (defined $self->{undefChar} || defined $self->{ignoreChar}) { + my $ele = pack_U(@uv); + + # regarded as if it were not entried in the table + return + if defined $self->{undefChar} && $ele =~ /$self->{undefChar}/; + + # replaced as completely ignorable + $k = '[.0000.0000.0000.0000]' + if defined $self->{ignoreChar} && $ele =~ /$self->{ignoreChar}/; + } + + # replaced as completely ignorable + $k = '[.0000.0000.0000.0000]' + if defined $self->{ignoreName} && $name =~ /$self->{ignoreName}/; + + my $is_L3_ignorable = TRUE; + + foreach my $arr ($k =~ /\[([^\[\]]+)\]/g) { # SPACEs allowed + my $var = $arr =~ /\*/; # exactly /^\*/ but be lenient. + my @wt = _getHexArray($arr); + push @key, pack(VCE_TEMPLATE, $var, @wt); + $is_L3_ignorable = FALSE + if $wt[0] || $wt[1] || $wt[2]; + # Conformance Test for 3.1.1 and 4.0.0 shows Level 3 ignorable + # is completely ignorable. + # For expansion, an entry $is_L3_ignorable + # if and only if "all" CEs are [.0000.0000.0000]. + } + + $self->{mapping}{$entry} = $is_L3_ignorable ? [] : \@key; + + if (@uv > 1) { + (!$self->{maxlength}{$uv[0]} || $self->{maxlength}{$uv[0]} < @uv) + and $self->{maxlength}{$uv[0]} = @uv; + } +} + + +## +## VCE = _varCE(variable term, VCE) +## +sub _varCE +{ + my $vbl = shift; + my $vce = shift; + if ($vbl eq 'non-ignorable') { + return $vce; + } + my ($var, @wt) = unpack VCE_TEMPLATE, $vce; + + if ($var) { + return pack(VCE_TEMPLATE, $var, 0, 0, 0, + $vbl eq 'blanked' ? $wt[3] : $wt[0]); + } + elsif ($vbl eq 'blanked') { + return $vce; + } + else { + return pack(VCE_TEMPLATE, $var, @wt[0..2], + $vbl eq 'shifted' && $wt[0]+$wt[1]+$wt[2] ? 
Shift4Wt : 0); + } +} + +sub viewSortKey +{ + my $self = shift; + $self->visualizeSortKey($self->getSortKey(@_)); +} + +sub visualizeSortKey +{ + my $self = shift; + my $view = join " ", map sprintf("%04X", $_), unpack(KEY_TEMPLATE, shift); + + if ($self->{UCA_Version} <= 8) { + $view =~ s/ ?0000 ?/|/g; + } else { + $view =~ s/\b0000\b/|/g; + } + return "[$view]"; +} + + +## +## arrayref of JCPS = splitEnt(string to be collated) +## arrayref of arrayref[JCPS, ini_pos, fin_pos] = splitEnt(string, true) +## +sub splitEnt +{ + my $self = shift; + my $wLen = $_[1]; + + my $code = $self->{preprocess}; + my $norm = $self->{normCode}; + my $map = $self->{mapping}; + my $max = $self->{maxlength}; + my $reH = $self->{rearrangeHash}; + my $ver9 = $self->{UCA_Version} >= 9 && $self->{UCA_Version} <= 11; + + my ($str, @buf); + + if ($wLen) { + $code and croak "Preprocess breaks character positions. " + . "Don't use with index(), match(), etc."; + $norm and croak "Normalization breaks character positions. " + . "Don't use with index(), match(), etc."; + $str = $_[0]; + } + else { + $str = $_[0]; + $str = &$code($str) if ref $code; + $str = &$norm($str) if ref $norm; + } + + # get array of Unicode code point of string. + my @src = unpack_U($str); + + # rearrangement: + # Character positions are not kept if rearranged, + # then neglected if $wLen is true. + if ($reH && ! $wLen) { + for (my $i = 0; $i < @src; $i++) { + if (exists $reH->{ $src[$i] } && $i + 1 < @src) { + ($src[$i], $src[$i+1]) = ($src[$i+1], $src[$i]); + $i++; + } + } + } + + # remove a code point marked as a completely ignorable. + for (my $i = 0; $i < @src; $i++) { + $src[$i] = undef + if _isIllegal($src[$i]) || ($ver9 && + $map->{ $src[$i] } && @{ $map->{ $src[$i] } } == 0); + } + + for (my $i = 0; $i < @src; $i++) { + my $jcps = $src[$i]; + + # skip removed code point + if (! 
defined $jcps) { + if ($wLen && @buf) { + $buf[-1][2] = $i + 1; + } + next; + } + + my $i_orig = $i; + + # find contraction + if ($max->{$jcps}) { + my $temp_jcps = $jcps; + my $jcpsLen = 1; + my $maxLen = $max->{$jcps}; + + for (my $p = $i + 1; $jcpsLen < $maxLen && $p < @src; $p++) { + next if ! defined $src[$p]; + $temp_jcps .= CODE_SEP . $src[$p]; + $jcpsLen++; + if ($map->{$temp_jcps}) { + $jcps = $temp_jcps; + $i = $p; + } + } + + # not-contiguous contraction with Combining Char (cf. UTS#10, S2.1). + # This process requires Unicode::Normalize. + # If "normalization" is undef, here should be skipped *always* + # (in spite of bool value of $CVgetCombinClass), + # since canonical ordering cannot be expected. + # Blocked combining character should not be contracted. + + if ($self->{normalization}) + # $self->{normCode} is false in the case of "prenormalized". + { + my $preCC = 0; + my $curCC = 0; + + for (my $p = $i + 1; $p < @src; $p++) { + next if ! defined $src[$p]; + $curCC = $CVgetCombinClass->($src[$p]); + last unless $curCC; + my $tail = CODE_SEP . $src[$p]; + if ($preCC != $curCC && $map->{$jcps.$tail}) { + $jcps .= $tail; + $src[$p] = undef; + } else { + $preCC = $curCC; + } + } + } + } + + # skip completely ignorable + if ($map->{$jcps} && @{ $map->{$jcps} } == 0) { + if ($wLen && @buf) { + $buf[-1][2] = $i + 1; + } + next; + } + + push @buf, $wLen ? [$jcps, $i_orig, $i + 1] : $jcps; + } + return \@buf; +} + + +## +## list of VCE = getWt(JCPS) +## +sub getWt +{ + my $self = shift; + my $u = shift; + my $vbl = $self->{variable}; + my $map = $self->{mapping}; + my $der = $self->{derivCode}; + + return if !defined $u; + return map(_varCE($vbl, $_), @{ $map->{$u} }) + if $map->{$u}; + + # JCPS must not be a contraction, then it's a code point. 
+ if (Hangul_SIni <= $u && $u <= Hangul_SFin) { + my $hang = $self->{overrideHangul}; + my @hangulCE; + if ($hang) { + @hangulCE = map(pack(VCE_TEMPLATE, NON_VAR, @$_), &$hang($u)); + } + elsif (!defined $hang) { + @hangulCE = $der->($u); + } + else { + my $max = $self->{maxlength}; + my @decH = _decompHangul($u); + + if (@decH == 2) { + my $contract = join(CODE_SEP, @decH); + @decH = ($contract) if $map->{$contract}; + } else { # must be <@decH == 3> + if ($max->{$decH[0]}) { + my $contract = join(CODE_SEP, @decH); + if ($map->{$contract}) { + @decH = ($contract); + } else { + $contract = join(CODE_SEP, @decH[0,1]); + $map->{$contract} and @decH = ($contract, $decH[2]); + } + # even if V's ignorable, LT contraction is not supported. + # If such a situatution were required, NFD should be used. + } + if (@decH == 3 && $max->{$decH[1]}) { + my $contract = join(CODE_SEP, @decH[1,2]); + $map->{$contract} and @decH = ($decH[0], $contract); + } + } + + @hangulCE = map({ + $map->{$_} ? @{ $map->{$_} } : $der->($_); + } @decH); + } + return map _varCE($vbl, $_), @hangulCE; + } + elsif (_isUIdeo($u, $self->{UCA_Version})) { + my $cjk = $self->{overrideCJK}; + return map _varCE($vbl, $_), + $cjk + ? map(pack(VCE_TEMPLATE, NON_VAR, @$_), &$cjk($u)) + : defined $cjk && $self->{UCA_Version} <= 8 && $u < 0x10000 + ? _uideoCE_8($u) + : $der->($u); + } + else { + return map _varCE($vbl, $_), $der->($u); + } +} + + +## +## string sortkey = getSortKey(string arg) +## +sub getSortKey +{ + my $self = shift; + my $lev = $self->{level}; + my $rEnt = $self->splitEnt(shift); # get an arrayref of JCPS + my $v2i = $self->{UCA_Version} >= 9 && + $self->{variable} ne 'non-ignorable'; + + my @buf; # weight arrays + if ($self->{hangul_terminator}) { + my $preHST = ''; + foreach my $jcps (@$rEnt) { + # weird things like VL, TL-contraction are not considered! 
+ my $curHST = ''; + foreach my $u (split /;/, $jcps) { + $curHST .= getHST($u); + } + if ($preHST && !$curHST || # hangul before non-hangul + $preHST =~ /L\z/ && $curHST =~ /^T/ || + $preHST =~ /V\z/ && $curHST =~ /^L/ || + $preHST =~ /T\z/ && $curHST =~ /^[LV]/) { + + push @buf, $self->getWtHangulTerm(); + } + $preHST = $curHST; + + push @buf, $self->getWt($jcps); + } + $preHST # end at hangul + and push @buf, $self->getWtHangulTerm(); + } + else { + foreach my $jcps (@$rEnt) { + push @buf, $self->getWt($jcps); + } + } + + # make sort key + my @ret = ([],[],[],[]); + my $last_is_variable; + + foreach my $vwt (@buf) { + my($var, @wt) = unpack(VCE_TEMPLATE, $vwt); + + # "Ignorable (L1, L2) after Variable" since track. v. 9 + if ($v2i) { + if ($var) { + $last_is_variable = TRUE; + } + elsif (!$wt[0]) { # ignorable + next if $last_is_variable; + } + else { + $last_is_variable = FALSE; + } + } + foreach my $v (0..$lev-1) { + 0 < $wt[$v] and push @{ $ret[$v] }, $wt[$v]; + } + } + + # modification of tertiary weights + if ($self->{upper_before_lower}) { + foreach my $w (@{ $ret[2] }) { + if (0x8 <= $w && $w <= 0xC) { $w -= 6 } # lower + elsif (0x2 <= $w && $w <= 0x6) { $w += 6 } # upper + elsif ($w == 0x1C) { $w += 1 } # square upper + elsif ($w == 0x1D) { $w -= 1 } # square lower + } + } + if ($self->{katakana_before_hiragana}) { + foreach my $w (@{ $ret[2] }) { + if (0x0F <= $w && $w <= 0x13) { $w -= 2 } # katakana + elsif (0x0D <= $w && $w <= 0x0E) { $w += 5 } # hiragana + } + } + + if ($self->{backwardsFlag}) { + for (my $v = MinLevel; $v <= MaxLevel; $v++) { + if ($self->{backwardsFlag} & (1 << $v)) { + @{ $ret[$v-1] } = reverse @{ $ret[$v-1] }; + } + } + } + + join LEVEL_SEP, map pack(KEY_TEMPLATE, @$_), @ret; +} + + +## +## int compare = cmp(string a, string b) +## +sub cmp { $_[0]->getSortKey($_[1]) cmp $_[0]->getSortKey($_[2]) } +sub eq { $_[0]->getSortKey($_[1]) eq $_[0]->getSortKey($_[2]) } +sub ne { $_[0]->getSortKey($_[1]) ne $_[0]->getSortKey($_[2]) } +sub 
lt { $_[0]->getSortKey($_[1]) lt $_[0]->getSortKey($_[2]) } +sub le { $_[0]->getSortKey($_[1]) le $_[0]->getSortKey($_[2]) } +sub gt { $_[0]->getSortKey($_[1]) gt $_[0]->getSortKey($_[2]) } +sub ge { $_[0]->getSortKey($_[1]) ge $_[0]->getSortKey($_[2]) } + +## +## list[strings] sorted = sort(list[strings] arg) +## +sub sort { + my $obj = shift; + return + map { $_->[1] } + sort{ $a->[0] cmp $b->[0] } + map [ $obj->getSortKey($_), $_ ], @_; +} + + +sub _derivCE_14 { + my $u = shift; + my $base = + (CJK_UidIni <= $u && $u <= CJK_UidF41) + ? 0xFB40 : # CJK + (CJK_ExtAIni <= $u && $u <= CJK_ExtAFin || + CJK_ExtBIni <= $u && $u <= CJK_ExtBFin) + ? 0xFB80 # CJK ext. + : 0xFBC0; # others + + my $aaaa = $base + ($u >> 15); + my $bbbb = ($u & 0x7FFF) | 0x8000; + return + pack(VCE_TEMPLATE, NON_VAR, $aaaa, Min2Wt, Min3Wt, $u), + pack(VCE_TEMPLATE, NON_VAR, $bbbb, 0, 0, $u); +} + +sub _derivCE_9 { + my $u = shift; + my $base = + (CJK_UidIni <= $u && $u <= CJK_UidFin) + ? 0xFB40 : # CJK + (CJK_ExtAIni <= $u && $u <= CJK_ExtAFin || + CJK_ExtBIni <= $u && $u <= CJK_ExtBFin) + ? 0xFB80 # CJK ext. + : 0xFBC0; # others + + my $aaaa = $base + ($u >> 15); + my $bbbb = ($u & 0x7FFF) | 0x8000; + return + pack(VCE_TEMPLATE, NON_VAR, $aaaa, Min2Wt, Min3Wt, $u), + pack(VCE_TEMPLATE, NON_VAR, $bbbb, 0, 0, $u); +} + +sub _derivCE_8 { + my $code = shift; + my $aaaa = 0xFF80 + ($code >> 15); + my $bbbb = ($code & 0x7FFF) | 0x8000; + return + pack(VCE_TEMPLATE, NON_VAR, $aaaa, 2, 1, $code), + pack(VCE_TEMPLATE, NON_VAR, $bbbb, 0, 0, $code); +} + +sub _uideoCE_8 { + my $u = shift; + return pack(VCE_TEMPLATE, NON_VAR, $u, Min2Wt, Min3Wt, $u); +} + +sub _isUIdeo { + my ($u, $uca_vers) = @_; + return( + (CJK_UidIni <= $u && + ($uca_vers >= 14 ? 
( $u <= CJK_UidF41) : ($u <= CJK_UidFin))) + || + (CJK_ExtAIni <= $u && $u <= CJK_ExtAFin) + || + (CJK_ExtBIni <= $u && $u <= CJK_ExtBFin) + ); +} + + +sub getWtHangulTerm { + my $self = shift; + return _varCE($self->{variable}, + pack(VCE_TEMPLATE, NON_VAR, $self->{hangul_terminator}, 0,0,0)); +} + + +## +## "hhhh hhhh hhhh" to (dddd, dddd, dddd) +## +sub _getHexArray { map hex, $_[0] =~ /([0-9a-fA-F]+)/g } + +# +# $code *must* be in Hangul syllable. +# Check it before you enter here. +# +sub _decompHangul { + my $code = shift; + my $si = $code - Hangul_SBase; + my $li = int( $si / Hangul_NCount); + my $vi = int(($si % Hangul_NCount) / Hangul_TCount); + my $ti = $si % Hangul_TCount; + return ( + Hangul_LBase + $li, + Hangul_VBase + $vi, + $ti ? (Hangul_TBase + $ti) : (), + ); +} + +sub _isIllegal { + my $code = shift; + return ! defined $code # removed + || ($code < 0 || 0x10FFFF < $code) # out of range + || (($code & 0xFFFE) == 0xFFFE) # ??FFF[EF] (cf. utf8.c) + || (0xD800 <= $code && $code <= 0xDFFF) # unpaired surrogates + || (0xFDD0 <= $code && $code <= 0xFDEF) # other non-characters + ; +} + +# Hangul Syllable Type +sub getHST { + my $u = shift; + return + Hangul_LIni <= $u && $u <= Hangul_LFin || $u == Hangul_LFill ? "L" : + Hangul_VIni <= $u && $u <= Hangul_VFin ? "V" : + Hangul_TIni <= $u && $u <= Hangul_TFin ? "T" : + Hangul_SIni <= $u && $u <= Hangul_SFin ? + ($u - Hangul_SBase) % Hangul_TCount ? "LVT" : "LV" : ""; +} + + +## +## bool _nonIgnorAtLevel(arrayref weights, int level) +## +sub _nonIgnorAtLevel($$) +{ + my $wt = shift; + return if ! defined $wt; + my $lv = shift; + return grep($wt->[$_-1] != 0, MinLevel..$lv) ? TRUE : FALSE; +} + +## +## bool _eqArray( +## arrayref of arrayref[weights] source, +## arrayref of arrayref[weights] substr, +## int level) +## * comparison of graphemes vs graphemes. 
+## @$source >= @$substr must be true (check it before call this); +## +sub _eqArray($$$) +{ + my $source = shift; + my $substr = shift; + my $lev = shift; + + for my $g (0..@$substr-1){ + # Do the $g'th graphemes have the same number of AV weigths? + return if @{ $source->[$g] } != @{ $substr->[$g] }; + + for my $w (0..@{ $substr->[$g] }-1) { + for my $v (0..$lev-1) { + return if $source->[$g][$w][$v] != $substr->[$g][$w][$v]; + } + } + } + return 1; +} + +## +## (int position, int length) +## int position = index(string, substring, position, [undoc'ed grobal]) +## +## With "grobal" (only for the list context), +## returns list of arrayref[position, length]. +## +sub index +{ + my $self = shift; + my $str = shift; + my $len = length($str); + my $subE = $self->splitEnt(shift); + my $pos = @_ ? shift : 0; + $pos = 0 if $pos < 0; + my $grob = shift; + + my $lev = $self->{level}; + my $v2i = $self->{UCA_Version} >= 9 && + $self->{variable} ne 'non-ignorable'; + + if (! @$subE) { + my $temp = $pos <= 0 ? 0 : $len <= $pos ? $len : $pos; + return $grob + ? map([$_, 0], $temp..$len) + : wantarray ? ($temp,0) : $temp; + } + $len < $pos + and return wantarray ? () : NOMATCHPOS; + my $strE = $self->splitEnt($pos ? substr($str, $pos) : $str, TRUE); + @$strE + or return wantarray ? () : NOMATCHPOS; + + my(@strWt, @iniPos, @finPos, @subWt, @g_ret); + + my $last_is_variable; + for my $vwt (map $self->getWt($_), @$subE) { + my($var, @wt) = unpack(VCE_TEMPLATE, $vwt); + my $to_be_pushed = _nonIgnorAtLevel(\@wt,$lev); + + # "Ignorable (L1, L2) after Variable" since track. v. 
9 + if ($v2i) { + if ($var) { + $last_is_variable = TRUE; + } + elsif (!$wt[0]) { # ignorable + $to_be_pushed = FALSE if $last_is_variable; + } + else { + $last_is_variable = FALSE; + } + } + + if (@subWt && !$var && !$wt[0]) { + push @{ $subWt[-1] }, \@wt if $to_be_pushed; + } else { + push @subWt, [ \@wt ]; + } + } + + my $count = 0; + my $end = @$strE - 1; + + $last_is_variable = FALSE; # reuse + for (my $i = 0; $i <= $end; ) { # no $i++ + my $found_base = 0; + + # fetch a grapheme + while ($i <= $end && $found_base == 0) { + for my $vwt ($self->getWt($strE->[$i][0])) { + my($var, @wt) = unpack(VCE_TEMPLATE, $vwt); + my $to_be_pushed = _nonIgnorAtLevel(\@wt,$lev); + + # "Ignorable (L1, L2) after Variable" since track. v. 9 + if ($v2i) { + if ($var) { + $last_is_variable = TRUE; + } + elsif (!$wt[0]) { # ignorable + $to_be_pushed = FALSE if $last_is_variable; + } + else { + $last_is_variable = FALSE; + } + } + + if (@strWt && !$var && !$wt[0]) { + push @{ $strWt[-1] }, \@wt if $to_be_pushed; + $finPos[-1] = $strE->[$i][2]; + } elsif ($to_be_pushed) { + push @strWt, [ \@wt ]; + push @iniPos, $found_base ? NOMATCHPOS : $strE->[$i][1]; + $finPos[-1] = NOMATCHPOS if $found_base; + push @finPos, $strE->[$i][2]; + $found_base++; + } + # else ===> no-op + } + $i++; + } + + # try to match + while ( @strWt > @subWt || (@strWt == @subWt && $i > $end) ) { + if ($iniPos[0] != NOMATCHPOS && + $finPos[$#subWt] != NOMATCHPOS && + _eqArray(\@strWt, \@subWt, $lev)) { + my $temp = $iniPos[0] + $pos; + + if ($grob) { + push @g_ret, [$temp, $finPos[$#subWt] - $iniPos[0]]; + splice @strWt, 0, $#subWt; + splice @iniPos, 0, $#subWt; + splice @finPos, 0, $#subWt; + } + else { + return wantarray + ? ($temp, $finPos[$#subWt] - $iniPos[0]) + : $temp; + } + } + shift @strWt; + shift @iniPos; + shift @finPos; + } + } + + return $grob + ? @g_ret + : wantarray ? 
() : NOMATCHPOS; +} + +## +## scalarref to matching part = match(string, substring) +## +sub match +{ + my $self = shift; + if (my($pos,$len) = $self->index($_[0], $_[1])) { + my $temp = substr($_[0], $pos, $len); + return wantarray ? $temp : \$temp; + # An lvalue ref \substr should be avoided, + # since its value is affected by modification of its referent. + } + else { + return; + } +} + +## +## arrayref matching parts = gmatch(string, substring) +## +sub gmatch +{ + my $self = shift; + my $str = shift; + my $sub = shift; + return map substr($str, $_->[0], $_->[1]), + $self->index($str, $sub, 0, 'g'); +} + +## +## bool subst'ed = subst(string, substring, replace) +## +sub subst +{ + my $self = shift; + my $code = ref $_[2] eq 'CODE' ? $_[2] : FALSE; + + if (my($pos,$len) = $self->index($_[0], $_[1])) { + if ($code) { + my $mat = substr($_[0], $pos, $len); + substr($_[0], $pos, $len, $code->($mat)); + } else { + substr($_[0], $pos, $len, $_[2]); + } + return TRUE; + } + else { + return FALSE; + } +} + +## +## int count = gsubst(string, substring, replace) +## +sub gsubst +{ + my $self = shift; + my $code = ref $_[2] eq 'CODE' ? $_[2] : FALSE; + my $cnt = 0; + + # Replacement is carried out from the end, then use reverse. + for my $pos_len (reverse $self->index($_[0], $_[1], 0, 'g')) { + if ($code) { + my $mat = substr($_[0], $pos_len->[0], $pos_len->[1]); + substr($_[0], $pos_len->[0], $pos_len->[1], $code->($mat)); + } else { + substr($_[0], $pos_len->[0], $pos_len->[1], $_[2]); + } + $cnt++; + } + return $cnt; +} + +1; +__END__ + +=head1 NAME + +Unicode::Collate - Unicode Collation Algorithm + +=head1 SYNOPSIS + + use Unicode::Collate; + + #construct + $Collator = Unicode::Collate->new(%tailoring); + + #sort + @sorted = $Collator->sort(@not_sorted); + + #compare + $result = $Collator->cmp($a, $b); # returns 1, 0, or -1. + + # If %tailoring is false (i.e. empty), + # $Collator should do the default collation. 
+ +=head1 DESCRIPTION + +This module is an implementation of Unicode Technical Standard #10 +(a.k.a. UTS #10) - Unicode Collation Algorithm (a.k.a. UCA). + +=head2 Constructor and Tailoring + +The C<new> method returns a collator object. + + $Collator = Unicode::Collate->new( + UCA_Version => $UCA_Version, + alternate => $alternate, # deprecated: use of 'variable' is recommended. + backwards => $levelNumber, # or \@levelNumbers + entry => $element, + hangul_terminator => $term_primary_weight, + ignoreName => qr/$ignoreName/, + ignoreChar => qr/$ignoreChar/, + katakana_before_hiragana => $bool, + level => $collationLevel, + normalization => $normalization_form, + overrideCJK => \&overrideCJK, + overrideHangul => \&overrideHangul, + preprocess => \&preprocess, + rearrange => \@charList, + table => $filename, + undefName => qr/$undefName/, + undefChar => qr/$undefChar/, + upper_before_lower => $bool, + variable => $variable, + ); + +=over 4 + +=item UCA_Version + +If the tracking version number of UCA is given, +behavior of that tracking version is emulated on collating. +If omitted, the return value of C<UCA_Version()> is used. +C<UCA_Version()> should return the latest tracking version supported. + +The supported tracking version: 8, 9, 11, or 14. + + UCA Unicode Standard DUCET (@version) + --------------------------------------------------- + 8 3.1 3.0.1 (3.0.1d9) + 9 3.1 with Corrigendum 3 3.1.1 (3.1.1) + 11 4.0 4.0.0 (4.0.0) + 14 4.1.0 4.1.0 (4.1.0) + +Note: Recent UTS #10 renames "Tracking Version" to "Revision." + +=item alternate + +-- see 3.2.2 Alternate Weighting, version 8 of UTS #10 + +For backward compatibility, C<alternate> (old name) can be used +as an alias for C<variable>. + +=item backwards + +-- see 3.1.2 French Accents, UTS #10. + + backwards => $levelNumber or \@levelNumbers + +Weights in reverse order; ex. level 2 (diacritic ordering) in French. +If omitted, forwards at all the levels. + +=item entry + +-- see 3.1 Linguistic Features; 3.2.1 File Format, UTS #10.
+ +If the same character (or a sequence of characters) exists +in the collation element table through C<table>, +mapping to collation elements is overridden. +If it does not exist, the mapping is defined additionally. + + entry => <<'ENTRY', # for DUCET v4.0.0 (allkeys-4.0.0.txt) +0063 0068 ; [.0E6A.0020.0002.0063] # ch +0043 0068 ; [.0E6A.0020.0007.0043] # Ch +0043 0048 ; [.0E6A.0020.0008.0043] # CH +006C 006C ; [.0F4C.0020.0002.006C] # ll +004C 006C ; [.0F4C.0020.0007.004C] # Ll +004C 004C ; [.0F4C.0020.0008.004C] # LL +00F1 ; [.0F7B.0020.0002.00F1] # n-tilde +006E 0303 ; [.0F7B.0020.0002.00F1] # n-tilde +00D1 ; [.0F7B.0020.0008.00D1] # N-tilde +004E 0303 ; [.0F7B.0020.0008.00D1] # N-tilde +ENTRY + + entry => <<'ENTRY', # for DUCET v4.0.0 (allkeys-4.0.0.txt) +00E6 ; [.0E33.0020.0002.00E6][.0E8B.0020.0002.00E6] # ae ligature as <a><e> +00C6 ; [.0E33.0020.0008.00C6][.0E8B.0020.0008.00C6] # AE ligature as <A><E> +ENTRY + +B<NOTE:> The code point in the UCA file format (before C<';'>) +B<must> be a Unicode code point (defined as hexadecimal), +but not a native code point. +So C<0063> must always denote C<U+0063>, +but not a character of C<"\x63">. + +Weighting may vary depending on collation element table. +So ensure the weights defined in C<entry> will be consistent with +those in the collation element table loaded via C<table>
. + +In DUCET v4.0.0, primary weight of C is C<0E60> +and that of C is C<0E6D>. So setting primary weight of C to C<0E6A> +(as a value between C<0E60> and C<0E6D>) +makes ordering as C CH E D>. +Exactly speaking DUCET already has some characters between C and C: +C (C) with primary weight C<0E64>, +C (C) with C<0E65>, +and C (C) with C<0E69>. +Then primary weight C<0E6A> for C makes C +ordered between C and C. + +=item hangul_terminator + +-- see 7.1.4 Trailing Weights, UTS #10. + +If a true value is given (non-zero but should be positive), +it will be added as a terminator primary weight to the end of +every standard Hangul syllable. Secondary and any higher weights +for terminator are set to zero. +If the value is false or C key does not exist, +insertion of terminator weights will not be performed. + +Boundaries of Hangul syllables are determined +according to conjoining Jamo behavior in F +and F. + +B +(1) For expansion mapping (Unicode character mapped +to a sequence of collation elements), a terminator will not be added +between collation elements, even if Hangul syllable boundary exists there. +Addition of terminator is restricted to the next position +to the last collation element. + +(2) Non-conjoining Hangul letters +(Compatibility Jamo, halfwidth Jamo, and enclosed letters) are not +automatically terminated with a terminator primary weight. +These characters may need terminator included in a collation element +table beforehand. + +=item ignoreChar + +=item ignoreName + +-- see 3.2.2 Variable Weighting, UTS #10. + +Makes the entry in the table completely ignorable; +i.e. as if the weights were zero at all level. + +Through C, any character matching C +will be ignored. Through C, any character whose name +(given in the C
file as a comment) matches C +will be ignored. + +E.g. when 'a' and 'e' are ignorable, +'element' is equal to 'lament' (or 'lmnt'). + +=item katakana_before_hiragana + +-- see 7.3.1 Tertiary Weight Table, UTS #10. + +By default, hiragana is before katakana. +If the parameter is made true, this is reversed. + +B: This parameter simplemindedly assumes that any hiragana/katakana +distinctions must occur in level 3, and their weights at level 3 must be +same as those mentioned in 7.3.1, UTS #10. +If you define your collation elements which violate this requirement, +this parameter does not work validly. + +=item level + +-- see 4.3 Form Sort Key, UTS #10. + +Set the maximum level. +Any higher levels than the specified one are ignored. + + Level 1: alphabetic ordering + Level 2: diacritic ordering + Level 3: case ordering + Level 4: tie-breaking (e.g. in the case when variable is 'shifted') + + ex.level => 2, + +If omitted, the maximum is the 4th. + +=item normalization + +-- see 4.1 Normalize, UTS #10. + +If specified, strings are normalized before preparation of sort keys +(the normalization is executed after preprocess). + +A form name C accepts will be applied +as C<$normalization_form>. +Acceptable names include C<'NFD'>, C<'NFC'>, C<'NFKD'>, and C<'NFKC'>. +See C for detail. +If omitted, C<'NFD'> is used. + +C is performed after C (if defined). + +Furthermore, special values, C and C<"prenormalized">, can be used, +though they are not concerned with C. + +If C (not a string C<"undef">) is passed explicitly +as the value for this key, +any normalization is not carried out (this may make tailoring easier +if any normalization is not desired). Under C<(normalization =E undef)>, +only contiguous contractions are resolved; +e.g. even if C (and C) is ordered after C, +C would be primary equal to C. +In this point, +C<(normalization =E undef, preprocess =E sub { NFD(shift) })> +B equivalent to C<(normalization =E 'NFD')>. 
+ +In the case of C<(normalization =E<gt> "prenormalized")>, +no normalization is performed, but +non-contiguous contractions with combining characters are performed. +Therefore +C<(normalization =E<gt> 'prenormalized', preprocess =E<gt> sub { NFD(shift) })> +B<is> equivalent to C<(normalization =E<gt> 'NFD')>. +If source strings are finely prenormalized, +C<(normalization =E<gt> 'prenormalized')> may save time for normalization. + +Except C<(normalization =E<gt> undef)>, +B<Unicode::Normalize> is required (see also B<CAVEATS>). + +=item overrideCJK + +-- see 7.1 Derived Collation Elements, UTS #10. + +By default, CJK Unified Ideographs are ordered in Unicode codepoint order +but C<CJK Unified Ideographs> (if C<UCA_Version> is 8 to 11, its range is +C<U+4E00..U+9FA5>; if C<UCA_Version> is 14, its range is C<U+4E00..U+9FBB>) +are less than C<CJK Unified Ideographs Extension> (its range is +C<U+3400..U+4DB5> and C<U+20000..U+2A6D6>). + +Through C<overrideCJK>, ordering of CJK Unified Ideographs can be overridden. + +ex. CJK Unified Ideographs in the JIS code point order. + + overrideCJK => sub { + my $u = shift; # get a Unicode codepoint + my $b = pack('n', $u); # to UTF-16BE + my $s = your_unicode_to_sjis_converter($b); # convert + my $n = unpack('n', $s); # convert sjis to short + [ $n, 0x20, 0x2, $u ]; # return the collation element + }, + +ex. ignores all CJK Unified Ideographs. + + overrideCJK => sub {()}, # CODEREF returning empty list + + # where ->eq("Pe\x{4E00}rl", "Perl") is true + # as U+4E00 is a CJK Unified Ideograph and to be ignorable. + +If C<undef> is passed explicitly as the value for this key, +weights for CJK Unified Ideographs are treated as undefined. +But assignment of weight for CJK Unified Ideographs +in table or C<entry> is still valid. + +=item overrideHangul + +-- see 7.1 Derived Collation Elements, UTS #10. + +By default, Hangul Syllables are decomposed into Hangul Jamo, +even if C<(normalization =E<gt> undef)>. +But the mapping of Hangul Syllables may be overridden. + +This parameter works like C<overrideCJK>, so see there for examples.
+ +If you want to override the mapping of Hangul Syllables, +NFD, NFKD, and FCD are not appropriate, +since they will decompose Hangul Syllables before overriding. + +If C is passed explicitly as the value for this key, +weight for Hangul Syllables is treated as undefined +without decomposition into Hangul Jamo. +But definition of weight for Hangul Syllables +in table or C is still valid. + +=item preprocess + +-- see 5.1 Preprocessing, UTS #10. + +If specified, the coderef is used to preprocess +before the formation of sort keys. + +ex. dropping English articles, such as "a" or "the". +Then, "the pen" is before "a pencil". + + preprocess => sub { + my $str = shift; + $str =~ s/\b(?:an?|the)\s+//gi; + return $str; + }, + +C is performed before C (if defined). + +=item rearrange + +-- see 3.1.3 Rearrangement, UTS #10. + +Characters that are not coded in logical order and to be rearranged. +If C is equal to or lesser than 11, default is: + + rearrange => [ 0x0E40..0x0E44, 0x0EC0..0x0EC4 ], + +If you want to disallow any rearrangement, pass C or C<[]> +(a reference to empty list) as the value for this key. + +If C is equal to 14, default is C<[]> (i.e. no rearrangement). + +B + +=item table + +-- see 3.2 Default Unicode Collation Element Table, UTS #10. + +You can use another collation element table if desired. + +The table file should locate in the F directory +on C<@INC>. Say, if the filename is F, +the table file is searched as F in C<@INC>. + +By default, F (as the filename of DUCET) is used. +If you will prepare your own table file, any name other than F +may be better to avoid namespace conflict. + +If C is passed explicitly as the value for this key, +no file is read (but you can define collation elements via C). 
+ +A typical way to define a collation element table +without any file of table: + + $onlyABC = Unicode::Collate->new( + table => undef, + entry => << 'ENTRIES', +0061 ; [.0101.0020.0002.0061] # LATIN SMALL LETTER A +0041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A +0062 ; [.0102.0020.0002.0062] # LATIN SMALL LETTER B +0042 ; [.0102.0020.0008.0042] # LATIN CAPITAL LETTER B +0063 ; [.0103.0020.0002.0063] # LATIN SMALL LETTER C +0043 ; [.0103.0020.0008.0043] # LATIN CAPITAL LETTER C +ENTRIES + ); + +If C or C is used, character names should be +specified as a comment (following C<#>) on each line. + +=item undefChar + +=item undefName + +-- see 6.3.4 Reducing the Repertoire, UTS #10. + +Undefines the collation element as if it were unassigned in the table. +This reduces the size of the table. +If an unassigned character appears in the string to be collated, +the sort key is made from its codepoint +as a single-character collation element, +as it is greater than any other assigned collation elements +(in the codepoint order among the unassigned characters). +But, it'd be better to ignore characters +unfamiliar to you and maybe never used. + +Through C, any character matching C +will be undefined. Through C, any character whose name +(given in the C
file as a comment) matches C<undefName> +will be undefined. + +ex. Collation weights for beyond-BMP characters are not stored in object: + + undefChar => qr/[^\0-\x{fffd}]/, + +=item upper_before_lower + +-- see 6.6 Case Comparisons, UTS #10. + +By default, lowercase is before uppercase. +If the parameter is made true, this is reversed. + +B<NOTE>: This parameter simplemindedly assumes that any lowercase/uppercase +distinctions must occur in level 3, and their weights at level 3 must be +same as those mentioned in 7.3.1, UTS #10. +If you define your collation elements which differ from this requirement, +this parameter doesn't work validly. + +=item variable + +-- see 3.2.2 Variable Weighting, UTS #10. + +This key selects the weighting applied to variable collation elements, +which are marked with an ASTERISK in the table +(NOTE: Many punctuation marks and symbols are variable in F<allkeys.txt>). + + variable => 'blanked', 'non-ignorable', 'shifted', or 'shift-trimmed'. + +These names are case-insensitive. +By default (if specification is omitted), 'shifted' is adopted. + + 'Blanked' Variable elements are made ignorable at levels 1 through 3; + considered at the 4th level. + + 'Non-Ignorable' Variable elements are not reset to ignorable. + + 'Shifted' Variable elements are made ignorable at levels 1 through 3; + their level 4 weight is replaced by the old level 1 weight. + Level 4 weight for Non-Variable elements is 0xFFFF. + + 'Shift-Trimmed' Same as 'shifted', but all FFFF's at the 4th level + are trimmed. + +=back + +=head2 Methods for Collation + +=over 4 + +=item C<@sorted = $Collator-E<gt>sort(@not_sorted)> + +Sorts a list of strings. + +=item C<$result = $Collator-E<gt>cmp($a, $b)> + +Returns 1 (when C<$a> is greater than C<$b>) +or 0 (when C<$a> is equal to C<$b>) +or -1 (when C<$a> is less than C<$b>).
+ +=item C<$result = $Collator-E<gt>eq($a, $b)> + +=item C<$result = $Collator-E<gt>ne($a, $b)> + +=item C<$result = $Collator-E<gt>lt($a, $b)> + +=item C<$result = $Collator-E<gt>le($a, $b)> + +=item C<$result = $Collator-E<gt>gt($a, $b)> + +=item C<$result = $Collator-E<gt>ge($a, $b)> + +They work like the operators of the same names. + + eq : whether $a is equal to $b. + ne : whether $a is not equal to $b. + lt : whether $a is less than $b. + le : whether $a is less than $b or equal to $b. + gt : whether $a is greater than $b. + ge : whether $a is greater than $b or equal to $b. + +=item C<$sortKey = $Collator-E<gt>getSortKey($string)> + +-- see 4.3 Form Sort Key, UTS #10. + +Returns a sort key. + +You compare the sort keys using a binary comparison +and get the result of the comparison of the strings using UCA. + + $Collator->getSortKey($a) cmp $Collator->getSortKey($b) + + is equivalent to + + $Collator->cmp($a, $b) + +=item C<$sortKeyForm = $Collator-E<gt>viewSortKey($string)> + +Converts a sorting key into its representation form. +If C<UCA_Version> is 8, the output is slightly different. + + use Unicode::Collate; + my $c = Unicode::Collate->new(); + print $c->viewSortKey("Perl"),"\n"; + + # output: + # [0B67 0A65 0B7F 0B03 | 0020 0020 0020 0020 | 0008 0002 0002 0002 | FFFF FFFF FFFF FFFF] + # Level 1 Level 2 Level 3 Level 4 + +=back + +=head2 Methods for Searching + +B<DISCLAIMER:> If C<preprocess> or C<normalization> parameter is true +for C<$Collator>, calling these methods (C<index>, C<match>, C<gmatch>, +C<subst>, C<gsubst>) croaks, +as the position and the length might differ +from those on the specified string. +(And C<rearrange> and C<hangul_terminator> parameters are neglected.) + +The C<match>, C<gmatch>, C<subst>, C<gsubst> methods work +like C<m//>, C<m//g>, C<s///>, C<s///g>, respectively, +but they do not interpret any pattern; they match only a literal substring.
+ +=over 4 + +=item C<$position = $Collator-Eindex($string, $substring[, $position])> + +=item C<($position, $length) = $Collator-Eindex($string, $substring[, $position])> + +If C<$substring> matches a part of C<$string>, returns +the position of the first occurrence of the matching part in scalar context; +in list context, returns a two-element list of +the position and the length of the matching part. + +If C<$substring> does not match any part of C<$string>, +returns C<-1> in scalar context and +an empty list in list context. + +e.g. you say + + my $Collator = Unicode::Collate->new( normalization => undef, level => 1 ); + # (normalization => undef) is REQUIRED. + my $str = "Ich muß studieren Perl."; + my $sub = "MÜSS"; + my $match; + if (my($pos,$len) = $Collator->index($str, $sub)) { + $match = substr($str, $pos, $len); + } + +and get C<"muß"> in C<$match> since C<"muß"> +is primary equal to C<"MÜSS">. + +=item C<$match_ref = $Collator-Ematch($string, $substring)> + +=item C<($match) = $Collator-Ematch($string, $substring)> + +If C<$substring> matches a part of C<$string>, in scalar context, returns +B the first occurrence of the matching part +(C<$match_ref> is always true if matches, +since every reference is B); +in list context, returns the first occurrence of the matching part. + +If C<$substring> does not match any part of C<$string>, +returns C in scalar context and +an empty list in list context. + +e.g. + + if ($match_ref = $Collator->match($str, $sub)) { # scalar context + print "matches [$$match_ref].\n"; + } else { + print "doesn't match.\n"; + } + + or + + if (($match) = $Collator->match($str, $sub)) { # list context + print "matches [$match].\n"; + } else { + print "doesn't match.\n"; + } + +=item C<@match = $Collator-Egmatch($string, $substring)> + +If C<$substring> matches a part of C<$string>, returns +all the matching parts (or matching count in scalar context). + +If C<$substring> does not match any part of C<$string>, +returns an empty list. 
+ +=item C<$count = $Collator-E<gt>subst($string, $substring, $replacement)> + +If C<$substring> matches a part of C<$string>, +the first occurrence of the matching part is replaced by C<$replacement> +(C<$string> is modified), and C<$count> is returned (always equal to C<1>). + +C<$replacement> can be a C<CODEREF>, +taking the matching part as an argument, +and returning a string to replace the matching part +(a bit similar to C<s/(..)/$coderef-E<gt>($1)/e>). + +=item C<$count = $Collator-E<gt>gsubst($string, $substring, $replacement)> + +If C<$substring> matches a part of C<$string>, +all the occurrences of the matching part are replaced by C<$replacement> +(C<$string> is modified), and C<$count> is returned. + +C<$replacement> can be a C<CODEREF>, +taking the matching part as an argument, +and returning a string to replace the matching part +(a bit similar to C<s/(..)/$coderef-E<gt>($1)/eg>). + +e.g. + + my $Collator = Unicode::Collate->new( normalization => undef, level => 1 ); + # (normalization => undef) is REQUIRED. + my $str = "Camel donkey zebra came\x{301}l CAMEL horse cAm\0E\0L..."; + $Collator->gsubst($str, "camel", sub { "<b>$_[0]</b>" }); + + # now $str is "<b>Camel</b> donkey zebra <b>came\x{301}l</b> <b>CAMEL</b> horse <b>cAm\0E\0L</b>..."; + # i.e., all the camels are made bold-faced. + +=back + +=head2 Other Methods + +=over 4 + +=item C<%old_tailoring = $Collator-E<gt>change(%new_tailoring)> + +Changes the values of the specified keys and returns the changed part. + + $Collator = Unicode::Collate->new(level => 4); + + $Collator->eq("perl", "PERL"); # false + + %old = $Collator->change(level => 2); # returns (level => 4). + + $Collator->eq("perl", "PERL"); # true + + $Collator->change(%old); # returns (level => 2). + + $Collator->eq("perl", "PERL"); # false + +Not all C<(key,value)>s are allowed to be changed. +See also C<@Unicode::Collate::ChangeOK> and C<@Unicode::Collate::ChangeNG>. + +In the scalar context, returns the modified collator +(but it is B<not> a clone from the original).
+ + $Collator->change(level => 2)->eq("perl", "PERL"); # true + + $Collator->eq("perl", "PERL"); # true; now max level is 2nd. + + $Collator->change(level => 4)->eq("perl", "PERL"); # false + +=item C<$version = $Collator-Eversion()> + +Returns the version number (a string) of the Unicode Standard +which the C
file used by the collator object is based on. +If the table does not include a version line (starting with C<@version>), +returns C<"unknown">. + +=item C + +Returns the tracking version number of UTS #10 this module consults. + +=item C + +Returns the version number of UTS #10 this module consults. + +=back + +=head1 EXPORT + +No method will be exported. + +=head1 INSTALL + +Though this module can be used without any C
file, +to use this module easily, it is recommended to install a table file +in the UCA format, by copying it under the directory +/Unicode/Collate. + +The most preferable one is "The Default Unicode Collation Element Table" +(aka DUCET), available from the Unicode Consortium's website: + + http://www.unicode.org/Public/UCA/ + + http://www.unicode.org/Public/UCA/latest/allkeys.txt (latest version) + +If DUCET is not installed, it is recommended to copy the file +from http://www.unicode.org/Public/UCA/latest/allkeys.txt +to /Unicode/Collate/allkeys.txt +manually. + +=head1 CAVEATS + +=over 4 + +=item Normalization + +Use of the C parameter requires the B +module (see L). + +If you need not it (say, in the case when you need not +handle any combining characters), +assign C undef> explicitly. + +-- see 6.5 Avoiding Normalization, UTS #10. + +=item Conformance Test + +The Conformance Test for the UCA is available +under L. + +For F, +a collator via Cnew( )> should be used; +for F, a collator via +Cnew(variable =E "non-ignorable", level =E 3)>. + +B + +=back + +=head1 AUTHOR, COPYRIGHT AND LICENSE + +The Unicode::Collate module for perl was written by SADAHIRO Tomoyuki, +. This module is Copyright(C) 2001-2005, +SADAHIRO Tomoyuki. Japan. All rights reserved. + +This module is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + +The file Unicode/Collate/allkeys.txt was copied directly +from L. +This file is Copyright (c) 1991-2005 Unicode, Inc. All rights reserved. +Distributed under the Terms of Use in L. 
+ +=head1 SEE ALSO + +=over 4 + +=item Unicode Collation Algorithm - UTS #10 + +L + +=item The Default Unicode Collation Element Table (DUCET) + +L + +=item The conformance test for the UCA + +L + +L + +=item Hangul Syllable Type + +L + +=item Unicode Normalization Forms - UAX #15 + +L + +=back + +=cut diff --git a/lib/perl5/5.8.8/Unicode/Collate/keys.txt b/lib/perl5/5.8.8/Unicode/Collate/keys.txt new file mode 100644 index 00000000..aaa2d2a5 --- /dev/null +++ b/lib/perl5/5.8.8/Unicode/Collate/keys.txt @@ -0,0 +1,864 @@ +#### This file is generated from allkeys-3.1.1.txt (unicode.org) +#### by deleting many many entries. +#### +#### This table is intended ONLY for doing a test +#### of Unicode/Collate.pm, a Perl module. +#### +#### The entries contained here ARE: +#### 0000..007F # Basic Latin +#### 0080..00FF # Latin-1 Supplement +#### 0300..036F # Combining Diacritical Marks +#### 1100..11FF # Hangul Jamo +#### 2000..206F # General Punctuation +#### 3040..309F # Hiragana +#### 30A0..30FF # Katakana +#### +#### To fetch the original file, access to this: +#### http://www.unicode.org/reports/tr10/allkeys.txt +#### +@version 3.1.1 +0000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429) +0001 ; [.0000.0000.0000.0000] # [0001] START OF HEADING (in 6429) +0002 ; [.0000.0000.0000.0000] # [0002] START OF TEXT (in 6429) +0003 ; [.0000.0000.0000.0000] # [0003] END OF TEXT (in 6429) +0004 ; [.0000.0000.0000.0000] # [0004] END OF TRANSMISSION (in 6429) +0005 ; [.0000.0000.0000.0000] # [0005] ENQUIRY (in 6429) +0006 ; [.0000.0000.0000.0000] # [0006] ACKNOWLEDGE (in 6429) +0007 ; [.0000.0000.0000.0000] # [0007] BELL (in 6429) +0008 ; [.0000.0000.0000.0000] # [0008] BACKSPACE (in 6429) +000E ; [.0000.0000.0000.0000] # [000E] SHIFT OUT (in 6429) +000F ; [.0000.0000.0000.0000] # [000F] SHIFT IN (in 6429) +0010 ; [.0000.0000.0000.0000] # [0010] DATA LINK ESCAPE (in 6429) +0011 ; [.0000.0000.0000.0000] # [0011] DEVICE CONTROL ONE (in 6429) +0012 ; [.0000.0000.0000.0000] # [0012] 
DEVICE CONTROL TWO (in 6429) +0013 ; [.0000.0000.0000.0000] # [0013] DEVICE CONTROL THREE (in 6429) +0014 ; [.0000.0000.0000.0000] # [0014] DEVICE CONTROL FOUR (in 6429) +0015 ; [.0000.0000.0000.0000] # [0015] NEGATIVE ACKNOWLEDGE (in 6429) +0016 ; [.0000.0000.0000.0000] # [0016] SYNCHRONOUS IDLE (in 6429) +0017 ; [.0000.0000.0000.0000] # [0017] END OF TRANSMISSION BLOCK (in 6429) +0018 ; [.0000.0000.0000.0000] # [0018] CANCEL (in 6429) +0019 ; [.0000.0000.0000.0000] # [0019] END OF MEDIUM (in 6429) +001A ; [.0000.0000.0000.0000] # [001A] SUBSTITUTE (in 6429) +001B ; [.0000.0000.0000.0000] # [001B] ESCAPE (in 6429) +001C ; [.0000.0000.0000.0000] # [001C] FILE SEPARATOR (in 6429) +001D ; [.0000.0000.0000.0000] # [001D] GROUP SEPARATOR (in 6429) +001E ; [.0000.0000.0000.0000] # [001E] RECORD SEPARATOR (in 6429) +001F ; [.0000.0000.0000.0000] # [001F] UNIT SEPARATOR (in 6429) +007F ; [.0000.0000.0000.0000] # [007F] DELETE (in 6429) +0080 ; [.0000.0000.0000.0000] # [0080] +0081 ; [.0000.0000.0000.0000] # [0081] +0082 ; [.0000.0000.0000.0000] # [0082] BREAK PERMITTED HERE (in 6429) +0083 ; [.0000.0000.0000.0000] # [0083] NO BREAK HERE (in 6429) +0084 ; [.0000.0000.0000.0000] # [0084] +0086 ; [.0000.0000.0000.0000] # [0086] START OF SELECTED AREA (in 6429) +0087 ; [.0000.0000.0000.0000] # [0087] END OF SELECTED AREA (in 6429) +0088 ; [.0000.0000.0000.0000] # [0088] CHARACTER TABULATION SET (in 6429) +0089 ; [.0000.0000.0000.0000] # [0089] CHARACTER TABULATION WITH JUSTIFICATION (in 6429) +008A ; [.0000.0000.0000.0000] # [008A] LINE TABULATION SET (in 6429) +008B ; [.0000.0000.0000.0000] # [008B] PARTIAL LINE FORWARD (in 6429) +008C ; [.0000.0000.0000.0000] # [008C] PARTIAL LINE BACKWARD (in 6429) +008D ; [.0000.0000.0000.0000] # [008D] PARTIAL LINE FEED (in 6429) +008E ; [.0000.0000.0000.0000] # [008E] SINGLE SHIFT TWO (in 6429) +008F ; [.0000.0000.0000.0000] # [008F] SINGLE SHIFT THREE (in 6429) +0090 ; [.0000.0000.0000.0000] # [0090] DEVICE CONTROL STRING (in 6429) 
+0091 ; [.0000.0000.0000.0000] # [0091] PRIVATE USE ONE (in 6429) +0092 ; [.0000.0000.0000.0000] # [0092] PRIVATE USE TWO (in 6429) +0093 ; [.0000.0000.0000.0000] # [0093] SET TRANSMIT STATE (in 6429) +0094 ; [.0000.0000.0000.0000] # [0094] CANCEL CHARACTER (in 6429) +0095 ; [.0000.0000.0000.0000] # [0095] MESSAGE WAITING (in 6429) +0096 ; [.0000.0000.0000.0000] # [0096] START OF GUARDED AREA (in 6429) +0097 ; [.0000.0000.0000.0000] # [0097] END OF GUARDED AREA (in 6429) +0098 ; [.0000.0000.0000.0000] # [0098] START OF STRING (in 6429) +0099 ; [.0000.0000.0000.0000] # [0099] +009A ; [.0000.0000.0000.0000] # [009A] SINGLE CHARACTER INTRODUCER (in 6429) +009B ; [.0000.0000.0000.0000] # [009B] CONTROL SEQUENCE INTRODUCER (in 6429) +009C ; [.0000.0000.0000.0000] # [009C] STRING TERMINATOR (in 6429) +009D ; [.0000.0000.0000.0000] # [009D] OPERATING SYSTEM COMMAND (in 6429) +009E ; [.0000.0000.0000.0000] # [009E] PRIVACY MESSAGE (in 6429) +009F ; [.0000.0000.0000.0000] # [009F] APPLICATION PROGRAM COMMAND (in 6429) +200B ; [.0000.0000.0000.0000] # [200B] ZERO WIDTH SPACE +200C ; [.0000.0000.0000.0000] # [200C] ZERO WIDTH NON-JOINER +200D ; [.0000.0000.0000.0000] # [200D] ZERO WIDTH JOINER +200E ; [.0000.0000.0000.0000] # [200E] LEFT-TO-RIGHT MARK +200F ; [.0000.0000.0000.0000] # [200F] RIGHT-TO-LEFT MARK +202A ; [.0000.0000.0000.0000] # [202A] LEFT-TO-RIGHT EMBEDDING +202B ; [.0000.0000.0000.0000] # [202B] RIGHT-TO-LEFT EMBEDDING +202C ; [.0000.0000.0000.0000] # [202C] POP DIRECTIONAL FORMATTING +202D ; [.0000.0000.0000.0000] # [202D] LEFT-TO-RIGHT OVERRIDE +202E ; [.0000.0000.0000.0000] # [202E] RIGHT-TO-LEFT OVERRIDE +206A ; [.0000.0000.0000.0000] # [206A] INHIBIT SYMMETRIC SWAPPING +206B ; [.0000.0000.0000.0000] # [206B] ACTIVATE SYMMETRIC SWAPPING +206C ; [.0000.0000.0000.0000] # [206C] INHIBIT ARABIC FORM SHAPING +206D ; [.0000.0000.0000.0000] # [206D] ACTIVATE ARABIC FORM SHAPING +206E ; [.0000.0000.0000.0000] # [206E] NATIONAL DIGIT SHAPES +206F ; 
[.0000.0000.0000.0000] # [206F] NOMINAL DIGIT SHAPES +0009 ; [*0201.0020.0002.0009] # HORIZONTAL TABULATION (in 6429) +000A ; [*0202.0020.0002.000A] # LINE FEED (in 6429) +000B ; [*0203.0020.0002.000B] # VERTICAL TABULATION (in 6429) +000C ; [*0204.0020.0002.000C] # FORM FEED (in 6429) +000D ; [*0205.0020.0002.000D] # CARRIAGE RETURN (in 6429) +0020 ; [*0209.0020.0002.0020] # SPACE +0021 ; [*024B.0020.0002.0021] # EXCLAMATION MARK +0022 ; [*0270.0020.0002.0022] # QUOTATION MARK +0023 ; [*02A9.0020.0002.0023] # NUMBER SIGN +0025 ; [*02AA.0020.0002.0025] # PERCENT SIGN +0026 ; [*02A7.0020.0002.0026] # AMPERSAND +0027 ; [*0269.0020.0002.0027] # APOSTROPHE +0028 ; [*027A.0020.0002.0028] # LEFT PARENTHESIS +0029 ; [*027B.0020.0002.0029] # RIGHT PARENTHESIS +002A ; [*02A2.0020.0002.002A] # ASTERISK +002B ; [*039F.0020.0002.002B] # PLUS SIGN +002C ; [*022D.0020.0002.002C] # COMMA +002D ; [*0221.0020.0002.002D] # HYPHEN-MINUS +002E ; [*0255.0020.0002.002E] # FULL STOP +002F ; [*02A4.0020.0002.002F] # SOLIDUS +003A ; [*0237.0020.0002.003A] # COLON +003B ; [*0235.0020.0002.003B] # SEMICOLON +003C ; [*03A3.0020.0002.003C] # LESS-THAN SIGN +003D ; [*03A4.0020.0002.003D] # EQUALS SIGN +003E ; [*03A5.0020.0002.003E] # GREATER-THAN SIGN +003F ; [*024E.0020.0002.003F] # QUESTION MARK +0040 ; [*02A1.0020.0002.0040] # COMMERCIAL AT +005B ; [*027C.0020.0002.005B] # LEFT SQUARE BRACKET +005C ; [*02A6.0020.0002.005C] # REVERSE SOLIDUS +005D ; [*027D.0020.0002.005D] # RIGHT SQUARE BRACKET +005E ; [*020F.0020.0002.005E] # CIRCUMFLEX ACCENT +005F ; [*021B.0020.0002.005F] # LOW LINE +0060 ; [*020C.0020.0002.0060] # GRAVE ACCENT +007B ; [*027E.0020.0002.007B] # LEFT CURLY BRACKET +007C ; [*03A7.0020.0002.007C] # VERTICAL LINE +007D ; [*027F.0020.0002.007D] # RIGHT CURLY BRACKET +007E ; [*03AA.0020.0002.007E] # TILDE +0085 ; [*0206.0020.0002.0085] # NEXT LINE (in 6429) +00A0 ; [*0209.0020.001B.00A0] # NO-BREAK SPACE; QQK +00A1 ; [*024C.0020.0002.00A1] # INVERTED EXCLAMATION MARK +00A6 ; 
[*03A8.0020.0002.00A6] # BROKEN BAR +00A7 ; [*029C.0020.0002.00A7] # SECTION SIGN +00A8 ; [*0214.0020.0002.00A8] # DIAERESIS +00A9 ; [*029F.0020.0002.00A9] # COPYRIGHT SIGN +00AB ; [*0278.0020.0002.00AB] # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00AC ; [*03A6.0020.0002.00AC] # NOT SIGN +00AD ; [*0220.0020.0002.00AD] # SOFT HYPHEN +00AE ; [*02A0.0020.0002.00AE] # REGISTERED SIGN +00AF ; [*0210.0020.0002.00AF] # MACRON +00B0 ; [*030A.0020.0002.00B0] # DEGREE SIGN +00B1 ; [*03A0.0020.0002.00B1] # PLUS-MINUS SIGN +00B4 ; [*020D.0020.0002.00B4] # ACUTE ACCENT +00B6 ; [*029D.0020.0002.00B6] # PILCROW SIGN +00B7 ; [*025F.0020.0002.00B7] # MIDDLE DOT +00B8 ; [*0219.0020.0002.00B8] # CEDILLA +00BB ; [*0279.0020.0002.00BB] # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +00BF ; [*024F.0020.0002.00BF] # INVERTED QUESTION MARK +00D7 ; [*03A2.0020.0002.00D7] # MULTIPLICATION SIGN +00F7 ; [*03A1.0020.0002.00F7] # DIVISION SIGN +2000 ; [*0209.0020.0004.2000] # EN QUAD; QQK +2001 ; [*0209.0020.0004.2001] # EM QUAD; QQK +2002 ; [*0209.0020.0004.2002] # EN SPACE; QQK +2003 ; [*0209.0020.0004.2003] # EM SPACE; QQK +2004 ; [*0209.0020.0004.2004] # THREE-PER-EM SPACE; QQK +2005 ; [*0209.0020.0004.2005] # FOUR-PER-EM SPACE; QQK +2006 ; [*0209.0020.0004.2006] # SIX-PER-EM SPACE; QQK +2007 ; [*0209.0020.001B.2007] # FIGURE SPACE; QQK +2008 ; [*0209.0020.0004.2008] # PUNCTUATION SPACE; QQK +2009 ; [*0209.0020.0004.2009] # THIN SPACE; QQK +200A ; [*0209.0020.0004.200A] # HAIR SPACE; QQK +2010 ; [*0225.0020.0002.2010] # HYPHEN +2011 ; [*0225.0020.001B.2011] # NON-BREAKING HYPHEN; QQK +2012 ; [*0226.0020.0002.2012] # FIGURE DASH +2013 ; [*0227.0020.0002.2013] # EN DASH +2014 ; [*0228.0020.0002.2014] # EM DASH +2015 ; [*0229.0020.0002.2015] # HORIZONTAL BAR +2016 ; [*03A9.0020.0002.2016] # DOUBLE VERTICAL LINE +2017 ; [*021C.0020.0002.2017] # DOUBLE LOW LINE +2018 ; [*026A.0020.0002.2018] # LEFT SINGLE QUOTATION MARK +2019 ; [*026B.0020.0002.2019] # RIGHT SINGLE QUOTATION MARK +201A ; 
[*026C.0020.0002.201A] # SINGLE LOW-9 QUOTATION MARK +201B ; [*026D.0020.0002.201B] # SINGLE HIGH-REVERSED-9 QUOTATION MARK +201C ; [*0271.0020.0002.201C] # LEFT DOUBLE QUOTATION MARK +201D ; [*0272.0020.0002.201D] # RIGHT DOUBLE QUOTATION MARK +201E ; [*0273.0020.0002.201E] # DOUBLE LOW-9 QUOTATION MARK +201F ; [*0274.0020.0002.201F] # DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2020 ; [*02AE.0020.0002.2020] # DAGGER +2021 ; [*02AF.0020.0002.2021] # DOUBLE DAGGER +2022 ; [*02B0.0020.0002.2022] # BULLET +2023 ; [*02B1.0020.0002.2023] # TRIANGULAR BULLET +2024 ; [*0255.0020.0004.2024] # ONE DOT LEADER; QQK +2025 ; [*0255.0020.0004.2025][*0255.0020.0004.2025] # TWO DOT LEADER; QQKN +2026 ; [*0255.0020.0004.2026][*0255.0020.0004.2026][*0255.0020.001F.2026] # HORIZONTAL ELLIPSIS; QQKN +2027 ; [*02B2.0020.0002.2027] # HYPHENATION POINT +2028 ; [*0207.0020.0002.2028] # LINE SEPARATOR +2029 ; [*0208.0020.0002.2029] # PARAGRAPH SEPARATOR +202F ; [*0209.0020.001B.202F] # NARROW NO-BREAK SPACE; QQK +2030 ; [*02AC.0020.0002.2030] # PER MILLE SIGN +2031 ; [*02AD.0020.0002.2031] # PER TEN THOUSAND SIGN +2032 ; [*02B6.0020.0002.2032] # PRIME +2033 ; [*02B6.0020.0004.2033][*02B6.0020.0004.2033] # DOUBLE PRIME; QQKN +2034 ; [*02B6.0020.0004.2034][*02B6.0020.0004.2034][*02B6.0020.001F.2034] # TRIPLE PRIME; QQKN +2035 ; [*02B7.0020.0002.2035] # REVERSED PRIME +2036 ; [*02B7.0020.0004.2036][*02B7.0020.0004.2036] # REVERSED DOUBLE PRIME; QQKN +2037 ; [*02B7.0020.0004.2037][*02B7.0020.0004.2037][*02B7.0020.001F.2037] # REVERSED TRIPLE PRIME; QQKN +2038 ; [*02B9.0020.0002.2038] # CARET +2039 ; [*026E.0020.0002.2039] # SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A ; [*026F.0020.0002.203A] # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +203B ; [*02BA.0020.0002.203B] # REFERENCE MARK +203C ; [*024B.0020.0004.203C][*024B.0020.0004.203C] # DOUBLE EXCLAMATION MARK; QQKN +203D ; [*0254.0020.0002.203D] # INTERROBANG +203E ; [*0211.0020.0002.203E] # OVERLINE +203F ; [*02BB.0020.0002.203F] # UNDERTIE 
+2040 ; [*02BC.0020.0002.2040] # CHARACTER TIE +2041 ; [*02BD.0020.0002.2041] # CARET INSERTION POINT +2042 ; [*02BE.0020.0002.2042] # ASTERISM +2043 ; [*02B3.0020.0002.2043] # HYPHEN BULLET +2044 ; [*02A5.0020.0002.2044] # FRACTION SLASH +2045 ; [*0286.0020.0002.2045] # LEFT SQUARE BRACKET WITH QUILL +2046 ; [*0287.0020.0002.2046] # RIGHT SQUARE BRACKET WITH QUILL +2048 ; [*024E.0020.0004.2048][*024B.0020.0004.2048] # QUESTION EXCLAMATION MARK; QQKN +2049 ; [*024B.0020.0004.2049][*024E.0020.0004.2049] # EXCLAMATION QUESTION MARK; QQKN +204A ; [*02A8.0020.0002.204A] # TIRONIAN SIGN ET +204B ; [*029E.0020.0002.204B] # REVERSED PILCROW SIGN +204C ; [*02B4.0020.0002.204C] # BLACK LEFTWARDS BULLET +204D ; [*02B5.0020.0002.204D] # BLACK RIGHTWARDS BULLET +309B ; [*021E.0020.0002.309B] # KATAKANA-HIRAGANA VOICED SOUND MARK +309C ; [*021F.0020.0002.309C] # KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +30FB ; [*022C.0020.0002.30FB] # KATAKANA MIDDLE DOT +0332 ; [.0000.0021.0002.0332] # COMBINING LOW LINE +0313 ; [.0000.0022.0002.0313] # COMBINING COMMA ABOVE +0343 ; [.0000.0022.0002.0343] # COMBINING GREEK KORONIS; QQC +0314 ; [.0000.002A.0002.0314] # COMBINING REVERSED COMMA ABOVE +0301 ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT +0341 ; [.0000.0032.0002.0341] # COMBINING ACUTE TONE MARK; QQC +0300 ; [.0000.0035.0002.0300] # COMBINING GRAVE ACCENT +0340 ; [.0000.0035.0002.0340] # COMBINING GRAVE TONE MARK; QQC +0306 ; [.0000.0037.0002.0306] # COMBINING BREVE +0302 ; [.0000.003C.0002.0302] # COMBINING CIRCUMFLEX ACCENT +030C ; [.0000.0041.0002.030C] # COMBINING CARON +030A ; [.0000.0043.0002.030A] # COMBINING RING ABOVE +0342 ; [.0000.0045.0002.0342] # COMBINING GREEK PERISPOMENI +0308 ; [.0000.0047.0002.0308] # COMBINING DIAERESIS +0344 ; [.0000.0047.0002.0308][.0000.0032.0002.0301] # COMBINING GREEK DIALYTIKA TONOS; QQCM +030B ; [.0000.004D.0002.030B] # COMBINING DOUBLE ACUTE ACCENT +0303 ; [.0000.004E.0002.0303] # COMBINING TILDE +0307 ; [.0000.0052.0002.0307] # 
COMBINING DOT ABOVE +0338 ; [.0000.0054.0002.0338] # COMBINING LONG SOLIDUS OVERLAY +0327 ; [.0000.0055.0002.0327] # COMBINING CEDILLA +0328 ; [.0000.0058.0002.0328] # COMBINING OGONEK +0304 ; [.0000.005A.0002.0304] # COMBINING MACRON +0305 ; [.0000.005E.0002.0305] # COMBINING OVERLINE +0309 ; [.0000.005F.0002.0309] # COMBINING HOOK ABOVE +030D ; [.0000.0060.0002.030D] # COMBINING VERTICAL LINE ABOVE +030E ; [.0000.0061.0002.030E] # COMBINING DOUBLE VERTICAL LINE ABOVE +030F ; [.0000.0062.0002.030F] # COMBINING DOUBLE GRAVE ACCENT +0310 ; [.0000.0063.0002.0310] # COMBINING CANDRABINDU +0311 ; [.0000.0064.0002.0311] # COMBINING INVERTED BREVE +0312 ; [.0000.0065.0002.0312] # COMBINING TURNED COMMA ABOVE +0315 ; [.0000.0066.0002.0315] # COMBINING COMMA ABOVE RIGHT +0316 ; [.0000.0067.0002.0316] # COMBINING GRAVE ACCENT BELOW +0317 ; [.0000.0068.0002.0317] # COMBINING ACUTE ACCENT BELOW +0318 ; [.0000.0069.0002.0318] # COMBINING LEFT TACK BELOW +0319 ; [.0000.006A.0002.0319] # COMBINING RIGHT TACK BELOW +031A ; [.0000.006B.0002.031A] # COMBINING LEFT ANGLE ABOVE +031B ; [.0000.006C.0002.031B] # COMBINING HORN +031C ; [.0000.0072.0002.031C] # COMBINING LEFT HALF RING BELOW +031D ; [.0000.0073.0002.031D] # COMBINING UP TACK BELOW +031E ; [.0000.0074.0002.031E] # COMBINING DOWN TACK BELOW +031F ; [.0000.0075.0002.031F] # COMBINING PLUS SIGN BELOW +0320 ; [.0000.0076.0002.0320] # COMBINING MINUS SIGN BELOW +0321 ; [.0000.0077.0002.0321] # COMBINING PALATALIZED HOOK BELOW +0322 ; [.0000.0078.0002.0322] # COMBINING RETROFLEX HOOK BELOW +0323 ; [.0000.0079.0002.0323] # COMBINING DOT BELOW +0324 ; [.0000.007E.0002.0324] # COMBINING DIAERESIS BELOW +0325 ; [.0000.007F.0002.0325] # COMBINING RING BELOW +0326 ; [.0000.0080.0002.0326] # COMBINING COMMA BELOW +0329 ; [.0000.0081.0002.0329] # COMBINING VERTICAL LINE BELOW +032A ; [.0000.0082.0002.032A] # COMBINING BRIDGE BELOW +032B ; [.0000.0083.0002.032B] # COMBINING INVERTED DOUBLE ARCH BELOW +032C ; [.0000.0084.0002.032C] # 
COMBINING CARON BELOW +032D ; [.0000.0085.0002.032D] # COMBINING CIRCUMFLEX ACCENT BELOW +032E ; [.0000.0086.0002.032E] # COMBINING BREVE BELOW +032F ; [.0000.0087.0002.032F] # COMBINING INVERTED BREVE BELOW +0330 ; [.0000.0088.0002.0330] # COMBINING TILDE BELOW +0331 ; [.0000.0089.0002.0331] # COMBINING MACRON BELOW +0333 ; [.0000.008A.0002.0333] # COMBINING DOUBLE LOW LINE +0334 ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY +0335 ; [.0000.008C.0002.0335] # COMBINING SHORT STROKE OVERLAY +0336 ; [.0000.008D.0002.0336] # COMBINING LONG STROKE OVERLAY +0337 ; [.0000.008E.0002.0337] # COMBINING SHORT SOLIDUS OVERLAY +0339 ; [.0000.008F.0002.0339] # COMBINING RIGHT HALF RING BELOW +033A ; [.0000.0090.0002.033A] # COMBINING INVERTED BRIDGE BELOW +033B ; [.0000.0091.0002.033B] # COMBINING SQUARE BELOW +033C ; [.0000.0092.0002.033C] # COMBINING SEAGULL BELOW +033D ; [.0000.0093.0002.033D] # COMBINING X ABOVE +033E ; [.0000.0094.0002.033E] # COMBINING VERTICAL TILDE +033F ; [.0000.0095.0002.033F] # COMBINING DOUBLE OVERLINE +0345 ; [.0000.0096.0002.0345] # COMBINING GREEK YPOGEGRAMMENI +0346 ; [.0000.0097.0002.0346] # COMBINING BRIDGE ABOVE +0347 ; [.0000.0098.0002.0347] # COMBINING EQUALS SIGN BELOW +0348 ; [.0000.0099.0002.0348] # COMBINING DOUBLE VERTICAL LINE BELOW +0349 ; [.0000.009A.0002.0349] # COMBINING LEFT ANGLE BELOW +034A ; [.0000.009B.0002.034A] # COMBINING NOT TILDE ABOVE +034B ; [.0000.009C.0002.034B] # COMBINING HOMOTHETIC ABOVE +034C ; [.0000.009D.0002.034C] # COMBINING ALMOST EQUAL TO ABOVE +034D ; [.0000.009E.0002.034D] # COMBINING LEFT RIGHT ARROW BELOW +034E ; [.0000.009F.0002.034E] # COMBINING UPWARDS ARROW BELOW +0360 ; [.0000.00A0.0002.0360] # COMBINING DOUBLE TILDE +0361 ; [.0000.00A1.0002.0361] # COMBINING DOUBLE INVERTED BREVE +0362 ; [.0000.00A2.0002.0362] # COMBINING DOUBLE RIGHTWARDS ARROW BELOW +3099 ; [.0000.013D.0002.3099] # COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK +309A ; [.0000.013E.0002.309A] # COMBINING KATAKANA-HIRAGANA 
SEMI-VOICED SOUND MARK +309D ; [.09DB.0020.0002.309D] # HIRAGANA ITERATION MARK +309E ; [.09DB.0020.0002.309D][.0000.013D.0002.3099] # HIRAGANA VOICED ITERATION MARK; QQCM +30FC ; [.09DC.0020.0002.30FC] # KATAKANA-HIRAGANA PROLONGED SOUND MARK +30FD ; [.09DD.0020.0002.30FD] # KATAKANA ITERATION MARK +30FE ; [.09DD.0020.0002.30FD][.0000.013D.0002.3099] # KATAKANA VOICED ITERATION MARK; QQCM +00A4 ; [.09DE.0020.0002.00A4] # CURRENCY SIGN +00A2 ; [.09DF.0020.0002.00A2] # CENT SIGN +0024 ; [.09E0.0020.0002.0024] # DOLLAR SIGN +00A3 ; [.09E1.0020.0002.00A3] # POUND SIGN +00A5 ; [.09E2.0020.0002.00A5] # YEN SIGN +0030 ; [.0A0B.0020.0002.0030] # DIGIT ZERO +0031 ; [.0A0C.0020.0002.0031] # DIGIT ONE +00B9 ; [.0A0C.0020.0014.00B9] # SUPERSCRIPT ONE; QQK +00BD ; [.0A0C.0020.001E.00BD][*02A5.0020.001E.00BD][.0A0D.0020.001F.00BD] # VULGAR FRACTION ONE HALF; QQKN +00BC ; [.0A0C.0020.001E.00BC][*02A5.0020.001E.00BC][.0A0F.0020.001F.00BC] # VULGAR FRACTION ONE QUARTER; QQKN +0032 ; [.0A0D.0020.0002.0032] # DIGIT TWO +00B2 ; [.0A0D.0020.0014.00B2] # SUPERSCRIPT TWO; QQK +0033 ; [.0A0E.0020.0002.0033] # DIGIT THREE +00B3 ; [.0A0E.0020.0014.00B3] # SUPERSCRIPT THREE; QQK +00BE ; [.0A0E.0020.001E.00BE][*02A5.0020.001E.00BE][.0A0F.0020.001F.00BE] # VULGAR FRACTION THREE QUARTERS; QQKN +0034 ; [.0A0F.0020.0002.0034] # DIGIT FOUR +0035 ; [.0A10.0020.0002.0035] # DIGIT FIVE +0036 ; [.0A11.0020.0002.0036] # DIGIT SIX +0037 ; [.0A12.0020.0002.0037] # DIGIT SEVEN +0038 ; [.0A13.0020.0002.0038] # DIGIT EIGHT +0039 ; [.0A14.0020.0002.0039] # DIGIT NINE +0061 ; [.0A15.0020.0002.0061] # LATIN SMALL LETTER A +0041 ; [.0A15.0020.0008.0041] # LATIN CAPITAL LETTER A +00AA ; [.0A15.0020.0014.00AA] # FEMININE ORDINAL INDICATOR; QQK +00E1 ; [.0A15.0020.0002.0061][.0000.0032.0002.0301] # LATIN SMALL LETTER A WITH ACUTE; QQCM +00C1 ; [.0A15.0020.0008.0041][.0000.0032.0002.0301] # LATIN CAPITAL LETTER A WITH ACUTE; QQCM +00E0 ; [.0A15.0020.0002.0061][.0000.0035.0002.0300] # LATIN SMALL LETTER A WITH 
GRAVE; QQCM +00C0 ; [.0A15.0020.0008.0041][.0000.0035.0002.0300] # LATIN CAPITAL LETTER A WITH GRAVE; QQCM +00E2 ; [.0A15.0020.0002.0061][.0000.003C.0002.0302] # LATIN SMALL LETTER A WITH CIRCUMFLEX; QQCM +00C2 ; [.0A15.0020.0008.0041][.0000.003C.0002.0302] # LATIN CAPITAL LETTER A WITH CIRCUMFLEX; QQCM +00E5 ; [.0A15.0020.0002.0061][.0000.0043.0002.030A] # LATIN SMALL LETTER A WITH RING ABOVE; QQCM +00C5 ; [.0A15.0020.0008.0041][.0000.0043.0002.030A] # LATIN CAPITAL LETTER A WITH RING ABOVE; QQCM +00E4 ; [.0A15.0020.0002.0061][.0000.0047.0002.0308] # LATIN SMALL LETTER A WITH DIAERESIS; QQCM +00C4 ; [.0A15.0020.0008.0041][.0000.0047.0002.0308] # LATIN CAPITAL LETTER A WITH DIAERESIS; QQCM +00E3 ; [.0A15.0020.0002.0061][.0000.004E.0002.0303] # LATIN SMALL LETTER A WITH TILDE; QQCM +00C3 ; [.0A15.0020.0008.0041][.0000.004E.0002.0303] # LATIN CAPITAL LETTER A WITH TILDE; QQCM +00E6 ; [.0A19.0020.0002.00E6] # LATIN SMALL LETTER AE +00C6 ; [.0A19.0020.0008.00C6] # LATIN CAPITAL LETTER AE +0062 ; [.0A29.0020.0002.0062] # LATIN SMALL LETTER B +0042 ; [.0A29.0020.0008.0042] # LATIN CAPITAL LETTER B +0063 ; [.0A3D.0020.0002.0063] # LATIN SMALL LETTER C +0043 ; [.0A3D.0020.0008.0043] # LATIN CAPITAL LETTER C +00E7 ; [.0A3D.0020.0002.0063][.0000.0055.0002.0327] # LATIN SMALL LETTER C WITH CEDILLA; QQCM +00C7 ; [.0A3D.0020.0008.0043][.0000.0055.0002.0327] # LATIN CAPITAL LETTER C WITH CEDILLA; QQCM +0064 ; [.0A49.0020.0002.0064] # LATIN SMALL LETTER D +0044 ; [.0A49.0020.0008.0044] # LATIN CAPITAL LETTER D +00F0 ; [.0A5D.0020.0002.00F0] # LATIN SMALL LETTER ETH +00D0 ; [.0A5D.0020.0008.00D0] # LATIN CAPITAL LETTER ETH +0065 ; [.0A65.0020.0002.0065] # LATIN SMALL LETTER E +0045 ; [.0A65.0020.0008.0045] # LATIN CAPITAL LETTER E +00E9 ; [.0A65.0020.0002.0065][.0000.0032.0002.0301] # LATIN SMALL LETTER E WITH ACUTE; QQCM +00C9 ; [.0A65.0020.0008.0045][.0000.0032.0002.0301] # LATIN CAPITAL LETTER E WITH ACUTE; QQCM +00E8 ; [.0A65.0020.0002.0065][.0000.0035.0002.0300] # LATIN SMALL 
LETTER E WITH GRAVE; QQCM +00C8 ; [.0A65.0020.0008.0045][.0000.0035.0002.0300] # LATIN CAPITAL LETTER E WITH GRAVE; QQCM +00EA ; [.0A65.0020.0002.0065][.0000.003C.0002.0302] # LATIN SMALL LETTER E WITH CIRCUMFLEX; QQCM +00CA ; [.0A65.0020.0008.0045][.0000.003C.0002.0302] # LATIN CAPITAL LETTER E WITH CIRCUMFLEX; QQCM +00EB ; [.0A65.0020.0002.0065][.0000.0047.0002.0308] # LATIN SMALL LETTER E WITH DIAERESIS; QQCM +00CB ; [.0A65.0020.0008.0045][.0000.0047.0002.0308] # LATIN CAPITAL LETTER E WITH DIAERESIS; QQCM +0066 ; [.0A91.0020.0002.0066] # LATIN SMALL LETTER F +0046 ; [.0A91.0020.0008.0046] # LATIN CAPITAL LETTER F +0067 ; [.0A99.0020.0002.0067] # LATIN SMALL LETTER G +0047 ; [.0A99.0020.0008.0047] # LATIN CAPITAL LETTER G +0068 ; [.0AB9.0020.0002.0068] # LATIN SMALL LETTER H +0048 ; [.0AB9.0020.0008.0048] # LATIN CAPITAL LETTER H +0069 ; [.0AD3.0020.0002.0069] # LATIN SMALL LETTER I +0049 ; [.0AD3.0020.0008.0049] # LATIN CAPITAL LETTER I +00ED ; [.0AD3.0020.0002.0069][.0000.0032.0002.0301] # LATIN SMALL LETTER I WITH ACUTE; QQCM +00CD ; [.0AD3.0020.0008.0049][.0000.0032.0002.0301] # LATIN CAPITAL LETTER I WITH ACUTE; QQCM +00EC ; [.0AD3.0020.0002.0069][.0000.0035.0002.0300] # LATIN SMALL LETTER I WITH GRAVE; QQCM +00CC ; [.0AD3.0020.0008.0049][.0000.0035.0002.0300] # LATIN CAPITAL LETTER I WITH GRAVE; QQCM +00EE ; [.0AD3.0020.0002.0069][.0000.003C.0002.0302] # LATIN SMALL LETTER I WITH CIRCUMFLEX; QQCM +00CE ; [.0AD3.0020.0008.0049][.0000.003C.0002.0302] # LATIN CAPITAL LETTER I WITH CIRCUMFLEX; QQCM +00EF ; [.0AD3.0020.0002.0069][.0000.0047.0002.0308] # LATIN SMALL LETTER I WITH DIAERESIS; QQCM +00CF ; [.0AD3.0020.0008.0049][.0000.0047.0002.0308] # LATIN CAPITAL LETTER I WITH DIAERESIS; QQCM +006A ; [.0AE7.0020.0002.006A] # LATIN SMALL LETTER J +004A ; [.0AE7.0020.0008.004A] # LATIN CAPITAL LETTER J +006B ; [.0AF7.0020.0002.006B] # LATIN SMALL LETTER K +004B ; [.0AF7.0020.0008.004B] # LATIN CAPITAL LETTER K +006C ; [.0B03.0020.0002.006C] # LATIN SMALL LETTER L 
+004C ; [.0B03.0020.0008.004C] # LATIN CAPITAL LETTER L +006D ; [.0B2B.0020.0002.006D] # LATIN SMALL LETTER M +004D ; [.0B2B.0020.0008.004D] # LATIN CAPITAL LETTER M +006E ; [.0B33.0020.0002.006E] # LATIN SMALL LETTER N +004E ; [.0B33.0020.0008.004E] # LATIN CAPITAL LETTER N +00F1 ; [.0B33.0020.0002.006E][.0000.004E.0002.0303] # LATIN SMALL LETTER N WITH TILDE; QQCM +00D1 ; [.0B33.0020.0008.004E][.0000.004E.0002.0303] # LATIN CAPITAL LETTER N WITH TILDE; QQCM +006F ; [.0B4B.0020.0002.006F] # LATIN SMALL LETTER O +004F ; [.0B4B.0020.0008.004F] # LATIN CAPITAL LETTER O +00BA ; [.0B4B.0020.0014.00BA] # MASCULINE ORDINAL INDICATOR; QQK +00F3 ; [.0B4B.0020.0002.006F][.0000.0032.0002.0301] # LATIN SMALL LETTER O WITH ACUTE; QQCM +00D3 ; [.0B4B.0020.0008.004F][.0000.0032.0002.0301] # LATIN CAPITAL LETTER O WITH ACUTE; QQCM +00F2 ; [.0B4B.0020.0002.006F][.0000.0035.0002.0300] # LATIN SMALL LETTER O WITH GRAVE; QQCM +00D2 ; [.0B4B.0020.0008.004F][.0000.0035.0002.0300] # LATIN CAPITAL LETTER O WITH GRAVE; QQCM +00F4 ; [.0B4B.0020.0002.006F][.0000.003C.0002.0302] # LATIN SMALL LETTER O WITH CIRCUMFLEX; QQCM +00D4 ; [.0B4B.0020.0008.004F][.0000.003C.0002.0302] # LATIN CAPITAL LETTER O WITH CIRCUMFLEX; QQCM +00F6 ; [.0B4B.0020.0002.006F][.0000.0047.0002.0308] # LATIN SMALL LETTER O WITH DIAERESIS; QQCM +00D6 ; [.0B4B.0020.0008.004F][.0000.0047.0002.0308] # LATIN CAPITAL LETTER O WITH DIAERESIS; QQCM +00F5 ; [.0B4B.0020.0002.006F][.0000.004E.0002.0303] # LATIN SMALL LETTER O WITH TILDE; QQCM +00D5 ; [.0B4B.0020.0008.004F][.0000.004E.0002.0303] # LATIN CAPITAL LETTER O WITH TILDE; QQCM +00F8 ; [.0B53.0020.0002.00F8] # LATIN SMALL LETTER O WITH STROKE +00D8 ; [.0B53.0020.0008.00D8] # LATIN CAPITAL LETTER O WITH STROKE +0070 ; [.0B67.0020.0002.0070] # LATIN SMALL LETTER P +0050 ; [.0B67.0020.0008.0050] # LATIN CAPITAL LETTER P +0071 ; [.0B73.0020.0002.0071] # LATIN SMALL LETTER Q +0051 ; [.0B73.0020.0008.0051] # LATIN CAPITAL LETTER Q +0072 ; [.0B7F.0020.0002.0072] # LATIN SMALL 
LETTER R +0052 ; [.0B7F.0020.0008.0052] # LATIN CAPITAL LETTER R +0073 ; [.0BA7.0020.0002.0073] # LATIN SMALL LETTER S +0053 ; [.0BA7.0020.0008.0053] # LATIN CAPITAL LETTER S +00DF ; [.0BA7.0020.0004.00DF][.0000.0153.0004.00DF][.0BA7.0020.001F.00DF] # LATIN SMALL LETTER SHARP S; QQKN +0074 ; [.0BBF.0020.0002.0074] # LATIN SMALL LETTER T +0054 ; [.0BBF.0020.0008.0054] # LATIN CAPITAL LETTER T +0075 ; [.0BD7.0020.0002.0075] # LATIN SMALL LETTER U +0055 ; [.0BD7.0020.0008.0055] # LATIN CAPITAL LETTER U +00FA ; [.0BD7.0020.0002.0075][.0000.0032.0002.0301] # LATIN SMALL LETTER U WITH ACUTE; QQCM +00DA ; [.0BD7.0020.0008.0055][.0000.0032.0002.0301] # LATIN CAPITAL LETTER U WITH ACUTE; QQCM +00F9 ; [.0BD7.0020.0002.0075][.0000.0035.0002.0300] # LATIN SMALL LETTER U WITH GRAVE; QQCM +00D9 ; [.0BD7.0020.0008.0055][.0000.0035.0002.0300] # LATIN CAPITAL LETTER U WITH GRAVE; QQCM +00FB ; [.0BD7.0020.0002.0075][.0000.003C.0002.0302] # LATIN SMALL LETTER U WITH CIRCUMFLEX; QQCM +00DB ; [.0BD7.0020.0008.0055][.0000.003C.0002.0302] # LATIN CAPITAL LETTER U WITH CIRCUMFLEX; QQCM +00FC ; [.0BD7.0020.0002.0075][.0000.0047.0002.0308] # LATIN SMALL LETTER U WITH DIAERESIS; QQCM +00DC ; [.0BD7.0020.0008.0055][.0000.0047.0002.0308] # LATIN CAPITAL LETTER U WITH DIAERESIS; QQCM +0076 ; [.0BEF.0020.0002.0076] # LATIN SMALL LETTER V +0056 ; [.0BEF.0020.0008.0056] # LATIN CAPITAL LETTER V +0077 ; [.0BFB.0020.0002.0077] # LATIN SMALL LETTER W +0057 ; [.0BFB.0020.0008.0057] # LATIN CAPITAL LETTER W +0078 ; [.0C03.0020.0002.0078] # LATIN SMALL LETTER X +0058 ; [.0C03.0020.0008.0058] # LATIN CAPITAL LETTER X +0079 ; [.0C07.0020.0002.0079] # LATIN SMALL LETTER Y +0059 ; [.0C07.0020.0008.0059] # LATIN CAPITAL LETTER Y +00FD ; [.0C07.0020.0002.0079][.0000.0032.0002.0301] # LATIN SMALL LETTER Y WITH ACUTE; QQCM +00DD ; [.0C07.0020.0008.0059][.0000.0032.0002.0301] # LATIN CAPITAL LETTER Y WITH ACUTE; QQCM +00FF ; [.0C07.0020.0002.0079][.0000.0047.0002.0308] # LATIN SMALL LETTER Y WITH DIAERESIS; QQCM 
+007A ; [.0C13.0020.0002.007A] # LATIN SMALL LETTER Z +005A ; [.0C13.0020.0008.005A] # LATIN CAPITAL LETTER Z +00FE ; [.0C3B.0020.0002.00FE] # LATIN SMALL LETTER THORN +00DE ; [.0C3B.0020.0008.00DE] # LATIN CAPITAL LETTER THORN +00B5 ; [.0C9F.0020.0004.00B5] # MICRO SIGN; QQK +1100 ; [.1831.0020.0002.1100] # HANGUL CHOSEONG KIYEOK +1101 ; [.1832.0020.0002.1101] # HANGUL CHOSEONG SSANGKIYEOK +1102 ; [.1833.0020.0002.1102] # HANGUL CHOSEONG NIEUN +1103 ; [.1834.0020.0002.1103] # HANGUL CHOSEONG TIKEUT +1104 ; [.1835.0020.0002.1104] # HANGUL CHOSEONG SSANGTIKEUT +1105 ; [.1836.0020.0002.1105] # HANGUL CHOSEONG RIEUL +1106 ; [.1837.0020.0002.1106] # HANGUL CHOSEONG MIEUM +1107 ; [.1838.0020.0002.1107] # HANGUL CHOSEONG PIEUP +1108 ; [.1839.0020.0002.1108] # HANGUL CHOSEONG SSANGPIEUP +1109 ; [.183A.0020.0002.1109] # HANGUL CHOSEONG SIOS +110A ; [.183B.0020.0002.110A] # HANGUL CHOSEONG SSANGSIOS +110B ; [.183C.0020.0002.110B] # HANGUL CHOSEONG IEUNG +110C ; [.183D.0020.0002.110C] # HANGUL CHOSEONG CIEUC +110D ; [.183E.0020.0002.110D] # HANGUL CHOSEONG SSANGCIEUC +110E ; [.183F.0020.0002.110E] # HANGUL CHOSEONG CHIEUCH +110F ; [.1840.0020.0002.110F] # HANGUL CHOSEONG KHIEUKH +1110 ; [.1841.0020.0002.1110] # HANGUL CHOSEONG THIEUTH +1111 ; [.1842.0020.0002.1111] # HANGUL CHOSEONG PHIEUPH +1112 ; [.1843.0020.0002.1112] # HANGUL CHOSEONG HIEUH +1113 ; [.1844.0020.0002.1113] # HANGUL CHOSEONG NIEUN-KIYEOK +1114 ; [.1845.0020.0002.1114] # HANGUL CHOSEONG SSANGNIEUN +1115 ; [.1846.0020.0002.1115] # HANGUL CHOSEONG NIEUN-TIKEUT +1116 ; [.1847.0020.0002.1116] # HANGUL CHOSEONG NIEUN-PIEUP +1117 ; [.1848.0020.0002.1117] # HANGUL CHOSEONG TIKEUT-KIYEOK +1118 ; [.1849.0020.0002.1118] # HANGUL CHOSEONG RIEUL-NIEUN +1119 ; [.184A.0020.0002.1119] # HANGUL CHOSEONG SSANGRIEUL +111A ; [.184B.0020.0002.111A] # HANGUL CHOSEONG RIEUL-HIEUH +111B ; [.184C.0020.0002.111B] # HANGUL CHOSEONG KAPYEOUNRIEUL +111C ; [.184D.0020.0002.111C] # HANGUL CHOSEONG MIEUM-PIEUP +111D ; 
[.184E.0020.0002.111D] # HANGUL CHOSEONG KAPYEOUNMIEUM +111E ; [.184F.0020.0002.111E] # HANGUL CHOSEONG PIEUP-KIYEOK +111F ; [.1850.0020.0002.111F] # HANGUL CHOSEONG PIEUP-NIEUN +1120 ; [.1851.0020.0002.1120] # HANGUL CHOSEONG PIEUP-TIKEUT +1121 ; [.1852.0020.0002.1121] # HANGUL CHOSEONG PIEUP-SIOS +1122 ; [.1853.0020.0002.1122] # HANGUL CHOSEONG PIEUP-SIOS-KIYEOK +1123 ; [.1854.0020.0002.1123] # HANGUL CHOSEONG PIEUP-SIOS-TIKEUT +1124 ; [.1855.0020.0002.1124] # HANGUL CHOSEONG PIEUP-SIOS-PIEUP +1125 ; [.1856.0020.0002.1125] # HANGUL CHOSEONG PIEUP-SSANGSIOS +1126 ; [.1857.0020.0002.1126] # HANGUL CHOSEONG PIEUP-SIOS-CIEUC +1127 ; [.1858.0020.0002.1127] # HANGUL CHOSEONG PIEUP-CIEUC +1128 ; [.1859.0020.0002.1128] # HANGUL CHOSEONG PIEUP-CHIEUCH +1129 ; [.185A.0020.0002.1129] # HANGUL CHOSEONG PIEUP-THIEUTH +112A ; [.185B.0020.0002.112A] # HANGUL CHOSEONG PIEUP-PHIEUPH +112B ; [.185C.0020.0002.112B] # HANGUL CHOSEONG KAPYEOUNPIEUP +112C ; [.185D.0020.0002.112C] # HANGUL CHOSEONG KAPYEOUNSSANGPIEUP +112D ; [.185E.0020.0002.112D] # HANGUL CHOSEONG SIOS-KIYEOK +112E ; [.185F.0020.0002.112E] # HANGUL CHOSEONG SIOS-NIEUN +112F ; [.1860.0020.0002.112F] # HANGUL CHOSEONG SIOS-TIKEUT +1130 ; [.1861.0020.0002.1130] # HANGUL CHOSEONG SIOS-RIEUL +1131 ; [.1862.0020.0002.1131] # HANGUL CHOSEONG SIOS-MIEUM +1132 ; [.1863.0020.0002.1132] # HANGUL CHOSEONG SIOS-PIEUP +1133 ; [.1864.0020.0002.1133] # HANGUL CHOSEONG SIOS-PIEUP-KIYEOK +1134 ; [.1865.0020.0002.1134] # HANGUL CHOSEONG SIOS-SSANGSIOS +1135 ; [.1866.0020.0002.1135] # HANGUL CHOSEONG SIOS-IEUNG +1136 ; [.1867.0020.0002.1136] # HANGUL CHOSEONG SIOS-CIEUC +1137 ; [.1868.0020.0002.1137] # HANGUL CHOSEONG SIOS-CHIEUCH +1138 ; [.1869.0020.0002.1138] # HANGUL CHOSEONG SIOS-KHIEUKH +1139 ; [.186A.0020.0002.1139] # HANGUL CHOSEONG SIOS-THIEUTH +113A ; [.186B.0020.0002.113A] # HANGUL CHOSEONG SIOS-PHIEUPH +113B ; [.186C.0020.0002.113B] # HANGUL CHOSEONG SIOS-HIEUH +113C ; [.186D.0020.0002.113C] # HANGUL CHOSEONG CHITUEUMSIOS 
+113D ; [.186E.0020.0002.113D] # HANGUL CHOSEONG CHITUEUMSSANGSIOS +113E ; [.186F.0020.0002.113E] # HANGUL CHOSEONG CEONGCHIEUMSIOS +113F ; [.1870.0020.0002.113F] # HANGUL CHOSEONG CEONGCHIEUMSSANGSIOS +1140 ; [.1871.0020.0002.1140] # HANGUL CHOSEONG PANSIOS +1141 ; [.1872.0020.0002.1141] # HANGUL CHOSEONG IEUNG-KIYEOK +1142 ; [.1873.0020.0002.1142] # HANGUL CHOSEONG IEUNG-TIKEUT +1143 ; [.1874.0020.0002.1143] # HANGUL CHOSEONG IEUNG-MIEUM +1144 ; [.1875.0020.0002.1144] # HANGUL CHOSEONG IEUNG-PIEUP +1145 ; [.1876.0020.0002.1145] # HANGUL CHOSEONG IEUNG-SIOS +1146 ; [.1877.0020.0002.1146] # HANGUL CHOSEONG IEUNG-PANSIOS +1147 ; [.1878.0020.0002.1147] # HANGUL CHOSEONG SSANGIEUNG +1148 ; [.1879.0020.0002.1148] # HANGUL CHOSEONG IEUNG-CIEUC +1149 ; [.187A.0020.0002.1149] # HANGUL CHOSEONG IEUNG-CHIEUCH +114A ; [.187B.0020.0002.114A] # HANGUL CHOSEONG IEUNG-THIEUTH +114B ; [.187C.0020.0002.114B] # HANGUL CHOSEONG IEUNG-PHIEUPH +114C ; [.187D.0020.0002.114C] # HANGUL CHOSEONG YESIEUNG +114D ; [.187E.0020.0002.114D] # HANGUL CHOSEONG CIEUC-IEUNG +114E ; [.187F.0020.0002.114E] # HANGUL CHOSEONG CHITUEUMCIEUC +114F ; [.1880.0020.0002.114F] # HANGUL CHOSEONG CHITUEUMSSANGCIEUC +1150 ; [.1881.0020.0002.1150] # HANGUL CHOSEONG CEONGCHIEUMCIEUC +1151 ; [.1882.0020.0002.1151] # HANGUL CHOSEONG CEONGCHIEUMSSANGCIEUC +1152 ; [.1883.0020.0002.1152] # HANGUL CHOSEONG CHIEUCH-KHIEUKH +1153 ; [.1884.0020.0002.1153] # HANGUL CHOSEONG CHIEUCH-HIEUH +1154 ; [.1885.0020.0002.1154] # HANGUL CHOSEONG CHITUEUMCHIEUCH +1155 ; [.1886.0020.0002.1155] # HANGUL CHOSEONG CEONGCHIEUMCHIEUCH +1156 ; [.1887.0020.0002.1156] # HANGUL CHOSEONG PHIEUPH-PIEUP +1157 ; [.1888.0020.0002.1157] # HANGUL CHOSEONG KAPYEOUNPHIEUPH +1158 ; [.1889.0020.0002.1158] # HANGUL CHOSEONG SSANGHIEUH +1159 ; [.188A.0020.0002.1159] # HANGUL CHOSEONG YEORINHIEUH +115F ; [.188B.0020.0002.115F] # HANGUL CHOSEONG FILLER +1160 ; [.188C.0020.0002.1160] # HANGUL JUNGSEONG FILLER +1161 ; [.188D.0020.0002.1161] # HANGUL JUNGSEONG A 
+1162 ; [.188E.0020.0002.1162] # HANGUL JUNGSEONG AE +1163 ; [.188F.0020.0002.1163] # HANGUL JUNGSEONG YA +1164 ; [.1890.0020.0002.1164] # HANGUL JUNGSEONG YAE +1165 ; [.1891.0020.0002.1165] # HANGUL JUNGSEONG EO +1166 ; [.1892.0020.0002.1166] # HANGUL JUNGSEONG E +1167 ; [.1893.0020.0002.1167] # HANGUL JUNGSEONG YEO +1168 ; [.1894.0020.0002.1168] # HANGUL JUNGSEONG YE +1169 ; [.1895.0020.0002.1169] # HANGUL JUNGSEONG O +116A ; [.1896.0020.0002.116A] # HANGUL JUNGSEONG WA +116B ; [.1897.0020.0002.116B] # HANGUL JUNGSEONG WAE +116C ; [.1898.0020.0002.116C] # HANGUL JUNGSEONG OE +116D ; [.1899.0020.0002.116D] # HANGUL JUNGSEONG YO +116E ; [.189A.0020.0002.116E] # HANGUL JUNGSEONG U +116F ; [.189B.0020.0002.116F] # HANGUL JUNGSEONG WEO +1170 ; [.189C.0020.0002.1170] # HANGUL JUNGSEONG WE +1171 ; [.189D.0020.0002.1171] # HANGUL JUNGSEONG WI +1172 ; [.189E.0020.0002.1172] # HANGUL JUNGSEONG YU +1173 ; [.189F.0020.0002.1173] # HANGUL JUNGSEONG EU +1174 ; [.18A0.0020.0002.1174] # HANGUL JUNGSEONG YI +1175 ; [.18A1.0020.0002.1175] # HANGUL JUNGSEONG I +1176 ; [.18A2.0020.0002.1176] # HANGUL JUNGSEONG A-O +1177 ; [.18A3.0020.0002.1177] # HANGUL JUNGSEONG A-U +1178 ; [.18A4.0020.0002.1178] # HANGUL JUNGSEONG YA-O +1179 ; [.18A5.0020.0002.1179] # HANGUL JUNGSEONG YA-YO +117A ; [.18A6.0020.0002.117A] # HANGUL JUNGSEONG EO-O +117B ; [.18A7.0020.0002.117B] # HANGUL JUNGSEONG EO-U +117C ; [.18A8.0020.0002.117C] # HANGUL JUNGSEONG EO-EU +117D ; [.18A9.0020.0002.117D] # HANGUL JUNGSEONG YEO-O +117E ; [.18AA.0020.0002.117E] # HANGUL JUNGSEONG YEO-U +117F ; [.18AB.0020.0002.117F] # HANGUL JUNGSEONG O-EO +1180 ; [.18AC.0020.0002.1180] # HANGUL JUNGSEONG O-E +1181 ; [.18AD.0020.0002.1181] # HANGUL JUNGSEONG O-YE +1182 ; [.18AE.0020.0002.1182] # HANGUL JUNGSEONG O-O +1183 ; [.18AF.0020.0002.1183] # HANGUL JUNGSEONG O-U +1184 ; [.18B0.0020.0002.1184] # HANGUL JUNGSEONG YO-YA +1185 ; [.18B1.0020.0002.1185] # HANGUL JUNGSEONG YO-YAE +1186 ; [.18B2.0020.0002.1186] # HANGUL JUNGSEONG YO-YEO 
+1187 ; [.18B3.0020.0002.1187] # HANGUL JUNGSEONG YO-O +1188 ; [.18B4.0020.0002.1188] # HANGUL JUNGSEONG YO-I +1189 ; [.18B5.0020.0002.1189] # HANGUL JUNGSEONG U-A +118A ; [.18B6.0020.0002.118A] # HANGUL JUNGSEONG U-AE +118B ; [.18B7.0020.0002.118B] # HANGUL JUNGSEONG U-EO-EU +118C ; [.18B8.0020.0002.118C] # HANGUL JUNGSEONG U-YE +118D ; [.18B9.0020.0002.118D] # HANGUL JUNGSEONG U-U +118E ; [.18BA.0020.0002.118E] # HANGUL JUNGSEONG YU-A +118F ; [.18BB.0020.0002.118F] # HANGUL JUNGSEONG YU-EO +1190 ; [.18BC.0020.0002.1190] # HANGUL JUNGSEONG YU-E +1191 ; [.18BD.0020.0002.1191] # HANGUL JUNGSEONG YU-YEO +1192 ; [.18BE.0020.0002.1192] # HANGUL JUNGSEONG YU-YE +1193 ; [.18BF.0020.0002.1193] # HANGUL JUNGSEONG YU-U +1194 ; [.18C0.0020.0002.1194] # HANGUL JUNGSEONG YU-I +1195 ; [.18C1.0020.0002.1195] # HANGUL JUNGSEONG EU-U +1196 ; [.18C2.0020.0002.1196] # HANGUL JUNGSEONG EU-EU +1197 ; [.18C3.0020.0002.1197] # HANGUL JUNGSEONG YI-U +1198 ; [.18C4.0020.0002.1198] # HANGUL JUNGSEONG I-A +1199 ; [.18C5.0020.0002.1199] # HANGUL JUNGSEONG I-YA +119A ; [.18C6.0020.0002.119A] # HANGUL JUNGSEONG I-O +119B ; [.18C7.0020.0002.119B] # HANGUL JUNGSEONG I-U +119C ; [.18C8.0020.0002.119C] # HANGUL JUNGSEONG I-EU +119D ; [.18C9.0020.0002.119D] # HANGUL JUNGSEONG I-ARAEA +119E ; [.18CA.0020.0002.119E] # HANGUL JUNGSEONG ARAEA +119F ; [.18CB.0020.0002.119F] # HANGUL JUNGSEONG ARAEA-EO +11A0 ; [.18CC.0020.0002.11A0] # HANGUL JUNGSEONG ARAEA-U +11A1 ; [.18CD.0020.0002.11A1] # HANGUL JUNGSEONG ARAEA-I +11A2 ; [.18CE.0020.0002.11A2] # HANGUL JUNGSEONG SSANGARAEA +11A8 ; [.18CF.0020.0002.11A8] # HANGUL JONGSEONG KIYEOK +11A9 ; [.18D0.0020.0002.11A9] # HANGUL JONGSEONG SSANGKIYEOK +11AA ; [.18D1.0020.0002.11AA] # HANGUL JONGSEONG KIYEOK-SIOS +11AB ; [.18D2.0020.0002.11AB] # HANGUL JONGSEONG NIEUN +11AC ; [.18D3.0020.0002.11AC] # HANGUL JONGSEONG NIEUN-CIEUC +11AD ; [.18D4.0020.0002.11AD] # HANGUL JONGSEONG NIEUN-HIEUH +11AE ; [.18D5.0020.0002.11AE] # HANGUL JONGSEONG TIKEUT +11AF ; 
[.18D6.0020.0002.11AF] # HANGUL JONGSEONG RIEUL +11B0 ; [.18D7.0020.0002.11B0] # HANGUL JONGSEONG RIEUL-KIYEOK +11B1 ; [.18D8.0020.0002.11B1] # HANGUL JONGSEONG RIEUL-MIEUM +11B2 ; [.18D9.0020.0002.11B2] # HANGUL JONGSEONG RIEUL-PIEUP +11B3 ; [.18DA.0020.0002.11B3] # HANGUL JONGSEONG RIEUL-SIOS +11B4 ; [.18DB.0020.0002.11B4] # HANGUL JONGSEONG RIEUL-THIEUTH +11B5 ; [.18DC.0020.0002.11B5] # HANGUL JONGSEONG RIEUL-PHIEUPH +11B6 ; [.18DD.0020.0002.11B6] # HANGUL JONGSEONG RIEUL-HIEUH +11B7 ; [.18DE.0020.0002.11B7] # HANGUL JONGSEONG MIEUM +11B8 ; [.18DF.0020.0002.11B8] # HANGUL JONGSEONG PIEUP +11B9 ; [.18E0.0020.0002.11B9] # HANGUL JONGSEONG PIEUP-SIOS +11BA ; [.18E1.0020.0002.11BA] # HANGUL JONGSEONG SIOS +11BB ; [.18E2.0020.0002.11BB] # HANGUL JONGSEONG SSANGSIOS +11BC ; [.18E3.0020.0002.11BC] # HANGUL JONGSEONG IEUNG +11BD ; [.18E4.0020.0002.11BD] # HANGUL JONGSEONG CIEUC +11BE ; [.18E5.0020.0002.11BE] # HANGUL JONGSEONG CHIEUCH +11BF ; [.18E6.0020.0002.11BF] # HANGUL JONGSEONG KHIEUKH +11C0 ; [.18E7.0020.0002.11C0] # HANGUL JONGSEONG THIEUTH +11C1 ; [.18E8.0020.0002.11C1] # HANGUL JONGSEONG PHIEUPH +11C2 ; [.18E9.0020.0002.11C2] # HANGUL JONGSEONG HIEUH +11C3 ; [.18EA.0020.0002.11C3] # HANGUL JONGSEONG KIYEOK-RIEUL +11C4 ; [.18EB.0020.0002.11C4] # HANGUL JONGSEONG KIYEOK-SIOS-KIYEOK +11C5 ; [.18EC.0020.0002.11C5] # HANGUL JONGSEONG NIEUN-KIYEOK +11C6 ; [.18ED.0020.0002.11C6] # HANGUL JONGSEONG NIEUN-TIKEUT +11C7 ; [.18EE.0020.0002.11C7] # HANGUL JONGSEONG NIEUN-SIOS +11C8 ; [.18EF.0020.0002.11C8] # HANGUL JONGSEONG NIEUN-PANSIOS +11C9 ; [.18F0.0020.0002.11C9] # HANGUL JONGSEONG NIEUN-THIEUTH +11CA ; [.18F1.0020.0002.11CA] # HANGUL JONGSEONG TIKEUT-KIYEOK +11CB ; [.18F2.0020.0002.11CB] # HANGUL JONGSEONG TIKEUT-RIEUL +11CC ; [.18F3.0020.0002.11CC] # HANGUL JONGSEONG RIEUL-KIYEOK-SIOS +11CD ; [.18F4.0020.0002.11CD] # HANGUL JONGSEONG RIEUL-NIEUN +11CE ; [.18F5.0020.0002.11CE] # HANGUL JONGSEONG RIEUL-TIKEUT +11CF ; [.18F6.0020.0002.11CF] # HANGUL JONGSEONG 
RIEUL-TIKEUT-HIEUH +11D0 ; [.18F7.0020.0002.11D0] # HANGUL JONGSEONG SSANGRIEUL +11D1 ; [.18F8.0020.0002.11D1] # HANGUL JONGSEONG RIEUL-MIEUM-KIYEOK +11D2 ; [.18F9.0020.0002.11D2] # HANGUL JONGSEONG RIEUL-MIEUM-SIOS +11D3 ; [.18FA.0020.0002.11D3] # HANGUL JONGSEONG RIEUL-PIEUP-SIOS +11D4 ; [.18FB.0020.0002.11D4] # HANGUL JONGSEONG RIEUL-PIEUP-HIEUH +11D5 ; [.18FC.0020.0002.11D5] # HANGUL JONGSEONG RIEUL-KAPYEOUNPIEUP +11D6 ; [.18FD.0020.0002.11D6] # HANGUL JONGSEONG RIEUL-SSANGSIOS +11D7 ; [.18FE.0020.0002.11D7] # HANGUL JONGSEONG RIEUL-PANSIOS +11D8 ; [.18FF.0020.0002.11D8] # HANGUL JONGSEONG RIEUL-KHIEUKH +11D9 ; [.1900.0020.0002.11D9] # HANGUL JONGSEONG RIEUL-YEORINHIEUH +11DA ; [.1901.0020.0002.11DA] # HANGUL JONGSEONG MIEUM-KIYEOK +11DB ; [.1902.0020.0002.11DB] # HANGUL JONGSEONG MIEUM-RIEUL +11DC ; [.1903.0020.0002.11DC] # HANGUL JONGSEONG MIEUM-PIEUP +11DD ; [.1904.0020.0002.11DD] # HANGUL JONGSEONG MIEUM-SIOS +11DE ; [.1905.0020.0002.11DE] # HANGUL JONGSEONG MIEUM-SSANGSIOS +11DF ; [.1906.0020.0002.11DF] # HANGUL JONGSEONG MIEUM-PANSIOS +11E0 ; [.1907.0020.0002.11E0] # HANGUL JONGSEONG MIEUM-CHIEUCH +11E1 ; [.1908.0020.0002.11E1] # HANGUL JONGSEONG MIEUM-HIEUH +11E2 ; [.1909.0020.0002.11E2] # HANGUL JONGSEONG KAPYEOUNMIEUM +11E3 ; [.190A.0020.0002.11E3] # HANGUL JONGSEONG PIEUP-RIEUL +11E4 ; [.190B.0020.0002.11E4] # HANGUL JONGSEONG PIEUP-PHIEUPH +11E5 ; [.190C.0020.0002.11E5] # HANGUL JONGSEONG PIEUP-HIEUH +11E6 ; [.190D.0020.0002.11E6] # HANGUL JONGSEONG KAPYEOUNPIEUP +11E7 ; [.190E.0020.0002.11E7] # HANGUL JONGSEONG SIOS-KIYEOK +11E8 ; [.190F.0020.0002.11E8] # HANGUL JONGSEONG SIOS-TIKEUT +11E9 ; [.1910.0020.0002.11E9] # HANGUL JONGSEONG SIOS-RIEUL +11EA ; [.1911.0020.0002.11EA] # HANGUL JONGSEONG SIOS-PIEUP +11EB ; [.1912.0020.0002.11EB] # HANGUL JONGSEONG PANSIOS +11EC ; [.1913.0020.0002.11EC] # HANGUL JONGSEONG IEUNG-KIYEOK +11ED ; [.1914.0020.0002.11ED] # HANGUL JONGSEONG IEUNG-SSANGKIYEOK +11EE ; [.1915.0020.0002.11EE] # HANGUL JONGSEONG SSANGIEUNG 
+11EF ; [.1916.0020.0002.11EF] # HANGUL JONGSEONG IEUNG-KHIEUKH +11F0 ; [.1917.0020.0002.11F0] # HANGUL JONGSEONG YESIEUNG +11F1 ; [.1918.0020.0002.11F1] # HANGUL JONGSEONG YESIEUNG-SIOS +11F2 ; [.1919.0020.0002.11F2] # HANGUL JONGSEONG YESIEUNG-PANSIOS +11F3 ; [.191A.0020.0002.11F3] # HANGUL JONGSEONG PHIEUPH-PIEUP +11F4 ; [.191B.0020.0002.11F4] # HANGUL JONGSEONG KAPYEOUNPHIEUPH +11F5 ; [.191C.0020.0002.11F5] # HANGUL JONGSEONG HIEUH-NIEUN +11F6 ; [.191D.0020.0002.11F6] # HANGUL JONGSEONG HIEUH-RIEUL +11F7 ; [.191E.0020.0002.11F7] # HANGUL JONGSEONG HIEUH-MIEUM +11F8 ; [.191F.0020.0002.11F8] # HANGUL JONGSEONG HIEUH-PIEUP +11F9 ; [.1920.0020.0002.11F9] # HANGUL JONGSEONG YEORINHIEUH +3041 ; [.1921.0020.000D.3041] # HIRAGANA LETTER SMALL A +3042 ; [.1921.0020.000E.3042] # HIRAGANA LETTER A +30A1 ; [.1921.0020.000F.30A1] # KATAKANA LETTER SMALL A +30A2 ; [.1921.0020.0011.30A2] # KATAKANA LETTER A +3043 ; [.1922.0020.000D.3043] # HIRAGANA LETTER SMALL I +3044 ; [.1922.0020.000E.3044] # HIRAGANA LETTER I +30A3 ; [.1922.0020.000F.30A3] # KATAKANA LETTER SMALL I +30A4 ; [.1922.0020.0011.30A4] # KATAKANA LETTER I +3045 ; [.1923.0020.000D.3045] # HIRAGANA LETTER SMALL U +3046 ; [.1923.0020.000E.3046] # HIRAGANA LETTER U +30A5 ; [.1923.0020.000F.30A5] # KATAKANA LETTER SMALL U +30A6 ; [.1923.0020.0011.30A6] # KATAKANA LETTER U +3094 ; [.1923.0020.000E.3046][.0000.013D.0002.3099] # HIRAGANA LETTER VU; QQCM +30F4 ; [.1923.0020.0011.30A6][.0000.013D.0002.3099] # KATAKANA LETTER VU; QQCM +3047 ; [.1924.0020.000D.3047] # HIRAGANA LETTER SMALL E +3048 ; [.1924.0020.000E.3048] # HIRAGANA LETTER E +30A7 ; [.1924.0020.000F.30A7] # KATAKANA LETTER SMALL E +30A8 ; [.1924.0020.0011.30A8] # KATAKANA LETTER E +3049 ; [.1925.0020.000D.3049] # HIRAGANA LETTER SMALL O +304A ; [.1925.0020.000E.304A] # HIRAGANA LETTER O +30A9 ; [.1925.0020.000F.30A9] # KATAKANA LETTER SMALL O +30AA ; [.1925.0020.0011.30AA] # KATAKANA LETTER O +304B ; [.1926.0020.000E.304B] # HIRAGANA LETTER KA +30F5 ; 
[.1926.0020.000F.30F5] # KATAKANA LETTER SMALL KA +30AB ; [.1926.0020.0011.30AB] # KATAKANA LETTER KA +304C ; [.1926.0020.000E.304B][.0000.013D.0002.3099] # HIRAGANA LETTER GA; QQCM +30AC ; [.1926.0020.0011.30AB][.0000.013D.0002.3099] # KATAKANA LETTER GA; QQCM +304D ; [.1927.0020.000E.304D] # HIRAGANA LETTER KI +30AD ; [.1927.0020.0011.30AD] # KATAKANA LETTER KI +304E ; [.1927.0020.000E.304D][.0000.013D.0002.3099] # HIRAGANA LETTER GI; QQCM +30AE ; [.1927.0020.0011.30AD][.0000.013D.0002.3099] # KATAKANA LETTER GI; QQCM +304F ; [.1928.0020.000E.304F] # HIRAGANA LETTER KU +30AF ; [.1928.0020.0011.30AF] # KATAKANA LETTER KU +3050 ; [.1928.0020.000E.304F][.0000.013D.0002.3099] # HIRAGANA LETTER GU; QQCM +30B0 ; [.1928.0020.0011.30AF][.0000.013D.0002.3099] # KATAKANA LETTER GU; QQCM +3051 ; [.1929.0020.000E.3051] # HIRAGANA LETTER KE +30F6 ; [.1929.0020.000F.30F6] # KATAKANA LETTER SMALL KE +30B1 ; [.1929.0020.0011.30B1] # KATAKANA LETTER KE +3052 ; [.1929.0020.000E.3051][.0000.013D.0002.3099] # HIRAGANA LETTER GE; QQCM +30B2 ; [.1929.0020.0011.30B1][.0000.013D.0002.3099] # KATAKANA LETTER GE; QQCM +3053 ; [.192A.0020.000E.3053] # HIRAGANA LETTER KO +30B3 ; [.192A.0020.0011.30B3] # KATAKANA LETTER KO +3054 ; [.192A.0020.000E.3053][.0000.013D.0002.3099] # HIRAGANA LETTER GO; QQCM +30B4 ; [.192A.0020.0011.30B3][.0000.013D.0002.3099] # KATAKANA LETTER GO; QQCM +3055 ; [.192B.0020.000E.3055] # HIRAGANA LETTER SA +30B5 ; [.192B.0020.0011.30B5] # KATAKANA LETTER SA +3056 ; [.192B.0020.000E.3055][.0000.013D.0002.3099] # HIRAGANA LETTER ZA; QQCM +30B6 ; [.192B.0020.0011.30B5][.0000.013D.0002.3099] # KATAKANA LETTER ZA; QQCM +3057 ; [.192C.0020.000E.3057] # HIRAGANA LETTER SI +30B7 ; [.192C.0020.0011.30B7] # KATAKANA LETTER SI +3058 ; [.192C.0020.000E.3057][.0000.013D.0002.3099] # HIRAGANA LETTER ZI; QQCM +30B8 ; [.192C.0020.0011.30B7][.0000.013D.0002.3099] # KATAKANA LETTER ZI; QQCM +3059 ; [.192D.0020.000E.3059] # HIRAGANA LETTER SU +30B9 ; [.192D.0020.0011.30B9] # KATAKANA 
LETTER SU +305A ; [.192D.0020.000E.3059][.0000.013D.0002.3099] # HIRAGANA LETTER ZU; QQCM +30BA ; [.192D.0020.0011.30B9][.0000.013D.0002.3099] # KATAKANA LETTER ZU; QQCM +305B ; [.192E.0020.000E.305B] # HIRAGANA LETTER SE +30BB ; [.192E.0020.0011.30BB] # KATAKANA LETTER SE +305C ; [.192E.0020.000E.305B][.0000.013D.0002.3099] # HIRAGANA LETTER ZE; QQCM +30BC ; [.192E.0020.0011.30BB][.0000.013D.0002.3099] # KATAKANA LETTER ZE; QQCM +305D ; [.192F.0020.000E.305D] # HIRAGANA LETTER SO +30BD ; [.192F.0020.0011.30BD] # KATAKANA LETTER SO +305E ; [.192F.0020.000E.305D][.0000.013D.0002.3099] # HIRAGANA LETTER ZO; QQCM +30BE ; [.192F.0020.0011.30BD][.0000.013D.0002.3099] # KATAKANA LETTER ZO; QQCM +305F ; [.1930.0020.000E.305F] # HIRAGANA LETTER TA +30BF ; [.1930.0020.0011.30BF] # KATAKANA LETTER TA +3060 ; [.1930.0020.000E.305F][.0000.013D.0002.3099] # HIRAGANA LETTER DA; QQCM +30C0 ; [.1930.0020.0011.30BF][.0000.013D.0002.3099] # KATAKANA LETTER DA; QQCM +3061 ; [.1931.0020.000E.3061] # HIRAGANA LETTER TI +30C1 ; [.1931.0020.0011.30C1] # KATAKANA LETTER TI +3062 ; [.1931.0020.000E.3061][.0000.013D.0002.3099] # HIRAGANA LETTER DI; QQCM +30C2 ; [.1931.0020.0011.30C1][.0000.013D.0002.3099] # KATAKANA LETTER DI; QQCM +3063 ; [.1932.0020.000D.3063] # HIRAGANA LETTER SMALL TU +3064 ; [.1932.0020.000E.3064] # HIRAGANA LETTER TU +30C3 ; [.1932.0020.000F.30C3] # KATAKANA LETTER SMALL TU +30C4 ; [.1932.0020.0011.30C4] # KATAKANA LETTER TU +3065 ; [.1932.0020.000E.3064][.0000.013D.0002.3099] # HIRAGANA LETTER DU; QQCM +30C5 ; [.1932.0020.0011.30C4][.0000.013D.0002.3099] # KATAKANA LETTER DU; QQCM +3066 ; [.1933.0020.000E.3066] # HIRAGANA LETTER TE +30C6 ; [.1933.0020.0011.30C6] # KATAKANA LETTER TE +3067 ; [.1933.0020.000E.3066][.0000.013D.0002.3099] # HIRAGANA LETTER DE; QQCM +30C7 ; [.1933.0020.0011.30C6][.0000.013D.0002.3099] # KATAKANA LETTER DE; QQCM +3068 ; [.1934.0020.000E.3068] # HIRAGANA LETTER TO +30C8 ; [.1934.0020.0011.30C8] # KATAKANA LETTER TO +3069 ; 
[.1934.0020.000E.3068][.0000.013D.0002.3099] # HIRAGANA LETTER DO; QQCM +30C9 ; [.1934.0020.0011.30C8][.0000.013D.0002.3099] # KATAKANA LETTER DO; QQCM +306A ; [.1935.0020.000E.306A] # HIRAGANA LETTER NA +30CA ; [.1935.0020.0011.30CA] # KATAKANA LETTER NA +306B ; [.1936.0020.000E.306B] # HIRAGANA LETTER NI +30CB ; [.1936.0020.0011.30CB] # KATAKANA LETTER NI +306C ; [.1937.0020.000E.306C] # HIRAGANA LETTER NU +30CC ; [.1937.0020.0011.30CC] # KATAKANA LETTER NU +306D ; [.1938.0020.000E.306D] # HIRAGANA LETTER NE +30CD ; [.1938.0020.0011.30CD] # KATAKANA LETTER NE +306E ; [.1939.0020.000E.306E] # HIRAGANA LETTER NO +30CE ; [.1939.0020.0011.30CE] # KATAKANA LETTER NO +306F ; [.193A.0020.000E.306F] # HIRAGANA LETTER HA +30CF ; [.193A.0020.0011.30CF] # KATAKANA LETTER HA +3070 ; [.193A.0020.000E.306F][.0000.013D.0002.3099] # HIRAGANA LETTER BA; QQCM +30D0 ; [.193A.0020.0011.30CF][.0000.013D.0002.3099] # KATAKANA LETTER BA; QQCM +3071 ; [.193A.0020.000E.306F][.0000.013E.0002.309A] # HIRAGANA LETTER PA; QQCM +30D1 ; [.193A.0020.0011.30CF][.0000.013E.0002.309A] # KATAKANA LETTER PA; QQCM +3072 ; [.193B.0020.000E.3072] # HIRAGANA LETTER HI +30D2 ; [.193B.0020.0011.30D2] # KATAKANA LETTER HI +3073 ; [.193B.0020.000E.3072][.0000.013D.0002.3099] # HIRAGANA LETTER BI; QQCM +30D3 ; [.193B.0020.0011.30D2][.0000.013D.0002.3099] # KATAKANA LETTER BI; QQCM +3074 ; [.193B.0020.000E.3072][.0000.013E.0002.309A] # HIRAGANA LETTER PI; QQCM +30D4 ; [.193B.0020.0011.30D2][.0000.013E.0002.309A] # KATAKANA LETTER PI; QQCM +3075 ; [.193C.0020.000E.3075] # HIRAGANA LETTER HU +30D5 ; [.193C.0020.0011.30D5] # KATAKANA LETTER HU +3076 ; [.193C.0020.000E.3075][.0000.013D.0002.3099] # HIRAGANA LETTER BU; QQCM +30D6 ; [.193C.0020.0011.30D5][.0000.013D.0002.3099] # KATAKANA LETTER BU; QQCM +3077 ; [.193C.0020.000E.3075][.0000.013E.0002.309A] # HIRAGANA LETTER PU; QQCM +30D7 ; [.193C.0020.0011.30D5][.0000.013E.0002.309A] # KATAKANA LETTER PU; QQCM +3078 ; [.193D.0020.000E.3078] # HIRAGANA LETTER HE 
+30D8 ; [.193D.0020.0011.30D8] # KATAKANA LETTER HE +3079 ; [.193D.0020.000E.3078][.0000.013D.0002.3099] # HIRAGANA LETTER BE; QQCM +30D9 ; [.193D.0020.0011.30D8][.0000.013D.0002.3099] # KATAKANA LETTER BE; QQCM +307A ; [.193D.0020.000E.3078][.0000.013E.0002.309A] # HIRAGANA LETTER PE; QQCM +30DA ; [.193D.0020.0011.30D8][.0000.013E.0002.309A] # KATAKANA LETTER PE; QQCM +307B ; [.193E.0020.000E.307B] # HIRAGANA LETTER HO +30DB ; [.193E.0020.0011.30DB] # KATAKANA LETTER HO +307C ; [.193E.0020.000E.307B][.0000.013D.0002.3099] # HIRAGANA LETTER BO; QQCM +30DC ; [.193E.0020.0011.30DB][.0000.013D.0002.3099] # KATAKANA LETTER BO; QQCM +307D ; [.193E.0020.000E.307B][.0000.013E.0002.309A] # HIRAGANA LETTER PO; QQCM +30DD ; [.193E.0020.0011.30DB][.0000.013E.0002.309A] # KATAKANA LETTER PO; QQCM +307E ; [.193F.0020.000E.307E] # HIRAGANA LETTER MA +30DE ; [.193F.0020.0011.30DE] # KATAKANA LETTER MA +307F ; [.1940.0020.000E.307F] # HIRAGANA LETTER MI +30DF ; [.1940.0020.0011.30DF] # KATAKANA LETTER MI +3080 ; [.1941.0020.000E.3080] # HIRAGANA LETTER MU +30E0 ; [.1941.0020.0011.30E0] # KATAKANA LETTER MU +3081 ; [.1942.0020.000E.3081] # HIRAGANA LETTER ME +30E1 ; [.1942.0020.0011.30E1] # KATAKANA LETTER ME +3082 ; [.1943.0020.000E.3082] # HIRAGANA LETTER MO +30E2 ; [.1943.0020.0011.30E2] # KATAKANA LETTER MO +3083 ; [.1944.0020.000D.3083] # HIRAGANA LETTER SMALL YA +3084 ; [.1944.0020.000E.3084] # HIRAGANA LETTER YA +30E3 ; [.1944.0020.000F.30E3] # KATAKANA LETTER SMALL YA +30E4 ; [.1944.0020.0011.30E4] # KATAKANA LETTER YA +3085 ; [.1945.0020.000D.3085] # HIRAGANA LETTER SMALL YU +3086 ; [.1945.0020.000E.3086] # HIRAGANA LETTER YU +30E5 ; [.1945.0020.000F.30E5] # KATAKANA LETTER SMALL YU +30E6 ; [.1945.0020.0011.30E6] # KATAKANA LETTER YU +3087 ; [.1946.0020.000D.3087] # HIRAGANA LETTER SMALL YO +3088 ; [.1946.0020.000E.3088] # HIRAGANA LETTER YO +30E7 ; [.1946.0020.000F.30E7] # KATAKANA LETTER SMALL YO +30E8 ; [.1946.0020.0011.30E8] # KATAKANA LETTER YO +3089 ; 
[.1947.0020.000E.3089] # HIRAGANA LETTER RA +30E9 ; [.1947.0020.0011.30E9] # KATAKANA LETTER RA +308A ; [.1948.0020.000E.308A] # HIRAGANA LETTER RI +30EA ; [.1948.0020.0011.30EA] # KATAKANA LETTER RI +308B ; [.1949.0020.000E.308B] # HIRAGANA LETTER RU +30EB ; [.1949.0020.0011.30EB] # KATAKANA LETTER RU +308C ; [.194A.0020.000E.308C] # HIRAGANA LETTER RE +30EC ; [.194A.0020.0011.30EC] # KATAKANA LETTER RE +308D ; [.194B.0020.000E.308D] # HIRAGANA LETTER RO +30ED ; [.194B.0020.0011.30ED] # KATAKANA LETTER RO +308E ; [.194C.0020.000D.308E] # HIRAGANA LETTER SMALL WA +308F ; [.194C.0020.000E.308F] # HIRAGANA LETTER WA +30EE ; [.194C.0020.000F.30EE] # KATAKANA LETTER SMALL WA +30EF ; [.194C.0020.0011.30EF] # KATAKANA LETTER WA +30F7 ; [.194C.0020.0011.30EF][.0000.013D.0002.3099] # KATAKANA LETTER VA; QQCM +3090 ; [.194D.0020.000E.3090] # HIRAGANA LETTER WI +30F0 ; [.194D.0020.0011.30F0] # KATAKANA LETTER WI +30F8 ; [.194D.0020.0011.30F0][.0000.013D.0002.3099] # KATAKANA LETTER VI; QQCM +3091 ; [.194E.0020.000E.3091] # HIRAGANA LETTER WE +30F1 ; [.194E.0020.0011.30F1] # KATAKANA LETTER WE +30F9 ; [.194E.0020.0011.30F1][.0000.013D.0002.3099] # KATAKANA LETTER VE; QQCM +3092 ; [.194F.0020.000E.3092] # HIRAGANA LETTER WO +30F2 ; [.194F.0020.0011.30F2] # KATAKANA LETTER WO +30FA ; [.194F.0020.0011.30F2][.0000.013D.0002.3099] # KATAKANA LETTER VO; QQCM +3093 ; [.1950.0020.000E.3093] # HIRAGANA LETTER N +30F3 ; [.1950.0020.0011.30F3] # KATAKANA LETTER N diff --git a/lib/perl5/5.8.8/Unicode/UCD.pm b/lib/perl5/5.8.8/Unicode/UCD.pm new file mode 100644 index 00000000..6a2b5e13 --- /dev/null +++ b/lib/perl5/5.8.8/Unicode/UCD.pm @@ -0,0 +1,820 @@ +package Unicode::UCD; + +use strict; +use warnings; + +our $VERSION = '0.24'; + +use Storable qw(dclone); + +require Exporter; + +our @ISA = qw(Exporter); + +our @EXPORT_OK = qw(charinfo + charblock charscript + charblocks charscripts + charinrange + compexcl + casefold casespec + namedseq); + +use Carp; + +=head1 NAME + +Unicode::UCD - 
Unicode character database
+
+=head1 SYNOPSIS
+
+    use Unicode::UCD 'charinfo';
+    my $charinfo = charinfo($codepoint);
+
+    use Unicode::UCD 'charblock';
+    my $charblock = charblock($codepoint);
+
+    use Unicode::UCD 'charscript';
+    my $charscript = charscript($codepoint);
+
+    use Unicode::UCD 'charblocks';
+    my $charblocks = charblocks();
+
+    use Unicode::UCD 'charscripts';
+    my %charscripts = charscripts();
+
+    use Unicode::UCD qw(charscript charinrange);
+    my $range = charscript($script);
+    print "looks like $script\n" if charinrange($range, $codepoint);
+
+    use Unicode::UCD 'compexcl';
+    my $compexcl = compexcl($codepoint);
+
+    use Unicode::UCD 'namedseq';
+    my $namedseq = namedseq($named_sequence_name);
+
+    my $unicode_version = Unicode::UCD::UnicodeVersion();
+
+=head1 DESCRIPTION
+
+The Unicode::UCD module offers a simple interface to the Unicode
+Character Database.
+
+=cut
+
+# File-scoped slots for filehandles, one per database file.  Each is
+# handed to openunicode() by reference; a slot that is already defined
+# is not opened again.
+my $UNICODEFH;
+my $BLOCKSFH;
+my $SCRIPTSFH;
+my $VERSIONFH;
+my $COMPEXCLFH;
+my $CASEFOLDFH;
+my $CASESPECFH;
+my $NAMEDSEQFH;
+
+# openunicode(\$fh, @path)
+#
+# Open the file "unicore/@path" for reading, trying each directory of
+# @INC in order, unless the handle referenced by $rfh is already defined.
+#
+#   $rfh   reference to the scalar that holds (or receives) the filehandle
+#   @path  path components of the wanted file below "unicore"
+#
+# Returns the name of the file that was opened, or undef when $$rfh was
+# already defined and nothing had to be opened.  Croaks when the file
+# cannot be opened below any @INC directory.
+sub openunicode {
+    my ($rfh, @path) = @_;
+    use File::Spec;
+    my $f;
+    unless (defined $$rfh) {
+        for my $d (@INC) {
+            # @INC may contain references (loader hooks); those are not
+            # directories and would only stringify into a bogus name.
+            next if ref $d;
+            $f = File::Spec->catfile($d, "unicore", @path);
+            # Three-argument open: the name is used literally, so no
+            # character in an @INC entry can be taken for an open mode.
+            last if open($$rfh, '<', $f);
+            # Forget the failed candidate so that $f stays undefined
+            # when no directory yields the file.
+            undef $f;
+        }
+        croak __PACKAGE__, ": failed to find ",
+              File::Spec->catfile(@path), " in @INC"
+            unless defined $f;
+    }
+    return $f;
+}
+
+=head2 charinfo
+
+    use Unicode::UCD 'charinfo';
+
+    my $charinfo = charinfo(0x41);
+
+charinfo() returns a reference to a hash that has the following fields
+as defined by the Unicode standard:
+
+    key
+
+    code              code point with at least four hexdigits
+    name              name of the character IN UPPER CASE
+    category          general category of the character
+    combining         classes used in the Canonical Ordering Algorithm
+    bidi              bidirectional category
+    decomposition     character decomposition mapping
+    decimal           if decimal digit this is the integer numeric value
+    digit             if digit this is the numeric value
+    numeric           if numeric is the integer or rational
numeric value + mirrored if mirrored in bidirectional text + unicode10 Unicode 1.0 name if existed and different + comment ISO 10646 comment field + upper uppercase equivalent mapping + lower lowercase equivalent mapping + title titlecase equivalent mapping + + block block the character belongs to (used in \p{In...}) + script script the character belongs to + +If no match is found, a reference to an empty hash is returned. + +The C<block> property is the same as returned by charblock(). It is +not defined in the Unicode Character Database proper (Chapter 4 of the +Unicode 3.0 Standard, aka TUS3) but instead in an auxiliary database +(Chapter 14 of TUS3). Similarly for the C