lib/perl5/5.6.1/charnames.pm

   1 package charnames;
   2 use bytes ();           # for $bytes::hint_bits
   3 use warnings();
   4 $charnames::hint_bits = 0x20000;
   5
   6 my $txt;
   7
   8 # This is not optimized in any way yet
   9 sub charnames {
  10   $name = shift;
  11   $txt = do "unicode/Name.pl" unless $txt;
  12   my @off;
  13   if ($^H{charnames_full} and $txt =~ /\t\t$name$/m) {
  14     @off = ($-[0], $+[0]);
  15   }
  16   unless (@off) {
  17     if ($^H{charnames_short} and $name =~ /^(.*?):(.*)/s) {
  18       my ($script, $cname) = ($1,$2);
  19       my $case = ( $cname =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL");
  20       if ($txt =~ m/\t\t\U$script\E (?:$case )?LETTER \U$cname$/m) {
  21         @off = ($-[0], $+[0]);
  22       }
  23     }
  24   }
  25   unless (@off) {
  26     my $case = ( $name =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL");
  27     for ( @{$^H{charnames_scripts}} ) {
  28       (@off = ($-[0], $+[0])), last
  29         if $txt =~ m/\t\t$_ (?:$case )?LETTER \U$name$/m;
  30     }
  31   }
  32   die "Unknown charname '$name'" unless @off;
  33
  34   my $hexlen = 4; # Unicode guarantees 4-, 5-, or 6-digit format
  35   $hexlen++ while
  36       $hexlen < 6 && substr($txt, $off[0] - $hexlen - 1, 1) =~ /[0-9a-f]/;
  37   my $ord = hex substr $txt, $off[0] - $hexlen, $hexlen;
  38   if ($^H & $bytes::hint_bits) {        # "use bytes" in effect?
  39     use bytes;
  40     return chr $ord if $ord <= 255;
  41     my $hex = sprintf '%X=0%o', $ord, $ord;
  42     my $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2;
  43     die "Character 0x$hex with name '$fname' is above 0xFF";
  44   }
  45   return chr $ord;
  46 }
  47
  48 sub import {
  49   shift;
  50   die "`use charnames' needs explicit imports list" unless @_;
  51   $^H |= $charnames::hint_bits;
  52   $^H{charnames} = \&charnames ;
  53   my %h;
  54   @h{@_} = (1) x @_;
  55   $^H{charnames_full} = delete $h{':full'};
  56   $^H{charnames_short} = delete $h{':short'};
  57   $^H{charnames_scripts} = [map uc, keys %h];
  58   if (warnings::enabled('utf8') && @{$^H{charnames_scripts}}) {
  59         $txt = do "unicode/Name.pl" unless $txt;
  60     for (@{$^H{charnames_scripts}}) {
  61         warnings::warn('utf8',  "No such script: '$_'") unless
  62             $txt =~ m/\t\t$_ (?:CAPITAL |SMALL )?LETTER /;
  63         }
  64   }
  65 }
  66
  67
  68 1;
  69 __END__
  70
  71 =head1 NAME
  72
  73 charnames - define character names for C<\N{named}> string literal escape.
  74
  75 =head1 SYNOPSIS
  76
  77   use charnames ':full';
  78   print "\N{GREEK SMALL LETTER SIGMA} is called sigma.\n";
  79
  80   use charnames ':short';
  81   print "\N{greek:Sigma} is an upper-case sigma.\n";
  82
  83   use charnames qw(cyrillic greek);
  84   print "\N{sigma} is Greek sigma, and \N{be} is Cyrillic b.\n";
  85
  86 =head1 DESCRIPTION
  87
  88 Pragma C<use charnames> supports arguments C<:full>, C<:short> and
  89 script names.  If C<:full> is present, for expansion of
  90 C<\N{CHARNAME}}> string C<CHARNAME> is first looked in the list of
  91 standard Unicode names of chars.  If C<:short> is present, and
  92 C<CHARNAME> has the form C<SCRIPT:CNAME>, then C<CNAME> is looked up
  93 as a letter in script C<SCRIPT>.  If pragma C<use charnames> is used
  94 with script name arguments, then for C<\N{CHARNAME}}> the name
  95 C<CHARNAME> is looked up as a letter in the given scripts (in the
  96 specified order).
  97
  98 For lookup of C<CHARNAME> inside a given script C<SCRIPTNAME>
  99 this pragma looks for the names
 100
 101   SCRIPTNAME CAPITAL LETTER CHARNAME
 102   SCRIPTNAME SMALL LETTER CHARNAME
 103   SCRIPTNAME LETTER CHARNAME
 104
 105 in the table of standard Unicode names.  If C<CHARNAME> is lowercase,
 106 then the C<CAPITAL> variant is ignored, otherwise the C<SMALL> variant is
 107 ignored.
 108
 109 =head1 CUSTOM TRANSLATORS
 110
 111 The mechanism of translation of C<\N{...}> escapes is general and not
 112 hardwired into F<charnames.pm>.  A module can install custom
 113 translations (inside the scope which C<use>s the module) with the
 114 following magic incantation:
 115
 116     use charnames ();           # for $charnames::hint_bits
 117     sub import {
 118         shift;
 119         $^H |= $charnames::hint_bits;
 120         $^H{charnames} = \&translator;
 121     }
 122
 123 Here translator() is a subroutine which takes C<CHARNAME> as an
 124 argument, and returns text to insert into the string instead of the
 125 C<\N{CHARNAME}> escape.  Since the text to insert should be different
 126 in C<bytes> mode and out of it, the function should check the current
 127 state of C<bytes>-flag as in:
 128
 129     use bytes ();                       # for $bytes::hint_bits
 130     sub translator {
 131         if ($^H & $bytes::hint_bits) {
 132             return bytes_translator(@_);
 133         }
 134         else {
 135             return utf8_translator(@_);
 136         }
 137     }
 138
 139 =head1 BUGS
 140
 141 Since evaluation of the translation function happens in a middle of
 142 compilation (of a string literal), the translation function should not
 143 do any C<eval>s or C<require>s.  This restriction should be lifted in
 144 a future version of Perl.
 145
 146 =cut