1 ------------------------------------------------------------------------------
3 -- GNAT RUN-TIME COMPONENTS --
5 -- A D A . C H A R A C T E R S . H A N D L I N G --
11 -- Copyright (C) 1992-2001 Free Software Foundation, Inc. --
13 -- GNAT is free software; you can redistribute it and/or modify it under --
14 -- terms of the GNU General Public License as published by the Free Soft- --
15 -- ware Foundation; either version 2, or (at your option) any later ver- --
16 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
17 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
18 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
19 -- for more details. You should have received a copy of the GNU General --
20 -- Public License distributed with GNAT; see file COPYING. If not, write --
21 -- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, --
22 -- MA 02111-1307, USA. --
24 -- As a special exception, if other files instantiate generics from this --
25 -- unit, or you link this unit with other files to produce an executable, --
26 -- this unit does not by itself cause the resulting executable to be --
27 -- covered by the GNU General Public License. This exception does not --
28 -- however invalidate any other reasons why the executable file might be --
29 -- covered by the GNU Public License. --
31 -- GNAT was originally developed by the GNAT team at New York University. --
32 -- Extensive contributions were provided by Ada Core Technologies Inc. --
34 ------------------------------------------------------------------------------
36 with Ada
.Characters
.Latin_1
; use Ada
.Characters
.Latin_1
;
37 with Ada
.Strings
.Maps
; use Ada
.Strings
.Maps
;
38 with Ada
.Strings
.Maps
.Constants
; use Ada
.Strings
.Maps
.Constants
;
40 package body Ada
.Characters
.Handling
is
42 ------------------------------------
43 -- Character Classification Table --
44 ------------------------------------
46 type Character_Flags
is mod 256;
47 for Character_Flags
'Size use 8;
49 Control
: constant Character_Flags
:= 1;
50 Lower
: constant Character_Flags
:= 2;
51 Upper
: constant Character_Flags
:= 4;
52 Basic
: constant Character_Flags
:= 8;
53 Hex_Digit
: constant Character_Flags
:= 16;
54 Digit
: constant Character_Flags
:= 32;
55 Special
: constant Character_Flags
:= 64;
57 Letter
: constant Character_Flags
:= Lower
or Upper
;
58 Alphanum
: constant Character_Flags
:= Letter
or Digit
;
59 Graphic
: constant Character_Flags
:= Alphanum
or Special
;
61 Char_Map
: constant array (Character) of Character_Flags
:=
98 Exclamation
=> Special
,
100 Number_Sign
=> Special
,
101 Dollar_Sign
=> Special
,
102 Percent_Sign
=> Special
,
103 Ampersand
=> Special
,
104 Apostrophe
=> Special
,
105 Left_Parenthesis
=> Special
,
106 Right_Parenthesis
=> Special
,
108 Plus_Sign
=> Special
,
111 Full_Stop
=> Special
,
114 '0' .. '9' => Digit
+ Hex_Digit
,
117 Semicolon
=> Special
,
118 Less_Than_Sign
=> Special
,
119 Equals_Sign
=> Special
,
120 Greater_Than_Sign
=> Special
,
122 Commercial_At
=> Special
,
124 'A' .. 'F' => Upper
+ Basic
+ Hex_Digit
,
125 'G' .. 'Z' => Upper
+ Basic
,
127 Left_Square_Bracket
=> Special
,
128 Reverse_Solidus
=> Special
,
129 Right_Square_Bracket
=> Special
,
130 Circumflex
=> Special
,
134 'a' .. 'f' => Lower
+ Basic
+ Hex_Digit
,
135 'g' .. 'z' => Lower
+ Basic
,
137 Left_Curly_Bracket
=> Special
,
138 Vertical_Line
=> Special
,
139 Right_Curly_Bracket
=> Special
,
143 Reserved_128
=> Control
,
144 Reserved_129
=> Control
,
147 Reserved_132
=> Control
,
170 Reserved_153
=> Control
,
178 No_Break_Space
=> Special
,
179 Inverted_Exclamation
=> Special
,
180 Cent_Sign
=> Special
,
181 Pound_Sign
=> Special
,
182 Currency_Sign
=> Special
,
184 Broken_Bar
=> Special
,
185 Section_Sign
=> Special
,
186 Diaeresis
=> Special
,
187 Copyright_Sign
=> Special
,
188 Feminine_Ordinal_Indicator
=> Special
,
189 Left_Angle_Quotation
=> Special
,
191 Soft_Hyphen
=> Special
,
192 Registered_Trade_Mark_Sign
=> Special
,
194 Degree_Sign
=> Special
,
195 Plus_Minus_Sign
=> Special
,
196 Superscript_Two
=> Special
,
197 Superscript_Three
=> Special
,
199 Micro_Sign
=> Special
,
200 Pilcrow_Sign
=> Special
,
201 Middle_Dot
=> Special
,
203 Superscript_One
=> Special
,
204 Masculine_Ordinal_Indicator
=> Special
,
205 Right_Angle_Quotation
=> Special
,
206 Fraction_One_Quarter
=> Special
,
207 Fraction_One_Half
=> Special
,
208 Fraction_Three_Quarters
=> Special
,
209 Inverted_Question
=> Special
,
213 UC_A_Circumflex
=> Upper
,
215 UC_A_Diaeresis
=> Upper
,
217 UC_AE_Diphthong
=> Upper
+ Basic
,
218 UC_C_Cedilla
=> Upper
,
221 UC_E_Circumflex
=> Upper
,
222 UC_E_Diaeresis
=> Upper
,
225 UC_I_Circumflex
=> Upper
,
226 UC_I_Diaeresis
=> Upper
,
227 UC_Icelandic_Eth
=> Upper
+ Basic
,
231 UC_O_Circumflex
=> Upper
,
233 UC_O_Diaeresis
=> Upper
,
235 Multiplication_Sign
=> Special
,
237 UC_O_Oblique_Stroke
=> Upper
,
240 UC_U_Circumflex
=> Upper
,
241 UC_U_Diaeresis
=> Upper
,
243 UC_Icelandic_Thorn
=> Upper
+ Basic
,
245 LC_German_Sharp_S
=> Lower
+ Basic
,
248 LC_A_Circumflex
=> Lower
,
250 LC_A_Diaeresis
=> Lower
,
252 LC_AE_Diphthong
=> Lower
+ Basic
,
253 LC_C_Cedilla
=> Lower
,
256 LC_E_Circumflex
=> Lower
,
257 LC_E_Diaeresis
=> Lower
,
260 LC_I_Circumflex
=> Lower
,
261 LC_I_Diaeresis
=> Lower
,
262 LC_Icelandic_Eth
=> Lower
+ Basic
,
266 LC_O_Circumflex
=> Lower
,
268 LC_O_Diaeresis
=> Lower
,
270 Division_Sign
=> Special
,
272 LC_O_Oblique_Stroke
=> Lower
,
275 LC_U_Circumflex
=> Lower
,
276 LC_U_Diaeresis
=> Lower
,
278 LC_Icelandic_Thorn
=> Lower
+ Basic
,
279 LC_Y_Diaeresis
=> Lower
282 ---------------------
283 -- Is_Alphanumeric --
284 ---------------------
286 function Is_Alphanumeric
(Item
: in Character) return Boolean is
288 return (Char_Map
(Item
) and Alphanum
) /= 0;
295 function Is_Basic
(Item
: in Character) return Boolean is
297 return (Char_Map
(Item
) and Basic
) /= 0;
304 function Is_Character
(Item
: in Wide_Character) return Boolean is
306 return Wide_Character'Pos (Item
) < 256;
313 function Is_Control
(Item
: in Character) return Boolean is
315 return (Char_Map
(Item
) and Control
) /= 0;
322 function Is_Digit
(Item
: in Character) return Boolean is
324 return Item
in '0' .. '9';
331 function Is_Graphic
(Item
: in Character) return Boolean is
333 return (Char_Map
(Item
) and Graphic
) /= 0;
336 --------------------------
337 -- Is_Hexadecimal_Digit --
338 --------------------------
340 function Is_Hexadecimal_Digit
(Item
: in Character) return Boolean is
342 return (Char_Map
(Item
) and Hex_Digit
) /= 0;
343 end Is_Hexadecimal_Digit
;
349 function Is_ISO_646
(Item
: in Character) return Boolean is
351 return Item
in ISO_646
;
354 -- Note: much more efficient coding of the following function is possible
355 -- by testing several 16#80# bits in a complete word in a single operation
357 function Is_ISO_646
(Item
: in String) return Boolean is
359 for J
in Item
'Range loop
360 if Item
(J
) not in ISO_646
then
372 function Is_Letter
(Item
: in Character) return Boolean is
374 return (Char_Map
(Item
) and Letter
) /= 0;
381 function Is_Lower
(Item
: in Character) return Boolean is
383 return (Char_Map
(Item
) and Lower
) /= 0;
390 function Is_Special
(Item
: in Character) return Boolean is
392 return (Char_Map
(Item
) and Special
) /= 0;
399 function Is_String
(Item
: in Wide_String) return Boolean is
401 for J
in Item
'Range loop
402 if Wide_Character'Pos (Item
(J
)) >= 256 then
414 function Is_Upper
(Item
: in Character) return Boolean is
416 return (Char_Map
(Item
) and Upper
) /= 0;
423 function To_Basic
(Item
: in Character) return Character is
425 return Value
(Basic_Map
, Item
);
428 function To_Basic
(Item
: in String) return String is
429 Result
: String (1 .. Item
'Length);
432 for J
in Item
'Range loop
433 Result
(J
- (Item
'First - 1)) := Value
(Basic_Map
, Item
(J
));
443 function To_Character
444 (Item
: in Wide_Character;
445 Substitute
: in Character := ' ')
449 if Is_Character
(Item
) then
450 return Character'Val (Wide_Character'Pos (Item
));
461 (Item
: in Character;
462 Substitute
: in ISO_646
:= ' ')
466 if Item
in ISO_646
then
475 Substitute
: in ISO_646
:= ' ')
478 Result
: String (1 .. Item
'Length);
481 for J
in Item
'Range loop
482 if Item
(J
) in ISO_646
then
483 Result
(J
- (Item
'First - 1)) := Item
(J
);
485 Result
(J
- (Item
'First - 1)) := Substitute
;
496 function To_Lower
(Item
: in Character) return Character is
498 return Value
(Lower_Case_Map
, Item
);
501 function To_Lower
(Item
: in String) return String is
502 Result
: String (1 .. Item
'Length);
505 for J
in Item
'Range loop
506 Result
(J
- (Item
'First - 1)) := Value
(Lower_Case_Map
, Item
(J
));
517 (Item
: in Wide_String;
518 Substitute
: in Character := ' ')
521 Result
: String (1 .. Item
'Length);
524 for J
in Item
'Range loop
525 Result
(J
- (Item
'First - 1)) := To_Character
(Item
(J
), Substitute
);
535 (Item
: in Character)
539 return Value
(Upper_Case_Map
, Item
);
546 Result
: String (1 .. Item
'Length);
549 for J
in Item
'Range loop
550 Result
(J
- (Item
'First - 1)) := Value
(Upper_Case_Map
, Item
(J
));
556 -----------------------
557 -- To_Wide_Character --
558 -----------------------
560 function To_Wide_Character
561 (Item
: in Character)
562 return Wide_Character
565 return Wide_Character'Val (Character'Pos (Item
));
566 end To_Wide_Character
;
572 function To_Wide_String
576 Result
: Wide_String (1 .. Item
'Length);
579 for J
in Item
'Range loop
580 Result
(J
- (Item
'First - 1)) := To_Wide_Character
(Item
(J
));
585 end Ada
.Characters
.Handling
;