1 ------------------------------------------------------------------------------
3 -- GNAT RUN-TIME COMPONENTS --
5 -- A D A . C H A R A C T E R S . H A N D L I N G --
9 -- Copyright (C) 1992-2012, Free Software Foundation, Inc. --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 3, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. --
18 -- As a special exception under Section 7 of GPL version 3, you are granted --
19 -- additional permissions described in the GCC Runtime Library Exception, --
20 -- version 3.1, as published by the Free Software Foundation. --
22 -- You should have received a copy of the GNU General Public License and --
23 -- a copy of the GCC Runtime Library Exception along with this program; --
24 -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
25 -- <http://www.gnu.org/licenses/>. --
27 -- GNAT was originally developed by the GNAT team at New York University. --
28 -- Extensive contributions were provided by Ada Core Technologies Inc. --
30 ------------------------------------------------------------------------------
32 with Ada
.Characters
.Latin_1
; use Ada
.Characters
.Latin_1
;
33 with Ada
.Strings
.Maps
; use Ada
.Strings
.Maps
;
34 with Ada
.Strings
.Maps
.Constants
; use Ada
.Strings
.Maps
.Constants
;
36 package body Ada
.Characters
.Handling
is
38 ------------------------------------
39 -- Character Classification Table --
40 ------------------------------------
42 type Character_Flags
is mod 256;
43 for Character_Flags
'Size use 8;
45 Control
: constant Character_Flags
:= 1;
46 Lower
: constant Character_Flags
:= 2;
47 Upper
: constant Character_Flags
:= 4;
48 Basic
: constant Character_Flags
:= 8;
49 Hex_Digit
: constant Character_Flags
:= 16;
50 Digit
: constant Character_Flags
:= 32;
51 Special
: constant Character_Flags
:= 64;
53 Letter
: constant Character_Flags
:= Lower
or Upper
;
54 Alphanum
: constant Character_Flags
:= Letter
or Digit
;
55 Graphic
: constant Character_Flags
:= Alphanum
or Special
;
57 Char_Map
: constant array (Character) of Character_Flags
:=
94 Exclamation
=> Special
,
96 Number_Sign
=> Special
,
97 Dollar_Sign
=> Special
,
98 Percent_Sign
=> Special
,
100 Apostrophe
=> Special
,
101 Left_Parenthesis
=> Special
,
102 Right_Parenthesis
=> Special
,
104 Plus_Sign
=> Special
,
107 Full_Stop
=> Special
,
110 '0' .. '9' => Digit
+ Hex_Digit
,
113 Semicolon
=> Special
,
114 Less_Than_Sign
=> Special
,
115 Equals_Sign
=> Special
,
116 Greater_Than_Sign
=> Special
,
118 Commercial_At
=> Special
,
120 'A' .. 'F' => Upper
+ Basic
+ Hex_Digit
,
121 'G' .. 'Z' => Upper
+ Basic
,
123 Left_Square_Bracket
=> Special
,
124 Reverse_Solidus
=> Special
,
125 Right_Square_Bracket
=> Special
,
126 Circumflex
=> Special
,
130 'a' .. 'f' => Lower
+ Basic
+ Hex_Digit
,
131 'g' .. 'z' => Lower
+ Basic
,
133 Left_Curly_Bracket
=> Special
,
134 Vertical_Line
=> Special
,
135 Right_Curly_Bracket
=> Special
,
139 Reserved_128
=> Control
,
140 Reserved_129
=> Control
,
143 Reserved_132
=> Control
,
166 Reserved_153
=> Control
,
174 No_Break_Space
=> Special
,
175 Inverted_Exclamation
=> Special
,
176 Cent_Sign
=> Special
,
177 Pound_Sign
=> Special
,
178 Currency_Sign
=> Special
,
180 Broken_Bar
=> Special
,
181 Section_Sign
=> Special
,
182 Diaeresis
=> Special
,
183 Copyright_Sign
=> Special
,
184 Feminine_Ordinal_Indicator
=> Special
,
185 Left_Angle_Quotation
=> Special
,
187 Soft_Hyphen
=> Special
,
188 Registered_Trade_Mark_Sign
=> Special
,
190 Degree_Sign
=> Special
,
191 Plus_Minus_Sign
=> Special
,
192 Superscript_Two
=> Special
,
193 Superscript_Three
=> Special
,
195 Micro_Sign
=> Special
,
196 Pilcrow_Sign
=> Special
,
197 Middle_Dot
=> Special
,
199 Superscript_One
=> Special
,
200 Masculine_Ordinal_Indicator
=> Special
,
201 Right_Angle_Quotation
=> Special
,
202 Fraction_One_Quarter
=> Special
,
203 Fraction_One_Half
=> Special
,
204 Fraction_Three_Quarters
=> Special
,
205 Inverted_Question
=> Special
,
209 UC_A_Circumflex
=> Upper
,
211 UC_A_Diaeresis
=> Upper
,
213 UC_AE_Diphthong
=> Upper
+ Basic
,
214 UC_C_Cedilla
=> Upper
,
217 UC_E_Circumflex
=> Upper
,
218 UC_E_Diaeresis
=> Upper
,
221 UC_I_Circumflex
=> Upper
,
222 UC_I_Diaeresis
=> Upper
,
223 UC_Icelandic_Eth
=> Upper
+ Basic
,
227 UC_O_Circumflex
=> Upper
,
229 UC_O_Diaeresis
=> Upper
,
231 Multiplication_Sign
=> Special
,
233 UC_O_Oblique_Stroke
=> Upper
,
236 UC_U_Circumflex
=> Upper
,
237 UC_U_Diaeresis
=> Upper
,
239 UC_Icelandic_Thorn
=> Upper
+ Basic
,
241 LC_German_Sharp_S
=> Lower
+ Basic
,
244 LC_A_Circumflex
=> Lower
,
246 LC_A_Diaeresis
=> Lower
,
248 LC_AE_Diphthong
=> Lower
+ Basic
,
249 LC_C_Cedilla
=> Lower
,
252 LC_E_Circumflex
=> Lower
,
253 LC_E_Diaeresis
=> Lower
,
256 LC_I_Circumflex
=> Lower
,
257 LC_I_Diaeresis
=> Lower
,
258 LC_Icelandic_Eth
=> Lower
+ Basic
,
262 LC_O_Circumflex
=> Lower
,
264 LC_O_Diaeresis
=> Lower
,
266 Division_Sign
=> Special
,
268 LC_O_Oblique_Stroke
=> Lower
,
271 LC_U_Circumflex
=> Lower
,
272 LC_U_Diaeresis
=> Lower
,
274 LC_Icelandic_Thorn
=> Lower
+ Basic
,
275 LC_Y_Diaeresis
=> Lower
278 ---------------------
279 -- Is_Alphanumeric --
280 ---------------------
282 function Is_Alphanumeric
(Item
: Character) return Boolean is
284 return (Char_Map
(Item
) and Alphanum
) /= 0;
291 function Is_Basic
(Item
: Character) return Boolean is
293 return (Char_Map
(Item
) and Basic
) /= 0;
300 function Is_Character
(Item
: Wide_Character) return Boolean is
302 return Wide_Character'Pos (Item
) < 256;
309 function Is_Control
(Item
: Character) return Boolean is
311 return (Char_Map
(Item
) and Control
) /= 0;
318 function Is_Digit
(Item
: Character) return Boolean is
320 return Item
in '0' .. '9';
327 function Is_Graphic
(Item
: Character) return Boolean is
329 return (Char_Map
(Item
) and Graphic
) /= 0;
332 --------------------------
333 -- Is_Hexadecimal_Digit --
334 --------------------------
336 function Is_Hexadecimal_Digit
(Item
: Character) return Boolean is
338 return (Char_Map
(Item
) and Hex_Digit
) /= 0;
339 end Is_Hexadecimal_Digit
;
345 function Is_ISO_646
(Item
: Character) return Boolean is
347 return Item
in ISO_646
;
350 -- Note: much more efficient coding of the following function is possible
351 -- by testing several 16#80# bits in a complete word in a single operation
353 function Is_ISO_646
(Item
: String) return Boolean is
355 for J
in Item
'Range loop
356 if Item
(J
) not in ISO_646
then
368 function Is_Letter
(Item
: Character) return Boolean is
370 return (Char_Map
(Item
) and Letter
) /= 0;
377 function Is_Lower
(Item
: Character) return Boolean is
379 return (Char_Map
(Item
) and Lower
) /= 0;
386 function Is_Special
(Item
: Character) return Boolean is
388 return (Char_Map
(Item
) and Special
) /= 0;
395 function Is_String
(Item
: Wide_String) return Boolean is
397 for J
in Item
'Range loop
398 if Wide_Character'Pos (Item
(J
)) >= 256 then
410 function Is_Upper
(Item
: Character) return Boolean is
412 return (Char_Map
(Item
) and Upper
) /= 0;
419 function To_Basic
(Item
: Character) return Character is
421 return Value
(Basic_Map
, Item
);
424 function To_Basic
(Item
: String) return String is
426 return Result
: String (1 .. Item
'Length) do
427 for J
in Item
'Range loop
428 Result
(J
- (Item
'First - 1)) := Value
(Basic_Map
, Item
(J
));
437 function To_Character
438 (Item
: Wide_Character;
439 Substitute
: Character := ' ') return Character
442 if Is_Character
(Item
) then
443 return Character'Val (Wide_Character'Pos (Item
));
455 Substitute
: ISO_646
:= ' ') return ISO_646
458 return (if Item
in ISO_646
then Item
else Substitute
);
463 Substitute
: ISO_646
:= ' ') return String
465 Result
: String (1 .. Item
'Length);
468 for J
in Item
'Range loop
469 Result
(J
- (Item
'First - 1)) :=
470 (if Item
(J
) in ISO_646
then Item
(J
) else Substitute
);
480 function To_Lower
(Item
: Character) return Character is
482 return Value
(Lower_Case_Map
, Item
);
485 function To_Lower
(Item
: String) return String is
487 return Result
: String (1 .. Item
'Length) do
488 for J
in Item
'Range loop
489 Result
(J
- (Item
'First - 1)) := Value
(Lower_Case_Map
, Item
(J
));
500 Substitute
: Character := ' ') return String
502 Result
: String (1 .. Item
'Length);
505 for J
in Item
'Range loop
506 Result
(J
- (Item
'First - 1)) := To_Character
(Item
(J
), Substitute
);
517 (Item
: Character) return Character
520 return Value
(Upper_Case_Map
, Item
);
524 (Item
: String) return String
527 return Result
: String (1 .. Item
'Length) do
528 for J
in Item
'Range loop
529 Result
(J
- (Item
'First - 1)) := Value
(Upper_Case_Map
, Item
(J
));
534 -----------------------
535 -- To_Wide_Character --
536 -----------------------
538 function To_Wide_Character
539 (Item
: Character) return Wide_Character
542 return Wide_Character'Val (Character'Pos (Item
));
543 end To_Wide_Character
;
549 function To_Wide_String
550 (Item
: String) return Wide_String
552 Result
: Wide_String (1 .. Item
'Length);
555 for J
in Item
'Range loop
556 Result
(J
- (Item
'First - 1)) := To_Wide_Character
(Item
(J
));
562 end Ada
.Characters
.Handling
;