1 ------------------------------------------------------------------------------
3 -- GNAT RUN-TIME COMPONENTS --
5 -- A D A . C H A R A C T E R S . H A N D L I N G --
9 -- Copyright (C) 1992-2005 Free Software Foundation, Inc. --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 2, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
17 -- for more details. You should have received a copy of the GNU General --
18 -- Public License distributed with GNAT; see file COPYING. If not, write --
19 -- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, --
20 -- MA 02111-1307, USA. --
22 -- As a special exception, if other files instantiate generics from this --
23 -- unit, or you link this unit with other files to produce an executable, --
24 -- this unit does not by itself cause the resulting executable to be --
25 -- covered by the GNU General Public License. This exception does not --
26 -- however invalidate any other reasons why the executable file might be --
27 -- covered by the GNU Public License. --
29 -- GNAT was originally developed by the GNAT team at New York University. --
30 -- Extensive contributions were provided by Ada Core Technologies Inc. --
32 ------------------------------------------------------------------------------
34 with Ada
.Characters
.Latin_1
; use Ada
.Characters
.Latin_1
;
35 with Ada
.Strings
.Maps
; use Ada
.Strings
.Maps
;
36 with Ada
.Strings
.Maps
.Constants
; use Ada
.Strings
.Maps
.Constants
;
38 package body Ada
.Characters
.Handling
is
40 ------------------------------------
41 -- Character Classification Table --
42 ------------------------------------
44 type Character_Flags
is mod 256;
45 for Character_Flags
'Size use 8;
47 Control
: constant Character_Flags
:= 1;
48 Lower
: constant Character_Flags
:= 2;
49 Upper
: constant Character_Flags
:= 4;
50 Basic
: constant Character_Flags
:= 8;
51 Hex_Digit
: constant Character_Flags
:= 16;
52 Digit
: constant Character_Flags
:= 32;
53 Special
: constant Character_Flags
:= 64;
55 Letter
: constant Character_Flags
:= Lower
or Upper
;
56 Alphanum
: constant Character_Flags
:= Letter
or Digit
;
57 Graphic
: constant Character_Flags
:= Alphanum
or Special
;
59 Char_Map
: constant array (Character) of Character_Flags
:=
96 Exclamation
=> Special
,
98 Number_Sign
=> Special
,
99 Dollar_Sign
=> Special
,
100 Percent_Sign
=> Special
,
101 Ampersand
=> Special
,
102 Apostrophe
=> Special
,
103 Left_Parenthesis
=> Special
,
104 Right_Parenthesis
=> Special
,
106 Plus_Sign
=> Special
,
109 Full_Stop
=> Special
,
112 '0' .. '9' => Digit
+ Hex_Digit
,
115 Semicolon
=> Special
,
116 Less_Than_Sign
=> Special
,
117 Equals_Sign
=> Special
,
118 Greater_Than_Sign
=> Special
,
120 Commercial_At
=> Special
,
122 'A' .. 'F' => Upper
+ Basic
+ Hex_Digit
,
123 'G' .. 'Z' => Upper
+ Basic
,
125 Left_Square_Bracket
=> Special
,
126 Reverse_Solidus
=> Special
,
127 Right_Square_Bracket
=> Special
,
128 Circumflex
=> Special
,
132 'a' .. 'f' => Lower
+ Basic
+ Hex_Digit
,
133 'g' .. 'z' => Lower
+ Basic
,
135 Left_Curly_Bracket
=> Special
,
136 Vertical_Line
=> Special
,
137 Right_Curly_Bracket
=> Special
,
141 Reserved_128
=> Control
,
142 Reserved_129
=> Control
,
145 Reserved_132
=> Control
,
168 Reserved_153
=> Control
,
176 No_Break_Space
=> Special
,
177 Inverted_Exclamation
=> Special
,
178 Cent_Sign
=> Special
,
179 Pound_Sign
=> Special
,
180 Currency_Sign
=> Special
,
182 Broken_Bar
=> Special
,
183 Section_Sign
=> Special
,
184 Diaeresis
=> Special
,
185 Copyright_Sign
=> Special
,
186 Feminine_Ordinal_Indicator
=> Special
,
187 Left_Angle_Quotation
=> Special
,
189 Soft_Hyphen
=> Special
,
190 Registered_Trade_Mark_Sign
=> Special
,
192 Degree_Sign
=> Special
,
193 Plus_Minus_Sign
=> Special
,
194 Superscript_Two
=> Special
,
195 Superscript_Three
=> Special
,
197 Micro_Sign
=> Special
,
198 Pilcrow_Sign
=> Special
,
199 Middle_Dot
=> Special
,
201 Superscript_One
=> Special
,
202 Masculine_Ordinal_Indicator
=> Special
,
203 Right_Angle_Quotation
=> Special
,
204 Fraction_One_Quarter
=> Special
,
205 Fraction_One_Half
=> Special
,
206 Fraction_Three_Quarters
=> Special
,
207 Inverted_Question
=> Special
,
211 UC_A_Circumflex
=> Upper
,
213 UC_A_Diaeresis
=> Upper
,
215 UC_AE_Diphthong
=> Upper
+ Basic
,
216 UC_C_Cedilla
=> Upper
,
219 UC_E_Circumflex
=> Upper
,
220 UC_E_Diaeresis
=> Upper
,
223 UC_I_Circumflex
=> Upper
,
224 UC_I_Diaeresis
=> Upper
,
225 UC_Icelandic_Eth
=> Upper
+ Basic
,
229 UC_O_Circumflex
=> Upper
,
231 UC_O_Diaeresis
=> Upper
,
233 Multiplication_Sign
=> Special
,
235 UC_O_Oblique_Stroke
=> Upper
,
238 UC_U_Circumflex
=> Upper
,
239 UC_U_Diaeresis
=> Upper
,
241 UC_Icelandic_Thorn
=> Upper
+ Basic
,
243 LC_German_Sharp_S
=> Lower
+ Basic
,
246 LC_A_Circumflex
=> Lower
,
248 LC_A_Diaeresis
=> Lower
,
250 LC_AE_Diphthong
=> Lower
+ Basic
,
251 LC_C_Cedilla
=> Lower
,
254 LC_E_Circumflex
=> Lower
,
255 LC_E_Diaeresis
=> Lower
,
258 LC_I_Circumflex
=> Lower
,
259 LC_I_Diaeresis
=> Lower
,
260 LC_Icelandic_Eth
=> Lower
+ Basic
,
264 LC_O_Circumflex
=> Lower
,
266 LC_O_Diaeresis
=> Lower
,
268 Division_Sign
=> Special
,
270 LC_O_Oblique_Stroke
=> Lower
,
273 LC_U_Circumflex
=> Lower
,
274 LC_U_Diaeresis
=> Lower
,
276 LC_Icelandic_Thorn
=> Lower
+ Basic
,
277 LC_Y_Diaeresis
=> Lower
280 ---------------------
281 -- Is_Alphanumeric --
282 ---------------------
284 function Is_Alphanumeric
(Item
: Character) return Boolean is
286 return (Char_Map
(Item
) and Alphanum
) /= 0;
293 function Is_Basic
(Item
: Character) return Boolean is
295 return (Char_Map
(Item
) and Basic
) /= 0;
302 function Is_Character
(Item
: Wide_Character) return Boolean is
304 return Wide_Character'Pos (Item
) < 256;
307 function Is_Character
(Item
: Wide_Wide_Character
) return Boolean is
309 return Wide_Wide_Character
'Pos (Item
) < 256;
316 function Is_Control
(Item
: Character) return Boolean is
318 return (Char_Map
(Item
) and Control
) /= 0;
325 function Is_Digit
(Item
: Character) return Boolean is
327 return Item
in '0' .. '9';
334 function Is_Graphic
(Item
: Character) return Boolean is
336 return (Char_Map
(Item
) and Graphic
) /= 0;
339 --------------------------
340 -- Is_Hexadecimal_Digit --
341 --------------------------
343 function Is_Hexadecimal_Digit
(Item
: Character) return Boolean is
345 return (Char_Map
(Item
) and Hex_Digit
) /= 0;
346 end Is_Hexadecimal_Digit
;
352 function Is_ISO_646
(Item
: Character) return Boolean is
354 return Item
in ISO_646
;
357 -- Note: much more efficient coding of the following function is possible
358 -- by testing several 16#80# bits in a complete word in a single operation
360 function Is_ISO_646
(Item
: String) return Boolean is
362 for J
in Item
'Range loop
363 if Item
(J
) not in ISO_646
then
375 function Is_Letter
(Item
: Character) return Boolean is
377 return (Char_Map
(Item
) and Letter
) /= 0;
384 function Is_Lower
(Item
: Character) return Boolean is
386 return (Char_Map
(Item
) and Lower
) /= 0;
393 function Is_Special
(Item
: Character) return Boolean is
395 return (Char_Map
(Item
) and Special
) /= 0;
402 function Is_String
(Item
: Wide_String) return Boolean is
404 for J
in Item
'Range loop
405 if Wide_Character'Pos (Item
(J
)) >= 256 then
413 function Is_String
(Item
: Wide_Wide_String
) return Boolean is
415 for J
in Item
'Range loop
416 if Wide_Wide_Character
'Pos (Item
(J
)) >= 256 then
428 function Is_Upper
(Item
: Character) return Boolean is
430 return (Char_Map
(Item
) and Upper
) /= 0;
433 -----------------------
434 -- Is_Wide_Character --
435 -----------------------
437 function Is_Wide_Character
(Item
: Wide_Wide_Character
) return Boolean is
439 return Wide_Wide_Character
'Pos (Item
) < 2**16;
440 end Is_Wide_Character
;
446 function Is_Wide_String
(Item
: Wide_Wide_String
) return Boolean is
448 for J
in Item
'Range loop
449 if Wide_Wide_Character
'Pos (Item
(J
)) >= 2**16 then
461 function To_Basic
(Item
: Character) return Character is
463 return Value
(Basic_Map
, Item
);
466 function To_Basic
(Item
: String) return String is
467 Result
: String (1 .. Item
'Length);
470 for J
in Item
'Range loop
471 Result
(J
- (Item
'First - 1)) := Value
(Basic_Map
, Item
(J
));
481 function To_Character
482 (Item
: Wide_Character;
483 Substitute
: Character := ' ') return Character
486 if Is_Character
(Item
) then
487 return Character'Val (Wide_Character'Pos (Item
));
493 function To_Character
494 (Item
: Wide_Wide_Character
;
495 Substitute
: Character := ' ') return Character
498 if Is_Character
(Item
) then
499 return Character'Val (Wide_Wide_Character
'Pos (Item
));
511 Substitute
: ISO_646
:= ' ') return ISO_646
514 if Item
in ISO_646
then
523 Substitute
: ISO_646
:= ' ') return String
525 Result
: String (1 .. Item
'Length);
528 for J
in Item
'Range loop
529 if Item
(J
) in ISO_646
then
530 Result
(J
- (Item
'First - 1)) := Item
(J
);
532 Result
(J
- (Item
'First - 1)) := Substitute
;
543 function To_Lower
(Item
: Character) return Character is
545 return Value
(Lower_Case_Map
, Item
);
548 function To_Lower
(Item
: String) return String is
549 Result
: String (1 .. Item
'Length);
552 for J
in Item
'Range loop
553 Result
(J
- (Item
'First - 1)) := Value
(Lower_Case_Map
, Item
(J
));
565 Substitute
: Character := ' ') return String
567 Result
: String (1 .. Item
'Length);
570 for J
in Item
'Range loop
571 Result
(J
- (Item
'First - 1)) := To_Character
(Item
(J
), Substitute
);
578 (Item
: Wide_Wide_String
;
579 Substitute
: Character := ' ') return String
581 Result
: String (1 .. Item
'Length);
584 for J
in Item
'Range loop
585 Result
(J
- (Item
'First - 1)) := To_Character
(Item
(J
), Substitute
);
596 (Item
: Character) return Character
599 return Value
(Upper_Case_Map
, Item
);
603 (Item
: String) return String
605 Result
: String (1 .. Item
'Length);
608 for J
in Item
'Range loop
609 Result
(J
- (Item
'First - 1)) := Value
(Upper_Case_Map
, Item
(J
));
615 -----------------------
616 -- To_Wide_Character --
617 -----------------------
619 function To_Wide_Character
620 (Item
: Character) return Wide_Character
623 return Wide_Character'Val (Character'Pos (Item
));
624 end To_Wide_Character
;
626 function To_Wide_Character
627 (Item
: Wide_Wide_Character
;
628 Substitute
: Wide_Character := ' ') return Wide_Character
631 if Wide_Wide_Character
'Pos (Item
) < 2**16 then
632 return Wide_Character'Val (Wide_Wide_Character
'Pos (Item
));
636 end To_Wide_Character
;
642 function To_Wide_String
643 (Item
: String) return Wide_String
645 Result
: Wide_String (1 .. Item
'Length);
648 for J
in Item
'Range loop
649 Result
(J
- (Item
'First - 1)) := To_Wide_Character
(Item
(J
));
655 function To_Wide_String
656 (Item
: Wide_Wide_String
;
657 Substitute
: Wide_Character := ' ') return Wide_String
659 Result
: Wide_String (1 .. Item
'Length);
662 for J
in Item
'Range loop
663 Result
(J
- (Item
'First - 1)) :=
664 To_Wide_Character
(Item
(J
), Substitute
);
670 ----------------------------
671 -- To_Wide_Wide_Character --
672 ----------------------------
674 function To_Wide_Wide_Character
675 (Item
: Character) return Wide_Wide_Character
678 return Wide_Wide_Character
'Val (Character'Pos (Item
));
679 end To_Wide_Wide_Character
;
681 function To_Wide_Wide_Character
682 (Item
: Wide_Character) return Wide_Wide_Character
685 return Wide_Wide_Character
'Val (Wide_Character'Pos (Item
));
686 end To_Wide_Wide_Character
;
688 -------------------------
689 -- To_Wide_Wide_String --
690 -------------------------
692 function To_Wide_Wide_String
693 (Item
: String) return Wide_Wide_String
695 Result
: Wide_Wide_String
(1 .. Item
'Length);
698 for J
in Item
'Range loop
699 Result
(J
- (Item
'First - 1)) := To_Wide_Wide_Character
(Item
(J
));
703 end To_Wide_Wide_String
;
705 function To_Wide_Wide_String
706 (Item
: Wide_String) return Wide_Wide_String
708 Result
: Wide_Wide_String
(1 .. Item
'Length);
711 for J
in Item
'Range loop
712 Result
(J
- (Item
'First - 1)) := To_Wide_Wide_Character
(Item
(J
));
716 end To_Wide_Wide_String
;
718 end Ada
.Characters
.Handling
;