1 ------------------------------------------------------------------------------
3 -- GNAT RUN-TIME COMPONENTS --
5 -- A D A . C H A R A C T E R S . H A N D L I N G --
9 -- Copyright (C) 1992-2018, Free Software Foundation, Inc. --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 3, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. --
18 -- As a special exception under Section 7 of GPL version 3, you are granted --
19 -- additional permissions described in the GCC Runtime Library Exception, --
20 -- version 3.1, as published by the Free Software Foundation. --
22 -- You should have received a copy of the GNU General Public License and --
23 -- a copy of the GCC Runtime Library Exception along with this program; --
24 -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
25 -- <http://www.gnu.org/licenses/>. --
27 -- GNAT was originally developed by the GNAT team at New York University. --
28 -- Extensive contributions were provided by Ada Core Technologies Inc. --
30 ------------------------------------------------------------------------------
32 with Ada
.Characters
.Latin_1
; use Ada
.Characters
.Latin_1
;
33 with Ada
.Strings
.Maps
; use Ada
.Strings
.Maps
;
34 with Ada
.Strings
.Maps
.Constants
; use Ada
.Strings
.Maps
.Constants
;
36 package body Ada
.Characters
.Handling
is
38 ------------------------------------
39 -- Character Classification Table --
40 ------------------------------------
42 type Character_Flags
is mod 256;
43 for Character_Flags
'Size use 8;
45 Control
: constant Character_Flags
:= 1;
46 Lower
: constant Character_Flags
:= 2;
47 Upper
: constant Character_Flags
:= 4;
48 Basic
: constant Character_Flags
:= 8;
49 Hex_Digit
: constant Character_Flags
:= 16;
50 Digit
: constant Character_Flags
:= 32;
51 Special
: constant Character_Flags
:= 64;
52 Line_Term
: constant Character_Flags
:= 128;
54 Letter
: constant Character_Flags
:= Lower
or Upper
;
55 Alphanum
: constant Character_Flags
:= Letter
or Digit
;
56 Graphic
: constant Character_Flags
:= Alphanum
or Special
;
58 Char_Map
: constant array (Character) of Character_Flags
:=
70 LF
=> Control
+ Line_Term
,
71 VT
=> Control
+ Line_Term
,
72 FF
=> Control
+ Line_Term
,
73 CR
=> Control
+ Line_Term
,
95 Exclamation
=> Special
,
97 Number_Sign
=> Special
,
98 Dollar_Sign
=> Special
,
99 Percent_Sign
=> Special
,
100 Ampersand
=> Special
,
101 Apostrophe
=> Special
,
102 Left_Parenthesis
=> Special
,
103 Right_Parenthesis
=> Special
,
105 Plus_Sign
=> Special
,
108 Full_Stop
=> Special
,
111 '0' .. '9' => Digit
+ Hex_Digit
,
114 Semicolon
=> Special
,
115 Less_Than_Sign
=> Special
,
116 Equals_Sign
=> Special
,
117 Greater_Than_Sign
=> Special
,
119 Commercial_At
=> Special
,
121 'A' .. 'F' => Upper
+ Basic
+ Hex_Digit
,
122 'G' .. 'Z' => Upper
+ Basic
,
124 Left_Square_Bracket
=> Special
,
125 Reverse_Solidus
=> Special
,
126 Right_Square_Bracket
=> Special
,
127 Circumflex
=> Special
,
131 'a' .. 'f' => Lower
+ Basic
+ Hex_Digit
,
132 'g' .. 'z' => Lower
+ Basic
,
134 Left_Curly_Bracket
=> Special
,
135 Vertical_Line
=> Special
,
136 Right_Curly_Bracket
=> Special
,
140 Reserved_128
=> Control
,
141 Reserved_129
=> Control
,
144 Reserved_132
=> Control
,
145 NEL
=> Control
+ Line_Term
,
167 Reserved_153
=> Control
,
175 No_Break_Space
=> Special
,
176 Inverted_Exclamation
=> Special
,
177 Cent_Sign
=> Special
,
178 Pound_Sign
=> Special
,
179 Currency_Sign
=> Special
,
181 Broken_Bar
=> Special
,
182 Section_Sign
=> Special
,
183 Diaeresis
=> Special
,
184 Copyright_Sign
=> Special
,
185 Feminine_Ordinal_Indicator
=> Special
,
186 Left_Angle_Quotation
=> Special
,
188 Soft_Hyphen
=> Special
,
189 Registered_Trade_Mark_Sign
=> Special
,
191 Degree_Sign
=> Special
,
192 Plus_Minus_Sign
=> Special
,
193 Superscript_Two
=> Special
,
194 Superscript_Three
=> Special
,
196 Micro_Sign
=> Special
,
197 Pilcrow_Sign
=> Special
,
198 Middle_Dot
=> Special
,
200 Superscript_One
=> Special
,
201 Masculine_Ordinal_Indicator
=> Special
,
202 Right_Angle_Quotation
=> Special
,
203 Fraction_One_Quarter
=> Special
,
204 Fraction_One_Half
=> Special
,
205 Fraction_Three_Quarters
=> Special
,
206 Inverted_Question
=> Special
,
210 UC_A_Circumflex
=> Upper
,
212 UC_A_Diaeresis
=> Upper
,
214 UC_AE_Diphthong
=> Upper
+ Basic
,
215 UC_C_Cedilla
=> Upper
,
218 UC_E_Circumflex
=> Upper
,
219 UC_E_Diaeresis
=> Upper
,
222 UC_I_Circumflex
=> Upper
,
223 UC_I_Diaeresis
=> Upper
,
224 UC_Icelandic_Eth
=> Upper
+ Basic
,
228 UC_O_Circumflex
=> Upper
,
230 UC_O_Diaeresis
=> Upper
,
232 Multiplication_Sign
=> Special
,
234 UC_O_Oblique_Stroke
=> Upper
,
237 UC_U_Circumflex
=> Upper
,
238 UC_U_Diaeresis
=> Upper
,
240 UC_Icelandic_Thorn
=> Upper
+ Basic
,
242 LC_German_Sharp_S
=> Lower
+ Basic
,
245 LC_A_Circumflex
=> Lower
,
247 LC_A_Diaeresis
=> Lower
,
249 LC_AE_Diphthong
=> Lower
+ Basic
,
250 LC_C_Cedilla
=> Lower
,
253 LC_E_Circumflex
=> Lower
,
254 LC_E_Diaeresis
=> Lower
,
257 LC_I_Circumflex
=> Lower
,
258 LC_I_Diaeresis
=> Lower
,
259 LC_Icelandic_Eth
=> Lower
+ Basic
,
263 LC_O_Circumflex
=> Lower
,
265 LC_O_Diaeresis
=> Lower
,
267 Division_Sign
=> Special
,
269 LC_O_Oblique_Stroke
=> Lower
,
272 LC_U_Circumflex
=> Lower
,
273 LC_U_Diaeresis
=> Lower
,
275 LC_Icelandic_Thorn
=> Lower
+ Basic
,
276 LC_Y_Diaeresis
=> Lower
279 ---------------------
280 -- Is_Alphanumeric --
281 ---------------------
283 function Is_Alphanumeric
(Item
: Character) return Boolean is
285 return (Char_Map
(Item
) and Alphanum
) /= 0;
292 function Is_Basic
(Item
: Character) return Boolean is
294 return (Char_Map
(Item
) and Basic
) /= 0;
301 function Is_Character
(Item
: Wide_Character) return Boolean is
303 return Wide_Character'Pos (Item
) < 256;
310 function Is_Control
(Item
: Character) return Boolean is
312 return (Char_Map
(Item
) and Control
) /= 0;
319 function Is_Digit
(Item
: Character) return Boolean is
321 return Item
in '0' .. '9';
328 function Is_Graphic
(Item
: Character) return Boolean is
330 return (Char_Map
(Item
) and Graphic
) /= 0;
333 --------------------------
334 -- Is_Hexadecimal_Digit --
335 --------------------------
337 function Is_Hexadecimal_Digit
(Item
: Character) return Boolean is
339 return (Char_Map
(Item
) and Hex_Digit
) /= 0;
340 end Is_Hexadecimal_Digit
;
346 function Is_ISO_646
(Item
: Character) return Boolean is
348 return Item
in ISO_646
;
351 -- Note: much more efficient coding of the following function is possible
352 -- by testing several 16#80# bits in a complete word in a single operation
354 function Is_ISO_646
(Item
: String) return Boolean is
356 for J
in Item
'Range loop
357 if Item
(J
) not in ISO_646
then
369 function Is_Letter
(Item
: Character) return Boolean is
371 return (Char_Map
(Item
) and Letter
) /= 0;
374 ------------------------
375 -- Is_Line_Terminator --
376 ------------------------
378 function Is_Line_Terminator
(Item
: Character) return Boolean is
380 return (Char_Map
(Item
) and Line_Term
) /= 0;
381 end Is_Line_Terminator
;
387 function Is_Lower
(Item
: Character) return Boolean is
389 return (Char_Map
(Item
) and Lower
) /= 0;
396 function Is_Mark
(Item
: Character) return Boolean is
397 pragma Unreferenced
(Item
);
402 ---------------------
403 -- Is_Other_Format --
404 ---------------------
406 function Is_Other_Format
(Item
: Character) return Boolean is
408 return Item
= Soft_Hyphen
;
411 ------------------------------
412 -- Is_Punctuation_Connector --
413 ------------------------------
415 function Is_Punctuation_Connector
(Item
: Character) return Boolean is
418 end Is_Punctuation_Connector
;
424 function Is_Space
(Item
: Character) return Boolean is
426 return Item
= ' ' or else Item
= No_Break_Space
;
433 function Is_Special
(Item
: Character) return Boolean is
435 return (Char_Map
(Item
) and Special
) /= 0;
442 function Is_String
(Item
: Wide_String) return Boolean is
444 for J
in Item
'Range loop
445 if Wide_Character'Pos (Item
(J
)) >= 256 then
457 function Is_Upper
(Item
: Character) return Boolean is
459 return (Char_Map
(Item
) and Upper
) /= 0;
466 function To_Basic
(Item
: Character) return Character is
468 return Value
(Basic_Map
, Item
);
471 function To_Basic
(Item
: String) return String is
473 return Result
: String (1 .. Item
'Length) do
474 for J
in Item
'Range loop
475 Result
(J
- (Item
'First - 1)) := Value
(Basic_Map
, Item
(J
));
484 function To_Character
485 (Item
: Wide_Character;
486 Substitute
: Character := ' ') return Character
489 if Is_Character
(Item
) then
490 return Character'Val (Wide_Character'Pos (Item
));
502 Substitute
: ISO_646
:= ' ') return ISO_646
505 return (if Item
in ISO_646
then Item
else Substitute
);
510 Substitute
: ISO_646
:= ' ') return String
512 Result
: String (1 .. Item
'Length);
515 for J
in Item
'Range loop
516 Result
(J
- (Item
'First - 1)) :=
517 (if Item
(J
) in ISO_646
then Item
(J
) else Substitute
);
527 function To_Lower
(Item
: Character) return Character is
529 return Value
(Lower_Case_Map
, Item
);
532 function To_Lower
(Item
: String) return String is
534 return Result
: String (1 .. Item
'Length) do
535 for J
in Item
'Range loop
536 Result
(J
- (Item
'First - 1)) := Value
(Lower_Case_Map
, Item
(J
));
547 Substitute
: Character := ' ') return String
549 Result
: String (1 .. Item
'Length);
552 for J
in Item
'Range loop
553 Result
(J
- (Item
'First - 1)) := To_Character
(Item
(J
), Substitute
);
564 (Item
: Character) return Character
567 return Value
(Upper_Case_Map
, Item
);
571 (Item
: String) return String
574 return Result
: String (1 .. Item
'Length) do
575 for J
in Item
'Range loop
576 Result
(J
- (Item
'First - 1)) := Value
(Upper_Case_Map
, Item
(J
));
581 -----------------------
582 -- To_Wide_Character --
583 -----------------------
585 function To_Wide_Character
586 (Item
: Character) return Wide_Character
589 return Wide_Character'Val (Character'Pos (Item
));
590 end To_Wide_Character
;
596 function To_Wide_String
597 (Item
: String) return Wide_String
599 Result
: Wide_String (1 .. Item
'Length);
602 for J
in Item
'Range loop
603 Result
(J
- (Item
'First - 1)) := To_Wide_Character
(Item
(J
));
609 end Ada
.Characters
.Handling
;