1 ------------------------------------------------------------------------------
3 -- GNAT RUN-TIME COMPONENTS --
5 -- A D A . C H A R A C T E R S . H A N D L I N G --
9 -- Copyright (C) 1992-2024, Free Software Foundation, Inc. --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 3, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. --
18 -- As a special exception under Section 7 of GPL version 3, you are granted --
19 -- additional permissions described in the GCC Runtime Library Exception, --
20 -- version 3.1, as published by the Free Software Foundation. --
22 -- You should have received a copy of the GNU General Public License and --
23 -- a copy of the GCC Runtime Library Exception along with this program; --
24 -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
25 -- <http://www.gnu.org/licenses/>. --
27 -- GNAT was originally developed by the GNAT team at New York University. --
28 -- Extensive contributions were provided by Ada Core Technologies Inc. --
30 ------------------------------------------------------------------------------
32 -- Loop invariants in this unit are meant for analysis only, not for run-time
33 -- checking, as it would be too costly otherwise. This is enforced by setting
34 -- the assertion policy to Ignore.
36 pragma Assertion_Policy
(Loop_Invariant
=> Ignore
);
38 with Ada
.Characters
.Latin_1
; use Ada
.Characters
.Latin_1
;
39 with Ada
.Strings
.Maps
; use Ada
.Strings
.Maps
;
40 with Ada
.Strings
.Maps
.Constants
; use Ada
.Strings
.Maps
.Constants
;
42 package body Ada
.Characters
.Handling
46 ------------------------------------
47 -- Character Classification Table --
48 ------------------------------------
50 type Character_Flags
is mod 256;
51 for Character_Flags
'Size use 8;
53 Control
: constant Character_Flags
:= 1;
54 Lower
: constant Character_Flags
:= 2;
55 Upper
: constant Character_Flags
:= 4;
56 Basic
: constant Character_Flags
:= 8;
57 Hex_Digit
: constant Character_Flags
:= 16;
58 Digit
: constant Character_Flags
:= 32;
59 Special
: constant Character_Flags
:= 64;
60 Line_Term
: constant Character_Flags
:= 128;
62 Letter
: constant Character_Flags
:= Lower
or Upper
;
63 Alphanum
: constant Character_Flags
:= Letter
or Digit
;
64 Graphic
: constant Character_Flags
:= Alphanum
or Special
;
66 Char_Map
: constant array (Character) of Character_Flags
:=
78 LF
=> Control
+ Line_Term
,
79 VT
=> Control
+ Line_Term
,
80 FF
=> Control
+ Line_Term
,
81 CR
=> Control
+ Line_Term
,
103 Exclamation
=> Special
,
104 Quotation
=> Special
,
105 Number_Sign
=> Special
,
106 Dollar_Sign
=> Special
,
107 Percent_Sign
=> Special
,
108 Ampersand
=> Special
,
109 Apostrophe
=> Special
,
110 Left_Parenthesis
=> Special
,
111 Right_Parenthesis
=> Special
,
113 Plus_Sign
=> Special
,
116 Full_Stop
=> Special
,
119 '0' .. '9' => Digit
+ Hex_Digit
,
122 Semicolon
=> Special
,
123 Less_Than_Sign
=> Special
,
124 Equals_Sign
=> Special
,
125 Greater_Than_Sign
=> Special
,
127 Commercial_At
=> Special
,
129 'A' .. 'F' => Upper
+ Basic
+ Hex_Digit
,
130 'G' .. 'Z' => Upper
+ Basic
,
132 Left_Square_Bracket
=> Special
,
133 Reverse_Solidus
=> Special
,
134 Right_Square_Bracket
=> Special
,
135 Circumflex
=> Special
,
139 'a' .. 'f' => Lower
+ Basic
+ Hex_Digit
,
140 'g' .. 'z' => Lower
+ Basic
,
142 Left_Curly_Bracket
=> Special
,
143 Vertical_Line
=> Special
,
144 Right_Curly_Bracket
=> Special
,
148 Reserved_128
=> Control
,
149 Reserved_129
=> Control
,
152 Reserved_132
=> Control
,
153 NEL
=> Control
+ Line_Term
,
175 Reserved_153
=> Control
,
183 No_Break_Space
=> Special
,
184 Inverted_Exclamation
=> Special
,
185 Cent_Sign
=> Special
,
186 Pound_Sign
=> Special
,
187 Currency_Sign
=> Special
,
189 Broken_Bar
=> Special
,
190 Section_Sign
=> Special
,
191 Diaeresis
=> Special
,
192 Copyright_Sign
=> Special
,
193 Feminine_Ordinal_Indicator
=> Special
,
194 Left_Angle_Quotation
=> Special
,
196 Soft_Hyphen
=> Special
,
197 Registered_Trade_Mark_Sign
=> Special
,
199 Degree_Sign
=> Special
,
200 Plus_Minus_Sign
=> Special
,
201 Superscript_Two
=> Special
,
202 Superscript_Three
=> Special
,
204 Micro_Sign
=> Special
,
205 Pilcrow_Sign
=> Special
,
206 Middle_Dot
=> Special
,
208 Superscript_One
=> Special
,
209 Masculine_Ordinal_Indicator
=> Special
,
210 Right_Angle_Quotation
=> Special
,
211 Fraction_One_Quarter
=> Special
,
212 Fraction_One_Half
=> Special
,
213 Fraction_Three_Quarters
=> Special
,
214 Inverted_Question
=> Special
,
218 UC_A_Circumflex
=> Upper
,
220 UC_A_Diaeresis
=> Upper
,
222 UC_AE_Diphthong
=> Upper
+ Basic
,
223 UC_C_Cedilla
=> Upper
,
226 UC_E_Circumflex
=> Upper
,
227 UC_E_Diaeresis
=> Upper
,
230 UC_I_Circumflex
=> Upper
,
231 UC_I_Diaeresis
=> Upper
,
232 UC_Icelandic_Eth
=> Upper
+ Basic
,
236 UC_O_Circumflex
=> Upper
,
238 UC_O_Diaeresis
=> Upper
,
240 Multiplication_Sign
=> Special
,
242 UC_O_Oblique_Stroke
=> Upper
,
245 UC_U_Circumflex
=> Upper
,
246 UC_U_Diaeresis
=> Upper
,
248 UC_Icelandic_Thorn
=> Upper
+ Basic
,
250 LC_German_Sharp_S
=> Lower
+ Basic
,
253 LC_A_Circumflex
=> Lower
,
255 LC_A_Diaeresis
=> Lower
,
257 LC_AE_Diphthong
=> Lower
+ Basic
,
258 LC_C_Cedilla
=> Lower
,
261 LC_E_Circumflex
=> Lower
,
262 LC_E_Diaeresis
=> Lower
,
265 LC_I_Circumflex
=> Lower
,
266 LC_I_Diaeresis
=> Lower
,
267 LC_Icelandic_Eth
=> Lower
+ Basic
,
271 LC_O_Circumflex
=> Lower
,
273 LC_O_Diaeresis
=> Lower
,
275 Division_Sign
=> Special
,
277 LC_O_Oblique_Stroke
=> Lower
,
280 LC_U_Circumflex
=> Lower
,
281 LC_U_Diaeresis
=> Lower
,
283 LC_Icelandic_Thorn
=> Lower
+ Basic
,
284 LC_Y_Diaeresis
=> Lower
287 ---------------------
288 -- Is_Alphanumeric --
289 ---------------------
291 function Is_Alphanumeric
(Item
: Character) return Boolean is
293 return (Char_Map
(Item
) and Alphanum
) /= 0;
300 function Is_Basic
(Item
: Character) return Boolean is
302 return (Char_Map
(Item
) and Basic
) /= 0;
309 function Is_Character
(Item
: Wide_Character) return Boolean is
310 (Wide_Character'Pos (Item
) < 256);
316 function Is_Control
(Item
: Character) return Boolean is
318 return (Char_Map
(Item
) and Control
) /= 0;
325 function Is_Digit
(Item
: Character) return Boolean is
327 return Item
in '0' .. '9';
334 function Is_Graphic
(Item
: Character) return Boolean is
336 return (Char_Map
(Item
) and Graphic
) /= 0;
339 --------------------------
340 -- Is_Hexadecimal_Digit --
341 --------------------------
343 function Is_Hexadecimal_Digit
(Item
: Character) return Boolean is
345 return (Char_Map
(Item
) and Hex_Digit
) /= 0;
346 end Is_Hexadecimal_Digit
;
352 function Is_ISO_646
(Item
: Character) return Boolean is
355 -- Note: much more efficient coding of the following function is possible
356 -- by testing several 16#80# bits in a complete word in a single operation
358 function Is_ISO_646
(Item
: String) return Boolean is
360 for J
in Item
'Range loop
361 if Item
(J
) not in ISO_646
then
364 pragma Loop_Invariant
365 (for all K
in Item
'First .. J
=> Is_ISO_646
(Item
(K
)));
375 function Is_Letter
(Item
: Character) return Boolean is
377 return (Char_Map
(Item
) and Letter
) /= 0;
380 ------------------------
381 -- Is_Line_Terminator --
382 ------------------------
384 function Is_Line_Terminator
(Item
: Character) return Boolean is
386 return (Char_Map
(Item
) and Line_Term
) /= 0;
387 end Is_Line_Terminator
;
393 function Is_Lower
(Item
: Character) return Boolean is
395 return (Char_Map
(Item
) and Lower
) /= 0;
402 function Is_Mark
(Item
: Character) return Boolean is
403 pragma Unreferenced
(Item
);
412 function Is_NFKC
(Item
: Character) return Boolean is
414 return Character'Pos (Item
) not in
415 160 |
168 |
170 |
175 |
178 |
179 |
180 |
181 |
184 |
185 |
186 |
419 ---------------------
420 -- Is_Other_Format --
421 ---------------------
423 function Is_Other_Format
(Item
: Character) return Boolean is
425 return Item
= Soft_Hyphen
;
428 ------------------------------
429 -- Is_Punctuation_Connector --
430 ------------------------------
432 function Is_Punctuation_Connector
(Item
: Character) return Boolean is
435 end Is_Punctuation_Connector
;
441 function Is_Space
(Item
: Character) return Boolean is
443 return Item
= ' ' or else Item
= No_Break_Space
;
450 function Is_Special
(Item
: Character) return Boolean is
452 return (Char_Map
(Item
) and Special
) /= 0;
459 function Is_String
(Item
: Wide_String) return Boolean is
461 for J
in Item
'Range loop
462 if Wide_Character'Pos (Item
(J
)) >= 256 then
465 pragma Loop_Invariant
466 (for all K
in Item
'First .. J
=> Is_Character
(Item
(K
)));
476 function Is_Upper
(Item
: Character) return Boolean is
478 return (Char_Map
(Item
) and Upper
) /= 0;
485 function To_Basic
(Item
: Character) return Character is
486 (Value
(Basic_Map
, Item
));
488 function To_Basic
(Item
: String) return String is
490 return Result
: String (1 .. Item
'Length) with Relaxed_Initialization
do
491 for J
in Item
'Range loop
492 Result
(J
- (Item
'First - 1)) := Value
(Basic_Map
, Item
(J
));
493 pragma Loop_Invariant
494 (Result
(1 .. J
- Item
'First + 1)'Initialized);
495 pragma Loop_Invariant
496 (for all K
in Item
'First .. J
=>
497 Result
(K
- (Item
'First - 1)) = To_Basic
(Item
(K
)));
506 function To_Character
507 (Item
: Wide_Character;
508 Substitute
: Character := ' ') return Character
511 if Is_Character
(Item
) then
512 return Character'Val (Wide_Character'Pos (Item
));
524 Substitute
: ISO_646
:= ' ') return ISO_646
525 is (if Item
in ISO_646
then Item
else Substitute
);
529 Substitute
: ISO_646
:= ' ') return String
532 return Result
: String (1 .. Item
'Length) with Relaxed_Initialization
do
533 for J
in Item
'Range loop
534 Result
(J
- (Item
'First - 1)) :=
535 (if Item
(J
) in ISO_646
then Item
(J
) else Substitute
);
536 pragma Loop_Invariant
537 (Result
(1 .. J
- Item
'First + 1)'Initialized);
538 pragma Loop_Invariant
539 (for all K
in Item
'First .. J
=>
540 Result
(K
- (Item
'First - 1)) =
541 To_ISO_646
(Item
(K
), Substitute
));
550 function To_Lower
(Item
: Character) return Character is
551 (Value
(Lower_Case_Map
, Item
));
553 function To_Lower
(Item
: String) return String is
555 return Result
: String (1 .. Item
'Length) with Relaxed_Initialization
do
556 for J
in Item
'Range loop
557 Result
(J
- (Item
'First - 1)) := Value
(Lower_Case_Map
, Item
(J
));
558 pragma Loop_Invariant
559 (Result
(1 .. J
- Item
'First + 1)'Initialized);
560 pragma Loop_Invariant
561 (for all K
in Item
'First .. J
=>
562 Result
(K
- (Item
'First - 1)) = To_Lower
(Item
(K
)));
573 Substitute
: Character := ' ') return String
576 return Result
: String (1 .. Item
'Length) with Relaxed_Initialization
do
577 for J
in Item
'Range loop
578 Result
(J
- (Item
'First - 1)) :=
579 To_Character
(Item
(J
), Substitute
);
580 pragma Loop_Invariant
581 (Result
(1 .. J
- (Item
'First - 1))'Initialized);
582 pragma Loop_Invariant
583 (for all K
in Item
'First .. J
=>
584 Result
(K
- (Item
'First - 1)) =
585 To_Character
(Item
(K
), Substitute
));
594 function To_Upper
(Item
: Character) return Character is
595 (Value
(Upper_Case_Map
, Item
));
598 (Item
: String) return String
601 return Result
: String (1 .. Item
'Length) with Relaxed_Initialization
do
602 for J
in Item
'Range loop
603 Result
(J
- (Item
'First - 1)) := Value
(Upper_Case_Map
, Item
(J
));
604 pragma Loop_Invariant
605 (Result
(1 .. J
- Item
'First + 1)'Initialized);
606 pragma Loop_Invariant
607 (for all K
in Item
'First .. J
=>
608 Result
(K
- (Item
'First - 1)) = To_Upper
(Item
(K
)));
613 -----------------------
614 -- To_Wide_Character --
615 -----------------------
617 function To_Wide_Character
618 (Item
: Character) return Wide_Character
621 return Wide_Character'Val (Character'Pos (Item
));
622 end To_Wide_Character
;
628 function To_Wide_String
629 (Item
: String) return Wide_String
632 return Result
: Wide_String (1 .. Item
'Length)
633 with Relaxed_Initialization
635 for J
in Item
'Range loop
636 Result
(J
- (Item
'First - 1)) := To_Wide_Character
(Item
(J
));
637 pragma Loop_Invariant
638 (Result
(1 .. J
- (Item
'First - 1))'Initialized);
639 pragma Loop_Invariant
640 (for all K
in Item
'First .. J
=>
641 Result
(K
- (Item
'First - 1)) = To_Wide_Character
(Item
(K
)));
646 end Ada
.Characters
.Handling
;