1 ------------------------------------------------------------------------------
3 -- GNAT RUN-TIME COMPONENTS --
5 -- A D A . C H A R A C T E R S . H A N D L I N G --
9 -- Copyright (C) 1992-2024, Free Software Foundation, Inc. --
11 -- This specification is derived from the Ada Reference Manual for use with --
12 -- GNAT. The copyright notice above, and the license provisions that follow --
13 -- apply solely to the contents of the part following the private keyword. --
15 -- GNAT is free software; you can redistribute it and/or modify it under --
16 -- terms of the GNU General Public License as published by the Free Soft- --
17 -- ware Foundation; either version 3, or (at your option) any later ver- --
18 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
19 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
20 -- or FITNESS FOR A PARTICULAR PURPOSE. --
22 -- As a special exception under Section 7 of GPL version 3, you are granted --
23 -- additional permissions described in the GCC Runtime Library Exception, --
24 -- version 3.1, as published by the Free Software Foundation. --
26 -- You should have received a copy of the GNU General Public License and --
27 -- a copy of the GCC Runtime Library Exception along with this program; --
28 -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
29 -- <http://www.gnu.org/licenses/>. --
31 -- GNAT was originally developed by the GNAT team at New York University. --
32 -- Extensive contributions were provided by Ada Core Technologies Inc. --
34 ------------------------------------------------------------------------------
--  The postconditions declared in this unit exist for analysis only. They
--  are not checked at run time, so that the functions declared here are not
--  slowed down by them.

pragma Assertion_Policy (Post => Ignore);
41 with Ada
.Characters
.Latin_1
;
43 package Ada
.Characters
.Handling
with
48 -- In accordance with Ada 2005 AI-362
50 ----------------------------------------
51 -- Character Classification Functions --
52 ----------------------------------------
54 -- In the description below for each function that returns a Boolean
55 -- result, the effect is described in terms of the conditions under which
56 -- the value True is returned. If these conditions are not met, then the
57 -- function returns False.
59 -- Each of the following classification functions has a formal Character
60 -- parameter, Item, and returns a Boolean result.
function Is_Control (Item : Character) return Boolean
with
  Post => Is_Control'Result =
    (Character'Pos (Item) in 0 .. 31 | 127 .. 159);
--  Returns True if Item is a control character, i.e. a character whose
--  position is in one of the ranges 0 .. 31 or 127 .. 159, and False
--  otherwise.
function Is_Graphic (Item : Character) return Boolean
with
  Post => Is_Graphic'Result =
    (Character'Pos (Item) in 32 .. 126 | 160 .. 255);
--  Returns True if Item is a graphic character, i.e. a character whose
--  position is in one of the ranges 32 .. 126 or 160 .. 255, and False
--  otherwise.
function Is_Letter (Item : Character) return Boolean
with
  Post => Is_Letter'Result =
    (Item in 'A' .. 'Z' | 'a' .. 'z'
      or else Character'Pos (Item) in 192 .. 214 | 216 .. 246 | 248 .. 255);
--  Returns True if Item is a letter, i.e. a character that is in one of the
--  ranges 'A' .. 'Z' or 'a' .. 'z', or whose position is in one of the
--  ranges 192 .. 214, 216 .. 246 or 248 .. 255; returns False otherwise.
function Is_Lower (Item : Character) return Boolean
with
  Post => Is_Lower'Result =
    (Item in 'a' .. 'z'
      or else Character'Pos (Item) in 223 .. 246 | 248 .. 255);
--  Returns True if Item is a lower-case letter, i.e. a character that is in
--  the range 'a' .. 'z', or whose position is in one of the ranges
--  223 .. 246 or 248 .. 255; returns False otherwise.
function Is_Upper (Item : Character) return Boolean
with
  Post => Is_Upper'Result =
    (Item in 'A' .. 'Z'
      or else Character'Pos (Item) in 192 .. 214 | 216 .. 222);
--  Returns True if Item is an upper-case letter, i.e. a character that is
--  in the range 'A' .. 'Z', or whose position is in one of the ranges
--  192 .. 214 or 216 .. 222; returns False otherwise.
function Is_Basic (Item : Character) return Boolean
with
  Post => Is_Basic'Result =
    (Item in 'A' .. 'Z'
           | 'a' .. 'z'
           | Latin_1.UC_AE_Diphthong
           | Latin_1.LC_AE_Diphthong
           | Latin_1.UC_Icelandic_Eth
           | Latin_1.LC_Icelandic_Eth
           | Latin_1.UC_Icelandic_Thorn
           | Latin_1.LC_Icelandic_Thorn
           | Latin_1.LC_German_Sharp_S);
--  Returns True if Item is a basic letter, i.e. a character that is in one
--  of the ranges 'A' .. 'Z' and 'a' .. 'z', or that is one of the following:
--  UC_AE_Diphthong, LC_AE_Diphthong, UC_Icelandic_Eth, LC_Icelandic_Eth,
--  UC_Icelandic_Thorn, LC_Icelandic_Thorn, or LC_German_Sharp_S. Returns
--  False otherwise.
function Is_Digit (Item : Character) return Boolean
with
  Post => Is_Digit'Result = (Item in '0' .. '9');
--  Returns True if Item is a decimal digit, i.e. a character in the range
--  '0' .. '9', and False otherwise.
function Is_Decimal_Digit (Item : Character) return Boolean
  renames Is_Digit;
--  Alternative name for Is_Digit, declared as a renaming as in the Ada
--  Reference Manual (A.3.2).
function Is_Hexadecimal_Digit (Item : Character) return Boolean
with
  Post => Is_Hexadecimal_Digit'Result =
    (Is_Decimal_Digit (Item) or Item in 'A' .. 'F' | 'a' .. 'f');
--  Returns True if Item is a hexadecimal digit, i.e. a character that is
--  either a decimal digit or in one of the ranges 'A' .. 'F' or 'a' .. 'f';
--  returns False otherwise.
function Is_Alphanumeric (Item : Character) return Boolean
with
  Post => Is_Alphanumeric'Result =
    (Is_Letter (Item) or Is_Decimal_Digit (Item));
--  Returns True if Item is an alphanumeric character, i.e. a character that
--  is either a letter or a decimal digit; returns False otherwise.
function Is_Special (Item : Character) return Boolean
with
  Post => Is_Special'Result =
    (Is_Graphic (Item) and not Is_Alphanumeric (Item));
--  Returns True if Item is a special graphic character, i.e. a graphic
--  character that is not alphanumeric; returns False otherwise.
function Is_Line_Terminator (Item : Character) return Boolean
with
  Post => Is_Line_Terminator'Result =
    (Character'Pos (Item) in 10 .. 13 | 133);
--  Returns True if Item is a character with position 10 .. 13 (Line_Feed,
--  Line_Tabulation, Form_Feed, Carriage_Return) or 133 (Next_Line);
--  returns False otherwise.
function Is_Mark (Item : Character) return Boolean
with
  Post => Is_Mark'Result = False;
--  Never returns True: no value of type Character has categories Mark,
--  Non-Spacing or Mark, Spacing Combining.
function Is_Other_Format (Item : Character) return Boolean
with
  Post => Is_Other_Format'Result = (Character'Pos (Item) = 173);
--  Returns True if Item is the character with position 173 (Soft_Hyphen),
--  and False otherwise.
function Is_Punctuation_Connector (Item : Character) return Boolean
with
  Post => Is_Punctuation_Connector'Result =
    (Character'Pos (Item) = 95);
--  Returns True if Item is the character with position 95 ('_', known as
--  Low_Line or Underscore), and False otherwise.
function Is_Space (Item : Character) return Boolean
with
  Post => Is_Space'Result = (Character'Pos (Item) in 32 | 160);
--  Returns True if Item is a character with position 32 (' ') or 160
--  (No_Break_Space), and False otherwise.
function Is_NFKC (Item : Character) return Boolean
with
  Post => Is_NFKC'Result =
    (Character'Pos (Item) not in
       160 | 168 | 170 | 175 | 178 | 179 | 180
           | 181 | 184 | 185 | 186 | 188 | 189 | 190);
--  Returns True if Item could be present in a string normalized to
--  Normalization Form KC (as defined by Clause 21 of ISO/IEC 10646:2017).
--  This includes all characters except those with positions 160, 168, 170,
--  175, 178, 179, 180, 181, 184, 185, 186, 188, 189, and 190.
194 ---------------------------------------------------
195 -- Conversion Functions for Character and String --
196 ---------------------------------------------------
198 -- Each of the names To_Lower, To_Upper, and To_Basic refers to two
199 -- functions: one that converts from Character to Character, and
200 -- the other that converts from String to String. The result of each
201 -- Character-to-Character function is described below, in terms of
202 -- the conversion applied to Item, its formal Character parameter. The
203 -- result of each String-to-String conversion is obtained by applying
204 -- to each element of the function's String parameter the corresponding
205 -- Character-to-Character conversion; the result is the null String if the
206 -- value of the formal parameter is the null String. The lower bound of the
207 -- result String is 1.
function To_Lower (Item : Character) return Character
with
  Post => To_Lower'Result =
    (if Is_Upper (Item) then
       Character'Val (Character'Pos (Item) +
         (if Item in 'A' .. 'Z' then
            Character'Pos ('a') - Character'Pos ('A')
          else
            Character'Pos (Latin_1.LC_A_Grave)
            - Character'Pos (Latin_1.UC_A_Grave)))
     else
       Item);
--  Returns the corresponding lower-case value for Item if Is_Upper (Item),
--  and returns Item otherwise. The postcondition expresses the lower-case
--  value as a position offset: the distance from 'A' to 'a' for the range
--  'A' .. 'Z', and the distance from UC_A_Grave to LC_A_Grave for the other
--  upper-case letters.
function To_Upper (Item : Character) return Character
with
  Post => To_Upper'Result =
    (if Is_Lower (Item)
       and then Item not in Latin_1.LC_German_Sharp_S
                          | Latin_1.LC_Y_Diaeresis
     then
       Character'Val (Character'Pos (Item) +
         (if Item in 'a' .. 'z' then
            Character'Pos ('A') - Character'Pos ('a')
          else
            Character'Pos (Latin_1.UC_A_Grave)
            - Character'Pos (Latin_1.LC_A_Grave)))
     else
       Item);
--  Returns the corresponding upper-case value for Item if Is_Lower (Item)
--  and Item has an upper-case form, and returns Item otherwise. The lower
--  case letters LC_German_Sharp_S and LC_Y_Diaeresis do not have upper case
--  forms.
--
--  The inner condition tests the range 'a' .. 'z', because Item is known to
--  be a lower-case letter at that point. Both offsets have the same value,
--  so the choice of branch does not affect the specified result.
function To_Basic (Item : Character) return Character
with
  Post => To_Basic'Result =
    (if not Is_Letter (Item) or else Is_Basic (Item) then
       Item
     else
       (case Item is
          when Latin_1.UC_A_Grave .. Latin_1.UC_A_Ring => 'A',
          when Latin_1.UC_C_Cedilla => 'C',
          when Latin_1.UC_E_Grave .. Latin_1.UC_E_Diaeresis => 'E',
          when Latin_1.UC_I_Grave .. Latin_1.UC_I_Diaeresis => 'I',
          when Latin_1.UC_N_Tilde => 'N',
          when Latin_1.UC_O_Grave .. Latin_1.UC_O_Diaeresis => 'O',
          when Latin_1.UC_O_Oblique_Stroke => 'O',
          when Latin_1.UC_U_Grave .. Latin_1.UC_U_Diaeresis => 'U',
          when Latin_1.UC_Y_Acute => 'Y',
          when Latin_1.LC_A_Grave .. Latin_1.LC_A_Ring => 'a',
          when Latin_1.LC_C_Cedilla => 'c',
          when Latin_1.LC_E_Grave .. Latin_1.LC_E_Diaeresis => 'e',
          when Latin_1.LC_I_Grave .. Latin_1.LC_I_Diaeresis => 'i',
          when Latin_1.LC_N_Tilde => 'n',
          when Latin_1.LC_O_Grave .. Latin_1.LC_O_Diaeresis => 'o',
          when Latin_1.LC_O_Oblique_Stroke => 'o',
          when Latin_1.LC_U_Grave .. Latin_1.LC_U_Diaeresis => 'u',
          when Latin_1.LC_Y_Acute => 'y',
          when Latin_1.LC_Y_Diaeresis => 'y',
          when others => raise Program_Error));
--  Returns the letter corresponding to Item but with no diacritical mark,
--  if Item is a letter but not a basic letter; returns Item otherwise.
--  The "others" alternative of the case expression raises Program_Error;
--  it stands for the characters that are not listed among the non-basic
--  letters above.
function To_Lower (Item : String) return String
with
  Post => To_Lower'Result'First = 1
    and then To_Lower'Result'Length = Item'Length
    and then
      (for all J in To_Lower'Result'Range =>
         To_Lower'Result (J) = To_Lower (Item (Item'First + (J - 1))));
--  Returns the String whose lower bound is 1, whose length is Item'Length,
--  and each of whose elements is the Character-to-Character To_Lower of the
--  corresponding element of Item.
function To_Upper (Item : String) return String
with
  Post => To_Upper'Result'First = 1
    and then To_Upper'Result'Length = Item'Length
    and then
      (for all J in To_Upper'Result'Range =>
         To_Upper'Result (J) = To_Upper (Item (Item'First + (J - 1))));
--  Returns the String whose lower bound is 1, whose length is Item'Length,
--  and each of whose elements is the Character-to-Character To_Upper of the
--  corresponding element of Item.
function To_Basic (Item : String) return String
with
  Post => To_Basic'Result'First = 1
    and then To_Basic'Result'Length = Item'Length
    and then
      (for all J in To_Basic'Result'Range =>
         To_Basic'Result (J) = To_Basic (Item (Item'First + (J - 1))));
--  Returns the String whose lower bound is 1, whose length is Item'Length,
--  and each of whose elements is the Character-to-Character To_Basic of the
--  corresponding element of Item.
298 ----------------------------------------------------------------------
299 -- Classifications of and Conversions Between Character and ISO 646 --
300 ----------------------------------------------------------------------
302 -- The following set of functions test for membership in the ISO 646
303 -- character range, or convert between ISO 646 and Character.
subtype ISO_646 is
  Character range Character'Val (0) .. Character'Val (127);
--  The characters of type Character whose position is in the range 0 .. 127
function Is_ISO_646 (Item : Character) return Boolean
with
  Post => Is_ISO_646'Result = (Item in ISO_646);
--  The function whose formal parameter, Item, is of type Character returns
--  True if Item is in the subtype ISO_646, and False otherwise.
function Is_ISO_646 (Item : String) return Boolean
with
  Post => Is_ISO_646'Result =
    (for all J in Item'Range => Is_ISO_646 (Item (J)));
--  The function whose formal parameter, Item, is of type String returns
--  True if Is_ISO_646 (Item (I)) is True for each I in Item'Range, and
--  False otherwise.
function To_ISO_646
  (Item       : Character;
   Substitute : ISO_646 := ' ') return ISO_646
with
  Post => To_ISO_646'Result =
    (if Is_ISO_646 (Item) then Item else Substitute);
--  The function whose first formal parameter, Item, is of type Character
--  returns Item if Is_ISO_646 (Item), and returns the Substitute ISO_646
--  character otherwise. Substitute defaults to the space character.
function To_ISO_646
  (Item       : String;
   Substitute : ISO_646 := ' ') return String
with
  Post => To_ISO_646'Result'First = 1
    and then To_ISO_646'Result'Length = Item'Length
    and then
      (for all J in To_ISO_646'Result'Range =>
         To_ISO_646'Result (J) =
           To_ISO_646 (Item (Item'First + (J - 1)), Substitute));
--  The function whose first formal parameter, Item, is of type String
--  returns the String whose range is 1 .. Item'Length and each of whose
--  elements is given by To_ISO_646 of the corresponding element in Item,
--  called with the same Substitute. Substitute defaults to the space
--  character.
345 ------------------------------------------------------
346 -- Classifications of Wide_Character and Characters --
347 ------------------------------------------------------
349 -- Ada 2005 AI 395: these functions are moved to Ada.Characters.Conversions
350 -- and are considered obsolete in Ada.Characters.Handling. However we do
351 -- not complain about this obsolescence, since in practice it is necessary
352 -- to use these routines when creating code that is intended to run in
353 -- either Ada 95 or Ada 2005 mode.
355 -- We do however have to flag these if the pragma No_Obsolescent_Features
356 -- restriction is active (see Restrict.Check_Obsolescent_2005_Entity).
function Is_Character (Item : Wide_Character) return Boolean
with
  Post => Is_Character'Result =
    (Wide_Character'Pos (Item) <= Character'Pos (Character'Last));
--  Returns True if Wide_Character'Pos (Item) <=
--  Character'Pos (Character'Last), and False otherwise.
function Is_String (Item : Wide_String) return Boolean
with
  Post => Is_String'Result =
    (for all I in Item'Range => Is_Character (Item (I)));
--  Returns True if Is_Character (Item (I)) is True for each I in
--  Item'Range, and False otherwise.
371 ------------------------------------------------------
372 -- Conversions between Wide_Character and Character --
373 ------------------------------------------------------
375 -- Ada 2005 AI 395: these functions are moved to Ada.Characters.Conversions
376 -- and are considered obsolete in Ada.Characters.Handling. However we do
377 -- not complain about this obsolescence, since in practice it is necessary
378 -- to use these routines when creating code that is intended to run in
379 -- either Ada 95 or Ada 2005 mode.
381 -- We do however have to flag these if the pragma No_Obsolescent_Features
382 -- restriction is active (see Restrict.Check_Obsolescent_2005_Entity).
function To_Character
  (Item       : Wide_Character;
   Substitute : Character := ' ') return Character
with
  Post => To_Character'Result =
    (if Is_Character (Item) then
       Character'Val (Wide_Character'Pos (Item))
     else
       Substitute);
--  Returns the Character corresponding to Item if Is_Character (Item), and
--  returns the Substitute Character otherwise. Substitute defaults to the
--  space character.
function To_String
  (Item       : Wide_String;
   Substitute : Character := ' ') return String
with
  Post => To_String'Result'First = 1
    and then To_String'Result'Length = Item'Length
    and then
      (for all J in To_String'Result'Range =>
         To_String'Result (J) =
           To_Character (Item (Item'First + (J - 1)), Substitute));
--  Returns the String whose range is 1 .. Item'Length and each of whose
--  elements is given by To_Character of the corresponding element in Item,
--  called with the same Substitute. Substitute defaults to the space
--  character.
function To_Wide_Character
  (Item : Character) return Wide_Character
with
  Post => To_Wide_Character'Result =
    Wide_Character'Val (Character'Pos (Item));
--  Returns the Wide_Character X such that Character'Pos (Item) =
--  Wide_Character'Pos (X).
function To_Wide_String
  (Item : String) return Wide_String
with
  Post => To_Wide_String'Result'First = 1
    and then To_Wide_String'Result'Length = Item'Length
    and then
      (for all J in To_Wide_String'Result'Range =>
         To_Wide_String'Result (J) =
           To_Wide_Character (Item (Item'First + (J - 1))));
--  Returns the Wide_String whose range is 1 .. Item'Length and each of whose
--  elements is given by To_Wide_Character of the corresponding element in
--  Item.
--  Inlining is requested for the subprograms named below, listed in
--  alphabetical order.

pragma Inline
  (Is_Alphanumeric,
   Is_Basic,
   Is_Character,
   Is_Control,
   Is_Digit,
   Is_Graphic,
   Is_Hexadecimal_Digit,
   Is_ISO_646,
   Is_Letter,
   Is_Line_Terminator,
   Is_Lower,
   Is_Mark,
   Is_Other_Format,
   Is_Punctuation_Connector,
   Is_Space,
   Is_Special,
   Is_Upper,
   To_Basic,
   To_Character,
   To_Lower,
   To_Upper,
   To_Wide_Character);
454 end Ada
.Characters
.Handling
;