PR sanitizer/80403
[official-gcc.git] / gcc / ada / a-chahan.adb
blobf95a7bb0eaf69681997dcd1aca8808003b2bf4e7
1 ------------------------------------------------------------------------------
2 -- --
3 -- GNAT RUN-TIME COMPONENTS --
4 -- --
5 -- A D A . C H A R A C T E R S . H A N D L I N G --
6 -- --
7 -- B o d y --
8 -- --
9 -- Copyright (C) 1992-2013, Free Software Foundation, Inc. --
10 -- --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 3, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. --
17 -- --
18 -- As a special exception under Section 7 of GPL version 3, you are granted --
19 -- additional permissions described in the GCC Runtime Library Exception, --
20 -- version 3.1, as published by the Free Software Foundation. --
21 -- --
22 -- You should have received a copy of the GNU General Public License and --
23 -- a copy of the GCC Runtime Library Exception along with this program; --
24 -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
25 -- <http://www.gnu.org/licenses/>. --
26 -- --
27 -- GNAT was originally developed by the GNAT team at New York University. --
28 -- Extensive contributions were provided by Ada Core Technologies Inc. --
29 -- --
30 ------------------------------------------------------------------------------
32 with Ada.Characters.Latin_1; use Ada.Characters.Latin_1;
33 with Ada.Strings.Maps; use Ada.Strings.Maps;
34 with Ada.Strings.Maps.Constants; use Ada.Strings.Maps.Constants;
36 package body Ada.Characters.Handling is
38 ------------------------------------
39 -- Character Classification Table --
40 ------------------------------------
42 type Character_Flags is mod 256;
43 for Character_Flags'Size use 8;
45 Control : constant Character_Flags := 1;
46 Lower : constant Character_Flags := 2;
47 Upper : constant Character_Flags := 4;
48 Basic : constant Character_Flags := 8;
49 Hex_Digit : constant Character_Flags := 16;
50 Digit : constant Character_Flags := 32;
51 Special : constant Character_Flags := 64;
52 Line_Term : constant Character_Flags := 128;
54 Letter : constant Character_Flags := Lower or Upper;
55 Alphanum : constant Character_Flags := Letter or Digit;
56 Graphic : constant Character_Flags := Alphanum or Special;
58 Char_Map : constant array (Character) of Character_Flags :=
60 NUL => Control,
61 SOH => Control,
62 STX => Control,
63 ETX => Control,
64 EOT => Control,
65 ENQ => Control,
66 ACK => Control,
67 BEL => Control,
68 BS => Control,
69 HT => Control,
70 LF => Control + Line_Term,
71 VT => Control + Line_Term,
72 FF => Control + Line_Term,
73 CR => Control + Line_Term,
74 SO => Control,
75 SI => Control,
77 DLE => Control,
78 DC1 => Control,
79 DC2 => Control,
80 DC3 => Control,
81 DC4 => Control,
82 NAK => Control,
83 SYN => Control,
84 ETB => Control,
85 CAN => Control,
86 EM => Control,
87 SUB => Control,
88 ESC => Control,
89 FS => Control,
90 GS => Control,
91 RS => Control,
92 US => Control,
94 Space => Special,
95 Exclamation => Special,
96 Quotation => Special,
97 Number_Sign => Special,
98 Dollar_Sign => Special,
99 Percent_Sign => Special,
100 Ampersand => Special,
101 Apostrophe => Special,
102 Left_Parenthesis => Special,
103 Right_Parenthesis => Special,
104 Asterisk => Special,
105 Plus_Sign => Special,
106 Comma => Special,
107 Hyphen => Special,
108 Full_Stop => Special,
109 Solidus => Special,
111 '0' .. '9' => Digit + Hex_Digit,
113 Colon => Special,
114 Semicolon => Special,
115 Less_Than_Sign => Special,
116 Equals_Sign => Special,
117 Greater_Than_Sign => Special,
118 Question => Special,
119 Commercial_At => Special,
121 'A' .. 'F' => Upper + Basic + Hex_Digit,
122 'G' .. 'Z' => Upper + Basic,
124 Left_Square_Bracket => Special,
125 Reverse_Solidus => Special,
126 Right_Square_Bracket => Special,
127 Circumflex => Special,
128 Low_Line => Special,
129 Grave => Special,
131 'a' .. 'f' => Lower + Basic + Hex_Digit,
132 'g' .. 'z' => Lower + Basic,
134 Left_Curly_Bracket => Special,
135 Vertical_Line => Special,
136 Right_Curly_Bracket => Special,
137 Tilde => Special,
139 DEL => Control,
140 Reserved_128 => Control,
141 Reserved_129 => Control,
142 BPH => Control,
143 NBH => Control,
144 Reserved_132 => Control,
145 NEL => Control + Line_Term,
146 SSA => Control,
147 ESA => Control,
148 HTS => Control,
149 HTJ => Control,
150 VTS => Control,
151 PLD => Control,
152 PLU => Control,
153 RI => Control,
154 SS2 => Control,
155 SS3 => Control,
157 DCS => Control,
158 PU1 => Control,
159 PU2 => Control,
160 STS => Control,
161 CCH => Control,
162 MW => Control,
163 SPA => Control,
164 EPA => Control,
166 SOS => Control,
167 Reserved_153 => Control,
168 SCI => Control,
169 CSI => Control,
170 ST => Control,
171 OSC => Control,
172 PM => Control,
173 APC => Control,
175 No_Break_Space => Special,
176 Inverted_Exclamation => Special,
177 Cent_Sign => Special,
178 Pound_Sign => Special,
179 Currency_Sign => Special,
180 Yen_Sign => Special,
181 Broken_Bar => Special,
182 Section_Sign => Special,
183 Diaeresis => Special,
184 Copyright_Sign => Special,
185 Feminine_Ordinal_Indicator => Special,
186 Left_Angle_Quotation => Special,
187 Not_Sign => Special,
188 Soft_Hyphen => Special,
189 Registered_Trade_Mark_Sign => Special,
190 Macron => Special,
191 Degree_Sign => Special,
192 Plus_Minus_Sign => Special,
193 Superscript_Two => Special,
194 Superscript_Three => Special,
195 Acute => Special,
196 Micro_Sign => Special,
197 Pilcrow_Sign => Special,
198 Middle_Dot => Special,
199 Cedilla => Special,
200 Superscript_One => Special,
201 Masculine_Ordinal_Indicator => Special,
202 Right_Angle_Quotation => Special,
203 Fraction_One_Quarter => Special,
204 Fraction_One_Half => Special,
205 Fraction_Three_Quarters => Special,
206 Inverted_Question => Special,
208 UC_A_Grave => Upper,
209 UC_A_Acute => Upper,
210 UC_A_Circumflex => Upper,
211 UC_A_Tilde => Upper,
212 UC_A_Diaeresis => Upper,
213 UC_A_Ring => Upper,
214 UC_AE_Diphthong => Upper + Basic,
215 UC_C_Cedilla => Upper,
216 UC_E_Grave => Upper,
217 UC_E_Acute => Upper,
218 UC_E_Circumflex => Upper,
219 UC_E_Diaeresis => Upper,
220 UC_I_Grave => Upper,
221 UC_I_Acute => Upper,
222 UC_I_Circumflex => Upper,
223 UC_I_Diaeresis => Upper,
224 UC_Icelandic_Eth => Upper + Basic,
225 UC_N_Tilde => Upper,
226 UC_O_Grave => Upper,
227 UC_O_Acute => Upper,
228 UC_O_Circumflex => Upper,
229 UC_O_Tilde => Upper,
230 UC_O_Diaeresis => Upper,
232 Multiplication_Sign => Special,
234 UC_O_Oblique_Stroke => Upper,
235 UC_U_Grave => Upper,
236 UC_U_Acute => Upper,
237 UC_U_Circumflex => Upper,
238 UC_U_Diaeresis => Upper,
239 UC_Y_Acute => Upper,
240 UC_Icelandic_Thorn => Upper + Basic,
242 LC_German_Sharp_S => Lower + Basic,
243 LC_A_Grave => Lower,
244 LC_A_Acute => Lower,
245 LC_A_Circumflex => Lower,
246 LC_A_Tilde => Lower,
247 LC_A_Diaeresis => Lower,
248 LC_A_Ring => Lower,
249 LC_AE_Diphthong => Lower + Basic,
250 LC_C_Cedilla => Lower,
251 LC_E_Grave => Lower,
252 LC_E_Acute => Lower,
253 LC_E_Circumflex => Lower,
254 LC_E_Diaeresis => Lower,
255 LC_I_Grave => Lower,
256 LC_I_Acute => Lower,
257 LC_I_Circumflex => Lower,
258 LC_I_Diaeresis => Lower,
259 LC_Icelandic_Eth => Lower + Basic,
260 LC_N_Tilde => Lower,
261 LC_O_Grave => Lower,
262 LC_O_Acute => Lower,
263 LC_O_Circumflex => Lower,
264 LC_O_Tilde => Lower,
265 LC_O_Diaeresis => Lower,
267 Division_Sign => Special,
269 LC_O_Oblique_Stroke => Lower,
270 LC_U_Grave => Lower,
271 LC_U_Acute => Lower,
272 LC_U_Circumflex => Lower,
273 LC_U_Diaeresis => Lower,
274 LC_Y_Acute => Lower,
275 LC_Icelandic_Thorn => Lower + Basic,
276 LC_Y_Diaeresis => Lower
279 ---------------------
280 -- Is_Alphanumeric --
281 ---------------------
283 function Is_Alphanumeric (Item : Character) return Boolean is
284 begin
285 return (Char_Map (Item) and Alphanum) /= 0;
286 end Is_Alphanumeric;
288 --------------
289 -- Is_Basic --
290 --------------
292 function Is_Basic (Item : Character) return Boolean is
293 begin
294 return (Char_Map (Item) and Basic) /= 0;
295 end Is_Basic;
297 ------------------
298 -- Is_Character --
299 ------------------
301 function Is_Character (Item : Wide_Character) return Boolean is
302 begin
303 return Wide_Character'Pos (Item) < 256;
304 end Is_Character;
306 ----------------
307 -- Is_Control --
308 ----------------
310 function Is_Control (Item : Character) return Boolean is
311 begin
312 return (Char_Map (Item) and Control) /= 0;
313 end Is_Control;
315 --------------
316 -- Is_Digit --
317 --------------
319 function Is_Digit (Item : Character) return Boolean is
320 begin
321 return Item in '0' .. '9';
322 end Is_Digit;
324 ----------------
325 -- Is_Graphic --
326 ----------------
328 function Is_Graphic (Item : Character) return Boolean is
329 begin
330 return (Char_Map (Item) and Graphic) /= 0;
331 end Is_Graphic;
333 --------------------------
334 -- Is_Hexadecimal_Digit --
335 --------------------------
337 function Is_Hexadecimal_Digit (Item : Character) return Boolean is
338 begin
339 return (Char_Map (Item) and Hex_Digit) /= 0;
340 end Is_Hexadecimal_Digit;
342 ----------------
343 -- Is_ISO_646 --
344 ----------------
346 function Is_ISO_646 (Item : Character) return Boolean is
347 begin
348 return Item in ISO_646;
349 end Is_ISO_646;
351 -- Note: much more efficient coding of the following function is possible
352 -- by testing several 16#80# bits in a complete word in a single operation
354 function Is_ISO_646 (Item : String) return Boolean is
355 begin
356 for J in Item'Range loop
357 if Item (J) not in ISO_646 then
358 return False;
359 end if;
360 end loop;
362 return True;
363 end Is_ISO_646;
365 ---------------
366 -- Is_Letter --
367 ---------------
369 function Is_Letter (Item : Character) return Boolean is
370 begin
371 return (Char_Map (Item) and Letter) /= 0;
372 end Is_Letter;
374 ------------------------
375 -- Is_Line_Terminator --
376 ------------------------
378 function Is_Line_Terminator (Item : Character) return Boolean is
379 begin
380 return (Char_Map (Item) and Line_Term) /= 0;
381 end Is_Line_Terminator;
383 --------------
384 -- Is_Lower --
385 --------------
387 function Is_Lower (Item : Character) return Boolean is
388 begin
389 return (Char_Map (Item) and Lower) /= 0;
390 end Is_Lower;
392 -------------
393 -- Is_Mark --
394 -------------
396 function Is_Mark (Item : Character) return Boolean is
397 pragma Unreferenced (Item);
398 begin
399 return False;
400 end Is_Mark;
402 ---------------------
403 -- Is_Other_Format --
404 ---------------------
406 function Is_Other_Format (Item : Character) return Boolean is
407 begin
408 return Item = Soft_Hyphen;
409 end Is_Other_Format;
411 ------------------------------
412 -- Is_Punctuation_Connector --
413 ------------------------------
415 function Is_Punctuation_Connector (Item : Character) return Boolean is
416 begin
417 return Item = '_';
418 end Is_Punctuation_Connector;
420 --------------
421 -- Is_Space --
422 --------------
424 function Is_Space (Item : Character) return Boolean is
425 begin
426 return Item = ' ' or else Item = No_Break_Space;
427 end Is_Space;
429 ----------------
430 -- Is_Special --
431 ----------------
433 function Is_Special (Item : Character) return Boolean is
434 begin
435 return (Char_Map (Item) and Special) /= 0;
436 end Is_Special;
438 ---------------
439 -- Is_String --
440 ---------------
442 function Is_String (Item : Wide_String) return Boolean is
443 begin
444 for J in Item'Range loop
445 if Wide_Character'Pos (Item (J)) >= 256 then
446 return False;
447 end if;
448 end loop;
450 return True;
451 end Is_String;
453 --------------
454 -- Is_Upper --
455 --------------
457 function Is_Upper (Item : Character) return Boolean is
458 begin
459 return (Char_Map (Item) and Upper) /= 0;
460 end Is_Upper;
462 --------------
463 -- To_Basic --
464 --------------
466 function To_Basic (Item : Character) return Character is
467 begin
468 return Value (Basic_Map, Item);
469 end To_Basic;
471 function To_Basic (Item : String) return String is
472 begin
473 return Result : String (1 .. Item'Length) do
474 for J in Item'Range loop
475 Result (J - (Item'First - 1)) := Value (Basic_Map, Item (J));
476 end loop;
477 end return;
478 end To_Basic;
480 ------------------
481 -- To_Character --
482 ------------------
484 function To_Character
485 (Item : Wide_Character;
486 Substitute : Character := ' ') return Character
488 begin
489 if Is_Character (Item) then
490 return Character'Val (Wide_Character'Pos (Item));
491 else
492 return Substitute;
493 end if;
494 end To_Character;
496 ----------------
497 -- To_ISO_646 --
498 ----------------
500 function To_ISO_646
501 (Item : Character;
502 Substitute : ISO_646 := ' ') return ISO_646
504 begin
505 return (if Item in ISO_646 then Item else Substitute);
506 end To_ISO_646;
508 function To_ISO_646
509 (Item : String;
510 Substitute : ISO_646 := ' ') return String
512 Result : String (1 .. Item'Length);
514 begin
515 for J in Item'Range loop
516 Result (J - (Item'First - 1)) :=
517 (if Item (J) in ISO_646 then Item (J) else Substitute);
518 end loop;
520 return Result;
521 end To_ISO_646;
523 --------------
524 -- To_Lower --
525 --------------
527 function To_Lower (Item : Character) return Character is
528 begin
529 return Value (Lower_Case_Map, Item);
530 end To_Lower;
532 function To_Lower (Item : String) return String is
533 begin
534 return Result : String (1 .. Item'Length) do
535 for J in Item'Range loop
536 Result (J - (Item'First - 1)) := Value (Lower_Case_Map, Item (J));
537 end loop;
538 end return;
539 end To_Lower;
541 ---------------
542 -- To_String --
543 ---------------
545 function To_String
546 (Item : Wide_String;
547 Substitute : Character := ' ') return String
549 Result : String (1 .. Item'Length);
551 begin
552 for J in Item'Range loop
553 Result (J - (Item'First - 1)) := To_Character (Item (J), Substitute);
554 end loop;
556 return Result;
557 end To_String;
559 --------------
560 -- To_Upper --
561 --------------
563 function To_Upper
564 (Item : Character) return Character
566 begin
567 return Value (Upper_Case_Map, Item);
568 end To_Upper;
570 function To_Upper
571 (Item : String) return String
573 begin
574 return Result : String (1 .. Item'Length) do
575 for J in Item'Range loop
576 Result (J - (Item'First - 1)) := Value (Upper_Case_Map, Item (J));
577 end loop;
578 end return;
579 end To_Upper;
581 -----------------------
582 -- To_Wide_Character --
583 -----------------------
585 function To_Wide_Character
586 (Item : Character) return Wide_Character
588 begin
589 return Wide_Character'Val (Character'Pos (Item));
590 end To_Wide_Character;
592 --------------------
593 -- To_Wide_String --
594 --------------------
596 function To_Wide_String
597 (Item : String) return Wide_String
599 Result : Wide_String (1 .. Item'Length);
601 begin
602 for J in Item'Range loop
603 Result (J - (Item'First - 1)) := To_Wide_Character (Item (J));
604 end loop;
606 return Result;
607 end To_Wide_String;
609 end Ada.Characters.Handling;