1 ------------------------------------------------------------------------------
3 -- GNAT RUN-TIME COMPONENTS --
5 -- A D A . S T R I N G S . M A P S --
9 -- Copyright (C) 1992-2023, Free Software Foundation, Inc. --
11 -- This specification is derived from the Ada Reference Manual for use with --
12 -- GNAT. The copyright notice above, and the license provisions that follow --
13 -- apply solely to the contents of the part following the private keyword. --
15 -- GNAT is free software; you can redistribute it and/or modify it under --
16 -- terms of the GNU General Public License as published by the Free Soft- --
17 -- ware Foundation; either version 3, or (at your option) any later ver- --
18 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
19 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
20 -- or FITNESS FOR A PARTICULAR PURPOSE. --
22 -- As a special exception under Section 7 of GPL version 3, you are granted --
23 -- additional permissions described in the GCC Runtime Library Exception, --
24 -- version 3.1, as published by the Free Software Foundation. --
26 -- You should have received a copy of the GNU General Public License and --
27 -- a copy of the GCC Runtime Library Exception along with this program; --
28 -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
29 -- <http://www.gnu.org/licenses/>. --
31 -- GNAT was originally developed by the GNAT team at New York University. --
32 -- Extensive contributions were provided by Ada Core Technologies Inc. --
34 ------------------------------------------------------------------------------
36 -- The package Strings.Maps defines the types, operations, and other entities
37 -- needed for character sets and character-to-character mappings.
39 -- Preconditions in this unit are meant for analysis only, not for run-time
40 -- checking, so that the expected exceptions are raised. This is enforced by
41 -- setting the corresponding assertion policy to Ignore. Postconditions and
42 -- ghost code should not be executed at runtime as well, in order not to slow
43 -- down the execution of these functions.
45 pragma Assertion_Policy
(Pre
=> Ignore
,
49 with Ada
.Characters
.Latin_1
;
51 package Ada
.Strings
.Maps
with
56 -- In accordance with Ada 2005 AI-362
58 --------------------------------
59 -- Character Set Declarations --
60 --------------------------------
62 type Character_Set
is private;
63 pragma Preelaborable_Initialization
(Character_Set
);
64 -- An object of type Character_Set represents a set of characters.
66 Null_Set
: constant Character_Set
;
67 -- Null_Set represents the set containing no characters.
69 ---------------------------
70 -- Constructors for Sets --
71 ---------------------------
73 type Character_Range
is record
77 -- An object Obj of type Character_Range represents the set of characters
78 -- in the range Obj.Low .. Obj.High.
80 type Character_Ranges
is array (Positive range <>) of Character_Range
;
81 -- An object Obj of type Character_Ranges represents the union of the sets
82 -- corresponding to Obj(I) for I in Obj'Range.
84 function To_Set
(Ranges
: Character_Ranges
) return Character_Set
with
86 (if Ranges
'Length = 0 then To_Set
'Result = Null_Set
)
88 (for all Char
in Character =>
89 (if Is_In
(Char
, To_Set
'Result)
90 then (for some Span
of Ranges
=> Char
in Span
.Low
.. Span
.High
)))
92 (for all Span
of Ranges
=>
93 (for all Char
in Span
.Low
.. Span
.High
=>
94 Is_In
(Char
, To_Set
'Result)));
95 -- If Ranges'Length=0 then Null_Set is returned; otherwise, the returned
96 -- value represents the set corresponding to Ranges.
98 function To_Set
(Span
: Character_Range
) return Character_Set
with
100 (if Span
.High
< Span
.Low
then To_Set
'Result = Null_Set
)
102 (for all Char
in Character =>
103 (if Is_In
(Char
, To_Set
'Result) then Char
in Span
.Low
.. Span
.High
))
105 (for all Char
in Span
.Low
.. Span
.High
=> Is_In
(Char
, To_Set
'Result));
106 -- The returned value represents the set containing each character in Span.
108 function To_Ranges
(Set
: Character_Set
) return Character_Ranges
with
110 (if Set
= Null_Set
then To_Ranges
'Result'Length = 0)
112 (for all Char in Character =>
113 (if Is_In (Char, Set)
115 (for some Span of To_Ranges'Result =>
116 Char in Span.Low .. Span.High)))
118 (for all Span of To_Ranges'Result =>
119 (for all Char in Span.Low .. Span.High => Is_In (Char, Set)));
120 -- If Set = Null_Set, then an empty Character_Ranges array is returned;
121 -- otherwise, the shortest array of contiguous ranges of Character values
122 -- in Set, in increasing order of Low, is returned.
124 -- The postcondition above does not express that the result is the shortest
125 -- array and that it is sorted.
127 ----------------------------------
128 -- Operations on Character Sets --
129 ----------------------------------
131 function "=" (Left, Right : Character_Set) return Boolean with
135 (for all Char
in Character =>
136 (Is_In
(Char
, Left
) = Is_In
(Char
, Right
)));
137 -- The function "=" returns True if Left and Right represent identical
138 -- sets, and False otherwise.
140 -- Each of the logical operators "not", "and", "or", and "xor" returns a
141 -- Character_Set value that represents the set obtained by applying the
142 -- corresponding operation to the set(s) represented by the parameter(s)
145 function "not" (Right
: Character_Set
) return Character_Set
with
147 (for all Char
in Character =>
148 (Is_In
(Char
, "not"'Result)
150 not Is_In (Char, Right)));
152 function "and" (Left, Right : Character_Set) return Character_Set with
154 (for all Char in Character =>
155 (Is_In (Char, "and"'Result
)
157 (Is_In
(Char
, Left
) and Is_In
(Char
, Right
))));
159 function "or" (Left
, Right
: Character_Set
) return Character_Set
with
161 (for all Char
in Character =>
162 (Is_In
(Char
, "or"'Result)
164 (Is_In (Char, Left) or Is_In (Char, Right))));
166 function "xor" (Left, Right : Character_Set) return Character_Set with
168 (for all Char in Character =>
169 (Is_In (Char, "xor"'Result
)
171 (Is_In
(Char
, Left
) xor Is_In
(Char
, Right
))));
173 function "-" (Left
, Right
: Character_Set
) return Character_Set
with
175 (for all Char
in Character =>
176 (Is_In
(Char
, "-"'Result)
178 (Is_In (Char, Left) and not Is_In (Char, Right))));
179 -- "-"(Left, Right) is equivalent to "and"(Left, "not"(Right)).
182 (Element : Character;
183 Set : Character_Set) return Boolean;
184 -- Is_In returns True if Element is in Set, and False otherwise.
187 (Elements : Character_Set;
188 Set : Character_Set) return Boolean
193 (for all Char in Character =>
194 (if Is_In (Char, Elements) then Is_In (Char, Set)));
195 -- Is_Subset returns True if Elements is a subset of Set, and False
199 (Left : Character_Set;
200 Right : Character_Set) return Boolean
203 subtype Character_Sequence is String;
204 -- The Character_Sequence subtype is used to portray a set of character
205 -- values and also to identify the domain and range of a character mapping.
207 function SPARK_Proof_Sorted_Character_Sequence
208 (Seq : Character_Sequence) return Boolean
210 (for all J in Seq'Range =>
211 (if J /= Seq'Last then Seq (J) < Seq (J + 1)))
214 -- Check whether the Character_Sequence is sorted in stricly increasing
215 -- order, as expected from the result of To_Sequence and To_Domain.
217 -- Sequence portrays the set of character values that it explicitly
218 -- contains (ignoring duplicates). Singleton portrays the set comprising a
219 -- single Character. Each of the To_Set functions returns a Character_Set
220 -- value that represents the set portrayed by Sequence or Singleton.
222 function To_Set (Sequence : Character_Sequence) return Character_Set with
224 (if Sequence'Length = 0 then To_Set'Result = Null_Set)
226 (for all Char in Character =>
227 (if Is_In (Char, To_Set'Result)
228 then (for some X of Sequence => Char = X)))
230 (for all Char of Sequence => Is_In (Char, To_Set'Result));
232 function To_Set (Singleton : Character) return Character_Set with
234 Is_In (Singleton, To_Set'Result)
236 (for all Char in Character =>
237 (if Char /= Singleton
238 then not Is_In (Char, To_Set'Result)));
240 function To_Sequence (Set : Character_Set) return Character_Sequence with
242 (if Set = Null_Set then To_Sequence'Result'Length
= 0)
244 (for all Char
in Character =>
245 (if Is_In
(Char
, Set
)
246 then (for some X
of To_Sequence
'Result => Char
= X
)))
248 (for all Char
of To_Sequence
'Result => Is_In
(Char
, Set
))
250 SPARK_Proof_Sorted_Character_Sequence
(To_Sequence
'Result);
251 -- The function To_Sequence returns a Character_Sequence value containing
252 -- each of the characters in the set represented by Set, in ascending order
253 -- with no duplicates.
255 ------------------------------------
256 -- Character Mapping Declarations --
257 ------------------------------------
259 type Character_Mapping
is private;
260 pragma Preelaborable_Initialization
(Character_Mapping
);
261 -- An object of type Character_Mapping represents a Character-to-Character
264 type SPARK_Proof_Character_Mapping_Model
is
265 array (Character) of Character
267 -- Publicly visible model of a Character_Mapping
269 function SPARK_Proof_Model
270 (Map
: Character_Mapping
)
271 return SPARK_Proof_Character_Mapping_Model
273 -- Creation of a publicly visible model of a Character_Mapping
276 (Map
: Character_Mapping
;
277 Element
: Character) return Character
279 Post
=> Value
'Result = SPARK_Proof_Model
(Map
) (Element
);
280 -- The function Value returns the Character value to which Element maps
281 -- with respect to the mapping represented by Map.
283 -- A character C matches a pattern character P with respect to a given
284 -- Character_Mapping value Map if Value(Map, C) = P. A string S matches
285 -- a pattern string P with respect to a given Character_Mapping if
286 -- their lengths are the same and if each character in S matches its
287 -- corresponding character in the pattern string P.
289 -- String handling subprograms that deal with character mappings have
290 -- parameters whose type is Character_Mapping.
292 Identity
: constant Character_Mapping
;
293 -- Identity maps each Character to itself.
295 ----------------------------
296 -- Operations on Mappings --
297 ----------------------------
300 (From
, To
: Character_Sequence
) return Character_Mapping
303 From
'Length = To
'Length
305 (for all J
in From
'Range =>
306 (for all K
in From
'Range =>
307 (if J
/= K
then From
(J
) /= From
(K
)))),
309 (if From
= To
then To_Mapping
'Result = Identity
)
311 (for all Char
in Character =>
312 ((for all J
in From
'Range =>
314 then Value
(To_Mapping
'Result, Char
)
315 = To
(J
- From
'First + To
'First)))
317 (if (for all X
of From
=> Char
/= X
)
318 then Value
(To_Mapping
'Result, Char
) = Char
)));
319 -- To_Mapping produces a Character_Mapping such that each element of From
320 -- maps to the corresponding element of To, and each other character maps
321 -- to itself. If From'Length /= To'Length, or if some character is repeated
322 -- in From, then Translation_Error is propagated.
325 (Map
: Character_Mapping
) return Character_Sequence
with
327 (if Map
= Identity
then To_Domain
'Result'Length = 0)
329 To_Domain'Result'First
= 1
331 SPARK_Proof_Sorted_Character_Sequence
(To_Domain
'Result)
333 (for all Char
in Character =>
334 (if (for all X
of To_Domain
'Result => X
/= Char
)
335 then Value
(Map
, Char
) = Char
))
337 (for all Char
of To_Domain
'Result => Value
(Map
, Char
) /= Char
);
338 -- To_Domain returns the shortest Character_Sequence value D such that each
339 -- character not in D maps to itself, and such that the characters in D are
340 -- in ascending order. The lower bound of D is 1.
343 (Map
: Character_Mapping
) return Character_Sequence
with
345 To_Range
'Result'First = 1
347 To_Range'Result'Length
= To_Domain
(Map
)'Length
349 (for all J
in To_Range
'Result'Range =>
350 To_Range'Result (J) = Value (Map, To_Domain (Map) (J)));
351 -- To_Range returns the Character_Sequence value R, such that if D =
352 -- To_Domain(Map), then R has the same bounds as D, and D(I) maps to
353 -- R(I) for each I in D'Range.
355 -- A direct encoding of the Ada RM would be the postcondition
356 -- To_Range'Result'Last
= To_Domain
(Map
)'Last
357 -- which is not provable unless the postcondition of To_Domain is also
358 -- strengthened to state the value of the high bound for an empty result.
360 type Character_Mapping_Function
is
361 access function (From
: Character) return Character;
362 -- An object F of type Character_Mapping_Function maps a Character value C
363 -- to the Character value F.all(C), which is said to match C with respect
364 -- to mapping function F.
367 pragma Inline
(Is_In
);
368 pragma Inline
(Value
);
370 type Character_Set_Internal
is array (Character) of Boolean;
371 pragma Pack
(Character_Set_Internal
);
373 type Character_Set
is new Character_Set_Internal
;
374 -- Note: the reason for this level of derivation is to make sure
375 -- that the predefined logical operations on this type remain
376 -- accessible. The operations on Character_Set are overridden by
377 -- the defined operations in the spec, but the operations defined
378 -- on Character_Set_Internal remain visible.
380 Null_Set
: constant Character_Set
:= [others => False];
382 type Character_Mapping
is array (Character) of Character;
384 function SPARK_Proof_Model
385 (Map
: Character_Mapping
)
386 return SPARK_Proof_Character_Mapping_Model
388 (SPARK_Proof_Character_Mapping_Model
(Map
));
390 package L
renames Ada
.Characters
.Latin_1
;
392 Identity
: constant Character_Mapping
:=
426 L
.Exclamation
& -- '!' 33
427 L
.Quotation
& -- '"' 34
428 L
.Number_Sign
& -- '#' 35
429 L
.Dollar_Sign
& -- '$' 36
430 L
.Percent_Sign
& -- '%' 37
431 L
.Ampersand
& -- '&' 38
432 L
.Apostrophe
& -- ''' 39
433 L
.Left_Parenthesis
& -- '(' 40
434 L
.Right_Parenthesis
& -- ')' 41
435 L
.Asterisk
& -- '*' 42
436 L
.Plus_Sign
& -- '+' 43
439 L
.Full_Stop
& -- '.' 46
440 L
.Solidus
& -- '/' 47
452 L
.Semicolon
& -- ';' 59
453 L
.Less_Than_Sign
& -- '<' 60
454 L
.Equals_Sign
& -- '=' 61
455 L
.Greater_Than_Sign
& -- '>' 62
456 L
.Question
& -- '?' 63
457 L
.Commercial_At
& -- '@' 64
484 L
.Left_Square_Bracket
& -- '[' 91
485 L
.Reverse_Solidus
& -- '\' 92
486 L
.Right_Square_Bracket
& -- ']' 93
487 L
.Circumflex
& -- '^' 94
488 L
.Low_Line
& -- '_' 95
516 L
.Left_Curly_Bracket
& -- '{' 123
517 L
.Vertical_Line
& -- '|' 124
518 L
.Right_Curly_Bracket
& -- '}' 125
521 L
.Reserved_128
& -- Reserved_128 128
522 L
.Reserved_129
& -- Reserved_129 129
525 L
.Reserved_132
& -- Reserved_132 132
546 L
.Reserved_153
& -- Reserved_153 153
553 L
.No_Break_Space
& -- No_Break_Space 160
554 L
.Inverted_Exclamation
& -- Inverted_Exclamation 161
555 L
.Cent_Sign
& -- Cent_Sign 162
556 L
.Pound_Sign
& -- Pound_Sign 163
557 L
.Currency_Sign
& -- Currency_Sign 164
558 L
.Yen_Sign
& -- Yen_Sign 165
559 L
.Broken_Bar
& -- Broken_Bar 166
560 L
.Section_Sign
& -- Section_Sign 167
561 L
.Diaeresis
& -- Diaeresis 168
562 L
.Copyright_Sign
& -- Copyright_Sign 169
563 L
.Feminine_Ordinal_Indicator
& -- Feminine_Ordinal_Indicator 170
564 L
.Left_Angle_Quotation
& -- Left_Angle_Quotation 171
565 L
.Not_Sign
& -- Not_Sign 172
566 L
.Soft_Hyphen
& -- Soft_Hyphen 173
567 L
.Registered_Trade_Mark_Sign
& -- Registered_Trade_Mark_Sign 174
568 L
.Macron
& -- Macron 175
569 L
.Degree_Sign
& -- Degree_Sign 176
570 L
.Plus_Minus_Sign
& -- Plus_Minus_Sign 177
571 L
.Superscript_Two
& -- Superscript_Two 178
572 L
.Superscript_Three
& -- Superscript_Three 179
573 L
.Acute
& -- Acute 180
574 L
.Micro_Sign
& -- Micro_Sign 181
575 L
.Pilcrow_Sign
& -- Pilcrow_Sign 182
576 L
.Middle_Dot
& -- Middle_Dot 183
577 L
.Cedilla
& -- Cedilla 184
578 L
.Superscript_One
& -- Superscript_One 185
579 L
.Masculine_Ordinal_Indicator
& -- Masculine_Ordinal_Indicator 186
580 L
.Right_Angle_Quotation
& -- Right_Angle_Quotation 187
581 L
.Fraction_One_Quarter
& -- Fraction_One_Quarter 188
582 L
.Fraction_One_Half
& -- Fraction_One_Half 189
583 L
.Fraction_Three_Quarters
& -- Fraction_Three_Quarters 190
584 L
.Inverted_Question
& -- Inverted_Question 191
585 L
.UC_A_Grave
& -- UC_A_Grave 192
586 L
.UC_A_Acute
& -- UC_A_Acute 193
587 L
.UC_A_Circumflex
& -- UC_A_Circumflex 194
588 L
.UC_A_Tilde
& -- UC_A_Tilde 195
589 L
.UC_A_Diaeresis
& -- UC_A_Diaeresis 196
590 L
.UC_A_Ring
& -- UC_A_Ring 197
591 L
.UC_AE_Diphthong
& -- UC_AE_Diphthong 198
592 L
.UC_C_Cedilla
& -- UC_C_Cedilla 199
593 L
.UC_E_Grave
& -- UC_E_Grave 200
594 L
.UC_E_Acute
& -- UC_E_Acute 201
595 L
.UC_E_Circumflex
& -- UC_E_Circumflex 202
596 L
.UC_E_Diaeresis
& -- UC_E_Diaeresis 203
597 L
.UC_I_Grave
& -- UC_I_Grave 204
598 L
.UC_I_Acute
& -- UC_I_Acute 205
599 L
.UC_I_Circumflex
& -- UC_I_Circumflex 206
600 L
.UC_I_Diaeresis
& -- UC_I_Diaeresis 207
601 L
.UC_Icelandic_Eth
& -- UC_Icelandic_Eth 208
602 L
.UC_N_Tilde
& -- UC_N_Tilde 209
603 L
.UC_O_Grave
& -- UC_O_Grave 210
604 L
.UC_O_Acute
& -- UC_O_Acute 211
605 L
.UC_O_Circumflex
& -- UC_O_Circumflex 212
606 L
.UC_O_Tilde
& -- UC_O_Tilde 213
607 L
.UC_O_Diaeresis
& -- UC_O_Diaeresis 214
608 L
.Multiplication_Sign
& -- Multiplication_Sign 215
609 L
.UC_O_Oblique_Stroke
& -- UC_O_Oblique_Stroke 216
610 L
.UC_U_Grave
& -- UC_U_Grave 217
611 L
.UC_U_Acute
& -- UC_U_Acute 218
612 L
.UC_U_Circumflex
& -- UC_U_Circumflex 219
613 L
.UC_U_Diaeresis
& -- UC_U_Diaeresis 220
614 L
.UC_Y_Acute
& -- UC_Y_Acute 221
615 L
.UC_Icelandic_Thorn
& -- UC_Icelandic_Thorn 222
616 L
.LC_German_Sharp_S
& -- LC_German_Sharp_S 223
617 L
.LC_A_Grave
& -- LC_A_Grave 224
618 L
.LC_A_Acute
& -- LC_A_Acute 225
619 L
.LC_A_Circumflex
& -- LC_A_Circumflex 226
620 L
.LC_A_Tilde
& -- LC_A_Tilde 227
621 L
.LC_A_Diaeresis
& -- LC_A_Diaeresis 228
622 L
.LC_A_Ring
& -- LC_A_Ring 229
623 L
.LC_AE_Diphthong
& -- LC_AE_Diphthong 230
624 L
.LC_C_Cedilla
& -- LC_C_Cedilla 231
625 L
.LC_E_Grave
& -- LC_E_Grave 232
626 L
.LC_E_Acute
& -- LC_E_Acute 233
627 L
.LC_E_Circumflex
& -- LC_E_Circumflex 234
628 L
.LC_E_Diaeresis
& -- LC_E_Diaeresis 235
629 L
.LC_I_Grave
& -- LC_I_Grave 236
630 L
.LC_I_Acute
& -- LC_I_Acute 237
631 L
.LC_I_Circumflex
& -- LC_I_Circumflex 238
632 L
.LC_I_Diaeresis
& -- LC_I_Diaeresis 239
633 L
.LC_Icelandic_Eth
& -- LC_Icelandic_Eth 240
634 L
.LC_N_Tilde
& -- LC_N_Tilde 241
635 L
.LC_O_Grave
& -- LC_O_Grave 242
636 L
.LC_O_Acute
& -- LC_O_Acute 243
637 L
.LC_O_Circumflex
& -- LC_O_Circumflex 244
638 L
.LC_O_Tilde
& -- LC_O_Tilde 245
639 L
.LC_O_Diaeresis
& -- LC_O_Diaeresis 246
640 L
.Division_Sign
& -- Division_Sign 247
641 L
.LC_O_Oblique_Stroke
& -- LC_O_Oblique_Stroke 248
642 L
.LC_U_Grave
& -- LC_U_Grave 249
643 L
.LC_U_Acute
& -- LC_U_Acute 250
644 L
.LC_U_Circumflex
& -- LC_U_Circumflex 251
645 L
.LC_U_Diaeresis
& -- LC_U_Diaeresis 252
646 L
.LC_Y_Acute
& -- LC_Y_Acute 253
647 L
.LC_Icelandic_Thorn
& -- LC_Icelandic_Thorn 254
648 L
.LC_Y_Diaeresis
); -- LC_Y_Diaeresis 255
650 end Ada
.Strings
.Maps
;