1 ------------------------------------------------------------------------------
3 -- GNAT COMPILER COMPONENTS --
5 -- S Y S T E M . R E G E X P --
9 -- Copyright (C) 1999-2017, AdaCore --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 3, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. --
18 -- As a special exception under Section 7 of GPL version 3, you are granted --
19 -- additional permissions described in the GCC Runtime Library Exception, --
20 -- version 3.1, as published by the Free Software Foundation. --
22 -- You should have received a copy of the GNU General Public License and --
23 -- a copy of the GCC Runtime Library Exception along with this program; --
24 -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
25 -- <http://www.gnu.org/licenses/>. --
27 -- GNAT was originally developed by the GNAT team at New York University. --
28 -- Extensive contributions were provided by Ada Core Technologies Inc. --
30 ------------------------------------------------------------------------------
32 with Ada
.Unchecked_Deallocation
;
33 with System
.Case_Util
;
35 package body System
.Regexp
is
37 Initial_Max_States_In_Primary_Table
: constant := 100;
38 -- Initial size for the number of states in the indefinite state
39 -- machine. The number of states will be increased as needed.
41 -- This is also used as the maximal number of meta states (groups of
42 -- states) in the secondary table.
44 Open_Paren
: constant Character := '(';
45 Close_Paren
: constant Character := ')';
46 Open_Bracket
: constant Character := '[';
47 Close_Bracket
: constant Character := ']';
49 type State_Index
is new Natural;
50 type Column_Index
is new Natural;
52 type Regexp_Array
is array
53 (State_Index
range <>, Column_Index
range <>) of State_Index
;
54 -- First index is for the state number. Second index is for the character
55 -- type. Contents is the new State.
57 type Regexp_Array_Access
is access Regexp_Array
;
58 -- Use this type through the functions Set below, so that it can grow
59 -- dynamically depending on the needs.
61 type Mapping
is array (Character'Range) of Column_Index
;
62 -- Mapping between characters and column in the Regexp_Array
64 type Boolean_Array
is array (State_Index
range <>) of Boolean;
67 (Alphabet_Size
: Column_Index
;
68 Num_States
: State_Index
) is
71 Case_Sensitive
: Boolean;
72 States
: Regexp_Array
(1 .. Num_States
, 0 .. Alphabet_Size
);
73 Is_Final
: Boolean_Array
(1 .. Num_States
);
75 -- Deterministic finite-state machine
77 -----------------------
78 -- Local Subprograms --
79 -----------------------
82 (Table
: in out Regexp_Array_Access
;
84 Column
: Column_Index
;
86 -- Sets a value in the table. If the table is too small, reallocate it
87 -- dynamically so that (State, Column) is a valid index in it.
90 (Table
: Regexp_Array_Access
;
92 Column
: Column_Index
) return State_Index
;
93 -- Returns the value in the table at (State, Column). If this index does
94 -- not exist in the table, returns zero.
96 procedure Free
is new Ada
.Unchecked_Deallocation
97 (Regexp_Array
, Regexp_Array_Access
);
103 procedure Adjust
(R
: in out Regexp
) is
107 Tmp
:= new Regexp_Value
(Alphabet_Size
=> R
.R
.Alphabet_Size
,
108 Num_States
=> R
.R
.Num_States
);
120 Glob
: Boolean := False;
121 Case_Sensitive
: Boolean := True) return Regexp
123 S
: String := Pattern
;
124 -- The pattern which is really compiled (when the pattern is case
125 -- insensitive, we convert this string to lower-cases
127 Map
: Mapping
:= (others => 0);
128 -- Mapping between characters and columns in the tables
130 Alphabet_Size
: Column_Index
:= 0;
131 -- Number of significant characters in the regular expression.
132 -- This total does not include special operators, such as *, (, ...
134 procedure Check_Well_Formed_Pattern
;
135 -- Check that the pattern to compile is well-formed, so that subsequent
136 -- code can rely on this without performing each time the checks to
137 -- avoid accessing the pattern outside its bounds. However, not all
138 -- well-formedness rules are checked. In particular, rules about special
139 -- characters not being treated as regular characters are not checked.
141 procedure Create_Mapping
;
142 -- Creates a mapping between characters in the regexp and columns
143 -- in the tables representing the regexp. Test that the regexp is
144 -- well-formed Modifies Alphabet_Size and Map
146 procedure Create_Primary_Table
147 (Table
: out Regexp_Array_Access
;
148 Num_States
: out State_Index
;
149 Start_State
: out State_Index
;
150 End_State
: out State_Index
);
151 -- Creates the first version of the regexp (this is a non deterministic
152 -- finite state machine, which is unadapted for a fast pattern
153 -- matching algorithm). We use a recursive algorithm to process the
154 -- parenthesis sub-expressions.
156 -- Table : at the end of the procedure : Column 0 is for any character
157 -- ('.') and the last columns are for no character (closure). Num_States
158 -- is set to the number of states in the table Start_State is the number
159 -- of the starting state in the regexp End_State is the number of the
160 -- final state when the regexp matches.
162 procedure Create_Primary_Table_Glob
163 (Table
: out Regexp_Array_Access
;
164 Num_States
: out State_Index
;
165 Start_State
: out State_Index
;
166 End_State
: out State_Index
);
167 -- Same function as above, but it deals with the second possible
168 -- grammar for 'globbing pattern', which is a kind of subset of the
169 -- whole regular expression grammar.
171 function Create_Secondary_Table
172 (First_Table
: Regexp_Array_Access
;
173 Start_State
: State_Index
;
174 End_State
: State_Index
) return Regexp
;
175 -- Creates the definitive table representing the regular expression
176 -- This is actually a transformation of the primary table First_Table,
177 -- where every state is grouped with the states in its 'no-character'
178 -- columns. The transitions between the new states are then recalculated
179 -- and if necessary some new states are created.
181 -- Note that the resulting finite-state machine is not optimized in
182 -- terms of the number of states : it would be more time-consuming to
183 -- add a third pass to reduce the number of states in the machine, with
184 -- no speed improvement...
186 procedure Raise_Exception
(M
: String; Index
: Integer);
187 pragma No_Return
(Raise_Exception
);
188 -- Raise an exception, indicating an error at character Index in S
190 -------------------------------
191 -- Check_Well_Formed_Pattern --
192 -------------------------------
194 procedure Check_Well_Formed_Pattern
is
197 Past_Elmt
: Boolean := False;
198 -- Set to True everywhere an elmt has been parsed, if Glob=False,
199 -- meaning there can be now an occurrence of '*', '+' and '?'.
201 Past_Term
: Boolean := False;
202 -- Set to True everywhere a term has been parsed, if Glob=False,
203 -- meaning there can be now an occurrence of '|'.
205 Parenthesis_Level
: Integer := 0;
206 Curly_Level
: Integer := 0;
208 Last_Open
: Integer := S
'First - 1;
209 -- The last occurrence of an opening parenthesis, if Glob=False,
210 -- or the last occurrence of an opening curly brace, if Glob=True.
212 procedure Raise_Exception_If_No_More_Chars
(K
: Integer := 0);
213 -- If no more characters are raised, call Raise_Exception
215 --------------------------------------
216 -- Raise_Exception_If_No_More_Chars --
217 --------------------------------------
219 procedure Raise_Exception_If_No_More_Chars
(K
: Integer := 0) is
221 if J
+ K
> S
'Last then
222 Raise_Exception
("Ill-formed pattern while parsing", J
);
224 end Raise_Exception_If_No_More_Chars
;
226 -- Start of processing for Check_Well_Formed_Pattern
230 while J
<= S
'Last loop
234 Raise_Exception_If_No_More_Chars
;
239 Raise_Exception_If_No_More_Chars
;
243 -- The first character never has a special meaning
245 if S
(J
) = ']' or else S
(J
) = '-' then
247 Raise_Exception_If_No_More_Chars
;
250 -- The set of characters cannot be empty
254 ("Set of characters cannot be empty in regular "
259 Possible_Range_Start
: Boolean := True;
260 -- Set True everywhere a range character '-' can occur
264 exit when S
(J
) = Close_Bracket
;
266 -- The current character should be followed by a
269 Raise_Exception_If_No_More_Chars
(1);
272 and then S
(J
+ 1) /= Close_Bracket
274 if not Possible_Range_Start
then
276 ("No mix of ranges is allowed in "
277 & "regular expression", J
);
281 Raise_Exception_If_No_More_Chars
;
283 -- Range cannot be followed by '-' character,
284 -- except as last character in the set.
286 Possible_Range_Start
:= False;
289 Possible_Range_Start
:= True;
294 Raise_Exception_If_No_More_Chars
;
301 -- A closing bracket can end an elmt or term
306 when Close_Bracket
=>
308 -- A close bracket must follow a open_bracket, and cannot be
309 -- found alone on the line.
312 ("Incorrect character ']' in regular expression", J
);
318 -- Any character can be an elmt or a term
324 -- \ not allowed at the end of the regexp
327 ("Incorrect character '\' in regular expression", J
);
332 Parenthesis_Level
:= Parenthesis_Level
+ 1;
335 -- An open parenthesis does not end an elmt or term
343 Parenthesis_Level
:= Parenthesis_Level
- 1;
345 if Parenthesis_Level
< 0 then
347 ("')' is not associated with '(' in regular "
351 if J
= Last_Open
+ 1 then
353 ("Empty parentheses not allowed in regular "
357 if not Past_Term
then
359 ("Closing parenthesis not allowed here in regular "
363 -- A closing parenthesis can end an elmt or term
371 Curly_Level
:= Curly_Level
+ 1;
375 -- Any character can be an elmt or a term
381 -- No need to check for ',' as the code always accepts them
385 Curly_Level
:= Curly_Level
- 1;
387 if Curly_Level
< 0 then
389 ("'}' is not associated with '{' in regular "
393 if J
= Last_Open
+ 1 then
395 ("Empty curly braces not allowed in regular "
400 -- Any character can be an elmt or a term
406 when '*' |
'?' |
'+' =>
409 -- These operators must apply to an elmt sub-expression,
410 -- and cannot be found if one has not just been parsed.
412 if not Past_Elmt
then
414 ("'*', '+' and '?' operators must be "
415 & "applied to an element in regular expression", J
);
425 -- This operator must apply to a term sub-expression,
426 -- and cannot be found if one has not just been parsed.
428 if not Past_Term
then
430 ("'|' operator must be "
431 & "applied to a term in regular expression", J
);
441 -- Any character can be an elmt or a term
451 -- A closing parenthesis must follow an open parenthesis
453 if Parenthesis_Level
/= 0 then
455 ("'(' must always be associated with a ')'", J
);
458 -- A closing curly brace must follow an open curly brace
460 if Curly_Level
/= 0 then
462 ("'{' must always be associated with a '}'", J
);
464 end Check_Well_Formed_Pattern
;
470 procedure Create_Mapping
is
472 procedure Add_In_Map
(C
: Character);
473 -- Add a character in the mapping, if it is not already defined
479 procedure Add_In_Map
(C
: Character) is
482 Alphabet_Size
:= Alphabet_Size
+ 1;
483 Map
(C
) := Alphabet_Size
;
487 J
: Integer := S
'First;
488 Parenthesis_Level
: Integer := 0;
489 Curly_Level
: Integer := 0;
490 Last_Open
: Integer := S
'First - 1;
492 -- Start of processing for Create_Mapping
495 while J
<= S
'Last loop
504 if S
(J
) = ']' or else S
(J
) = '-' then
508 -- The first character never has a special meaning
513 ("Ran out of characters while parsing ", J
);
516 exit when S
(J
) = Close_Bracket
;
519 and then S
(J
+ 1) /= Close_Bracket
522 Start
: constant Integer := J
- 1;
531 for Char
in S
(Start
) .. S
(J
) loop
546 -- A close bracket must follow a open_bracket and cannot be
547 -- found alone on the line
549 when Close_Bracket
=>
551 ("Incorrect character ']' in regular expression", J
);
559 -- Back slash \ not allowed at the end of the regexp
562 ("Incorrect character '\' in regular expression", J
);
567 Parenthesis_Level
:= Parenthesis_Level
+ 1;
570 Add_In_Map
(Open_Paren
);
575 Parenthesis_Level
:= Parenthesis_Level
- 1;
577 if Parenthesis_Level
< 0 then
579 ("')' is not associated with '(' in regular "
583 if J
= Last_Open
+ 1 then
585 ("Empty parenthesis not allowed in regular "
590 Add_In_Map
(Close_Paren
);
602 Curly_Level
:= Curly_Level
+ 1;
609 Curly_Level
:= Curly_Level
- 1;
616 ("'*', '+', '?' and '|' operators cannot be in "
617 & "first position in regular expression", J
);
625 -- These operators must apply to a sub-expression,
626 -- and cannot be found at the beginning of the line
629 ("'*', '+', '?' and '|' operators cannot be in "
630 & "first position in regular expression", J
);
644 -- A closing parenthesis must follow an open parenthesis
646 if Parenthesis_Level
/= 0 then
648 ("'(' must always be associated with a ')'", J
);
651 if Curly_Level
/= 0 then
653 ("'{' must always be associated with a '}'", J
);
657 --------------------------
658 -- Create_Primary_Table --
659 --------------------------
661 procedure Create_Primary_Table
662 (Table
: out Regexp_Array_Access
;
663 Num_States
: out State_Index
;
664 Start_State
: out State_Index
;
665 End_State
: out State_Index
)
667 Empty_Char
: constant Column_Index
:= Alphabet_Size
+ 1;
669 Current_State
: State_Index
:= 0;
670 -- Index of the last created state
672 procedure Add_Empty_Char
673 (State
: State_Index
;
674 To_State
: State_Index
);
675 -- Add a empty-character transition from State to To_State
677 procedure Create_Repetition
678 (Repetition
: Character;
679 Start_Prev
: State_Index
;
680 End_Prev
: State_Index
;
681 New_Start
: out State_Index
;
682 New_End
: in out State_Index
);
683 -- Create the table in case we have a '*', '+' or '?'.
684 -- Start_Prev .. End_Prev should indicate respectively the start and
685 -- end index of the previous expression, to which '*', '+' or '?' is
688 procedure Create_Simple
689 (Start_Index
: Integer;
691 Start_State
: out State_Index
;
692 End_State
: out State_Index
);
693 -- Fill the table for the regexp Simple. This is the recursive
694 -- procedure called to handle () expressions If End_State = 0, then
695 -- the call to Create_Simple creates an independent regexp, not a
696 -- concatenation Start_Index .. End_Index is the starting index in
699 -- Warning: it may look like we are creating too many empty-string
700 -- transitions, but they are needed to get the correct regexp.
701 -- The table is filled as follow ( s means start-state, e means
704 -- regexp state_num | a b * empty_string
705 -- ------- ------------------------------
709 -- ab 1 (s) | 2 - - -
726 -- (a) 1 (s) | 2 - - -
742 function Next_Sub_Expression
743 (Start_Index
: Integer;
744 End_Index
: Integer) return Integer;
745 -- Returns the index of the last character of the next sub-expression
746 -- in Simple. Index cannot be greater than End_Index.
752 procedure Add_Empty_Char
753 (State
: State_Index
;
754 To_State
: State_Index
)
756 J
: Column_Index
:= Empty_Char
;
759 while Get
(Table
, State
, J
) /= 0 loop
763 Set
(Table
, State
, J
, To_State
);
766 -----------------------
767 -- Create_Repetition --
768 -----------------------
770 procedure Create_Repetition
771 (Repetition
: Character;
772 Start_Prev
: State_Index
;
773 End_Prev
: State_Index
;
774 New_Start
: out State_Index
;
775 New_End
: in out State_Index
)
778 New_Start
:= Current_State
+ 1;
781 Add_Empty_Char
(New_End
, New_Start
);
784 Current_State
:= Current_State
+ 2;
785 New_End
:= Current_State
;
787 Add_Empty_Char
(End_Prev
, New_End
);
788 Add_Empty_Char
(New_Start
, Start_Prev
);
790 if Repetition
/= '+' then
791 Add_Empty_Char
(New_Start
, New_End
);
794 if Repetition
/= '?' then
795 Add_Empty_Char
(New_End
, New_Start
);
797 end Create_Repetition
;
803 procedure Create_Simple
804 (Start_Index
: Integer;
806 Start_State
: out State_Index
;
807 End_State
: out State_Index
)
809 J
: Integer := Start_Index
;
810 Last_Start
: State_Index
:= 0;
815 while J
<= End_Index
loop
819 J_Start
: constant Integer := J
+ 1;
820 Next_Start
: State_Index
;
821 Next_End
: State_Index
;
824 J
:= Next_Sub_Expression
(J
, End_Index
);
825 Create_Simple
(J_Start
, J
- 1, Next_Start
, Next_End
);
828 and then (S
(J
+ 1) = '*' or else
829 S
(J
+ 1) = '+' or else
841 Last_Start
:= Next_Start
;
843 if End_State
/= 0 then
844 Add_Empty_Char
(End_State
, Last_Start
);
847 End_State
:= Next_End
;
853 Start_Prev
: constant State_Index
:= Start_State
;
854 End_Prev
: constant State_Index
:= End_State
;
855 Start_J
: constant Integer := J
+ 1;
856 Start_Next
: State_Index
:= 0;
857 End_Next
: State_Index
:= 0;
860 J
:= Next_Sub_Expression
(J
, End_Index
);
862 -- Create a new state for the start of the alternative
864 Current_State
:= Current_State
+ 1;
865 Last_Start
:= Current_State
;
866 Start_State
:= Last_Start
;
868 -- Create the tree for the second part of alternative
870 Create_Simple
(Start_J
, J
, Start_Next
, End_Next
);
872 -- Create the end state
874 Add_Empty_Char
(Last_Start
, Start_Next
);
875 Add_Empty_Char
(Last_Start
, Start_Prev
);
876 Current_State
:= Current_State
+ 1;
877 End_State
:= Current_State
;
878 Add_Empty_Char
(End_Prev
, End_State
);
879 Add_Empty_Char
(End_Next
, End_State
);
883 Current_State
:= Current_State
+ 1;
886 Next_State
: State_Index
:= Current_State
+ 1;
896 for Column
in 0 .. Alphabet_Size
loop
897 Set
(Table
, Current_State
, Column
,
898 Value
=> Current_State
+ 1);
902 -- Automatically add the first character
904 if S
(J
) = '-' or else S
(J
) = ']' then
905 Set
(Table
, Current_State
, Map
(S
(J
)),
906 Value
=> Next_State
);
910 -- Loop till closing bracket found
913 exit when S
(J
) = Close_Bracket
;
916 and then S
(J
+ 1) /= ']'
919 Start
: constant Integer := J
- 1;
928 for Char
in S
(Start
) .. S
(J
) loop
929 Set
(Table
, Current_State
, Map
(Char
),
930 Value
=> Next_State
);
939 Set
(Table
, Current_State
, Map
(S
(J
)),
940 Value
=> Next_State
);
946 Current_State
:= Current_State
+ 1;
948 -- If the next symbol is a special symbol
951 and then (S
(J
+ 1) = '*' or else
952 S
(J
+ 1) = '+' or else
964 Last_Start
:= Current_State
- 1;
966 if End_State
/= 0 then
967 Add_Empty_Char
(End_State
, Last_Start
);
970 End_State
:= Current_State
;
978 ("Incorrect character in regular expression :", J
);
981 Current_State
:= Current_State
+ 1;
983 -- Create the state for the symbol S (J)
986 for K
in 0 .. Alphabet_Size
loop
987 Set
(Table
, Current_State
, K
,
988 Value
=> Current_State
+ 1);
996 Set
(Table
, Current_State
, Map
(S
(J
)),
997 Value
=> Current_State
+ 1);
1000 Current_State
:= Current_State
+ 1;
1002 -- If the next symbol is a special symbol
1005 and then (S
(J
+ 1) = '*' or else
1006 S
(J
+ 1) = '+' or else
1018 Last_Start
:= Current_State
- 1;
1020 if End_State
/= 0 then
1021 Add_Empty_Char
(End_State
, Last_Start
);
1024 End_State
:= Current_State
;
1028 if Start_State
= 0 then
1029 Start_State
:= Last_Start
;
1036 -------------------------
1037 -- Next_Sub_Expression --
1038 -------------------------
1040 function Next_Sub_Expression
1041 (Start_Index
: Integer;
1042 End_Index
: Integer) return Integer
1044 J
: Integer := Start_Index
;
1045 Start_On_Alter
: Boolean := False;
1049 Start_On_Alter
:= True;
1053 exit when J
= End_Index
;
1060 when Open_Bracket
=>
1063 exit when S
(J
) = Close_Bracket
;
1071 J
:= Next_Sub_Expression
(J
, End_Index
);
1077 if Start_On_Alter
then
1087 end Next_Sub_Expression
;
1089 -- Start of processing for Create_Primary_Table
1092 Table
.all := (others => (others => 0));
1093 Create_Simple
(S
'First, S
'Last, Start_State
, End_State
);
1094 Num_States
:= Current_State
;
1095 end Create_Primary_Table
;
1097 -------------------------------
1098 -- Create_Primary_Table_Glob --
1099 -------------------------------
1101 procedure Create_Primary_Table_Glob
1102 (Table
: out Regexp_Array_Access
;
1103 Num_States
: out State_Index
;
1104 Start_State
: out State_Index
;
1105 End_State
: out State_Index
)
1107 Empty_Char
: constant Column_Index
:= Alphabet_Size
+ 1;
1109 Current_State
: State_Index
:= 0;
1110 -- Index of the last created state
1112 procedure Add_Empty_Char
1113 (State
: State_Index
;
1114 To_State
: State_Index
);
1115 -- Add a empty-character transition from State to To_State
1117 procedure Create_Simple
1118 (Start_Index
: Integer;
1119 End_Index
: Integer;
1120 Start_State
: out State_Index
;
1121 End_State
: out State_Index
);
1122 -- Fill the table for the S (Start_Index .. End_Index).
1123 -- This is the recursive procedure called to handle () expressions
1125 --------------------
1126 -- Add_Empty_Char --
1127 --------------------
1129 procedure Add_Empty_Char
1130 (State
: State_Index
;
1131 To_State
: State_Index
)
1137 while Get
(Table
, State
, J
) /= 0 loop
1141 Set
(Table
, State
, J
, Value
=> To_State
);
1148 procedure Create_Simple
1149 (Start_Index
: Integer;
1150 End_Index
: Integer;
1151 Start_State
: out State_Index
;
1152 End_State
: out State_Index
)
1155 Last_Start
: State_Index
:= 0;
1162 while J
<= End_Index
loop
1164 when Open_Bracket
=>
1165 Current_State
:= Current_State
+ 1;
1168 Next_State
: State_Index
:= Current_State
+ 1;
1177 for Column
in 0 .. Alphabet_Size
loop
1178 Set
(Table
, Current_State
, Column
,
1179 Value
=> Current_State
+ 1);
1183 -- Automatically add the first character
1185 if S
(J
) = '-' or else S
(J
) = ']' then
1186 Set
(Table
, Current_State
, Map
(S
(J
)),
1187 Value
=> Current_State
);
1191 -- Loop till closing bracket found
1194 exit when S
(J
) = Close_Bracket
;
1197 and then S
(J
+ 1) /= ']'
1200 Start
: constant Integer := J
- 1;
1209 for Char
in S
(Start
) .. S
(J
) loop
1210 Set
(Table
, Current_State
, Map
(Char
),
1211 Value
=> Next_State
);
1220 Set
(Table
, Current_State
, Map
(S
(J
)),
1221 Value
=> Next_State
);
1227 Last_Start
:= Current_State
;
1228 Current_State
:= Current_State
+ 1;
1230 if End_State
/= 0 then
1231 Add_Empty_Char
(End_State
, Last_Start
);
1234 End_State
:= Current_State
;
1239 Start_Regexp_Sub
: State_Index
;
1240 End_Regexp_Sub
: State_Index
;
1241 Create_Start
: State_Index
:= 0;
1243 Create_End
: State_Index
:= 0;
1244 -- Initialized to avoid junk warning
1247 while S
(J
) /= '}' loop
1249 -- First step : find sub pattern
1252 while S
(End_Sub
) /= ','
1253 and then S
(End_Sub
) /= '}'
1255 End_Sub
:= End_Sub
+ 1;
1258 -- Second step : create a sub pattern
1268 -- Third step : create an alternative
1270 if Create_Start
= 0 then
1271 Current_State
:= Current_State
+ 1;
1272 Create_Start
:= Current_State
;
1273 Add_Empty_Char
(Create_Start
, Start_Regexp_Sub
);
1274 Current_State
:= Current_State
+ 1;
1275 Create_End
:= Current_State
;
1276 Add_Empty_Char
(End_Regexp_Sub
, Create_End
);
1279 Current_State
:= Current_State
+ 1;
1280 Add_Empty_Char
(Current_State
, Create_Start
);
1281 Create_Start
:= Current_State
;
1282 Add_Empty_Char
(Create_Start
, Start_Regexp_Sub
);
1283 Add_Empty_Char
(End_Regexp_Sub
, Create_End
);
1287 if End_State
/= 0 then
1288 Add_Empty_Char
(End_State
, Create_Start
);
1291 End_State
:= Create_End
;
1292 Last_Start
:= Create_Start
;
1296 Current_State
:= Current_State
+ 1;
1298 if End_State
/= 0 then
1299 Add_Empty_Char
(End_State
, Current_State
);
1302 Add_Empty_Char
(Current_State
, Current_State
+ 1);
1303 Add_Empty_Char
(Current_State
, Current_State
+ 3);
1304 Last_Start
:= Current_State
;
1306 Current_State
:= Current_State
+ 1;
1308 for K
in 0 .. Alphabet_Size
loop
1309 Set
(Table
, Current_State
, K
,
1310 Value
=> Current_State
+ 1);
1313 Current_State
:= Current_State
+ 1;
1314 Add_Empty_Char
(Current_State
, Current_State
+ 1);
1316 Current_State
:= Current_State
+ 1;
1317 Add_Empty_Char
(Current_State
, Last_Start
);
1318 End_State
:= Current_State
;
1321 Current_State
:= Current_State
+ 1;
1324 for K
in 0 .. Alphabet_Size
loop
1325 Set
(Table
, Current_State
, K
,
1326 Value
=> Current_State
+ 1);
1334 -- Create the state for the symbol S (J)
1336 Set
(Table
, Current_State
, Map
(S
(J
)),
1337 Value
=> Current_State
+ 1);
1340 Last_Start
:= Current_State
;
1341 Current_State
:= Current_State
+ 1;
1343 if End_State
/= 0 then
1344 Add_Empty_Char
(End_State
, Last_Start
);
1347 End_State
:= Current_State
;
1350 if Start_State
= 0 then
1351 Start_State
:= Last_Start
;
1358 -- Start of processing for Create_Primary_Table_Glob
1361 Table
.all := (others => (others => 0));
1362 Create_Simple
(S
'First, S
'Last, Start_State
, End_State
);
1363 Num_States
:= Current_State
;
1364 end Create_Primary_Table_Glob
;
1366 ----------------------------
1367 -- Create_Secondary_Table --
1368 ----------------------------
1370 function Create_Secondary_Table
1371 (First_Table
: Regexp_Array_Access
;
1372 Start_State
: State_Index
;
1373 End_State
: State_Index
) return Regexp
1375 Last_Index
: constant State_Index
:= First_Table
'Last (1);
1377 type Meta_State
is array (0 .. Last_Index
) of Boolean;
1378 pragma Pack
(Meta_State
);
1379 -- Whether a state from first_table belongs to a metastate.
1381 No_States
: constant Meta_State
:= (others => False);
1383 type Meta_States_Array
is array (State_Index
range <>) of Meta_State
;
1384 type Meta_States_List
is access all Meta_States_Array
;
1385 procedure Unchecked_Free
is new Ada
.Unchecked_Deallocation
1386 (Meta_States_Array
, Meta_States_List
);
1387 Meta_States
: Meta_States_List
;
1388 -- Components of meta-states. A given state might belong to
1389 -- several meta-states.
1390 -- This array grows dynamically.
1392 type Char_To_State
is array (0 .. Alphabet_Size
) of State_Index
;
1393 type Meta_States_Transition_Arr
is
1394 array (State_Index
range <>) of Char_To_State
;
1395 type Meta_States_Transition
is access all Meta_States_Transition_Arr
;
1396 procedure Unchecked_Free
is new Ada
.Unchecked_Deallocation
1397 (Meta_States_Transition_Arr
, Meta_States_Transition
);
1398 Table
: Meta_States_Transition
;
1399 -- Documents the transitions between each meta-state. The
1400 -- first index is the meta-state, the second column is the
1401 -- character seen in the input, the value is the new meta-state.
1403 Temp_State_Not_Null
: Boolean;
1405 Current_State
: State_Index
:= 1;
1406 -- The current meta-state we are creating
1408 Nb_State
: State_Index
:= 1;
1409 -- The total number of meta-states created so far.
1412 (Meta_State
: State_Index
;
1413 State
: State_Index
);
1414 -- Compute the closure of the state (that is every other state which
1415 -- has a empty-character transition) and add it to the state
1417 procedure Ensure_Meta_State
(Meta
: State_Index
);
1418 -- grows the Meta_States array as needed to make sure that there
1419 -- is enough space to store the new meta state.
1421 -----------------------
1422 -- Ensure_Meta_State --
1423 -----------------------
1425 procedure Ensure_Meta_State
(Meta
: State_Index
) is
1426 Tmp
: Meta_States_List
:= Meta_States
;
1427 Tmp2
: Meta_States_Transition
:= Table
;
1430 if Meta_States
= null then
1431 Meta_States
:= new Meta_States_Array
1432 (1 .. State_Index
'Max (Last_Index
, Meta
) + 1);
1433 Meta_States
(Meta_States
'Range) := (others => No_States
);
1435 Table
:= new Meta_States_Transition_Arr
1436 (1 .. State_Index
'Max (Last_Index
, Meta
) + 1);
1437 Table
.all := (others => (others => 0));
1439 elsif Meta
> Meta_States
'Last then
1440 Meta_States
:= new Meta_States_Array
1441 (1 .. State_Index
'Max (2 * Tmp
'Last, Meta
));
1442 Meta_States
(Tmp
'Range) := Tmp
.all;
1443 Meta_States
(Tmp
'Last + 1 .. Meta_States
'Last) :=
1444 (others => No_States
);
1445 Unchecked_Free
(Tmp
);
1447 Table
:= new Meta_States_Transition_Arr
1448 (1 .. State_Index
'Max (2 * Tmp2
'Last, Meta
) + 1);
1449 Table
(Tmp2
'Range) := Tmp2
.all;
1450 Table
(Tmp2
'Last + 1 .. Table
'Last) :=
1451 (others => (others => 0));
1452 Unchecked_Free
(Tmp2
);
1454 end Ensure_Meta_State
;
1461 (Meta_State
: State_Index
;
1462 State
: State_Index
)
1465 if not Meta_States
(Meta_State
)(State
) then
1466 Meta_States
(Meta_State
)(State
) := True;
1468 -- For each transition on empty-character
1470 for Column
in Alphabet_Size
+ 1 .. First_Table
'Last (2) loop
1471 exit when First_Table
(State
, Column
) = 0;
1472 Closure
(Meta_State
, First_Table
(State
, Column
));
1477 -- Start of processing for Create_Secondary_Table
1480 -- Create a new state
1482 Ensure_Meta_State
(Current_State
);
1483 Closure
(Current_State
, Start_State
);
1485 while Current_State
<= Nb_State
loop
1487 -- We will be trying, below, to create the next meta-state
1489 Ensure_Meta_State
(Nb_State
+ 1);
1491 -- For every character in the regexp, calculate the possible
1492 -- transitions from Current_State.
1494 for Column
in 0 .. Alphabet_Size
loop
1495 Temp_State_Not_Null
:= False;
1497 for K
in Meta_States
(Current_State
)'Range loop
1498 if Meta_States
(Current_State
)(K
)
1499 and then First_Table
(K
, Column
) /= 0
1501 Closure
(Nb_State
+ 1, First_Table
(K
, Column
));
1502 Temp_State_Not_Null
:= True;
1506 -- If at least one transition existed
1508 if Temp_State_Not_Null
then
1510 -- Check if this new state corresponds to an old one
1512 for K
in 1 .. Nb_State
loop
1513 if Meta_States
(K
) = Meta_States
(Nb_State
+ 1) then
1514 Table
(Current_State
)(Column
) := K
;
1516 -- Reset data, for the next time we try that state
1518 Meta_States
(Nb_State
+ 1) := No_States
;
1523 -- If not, create a new state
1525 if Table
(Current_State
)(Column
) = 0 then
1526 Nb_State
:= Nb_State
+ 1;
1527 Ensure_Meta_State
(Nb_State
+ 1);
1528 Table
(Current_State
)(Column
) := Nb_State
;
1533 Current_State
:= Current_State
+ 1;
1536 -- Returns the regexp
1542 R
:= new Regexp_Value
(Alphabet_Size
=> Alphabet_Size
,
1543 Num_States
=> Nb_State
);
1545 R
.Case_Sensitive
:= Case_Sensitive
;
1547 for S
in 1 .. Nb_State
loop
1548 R
.Is_Final
(S
) := Meta_States
(S
)(End_State
);
1551 for State
in 1 .. Nb_State
loop
1552 for K
in 0 .. Alphabet_Size
loop
1553 R
.States
(State
, K
) := Table
(State
)(K
);
1557 Unchecked_Free
(Meta_States
);
1558 Unchecked_Free
(Table
);
1560 return (Ada
.Finalization
.Controlled
with R
=> R
);
1562 end Create_Secondary_Table
;
1564 ---------------------
1565 -- Raise_Exception --
1566 ---------------------
1568 procedure Raise_Exception
(M
: String; Index
: Integer) is
1570 raise Error_In_Regexp
with M
& " at offset" & Index
'Img;
1571 end Raise_Exception
;
1573 -- Start of processing for Compile
1576 -- Special case for the empty string: it always matches, and the
1577 -- following processing would fail on it.
1580 return (Ada
.Finalization
.Controlled
with
1581 R
=> new Regexp_Value
'
1582 (Alphabet_Size => 0,
1584 Map => (others => 0),
1585 States => (others => (others => 1)),
1586 Is_Final => (others => True),
1587 Case_Sensitive => True));
1590 if not Case_Sensitive then
1591 System.Case_Util.To_Lower (S);
1594 -- Check the pattern is well-formed before any treatment
1596 Check_Well_Formed_Pattern;
1600 -- Creates the primary table
1603 Table : Regexp_Array_Access;
1604 Num_States : State_Index;
1605 Start_State : State_Index;
1606 End_State : State_Index;
1610 Table := new Regexp_Array (1 .. Initial_Max_States_In_Primary_Table,
1611 0 .. Alphabet_Size + 10);
1613 Create_Primary_Table (Table, Num_States, Start_State, End_State);
1615 Create_Primary_Table_Glob
1616 (Table, Num_States, Start_State, End_State);
1619 -- Creates the secondary table
1621 R := Create_Secondary_Table (Table, Start_State, End_State);
1631 procedure Finalize (R : in out Regexp) is
1632 procedure Free is new
1633 Ada.Unchecked_Deallocation (Regexp_Value, Regexp_Access);
1643 (Table : Regexp_Array_Access;
1644 State : State_Index;
1645 Column : Column_Index) return State_Index
1648 if State <= Table'Last (1)
1649 and then Column <= Table'Last (2)
1651 return Table (State, Column);
1661 function Match (S : String; R : Regexp) return Boolean is
1662 Current_State : State_Index := 1;
1666 raise Constraint_Error;
1669 for Char in S'Range loop
1671 if R.R.Case_Sensitive then
1672 Current_State := R.R.States (Current_State, R.R.Map (S (Char)));
1675 R.R.States (Current_State,
1676 R.R.Map (System.Case_Util.To_Lower (S (Char))));
1679 if Current_State = 0 then
1685 return R.R.Is_Final (Current_State);
1693 (Table : in out Regexp_Array_Access;
1694 State : State_Index;
1695 Column : Column_Index;
1696 Value : State_Index)
1698 New_Lines : State_Index;
1699 New_Columns : Column_Index;
1700 New_Table : Regexp_Array_Access;
1703 if State <= Table'Last (1)
1704 and then Column <= Table'Last (2)
1706 Table (State, Column) := Value;
1708 -- Doubles the size of the table until it is big enough that
1709 -- (State, Column) is a valid index.
1711 New_Lines := Table'Last (1) * (State / Table'Last (1) + 1);
1712 New_Columns := Table'Last (2) * (Column / Table'Last (2) + 1);
1713 New_Table := new Regexp_Array (Table'First (1) .. New_Lines,
1714 Table'First (2) .. New_Columns);
1715 New_Table.all := (others => (others => 0));
1717 for J in Table'Range (1) loop
1718 for K in Table'Range (2) loop
1719 New_Table (J, K) := Table (J, K);
1725 Table (State, Column) := Value;