1 ------------------------------------------------------------------------------
3 -- GNAT COMPILER COMPONENTS --
5 -- S Y S T E M . R E G E X P --
9 -- Copyright (C) 1999-2008, AdaCore --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 2, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
17 -- for more details. You should have received a copy of the GNU General --
18 -- Public License distributed with GNAT; see file COPYING. If not, write --
19 -- to the Free Software Foundation, 51 Franklin Street, Fifth Floor, --
20 -- Boston, MA 02110-1301, USA. --
22 -- As a special exception, if other files instantiate generics from this --
23 -- unit, or you link this unit with other files to produce an executable, --
24 -- this unit does not by itself cause the resulting executable to be --
25 -- covered by the GNU General Public License. This exception does not --
26 -- however invalidate any other reasons why the executable file might be --
27 -- covered by the GNU Public License. --
29 -- GNAT was originally developed by the GNAT team at New York University. --
30 -- Extensive contributions were provided by Ada Core Technologies Inc. --
32 ------------------------------------------------------------------------------
34 with Ada
.Unchecked_Deallocation
;
36 with System
.Case_Util
;
38 package body System
.Regexp
is
40 Open_Paren
: constant Character := '(';
41 Close_Paren
: constant Character := ')';
42 Open_Bracket
: constant Character := '[';
43 Close_Bracket
: constant Character := ']';
45 type State_Index
is new Natural;
46 type Column_Index
is new Natural;
48 type Regexp_Array
is array
49 (State_Index
range <>, Column_Index
range <>) of State_Index
;
50 -- First index is for the state number
51 -- Second index is for the character type
52 -- Contents is the new State
54 type Regexp_Array_Access
is access Regexp_Array
;
55 -- Use this type through the functions Set below, so that it
56 -- can grow dynamically depending on the needs.
58 type Mapping
is array (Character'Range) of Column_Index
;
59 -- Mapping between characters and column in the Regexp_Array
61 type Boolean_Array
is array (State_Index
range <>) of Boolean;
64 (Alphabet_Size
: Column_Index
;
65 Num_States
: State_Index
) is
68 States
: Regexp_Array
(1 .. Num_States
, 0 .. Alphabet_Size
);
69 Is_Final
: Boolean_Array
(1 .. Num_States
);
70 Case_Sensitive
: Boolean;
72 -- Deterministic finite-state machine
74 -----------------------
75 -- Local Subprograms --
76 -----------------------
79 (Table
: in out Regexp_Array_Access
;
81 Column
: Column_Index
;
83 -- Sets a value in the table. If the table is too small, reallocate it
84 -- dynamically so that (State, Column) is a valid index in it.
87 (Table
: Regexp_Array_Access
;
89 Column
: Column_Index
)
91 -- Returns the value in the table at (State, Column).
92 -- If this index does not exist in the table, returns 0
94 procedure Free
is new Ada
.Unchecked_Deallocation
95 (Regexp_Array
, Regexp_Array_Access
);
101 procedure Adjust
(R
: in out Regexp
) is
105 Tmp
:= new Regexp_Value
(Alphabet_Size
=> R
.R
.Alphabet_Size
,
106 Num_States
=> R
.R
.Num_States
);
117 Glob
: Boolean := False;
118 Case_Sensitive
: Boolean := True)
121 S
: String := Pattern
;
122 -- The pattern which is really compiled (when the pattern is case
123 -- insensitive, we convert this string to lower-cases
125 Map
: Mapping
:= (others => 0);
126 -- Mapping between characters and columns in the tables
128 Alphabet_Size
: Column_Index
:= 0;
129 -- Number of significant characters in the regular expression.
130 -- This total does not include special operators, such as *, (, ...
132 procedure Create_Mapping
;
133 -- Creates a mapping between characters in the regexp and columns
134 -- in the tables representing the regexp. Test that the regexp is
135 -- well-formed Modifies Alphabet_Size and Map
137 procedure Create_Primary_Table
138 (Table
: out Regexp_Array_Access
;
139 Num_States
: out State_Index
;
140 Start_State
: out State_Index
;
141 End_State
: out State_Index
);
142 -- Creates the first version of the regexp (this is a non deterministic
143 -- finite state machine, which is unadapted for a fast pattern
144 -- matching algorithm). We use a recursive algorithm to process the
145 -- parenthesis sub-expressions.
147 -- Table : at the end of the procedure : Column 0 is for any character
148 -- ('.') and the last columns are for no character (closure)
149 -- Num_States is set to the number of states in the table
150 -- Start_State is the number of the starting state in the regexp
151 -- End_State is the number of the final state when the regexp matches
153 procedure Create_Primary_Table_Glob
154 (Table
: out Regexp_Array_Access
;
155 Num_States
: out State_Index
;
156 Start_State
: out State_Index
;
157 End_State
: out State_Index
);
158 -- Same function as above, but it deals with the second possible
159 -- grammar for 'globbing pattern', which is a kind of subset of the
160 -- whole regular expression grammar.
162 function Create_Secondary_Table
163 (First_Table
: Regexp_Array_Access
;
164 Num_States
: State_Index
;
165 Start_State
: State_Index
;
166 End_State
: State_Index
)
168 -- Creates the definitive table representing the regular expression
169 -- This is actually a transformation of the primary table First_Table,
170 -- where every state is grouped with the states in its 'no-character'
171 -- columns. The transitions between the new states are then recalculated
172 -- and if necessary some new states are created.
174 -- Note that the resulting finite-state machine is not optimized in
175 -- terms of the number of states : it would be more time-consuming to
176 -- add a third pass to reduce the number of states in the machine, with
177 -- no speed improvement...
179 procedure Raise_Exception
(M
: String; Index
: Integer);
180 pragma No_Return
(Raise_Exception
);
181 -- Raise an exception, indicating an error at character Index in S
187 procedure Create_Mapping
is
189 procedure Add_In_Map
(C
: Character);
190 -- Add a character in the mapping, if it is not already defined
196 procedure Add_In_Map
(C
: Character) is
199 Alphabet_Size
:= Alphabet_Size
+ 1;
200 Map
(C
) := Alphabet_Size
;
204 J
: Integer := S
'First;
205 Parenthesis_Level
: Integer := 0;
206 Curly_Level
: Integer := 0;
207 Last_Open
: Integer := S
'First - 1;
209 -- Start of processing for Create_Mapping
212 while J
<= S
'Last loop
221 if S
(J
) = ']' or S
(J
) = '-' then
225 -- The first character never has a special meaning
230 ("Ran out of characters while parsing ", J
);
233 exit when S
(J
) = Close_Bracket
;
236 and then S
(J
+ 1) /= Close_Bracket
239 Start
: constant Integer := J
- 1;
248 for Char
in S
(Start
) .. S
(J
) loop
263 -- A close bracket must follow a open_bracket,
264 -- and cannot be found alone on the line
266 when Close_Bracket
=>
268 ("Incorrect character ']' in regular expression", J
);
276 -- \ not allowed at the end of the regexp
279 ("Incorrect character '\' in regular expression", J
);
284 Parenthesis_Level
:= Parenthesis_Level
+ 1;
287 Add_In_Map
(Open_Paren
);
292 Parenthesis_Level
:= Parenthesis_Level
- 1;
294 if Parenthesis_Level
< 0 then
296 ("')' is not associated with '(' in regular "
300 if J
= Last_Open
+ 1 then
302 ("Empty parenthesis not allowed in regular "
307 Add_In_Map
(Close_Paren
);
319 Curly_Level
:= Curly_Level
+ 1;
326 Curly_Level
:= Curly_Level
- 1;
333 ("'*', '+', '?' and '|' operators cannot be in "
334 & "first position in regular expression", J
);
342 -- These operators must apply to a sub-expression,
343 -- and cannot be found at the beginning of the line
346 ("'*', '+', '?' and '|' operators cannot be in "
347 & "first position in regular expression", J
);
361 -- A closing parenthesis must follow an open parenthesis
363 if Parenthesis_Level
/= 0 then
365 ("'(' must always be associated with a ')'", J
);
368 if Curly_Level
/= 0 then
370 ("'{' must always be associated with a '}'", J
);
374 --------------------------
375 -- Create_Primary_Table --
376 --------------------------
378 procedure Create_Primary_Table
379 (Table
: out Regexp_Array_Access
;
380 Num_States
: out State_Index
;
381 Start_State
: out State_Index
;
382 End_State
: out State_Index
)
384 Empty_Char
: constant Column_Index
:= Alphabet_Size
+ 1;
386 Current_State
: State_Index
:= 0;
387 -- Index of the last created state
389 procedure Add_Empty_Char
390 (State
: State_Index
;
391 To_State
: State_Index
);
392 -- Add a empty-character transition from State to To_State
394 procedure Create_Repetition
395 (Repetition
: Character;
396 Start_Prev
: State_Index
;
397 End_Prev
: State_Index
;
398 New_Start
: out State_Index
;
399 New_End
: in out State_Index
);
400 -- Create the table in case we have a '*', '+' or '?'.
401 -- Start_Prev .. End_Prev should indicate respectively the start and
402 -- end index of the previous expression, to which '*', '+' or '?' is
405 procedure Create_Simple
406 (Start_Index
: Integer;
408 Start_State
: out State_Index
;
409 End_State
: out State_Index
);
410 -- Fill the table for the regexp Simple.
411 -- This is the recursive procedure called to handle () expressions
412 -- If End_State = 0, then the call to Create_Simple creates an
413 -- independent regexp, not a concatenation
414 -- Start_Index .. End_Index is the starting index in the string S.
416 -- Warning: it may look like we are creating too many empty-string
417 -- transitions, but they are needed to get the correct regexp.
418 -- The table is filled as follow ( s means start-state, e means
421 -- regexp state_num | a b * empty_string
422 -- ------- ------------------------------
426 -- ab 1 (s) | 2 - - -
443 -- (a) 1 (s) | 2 - - -
459 function Next_Sub_Expression
460 (Start_Index
: Integer;
463 -- Returns the index of the last character of the next sub-expression
464 -- in Simple. Index cannot be greater than End_Index.
470 procedure Add_Empty_Char
471 (State
: State_Index
;
472 To_State
: State_Index
)
474 J
: Column_Index
:= Empty_Char
;
477 while Get
(Table
, State
, J
) /= 0 loop
481 Set
(Table
, State
, J
, To_State
);
484 -----------------------
485 -- Create_Repetition --
486 -----------------------
488 procedure Create_Repetition
489 (Repetition
: Character;
490 Start_Prev
: State_Index
;
491 End_Prev
: State_Index
;
492 New_Start
: out State_Index
;
493 New_End
: in out State_Index
)
496 New_Start
:= Current_State
+ 1;
499 Add_Empty_Char
(New_End
, New_Start
);
502 Current_State
:= Current_State
+ 2;
503 New_End
:= Current_State
;
505 Add_Empty_Char
(End_Prev
, New_End
);
506 Add_Empty_Char
(New_Start
, Start_Prev
);
508 if Repetition
/= '+' then
509 Add_Empty_Char
(New_Start
, New_End
);
512 if Repetition
/= '?' then
513 Add_Empty_Char
(New_End
, New_Start
);
515 end Create_Repetition
;
521 procedure Create_Simple
522 (Start_Index
: Integer;
524 Start_State
: out State_Index
;
525 End_State
: out State_Index
)
527 J
: Integer := Start_Index
;
528 Last_Start
: State_Index
:= 0;
533 while J
<= End_Index
loop
537 J_Start
: constant Integer := J
+ 1;
538 Next_Start
: State_Index
;
539 Next_End
: State_Index
;
542 J
:= Next_Sub_Expression
(J
, End_Index
);
543 Create_Simple
(J_Start
, J
- 1, Next_Start
, Next_End
);
546 and then (S
(J
+ 1) = '*' or else
547 S
(J
+ 1) = '+' or else
559 Last_Start
:= Next_Start
;
561 if End_State
/= 0 then
562 Add_Empty_Char
(End_State
, Last_Start
);
565 End_State
:= Next_End
;
571 Start_Prev
: constant State_Index
:= Start_State
;
572 End_Prev
: constant State_Index
:= End_State
;
573 Start_J
: constant Integer := J
+ 1;
574 Start_Next
: State_Index
:= 0;
575 End_Next
: State_Index
:= 0;
578 J
:= Next_Sub_Expression
(J
, End_Index
);
580 -- Create a new state for the start of the alternative
582 Current_State
:= Current_State
+ 1;
583 Last_Start
:= Current_State
;
584 Start_State
:= Last_Start
;
586 -- Create the tree for the second part of alternative
588 Create_Simple
(Start_J
, J
, Start_Next
, End_Next
);
590 -- Create the end state
592 Add_Empty_Char
(Last_Start
, Start_Next
);
593 Add_Empty_Char
(Last_Start
, Start_Prev
);
594 Current_State
:= Current_State
+ 1;
595 End_State
:= Current_State
;
596 Add_Empty_Char
(End_Prev
, End_State
);
597 Add_Empty_Char
(End_Next
, End_State
);
601 Current_State
:= Current_State
+ 1;
604 Next_State
: State_Index
:= Current_State
+ 1;
614 for Column
in 0 .. Alphabet_Size
loop
615 Set
(Table
, Current_State
, Column
,
616 Value
=> Current_State
+ 1);
620 -- Automatically add the first character
622 if S
(J
) = '-' or S
(J
) = ']' then
623 Set
(Table
, Current_State
, Map
(S
(J
)),
624 Value
=> Next_State
);
628 -- Loop till closing bracket found
631 exit when S
(J
) = Close_Bracket
;
634 and then S
(J
+ 1) /= ']'
637 Start
: constant Integer := J
- 1;
646 for Char
in S
(Start
) .. S
(J
) loop
647 Set
(Table
, Current_State
, Map
(Char
),
648 Value
=> Next_State
);
657 Set
(Table
, Current_State
, Map
(S
(J
)),
658 Value
=> Next_State
);
664 Current_State
:= Current_State
+ 1;
666 -- If the next symbol is a special symbol
669 and then (S
(J
+ 1) = '*' or else
670 S
(J
+ 1) = '+' or else
682 Last_Start
:= Current_State
- 1;
684 if End_State
/= 0 then
685 Add_Empty_Char
(End_State
, Last_Start
);
688 End_State
:= Current_State
;
691 when '*' |
'+' |
'?' | Close_Paren | Close_Bracket
=>
693 ("Incorrect character in regular expression :", J
);
696 Current_State
:= Current_State
+ 1;
698 -- Create the state for the symbol S (J)
701 for K
in 0 .. Alphabet_Size
loop
702 Set
(Table
, Current_State
, K
,
703 Value
=> Current_State
+ 1);
711 Set
(Table
, Current_State
, Map
(S
(J
)),
712 Value
=> Current_State
+ 1);
715 Current_State
:= Current_State
+ 1;
717 -- If the next symbol is a special symbol
720 and then (S
(J
+ 1) = '*' or else
721 S
(J
+ 1) = '+' or else
733 Last_Start
:= Current_State
- 1;
735 if End_State
/= 0 then
736 Add_Empty_Char
(End_State
, Last_Start
);
739 End_State
:= Current_State
;
744 if Start_State
= 0 then
745 Start_State
:= Last_Start
;
752 -------------------------
753 -- Next_Sub_Expression --
754 -------------------------
756 function Next_Sub_Expression
757 (Start_Index
: Integer;
761 J
: Integer := Start_Index
;
762 Start_On_Alter
: Boolean := False;
766 Start_On_Alter
:= True;
770 exit when J
= End_Index
;
780 exit when S
(J
) = Close_Bracket
;
788 J
:= Next_Sub_Expression
(J
, End_Index
);
794 if Start_On_Alter
then
804 end Next_Sub_Expression
;
806 -- Start of Create_Primary_Table
809 Table
.all := (others => (others => 0));
810 Create_Simple
(S
'First, S
'Last, Start_State
, End_State
);
811 Num_States
:= Current_State
;
812 end Create_Primary_Table
;
814 -------------------------------
815 -- Create_Primary_Table_Glob --
816 -------------------------------
818 procedure Create_Primary_Table_Glob
819 (Table
: out Regexp_Array_Access
;
820 Num_States
: out State_Index
;
821 Start_State
: out State_Index
;
822 End_State
: out State_Index
)
824 Empty_Char
: constant Column_Index
:= Alphabet_Size
+ 1;
826 Current_State
: State_Index
:= 0;
827 -- Index of the last created state
829 procedure Add_Empty_Char
830 (State
: State_Index
;
831 To_State
: State_Index
);
832 -- Add a empty-character transition from State to To_State
834 procedure Create_Simple
835 (Start_Index
: Integer;
837 Start_State
: out State_Index
;
838 End_State
: out State_Index
);
839 -- Fill the table for the S (Start_Index .. End_Index).
840 -- This is the recursive procedure called to handle () expressions
846 procedure Add_Empty_Char
847 (State
: State_Index
;
848 To_State
: State_Index
)
850 J
: Column_Index
:= Empty_Char
;
853 while Get
(Table
, State
, J
) /= 0 loop
857 Set
(Table
, State
, J
,
865 procedure Create_Simple
866 (Start_Index
: Integer;
868 Start_State
: out State_Index
;
869 End_State
: out State_Index
)
871 J
: Integer := Start_Index
;
872 Last_Start
: State_Index
:= 0;
878 while J
<= End_Index
loop
882 Current_State
:= Current_State
+ 1;
885 Next_State
: State_Index
:= Current_State
+ 1;
894 for Column
in 0 .. Alphabet_Size
loop
895 Set
(Table
, Current_State
, Column
,
896 Value
=> Current_State
+ 1);
900 -- Automatically add the first character
902 if S
(J
) = '-' or S
(J
) = ']' then
903 Set
(Table
, Current_State
, Map
(S
(J
)),
904 Value
=> Current_State
);
908 -- Loop till closing bracket found
911 exit when S
(J
) = Close_Bracket
;
914 and then S
(J
+ 1) /= ']'
917 Start
: constant Integer := J
- 1;
925 for Char
in S
(Start
) .. S
(J
) loop
926 Set
(Table
, Current_State
, Map
(Char
),
927 Value
=> Next_State
);
936 Set
(Table
, Current_State
, Map
(S
(J
)),
937 Value
=> Next_State
);
943 Last_Start
:= Current_State
;
944 Current_State
:= Current_State
+ 1;
946 if End_State
/= 0 then
947 Add_Empty_Char
(End_State
, Last_Start
);
950 End_State
:= Current_State
;
955 Start_Regexp_Sub
: State_Index
;
956 End_Regexp_Sub
: State_Index
;
957 Create_Start
: State_Index
:= 0;
959 Create_End
: State_Index
:= 0;
960 -- Initialized to avoid junk warning
963 while S
(J
) /= '}' loop
965 -- First step : find sub pattern
968 while S
(End_Sub
) /= ','
969 and then S
(End_Sub
) /= '}'
971 End_Sub
:= End_Sub
+ 1;
974 -- Second step : create a sub pattern
984 -- Third step : create an alternative
986 if Create_Start
= 0 then
987 Current_State
:= Current_State
+ 1;
988 Create_Start
:= Current_State
;
989 Add_Empty_Char
(Create_Start
, Start_Regexp_Sub
);
990 Current_State
:= Current_State
+ 1;
991 Create_End
:= Current_State
;
992 Add_Empty_Char
(End_Regexp_Sub
, Create_End
);
995 Current_State
:= Current_State
+ 1;
996 Add_Empty_Char
(Current_State
, Create_Start
);
997 Create_Start
:= Current_State
;
998 Add_Empty_Char
(Create_Start
, Start_Regexp_Sub
);
999 Add_Empty_Char
(End_Regexp_Sub
, Create_End
);
1003 if End_State
/= 0 then
1004 Add_Empty_Char
(End_State
, Create_Start
);
1007 End_State
:= Create_End
;
1008 Last_Start
:= Create_Start
;
1012 Current_State
:= Current_State
+ 1;
1014 if End_State
/= 0 then
1015 Add_Empty_Char
(End_State
, Current_State
);
1018 Add_Empty_Char
(Current_State
, Current_State
+ 1);
1019 Add_Empty_Char
(Current_State
, Current_State
+ 3);
1020 Last_Start
:= Current_State
;
1022 Current_State
:= Current_State
+ 1;
1024 for K
in 0 .. Alphabet_Size
loop
1025 Set
(Table
, Current_State
, K
,
1026 Value
=> Current_State
+ 1);
1029 Current_State
:= Current_State
+ 1;
1030 Add_Empty_Char
(Current_State
, Current_State
+ 1);
1032 Current_State
:= Current_State
+ 1;
1033 Add_Empty_Char
(Current_State
, Last_Start
);
1034 End_State
:= Current_State
;
1037 Current_State
:= Current_State
+ 1;
1040 for K
in 0 .. Alphabet_Size
loop
1041 Set
(Table
, Current_State
, K
,
1042 Value
=> Current_State
+ 1);
1050 -- Create the state for the symbol S (J)
1052 Set
(Table
, Current_State
, Map
(S
(J
)),
1053 Value
=> Current_State
+ 1);
1056 Last_Start
:= Current_State
;
1057 Current_State
:= Current_State
+ 1;
1059 if End_State
/= 0 then
1060 Add_Empty_Char
(End_State
, Last_Start
);
1063 End_State
:= Current_State
;
1067 if Start_State
= 0 then
1068 Start_State
:= Last_Start
;
1075 -- Start of processing for Create_Primary_Table_Glob
1078 Table
.all := (others => (others => 0));
1079 Create_Simple
(S
'First, S
'Last, Start_State
, End_State
);
1080 Num_States
:= Current_State
;
1081 end Create_Primary_Table_Glob
;
1083 ----------------------------
1084 -- Create_Secondary_Table --
1085 ----------------------------
1087 function Create_Secondary_Table
1088 (First_Table
: Regexp_Array_Access
;
1089 Num_States
: State_Index
;
1090 Start_State
: State_Index
;
1091 End_State
: State_Index
) return Regexp
1093 pragma Warnings
(Off
, Num_States
);
1095 Last_Index
: constant State_Index
:= First_Table
'Last (1);
1096 type Meta_State
is array (1 .. Last_Index
) of Boolean;
1098 Table
: Regexp_Array
(1 .. Last_Index
, 0 .. Alphabet_Size
) :=
1099 (others => (others => 0));
1101 Meta_States
: array (1 .. Last_Index
+ 1) of Meta_State
:=
1102 (others => (others => False));
1104 Temp_State_Not_Null
: Boolean;
1106 Is_Final
: Boolean_Array
(1 .. Last_Index
) := (others => False);
1108 Current_State
: State_Index
:= 1;
1109 Nb_State
: State_Index
:= 1;
1112 (State
: in out Meta_State
;
1113 Item
: State_Index
);
1114 -- Compute the closure of the state (that is every other state which
1115 -- has a empty-character transition) and add it to the state
1122 (State
: in out Meta_State
;
1126 if State
(Item
) then
1130 State
(Item
) := True;
1132 for Column
in Alphabet_Size
+ 1 .. First_Table
'Last (2) loop
1133 if First_Table
(Item
, Column
) = 0 then
1137 Closure
(State
, First_Table
(Item
, Column
));
1141 -- Start of processing for Create_Secondary_Table
1144 -- Create a new state
1146 Closure
(Meta_States
(Current_State
), Start_State
);
1148 while Current_State
<= Nb_State
loop
1150 -- If this new meta-state includes the primary table end state,
1151 -- then this meta-state will be a final state in the regexp
1153 if Meta_States
(Current_State
)(End_State
) then
1154 Is_Final
(Current_State
) := True;
1157 -- For every character in the regexp, calculate the possible
1158 -- transitions from Current_State
1160 for Column
in 0 .. Alphabet_Size
loop
1161 Meta_States
(Nb_State
+ 1) := (others => False);
1162 Temp_State_Not_Null
:= False;
1164 for K
in Meta_States
(Current_State
)'Range loop
1165 if Meta_States
(Current_State
)(K
)
1166 and then First_Table
(K
, Column
) /= 0
1169 (Meta_States
(Nb_State
+ 1), First_Table
(K
, Column
));
1170 Temp_State_Not_Null
:= True;
1174 -- If at least one transition existed
1176 if Temp_State_Not_Null
then
1178 -- Check if this new state corresponds to an old one
1180 for K
in 1 .. Nb_State
loop
1181 if Meta_States
(K
) = Meta_States
(Nb_State
+ 1) then
1182 Table
(Current_State
, Column
) := K
;
1187 -- If not, create a new state
1189 if Table
(Current_State
, Column
) = 0 then
1190 Nb_State
:= Nb_State
+ 1;
1191 Table
(Current_State
, Column
) := Nb_State
;
1196 Current_State
:= Current_State
+ 1;
1199 -- Returns the regexp
1205 R
:= new Regexp_Value
(Alphabet_Size
=> Alphabet_Size
,
1206 Num_States
=> Nb_State
);
1208 R
.Is_Final
:= Is_Final
(1 .. Nb_State
);
1209 R
.Case_Sensitive
:= Case_Sensitive
;
1211 for State
in 1 .. Nb_State
loop
1212 for K
in 0 .. Alphabet_Size
loop
1213 R
.States
(State
, K
) := Table
(State
, K
);
1217 return (Ada
.Finalization
.Controlled
with R
=> R
);
1219 end Create_Secondary_Table
;
1221 ---------------------
1222 -- Raise_Exception --
1223 ---------------------
1225 procedure Raise_Exception
(M
: String; Index
: Integer) is
1227 raise Error_In_Regexp
with M
& " at offset " & Index
'Img;
1228 end Raise_Exception
;
1230 -- Start of processing for Compile
1233 -- Special case for the empty string: it always matches, and the
1234 -- following processing would fail on it.
1236 return (Ada
.Finalization
.Controlled
with
1237 R
=> new Regexp_Value
'
1238 (Alphabet_Size => 0,
1240 Map => (others => 0),
1241 States => (others => (others => 1)),
1242 Is_Final => (others => True),
1243 Case_Sensitive => True));
1246 if not Case_Sensitive then
1247 System.Case_Util.To_Lower (S);
1252 -- Creates the primary table
1255 Table : Regexp_Array_Access;
1256 Num_States : State_Index;
1257 Start_State : State_Index;
1258 End_State : State_Index;
1262 Table := new Regexp_Array (1 .. 100,
1263 0 .. Alphabet_Size + 10);
1265 Create_Primary_Table (Table, Num_States, Start_State, End_State);
1267 Create_Primary_Table_Glob
1268 (Table, Num_States, Start_State, End_State);
1271 -- Creates the secondary table
1273 R := Create_Secondary_Table
1274 (Table, Num_States, Start_State, End_State);
1284 procedure Finalize (R : in out Regexp) is
1285 procedure Free is new
1286 Ada.Unchecked_Deallocation (Regexp_Value, Regexp_Access);
1297 (Table : Regexp_Array_Access;
1298 State : State_Index;
1299 Column : Column_Index) return State_Index
1302 if State <= Table'Last (1)
1303 and then Column <= Table'Last (2)
1305 return Table (State, Column);
1315 function Match (S : String; R : Regexp) return Boolean is
1316 Current_State : State_Index := 1;
1320 raise Constraint_Error;
1323 for Char in S'Range loop
1325 if R.R.Case_Sensitive then
1326 Current_State := R.R.States (Current_State, R.R.Map (S (Char)));
1329 R.R.States (Current_State,
1330 R.R.Map (System.Case_Util.To_Lower (S (Char))));
1333 if Current_State = 0 then
1339 return R.R.Is_Final (Current_State);
1347 (Table : in out Regexp_Array_Access;
1348 State : State_Index;
1349 Column : Column_Index;
1350 Value : State_Index)
1352 New_Lines : State_Index;
1353 New_Columns : Column_Index;
1354 New_Table : Regexp_Array_Access;
1357 if State <= Table'Last (1)
1358 and then Column <= Table'Last (2)
1360 Table (State, Column) := Value;
1362 -- Doubles the size of the table until it is big enough that
1363 -- (State, Column) is a valid index
1365 New_Lines := Table'Last (1) * (State / Table'Last (1) + 1);
1366 New_Columns := Table'Last (2) * (Column / Table'Last (2) + 1);
1367 New_Table := new Regexp_Array (Table'First (1) .. New_Lines,
1368 Table'First (2) .. New_Columns);
1369 New_Table.all := (others => (others => 0));
1371 for J in Table'Range (1) loop
1372 for K in Table'Range (2) loop
1373 New_Table (J, K) := Table (J, K);
1379 Table (State, Column) := Value;