1 ------------------------------------------------------------------------------
3 -- GNAT COMPILER COMPONENTS --
5 -- S Y S T E M . R E G E X P --
9 -- Copyright (C) 1999-2007, AdaCore --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 2, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
17 -- for more details. You should have received a copy of the GNU General --
18 -- Public License distributed with GNAT; see file COPYING. If not, write --
19 -- to the Free Software Foundation, 51 Franklin Street, Fifth Floor, --
20 -- Boston, MA 02110-1301, USA. --
22 -- As a special exception, if other files instantiate generics from this --
23 -- unit, or you link this unit with other files to produce an executable, --
24 -- this unit does not by itself cause the resulting executable to be --
25 -- covered by the GNU General Public License. This exception does not --
26 -- however invalidate any other reasons why the executable file might be --
27 -- covered by the GNU Public License. --
29 -- GNAT was originally developed by the GNAT team at New York University. --
30 -- Extensive contributions were provided by Ada Core Technologies Inc. --
32 ------------------------------------------------------------------------------
34 with Ada
.Unchecked_Deallocation
;
35 -- with Ada.Exceptions;
37 with System
.Case_Util
;
39 package body System
.Regexp
is
41 Open_Paren
: constant Character := '(';
42 Close_Paren
: constant Character := ')';
43 Open_Bracket
: constant Character := '[';
44 Close_Bracket
: constant Character := ']';
46 type State_Index
is new Natural;
47 type Column_Index
is new Natural;
49 type Regexp_Array
is array
50 (State_Index
range <>, Column_Index
range <>) of State_Index
;
51 -- First index is for the state number
52 -- Second index is for the character type
53 -- Contents is the new State
55 type Regexp_Array_Access
is access Regexp_Array
;
56 -- Use this type through the functions Set below, so that it
57 -- can grow dynamically depending on the needs.
59 type Mapping
is array (Character'Range) of Column_Index
;
60 -- Mapping between characters and column in the Regexp_Array
62 type Boolean_Array
is array (State_Index
range <>) of Boolean;
65 (Alphabet_Size
: Column_Index
;
66 Num_States
: State_Index
) is
69 States
: Regexp_Array
(1 .. Num_States
, 0 .. Alphabet_Size
);
70 Is_Final
: Boolean_Array
(1 .. Num_States
);
71 Case_Sensitive
: Boolean;
73 -- Deterministic finite-state machine
75 -----------------------
76 -- Local Subprograms --
77 -----------------------
80 (Table
: in out Regexp_Array_Access
;
82 Column
: Column_Index
;
84 -- Sets a value in the table. If the table is too small, reallocate it
85 -- dynamically so that (State, Column) is a valid index in it.
88 (Table
: Regexp_Array_Access
;
90 Column
: Column_Index
)
92 -- Returns the value in the table at (State, Column).
93 -- If this index does not exist in the table, returns 0
95 procedure Free
is new Ada
.Unchecked_Deallocation
96 (Regexp_Array
, Regexp_Array_Access
);
102 procedure Adjust
(R
: in out Regexp
) is
106 Tmp
:= new Regexp_Value
(Alphabet_Size
=> R
.R
.Alphabet_Size
,
107 Num_States
=> R
.R
.Num_States
);
118 Glob
: Boolean := False;
119 Case_Sensitive
: Boolean := True)
122 S
: String := Pattern
;
123 -- The pattern which is really compiled (when the pattern is case
124 -- insensitive, we convert this string to lower-cases
126 Map
: Mapping
:= (others => 0);
127 -- Mapping between characters and columns in the tables
129 Alphabet_Size
: Column_Index
:= 0;
130 -- Number of significant characters in the regular expression.
131 -- This total does not include special operators, such as *, (, ...
133 procedure Create_Mapping
;
134 -- Creates a mapping between characters in the regexp and columns
135 -- in the tables representing the regexp. Test that the regexp is
136 -- well-formed Modifies Alphabet_Size and Map
138 procedure Create_Primary_Table
139 (Table
: out Regexp_Array_Access
;
140 Num_States
: out State_Index
;
141 Start_State
: out State_Index
;
142 End_State
: out State_Index
);
143 -- Creates the first version of the regexp (this is a non deterministic
144 -- finite state machine, which is unadapted for a fast pattern
145 -- matching algorithm). We use a recursive algorithm to process the
146 -- parenthesis sub-expressions.
148 -- Table : at the end of the procedure : Column 0 is for any character
149 -- ('.') and the last columns are for no character (closure)
150 -- Num_States is set to the number of states in the table
151 -- Start_State is the number of the starting state in the regexp
152 -- End_State is the number of the final state when the regexp matches
154 procedure Create_Primary_Table_Glob
155 (Table
: out Regexp_Array_Access
;
156 Num_States
: out State_Index
;
157 Start_State
: out State_Index
;
158 End_State
: out State_Index
);
159 -- Same function as above, but it deals with the second possible
160 -- grammar for 'globbing pattern', which is a kind of subset of the
161 -- whole regular expression grammar.
163 function Create_Secondary_Table
164 (First_Table
: Regexp_Array_Access
;
165 Num_States
: State_Index
;
166 Start_State
: State_Index
;
167 End_State
: State_Index
)
169 -- Creates the definitive table representing the regular expression
170 -- This is actually a transformation of the primary table First_Table,
171 -- where every state is grouped with the states in its 'no-character'
172 -- columns. The transitions between the new states are then recalculated
173 -- and if necessary some new states are created.
175 -- Note that the resulting finite-state machine is not optimized in
176 -- terms of the number of states : it would be more time-consuming to
177 -- add a third pass to reduce the number of states in the machine, with
178 -- no speed improvement...
180 procedure Raise_Exception
(M
: String; Index
: Integer);
181 pragma No_Return
(Raise_Exception
);
182 -- Raise an exception, indicating an error at character Index in S
188 procedure Create_Mapping
is
190 procedure Add_In_Map
(C
: Character);
191 -- Add a character in the mapping, if it is not already defined
197 procedure Add_In_Map
(C
: Character) is
200 Alphabet_Size
:= Alphabet_Size
+ 1;
201 Map
(C
) := Alphabet_Size
;
205 J
: Integer := S
'First;
206 Parenthesis_Level
: Integer := 0;
207 Curly_Level
: Integer := 0;
209 -- Start of processing for Create_Mapping
212 while J
<= S
'Last loop
221 if S
(J
) = ']' or S
(J
) = '-' then
225 -- The first character never has a special meaning
230 ("Ran out of characters while parsing ", J
);
233 exit when S
(J
) = Close_Bracket
;
236 and then S
(J
+ 1) /= Close_Bracket
239 Start
: constant Integer := J
- 1;
248 for Char
in S
(Start
) .. S
(J
) loop
263 -- A close bracket must follow a open_bracket,
264 -- and cannot be found alone on the line
266 when Close_Bracket
=>
268 ("Incorrect character ']' in regular expression", J
);
276 -- \ not allowed at the end of the regexp
279 ("Incorrect character '\' in regular expression", J
);
284 Parenthesis_Level
:= Parenthesis_Level
+ 1;
286 Add_In_Map
(Open_Paren
);
291 Parenthesis_Level
:= Parenthesis_Level
- 1;
293 if Parenthesis_Level
< 0 then
295 ("')' is not associated with '(' in regular "
299 if S
(J
- 1) = Open_Paren
then
301 ("Empty parenthesis not allowed in regular "
306 Add_In_Map
(Close_Paren
);
318 Curly_Level
:= Curly_Level
+ 1;
325 Curly_Level
:= Curly_Level
- 1;
332 ("'*', '+', '?' and '|' operators cannot be in "
333 & "first position in regular expression", J
);
341 -- These operators must apply to a sub-expression,
342 -- and cannot be found at the beginning of the line
345 ("'*', '+', '?' and '|' operators cannot be in "
346 & "first position in regular expression", J
);
360 -- A closing parenthesis must follow an open parenthesis
362 if Parenthesis_Level
/= 0 then
364 ("'(' must always be associated with a ')'", J
);
367 if Curly_Level
/= 0 then
369 ("'{' must always be associated with a '}'", J
);
373 --------------------------
374 -- Create_Primary_Table --
375 --------------------------
377 procedure Create_Primary_Table
378 (Table
: out Regexp_Array_Access
;
379 Num_States
: out State_Index
;
380 Start_State
: out State_Index
;
381 End_State
: out State_Index
)
383 Empty_Char
: constant Column_Index
:= Alphabet_Size
+ 1;
385 Current_State
: State_Index
:= 0;
386 -- Index of the last created state
388 procedure Add_Empty_Char
389 (State
: State_Index
;
390 To_State
: State_Index
);
391 -- Add a empty-character transition from State to To_State
393 procedure Create_Repetition
394 (Repetition
: Character;
395 Start_Prev
: State_Index
;
396 End_Prev
: State_Index
;
397 New_Start
: out State_Index
;
398 New_End
: in out State_Index
);
399 -- Create the table in case we have a '*', '+' or '?'.
400 -- Start_Prev .. End_Prev should indicate respectively the start and
401 -- end index of the previous expression, to which '*', '+' or '?' is
404 procedure Create_Simple
405 (Start_Index
: Integer;
407 Start_State
: out State_Index
;
408 End_State
: out State_Index
);
409 -- Fill the table for the regexp Simple.
410 -- This is the recursive procedure called to handle () expressions
411 -- If End_State = 0, then the call to Create_Simple creates an
412 -- independent regexp, not a concatenation
413 -- Start_Index .. End_Index is the starting index in the string S.
415 -- Warning: it may look like we are creating too many empty-string
416 -- transitions, but they are needed to get the correct regexp.
417 -- The table is filled as follow ( s means start-state, e means
420 -- regexp state_num | a b * empty_string
421 -- ------- ------------------------------
425 -- ab 1 (s) | 2 - - -
442 -- (a) 1 (s) | 2 - - -
458 function Next_Sub_Expression
459 (Start_Index
: Integer;
462 -- Returns the index of the last character of the next sub-expression
463 -- in Simple. Index cannot be greater than End_Index.
469 procedure Add_Empty_Char
470 (State
: State_Index
;
471 To_State
: State_Index
)
473 J
: Column_Index
:= Empty_Char
;
476 while Get
(Table
, State
, J
) /= 0 loop
480 Set
(Table
, State
, J
, To_State
);
483 -----------------------
484 -- Create_Repetition --
485 -----------------------
487 procedure Create_Repetition
488 (Repetition
: Character;
489 Start_Prev
: State_Index
;
490 End_Prev
: State_Index
;
491 New_Start
: out State_Index
;
492 New_End
: in out State_Index
)
495 New_Start
:= Current_State
+ 1;
498 Add_Empty_Char
(New_End
, New_Start
);
501 Current_State
:= Current_State
+ 2;
502 New_End
:= Current_State
;
504 Add_Empty_Char
(End_Prev
, New_End
);
505 Add_Empty_Char
(New_Start
, Start_Prev
);
507 if Repetition
/= '+' then
508 Add_Empty_Char
(New_Start
, New_End
);
511 if Repetition
/= '?' then
512 Add_Empty_Char
(New_End
, New_Start
);
514 end Create_Repetition
;
520 procedure Create_Simple
521 (Start_Index
: Integer;
523 Start_State
: out State_Index
;
524 End_State
: out State_Index
)
526 J
: Integer := Start_Index
;
527 Last_Start
: State_Index
:= 0;
532 while J
<= End_Index
loop
536 J_Start
: constant Integer := J
+ 1;
537 Next_Start
: State_Index
;
538 Next_End
: State_Index
;
541 J
:= Next_Sub_Expression
(J
, End_Index
);
542 Create_Simple
(J_Start
, J
- 1, Next_Start
, Next_End
);
545 and then (S
(J
+ 1) = '*' or else
546 S
(J
+ 1) = '+' or else
558 Last_Start
:= Next_Start
;
560 if End_State
/= 0 then
561 Add_Empty_Char
(End_State
, Last_Start
);
564 End_State
:= Next_End
;
570 Start_Prev
: constant State_Index
:= Start_State
;
571 End_Prev
: constant State_Index
:= End_State
;
572 Start_J
: constant Integer := J
+ 1;
573 Start_Next
: State_Index
:= 0;
574 End_Next
: State_Index
:= 0;
577 J
:= Next_Sub_Expression
(J
, End_Index
);
579 -- Create a new state for the start of the alternative
581 Current_State
:= Current_State
+ 1;
582 Last_Start
:= Current_State
;
583 Start_State
:= Last_Start
;
585 -- Create the tree for the second part of alternative
587 Create_Simple
(Start_J
, J
, Start_Next
, End_Next
);
589 -- Create the end state
591 Add_Empty_Char
(Last_Start
, Start_Next
);
592 Add_Empty_Char
(Last_Start
, Start_Prev
);
593 Current_State
:= Current_State
+ 1;
594 End_State
:= Current_State
;
595 Add_Empty_Char
(End_Prev
, End_State
);
596 Add_Empty_Char
(End_Next
, End_State
);
600 Current_State
:= Current_State
+ 1;
603 Next_State
: State_Index
:= Current_State
+ 1;
613 for Column
in 0 .. Alphabet_Size
loop
614 Set
(Table
, Current_State
, Column
,
615 Value
=> Current_State
+ 1);
619 -- Automatically add the first character
621 if S
(J
) = '-' or S
(J
) = ']' then
622 Set
(Table
, Current_State
, Map
(S
(J
)),
623 Value
=> Next_State
);
627 -- Loop till closing bracket found
630 exit when S
(J
) = Close_Bracket
;
633 and then S
(J
+ 1) /= ']'
636 Start
: constant Integer := J
- 1;
645 for Char
in S
(Start
) .. S
(J
) loop
646 Set
(Table
, Current_State
, Map
(Char
),
647 Value
=> Next_State
);
656 Set
(Table
, Current_State
, Map
(S
(J
)),
657 Value
=> Next_State
);
663 Current_State
:= Current_State
+ 1;
665 -- If the next symbol is a special symbol
668 and then (S
(J
+ 1) = '*' or else
669 S
(J
+ 1) = '+' or else
681 Last_Start
:= Current_State
- 1;
683 if End_State
/= 0 then
684 Add_Empty_Char
(End_State
, Last_Start
);
687 End_State
:= Current_State
;
690 when '*' |
'+' |
'?' | Close_Paren | Close_Bracket
=>
692 ("Incorrect character in regular expression :", J
);
695 Current_State
:= Current_State
+ 1;
697 -- Create the state for the symbol S (J)
700 for K
in 0 .. Alphabet_Size
loop
701 Set
(Table
, Current_State
, K
,
702 Value
=> Current_State
+ 1);
710 Set
(Table
, Current_State
, Map
(S
(J
)),
711 Value
=> Current_State
+ 1);
714 Current_State
:= Current_State
+ 1;
716 -- If the next symbol is a special symbol
719 and then (S
(J
+ 1) = '*' or else
720 S
(J
+ 1) = '+' or else
732 Last_Start
:= Current_State
- 1;
734 if End_State
/= 0 then
735 Add_Empty_Char
(End_State
, Last_Start
);
738 End_State
:= Current_State
;
743 if Start_State
= 0 then
744 Start_State
:= Last_Start
;
751 -------------------------
752 -- Next_Sub_Expression --
753 -------------------------
755 function Next_Sub_Expression
756 (Start_Index
: Integer;
760 J
: Integer := Start_Index
;
761 Start_On_Alter
: Boolean := False;
765 Start_On_Alter
:= True;
769 exit when J
= End_Index
;
779 exit when S
(J
) = Close_Bracket
;
787 J
:= Next_Sub_Expression
(J
, End_Index
);
793 if Start_On_Alter
then
803 end Next_Sub_Expression
;
805 -- Start of Create_Primary_Table
808 Table
.all := (others => (others => 0));
809 Create_Simple
(S
'First, S
'Last, Start_State
, End_State
);
810 Num_States
:= Current_State
;
811 end Create_Primary_Table
;
813 -------------------------------
814 -- Create_Primary_Table_Glob --
815 -------------------------------
817 procedure Create_Primary_Table_Glob
818 (Table
: out Regexp_Array_Access
;
819 Num_States
: out State_Index
;
820 Start_State
: out State_Index
;
821 End_State
: out State_Index
)
823 Empty_Char
: constant Column_Index
:= Alphabet_Size
+ 1;
825 Current_State
: State_Index
:= 0;
826 -- Index of the last created state
828 procedure Add_Empty_Char
829 (State
: State_Index
;
830 To_State
: State_Index
);
831 -- Add a empty-character transition from State to To_State
833 procedure Create_Simple
834 (Start_Index
: Integer;
836 Start_State
: out State_Index
;
837 End_State
: out State_Index
);
838 -- Fill the table for the S (Start_Index .. End_Index).
839 -- This is the recursive procedure called to handle () expressions
845 procedure Add_Empty_Char
846 (State
: State_Index
;
847 To_State
: State_Index
)
849 J
: Column_Index
:= Empty_Char
;
852 while Get
(Table
, State
, J
) /= 0 loop
856 Set
(Table
, State
, J
,
864 procedure Create_Simple
865 (Start_Index
: Integer;
867 Start_State
: out State_Index
;
868 End_State
: out State_Index
)
870 J
: Integer := Start_Index
;
871 Last_Start
: State_Index
:= 0;
877 while J
<= End_Index
loop
881 Current_State
:= Current_State
+ 1;
884 Next_State
: State_Index
:= Current_State
+ 1;
893 for Column
in 0 .. Alphabet_Size
loop
894 Set
(Table
, Current_State
, Column
,
895 Value
=> Current_State
+ 1);
899 -- Automatically add the first character
901 if S
(J
) = '-' or S
(J
) = ']' then
902 Set
(Table
, Current_State
, Map
(S
(J
)),
903 Value
=> Current_State
);
907 -- Loop till closing bracket found
910 exit when S
(J
) = Close_Bracket
;
913 and then S
(J
+ 1) /= ']'
916 Start
: constant Integer := J
- 1;
924 for Char
in S
(Start
) .. S
(J
) loop
925 Set
(Table
, Current_State
, Map
(Char
),
926 Value
=> Next_State
);
935 Set
(Table
, Current_State
, Map
(S
(J
)),
936 Value
=> Next_State
);
942 Last_Start
:= Current_State
;
943 Current_State
:= Current_State
+ 1;
945 if End_State
/= 0 then
946 Add_Empty_Char
(End_State
, Last_Start
);
949 End_State
:= Current_State
;
954 Start_Regexp_Sub
: State_Index
;
955 End_Regexp_Sub
: State_Index
;
956 Create_Start
: State_Index
:= 0;
958 Create_End
: State_Index
:= 0;
959 -- Initialized to avoid junk warning
962 while S
(J
) /= '}' loop
964 -- First step : find sub pattern
967 while S
(End_Sub
) /= ','
968 and then S
(End_Sub
) /= '}'
970 End_Sub
:= End_Sub
+ 1;
973 -- Second step : create a sub pattern
983 -- Third step : create an alternative
985 if Create_Start
= 0 then
986 Current_State
:= Current_State
+ 1;
987 Create_Start
:= Current_State
;
988 Add_Empty_Char
(Create_Start
, Start_Regexp_Sub
);
989 Current_State
:= Current_State
+ 1;
990 Create_End
:= Current_State
;
991 Add_Empty_Char
(End_Regexp_Sub
, Create_End
);
994 Current_State
:= Current_State
+ 1;
995 Add_Empty_Char
(Current_State
, Create_Start
);
996 Create_Start
:= Current_State
;
997 Add_Empty_Char
(Create_Start
, Start_Regexp_Sub
);
998 Add_Empty_Char
(End_Regexp_Sub
, Create_End
);
1002 if End_State
/= 0 then
1003 Add_Empty_Char
(End_State
, Create_Start
);
1006 End_State
:= Create_End
;
1007 Last_Start
:= Create_Start
;
1011 Current_State
:= Current_State
+ 1;
1013 if End_State
/= 0 then
1014 Add_Empty_Char
(End_State
, Current_State
);
1017 Add_Empty_Char
(Current_State
, Current_State
+ 1);
1018 Add_Empty_Char
(Current_State
, Current_State
+ 3);
1019 Last_Start
:= Current_State
;
1021 Current_State
:= Current_State
+ 1;
1023 for K
in 0 .. Alphabet_Size
loop
1024 Set
(Table
, Current_State
, K
,
1025 Value
=> Current_State
+ 1);
1028 Current_State
:= Current_State
+ 1;
1029 Add_Empty_Char
(Current_State
, Current_State
+ 1);
1031 Current_State
:= Current_State
+ 1;
1032 Add_Empty_Char
(Current_State
, Last_Start
);
1033 End_State
:= Current_State
;
1036 Current_State
:= Current_State
+ 1;
1039 for K
in 0 .. Alphabet_Size
loop
1040 Set
(Table
, Current_State
, K
,
1041 Value
=> Current_State
+ 1);
1049 -- Create the state for the symbol S (J)
1051 Set
(Table
, Current_State
, Map
(S
(J
)),
1052 Value
=> Current_State
+ 1);
1055 Last_Start
:= Current_State
;
1056 Current_State
:= Current_State
+ 1;
1058 if End_State
/= 0 then
1059 Add_Empty_Char
(End_State
, Last_Start
);
1062 End_State
:= Current_State
;
1066 if Start_State
= 0 then
1067 Start_State
:= Last_Start
;
1074 -- Start of processing for Create_Primary_Table_Glob
1077 Table
.all := (others => (others => 0));
1078 Create_Simple
(S
'First, S
'Last, Start_State
, End_State
);
1079 Num_States
:= Current_State
;
1080 end Create_Primary_Table_Glob
;
1082 ----------------------------
1083 -- Create_Secondary_Table --
1084 ----------------------------
1086 function Create_Secondary_Table
1087 (First_Table
: Regexp_Array_Access
;
1088 Num_States
: State_Index
;
1089 Start_State
: State_Index
;
1090 End_State
: State_Index
) return Regexp
1092 pragma Warnings
(Off
, Num_States
);
1094 Last_Index
: constant State_Index
:= First_Table
'Last (1);
1095 type Meta_State
is array (1 .. Last_Index
) of Boolean;
1097 Table
: Regexp_Array
(1 .. Last_Index
, 0 .. Alphabet_Size
) :=
1098 (others => (others => 0));
1100 Meta_States
: array (1 .. Last_Index
+ 1) of Meta_State
:=
1101 (others => (others => False));
1103 Temp_State_Not_Null
: Boolean;
1105 Is_Final
: Boolean_Array
(1 .. Last_Index
) := (others => False);
1107 Current_State
: State_Index
:= 1;
1108 Nb_State
: State_Index
:= 1;
1111 (State
: in out Meta_State
;
1112 Item
: State_Index
);
1113 -- Compute the closure of the state (that is every other state which
1114 -- has a empty-character transition) and add it to the state
1121 (State
: in out Meta_State
;
1125 if State
(Item
) then
1129 State
(Item
) := True;
1131 for Column
in Alphabet_Size
+ 1 .. First_Table
'Last (2) loop
1132 if First_Table
(Item
, Column
) = 0 then
1136 Closure
(State
, First_Table
(Item
, Column
));
1140 -- Start of processing for Create_Secondary_Table
1143 -- Create a new state
1145 Closure
(Meta_States
(Current_State
), Start_State
);
1147 while Current_State
<= Nb_State
loop
1149 -- If this new meta-state includes the primary table end state,
1150 -- then this meta-state will be a final state in the regexp
1152 if Meta_States
(Current_State
)(End_State
) then
1153 Is_Final
(Current_State
) := True;
1156 -- For every character in the regexp, calculate the possible
1157 -- transitions from Current_State
1159 for Column
in 0 .. Alphabet_Size
loop
1160 Meta_States
(Nb_State
+ 1) := (others => False);
1161 Temp_State_Not_Null
:= False;
1163 for K
in Meta_States
(Current_State
)'Range loop
1164 if Meta_States
(Current_State
)(K
)
1165 and then First_Table
(K
, Column
) /= 0
1168 (Meta_States
(Nb_State
+ 1), First_Table
(K
, Column
));
1169 Temp_State_Not_Null
:= True;
1173 -- If at least one transition existed
1175 if Temp_State_Not_Null
then
1177 -- Check if this new state corresponds to an old one
1179 for K
in 1 .. Nb_State
loop
1180 if Meta_States
(K
) = Meta_States
(Nb_State
+ 1) then
1181 Table
(Current_State
, Column
) := K
;
1186 -- If not, create a new state
1188 if Table
(Current_State
, Column
) = 0 then
1189 Nb_State
:= Nb_State
+ 1;
1190 Table
(Current_State
, Column
) := Nb_State
;
1195 Current_State
:= Current_State
+ 1;
1198 -- Returns the regexp
1204 R
:= new Regexp_Value
(Alphabet_Size
=> Alphabet_Size
,
1205 Num_States
=> Nb_State
);
1207 R
.Is_Final
:= Is_Final
(1 .. Nb_State
);
1208 R
.Case_Sensitive
:= Case_Sensitive
;
1210 for State
in 1 .. Nb_State
loop
1211 for K
in 0 .. Alphabet_Size
loop
1212 R
.States
(State
, K
) := Table
(State
, K
);
1216 return (Ada
.Finalization
.Controlled
with R
=> R
);
1218 end Create_Secondary_Table
;
1220 ---------------------
1221 -- Raise_Exception --
1222 ---------------------
1224 procedure Raise_Exception
(M
: String; Index
: Integer) is
1226 raise Error_In_Regexp
with M
& " at offset " & Index
'Img;
1227 end Raise_Exception
;
1229 -- Start of processing for Compile
1232 -- Special case for the empty string: it always matches, and the
1233 -- following processing would fail on it.
1235 return (Ada
.Finalization
.Controlled
with
1236 R
=> new Regexp_Value
'
1237 (Alphabet_Size => 0,
1239 Map => (others => 0),
1240 States => (others => (others => 1)),
1241 Is_Final => (others => True),
1242 Case_Sensitive => True));
1245 if not Case_Sensitive then
1246 System.Case_Util.To_Lower (S);
1251 -- Creates the primary table
1254 Table : Regexp_Array_Access;
1255 Num_States : State_Index;
1256 Start_State : State_Index;
1257 End_State : State_Index;
1261 Table := new Regexp_Array (1 .. 100,
1262 0 .. Alphabet_Size + 10);
1264 Create_Primary_Table (Table, Num_States, Start_State, End_State);
1266 Create_Primary_Table_Glob
1267 (Table, Num_States, Start_State, End_State);
1270 -- Creates the secondary table
1272 R := Create_Secondary_Table
1273 (Table, Num_States, Start_State, End_State);
1283 procedure Finalize (R : in out Regexp) is
1284 procedure Free is new
1285 Ada.Unchecked_Deallocation (Regexp_Value, Regexp_Access);
1296 (Table : Regexp_Array_Access;
1297 State : State_Index;
1298 Column : Column_Index) return State_Index
1301 if State <= Table'Last (1)
1302 and then Column <= Table'Last (2)
1304 return Table (State, Column);
1314 function Match (S : String; R : Regexp) return Boolean is
1315 Current_State : State_Index := 1;
1319 raise Constraint_Error;
1322 for Char in S'Range loop
1324 if R.R.Case_Sensitive then
1325 Current_State := R.R.States (Current_State, R.R.Map (S (Char)));
1328 R.R.States (Current_State,
1329 R.R.Map (System.Case_Util.To_Lower (S (Char))));
1332 if Current_State = 0 then
1338 return R.R.Is_Final (Current_State);
1346 (Table : in out Regexp_Array_Access;
1347 State : State_Index;
1348 Column : Column_Index;
1349 Value : State_Index)
1351 New_Lines : State_Index;
1352 New_Columns : Column_Index;
1353 New_Table : Regexp_Array_Access;
1356 if State <= Table'Last (1)
1357 and then Column <= Table'Last (2)
1359 Table (State, Column) := Value;
1361 -- Doubles the size of the table until it is big enough that
1362 -- (State, Column) is a valid index
1364 New_Lines := Table'Last (1) * (State / Table'Last (1) + 1);
1365 New_Columns := Table'Last (2) * (Column / Table'Last (2) + 1);
1366 New_Table := new Regexp_Array (Table'First (1) .. New_Lines,
1367 Table'First (2) .. New_Columns);
1368 New_Table.all := (others => (others => 0));
1370 for J in Table'Range (1) loop
1371 for K in Table'Range (2) loop
1372 New_Table (J, K) := Table (J, K);
1378 Table (State, Column) := Value;