Update concepts branch to revision 131834
[official-gcc.git] / gcc / ada / s-regexp.adb
blob2dae7b2910375a2cc5c3962f6398b96280fcf95f
1 ------------------------------------------------------------------------------
2 -- --
3 -- GNAT COMPILER COMPONENTS --
4 -- --
5 -- S Y S T E M . R E G E X P --
6 -- --
7 -- B o d y --
8 -- --
9 -- Copyright (C) 1999-2007, AdaCore --
10 -- --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 2, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
17 -- for more details. You should have received a copy of the GNU General --
18 -- Public License distributed with GNAT; see file COPYING. If not, write --
19 -- to the Free Software Foundation, 51 Franklin Street, Fifth Floor, --
20 -- Boston, MA 02110-1301, USA. --
21 -- --
22 -- As a special exception, if other files instantiate generics from this --
23 -- unit, or you link this unit with other files to produce an executable, --
24 -- this unit does not by itself cause the resulting executable to be --
25 -- covered by the GNU General Public License. This exception does not --
26 -- however invalidate any other reasons why the executable file might be --
27 -- covered by the GNU Public License. --
28 -- --
29 -- GNAT was originally developed by the GNAT team at New York University. --
30 -- Extensive contributions were provided by Ada Core Technologies Inc. --
31 -- --
32 ------------------------------------------------------------------------------
34 with Ada.Unchecked_Deallocation;
35 -- with Ada.Exceptions;
37 with System.Case_Util;
39 package body System.Regexp is
41 Open_Paren : constant Character := '(';
42 Close_Paren : constant Character := ')';
43 Open_Bracket : constant Character := '[';
44 Close_Bracket : constant Character := ']';
46 type State_Index is new Natural;
47 type Column_Index is new Natural;
49 type Regexp_Array is array
50 (State_Index range <>, Column_Index range <>) of State_Index;
51 -- First index is for the state number
52 -- Second index is for the character type
53 -- Contents is the new State
55 type Regexp_Array_Access is access Regexp_Array;
56 -- Use this type through the functions Set below, so that it
57 -- can grow dynamically depending on the needs.
59 type Mapping is array (Character'Range) of Column_Index;
60 -- Mapping between characters and column in the Regexp_Array
62 type Boolean_Array is array (State_Index range <>) of Boolean;
64 type Regexp_Value
65 (Alphabet_Size : Column_Index;
66 Num_States : State_Index) is
67 record
68 Map : Mapping;
69 States : Regexp_Array (1 .. Num_States, 0 .. Alphabet_Size);
70 Is_Final : Boolean_Array (1 .. Num_States);
71 Case_Sensitive : Boolean;
72 end record;
73 -- Deterministic finite-state machine
75 -----------------------
76 -- Local Subprograms --
77 -----------------------
79 procedure Set
80 (Table : in out Regexp_Array_Access;
81 State : State_Index;
82 Column : Column_Index;
83 Value : State_Index);
84 -- Sets a value in the table. If the table is too small, reallocate it
85 -- dynamically so that (State, Column) is a valid index in it.
87 function Get
88 (Table : Regexp_Array_Access;
89 State : State_Index;
90 Column : Column_Index)
91 return State_Index;
92 -- Returns the value in the table at (State, Column).
93 -- If this index does not exist in the table, returns 0
95 procedure Free is new Ada.Unchecked_Deallocation
96 (Regexp_Array, Regexp_Array_Access);
98 ------------
99 -- Adjust --
100 ------------
102 procedure Adjust (R : in out Regexp) is
103 Tmp : Regexp_Access;
105 begin
106 Tmp := new Regexp_Value (Alphabet_Size => R.R.Alphabet_Size,
107 Num_States => R.R.Num_States);
108 Tmp.all := R.R.all;
109 R.R := Tmp;
110 end Adjust;
112 -------------
113 -- Compile --
114 -------------
116 function Compile
117 (Pattern : String;
118 Glob : Boolean := False;
119 Case_Sensitive : Boolean := True)
120 return Regexp
122 S : String := Pattern;
123 -- The pattern which is really compiled (when the pattern is case
124 -- insensitive, we convert this string to lower-cases
126 Map : Mapping := (others => 0);
127 -- Mapping between characters and columns in the tables
129 Alphabet_Size : Column_Index := 0;
130 -- Number of significant characters in the regular expression.
131 -- This total does not include special operators, such as *, (, ...
133 procedure Create_Mapping;
134 -- Creates a mapping between characters in the regexp and columns
135 -- in the tables representing the regexp. Test that the regexp is
136 -- well-formed Modifies Alphabet_Size and Map
138 procedure Create_Primary_Table
139 (Table : out Regexp_Array_Access;
140 Num_States : out State_Index;
141 Start_State : out State_Index;
142 End_State : out State_Index);
143 -- Creates the first version of the regexp (this is a non deterministic
144 -- finite state machine, which is unadapted for a fast pattern
145 -- matching algorithm). We use a recursive algorithm to process the
146 -- parenthesis sub-expressions.
148 -- Table : at the end of the procedure : Column 0 is for any character
149 -- ('.') and the last columns are for no character (closure)
150 -- Num_States is set to the number of states in the table
151 -- Start_State is the number of the starting state in the regexp
152 -- End_State is the number of the final state when the regexp matches
154 procedure Create_Primary_Table_Glob
155 (Table : out Regexp_Array_Access;
156 Num_States : out State_Index;
157 Start_State : out State_Index;
158 End_State : out State_Index);
159 -- Same function as above, but it deals with the second possible
160 -- grammar for 'globbing pattern', which is a kind of subset of the
161 -- whole regular expression grammar.
163 function Create_Secondary_Table
164 (First_Table : Regexp_Array_Access;
165 Num_States : State_Index;
166 Start_State : State_Index;
167 End_State : State_Index)
168 return Regexp;
169 -- Creates the definitive table representing the regular expression
170 -- This is actually a transformation of the primary table First_Table,
171 -- where every state is grouped with the states in its 'no-character'
172 -- columns. The transitions between the new states are then recalculated
173 -- and if necessary some new states are created.
175 -- Note that the resulting finite-state machine is not optimized in
176 -- terms of the number of states : it would be more time-consuming to
177 -- add a third pass to reduce the number of states in the machine, with
178 -- no speed improvement...
180 procedure Raise_Exception (M : String; Index : Integer);
181 pragma No_Return (Raise_Exception);
182 -- Raise an exception, indicating an error at character Index in S
184 --------------------
185 -- Create_Mapping --
186 --------------------
188 procedure Create_Mapping is
190 procedure Add_In_Map (C : Character);
191 -- Add a character in the mapping, if it is not already defined
193 ----------------
194 -- Add_In_Map --
195 ----------------
197 procedure Add_In_Map (C : Character) is
198 begin
199 if Map (C) = 0 then
200 Alphabet_Size := Alphabet_Size + 1;
201 Map (C) := Alphabet_Size;
202 end if;
203 end Add_In_Map;
205 J : Integer := S'First;
206 Parenthesis_Level : Integer := 0;
207 Curly_Level : Integer := 0;
209 -- Start of processing for Create_Mapping
211 begin
212 while J <= S'Last loop
213 case S (J) is
214 when Open_Bracket =>
215 J := J + 1;
217 if S (J) = '^' then
218 J := J + 1;
219 end if;
221 if S (J) = ']' or S (J) = '-' then
222 J := J + 1;
223 end if;
225 -- The first character never has a special meaning
227 loop
228 if J > S'Last then
229 Raise_Exception
230 ("Ran out of characters while parsing ", J);
231 end if;
233 exit when S (J) = Close_Bracket;
235 if S (J) = '-'
236 and then S (J + 1) /= Close_Bracket
237 then
238 declare
239 Start : constant Integer := J - 1;
241 begin
242 J := J + 1;
244 if S (J) = '\' then
245 J := J + 1;
246 end if;
248 for Char in S (Start) .. S (J) loop
249 Add_In_Map (Char);
250 end loop;
251 end;
252 else
253 if S (J) = '\' then
254 J := J + 1;
255 end if;
257 Add_In_Map (S (J));
258 end if;
260 J := J + 1;
261 end loop;
263 -- A close bracket must follow a open_bracket,
264 -- and cannot be found alone on the line
266 when Close_Bracket =>
267 Raise_Exception
268 ("Incorrect character ']' in regular expression", J);
270 when '\' =>
271 if J < S'Last then
272 J := J + 1;
273 Add_In_Map (S (J));
275 else
276 -- \ not allowed at the end of the regexp
278 Raise_Exception
279 ("Incorrect character '\' in regular expression", J);
280 end if;
282 when Open_Paren =>
283 if not Glob then
284 Parenthesis_Level := Parenthesis_Level + 1;
285 else
286 Add_In_Map (Open_Paren);
287 end if;
289 when Close_Paren =>
290 if not Glob then
291 Parenthesis_Level := Parenthesis_Level - 1;
293 if Parenthesis_Level < 0 then
294 Raise_Exception
295 ("')' is not associated with '(' in regular "
296 & "expression", J);
297 end if;
299 if S (J - 1) = Open_Paren then
300 Raise_Exception
301 ("Empty parenthesis not allowed in regular "
302 & "expression", J);
303 end if;
305 else
306 Add_In_Map (Close_Paren);
307 end if;
309 when '.' =>
310 if Glob then
311 Add_In_Map ('.');
312 end if;
314 when '{' =>
315 if not Glob then
316 Add_In_Map (S (J));
317 else
318 Curly_Level := Curly_Level + 1;
319 end if;
321 when '}' =>
322 if not Glob then
323 Add_In_Map (S (J));
324 else
325 Curly_Level := Curly_Level - 1;
326 end if;
328 when '*' | '?' =>
329 if not Glob then
330 if J = S'First then
331 Raise_Exception
332 ("'*', '+', '?' and '|' operators cannot be in "
333 & "first position in regular expression", J);
334 end if;
335 end if;
337 when '|' | '+' =>
338 if not Glob then
339 if J = S'First then
341 -- These operators must apply to a sub-expression,
342 -- and cannot be found at the beginning of the line
344 Raise_Exception
345 ("'*', '+', '?' and '|' operators cannot be in "
346 & "first position in regular expression", J);
347 end if;
349 else
350 Add_In_Map (S (J));
351 end if;
353 when others =>
354 Add_In_Map (S (J));
355 end case;
357 J := J + 1;
358 end loop;
360 -- A closing parenthesis must follow an open parenthesis
362 if Parenthesis_Level /= 0 then
363 Raise_Exception
364 ("'(' must always be associated with a ')'", J);
365 end if;
367 if Curly_Level /= 0 then
368 Raise_Exception
369 ("'{' must always be associated with a '}'", J);
370 end if;
371 end Create_Mapping;
373 --------------------------
374 -- Create_Primary_Table --
375 --------------------------
377 procedure Create_Primary_Table
378 (Table : out Regexp_Array_Access;
379 Num_States : out State_Index;
380 Start_State : out State_Index;
381 End_State : out State_Index)
383 Empty_Char : constant Column_Index := Alphabet_Size + 1;
385 Current_State : State_Index := 0;
386 -- Index of the last created state
388 procedure Add_Empty_Char
389 (State : State_Index;
390 To_State : State_Index);
391 -- Add a empty-character transition from State to To_State
393 procedure Create_Repetition
394 (Repetition : Character;
395 Start_Prev : State_Index;
396 End_Prev : State_Index;
397 New_Start : out State_Index;
398 New_End : in out State_Index);
399 -- Create the table in case we have a '*', '+' or '?'.
400 -- Start_Prev .. End_Prev should indicate respectively the start and
401 -- end index of the previous expression, to which '*', '+' or '?' is
402 -- applied.
404 procedure Create_Simple
405 (Start_Index : Integer;
406 End_Index : Integer;
407 Start_State : out State_Index;
408 End_State : out State_Index);
409 -- Fill the table for the regexp Simple.
410 -- This is the recursive procedure called to handle () expressions
411 -- If End_State = 0, then the call to Create_Simple creates an
412 -- independent regexp, not a concatenation
413 -- Start_Index .. End_Index is the starting index in the string S.
415 -- Warning: it may look like we are creating too many empty-string
416 -- transitions, but they are needed to get the correct regexp.
417 -- The table is filled as follow ( s means start-state, e means
418 -- end-state) :
420 -- regexp state_num | a b * empty_string
421 -- ------- ------------------------------
422 -- a 1 (s) | 2 - - -
423 -- 2 (e) | - - - -
425 -- ab 1 (s) | 2 - - -
426 -- 2 | - - - 3
427 -- 3 | - 4 - -
428 -- 4 (e) | - - - -
430 -- a|b 1 | 2 - - -
431 -- 2 | - - - 6
432 -- 3 | - 4 - -
433 -- 4 | - - - 6
434 -- 5 (s) | - - - 1,3
435 -- 6 (e) | - - - -
437 -- a* 1 | 2 - - -
438 -- 2 | - - - 4
439 -- 3 (s) | - - - 1,4
440 -- 4 (e) | - - - 3
442 -- (a) 1 (s) | 2 - - -
443 -- 2 (e) | - - - -
445 -- a+ 1 | 2 - - -
446 -- 2 | - - - 4
447 -- 3 (s) | - - - 1
448 -- 4 (e) | - - - 3
450 -- a? 1 | 2 - - -
451 -- 2 | - - - 4
452 -- 3 (s) | - - - 1,4
453 -- 4 (e) | - - - -
455 -- . 1 (s) | 2 2 2 -
456 -- 2 (e) | - - - -
458 function Next_Sub_Expression
459 (Start_Index : Integer;
460 End_Index : Integer)
461 return Integer;
462 -- Returns the index of the last character of the next sub-expression
463 -- in Simple. Index cannot be greater than End_Index.
465 --------------------
466 -- Add_Empty_Char --
467 --------------------
469 procedure Add_Empty_Char
470 (State : State_Index;
471 To_State : State_Index)
473 J : Column_Index := Empty_Char;
475 begin
476 while Get (Table, State, J) /= 0 loop
477 J := J + 1;
478 end loop;
480 Set (Table, State, J, To_State);
481 end Add_Empty_Char;
483 -----------------------
484 -- Create_Repetition --
485 -----------------------
487 procedure Create_Repetition
488 (Repetition : Character;
489 Start_Prev : State_Index;
490 End_Prev : State_Index;
491 New_Start : out State_Index;
492 New_End : in out State_Index)
494 begin
495 New_Start := Current_State + 1;
497 if New_End /= 0 then
498 Add_Empty_Char (New_End, New_Start);
499 end if;
501 Current_State := Current_State + 2;
502 New_End := Current_State;
504 Add_Empty_Char (End_Prev, New_End);
505 Add_Empty_Char (New_Start, Start_Prev);
507 if Repetition /= '+' then
508 Add_Empty_Char (New_Start, New_End);
509 end if;
511 if Repetition /= '?' then
512 Add_Empty_Char (New_End, New_Start);
513 end if;
514 end Create_Repetition;
516 -------------------
517 -- Create_Simple --
518 -------------------
520 procedure Create_Simple
521 (Start_Index : Integer;
522 End_Index : Integer;
523 Start_State : out State_Index;
524 End_State : out State_Index)
526 J : Integer := Start_Index;
527 Last_Start : State_Index := 0;
529 begin
530 Start_State := 0;
531 End_State := 0;
532 while J <= End_Index loop
533 case S (J) is
534 when Open_Paren =>
535 declare
536 J_Start : constant Integer := J + 1;
537 Next_Start : State_Index;
538 Next_End : State_Index;
540 begin
541 J := Next_Sub_Expression (J, End_Index);
542 Create_Simple (J_Start, J - 1, Next_Start, Next_End);
544 if J < End_Index
545 and then (S (J + 1) = '*' or else
546 S (J + 1) = '+' or else
547 S (J + 1) = '?')
548 then
549 J := J + 1;
550 Create_Repetition
551 (S (J),
552 Next_Start,
553 Next_End,
554 Last_Start,
555 End_State);
557 else
558 Last_Start := Next_Start;
560 if End_State /= 0 then
561 Add_Empty_Char (End_State, Last_Start);
562 end if;
564 End_State := Next_End;
565 end if;
566 end;
568 when '|' =>
569 declare
570 Start_Prev : constant State_Index := Start_State;
571 End_Prev : constant State_Index := End_State;
572 Start_J : constant Integer := J + 1;
573 Start_Next : State_Index := 0;
574 End_Next : State_Index := 0;
576 begin
577 J := Next_Sub_Expression (J, End_Index);
579 -- Create a new state for the start of the alternative
581 Current_State := Current_State + 1;
582 Last_Start := Current_State;
583 Start_State := Last_Start;
585 -- Create the tree for the second part of alternative
587 Create_Simple (Start_J, J, Start_Next, End_Next);
589 -- Create the end state
591 Add_Empty_Char (Last_Start, Start_Next);
592 Add_Empty_Char (Last_Start, Start_Prev);
593 Current_State := Current_State + 1;
594 End_State := Current_State;
595 Add_Empty_Char (End_Prev, End_State);
596 Add_Empty_Char (End_Next, End_State);
597 end;
599 when Open_Bracket =>
600 Current_State := Current_State + 1;
602 declare
603 Next_State : State_Index := Current_State + 1;
605 begin
606 J := J + 1;
608 if S (J) = '^' then
609 J := J + 1;
611 Next_State := 0;
613 for Column in 0 .. Alphabet_Size loop
614 Set (Table, Current_State, Column,
615 Value => Current_State + 1);
616 end loop;
617 end if;
619 -- Automatically add the first character
621 if S (J) = '-' or S (J) = ']' then
622 Set (Table, Current_State, Map (S (J)),
623 Value => Next_State);
624 J := J + 1;
625 end if;
627 -- Loop till closing bracket found
629 loop
630 exit when S (J) = Close_Bracket;
632 if S (J) = '-'
633 and then S (J + 1) /= ']'
634 then
635 declare
636 Start : constant Integer := J - 1;
638 begin
639 J := J + 1;
641 if S (J) = '\' then
642 J := J + 1;
643 end if;
645 for Char in S (Start) .. S (J) loop
646 Set (Table, Current_State, Map (Char),
647 Value => Next_State);
648 end loop;
649 end;
651 else
652 if S (J) = '\' then
653 J := J + 1;
654 end if;
656 Set (Table, Current_State, Map (S (J)),
657 Value => Next_State);
658 end if;
659 J := J + 1;
660 end loop;
661 end;
663 Current_State := Current_State + 1;
665 -- If the next symbol is a special symbol
667 if J < End_Index
668 and then (S (J + 1) = '*' or else
669 S (J + 1) = '+' or else
670 S (J + 1) = '?')
671 then
672 J := J + 1;
673 Create_Repetition
674 (S (J),
675 Current_State - 1,
676 Current_State,
677 Last_Start,
678 End_State);
680 else
681 Last_Start := Current_State - 1;
683 if End_State /= 0 then
684 Add_Empty_Char (End_State, Last_Start);
685 end if;
687 End_State := Current_State;
688 end if;
690 when '*' | '+' | '?' | Close_Paren | Close_Bracket =>
691 Raise_Exception
692 ("Incorrect character in regular expression :", J);
694 when others =>
695 Current_State := Current_State + 1;
697 -- Create the state for the symbol S (J)
699 if S (J) = '.' then
700 for K in 0 .. Alphabet_Size loop
701 Set (Table, Current_State, K,
702 Value => Current_State + 1);
703 end loop;
705 else
706 if S (J) = '\' then
707 J := J + 1;
708 end if;
710 Set (Table, Current_State, Map (S (J)),
711 Value => Current_State + 1);
712 end if;
714 Current_State := Current_State + 1;
716 -- If the next symbol is a special symbol
718 if J < End_Index
719 and then (S (J + 1) = '*' or else
720 S (J + 1) = '+' or else
721 S (J + 1) = '?')
722 then
723 J := J + 1;
724 Create_Repetition
725 (S (J),
726 Current_State - 1,
727 Current_State,
728 Last_Start,
729 End_State);
731 else
732 Last_Start := Current_State - 1;
734 if End_State /= 0 then
735 Add_Empty_Char (End_State, Last_Start);
736 end if;
738 End_State := Current_State;
739 end if;
741 end case;
743 if Start_State = 0 then
744 Start_State := Last_Start;
745 end if;
747 J := J + 1;
748 end loop;
749 end Create_Simple;
751 -------------------------
752 -- Next_Sub_Expression --
753 -------------------------
755 function Next_Sub_Expression
756 (Start_Index : Integer;
757 End_Index : Integer)
758 return Integer
760 J : Integer := Start_Index;
761 Start_On_Alter : Boolean := False;
763 begin
764 if S (J) = '|' then
765 Start_On_Alter := True;
766 end if;
768 loop
769 exit when J = End_Index;
770 J := J + 1;
772 case S (J) is
773 when '\' =>
774 J := J + 1;
776 when Open_Bracket =>
777 loop
778 J := J + 1;
779 exit when S (J) = Close_Bracket;
781 if S (J) = '\' then
782 J := J + 1;
783 end if;
784 end loop;
786 when Open_Paren =>
787 J := Next_Sub_Expression (J, End_Index);
789 when Close_Paren =>
790 return J;
792 when '|' =>
793 if Start_On_Alter then
794 return J - 1;
795 end if;
797 when others =>
798 null;
799 end case;
800 end loop;
802 return J;
803 end Next_Sub_Expression;
805 -- Start of Create_Primary_Table
807 begin
808 Table.all := (others => (others => 0));
809 Create_Simple (S'First, S'Last, Start_State, End_State);
810 Num_States := Current_State;
811 end Create_Primary_Table;
813 -------------------------------
814 -- Create_Primary_Table_Glob --
815 -------------------------------
817 procedure Create_Primary_Table_Glob
818 (Table : out Regexp_Array_Access;
819 Num_States : out State_Index;
820 Start_State : out State_Index;
821 End_State : out State_Index)
823 Empty_Char : constant Column_Index := Alphabet_Size + 1;
825 Current_State : State_Index := 0;
826 -- Index of the last created state
828 procedure Add_Empty_Char
829 (State : State_Index;
830 To_State : State_Index);
831 -- Add a empty-character transition from State to To_State
833 procedure Create_Simple
834 (Start_Index : Integer;
835 End_Index : Integer;
836 Start_State : out State_Index;
837 End_State : out State_Index);
838 -- Fill the table for the S (Start_Index .. End_Index).
839 -- This is the recursive procedure called to handle () expressions
841 --------------------
842 -- Add_Empty_Char --
843 --------------------
845 procedure Add_Empty_Char
846 (State : State_Index;
847 To_State : State_Index)
849 J : Column_Index := Empty_Char;
851 begin
852 while Get (Table, State, J) /= 0 loop
853 J := J + 1;
854 end loop;
856 Set (Table, State, J,
857 Value => To_State);
858 end Add_Empty_Char;
860 -------------------
861 -- Create_Simple --
862 -------------------
864 procedure Create_Simple
865 (Start_Index : Integer;
866 End_Index : Integer;
867 Start_State : out State_Index;
868 End_State : out State_Index)
870 J : Integer := Start_Index;
871 Last_Start : State_Index := 0;
873 begin
874 Start_State := 0;
875 End_State := 0;
877 while J <= End_Index loop
878 case S (J) is
880 when Open_Bracket =>
881 Current_State := Current_State + 1;
883 declare
884 Next_State : State_Index := Current_State + 1;
886 begin
887 J := J + 1;
889 if S (J) = '^' then
890 J := J + 1;
891 Next_State := 0;
893 for Column in 0 .. Alphabet_Size loop
894 Set (Table, Current_State, Column,
895 Value => Current_State + 1);
896 end loop;
897 end if;
899 -- Automatically add the first character
901 if S (J) = '-' or S (J) = ']' then
902 Set (Table, Current_State, Map (S (J)),
903 Value => Current_State);
904 J := J + 1;
905 end if;
907 -- Loop till closing bracket found
909 loop
910 exit when S (J) = Close_Bracket;
912 if S (J) = '-'
913 and then S (J + 1) /= ']'
914 then
915 declare
916 Start : constant Integer := J - 1;
917 begin
918 J := J + 1;
920 if S (J) = '\' then
921 J := J + 1;
922 end if;
924 for Char in S (Start) .. S (J) loop
925 Set (Table, Current_State, Map (Char),
926 Value => Next_State);
927 end loop;
928 end;
930 else
931 if S (J) = '\' then
932 J := J + 1;
933 end if;
935 Set (Table, Current_State, Map (S (J)),
936 Value => Next_State);
937 end if;
938 J := J + 1;
939 end loop;
940 end;
942 Last_Start := Current_State;
943 Current_State := Current_State + 1;
945 if End_State /= 0 then
946 Add_Empty_Char (End_State, Last_Start);
947 end if;
949 End_State := Current_State;
951 when '{' =>
952 declare
953 End_Sub : Integer;
954 Start_Regexp_Sub : State_Index;
955 End_Regexp_Sub : State_Index;
956 Create_Start : State_Index := 0;
958 Create_End : State_Index := 0;
959 -- Initialized to avoid junk warning
961 begin
962 while S (J) /= '}' loop
964 -- First step : find sub pattern
966 End_Sub := J + 1;
967 while S (End_Sub) /= ','
968 and then S (End_Sub) /= '}'
969 loop
970 End_Sub := End_Sub + 1;
971 end loop;
973 -- Second step : create a sub pattern
975 Create_Simple
976 (J + 1,
977 End_Sub - 1,
978 Start_Regexp_Sub,
979 End_Regexp_Sub);
981 J := End_Sub;
983 -- Third step : create an alternative
985 if Create_Start = 0 then
986 Current_State := Current_State + 1;
987 Create_Start := Current_State;
988 Add_Empty_Char (Create_Start, Start_Regexp_Sub);
989 Current_State := Current_State + 1;
990 Create_End := Current_State;
991 Add_Empty_Char (End_Regexp_Sub, Create_End);
993 else
994 Current_State := Current_State + 1;
995 Add_Empty_Char (Current_State, Create_Start);
996 Create_Start := Current_State;
997 Add_Empty_Char (Create_Start, Start_Regexp_Sub);
998 Add_Empty_Char (End_Regexp_Sub, Create_End);
999 end if;
1000 end loop;
1002 if End_State /= 0 then
1003 Add_Empty_Char (End_State, Create_Start);
1004 end if;
1006 End_State := Create_End;
1007 Last_Start := Create_Start;
1008 end;
1010 when '*' =>
1011 Current_State := Current_State + 1;
1013 if End_State /= 0 then
1014 Add_Empty_Char (End_State, Current_State);
1015 end if;
1017 Add_Empty_Char (Current_State, Current_State + 1);
1018 Add_Empty_Char (Current_State, Current_State + 3);
1019 Last_Start := Current_State;
1021 Current_State := Current_State + 1;
1023 for K in 0 .. Alphabet_Size loop
1024 Set (Table, Current_State, K,
1025 Value => Current_State + 1);
1026 end loop;
1028 Current_State := Current_State + 1;
1029 Add_Empty_Char (Current_State, Current_State + 1);
1031 Current_State := Current_State + 1;
1032 Add_Empty_Char (Current_State, Last_Start);
1033 End_State := Current_State;
1035 when others =>
1036 Current_State := Current_State + 1;
1038 if S (J) = '?' then
1039 for K in 0 .. Alphabet_Size loop
1040 Set (Table, Current_State, K,
1041 Value => Current_State + 1);
1042 end loop;
1044 else
1045 if S (J) = '\' then
1046 J := J + 1;
1047 end if;
1049 -- Create the state for the symbol S (J)
1051 Set (Table, Current_State, Map (S (J)),
1052 Value => Current_State + 1);
1053 end if;
1055 Last_Start := Current_State;
1056 Current_State := Current_State + 1;
1058 if End_State /= 0 then
1059 Add_Empty_Char (End_State, Last_Start);
1060 end if;
1062 End_State := Current_State;
1064 end case;
1066 if Start_State = 0 then
1067 Start_State := Last_Start;
1068 end if;
1070 J := J + 1;
1071 end loop;
1072 end Create_Simple;
1074 -- Start of processing for Create_Primary_Table_Glob
1076 begin
1077 Table.all := (others => (others => 0));
1078 Create_Simple (S'First, S'Last, Start_State, End_State);
1079 Num_States := Current_State;
1080 end Create_Primary_Table_Glob;
1082 ----------------------------
1083 -- Create_Secondary_Table --
1084 ----------------------------
1086 function Create_Secondary_Table
1087 (First_Table : Regexp_Array_Access;
1088 Num_States : State_Index;
1089 Start_State : State_Index;
1090 End_State : State_Index) return Regexp
1092 pragma Warnings (Off, Num_States);
1094 Last_Index : constant State_Index := First_Table'Last (1);
1095 type Meta_State is array (1 .. Last_Index) of Boolean;
1097 Table : Regexp_Array (1 .. Last_Index, 0 .. Alphabet_Size) :=
1098 (others => (others => 0));
1100 Meta_States : array (1 .. Last_Index + 1) of Meta_State :=
1101 (others => (others => False));
1103 Temp_State_Not_Null : Boolean;
1105 Is_Final : Boolean_Array (1 .. Last_Index) := (others => False);
1107 Current_State : State_Index := 1;
1108 Nb_State : State_Index := 1;
1110 procedure Closure
1111 (State : in out Meta_State;
1112 Item : State_Index);
1113 -- Compute the closure of the state (that is every other state which
1114 -- has a empty-character transition) and add it to the state
1116 -------------
1117 -- Closure --
1118 -------------
1120 procedure Closure
1121 (State : in out Meta_State;
1122 Item : State_Index)
1124 begin
1125 if State (Item) then
1126 return;
1127 end if;
1129 State (Item) := True;
1131 for Column in Alphabet_Size + 1 .. First_Table'Last (2) loop
1132 if First_Table (Item, Column) = 0 then
1133 return;
1134 end if;
1136 Closure (State, First_Table (Item, Column));
1137 end loop;
1138 end Closure;
1140 -- Start of processing for Create_Secondary_Table
1142 begin
1143 -- Create a new state
1145 Closure (Meta_States (Current_State), Start_State);
1147 while Current_State <= Nb_State loop
1149 -- If this new meta-state includes the primary table end state,
1150 -- then this meta-state will be a final state in the regexp
1152 if Meta_States (Current_State)(End_State) then
1153 Is_Final (Current_State) := True;
1154 end if;
1156 -- For every character in the regexp, calculate the possible
1157 -- transitions from Current_State
1159 for Column in 0 .. Alphabet_Size loop
1160 Meta_States (Nb_State + 1) := (others => False);
1161 Temp_State_Not_Null := False;
1163 for K in Meta_States (Current_State)'Range loop
1164 if Meta_States (Current_State)(K)
1165 and then First_Table (K, Column) /= 0
1166 then
1167 Closure
1168 (Meta_States (Nb_State + 1), First_Table (K, Column));
1169 Temp_State_Not_Null := True;
1170 end if;
1171 end loop;
1173 -- If at least one transition existed
1175 if Temp_State_Not_Null then
1177 -- Check if this new state corresponds to an old one
1179 for K in 1 .. Nb_State loop
1180 if Meta_States (K) = Meta_States (Nb_State + 1) then
1181 Table (Current_State, Column) := K;
1182 exit;
1183 end if;
1184 end loop;
1186 -- If not, create a new state
1188 if Table (Current_State, Column) = 0 then
1189 Nb_State := Nb_State + 1;
1190 Table (Current_State, Column) := Nb_State;
1191 end if;
1192 end if;
1193 end loop;
1195 Current_State := Current_State + 1;
1196 end loop;
1198 -- Returns the regexp
1200 declare
1201 R : Regexp_Access;
1203 begin
1204 R := new Regexp_Value (Alphabet_Size => Alphabet_Size,
1205 Num_States => Nb_State);
1206 R.Map := Map;
1207 R.Is_Final := Is_Final (1 .. Nb_State);
1208 R.Case_Sensitive := Case_Sensitive;
1210 for State in 1 .. Nb_State loop
1211 for K in 0 .. Alphabet_Size loop
1212 R.States (State, K) := Table (State, K);
1213 end loop;
1214 end loop;
1216 return (Ada.Finalization.Controlled with R => R);
1217 end;
1218 end Create_Secondary_Table;
1220 ---------------------
1221 -- Raise_Exception --
1222 ---------------------
1224 procedure Raise_Exception (M : String; Index : Integer) is
1225 begin
1226 raise Error_In_Regexp with M & " at offset " & Index'Img;
1227 end Raise_Exception;
1229 -- Start of processing for Compile
1231 begin
1232 -- Special case for the empty string: it always matches, and the
1233 -- following processing would fail on it.
1234 if S = "" then
1235 return (Ada.Finalization.Controlled with
1236 R => new Regexp_Value'
1237 (Alphabet_Size => 0,
1238 Num_States => 1,
1239 Map => (others => 0),
1240 States => (others => (others => 1)),
1241 Is_Final => (others => True),
1242 Case_Sensitive => True));
1243 end if;
1245 if not Case_Sensitive then
1246 System.Case_Util.To_Lower (S);
1247 end if;
1249 Create_Mapping;
1251 -- Creates the primary table
1253 declare
1254 Table : Regexp_Array_Access;
1255 Num_States : State_Index;
1256 Start_State : State_Index;
1257 End_State : State_Index;
1258 R : Regexp;
1260 begin
1261 Table := new Regexp_Array (1 .. 100,
1262 0 .. Alphabet_Size + 10);
1263 if not Glob then
1264 Create_Primary_Table (Table, Num_States, Start_State, End_State);
1265 else
1266 Create_Primary_Table_Glob
1267 (Table, Num_States, Start_State, End_State);
1268 end if;
1270 -- Creates the secondary table
1272 R := Create_Secondary_Table
1273 (Table, Num_States, Start_State, End_State);
1274 Free (Table);
1275 return R;
1276 end;
1277 end Compile;
1279 --------------
1280 -- Finalize --
1281 --------------
1283 procedure Finalize (R : in out Regexp) is
1284 procedure Free is new
1285 Ada.Unchecked_Deallocation (Regexp_Value, Regexp_Access);
1287 begin
1288 Free (R.R);
1289 end Finalize;
1291 ---------
1292 -- Get --
1293 ---------
1295 function Get
1296 (Table : Regexp_Array_Access;
1297 State : State_Index;
1298 Column : Column_Index) return State_Index
1300 begin
1301 if State <= Table'Last (1)
1302 and then Column <= Table'Last (2)
1303 then
1304 return Table (State, Column);
1305 else
1306 return 0;
1307 end if;
1308 end Get;
1310 -----------
1311 -- Match --
1312 -----------
1314 function Match (S : String; R : Regexp) return Boolean is
1315 Current_State : State_Index := 1;
1317 begin
1318 if R.R = null then
1319 raise Constraint_Error;
1320 end if;
1322 for Char in S'Range loop
1324 if R.R.Case_Sensitive then
1325 Current_State := R.R.States (Current_State, R.R.Map (S (Char)));
1326 else
1327 Current_State :=
1328 R.R.States (Current_State,
1329 R.R.Map (System.Case_Util.To_Lower (S (Char))));
1330 end if;
1332 if Current_State = 0 then
1333 return False;
1334 end if;
1336 end loop;
1338 return R.R.Is_Final (Current_State);
1339 end Match;
1341 ---------
1342 -- Set --
1343 ---------
1345 procedure Set
1346 (Table : in out Regexp_Array_Access;
1347 State : State_Index;
1348 Column : Column_Index;
1349 Value : State_Index)
1351 New_Lines : State_Index;
1352 New_Columns : Column_Index;
1353 New_Table : Regexp_Array_Access;
1355 begin
1356 if State <= Table'Last (1)
1357 and then Column <= Table'Last (2)
1358 then
1359 Table (State, Column) := Value;
1360 else
1361 -- Doubles the size of the table until it is big enough that
1362 -- (State, Column) is a valid index
1364 New_Lines := Table'Last (1) * (State / Table'Last (1) + 1);
1365 New_Columns := Table'Last (2) * (Column / Table'Last (2) + 1);
1366 New_Table := new Regexp_Array (Table'First (1) .. New_Lines,
1367 Table'First (2) .. New_Columns);
1368 New_Table.all := (others => (others => 0));
1370 for J in Table'Range (1) loop
1371 for K in Table'Range (2) loop
1372 New_Table (J, K) := Table (J, K);
1373 end loop;
1374 end loop;
1376 Free (Table);
1377 Table := New_Table;
1378 Table (State, Column) := Value;
1379 end if;
1380 end Set;
1382 end System.Regexp;