Mark ChangeLog
[official-gcc.git] / gcc / ada / g-spipat.adb
blob06f7542759c05444904fbe8250f12416c408b86d
1 ------------------------------------------------------------------------------
2 -- --
3 -- GNAT LIBRARY COMPONENTS --
4 -- --
5 -- G N A T . S P I T B O L . P A T T E R N S --
6 -- --
7 -- B o d y --
8 -- --
9 -- Copyright (C) 1998-2004, Ada Core Technologies, Inc. --
10 -- --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 2, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
17 -- for more details. You should have received a copy of the GNU General --
18 -- Public License distributed with GNAT; see file COPYING. If not, write --
19 -- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, --
20 -- MA 02111-1307, USA. --
21 -- --
22 -- As a special exception, if other files instantiate generics from this --
23 -- unit, or you link this unit with other files to produce an executable, --
24 -- this unit does not by itself cause the resulting executable to be --
25 -- covered by the GNU General Public License. This exception does not --
26 -- however invalidate any other reasons why the executable file might be --
27 -- covered by the GNU Public License. --
28 -- --
29 -- GNAT was originally developed by the GNAT team at New York University. --
30 -- Extensive contributions were provided by Ada Core Technologies Inc. --
31 -- --
32 ------------------------------------------------------------------------------
34 -- Note: the data structures and general approach used in this implementation
35 -- are derived from the original MINIMAL sources for SPITBOL. The code is not
36 -- a direct translation, but the approach is followed closely. In particular,
37 -- we use the one stack approach developed in the SPITBOL implementation.
39 with Ada.Exceptions; use Ada.Exceptions;
40 with Ada.Strings.Maps; use Ada.Strings.Maps;
41 with Ada.Strings.Unbounded.Aux; use Ada.Strings.Unbounded.Aux;
43 with GNAT.Debug_Utilities; use GNAT.Debug_Utilities;
45 with System; use System;
47 with Unchecked_Conversion;
48 with Unchecked_Deallocation;
50 package body GNAT.Spitbol.Patterns is
52 ------------------------
53 -- Internal Debugging --
54 ------------------------
56 Internal_Debug : constant Boolean := False;
57 -- Set this flag to True to activate some built-in debugging traceback
58 -- These are all lines output with PutD and Put_LineD.
60 procedure New_LineD;
61 pragma Inline (New_LineD);
62 -- Output new blank line with New_Line if Internal_Debug is True
64 procedure PutD (Str : String);
65 pragma Inline (PutD);
66 -- Output string with Put if Internal_Debug is True
68 procedure Put_LineD (Str : String);
69 pragma Inline (Put_LineD);
70 -- Output string with Put_Line if Internal_Debug is True
72 -----------------------------
73 -- Local Type Declarations --
74 -----------------------------
76 subtype String_Ptr is Ada.Strings.Unbounded.String_Access;
77 subtype File_Ptr is Ada.Text_IO.File_Access;
79 function To_Address is new Unchecked_Conversion (PE_Ptr, Address);
80 -- Used only for debugging output purposes
82 subtype AFC is Ada.Finalization.Controlled;
84 N : constant PE_Ptr := null;
85 -- Shorthand used to initialize Copy fields to null
87 type Natural_Ptr is access all Natural;
88 type Pattern_Ptr is access all Pattern;
90 --------------------------------------------------
91 -- Description of Algorithm and Data Structures --
92 --------------------------------------------------
94 -- A pattern structure is represented as a linked graph of nodes
95 -- with the following structure:
97 -- +------------------------------------+
98 -- I Pcode I
99 -- +------------------------------------+
100 -- I Index I
101 -- +------------------------------------+
102 -- I Pthen I
103 -- +------------------------------------+
104 -- I parameter(s) I
105 -- +------------------------------------+
107 -- Pcode is a code value indicating the type of the patterm node. This
108 -- code is used both as the discriminant value for the record, and as
109 -- the case index in the main match routine that branches to the proper
110 -- match code for the given element.
112 -- Index is a serial index number. The use of these serial index
113 -- numbers is described in a separate section.
115 -- Pthen is a pointer to the successor node, i.e the node to be matched
116 -- if the attempt to match the node succeeds. If this is the last node
117 -- of the pattern to be matched, then Pthen points to a dummy node
118 -- of kind PC_EOP (end of pattern), which initiales pattern exit.
120 -- The parameter or parameters are present for certain node types,
121 -- and the type varies with the pattern code.
123 type Pattern_Code is (
124 PC_Arb_Y,
125 PC_Assign,
126 PC_Bal,
127 PC_BreakX_X,
128 PC_Cancel,
129 PC_EOP,
130 PC_Fail,
131 PC_Fence,
132 PC_Fence_X,
133 PC_Fence_Y,
134 PC_R_Enter,
135 PC_R_Remove,
136 PC_R_Restore,
137 PC_Rest,
138 PC_Succeed,
139 PC_Unanchored,
141 PC_Alt,
142 PC_Arb_X,
143 PC_Arbno_S,
144 PC_Arbno_X,
146 PC_Rpat,
148 PC_Pred_Func,
150 PC_Assign_Imm,
151 PC_Assign_OnM,
152 PC_Any_VP,
153 PC_Break_VP,
154 PC_BreakX_VP,
155 PC_NotAny_VP,
156 PC_NSpan_VP,
157 PC_Span_VP,
158 PC_String_VP,
160 PC_Write_Imm,
161 PC_Write_OnM,
163 PC_Null,
164 PC_String,
166 PC_String_2,
167 PC_String_3,
168 PC_String_4,
169 PC_String_5,
170 PC_String_6,
172 PC_Setcur,
174 PC_Any_CH,
175 PC_Break_CH,
176 PC_BreakX_CH,
177 PC_Char,
178 PC_NotAny_CH,
179 PC_NSpan_CH,
180 PC_Span_CH,
182 PC_Any_CS,
183 PC_Break_CS,
184 PC_BreakX_CS,
185 PC_NotAny_CS,
186 PC_NSpan_CS,
187 PC_Span_CS,
189 PC_Arbno_Y,
190 PC_Len_Nat,
191 PC_Pos_Nat,
192 PC_RPos_Nat,
193 PC_RTab_Nat,
194 PC_Tab_Nat,
196 PC_Pos_NF,
197 PC_Len_NF,
198 PC_RPos_NF,
199 PC_RTab_NF,
200 PC_Tab_NF,
202 PC_Pos_NP,
203 PC_Len_NP,
204 PC_RPos_NP,
205 PC_RTab_NP,
206 PC_Tab_NP,
208 PC_Any_VF,
209 PC_Break_VF,
210 PC_BreakX_VF,
211 PC_NotAny_VF,
212 PC_NSpan_VF,
213 PC_Span_VF,
214 PC_String_VF);
216 type IndexT is range 0 .. +(2 **15 - 1);
218 type PE (Pcode : Pattern_Code) is record
220 Index : IndexT;
221 -- Serial index number of pattern element within pattern.
223 Pthen : PE_Ptr;
224 -- Successor element, to be matched after this one
226 case Pcode is
228 when PC_Arb_Y |
229 PC_Assign |
230 PC_Bal |
231 PC_BreakX_X |
232 PC_Cancel |
233 PC_EOP |
234 PC_Fail |
235 PC_Fence |
236 PC_Fence_X |
237 PC_Fence_Y |
238 PC_Null |
239 PC_R_Enter |
240 PC_R_Remove |
241 PC_R_Restore |
242 PC_Rest |
243 PC_Succeed |
244 PC_Unanchored => null;
246 when PC_Alt |
247 PC_Arb_X |
248 PC_Arbno_S |
249 PC_Arbno_X => Alt : PE_Ptr;
251 when PC_Rpat => PP : Pattern_Ptr;
253 when PC_Pred_Func => BF : Boolean_Func;
255 when PC_Assign_Imm |
256 PC_Assign_OnM |
257 PC_Any_VP |
258 PC_Break_VP |
259 PC_BreakX_VP |
260 PC_NotAny_VP |
261 PC_NSpan_VP |
262 PC_Span_VP |
263 PC_String_VP => VP : VString_Ptr;
265 when PC_Write_Imm |
266 PC_Write_OnM => FP : File_Ptr;
268 when PC_String => Str : String_Ptr;
270 when PC_String_2 => Str2 : String (1 .. 2);
272 when PC_String_3 => Str3 : String (1 .. 3);
274 when PC_String_4 => Str4 : String (1 .. 4);
276 when PC_String_5 => Str5 : String (1 .. 5);
278 when PC_String_6 => Str6 : String (1 .. 6);
280 when PC_Setcur => Var : Natural_Ptr;
282 when PC_Any_CH |
283 PC_Break_CH |
284 PC_BreakX_CH |
285 PC_Char |
286 PC_NotAny_CH |
287 PC_NSpan_CH |
288 PC_Span_CH => Char : Character;
290 when PC_Any_CS |
291 PC_Break_CS |
292 PC_BreakX_CS |
293 PC_NotAny_CS |
294 PC_NSpan_CS |
295 PC_Span_CS => CS : Character_Set;
297 when PC_Arbno_Y |
298 PC_Len_Nat |
299 PC_Pos_Nat |
300 PC_RPos_Nat |
301 PC_RTab_Nat |
302 PC_Tab_Nat => Nat : Natural;
304 when PC_Pos_NF |
305 PC_Len_NF |
306 PC_RPos_NF |
307 PC_RTab_NF |
308 PC_Tab_NF => NF : Natural_Func;
310 when PC_Pos_NP |
311 PC_Len_NP |
312 PC_RPos_NP |
313 PC_RTab_NP |
314 PC_Tab_NP => NP : Natural_Ptr;
316 when PC_Any_VF |
317 PC_Break_VF |
318 PC_BreakX_VF |
319 PC_NotAny_VF |
320 PC_NSpan_VF |
321 PC_Span_VF |
322 PC_String_VF => VF : VString_Func;
324 end case;
325 end record;
327 subtype PC_Has_Alt is Pattern_Code range PC_Alt .. PC_Arbno_X;
328 -- Range of pattern codes that has an Alt field. This is used in the
329 -- recursive traversals, since these links must be followed.
331 EOP_Element : aliased constant PE := (PC_EOP, 0, N);
332 -- This is the end of pattern element, and is thus the representation of
333 -- a null pattern. It has a zero index element since it is never placed
334 -- inside a pattern. Furthermore it does not need a successor, since it
335 -- marks the end of the pattern, so that no more successors are needed.
337 EOP : constant PE_Ptr := EOP_Element'Unrestricted_Access;
338 -- This is the end of pattern pointer, that is used in the Pthen pointer
339 -- of other nodes to signal end of pattern.
341 -- The following array is used to determine if a pattern used as an
342 -- argument for Arbno is eligible for treatment using the simple Arbno
343 -- structure (i.e. it is a pattern that is guaranteed to match at least
344 -- one character on success, and not to make any entries on the stack.
346 OK_For_Simple_Arbno : constant array (Pattern_Code) of Boolean :=
347 (PC_Any_CS |
348 PC_Any_CH |
349 PC_Any_VF |
350 PC_Any_VP |
351 PC_Char |
352 PC_Len_Nat |
353 PC_NotAny_CS |
354 PC_NotAny_CH |
355 PC_NotAny_VF |
356 PC_NotAny_VP |
357 PC_Span_CS |
358 PC_Span_CH |
359 PC_Span_VF |
360 PC_Span_VP |
361 PC_String |
362 PC_String_2 |
363 PC_String_3 |
364 PC_String_4 |
365 PC_String_5 |
366 PC_String_6 => True,
367 others => False);
369 -------------------------------
370 -- The Pattern History Stack --
371 -------------------------------
373 -- The pattern history stack is used for controlling backtracking when
374 -- a match fails. The idea is to stack entries that give a cursor value
375 -- to be restored, and a node to be reestablished as the current node to
376 -- attempt an appropriate rematch operation. The processing for a pattern
377 -- element that has rematch alternatives pushes an appropriate entry or
378 -- entry on to the stack, and the proceeds. If a match fails at any point,
379 -- the top element of the stack is popped off, resetting the cursor and
380 -- the match continues by accessing the node stored with this entry.
382 type Stack_Entry is record
384 Cursor : Integer;
385 -- Saved cursor value that is restored when this entry is popped
386 -- from the stack if a match attempt fails. Occasionally, this
387 -- field is used to store a history stack pointer instead of a
388 -- cursor. Such cases are noted in the documentation and the value
389 -- stored is negative since stack pointer values are always negative.
391 Node : PE_Ptr;
392 -- This pattern element reference is reestablished as the current
393 -- Node to be matched (which will attempt an appropriate rematch).
395 end record;
397 subtype Stack_Range is Integer range -Stack_Size .. -1;
399 type Stack_Type is array (Stack_Range) of Stack_Entry;
400 -- The type used for a history stack. The actual instance of the stack
401 -- is declared as a local variable in the Match routine, to properly
402 -- handle recursive calls to Match. All stack pointer values are negative
403 -- to distinguish them from normal cursor values.
405 -- Note: the pattern matching stack is used only to handle backtracking.
406 -- If no backtracking occurs, its entries are never accessed, and never
407 -- popped off, and in particular it is normal for a successful match
408 -- to terminate with entries on the stack that are simply discarded.
410 -- Note: in subsequent diagrams of the stack, we always place element
411 -- zero (the deepest element) at the top of the page, then build the
412 -- stack down on the page with the most recent (top of stack) element
413 -- being the bottom-most entry on the page.
415 -- Stack checking is handled by labeling every pattern with the maximum
416 -- number of stack entries that are required, so a single check at the
417 -- start of matching the pattern suffices. There are two exceptions.
419 -- First, the count does not include entries for recursive pattern
420 -- references. Such recursions must therefore perform a specific
421 -- stack check with respect to the number of stack entries required
422 -- by the recursive pattern that is accessed and the amount of stack
423 -- that remains unused.
425 -- Second, the count includes only one iteration of an Arbno pattern,
426 -- so a specific check must be made on subsequent iterations that there
427 -- is still enough stack space left. The Arbno node has a field that
428 -- records the number of stack entries required by its argument for
429 -- this purpose.
431 ---------------------------------------------------
432 -- Use of Serial Index Field in Pattern Elements --
433 ---------------------------------------------------
435 -- The serial index numbers for the pattern elements are assigned as
436 -- a pattern is consructed from its constituent elements. Note that there
437 -- is never any sharing of pattern elements between patterns (copies are
438 -- always made), so the serial index numbers are unique to a particular
439 -- pattern as referenced from the P field of a value of type Pattern.
441 -- The index numbers meet three separate invariants, which are used for
442 -- various purposes as described in this section.
444 -- First, the numbers uniquely identify the pattern elements within a
445 -- pattern. If Num is the number of elements in a given pattern, then
446 -- the serial index numbers for the elements of this pattern will range
447 -- from 1 .. Num, so that each element has a separate value.
449 -- The purpose of this assignment is to provide a convenient auxiliary
450 -- data structure mechanism during operations which must traverse a
451 -- pattern (e.g. copy and finalization processing). Once constructed
452 -- patterns are strictly read only. This is necessary to allow sharing
453 -- of patterns between tasks. This means that we cannot go marking the
454 -- pattern (e.g. with a visited bit). Instead we cosntuct a separate
455 -- vector that contains the necessary information indexed by the Index
456 -- values in the pattern elements. For this purpose the only requirement
457 -- is that they be uniquely assigned.
459 -- Second, the pattern element referenced directly, i.e. the leading
460 -- pattern element, is always the maximum numbered element and therefore
461 -- indicates the total number of elements in the pattern. More precisely,
462 -- the element referenced by the P field of a pattern value, or the
463 -- element returned by any of the internal pattern construction routines
464 -- in the body (that return a value of type PE_Ptr) always is this
465 -- maximum element,
467 -- The purpose of this requirement is to allow an immediate determination
468 -- of the number of pattern elements within a pattern. This is used to
469 -- properly size the vectors used to contain auxiliary information for
470 -- traversal as described above.
472 -- Third, as compound pattern structures are constructed, the way in which
473 -- constituent parts of the pattern are constructed is stylized. This is
474 -- an automatic consequence of the way that these compounjd structures
475 -- are constructed, and basically what we are doing is simply documenting
476 -- and specifying the natural result of the pattern construction. The
477 -- section describing compound pattern structures gives details of the
478 -- numbering of each compound pattern structure.
480 -- The purpose of specifying the stylized numbering structures for the
481 -- compound patterns is to help simplify the processing in the Image
482 -- function, since it eases the task of retrieving the original recursive
483 -- structure of the pattern from the flat graph structure of elements.
484 -- This use in the Image function is the only point at which the code
485 -- makes use of the stylized structures.
487 type Ref_Array is array (IndexT range <>) of PE_Ptr;
488 -- This type is used to build an array whose N'th entry references the
489 -- element in a pattern whose Index value is N. See Build_Ref_Array.
491 procedure Build_Ref_Array (E : PE_Ptr; RA : out Ref_Array);
492 -- Given a pattern element which is the leading element of a pattern
493 -- structure, and a Ref_Array with bounds 1 .. E.Index, fills in the
494 -- Ref_Array so that its N'th entry references the element of the
495 -- referenced pattern whose Index value is N.
497 -------------------------------
498 -- Recursive Pattern Matches --
499 -------------------------------
501 -- The pattern primitive (+P) where P is a Pattern_Ptr or Pattern_Func
502 -- causes a recursive pattern match. This cannot be handled by an actual
503 -- recursive call to the outer level Match routine, since this would not
504 -- allow for possible backtracking into the region matched by the inner
505 -- pattern. Indeed this is the classical clash between recursion and
506 -- backtracking, and a simple recursive stack structure does not suffice.
508 -- This section describes how this recursion and the possible associated
509 -- backtracking is handled. We still use a single stack, but we establish
510 -- the concept of nested regions on this stack, each of which has a stack
511 -- base value pointing to the deepest stack entry of the region. The base
512 -- value for the outer level is zero.
514 -- When a recursive match is established, two special stack entries are
515 -- made. The first entry is used to save the original node that starts
516 -- the recursive match. This is saved so that the successor field of
517 -- this node is accessible at the end of the match, but it is never
518 -- popped and executed.
520 -- The second entry corresponds to a standard new region action. A
521 -- PC_R_Remove node is stacked, whose cursor field is used to store
522 -- the outer stack base, and the stack base is reset to point to
523 -- this PC_R_Remove node. Then the recursive pattern is matched and
524 -- it can make history stack entries in the normal matter, so now
525 -- the stack looks like:
527 -- (stack entries made by outer level)
529 -- (Special entry, node is (+P) successor
530 -- cursor entry is not used)
532 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack base
533 -- saved base value for the enclosing region)
535 -- (stack entries made by inner level)
537 -- If a subsequent failure occurs and pops the PC_R_Remove node, it
538 -- removes itself and the special entry immediately underneath it,
539 -- restores the stack base value for the enclosing region, and then
540 -- again signals failure to look for alternatives that were stacked
541 -- before the recursion was initiated.
543 -- Now we need to consider what happens if the inner pattern succeeds, as
544 -- signalled by accessing the special PC_EOP pattern primitive. First we
545 -- recognize the nested case by looking at the Base value. If this Base
546 -- value is Stack'First, then the entire match has succeeded, but if the
547 -- base value is greater than Stack'First, then we have successfully
548 -- matched an inner pattern, and processing continues at the outer level.
550 -- There are two cases. The simple case is when the inner pattern has made
551 -- no stack entries, as recognized by the fact that the current stack
552 -- pointer is equal to the current base value. In this case it is fine to
553 -- remove all trace of the recursion by restoring the outer base value and
554 -- using the special entry to find the appropriate successor node.
556 -- The more complex case arises when the inner match does make stack
557 -- entries. In this case, the PC_EOP processing stacks a special entry
558 -- whose cursor value saves the saved inner base value (the one that
559 -- references the corresponding PC_R_Remove value), and whose node
560 -- pointer references a PC_R_Restore node, so the stack looks like:
562 -- (stack entries made by outer level)
564 -- (Special entry, node is (+P) successor,
565 -- cursor entry is not used)
567 -- (PC_R_Remove entry, "cursor" value is (negative)
568 -- saved base value for the enclosing region)
570 -- (stack entries made by inner level)
572 -- (PC_Region_Replace entry, "cursor" value is (negative)
573 -- stack pointer value referencing the PC_R_Remove entry).
575 -- If the entire match succeeds, then these stack entries are, as usual,
576 -- ignored and abandoned. If on the other hand a subsequent failure
577 -- causes the PC_Region_Replace entry to be popped, it restores the
578 -- inner base value from its saved "cursor" value and then fails again.
579 -- Note that it is OK that the cursor is temporarily clobbered by this
580 -- pop, since the second failure will reestablish a proper cursor value.
582 ---------------------------------
583 -- Compound Pattern Structures --
584 ---------------------------------
586 -- This section discusses the compound structures used to represent
587 -- constructed patterns. It shows the graph structures of pattern
588 -- elements that are constructed, and in the case of patterns that
589 -- provide backtracking possibilities, describes how the history
590 -- stack is used to control the backtracking. Finally, it notes the
591 -- way in which the Index numbers are assigned to the structure.
593 -- In all diagrams, solid lines (built witth minus signs or vertical
594 -- bars, represent successor pointers (Pthen fields) with > or V used
595 -- to indicate the direction of the pointer. The initial node of the
596 -- structure is in the upper left of the diagram. A dotted line is an
597 -- alternative pointer from the element above it to the element below
598 -- it. See individual sections for details on how alternatives are used.
600 -------------------
601 -- Concatenation --
602 -------------------
604 -- In the pattern structures listed in this section, a line that looks
605 -- lile ----> with nothing to the right indicates an end of pattern
606 -- (EOP) pointer that represents the end of the match.
608 -- When a pattern concatenation (L & R) occurs, the resulting structure
609 -- is obtained by finding all such EOP pointers in L, and replacing
610 -- them to point to R. This is the most important flattening that
611 -- occurs in constructing a pattern, and it means that the pattern
612 -- matching circuitry does not have to keep track of the structure
613 -- of a pattern with respect to concatenation, since the appropriate
614 -- succesor is always at hand.
616 -- Concatenation itself generates no additional possibilities for
617 -- backtracking, but the constituent patterns of the concatenated
618 -- structure will make stack entries as usual. The maximum amount
619 -- of stack required by the structure is thus simply the sum of the
620 -- maximums required by L and R.
622 -- The index numbering of a concatenation structure works by leaving
623 -- the numbering of the right hand pattern, R, unchanged and adjusting
624 -- the numbers in the left hand pattern, L up by the count of elements
625 -- in R. This ensures that the maximum numbered element is the leading
626 -- element as required (given that it was the leading element in L).
628 -----------------
629 -- Alternation --
630 -----------------
632 -- A pattern (L or R) constructs the structure:
634 -- +---+ +---+
635 -- | A |---->| L |---->
636 -- +---+ +---+
637 -- .
638 -- .
639 -- +---+
640 -- | R |---->
641 -- +---+
643 -- The A element here is a PC_Alt node, and the dotted line represents
644 -- the contents of the Alt field. When the PC_Alt element is matched,
645 -- it stacks a pointer to the leading element of R on the history stack
646 -- so that on subsequent failure, a match of R is attempted.
648 -- The A node is the higest numbered element in the pattern. The
649 -- original index numbers of R are unchanged, but the index numbers
650 -- of the L pattern are adjusted up by the count of elements in R.
652 -- Note that the difference between the index of the L leading element
653 -- the index of the R leading element (after building the alt structure)
654 -- indicates the number of nodes in L, and this is true even after the
655 -- structure is incorporated into some larger structure. For example,
656 -- if the A node has index 16, and L has index 15 and R has index
657 -- 5, then we know that L has 10 (15-5) elements in it.
659 -- Suppose that we now concatenate this structure to another pattern
660 -- with 9 elements in it. We will now have the A node with an index
661 -- of 25, L with an index of 24 and R with an index of 14. We still
662 -- know that L has 10 (24-14) elements in it, numbered 15-24, and
663 -- consequently the successor of the alternation structure has an
664 -- index with a value less than 15. This is used in Image to figure
665 -- out the original recursive structure of a pattern.
667 -- To clarify the interaction of the alternation and concatenation
668 -- structures, here is a more complex example of the structure built
669 -- for the pattern:
671 -- (V or W or X) (Y or Z)
673 -- where A,B,C,D,E are all single element patterns:
675 -- +---+ +---+ +---+ +---+
676 -- I A I---->I V I---+-->I A I---->I Y I---->
677 -- +---+ +---+ I +---+ +---+
678 -- . I .
679 -- . I .
680 -- +---+ +---+ I +---+
681 -- I A I---->I W I-->I I Z I---->
682 -- +---+ +---+ I +---+
683 -- . I
684 -- . I
685 -- +---+ I
686 -- I X I------------>+
687 -- +---+
689 -- The numbering of the nodes would be as follows:
691 -- +---+ +---+ +---+ +---+
692 -- I 8 I---->I 7 I---+-->I 3 I---->I 2 I---->
693 -- +---+ +---+ I +---+ +---+
694 -- . I .
695 -- . I .
696 -- +---+ +---+ I +---+
697 -- I 6 I---->I 5 I-->I I 1 I---->
698 -- +---+ +---+ I +---+
699 -- . I
700 -- . I
701 -- +---+ I
702 -- I 4 I------------>+
703 -- +---+
705 -- Note: The above structure actually corresponds to
707 -- (A or (B or C)) (D or E)
709 -- rather than
711 -- ((A or B) or C) (D or E)
713 -- which is the more natural interpretation, but in fact alternation
714 -- is associative, and the construction of an alternative changes the
715 -- left grouped pattern to the right grouped pattern in any case, so
716 -- that the Image function produces a more natural looking output.
718 ---------
719 -- Arb --
720 ---------
722 -- An Arb pattern builds the structure
724 -- +---+
725 -- | X |---->
726 -- +---+
727 -- .
728 -- .
729 -- +---+
730 -- | Y |---->
731 -- +---+
733 -- The X node is a PC_Arb_X node, which matches null, and stacks a
734 -- pointer to Y node, which is the PC_Arb_Y node that matches one
735 -- extra character and restacks itself.
737 -- The PC_Arb_X node is numbered 2, and the PC_Arb_Y node is 1.
739 -------------------------
740 -- Arbno (simple case) --
741 -------------------------
743 -- The simple form of Arbno can be used where the pattern always
744 -- matches at least one character if it succeeds, and it is known
745 -- not to make any history stack entries. In this case, Arbno (P)
746 -- can construct the following structure:
748 -- +-------------+
749 -- | ^
750 -- V |
751 -- +---+ |
752 -- | S |----> |
753 -- +---+ |
754 -- . |
755 -- . |
756 -- +---+ |
757 -- | P |---------->+
758 -- +---+
760 -- The S (PC_Arbno_S) node matches null stacking a pointer to the
761 -- pattern P. If a subsequent failure causes P to be matched and
762 -- this match succeeds, then node A gets restacked to try another
763 -- instance if needed by a subsequent failure.
765 -- The node numbering of the constituent pattern P is not affected.
766 -- The S node has a node number of P.Index + 1.
768 --------------------------
769 -- Arbno (complex case) --
770 --------------------------
772 -- A call to Arbno (P), where P can match null (or at least is not
773 -- known to require a non-null string) and/or P requires pattern stack
774 -- entries, constructs the following structure:
776 -- +--------------------------+
777 -- | ^
778 -- V |
779 -- +---+ |
780 -- | X |----> |
781 -- +---+ |
782 -- . |
783 -- . |
784 -- +---+ +---+ +---+ |
785 -- | E |---->| P |---->| Y |--->+
786 -- +---+ +---+ +---+
788 -- The node X (PC_Arbno_X) matches null, stacking a pointer to the
789 -- E-P-X structure used to match one Arbno instance.
791 -- Here E is the PC_R_Enter node which matches null and creates two
792 -- stack entries. The first is a special entry whose node field is
793 -- not used at all, and whose cursor field has the initial cursor.
795 -- The second entry corresponds to a standard new region action. A
796 -- PC_R_Remove node is stacked, whose cursor field is used to store
797 -- the outer stack base, and the stack base is reset to point to
798 -- this PC_R_Remove node. Then the pattern P is matched, and it can
799 -- make history stack entries in the normal manner, so now the stack
800 -- looks like:
802 -- (stack entries made before assign pattern)
804 -- (Special entry, node field not used,
805 -- used only to save initial cursor)
807 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base
808 -- saved base value for the enclosing region)
810 -- (stack entries made by matching P)
812 -- If the match of P fails, then the PC_R_Remove entry is popped and
813 -- it removes both itself and the special entry underneath it,
814 -- restores the outer stack base, and signals failure.
816 -- If the match of P succeeds, then node Y, the PC_Arbno_Y node, pops
817 -- the inner region. There are two possibilities. If matching P left
818 -- no stack entries, then all traces of the inner region can be removed.
819 -- If there are stack entries, then we push an PC_Region_Replace stack
820 -- entry whose "cursor" value is the inner stack base value, and then
821 -- restore the outer stack base value, so the stack looks like:
823 -- (stack entries made before assign pattern)
825 -- (Special entry, node field not used,
826 -- used only to save initial cursor)
828 -- (PC_R_Remove entry, "cursor" value is (negative)
829 -- saved base value for the enclosing region)
831 -- (stack entries made by matching P)
833 -- (PC_Region_Replace entry, "cursor" value is (negative)
834 -- stack pointer value referencing the PC_R_Remove entry).
836 -- Now that we have matched another instance of the Arbno pattern,
837 -- we need to move to the successor. There are two cases. If the
838 -- Arbno pattern matched null, then there is no point in seeking
839 -- alternatives, since we would just match a whole bunch of nulls.
840 -- In this case we look through the alternative node, and move
841 -- directly to its successor (i.e. the successor of the Arbno
842 -- pattern). If on the other hand a non-null string was matched,
843 -- we simply follow the successor to the alternative node, which
844 -- sets up for another possible match of the Arbno pattern.
846 -- As noted in the section on stack checking, the stack count (and
847 -- hence the stack check) for a pattern includes only one iteration
848 -- of the Arbno pattern. To make sure that multiple iterations do not
849 -- overflow the stack, the Arbno node saves the stack count required
850 -- by a single iteration, and the Concat function increments this to
851 -- include stack entries required by any successor. The PC_Arbno_Y
852 -- node uses this count to ensure that sufficient stack remains
853 -- before proceeding after matching each new instance.
855 -- The node numbering of the constituent pattern P is not affected.
856 -- Where N is the number of nodes in P, the Y node is numbered N + 1,
857 -- the E node is N + 2, and the X node is N + 3.
859 ----------------------
860 -- Assign Immediate --
861 ----------------------
863 -- Immediate assignment (P * V) constructs the following structure
865 -- +---+ +---+ +---+
866 -- | E |---->| P |---->| A |---->
867 -- +---+ +---+ +---+
869 -- Here E is the PC_R_Enter node which matches null and creates two
870 -- stack entries. The first is a special entry whose node field is
871 -- not used at all, and whose cursor field has the initial cursor.
873 -- The second entry corresponds to a standard new region action. A
874 -- PC_R_Remove node is stacked, whose cursor field is used to store
875 -- the outer stack base, and the stack base is reset to point to
876 -- this PC_R_Remove node. Then the pattern P is matched, and it can
877 -- make history stack entries in the normal manner, so now the stack
878 -- looks like:
880 -- (stack entries made before assign pattern)
882 -- (Special entry, node field not used,
883 -- used only to save initial cursor)
885 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base
886 -- saved base value for the enclosing region)
888 -- (stack entries made by matching P)
890 -- If the match of P fails, then the PC_R_Remove entry is popped
891 -- and it removes both itself and the special entry underneath it,
892 -- restores the outer stack base, and signals failure.
894 -- If the match of P succeeds, then node A, which is the actual
895 -- PC_Assign_Imm node, executes the assignment (using the stack
896 -- base to locate the entry with the saved starting cursor value),
897 -- and the pops the inner region. There are two possibilities, if
898 -- matching P left no stack entries, then all traces of the inner
899 -- region can be removed. If there are stack entries, then we push
900 -- an PC_Region_Replace stack entry whose "cursor" value is the
901 -- inner stack base value, and then restore the outer stack base
902 -- value, so the stack looks like:
904 -- (stack entries made before assign pattern)
906 -- (Special entry, node field not used,
907 -- used only to save initial cursor)
909 -- (PC_R_Remove entry, "cursor" value is (negative)
910 -- saved base value for the enclosing region)
912 -- (stack entries made by matching P)
914 -- (PC_Region_Replace entry, "cursor" value is the (negative)
915 -- stack pointer value referencing the PC_R_Remove entry).
917 -- If a subsequent failure occurs, the PC_Region_Replace node restores
918 -- the inner stack base value and signals failure to explore rematches
919 -- of the pattern P.
921 -- The node numbering of the constituent pattern P is not affected.
922 -- Where N is the number of nodes in P, the A node is numbered N + 1,
923 -- and the E node is N + 2.
925 ---------------------
926 -- Assign On Match --
927 ---------------------
929 -- The assign on match (**) pattern is quite similar to the assign
930 -- immediate pattern, except that the actual assignment has to be
931 -- delayed. The following structure is constructed:
933 -- +---+ +---+ +---+
934 -- | E |---->| P |---->| A |---->
935 -- +---+ +---+ +---+
937 -- The operation of this pattern is identical to that described above
938 -- for deferred assignment, up to the point where P has been matched.
940 -- The A node, which is the PC_Assign_OnM node first pushes a
941 -- PC_Assign node onto the history stack. This node saves the ending
942 -- cursor and acts as a flag for the final assignment, as further
943 -- described below.
945 -- It then stores a pointer to itself in the special entry node field.
946 -- This was otherwise unused, and is now used to retrive the address
947 -- of the variable to be assigned at the end of the pattern.
949 -- After that the inner region is terminated in the usual manner,
950 -- by stacking a PC_R_Restore entry as described for the assign
951 -- immediate case. Note that the optimization of completely
952 -- removing the inner region does not happen in this case, since
953 -- we have at least one stack entry (the PC_Assign one we just made).
954 -- The stack now looks like:
956 -- (stack entries made before assign pattern)
958 -- (Special entry, node points to copy of
959 -- the PC_Assign_OnM node, and the
960 -- cursor field saves the initial cursor).
962 -- (PC_R_Remove entry, "cursor" value is (negative)
963 -- saved base value for the enclosing region)
965 -- (stack entries made by matching P)
967 -- (PC_Assign entry, saves final cursor)
969 -- (PC_Region_Replace entry, "cursor" value is (negative)
970 -- stack pointer value referencing the PC_R_Remove entry).
972 -- If a subsequent failure causes the PC_Assign node to execute it
973 -- simply removes itself and propagates the failure.
975 -- If the match succeeds, then the history stack is scanned for
976 -- PC_Assign nodes, and the assignments are executed (examination
977 -- of the above diagram will show that all the necessary data is
978 -- at hand for the assignment).
980 -- To optimize the common case where no assign-on-match operations
981 -- are present, a global flag Assign_OnM is maintained which is
982 -- initialize to False, and gets set True as part of the execution
983 -- of the PC_Assign_OnM node. The scan of the history stack for
984 -- PC_Assign entries is done only if this flag is set.
986 -- The node numbering of the constituent pattern P is not affected.
987 -- Where N is the number of nodes in P, the A node is numbered N + 1,
988 -- and the E node is N + 2.
990 ---------
991 -- Bal --
992 ---------
994 -- Bal builds a single node:
996 -- +---+
997 -- | B |---->
998 -- +---+
1000 -- The node B is the PC_Bal node which matches a parentheses balanced
1001 -- string, starting at the current cursor position. It then updates
1002 -- the cursor past this matched string, and stacks a pointer to itself
1003 -- with this updated cursor value on the history stack, to extend the
1004 -- matched string on a subequent failure.
1006 -- Since this is a single node it is numbered 1 (the reason we include
1007 -- it in the compound patterns section is that it backtracks).
1009 ------------
1010 -- BreakX --
1011 ------------
1013 -- BreakX builds the structure
1015 -- +---+ +---+
1016 -- | B |---->| A |---->
1017 -- +---+ +---+
1018 -- ^ .
1019 -- | .
1020 -- | +---+
1021 -- +<------| X |
1022 -- +---+
1024 -- Here the B node is the BreakX_xx node that performs a normal Break
1025 -- function. The A node is an alternative (PC_Alt) node that matches
1026 -- null, but stacks a pointer to node X (the PC_BreakX_X node) which
1027 -- extends the match one character (to eat up the previously detected
1028 -- break character), and then rematches the break.
1030 -- The B node is numbered 3, the alternative node is 1, and the X
1031 -- node is 2.
1033 -----------
1034 -- Fence --
1035 -----------
1037 -- Fence builds a single node:
1039 -- +---+
1040 -- | F |---->
1041 -- +---+
1043 -- The element F, PC_Fence, matches null, and stacks a pointer to a
1044 -- PC_Cancel element which will abort the match on a subsequent failure.
1046 -- Since this is a single element it is numbered 1 (the reason we
1047 -- include it in the compound patterns section is that it backtracks).
1049 --------------------
1050 -- Fence Function --
1051 --------------------
1053 -- A call to the Fence function builds the structure:
1055 -- +---+ +---+ +---+
1056 -- | E |---->| P |---->| X |---->
1057 -- +---+ +---+ +---+
1059 -- Here E is the PC_R_Enter node which matches null and creates two
1060 -- stack entries. The first is a special entry which is not used at
1061 -- all in the fence case (it is present merely for uniformity with
1062 -- other cases of region enter operations).
1064 -- The second entry corresponds to a standard new region action. A
1065 -- PC_R_Remove node is stacked, whose cursor field is used to store
1066 -- the outer stack base, and the stack base is reset to point to
1067 -- this PC_R_Remove node. Then the pattern P is matched, and it can
1068 -- make history stack entries in the normal manner, so now the stack
1069 -- looks like:
1071 -- (stack entries made before fence pattern)
1073 -- (Special entry, not used at all)
1075 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base
1076 -- saved base value for the enclosing region)
1078 -- (stack entries made by matching P)
1080 -- If the match of P fails, then the PC_R_Remove entry is popped
1081 -- and it removes both itself and the special entry underneath it,
1082 -- restores the outer stack base, and signals failure.
1084 -- If the match of P succeeds, then node X, the PC_Fence_X node, gets
1085 -- control. One might be tempted to think that at this point, the
1086 -- history stack entries made by matching P can just be removed since
1087 -- they certainly are not going to be used for rematching (that is
1088 -- whole point of Fence after all!) However, this is wrong, because
1089 -- it would result in the loss of possible assign-on-match entries
1090 -- for deferred pattern assignments.
1092 -- Instead what we do is to make a special entry whose node references
1093 -- PC_Fence_Y, and whose cursor saves the inner stack base value, i.e.
1094 -- the pointer to the PC_R_Remove entry. Then the outer stack base
1095 -- pointer is restored, so the stack looks like:
1097 -- (stack entries made before assign pattern)
1099 -- (Special entry, not used at all)
1101 -- (PC_R_Remove entry, "cursor" value is (negative)
1102 -- saved base value for the enclosing region)
1104 -- (stack entries made by matching P)
1106 -- (PC_Fence_Y entry, "cursor" value is (negative) stack
1107 -- pointer value referencing the PC_R_Remove entry).
1109 -- If a subsequent failure occurs, then the PC_Fence_Y entry removes
1110 -- the entire inner region, including all entries made by matching P,
1111 -- and alternatives prior to the Fence pattern are sought.
1113 -- The node numbering of the constituent pattern P is not affected.
1114 -- Where N is the number of nodes in P, the X node is numbered N + 1,
1115 -- and the E node is N + 2.
1117 -------------
1118 -- Succeed --
1119 -------------
1121 -- Succeed builds a single node:
1123 -- +---+
1124 -- | S |---->
1125 -- +---+
1127 -- The node S is the PC_Succeed node which matches null, and stacks
1128 -- a pointer to itself on the history stack, so that a subsequent
1129 -- failure repeats the same match.
1131 -- Since this is a single node it is numbered 1 (the reason we include
1132 -- it in the compound patterns section is that it backtracks).
1134 ---------------------
1135 -- Write Immediate --
1136 ---------------------
1138 -- The structure built for a write immediate operation (P * F, where
1139 -- F is a file access value) is:
1141 -- +---+ +---+ +---+
1142 -- | E |---->| P |---->| W |---->
1143 -- +---+ +---+ +---+
1145 -- Here E is the PC_R_Enter node and W is the PC_Write_Imm node. The
1146 -- handling is identical to that described above for Assign Immediate,
1147 -- except that at the point where a successful match occurs, the matched
1148 -- substring is written to the referenced file.
1150 -- The node numbering of the constituent pattern P is not affected.
1151 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1152 -- and the E node is N + 2.
1154 --------------------
1155 -- Write On Match --
1156 --------------------
1158 -- The structure built for a write on match operation (P ** F, where
1159 -- F is a file access value) is:
1161 -- +---+ +---+ +---+
1162 -- | E |---->| P |---->| W |---->
1163 -- +---+ +---+ +---+
1165 -- Here E is the PC_R_Enter node and W is the PC_Write_OnM node. The
1166 -- handling is identical to that described above for Assign On Match,
1167 -- except that at the point where a successful match has completed,
1168 -- the matched substring is written to the referenced file.
1170 -- The node numbering of the constituent pattern P is not affected.
1171 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1172 -- and the E node is N + 2.
1173 -----------------------
1174 -- Constant Patterns --
1175 -----------------------
1177 -- The following pattern elements are referenced only from the pattern
1178 -- history stack. In each case the processing for the pattern element
1179 -- results in pattern match abort, or futher failure, so there is no
1180 -- need for a successor and no need for a node number
1182 CP_Assign : aliased PE := (PC_Assign, 0, N);
1183 CP_Cancel : aliased PE := (PC_Cancel, 0, N);
1184 CP_Fence_Y : aliased PE := (PC_Fence_Y, 0, N);
1185 CP_R_Remove : aliased PE := (PC_R_Remove, 0, N);
1186 CP_R_Restore : aliased PE := (PC_R_Restore, 0, N);
1188 -----------------------
1189 -- Local Subprograms --
1190 -----------------------
1192 function Alternate (L, R : PE_Ptr) return PE_Ptr;
1193 function "or" (L, R : PE_Ptr) return PE_Ptr renames Alternate;
1194 -- Build pattern structure corresponding to the alternation of L, R.
1195 -- (i.e. try to match L, and if that fails, try to match R).
1197 function Arbno_Simple (P : PE_Ptr) return PE_Ptr;
1198 -- Build simple Arbno pattern, P is a pattern that is guaranteed to
1199 -- match at least one character if it succeeds and to require no
1200 -- stack entries under all circumstances. The result returned is
1201 -- a simple Arbno structure as previously described.
1203 function Bracket (E, P, A : PE_Ptr) return PE_Ptr;
1204 -- Given two single node pattern elements E and A, and a (possible
1205 -- complex) pattern P, construct the concatenation E-->P-->A and
1206 -- return a pointer to E. The concatenation does not affect the
1207 -- node numbering in P. A has a number one higher than the maximum
1208 -- number in P, and E has a number two higher than the maximum
1209 -- number in P (see for example the Assign_Immediate structure to
1210 -- understand a typical use of this function).
1212 function BreakX_Make (B : PE_Ptr) return Pattern;
1213 -- Given a pattern element for a Break patternx, returns the
1214 -- corresponding BreakX compound pattern structure.
1216 function Concat (L, R : PE_Ptr; Incr : Natural) return PE_Ptr;
1217 -- Creates a pattern eelement that represents a concatenation of the
1218 -- two given pattern elements (i.e. the pattern L followed by R).
1219 -- The result returned is always the same as L, but the pattern
1220 -- referenced by L is modified to have R as a successor. This
1221 -- procedure does not copy L or R, so if a copy is required, it
1222 -- is the responsibility of the caller. The Incr parameter is an
1223 -- amount to be added to the Nat field of any P_Arbno_Y node that is
1224 -- in the left operand, it represents the additional stack space
1225 -- required by the right operand.
1227 function C_To_PE (C : PChar) return PE_Ptr;
1228 -- Given a character, constructs a pattern element that matches
1229 -- the single character.
1231 function Copy (P : PE_Ptr) return PE_Ptr;
1232 -- Creates a copy of the pattern element referenced by the given
1233 -- pattern element reference. This is a deep copy, which means that
1234 -- it follows the Next and Alt pointers.
1236 function Image (P : PE_Ptr) return String;
1237 -- Returns the image of the address of the referenced pattern element.
1238 -- This is equivalent to Image (To_Address (P));
1240 function Is_In (C : Character; Str : String) return Boolean;
1241 pragma Inline (Is_In);
1242 -- Determines if the character C is in string Str.
1244 procedure Logic_Error;
1245 -- Called to raise Program_Error with an appropriate message if an
1246 -- internal logic error is detected.
1248 function Str_BF (A : Boolean_Func) return String;
1249 function Str_FP (A : File_Ptr) return String;
1250 function Str_NF (A : Natural_Func) return String;
1251 function Str_NP (A : Natural_Ptr) return String;
1252 function Str_PP (A : Pattern_Ptr) return String;
1253 function Str_VF (A : VString_Func) return String;
1254 function Str_VP (A : VString_Ptr) return String;
1255 -- These are debugging routines, which return a representation of the
1256 -- given access value (they are called only by Image and Dump)
1258 procedure Set_Successor (Pat : PE_Ptr; Succ : PE_Ptr);
1259 -- Adjusts all EOP pointers in Pat to point to Succ. No other changes
1260 -- are made. In particular, Succ is unchanged, and no index numbers
1261 -- are modified. Note that Pat may not be equal to EOP on entry.
1263 function S_To_PE (Str : PString) return PE_Ptr;
1264 -- Given a string, constructs a pattern element that matches the string
1266 procedure Uninitialized_Pattern;
1267 pragma No_Return (Uninitialized_Pattern);
1268 -- Called to raise Program_Error with an appropriate error message if
1269 -- an uninitialized pattern is used in any pattern construction or
1270 -- pattern matching operation.
1272 procedure XMatch
1273 (Subject : String;
1274 Pat_P : PE_Ptr;
1275 Pat_S : Natural;
1276 Start : out Natural;
1277 Stop : out Natural);
1278 -- This is the common pattern match routine. It is passed a string and
1279 -- a pattern, and it indicates success or failure, and on success the
1280 -- section of the string matched. It does not perform any assignments
1281 -- to the subject string, so pattern replacement is for the caller.
1283 -- Subject The subject string. The lower bound is always one. In the
1284 -- Match procedures, it is fine to use strings whose lower bound
1285 -- is not one, but we perform a one time conversion before the
1286 -- call to XMatch, so that XMatch does not have to be bothered
1287 -- with strange lower bounds.
1289 -- Pat_P Points to initial pattern element of pattern to be matched
1291 -- Pat_S Maximum required stack entries for pattern to be matched
1293 -- Start If match is successful, starting index of matched section.
1294 -- This value is always non-zero. A value of zero is used to
1295 -- indicate a failed match.
1297 -- Stop If match is successful, ending index of matched section.
1298 -- This can be zero if we match the null string at the start,
1299 -- in which case Start is set to zero, and Stop to one. If the
1300 -- Match fails, then the contents of Stop is undefined.
1302 procedure XMatchD
1303 (Subject : String;
1304 Pat_P : PE_Ptr;
1305 Pat_S : Natural;
1306 Start : out Natural;
1307 Stop : out Natural);
1308 -- Identical in all respects to XMatch, except that trace information is
1309 -- output on Standard_Ouput during execution of the match. This is the
1310 -- version that is called if the original Match call has Debug => True.
1312 ---------
1313 -- "&" --
1314 ---------
1316 function "&" (L : PString; R : Pattern) return Pattern is
1317 begin
1318 return (AFC with R.Stk, Concat (S_To_PE (L), Copy (R.P), R.Stk));
1319 end "&";
1321 function "&" (L : Pattern; R : PString) return Pattern is
1322 begin
1323 return (AFC with L.Stk, Concat (Copy (L.P), S_To_PE (R), 0));
1324 end "&";
1326 function "&" (L : PChar; R : Pattern) return Pattern is
1327 begin
1328 return (AFC with R.Stk, Concat (C_To_PE (L), Copy (R.P), R.Stk));
1329 end "&";
1331 function "&" (L : Pattern; R : PChar) return Pattern is
1332 begin
1333 return (AFC with L.Stk, Concat (Copy (L.P), C_To_PE (R), 0));
1334 end "&";
1336 function "&" (L : Pattern; R : Pattern) return Pattern is
1337 begin
1338 return (AFC with L.Stk + R.Stk, Concat (Copy (L.P), Copy (R.P), R.Stk));
1339 end "&";
1341 ---------
1342 -- "*" --
1343 ---------
1345 -- Assign immediate
1347 -- +---+ +---+ +---+
1348 -- | E |---->| P |---->| A |---->
1349 -- +---+ +---+ +---+
1351 -- The node numbering of the constituent pattern P is not affected.
1352 -- Where N is the number of nodes in P, the A node is numbered N + 1,
1353 -- and the E node is N + 2.
1355 function "*" (P : Pattern; Var : VString_Var) return Pattern is
1356 Pat : constant PE_Ptr := Copy (P.P);
1357 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1358 A : constant PE_Ptr :=
1359 new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access);
1361 begin
1362 return (AFC with P.Stk + 3, Bracket (E, Pat, A));
1363 end "*";
1365 function "*" (P : PString; Var : VString_Var) return Pattern is
1366 Pat : constant PE_Ptr := S_To_PE (P);
1367 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1368 A : constant PE_Ptr :=
1369 new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access);
1371 begin
1372 return (AFC with 3, Bracket (E, Pat, A));
1373 end "*";
1375 function "*" (P : PChar; Var : VString_Var) return Pattern is
1376 Pat : constant PE_Ptr := C_To_PE (P);
1377 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1378 A : constant PE_Ptr :=
1379 new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access);
1381 begin
1382 return (AFC with 3, Bracket (E, Pat, A));
1383 end "*";
1385 -- Write immediate
1387 -- +---+ +---+ +---+
1388 -- | E |---->| P |---->| W |---->
1389 -- +---+ +---+ +---+
1391 -- The node numbering of the constituent pattern P is not affected.
1392 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1393 -- and the E node is N + 2.
1395 function "*" (P : Pattern; Fil : File_Access) return Pattern is
1396 Pat : constant PE_Ptr := Copy (P.P);
1397 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1398 W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil);
1400 begin
1401 return (AFC with 3, Bracket (E, Pat, W));
1402 end "*";
1404 function "*" (P : PString; Fil : File_Access) return Pattern is
1405 Pat : constant PE_Ptr := S_To_PE (P);
1406 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1407 W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil);
1409 begin
1410 return (AFC with 3, Bracket (E, Pat, W));
1411 end "*";
1413 function "*" (P : PChar; Fil : File_Access) return Pattern is
1414 Pat : constant PE_Ptr := C_To_PE (P);
1415 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1416 W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil);
1418 begin
1419 return (AFC with 3, Bracket (E, Pat, W));
1420 end "*";
1422 ----------
1423 -- "**" --
1424 ----------
1426 -- Assign on match
1428 -- +---+ +---+ +---+
1429 -- | E |---->| P |---->| A |---->
1430 -- +---+ +---+ +---+
1432 -- The node numbering of the constituent pattern P is not affected.
1433 -- Where N is the number of nodes in P, the A node is numbered N + 1,
1434 -- and the E node is N + 2.
1436 function "**" (P : Pattern; Var : VString_Var) return Pattern is
1437 Pat : constant PE_Ptr := Copy (P.P);
1438 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1439 A : constant PE_Ptr :=
1440 new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access);
1442 begin
1443 return (AFC with P.Stk + 3, Bracket (E, Pat, A));
1444 end "**";
1446 function "**" (P : PString; Var : VString_Var) return Pattern is
1447 Pat : constant PE_Ptr := S_To_PE (P);
1448 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1449 A : constant PE_Ptr :=
1450 new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access);
1452 begin
1453 return (AFC with 3, Bracket (E, Pat, A));
1454 end "**";
1456 function "**" (P : PChar; Var : VString_Var) return Pattern is
1457 Pat : constant PE_Ptr := C_To_PE (P);
1458 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1459 A : constant PE_Ptr :=
1460 new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access);
1462 begin
1463 return (AFC with 3, Bracket (E, Pat, A));
1464 end "**";
1466 -- Write on match
1468 -- +---+ +---+ +---+
1469 -- | E |---->| P |---->| W |---->
1470 -- +---+ +---+ +---+
1472 -- The node numbering of the constituent pattern P is not affected.
1473 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1474 -- and the E node is N + 2.
1476 function "**" (P : Pattern; Fil : File_Access) return Pattern is
1477 Pat : constant PE_Ptr := Copy (P.P);
1478 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1479 W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil);
1481 begin
1482 return (AFC with P.Stk + 3, Bracket (E, Pat, W));
1483 end "**";
1485 function "**" (P : PString; Fil : File_Access) return Pattern is
1486 Pat : constant PE_Ptr := S_To_PE (P);
1487 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1488 W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil);
1490 begin
1491 return (AFC with 3, Bracket (E, Pat, W));
1492 end "**";
1494 function "**" (P : PChar; Fil : File_Access) return Pattern is
1495 Pat : constant PE_Ptr := C_To_PE (P);
1496 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1497 W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil);
1499 begin
1500 return (AFC with 3, Bracket (E, Pat, W));
1501 end "**";
1503 ---------
1504 -- "+" --
1505 ---------
1507 function "+" (Str : VString_Var) return Pattern is
1508 begin
1509 return
1510 (AFC with 0,
1511 new PE'(PC_String_VP, 1, EOP, Str'Unrestricted_Access));
1512 end "+";
1514 function "+" (Str : VString_Func) return Pattern is
1515 begin
1516 return (AFC with 0, new PE'(PC_String_VF, 1, EOP, Str));
1517 end "+";
1519 function "+" (P : Pattern_Var) return Pattern is
1520 begin
1521 return
1522 (AFC with 3,
1523 new PE'(PC_Rpat, 1, EOP, P'Unrestricted_Access));
1524 end "+";
1526 function "+" (P : Boolean_Func) return Pattern is
1527 begin
1528 return (AFC with 3, new PE'(PC_Pred_Func, 1, EOP, P));
1529 end "+";
1531 ----------
1532 -- "or" --
1533 ----------
1535 function "or" (L : PString; R : Pattern) return Pattern is
1536 begin
1537 return (AFC with R.Stk + 1, S_To_PE (L) or Copy (R.P));
1538 end "or";
1540 function "or" (L : Pattern; R : PString) return Pattern is
1541 begin
1542 return (AFC with L.Stk + 1, Copy (L.P) or S_To_PE (R));
1543 end "or";
1545 function "or" (L : PString; R : PString) return Pattern is
1546 begin
1547 return (AFC with 1, S_To_PE (L) or S_To_PE (R));
1548 end "or";
1550 function "or" (L : Pattern; R : Pattern) return Pattern is
1551 begin
1552 return (AFC with
1553 Natural'Max (L.Stk, R.Stk) + 1, Copy (L.P) or Copy (R.P));
1554 end "or";
1556 function "or" (L : PChar; R : Pattern) return Pattern is
1557 begin
1558 return (AFC with 1, C_To_PE (L) or Copy (R.P));
1559 end "or";
1561 function "or" (L : Pattern; R : PChar) return Pattern is
1562 begin
1563 return (AFC with 1, Copy (L.P) or C_To_PE (R));
1564 end "or";
1566 function "or" (L : PChar; R : PChar) return Pattern is
1567 begin
1568 return (AFC with 1, C_To_PE (L) or C_To_PE (R));
1569 end "or";
1571 function "or" (L : PString; R : PChar) return Pattern is
1572 begin
1573 return (AFC with 1, S_To_PE (L) or C_To_PE (R));
1574 end "or";
1576 function "or" (L : PChar; R : PString) return Pattern is
1577 begin
1578 return (AFC with 1, C_To_PE (L) or S_To_PE (R));
1579 end "or";
1581 ------------
1582 -- Adjust --
1583 ------------
1585 -- No two patterns share the same pattern elements, so the adjust
1586 -- procedure for a Pattern assignment must do a deep copy of the
1587 -- pattern element structure.
1589 procedure Adjust (Object : in out Pattern) is
1590 begin
1591 Object.P := Copy (Object.P);
1592 end Adjust;
1594 ---------------
1595 -- Alternate --
1596 ---------------
1598 function Alternate (L, R : PE_Ptr) return PE_Ptr is
1599 begin
1600 -- If the left pattern is null, then we just add the alternation
1601 -- node with an index one greater than the right hand pattern.
1603 if L = EOP then
1604 return new PE'(PC_Alt, R.Index + 1, EOP, R);
1606 -- If the left pattern is non-null, then build a reference vector
1607 -- for its elements, and adjust their index values to acccomodate
1608 -- the right hand elements. Then add the alternation node.
1610 else
1611 declare
1612 Refs : Ref_Array (1 .. L.Index);
1614 begin
1615 Build_Ref_Array (L, Refs);
1617 for J in Refs'Range loop
1618 Refs (J).Index := Refs (J).Index + R.Index;
1619 end loop;
1620 end;
1622 return new PE'(PC_Alt, L.Index + 1, L, R);
1623 end if;
1624 end Alternate;
1626 ---------
1627 -- Any --
1628 ---------
1630 function Any (Str : String) return Pattern is
1631 begin
1632 return (AFC with 0, new PE'(PC_Any_CS, 1, EOP, To_Set (Str)));
1633 end Any;
1635 function Any (Str : VString) return Pattern is
1636 begin
1637 return Any (S (Str));
1638 end Any;
1640 function Any (Str : Character) return Pattern is
1641 begin
1642 return (AFC with 0, new PE'(PC_Any_CH, 1, EOP, Str));
1643 end Any;
1645 function Any (Str : Character_Set) return Pattern is
1646 begin
1647 return (AFC with 0, new PE'(PC_Any_CS, 1, EOP, Str));
1648 end Any;
1650 function Any (Str : access VString) return Pattern is
1651 begin
1652 return (AFC with 0, new PE'(PC_Any_VP, 1, EOP, VString_Ptr (Str)));
1653 end Any;
1655 function Any (Str : VString_Func) return Pattern is
1656 begin
1657 return (AFC with 0, new PE'(PC_Any_VF, 1, EOP, Str));
1658 end Any;
1660 ---------
1661 -- Arb --
1662 ---------
1664 -- +---+
1665 -- | X |---->
1666 -- +---+
1667 -- .
1668 -- .
1669 -- +---+
1670 -- | Y |---->
1671 -- +---+
1673 -- The PC_Arb_X element is numbered 2, and the PC_Arb_Y element is 1.
1675 function Arb return Pattern is
1676 Y : constant PE_Ptr := new PE'(PC_Arb_Y, 1, EOP);
1677 X : constant PE_Ptr := new PE'(PC_Arb_X, 2, EOP, Y);
1679 begin
1680 return (AFC with 1, X);
1681 end Arb;
1683 -----------
1684 -- Arbno --
1685 -----------
1687 function Arbno (P : PString) return Pattern is
1688 begin
1689 if P'Length = 0 then
1690 return (AFC with 0, EOP);
1692 else
1693 return (AFC with 0, Arbno_Simple (S_To_PE (P)));
1694 end if;
1695 end Arbno;
1697 function Arbno (P : PChar) return Pattern is
1698 begin
1699 return (AFC with 0, Arbno_Simple (C_To_PE (P)));
1700 end Arbno;
1702 function Arbno (P : Pattern) return Pattern is
1703 Pat : constant PE_Ptr := Copy (P.P);
1705 begin
1706 if P.Stk = 0
1707 and then OK_For_Simple_Arbno (Pat.Pcode)
1708 then
1709 return (AFC with 0, Arbno_Simple (Pat));
1710 end if;
1712 -- This is the complex case, either the pattern makes stack entries
1713 -- or it is possible for the pattern to match the null string (more
1714 -- accurately, we don't know that this is not the case).
1716 -- +--------------------------+
1717 -- | ^
1718 -- V |
1719 -- +---+ |
1720 -- | X |----> |
1721 -- +---+ |
1722 -- . |
1723 -- . |
1724 -- +---+ +---+ +---+ |
1725 -- | E |---->| P |---->| Y |--->+
1726 -- +---+ +---+ +---+
1728 -- The node numbering of the constituent pattern P is not affected.
1729 -- Where N is the number of nodes in P, the Y node is numbered N + 1,
1730 -- the E node is N + 2, and the X node is N + 3.
1732 declare
1733 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1734 X : constant PE_Ptr := new PE'(PC_Arbno_X, 0, EOP, E);
1735 Y : constant PE_Ptr := new PE'(PC_Arbno_Y, 0, X, P.Stk + 3);
1736 EPY : constant PE_Ptr := Bracket (E, Pat, Y);
1738 begin
1739 X.Alt := EPY;
1740 X.Index := EPY.Index + 1;
1741 return (AFC with P.Stk + 3, X);
1742 end;
1743 end Arbno;
1745 ------------------
1746 -- Arbno_Simple --
1747 ------------------
1749 -- +-------------+
1750 -- | ^
1751 -- V |
1752 -- +---+ |
1753 -- | S |----> |
1754 -- +---+ |
1755 -- . |
1756 -- . |
1757 -- +---+ |
1758 -- | P |---------->+
1759 -- +---+
1761 -- The node numbering of the constituent pattern P is not affected.
1762 -- The S node has a node number of P.Index + 1.
1764 -- Note that we know that P cannot be EOP, because a null pattern
1765 -- does not meet the requirements for simple Arbno.
1767 function Arbno_Simple (P : PE_Ptr) return PE_Ptr is
1768 S : constant PE_Ptr := new PE'(PC_Arbno_S, P.Index + 1, EOP, P);
1770 begin
1771 Set_Successor (P, S);
1772 return S;
1773 end Arbno_Simple;
1775 ---------
1776 -- Bal --
1777 ---------
1779 function Bal return Pattern is
1780 begin
1781 return (AFC with 1, new PE'(PC_Bal, 1, EOP));
1782 end Bal;
1784 -------------
1785 -- Bracket --
1786 -------------
1788 function Bracket (E, P, A : PE_Ptr) return PE_Ptr is
1789 begin
1790 if P = EOP then
1791 E.Pthen := A;
1792 E.Index := 2;
1793 A.Index := 1;
1795 else
1796 E.Pthen := P;
1797 Set_Successor (P, A);
1798 E.Index := P.Index + 2;
1799 A.Index := P.Index + 1;
1800 end if;
1802 return E;
1803 end Bracket;
1805 -----------
1806 -- Break --
1807 -----------
1809 function Break (Str : String) return Pattern is
1810 begin
1811 return (AFC with 0, new PE'(PC_Break_CS, 1, EOP, To_Set (Str)));
1812 end Break;
1814 function Break (Str : VString) return Pattern is
1815 begin
1816 return Break (S (Str));
1817 end Break;
1819 function Break (Str : Character) return Pattern is
1820 begin
1821 return (AFC with 0, new PE'(PC_Break_CH, 1, EOP, Str));
1822 end Break;
1824 function Break (Str : Character_Set) return Pattern is
1825 begin
1826 return (AFC with 0, new PE'(PC_Break_CS, 1, EOP, Str));
1827 end Break;
1829 function Break (Str : access VString) return Pattern is
1830 begin
1831 return (AFC with 0, new PE'(PC_Break_VP, 1, EOP, VString_Ptr (Str)));
1832 end Break;
1834 function Break (Str : VString_Func) return Pattern is
1835 begin
1836 return (AFC with 0, new PE'(PC_Break_VF, 1, EOP, Str));
1837 end Break;
1839 ------------
1840 -- BreakX --
1841 ------------
1843 function BreakX (Str : String) return Pattern is
1844 begin
1845 return BreakX_Make (new PE'(PC_BreakX_CS, 3, N, To_Set (Str)));
1846 end BreakX;
1848 function BreakX (Str : VString) return Pattern is
1849 begin
1850 return BreakX (S (Str));
1851 end BreakX;
1853 function BreakX (Str : Character) return Pattern is
1854 begin
1855 return BreakX_Make (new PE'(PC_BreakX_CH, 3, N, Str));
1856 end BreakX;
1858 function BreakX (Str : Character_Set) return Pattern is
1859 begin
1860 return BreakX_Make (new PE'(PC_BreakX_CS, 3, N, Str));
1861 end BreakX;
1863 function BreakX (Str : access VString) return Pattern is
1864 begin
1865 return BreakX_Make (new PE'(PC_BreakX_VP, 3, N, VString_Ptr (Str)));
1866 end BreakX;
1868 function BreakX (Str : VString_Func) return Pattern is
1869 begin
1870 return BreakX_Make (new PE'(PC_BreakX_VF, 3, N, Str));
1871 end BreakX;
1873 -----------------
1874 -- BreakX_Make --
1875 -----------------
1877 -- +---+ +---+
1878 -- | B |---->| A |---->
1879 -- +---+ +---+
1880 -- ^ .
1881 -- | .
1882 -- | +---+
1883 -- +<------| X |
1884 -- +---+
1886 -- The B node is numbered 3, the alternative node is 1, and the X
1887 -- node is 2.
1889 function BreakX_Make (B : PE_Ptr) return Pattern is
1890 X : constant PE_Ptr := new PE'(PC_BreakX_X, 2, B);
1891 A : constant PE_Ptr := new PE'(PC_Alt, 1, EOP, X);
1893 begin
1894 B.Pthen := A;
1895 return (AFC with 2, B);
1896 end BreakX_Make;
1898 ---------------------
1899 -- Build_Ref_Array --
1900 ---------------------
1902 procedure Build_Ref_Array (E : PE_Ptr; RA : out Ref_Array) is
1904 procedure Record_PE (E : PE_Ptr);
1905 -- Record given pattern element if not already recorded in RA,
1906 -- and also record any referenced pattern elements recursively.
1908 procedure Record_PE (E : PE_Ptr) is
1909 begin
1910 PutD (" Record_PE called with PE_Ptr = " & Image (E));
1912 if E = EOP or else RA (E.Index) /= null then
1913 Put_LineD (", nothing to do");
1914 return;
1916 else
1917 Put_LineD (", recording" & IndexT'Image (E.Index));
1918 RA (E.Index) := E;
1919 Record_PE (E.Pthen);
1921 if E.Pcode in PC_Has_Alt then
1922 Record_PE (E.Alt);
1923 end if;
1924 end if;
1925 end Record_PE;
1927 -- Start of processing for Build_Ref_Array
1929 begin
1930 New_LineD;
1931 Put_LineD ("Entering Build_Ref_Array");
1932 Record_PE (E);
1933 New_LineD;
1934 end Build_Ref_Array;
1936 -------------
1937 -- C_To_PE --
1938 -------------
1940 function C_To_PE (C : PChar) return PE_Ptr is
1941 begin
1942 return new PE'(PC_Char, 1, EOP, C);
1943 end C_To_PE;
1945 ------------
1946 -- Cancel --
1947 ------------
1949 function Cancel return Pattern is
1950 begin
1951 return (AFC with 0, new PE'(PC_Cancel, 1, EOP));
1952 end Cancel;
1954 ------------
1955 -- Concat --
1956 ------------
1958 -- Concat needs to traverse the left operand performing the following
1959 -- set of fixups:
1961 -- a) Any successor pointers (Pthen fields) that are set to EOP are
1962 -- reset to point to the second operand.
1964 -- b) Any PC_Arbno_Y node has its stack count field incremented
1965 -- by the parameter Incr provided for this purpose.
1967 -- d) Num fields of all pattern elements in the left operand are
1968 -- adjusted to include the elements of the right operand.
1970 -- Note: we do not use Set_Successor in the processing for Concat, since
1971 -- there is no point in doing two traversals, we may as well do everything
1972 -- at the same time.
1974 function Concat (L, R : PE_Ptr; Incr : Natural) return PE_Ptr is
1975 begin
1976 if L = EOP then
1977 return R;
1979 elsif R = EOP then
1980 return L;
1982 else
1983 declare
1984 Refs : Ref_Array (1 .. L.Index);
1985 -- We build a reference array for L whose N'th element points to
1986 -- the pattern element of L whose original Index value is N.
1988 P : PE_Ptr;
1990 begin
1991 Build_Ref_Array (L, Refs);
1993 for J in Refs'Range loop
1994 P := Refs (J);
1996 P.Index := P.Index + R.Index;
1998 if P.Pcode = PC_Arbno_Y then
1999 P.Nat := P.Nat + Incr;
2000 end if;
2002 if P.Pthen = EOP then
2003 P.Pthen := R;
2004 end if;
2006 if P.Pcode in PC_Has_Alt and then P.Alt = EOP then
2007 P.Alt := R;
2008 end if;
2009 end loop;
2010 end;
2012 return L;
2013 end if;
2014 end Concat;
2016 ----------
2017 -- Copy --
2018 ----------
2020 function Copy (P : PE_Ptr) return PE_Ptr is
2021 begin
2022 if P = null then
2023 Uninitialized_Pattern;
2025 else
2026 declare
2027 Refs : Ref_Array (1 .. P.Index);
2028 -- References to elements in P, indexed by Index field
2030 Copy : Ref_Array (1 .. P.Index);
2031 -- Holds copies of elements of P, indexed by Index field.
2033 E : PE_Ptr;
2035 begin
2036 Build_Ref_Array (P, Refs);
2038 -- Now copy all nodes
2040 for J in Refs'Range loop
2041 Copy (J) := new PE'(Refs (J).all);
2042 end loop;
2044 -- Adjust all internal references
2046 for J in Copy'Range loop
2047 E := Copy (J);
2049 -- Adjust successor pointer to point to copy
2051 if E.Pthen /= EOP then
2052 E.Pthen := Copy (E.Pthen.Index);
2053 end if;
2055 -- Adjust Alt pointer if there is one to point to copy
2057 if E.Pcode in PC_Has_Alt and then E.Alt /= EOP then
2058 E.Alt := Copy (E.Alt.Index);
2059 end if;
2061 -- Copy referenced string
2063 if E.Pcode = PC_String then
2064 E.Str := new String'(E.Str.all);
2065 end if;
2066 end loop;
2068 return Copy (P.Index);
2069 end;
2070 end if;
2071 end Copy;
2073 ----------
2074 -- Dump --
2075 ----------
2077 procedure Dump (P : Pattern) is
2079 subtype Count is Ada.Text_IO.Count;
2080 Scol : Count;
2081 -- Used to keep track of column in dump output
2083 Refs : Ref_Array (1 .. P.P.Index);
2084 -- We build a reference array whose N'th element points to the
2085 -- pattern element whose Index value is N.
2087 Cols : Natural := 2;
2088 -- Number of columns used for pattern numbers, minimum is 2
2090 E : PE_Ptr;
2092 procedure Write_Node_Id (E : PE_Ptr);
2093 -- Writes out a string identifying the given pattern element.
2095 procedure Write_Node_Id (E : PE_Ptr) is
2096 begin
2097 if E = EOP then
2098 Put ("EOP");
2100 for J in 4 .. Cols loop
2101 Put (' ');
2102 end loop;
2104 else
2105 declare
2106 Str : String (1 .. Cols);
2107 N : Natural := Natural (E.Index);
2109 begin
2110 Put ("#");
2112 for J in reverse Str'Range loop
2113 Str (J) := Character'Val (48 + N mod 10);
2114 N := N / 10;
2115 end loop;
2117 Put (Str);
2118 end;
2119 end if;
2120 end Write_Node_Id;
2122 begin
2123 New_Line;
2124 Put ("Pattern Dump Output (pattern at " &
2125 Image (P'Address) &
2126 ", S = " & Natural'Image (P.Stk) & ')');
2128 Scol := Col;
2129 New_Line;
2131 while Col < Scol loop
2132 Put ('-');
2133 end loop;
2135 New_Line;
2137 -- If uninitialized pattern, dump line and we are done
2139 if P.P = null then
2140 Put_Line ("Uninitialized pattern value");
2141 return;
2142 end if;
2144 -- If null pattern, just dump it and we are all done
2146 if P.P = EOP then
2147 Put_Line ("EOP (null pattern)");
2148 return;
2149 end if;
2151 Build_Ref_Array (P.P, Refs);
2153 -- Set number of columns required for node numbers
2155 while 10 ** Cols - 1 < Integer (P.P.Index) loop
2156 Cols := Cols + 1;
2157 end loop;
2159 -- Now dump the nodes in reverse sequence. We output them in reverse
2160 -- sequence since this corresponds to the natural order used to
2161 -- construct the patterns.
2163 for J in reverse Refs'Range loop
2164 E := Refs (J);
2165 Write_Node_Id (E);
2166 Set_Col (Count (Cols) + 4);
2167 Put (Image (E));
2168 Put (" ");
2169 Put (Pattern_Code'Image (E.Pcode));
2170 Put (" ");
2171 Set_Col (21 + Count (Cols) + Address_Image_Length);
2172 Write_Node_Id (E.Pthen);
2173 Set_Col (24 + 2 * Count (Cols) + Address_Image_Length);
2175 case E.Pcode is
2177 when PC_Alt |
2178 PC_Arb_X |
2179 PC_Arbno_S |
2180 PC_Arbno_X =>
2181 Write_Node_Id (E.Alt);
2183 when PC_Rpat =>
2184 Put (Str_PP (E.PP));
2186 when PC_Pred_Func =>
2187 Put (Str_BF (E.BF));
2189 when PC_Assign_Imm |
2190 PC_Assign_OnM |
2191 PC_Any_VP |
2192 PC_Break_VP |
2193 PC_BreakX_VP |
2194 PC_NotAny_VP |
2195 PC_NSpan_VP |
2196 PC_Span_VP |
2197 PC_String_VP =>
2198 Put (Str_VP (E.VP));
2200 when PC_Write_Imm |
2201 PC_Write_OnM =>
2202 Put (Str_FP (E.FP));
2204 when PC_String =>
2205 Put (Image (E.Str.all));
2207 when PC_String_2 =>
2208 Put (Image (E.Str2));
2210 when PC_String_3 =>
2211 Put (Image (E.Str3));
2213 when PC_String_4 =>
2214 Put (Image (E.Str4));
2216 when PC_String_5 =>
2217 Put (Image (E.Str5));
2219 when PC_String_6 =>
2220 Put (Image (E.Str6));
2222 when PC_Setcur =>
2223 Put (Str_NP (E.Var));
2225 when PC_Any_CH |
2226 PC_Break_CH |
2227 PC_BreakX_CH |
2228 PC_Char |
2229 PC_NotAny_CH |
2230 PC_NSpan_CH |
2231 PC_Span_CH =>
2232 Put (''' & E.Char & ''');
2234 when PC_Any_CS |
2235 PC_Break_CS |
2236 PC_BreakX_CS |
2237 PC_NotAny_CS |
2238 PC_NSpan_CS |
2239 PC_Span_CS =>
2240 Put ('"' & To_Sequence (E.CS) & '"');
2242 when PC_Arbno_Y |
2243 PC_Len_Nat |
2244 PC_Pos_Nat |
2245 PC_RPos_Nat |
2246 PC_RTab_Nat |
2247 PC_Tab_Nat =>
2248 Put (S (E.Nat));
2250 when PC_Pos_NF |
2251 PC_Len_NF |
2252 PC_RPos_NF |
2253 PC_RTab_NF |
2254 PC_Tab_NF =>
2255 Put (Str_NF (E.NF));
2257 when PC_Pos_NP |
2258 PC_Len_NP |
2259 PC_RPos_NP |
2260 PC_RTab_NP |
2261 PC_Tab_NP =>
2262 Put (Str_NP (E.NP));
2264 when PC_Any_VF |
2265 PC_Break_VF |
2266 PC_BreakX_VF |
2267 PC_NotAny_VF |
2268 PC_NSpan_VF |
2269 PC_Span_VF |
2270 PC_String_VF =>
2271 Put (Str_VF (E.VF));
2273 when others => null;
2275 end case;
2277 New_Line;
2278 end loop;
2280 New_Line;
2281 end Dump;
2283 ----------
2284 -- Fail --
2285 ----------
2287 function Fail return Pattern is
2288 begin
2289 return (AFC with 0, new PE'(PC_Fail, 1, EOP));
2290 end Fail;
2292 -----------
2293 -- Fence --
2294 -----------
2296 -- Simple case
2298 function Fence return Pattern is
2299 begin
2300 return (AFC with 1, new PE'(PC_Fence, 1, EOP));
2301 end Fence;
2303 -- Function case
2305 -- +---+ +---+ +---+
2306 -- | E |---->| P |---->| X |---->
2307 -- +---+ +---+ +---+
2309 -- The node numbering of the constituent pattern P is not affected.
2310 -- Where N is the number of nodes in P, the X node is numbered N + 1,
2311 -- and the E node is N + 2.
2313 function Fence (P : Pattern) return Pattern is
2314 Pat : constant PE_Ptr := Copy (P.P);
2315 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
2316 X : constant PE_Ptr := new PE'(PC_Fence_X, 0, EOP);
2318 begin
2319 return (AFC with P.Stk + 1, Bracket (E, Pat, X));
2320 end Fence;
2322 --------------
2323 -- Finalize --
2324 --------------
2326 procedure Finalize (Object : in out Pattern) is
2328 procedure Free is new Unchecked_Deallocation (PE, PE_Ptr);
2329 procedure Free is new Unchecked_Deallocation (String, String_Ptr);
2331 begin
2332 -- Nothing to do if already freed
2334 if Object.P = null then
2335 return;
2337 -- Otherwise we must free all elements
2339 else
2340 declare
2341 Refs : Ref_Array (1 .. Object.P.Index);
2342 -- References to elements in pattern to be finalized
2344 begin
2345 Build_Ref_Array (Object.P, Refs);
2347 for J in Refs'Range loop
2348 if Refs (J).Pcode = PC_String then
2349 Free (Refs (J).Str);
2350 end if;
2352 Free (Refs (J));
2353 end loop;
2355 Object.P := null;
2356 end;
2357 end if;
2358 end Finalize;
2360 -----------
2361 -- Image --
2362 -----------
2364 function Image (P : PE_Ptr) return String is
2365 begin
2366 return Image (To_Address (P));
2367 end Image;
2369 function Image (P : Pattern) return String is
2370 begin
2371 return S (Image (P));
2372 end Image;
2374 function Image (P : Pattern) return VString is
2376 Kill_Ampersand : Boolean := False;
2377 -- Set True to delete next & to be output to Result
2379 Result : VString := Nul;
2380 -- The result is accumulated here, using Append
2382 Refs : Ref_Array (1 .. P.P.Index);
2383 -- We build a reference array whose N'th element points to the
2384 -- pattern element whose Index value is N.
2386 procedure Delete_Ampersand;
2387 -- Deletes the ampersand at the end of Result
2389 procedure Image_Seq (E : PE_Ptr; Succ : PE_Ptr; Paren : Boolean);
2390 -- E refers to a pattern structure whose successor is given by Succ.
2391 -- This procedure appends to Result a representation of this pattern.
2392 -- The Paren parameter indicates whether parentheses are required if
2393 -- the output is more than one element.
2395 procedure Image_One (E : in out PE_Ptr);
2396 -- E refers to a pattern structure. This procedure appends to Result
2397 -- a representation of the single simple or compound pattern structure
2398 -- at the start of E and updates E to point to its successor.
2400 ----------------------
2401 -- Delete_Ampersand --
2402 ----------------------
2404 procedure Delete_Ampersand is
2405 L : constant Natural := Length (Result);
2407 begin
2408 if L > 2 then
2409 Delete (Result, L - 1, L);
2410 end if;
2411 end Delete_Ampersand;
2413 ---------------
2414 -- Image_One --
2415 ---------------
2417 procedure Image_One (E : in out PE_Ptr) is
2419 ER : PE_Ptr := E.Pthen;
2420 -- Successor set as result in E unless reset
2422 begin
2423 case E.Pcode is
2425 when PC_Cancel =>
2426 Append (Result, "Cancel");
2428 when PC_Alt => Alt : declare
2430 Elmts_In_L : constant IndexT := E.Pthen.Index - E.Alt.Index;
2431 -- Number of elements in left pattern of alternation.
2433 Lowest_In_L : constant IndexT := E.Index - Elmts_In_L;
2434 -- Number of lowest index in elements of left pattern
2436 E1 : PE_Ptr;
2438 begin
2439 -- The successor of the alternation node must have a lower
2440 -- index than any node that is in the left pattern or a
2441 -- higher index than the alternation node itself.
2443 while ER /= EOP
2444 and then ER.Index >= Lowest_In_L
2445 and then ER.Index < E.Index
2446 loop
2447 ER := ER.Pthen;
2448 end loop;
2450 Append (Result, '(');
2452 E1 := E;
2453 loop
2454 Image_Seq (E1.Pthen, ER, False);
2455 Append (Result, " or ");
2456 E1 := E1.Alt;
2457 exit when E1.Pcode /= PC_Alt;
2458 end loop;
2460 Image_Seq (E1, ER, False);
2461 Append (Result, ')');
2462 end Alt;
2464 when PC_Any_CS =>
2465 Append (Result, "Any (" & Image (To_Sequence (E.CS)) & ')');
2467 when PC_Any_VF =>
2468 Append (Result, "Any (" & Str_VF (E.VF) & ')');
2470 when PC_Any_VP =>
2471 Append (Result, "Any (" & Str_VP (E.VP) & ')');
2473 when PC_Arb_X =>
2474 Append (Result, "Arb");
2476 when PC_Arbno_S =>
2477 Append (Result, "Arbno (");
2478 Image_Seq (E.Alt, E, False);
2479 Append (Result, ')');
2481 when PC_Arbno_X =>
2482 Append (Result, "Arbno (");
2483 Image_Seq (E.Alt.Pthen, Refs (E.Index - 2), False);
2484 Append (Result, ')');
2486 when PC_Assign_Imm =>
2487 Delete_Ampersand;
2488 Append (Result, "* " & Str_VP (Refs (E.Index - 1).VP));
2490 when PC_Assign_OnM =>
2491 Delete_Ampersand;
2492 Append (Result, "** " & Str_VP (Refs (E.Index - 1).VP));
2494 when PC_Any_CH =>
2495 Append (Result, "Any ('" & E.Char & "')");
2497 when PC_Bal =>
2498 Append (Result, "Bal");
2500 when PC_Break_CH =>
2501 Append (Result, "Break ('" & E.Char & "')");
2503 when PC_Break_CS =>
2504 Append (Result, "Break (" & Image (To_Sequence (E.CS)) & ')');
2506 when PC_Break_VF =>
2507 Append (Result, "Break (" & Str_VF (E.VF) & ')');
2509 when PC_Break_VP =>
2510 Append (Result, "Break (" & Str_VP (E.VP) & ')');
2512 when PC_BreakX_CH =>
2513 Append (Result, "BreakX ('" & E.Char & "')");
2514 ER := ER.Pthen;
2516 when PC_BreakX_CS =>
2517 Append (Result, "BreakX (" & Image (To_Sequence (E.CS)) & ')');
2518 ER := ER.Pthen;
2520 when PC_BreakX_VF =>
2521 Append (Result, "BreakX (" & Str_VF (E.VF) & ')');
2522 ER := ER.Pthen;
2524 when PC_BreakX_VP =>
2525 Append (Result, "BreakX (" & Str_VP (E.VP) & ')');
2526 ER := ER.Pthen;
2528 when PC_Char =>
2529 Append (Result, ''' & E.Char & ''');
2531 when PC_Fail =>
2532 Append (Result, "Fail");
2534 when PC_Fence =>
2535 Append (Result, "Fence");
2537 when PC_Fence_X =>
2538 Append (Result, "Fence (");
2539 Image_Seq (E.Pthen, Refs (E.Index - 1), False);
2540 Append (Result, ")");
2541 ER := Refs (E.Index - 1).Pthen;
2543 when PC_Len_Nat =>
2544 Append (Result, "Len (" & E.Nat & ')');
2546 when PC_Len_NF =>
2547 Append (Result, "Len (" & Str_NF (E.NF) & ')');
2549 when PC_Len_NP =>
2550 Append (Result, "Len (" & Str_NP (E.NP) & ')');
2552 when PC_NotAny_CH =>
2553 Append (Result, "NotAny ('" & E.Char & "')");
2555 when PC_NotAny_CS =>
2556 Append (Result, "NotAny (" & Image (To_Sequence (E.CS)) & ')');
2558 when PC_NotAny_VF =>
2559 Append (Result, "NotAny (" & Str_VF (E.VF) & ')');
2561 when PC_NotAny_VP =>
2562 Append (Result, "NotAny (" & Str_VP (E.VP) & ')');
2564 when PC_NSpan_CH =>
2565 Append (Result, "NSpan ('" & E.Char & "')");
2567 when PC_NSpan_CS =>
2568 Append (Result, "NSpan (" & Image (To_Sequence (E.CS)) & ')');
2570 when PC_NSpan_VF =>
2571 Append (Result, "NSpan (" & Str_VF (E.VF) & ')');
2573 when PC_NSpan_VP =>
2574 Append (Result, "NSpan (" & Str_VP (E.VP) & ')');
2576 when PC_Null =>
2577 Append (Result, """""");
2579 when PC_Pos_Nat =>
2580 Append (Result, "Pos (" & E.Nat & ')');
2582 when PC_Pos_NF =>
2583 Append (Result, "Pos (" & Str_NF (E.NF) & ')');
2585 when PC_Pos_NP =>
2586 Append (Result, "Pos (" & Str_NP (E.NP) & ')');
2588 when PC_R_Enter =>
2589 Kill_Ampersand := True;
2591 when PC_Rest =>
2592 Append (Result, "Rest");
2594 when PC_Rpat =>
2595 Append (Result, "(+ " & Str_PP (E.PP) & ')');
2597 when PC_Pred_Func =>
2598 Append (Result, "(+ " & Str_BF (E.BF) & ')');
2600 when PC_RPos_Nat =>
2601 Append (Result, "RPos (" & E.Nat & ')');
2603 when PC_RPos_NF =>
2604 Append (Result, "RPos (" & Str_NF (E.NF) & ')');
2606 when PC_RPos_NP =>
2607 Append (Result, "RPos (" & Str_NP (E.NP) & ')');
2609 when PC_RTab_Nat =>
2610 Append (Result, "RTab (" & E.Nat & ')');
2612 when PC_RTab_NF =>
2613 Append (Result, "RTab (" & Str_NF (E.NF) & ')');
2615 when PC_RTab_NP =>
2616 Append (Result, "RTab (" & Str_NP (E.NP) & ')');
2618 when PC_Setcur =>
2619 Append (Result, "Setcur (" & Str_NP (E.Var) & ')');
2621 when PC_Span_CH =>
2622 Append (Result, "Span ('" & E.Char & "')");
2624 when PC_Span_CS =>
2625 Append (Result, "Span (" & Image (To_Sequence (E.CS)) & ')');
2627 when PC_Span_VF =>
2628 Append (Result, "Span (" & Str_VF (E.VF) & ')');
2630 when PC_Span_VP =>
2631 Append (Result, "Span (" & Str_VP (E.VP) & ')');
2633 when PC_String =>
2634 Append (Result, Image (E.Str.all));
2636 when PC_String_2 =>
2637 Append (Result, Image (E.Str2));
2639 when PC_String_3 =>
2640 Append (Result, Image (E.Str3));
2642 when PC_String_4 =>
2643 Append (Result, Image (E.Str4));
2645 when PC_String_5 =>
2646 Append (Result, Image (E.Str5));
2648 when PC_String_6 =>
2649 Append (Result, Image (E.Str6));
2651 when PC_String_VF =>
2652 Append (Result, "(+" & Str_VF (E.VF) & ')');
2654 when PC_String_VP =>
2655 Append (Result, "(+" & Str_VP (E.VP) & ')');
2657 when PC_Succeed =>
2658 Append (Result, "Succeed");
2660 when PC_Tab_Nat =>
2661 Append (Result, "Tab (" & E.Nat & ')');
2663 when PC_Tab_NF =>
2664 Append (Result, "Tab (" & Str_NF (E.NF) & ')');
2666 when PC_Tab_NP =>
2667 Append (Result, "Tab (" & Str_NP (E.NP) & ')');
2669 when PC_Write_Imm =>
2670 Append (Result, '(');
2671 Image_Seq (E, Refs (E.Index - 1), True);
2672 Append (Result, " * " & Str_FP (Refs (E.Index - 1).FP));
2673 ER := Refs (E.Index - 1).Pthen;
2675 when PC_Write_OnM =>
2676 Append (Result, '(');
2677 Image_Seq (E.Pthen, Refs (E.Index - 1), True);
2678 Append (Result, " ** " & Str_FP (Refs (E.Index - 1).FP));
2679 ER := Refs (E.Index - 1).Pthen;
2681 -- Other pattern codes should not appear as leading elements
2683 when PC_Arb_Y |
2684 PC_Arbno_Y |
2685 PC_Assign |
2686 PC_BreakX_X |
2687 PC_EOP |
2688 PC_Fence_Y |
2689 PC_R_Remove |
2690 PC_R_Restore |
2691 PC_Unanchored =>
2692 Append (Result, "???");
2694 end case;
2696 E := ER;
2697 end Image_One;
2699 ---------------
2700 -- Image_Seq --
2701 ---------------
2703 procedure Image_Seq (E : PE_Ptr; Succ : PE_Ptr; Paren : Boolean) is
2704 Indx : constant Natural := Length (Result);
2705 E1 : PE_Ptr := E;
2706 Mult : Boolean := False;
2708 begin
2709 -- The image of EOP is "" (the null string)
2711 if E = EOP then
2712 Append (Result, """""");
2714 -- Else generate appropriate concatenation sequence
2716 else
2717 loop
2718 Image_One (E1);
2719 exit when E1 = Succ;
2720 exit when E1 = EOP;
2721 Mult := True;
2723 if Kill_Ampersand then
2724 Kill_Ampersand := False;
2725 else
2726 Append (Result, " & ");
2727 end if;
2728 end loop;
2729 end if;
2731 if Mult and Paren then
2732 Insert (Result, Indx + 1, "(");
2733 Append (Result, ")");
2734 end if;
2735 end Image_Seq;
2737 -- Start of processing for Image
2739 begin
2740 Build_Ref_Array (P.P, Refs);
2741 Image_Seq (P.P, EOP, False);
2742 return Result;
2743 end Image;
2745 -----------
2746 -- Is_In --
2747 -----------
2749 function Is_In (C : Character; Str : String) return Boolean is
2750 begin
2751 for J in Str'Range loop
2752 if Str (J) = C then
2753 return True;
2754 end if;
2755 end loop;
2757 return False;
2758 end Is_In;
2760 ---------
2761 -- Len --
2762 ---------
2764 function Len (Count : Natural) return Pattern is
2765 begin
2766 -- Note, the following is not just an optimization, it is needed
2767 -- to ensure that Arbno (Len (0)) does not generate an infinite
2768 -- matching loop (since PC_Len_Nat is OK_For_Simple_Arbno).
2770 if Count = 0 then
2771 return (AFC with 0, new PE'(PC_Null, 1, EOP));
2773 else
2774 return (AFC with 0, new PE'(PC_Len_Nat, 1, EOP, Count));
2775 end if;
2776 end Len;
2778 function Len (Count : Natural_Func) return Pattern is
2779 begin
2780 return (AFC with 0, new PE'(PC_Len_NF, 1, EOP, Count));
2781 end Len;
2783 function Len (Count : access Natural) return Pattern is
2784 begin
2785 return (AFC with 0, new PE'(PC_Len_NP, 1, EOP, Natural_Ptr (Count)));
2786 end Len;
2788 -----------------
2789 -- Logic_Error --
2790 -----------------
2792 procedure Logic_Error is
2793 begin
2794 Raise_Exception
2795 (Program_Error'Identity,
2796 "Internal logic error in GNAT.Spitbol.Patterns");
2797 end Logic_Error;
2799 -----------
2800 -- Match --
2801 -----------
2803 function Match
2804 (Subject : VString;
2805 Pat : Pattern)
2806 return Boolean
2808 Start, Stop : Natural;
2810 begin
2811 if Debug_Mode then
2812 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2813 else
2814 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2815 end if;
2817 return Start /= 0;
2818 end Match;
2820 function Match
2821 (Subject : String;
2822 Pat : Pattern)
2823 return Boolean
2825 Start, Stop : Natural;
2826 subtype String1 is String (1 .. Subject'Length);
2828 begin
2829 if Debug_Mode then
2830 XMatchD (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2831 else
2832 XMatch (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2833 end if;
2835 return Start /= 0;
2836 end Match;
2838 function Match
2839 (Subject : VString_Var;
2840 Pat : Pattern;
2841 Replace : VString)
2842 return Boolean
2844 Start, Stop : Natural;
2846 begin
2847 if Debug_Mode then
2848 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2849 else
2850 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2851 end if;
2853 if Start = 0 then
2854 return False;
2855 else
2856 Replace_Slice
2857 (Subject'Unrestricted_Access.all,
2858 Start, Stop, Get_String (Replace).all);
2859 return True;
2860 end if;
2861 end Match;
2863 function Match
2864 (Subject : VString_Var;
2865 Pat : Pattern;
2866 Replace : String)
2867 return Boolean
2869 Start, Stop : Natural;
2871 begin
2872 if Debug_Mode then
2873 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2874 else
2875 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2876 end if;
2878 if Start = 0 then
2879 return False;
2880 else
2881 Replace_Slice
2882 (Subject'Unrestricted_Access.all, Start, Stop, Replace);
2883 return True;
2884 end if;
2885 end Match;
2887 procedure Match
2888 (Subject : VString;
2889 Pat : Pattern)
2891 Start, Stop : Natural;
2893 begin
2894 if Debug_Mode then
2895 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2896 else
2897 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2898 end if;
2900 end Match;
2902 procedure Match
2903 (Subject : String;
2904 Pat : Pattern)
2906 Start, Stop : Natural;
2907 subtype String1 is String (1 .. Subject'Length);
2908 begin
2909 if Debug_Mode then
2910 XMatchD (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2911 else
2912 XMatch (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2913 end if;
2914 end Match;
2916 procedure Match
2917 (Subject : in out VString;
2918 Pat : Pattern;
2919 Replace : VString)
2921 Start, Stop : Natural;
2923 begin
2924 if Debug_Mode then
2925 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2926 else
2927 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2928 end if;
2930 if Start /= 0 then
2931 Replace_Slice (Subject, Start, Stop, Get_String (Replace).all);
2932 end if;
2933 end Match;
2935 procedure Match
2936 (Subject : in out VString;
2937 Pat : Pattern;
2938 Replace : String)
2940 Start, Stop : Natural;
2942 begin
2943 if Debug_Mode then
2944 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2945 else
2946 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2947 end if;
2949 if Start /= 0 then
2950 Replace_Slice (Subject, Start, Stop, Replace);
2951 end if;
2952 end Match;
2954 function Match
2955 (Subject : VString;
2956 Pat : PString)
2957 return Boolean
2959 Pat_Len : constant Natural := Pat'Length;
2960 Sub_Len : constant Natural := Length (Subject);
2961 Sub_Str : constant String_Access := Get_String (Subject);
2963 begin
2964 if Anchored_Mode then
2965 if Pat_Len > Sub_Len then
2966 return False;
2967 else
2968 return Pat = Sub_Str.all (1 .. Pat_Len);
2969 end if;
2971 else
2972 for J in 1 .. Sub_Len - Pat_Len + 1 loop
2973 if Pat = Sub_Str.all (J .. J + (Pat_Len - 1)) then
2974 return True;
2975 end if;
2976 end loop;
2978 return False;
2979 end if;
2980 end Match;
2982 function Match
2983 (Subject : String;
2984 Pat : PString)
2985 return Boolean
2987 Pat_Len : constant Natural := Pat'Length;
2988 Sub_Len : constant Natural := Subject'Length;
2989 SFirst : constant Natural := Subject'First;
2991 begin
2992 if Anchored_Mode then
2993 if Pat_Len > Sub_Len then
2994 return False;
2995 else
2996 return Pat = Subject (SFirst .. SFirst + Pat_Len - 1);
2997 end if;
2999 else
3000 for J in SFirst .. SFirst + Sub_Len - Pat_Len loop
3001 if Pat = Subject (J .. J + (Pat_Len - 1)) then
3002 return True;
3003 end if;
3004 end loop;
3006 return False;
3007 end if;
3008 end Match;
3010 function Match
3011 (Subject : VString_Var;
3012 Pat : PString;
3013 Replace : VString)
3014 return Boolean
3016 Start, Stop : Natural;
3018 begin
3019 if Debug_Mode then
3020 XMatchD (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3021 else
3022 XMatch (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3023 end if;
3025 if Start = 0 then
3026 return False;
3027 else
3028 Replace_Slice
3029 (Subject'Unrestricted_Access.all,
3030 Start, Stop, Get_String (Replace).all);
3031 return True;
3032 end if;
3033 end Match;
3035 function Match
3036 (Subject : VString_Var;
3037 Pat : PString;
3038 Replace : String)
3039 return Boolean
3041 Start, Stop : Natural;
3043 begin
3044 if Debug_Mode then
3045 XMatchD (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3046 else
3047 XMatch (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3048 end if;
3050 if Start = 0 then
3051 return False;
3052 else
3053 Replace_Slice
3054 (Subject'Unrestricted_Access.all, Start, Stop, Replace);
3055 return True;
3056 end if;
3057 end Match;
3059 procedure Match
3060 (Subject : VString;
3061 Pat : PString)
3063 Start, Stop : Natural;
3065 begin
3066 if Debug_Mode then
3067 XMatchD (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3068 else
3069 XMatch (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3070 end if;
3071 end Match;
3073 procedure Match
3074 (Subject : String;
3075 Pat : PString)
3077 Start, Stop : Natural;
3078 subtype String1 is String (1 .. Subject'Length);
3080 begin
3081 if Debug_Mode then
3082 XMatchD (String1 (Subject), S_To_PE (Pat), 0, Start, Stop);
3083 else
3084 XMatch (String1 (Subject), S_To_PE (Pat), 0, Start, Stop);
3085 end if;
3086 end Match;
3088 procedure Match
3089 (Subject : in out VString;
3090 Pat : PString;
3091 Replace : VString)
3093 Start, Stop : Natural;
3095 begin
3096 if Debug_Mode then
3097 XMatchD (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3098 else
3099 XMatch (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3100 end if;
3102 if Start /= 0 then
3103 Replace_Slice (Subject, Start, Stop, Get_String (Replace).all);
3104 end if;
3105 end Match;
3107 procedure Match
3108 (Subject : in out VString;
3109 Pat : PString;
3110 Replace : String)
3112 Start, Stop : Natural;
3114 begin
3115 if Debug_Mode then
3116 XMatchD (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3117 else
3118 XMatch (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3119 end if;
3121 if Start /= 0 then
3122 Replace_Slice (Subject, Start, Stop, Replace);
3123 end if;
3124 end Match;
3126 function Match
3127 (Subject : VString_Var;
3128 Pat : Pattern;
3129 Result : Match_Result_Var)
3130 return Boolean
3132 Start, Stop : Natural;
3134 begin
3135 if Debug_Mode then
3136 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
3137 else
3138 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
3139 end if;
3141 if Start = 0 then
3142 Result'Unrestricted_Access.all.Var := null;
3143 return False;
3145 else
3146 Result'Unrestricted_Access.all.Var := Subject'Unrestricted_Access;
3147 Result'Unrestricted_Access.all.Start := Start;
3148 Result'Unrestricted_Access.all.Stop := Stop;
3149 return True;
3150 end if;
3151 end Match;
3153 procedure Match
3154 (Subject : in out VString;
3155 Pat : Pattern;
3156 Result : out Match_Result)
3158 Start, Stop : Natural;
3160 begin
3161 if Debug_Mode then
3162 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
3163 else
3164 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
3165 end if;
3167 if Start = 0 then
3168 Result.Var := null;
3170 else
3171 Result.Var := Subject'Unrestricted_Access;
3172 Result.Start := Start;
3173 Result.Stop := Stop;
3174 end if;
3175 end Match;
3177 ---------------
3178 -- New_LineD --
3179 ---------------
3181 procedure New_LineD is
3182 begin
3183 if Internal_Debug then
3184 New_Line;
3185 end if;
3186 end New_LineD;
3188 ------------
3189 -- NotAny --
3190 ------------
3192 function NotAny (Str : String) return Pattern is
3193 begin
3194 return (AFC with 0, new PE'(PC_NotAny_CS, 1, EOP, To_Set (Str)));
3195 end NotAny;
3197 function NotAny (Str : VString) return Pattern is
3198 begin
3199 return NotAny (S (Str));
3200 end NotAny;
3202 function NotAny (Str : Character) return Pattern is
3203 begin
3204 return (AFC with 0, new PE'(PC_NotAny_CH, 1, EOP, Str));
3205 end NotAny;
3207 function NotAny (Str : Character_Set) return Pattern is
3208 begin
3209 return (AFC with 0, new PE'(PC_NotAny_CS, 1, EOP, Str));
3210 end NotAny;
3212 function NotAny (Str : access VString) return Pattern is
3213 begin
3214 return (AFC with 0, new PE'(PC_NotAny_VP, 1, EOP, VString_Ptr (Str)));
3215 end NotAny;
3217 function NotAny (Str : VString_Func) return Pattern is
3218 begin
3219 return (AFC with 0, new PE'(PC_NotAny_VF, 1, EOP, Str));
3220 end NotAny;
3222 -----------
3223 -- NSpan --
3224 -----------
3226 function NSpan (Str : String) return Pattern is
3227 begin
3228 return (AFC with 0, new PE'(PC_NSpan_CS, 1, EOP, To_Set (Str)));
3229 end NSpan;
3231 function NSpan (Str : VString) return Pattern is
3232 begin
3233 return NSpan (S (Str));
3234 end NSpan;
3236 function NSpan (Str : Character) return Pattern is
3237 begin
3238 return (AFC with 0, new PE'(PC_NSpan_CH, 1, EOP, Str));
3239 end NSpan;
3241 function NSpan (Str : Character_Set) return Pattern is
3242 begin
3243 return (AFC with 0, new PE'(PC_NSpan_CS, 1, EOP, Str));
3244 end NSpan;
3246 function NSpan (Str : access VString) return Pattern is
3247 begin
3248 return (AFC with 0, new PE'(PC_NSpan_VP, 1, EOP, VString_Ptr (Str)));
3249 end NSpan;
3251 function NSpan (Str : VString_Func) return Pattern is
3252 begin
3253 return (AFC with 0, new PE'(PC_NSpan_VF, 1, EOP, Str));
3254 end NSpan;
3256 ---------
3257 -- Pos --
3258 ---------
3260 function Pos (Count : Natural) return Pattern is
3261 begin
3262 return (AFC with 0, new PE'(PC_Pos_Nat, 1, EOP, Count));
3263 end Pos;
3265 function Pos (Count : Natural_Func) return Pattern is
3266 begin
3267 return (AFC with 0, new PE'(PC_Pos_NF, 1, EOP, Count));
3268 end Pos;
3270 function Pos (Count : access Natural) return Pattern is
3271 begin
3272 return (AFC with 0, new PE'(PC_Pos_NP, 1, EOP, Natural_Ptr (Count)));
3273 end Pos;
3275 ----------
3276 -- PutD --
3277 ----------
3279 procedure PutD (Str : String) is
3280 begin
3281 if Internal_Debug then
3282 Put (Str);
3283 end if;
3284 end PutD;
3286 ---------------
3287 -- Put_LineD --
3288 ---------------
3290 procedure Put_LineD (Str : String) is
3291 begin
3292 if Internal_Debug then
3293 Put_Line (Str);
3294 end if;
3295 end Put_LineD;
3297 -------------
3298 -- Replace --
3299 -------------
3301 procedure Replace
3302 (Result : in out Match_Result;
3303 Replace : VString)
3305 begin
3306 if Result.Var /= null then
3307 Replace_Slice
3308 (Result.Var.all,
3309 Result.Start,
3310 Result.Stop,
3311 Get_String (Replace).all);
3312 Result.Var := null;
3313 end if;
3314 end Replace;
3316 ----------
3317 -- Rest --
3318 ----------
3320 function Rest return Pattern is
3321 begin
3322 return (AFC with 0, new PE'(PC_Rest, 1, EOP));
3323 end Rest;
3325 ----------
3326 -- Rpos --
3327 ----------
3329 function Rpos (Count : Natural) return Pattern is
3330 begin
3331 return (AFC with 0, new PE'(PC_RPos_Nat, 1, EOP, Count));
3332 end Rpos;
3334 function Rpos (Count : Natural_Func) return Pattern is
3335 begin
3336 return (AFC with 0, new PE'(PC_RPos_NF, 1, EOP, Count));
3337 end Rpos;
3339 function Rpos (Count : access Natural) return Pattern is
3340 begin
3341 return (AFC with 0, new PE'(PC_RPos_NP, 1, EOP, Natural_Ptr (Count)));
3342 end Rpos;
3344 ----------
3345 -- Rtab --
3346 ----------
3348 function Rtab (Count : Natural) return Pattern is
3349 begin
3350 return (AFC with 0, new PE'(PC_RTab_Nat, 1, EOP, Count));
3351 end Rtab;
3353 function Rtab (Count : Natural_Func) return Pattern is
3354 begin
3355 return (AFC with 0, new PE'(PC_RTab_NF, 1, EOP, Count));
3356 end Rtab;
3358 function Rtab (Count : access Natural) return Pattern is
3359 begin
3360 return (AFC with 0, new PE'(PC_RTab_NP, 1, EOP, Natural_Ptr (Count)));
3361 end Rtab;
3363 -------------
3364 -- S_To_PE --
3365 -------------
3367 function S_To_PE (Str : PString) return PE_Ptr is
3368 Len : constant Natural := Str'Length;
3370 begin
3371 case Len is
3372 when 0 =>
3373 return new PE'(PC_Null, 1, EOP);
3375 when 1 =>
3376 return new PE'(PC_Char, 1, EOP, Str (1));
3378 when 2 =>
3379 return new PE'(PC_String_2, 1, EOP, Str);
3381 when 3 =>
3382 return new PE'(PC_String_3, 1, EOP, Str);
3384 when 4 =>
3385 return new PE'(PC_String_4, 1, EOP, Str);
3387 when 5 =>
3388 return new PE'(PC_String_5, 1, EOP, Str);
3390 when 6 =>
3391 return new PE'(PC_String_6, 1, EOP, Str);
3393 when others =>
3394 return new PE'(PC_String, 1, EOP, new String'(Str));
3396 end case;
3397 end S_To_PE;
3399 -------------------
3400 -- Set_Successor --
3401 -------------------
3403 -- Note: this procedure is not used by the normal concatenation circuit,
3404 -- since other fixups are required on the left operand in this case, and
3405 -- they might as well be done all together.
3407 procedure Set_Successor (Pat : PE_Ptr; Succ : PE_Ptr) is
3408 begin
3409 if Pat = null then
3410 Uninitialized_Pattern;
3412 elsif Pat = EOP then
3413 Logic_Error;
3415 else
3416 declare
3417 Refs : Ref_Array (1 .. Pat.Index);
3418 -- We build a reference array for L whose N'th element points to
3419 -- the pattern element of L whose original Index value is N.
3421 P : PE_Ptr;
3423 begin
3424 Build_Ref_Array (Pat, Refs);
3426 for J in Refs'Range loop
3427 P := Refs (J);
3429 if P.Pthen = EOP then
3430 P.Pthen := Succ;
3431 end if;
3433 if P.Pcode in PC_Has_Alt and then P.Alt = EOP then
3434 P.Alt := Succ;
3435 end if;
3436 end loop;
3437 end;
3438 end if;
3439 end Set_Successor;
3441 ------------
3442 -- Setcur --
3443 ------------
3445 function Setcur (Var : access Natural) return Pattern is
3446 begin
3447 return (AFC with 0, new PE'(PC_Setcur, 1, EOP, Natural_Ptr (Var)));
3448 end Setcur;
3450 ----------
3451 -- Span --
3452 ----------
3454 function Span (Str : String) return Pattern is
3455 begin
3456 return (AFC with 0, new PE'(PC_Span_CS, 1, EOP, To_Set (Str)));
3457 end Span;
3459 function Span (Str : VString) return Pattern is
3460 begin
3461 return Span (S (Str));
3462 end Span;
3464 function Span (Str : Character) return Pattern is
3465 begin
3466 return (AFC with 0, new PE'(PC_Span_CH, 1, EOP, Str));
3467 end Span;
3469 function Span (Str : Character_Set) return Pattern is
3470 begin
3471 return (AFC with 0, new PE'(PC_Span_CS, 1, EOP, Str));
3472 end Span;
3474 function Span (Str : access VString) return Pattern is
3475 begin
3476 return (AFC with 0, new PE'(PC_Span_VP, 1, EOP, VString_Ptr (Str)));
3477 end Span;
3479 function Span (Str : VString_Func) return Pattern is
3480 begin
3481 return (AFC with 0, new PE'(PC_Span_VF, 1, EOP, Str));
3482 end Span;
3484 ------------
3485 -- Str_BF --
3486 ------------
3488 function Str_BF (A : Boolean_Func) return String is
3489 function To_A is new Unchecked_Conversion (Boolean_Func, Address);
3491 begin
3492 return "BF(" & Image (To_A (A)) & ')';
3493 end Str_BF;
3495 ------------
3496 -- Str_FP --
3497 ------------
3499 function Str_FP (A : File_Ptr) return String is
3500 begin
3501 return "FP(" & Image (A.all'Address) & ')';
3502 end Str_FP;
3504 ------------
3505 -- Str_NF --
3506 ------------
3508 function Str_NF (A : Natural_Func) return String is
3509 function To_A is new Unchecked_Conversion (Natural_Func, Address);
3511 begin
3512 return "NF(" & Image (To_A (A)) & ')';
3513 end Str_NF;
3515 ------------
3516 -- Str_NP --
3517 ------------
3519 function Str_NP (A : Natural_Ptr) return String is
3520 begin
3521 return "NP(" & Image (A.all'Address) & ')';
3522 end Str_NP;
3524 ------------
3525 -- Str_PP --
3526 ------------
3528 function Str_PP (A : Pattern_Ptr) return String is
3529 begin
3530 return "PP(" & Image (A.all'Address) & ')';
3531 end Str_PP;
3533 ------------
3534 -- Str_VF --
3535 ------------
3537 function Str_VF (A : VString_Func) return String is
3538 function To_A is new Unchecked_Conversion (VString_Func, Address);
3540 begin
3541 return "VF(" & Image (To_A (A)) & ')';
3542 end Str_VF;
3544 ------------
3545 -- Str_VP --
3546 ------------
3548 function Str_VP (A : VString_Ptr) return String is
3549 begin
3550 return "VP(" & Image (A.all'Address) & ')';
3551 end Str_VP;
3553 -------------
3554 -- Succeed --
3555 -------------
3557 function Succeed return Pattern is
3558 begin
3559 return (AFC with 1, new PE'(PC_Succeed, 1, EOP));
3560 end Succeed;
3562 ---------
3563 -- Tab --
3564 ---------
3566 function Tab (Count : Natural) return Pattern is
3567 begin
3568 return (AFC with 0, new PE'(PC_Tab_Nat, 1, EOP, Count));
3569 end Tab;
3571 function Tab (Count : Natural_Func) return Pattern is
3572 begin
3573 return (AFC with 0, new PE'(PC_Tab_NF, 1, EOP, Count));
3574 end Tab;
3576 function Tab (Count : access Natural) return Pattern is
3577 begin
3578 return (AFC with 0, new PE'(PC_Tab_NP, 1, EOP, Natural_Ptr (Count)));
3579 end Tab;
3581 ---------------------------
3582 -- Uninitialized_Pattern --
3583 ---------------------------
3585 procedure Uninitialized_Pattern is
3586 begin
3587 Raise_Exception
3588 (Program_Error'Identity,
3589 "uninitialized value of type GNAT.Spitbol.Patterns.Pattern");
3590 end Uninitialized_Pattern;
3592 ------------
3593 -- XMatch --
3594 ------------
3596 procedure XMatch
3597 (Subject : String;
3598 Pat_P : PE_Ptr;
3599 Pat_S : Natural;
3600 Start : out Natural;
3601 Stop : out Natural)
3603 Node : PE_Ptr;
3604 -- Pointer to current pattern node. Initialized from Pat_P, and then
3605 -- updated as the match proceeds through its constituent elements.
3607 Length : constant Natural := Subject'Length;
3608 -- Length of string (= Subject'Last, since Subject'First is always 1)
3610 Cursor : Integer := 0;
3611 -- If the value is non-negative, then this value is the index showing
3612 -- the current position of the match in the subject string. The next
3613 -- character to be matched is at Subject (Cursor + 1). Note that since
3614 -- our view of the subject string in XMatch always has a lower bound
3615 -- of one, regardless of original bounds, that this definition exactly
3616 -- corresponds to the cursor value as referenced by functions like Pos.
3618 -- If the value is negative, then this is a saved stack pointer,
3619 -- typically a base pointer of an inner or outer region. Cursor
3620 -- temporarily holds such a value when it is popped from the stack
3621 -- by Fail. In all cases, Cursor is reset to a proper non-negative
3622 -- cursor value before the match proceeds (e.g. by propagating the
3623 -- failure and popping a "real" cursor value from the stack.
3625 PE_Unanchored : aliased PE := (PC_Unanchored, 0, Pat_P);
3626 -- Dummy pattern element used in the unanchored case.
3628 Stack : Stack_Type;
3629 -- The pattern matching failure stack for this call to Match
3631 Stack_Ptr : Stack_Range;
3632 -- Current stack pointer. This points to the top element of the stack
3633 -- that is currently in use. At the outer level this is the special
3634 -- entry placed on the stack according to the anchor mode.
3636 Stack_Init : constant Stack_Range := Stack'First + 1;
3637 -- This is the initial value of the Stack_Ptr and Stack_Base. The
3638 -- initial (Stack'First) element of the stack is not used so that
3639 -- when we pop the last element off, Stack_Ptr is still in range.
3641 Stack_Base : Stack_Range;
3642 -- This value is the stack base value, i.e. the stack pointer for the
3643 -- first history stack entry in the current stack region. See separate
3644 -- section on handling of recursive pattern matches.
3646 Assign_OnM : Boolean := False;
3647 -- Set True if assign-on-match or write-on-match operations may be
3648 -- present in the history stack, which must then be scanned on a
3649 -- successful match.
3651 procedure Pop_Region;
3652 pragma Inline (Pop_Region);
3653 -- Used at the end of processing of an inner region. if the inner
3654 -- region left no stack entries, then all trace of it is removed.
3655 -- Otherwise a PC_Restore_Region entry is pushed to ensure proper
3656 -- handling of alternatives in the inner region.
3658 procedure Push (Node : PE_Ptr);
3659 pragma Inline (Push);
3660 -- Make entry in pattern matching stack with current cursor valeu
3662 procedure Push_Region;
3663 pragma Inline (Push_Region);
3664 -- This procedure makes a new region on the history stack. The
3665 -- caller first establishes the special entry on the stack, but
3666 -- does not push the stack pointer. Then this call stacks a
3667 -- PC_Remove_Region node, on top of this entry, using the cursor
3668 -- field of the PC_Remove_Region entry to save the outer level
3669 -- stack base value, and resets the stack base to point to this
3670 -- PC_Remove_Region node.
3672 ----------------
3673 -- Pop_Region --
3674 ----------------
3676 procedure Pop_Region is
3677 begin
3678 -- If nothing was pushed in the inner region, we can just get
3679 -- rid of it entirely, leaving no traces that it was ever there
3681 if Stack_Ptr = Stack_Base then
3682 Stack_Ptr := Stack_Base - 2;
3683 Stack_Base := Stack (Stack_Ptr + 2).Cursor;
3685 -- If stuff was pushed in the inner region, then we have to
3686 -- push a PC_R_Restore node so that we properly handle possible
3687 -- rematches within the region.
3689 else
3690 Stack_Ptr := Stack_Ptr + 1;
3691 Stack (Stack_Ptr).Cursor := Stack_Base;
3692 Stack (Stack_Ptr).Node := CP_R_Restore'Access;
3693 Stack_Base := Stack (Stack_Base).Cursor;
3694 end if;
3695 end Pop_Region;
3697 ----------
3698 -- Push --
3699 ----------
3701 procedure Push (Node : PE_Ptr) is
3702 begin
3703 Stack_Ptr := Stack_Ptr + 1;
3704 Stack (Stack_Ptr).Cursor := Cursor;
3705 Stack (Stack_Ptr).Node := Node;
3706 end Push;
3708 -----------------
3709 -- Push_Region --
3710 -----------------
3712 procedure Push_Region is
3713 begin
3714 Stack_Ptr := Stack_Ptr + 2;
3715 Stack (Stack_Ptr).Cursor := Stack_Base;
3716 Stack (Stack_Ptr).Node := CP_R_Remove'Access;
3717 Stack_Base := Stack_Ptr;
3718 end Push_Region;
3720 -- Start of processing for XMatch
3722 begin
3723 if Pat_P = null then
3724 Uninitialized_Pattern;
3725 end if;
3727 -- Check we have enough stack for this pattern. This check deals with
3728 -- every possibility except a match of a recursive pattern, where we
3729 -- make a check at each recursion level.
3731 if Pat_S >= Stack_Size - 1 then
3732 raise Pattern_Stack_Overflow;
3733 end if;
3735 -- In anchored mode, the bottom entry on the stack is an abort entry
3737 if Anchored_Mode then
3738 Stack (Stack_Init).Node := CP_Cancel'Access;
3739 Stack (Stack_Init).Cursor := 0;
3741 -- In unanchored more, the bottom entry on the stack references
3742 -- the special pattern element PE_Unanchored, whose Pthen field
3743 -- points to the initial pattern element. The cursor value in this
3744 -- entry is the number of anchor moves so far.
3746 else
3747 Stack (Stack_Init).Node := PE_Unanchored'Unchecked_Access;
3748 Stack (Stack_Init).Cursor := 0;
3749 end if;
3751 Stack_Ptr := Stack_Init;
3752 Stack_Base := Stack_Ptr;
3753 Cursor := 0;
3754 Node := Pat_P;
3755 goto Match;
3757 -----------------------------------------
3758 -- Main Pattern Matching State Control --
3759 -----------------------------------------
3761 -- This is a state machine which uses gotos to change state. The
3762 -- initial state is Match, to initiate the matching of the first
3763 -- element, so the goto Match above starts the match. In the
3764 -- following descriptions, we indicate the global values that
3765 -- are relevant for the state transition.
3767 -- Come here if entire match fails
3769 <<Match_Fail>>
3770 Start := 0;
3771 Stop := 0;
3772 return;
3774 -- Come here if entire match succeeds
3776 -- Cursor current position in subject string
3778 <<Match_Succeed>>
3779 Start := Stack (Stack_Init).Cursor + 1;
3780 Stop := Cursor;
3782 -- Scan history stack for deferred assignments or writes
3784 if Assign_OnM then
3785 for S in Stack_Init .. Stack_Ptr loop
3786 if Stack (S).Node = CP_Assign'Access then
3787 declare
3788 Inner_Base : constant Stack_Range :=
3789 Stack (S + 1).Cursor;
3790 Special_Entry : constant Stack_Range :=
3791 Inner_Base - 1;
3792 Node_OnM : constant PE_Ptr :=
3793 Stack (Special_Entry).Node;
3794 Start : constant Natural :=
3795 Stack (Special_Entry).Cursor + 1;
3796 Stop : constant Natural := Stack (S).Cursor;
3798 begin
3799 if Node_OnM.Pcode = PC_Assign_OnM then
3800 Set_String (Node_OnM.VP.all, Subject (Start .. Stop));
3802 elsif Node_OnM.Pcode = PC_Write_OnM then
3803 Put_Line (Node_OnM.FP.all, Subject (Start .. Stop));
3805 else
3806 Logic_Error;
3807 end if;
3808 end;
3809 end if;
3810 end loop;
3811 end if;
3813 return;
3815 -- Come here if attempt to match current element fails
3817 -- Stack_Base current stack base
3818 -- Stack_Ptr current stack pointer
3820 <<Fail>>
3821 Cursor := Stack (Stack_Ptr).Cursor;
3822 Node := Stack (Stack_Ptr).Node;
3823 Stack_Ptr := Stack_Ptr - 1;
3824 goto Match;
3826 -- Come here if attempt to match current element succeeds
3828 -- Cursor current position in subject string
3829 -- Node pointer to node successfully matched
3830 -- Stack_Base current stack base
3831 -- Stack_Ptr current stack pointer
3833 <<Succeed>>
3834 Node := Node.Pthen;
3836 -- Come here to match the next pattern element
3838 -- Cursor current position in subject string
3839 -- Node pointer to node to be matched
3840 -- Stack_Base current stack base
3841 -- Stack_Ptr current stack pointer
3843 <<Match>>
3845 --------------------------------------------------
3846 -- Main Pattern Match Element Matching Routines --
3847 --------------------------------------------------
3849 -- Here is the case statement that processes the current node. The
3850 -- processing for each element does one of five things:
3852 -- goto Succeed to move to the successor
3853 -- goto Match_Succeed if the entire match succeeds
3854 -- goto Match_Fail if the entire match fails
3855 -- goto Fail to signal failure of current match
3857 -- Processing is NOT allowed to fall through
3859 case Node.Pcode is
3861 -- Cancel
3863 when PC_Cancel =>
3864 goto Match_Fail;
3866 -- Alternation
3868 when PC_Alt =>
3869 Push (Node.Alt);
3870 Node := Node.Pthen;
3871 goto Match;
3873 -- Any (one character case)
3875 when PC_Any_CH =>
3876 if Cursor < Length
3877 and then Subject (Cursor + 1) = Node.Char
3878 then
3879 Cursor := Cursor + 1;
3880 goto Succeed;
3881 else
3882 goto Fail;
3883 end if;
3885 -- Any (character set case)
3887 when PC_Any_CS =>
3888 if Cursor < Length
3889 and then Is_In (Subject (Cursor + 1), Node.CS)
3890 then
3891 Cursor := Cursor + 1;
3892 goto Succeed;
3893 else
3894 goto Fail;
3895 end if;
3897 -- Any (string function case)
3899 when PC_Any_VF => declare
3900 U : constant VString := Node.VF.all;
3901 Str : constant String_Access := Get_String (U);
3903 begin
3904 if Cursor < Length
3905 and then Is_In (Subject (Cursor + 1), Str.all)
3906 then
3907 Cursor := Cursor + 1;
3908 goto Succeed;
3909 else
3910 goto Fail;
3911 end if;
3912 end;
3914 -- Any (string pointer case)
3916 when PC_Any_VP => declare
3917 Str : constant String_Access := Get_String (Node.VP.all);
3919 begin
3920 if Cursor < Length
3921 and then Is_In (Subject (Cursor + 1), Str.all)
3922 then
3923 Cursor := Cursor + 1;
3924 goto Succeed;
3925 else
3926 goto Fail;
3927 end if;
3928 end;
3930 -- Arb (initial match)
3932 when PC_Arb_X =>
3933 Push (Node.Alt);
3934 Node := Node.Pthen;
3935 goto Match;
3937 -- Arb (extension)
3939 when PC_Arb_Y =>
3940 if Cursor < Length then
3941 Cursor := Cursor + 1;
3942 Push (Node);
3943 goto Succeed;
3944 else
3945 goto Fail;
3946 end if;
3948 -- Arbno_S (simple Arbno initialize). This is the node that
3949 -- initiates the match of a simple Arbno structure.
3951 when PC_Arbno_S =>
3952 Push (Node.Alt);
3953 Node := Node.Pthen;
3954 goto Match;
3956 -- Arbno_X (Arbno initialize). This is the node that initiates
3957 -- the match of a complex Arbno structure.
3959 when PC_Arbno_X =>
3960 Push (Node.Alt);
3961 Node := Node.Pthen;
3962 goto Match;
3964 -- Arbno_Y (Arbno rematch). This is the node that is executed
3965 -- following successful matching of one instance of a complex
3966 -- Arbno pattern.
3968 when PC_Arbno_Y => declare
3969 Null_Match : constant Boolean :=
3970 Cursor = Stack (Stack_Base - 1).Cursor;
3972 begin
3973 Pop_Region;
3975 -- If arbno extension matched null, then immediately fail
3977 if Null_Match then
3978 goto Fail;
3979 end if;
3981 -- Here we must do a stack check to make sure enough stack
3982 -- is left. This check will happen once for each instance of
3983 -- the Arbno pattern that is matched. The Nat field of a
3984 -- PC_Arbno pattern contains the maximum stack entries needed
3985 -- for the Arbno with one instance and the successor pattern
3987 if Stack_Ptr + Node.Nat >= Stack'Last then
3988 raise Pattern_Stack_Overflow;
3989 end if;
3991 goto Succeed;
3992 end;
3994 -- Assign. If this node is executed, it means the assign-on-match
3995 -- or write-on-match operation will not happen after all, so we
3996 -- is propagate the failure, removing the PC_Assign node.
3998 when PC_Assign =>
3999 goto Fail;
4001 -- Assign immediate. This node performs the actual assignment.
4003 when PC_Assign_Imm =>
4004 Set_String
4005 (Node.VP.all,
4006 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
4007 Pop_Region;
4008 goto Succeed;
4010 -- Assign on match. This node sets up for the eventual assignment
4012 when PC_Assign_OnM =>
4013 Stack (Stack_Base - 1).Node := Node;
4014 Push (CP_Assign'Access);
4015 Pop_Region;
4016 Assign_OnM := True;
4017 goto Succeed;
4019 -- Bal
4021 when PC_Bal =>
4022 if Cursor >= Length or else Subject (Cursor + 1) = ')' then
4023 goto Fail;
4025 elsif Subject (Cursor + 1) = '(' then
4026 declare
4027 Paren_Count : Natural := 1;
4029 begin
4030 loop
4031 Cursor := Cursor + 1;
4033 if Cursor >= Length then
4034 goto Fail;
4036 elsif Subject (Cursor + 1) = '(' then
4037 Paren_Count := Paren_Count + 1;
4039 elsif Subject (Cursor + 1) = ')' then
4040 Paren_Count := Paren_Count - 1;
4041 exit when Paren_Count = 0;
4042 end if;
4043 end loop;
4044 end;
4045 end if;
4047 Cursor := Cursor + 1;
4048 Push (Node);
4049 goto Succeed;
4051 -- Break (one character case)
4053 when PC_Break_CH =>
4054 while Cursor < Length loop
4055 if Subject (Cursor + 1) = Node.Char then
4056 goto Succeed;
4057 else
4058 Cursor := Cursor + 1;
4059 end if;
4060 end loop;
4062 goto Fail;
4064 -- Break (character set case)
4066 when PC_Break_CS =>
4067 while Cursor < Length loop
4068 if Is_In (Subject (Cursor + 1), Node.CS) then
4069 goto Succeed;
4070 else
4071 Cursor := Cursor + 1;
4072 end if;
4073 end loop;
4075 goto Fail;
4077 -- Break (string function case)
4079 when PC_Break_VF => declare
4080 U : constant VString := Node.VF.all;
4081 Str : constant String_Access := Get_String (U);
4083 begin
4084 while Cursor < Length loop
4085 if Is_In (Subject (Cursor + 1), Str.all) then
4086 goto Succeed;
4087 else
4088 Cursor := Cursor + 1;
4089 end if;
4090 end loop;
4092 goto Fail;
4093 end;
4095 -- Break (string pointer case)
4097 when PC_Break_VP => declare
4098 Str : constant String_Access := Get_String (Node.VP.all);
4100 begin
4101 while Cursor < Length loop
4102 if Is_In (Subject (Cursor + 1), Str.all) then
4103 goto Succeed;
4104 else
4105 Cursor := Cursor + 1;
4106 end if;
4107 end loop;
4109 goto Fail;
4110 end;
4112 -- BreakX (one character case)
4114 when PC_BreakX_CH =>
4115 while Cursor < Length loop
4116 if Subject (Cursor + 1) = Node.Char then
4117 goto Succeed;
4118 else
4119 Cursor := Cursor + 1;
4120 end if;
4121 end loop;
4123 goto Fail;
4125 -- BreakX (character set case)
4127 when PC_BreakX_CS =>
4128 while Cursor < Length loop
4129 if Is_In (Subject (Cursor + 1), Node.CS) then
4130 goto Succeed;
4131 else
4132 Cursor := Cursor + 1;
4133 end if;
4134 end loop;
4136 goto Fail;
4138 -- BreakX (string function case)
4140 when PC_BreakX_VF => declare
4141 U : constant VString := Node.VF.all;
4142 Str : constant String_Access := Get_String (U);
4144 begin
4145 while Cursor < Length loop
4146 if Is_In (Subject (Cursor + 1), Str.all) then
4147 goto Succeed;
4148 else
4149 Cursor := Cursor + 1;
4150 end if;
4151 end loop;
4153 goto Fail;
4154 end;
4156 -- BreakX (string pointer case)
4158 when PC_BreakX_VP => declare
4159 Str : constant String_Access := Get_String (Node.VP.all);
4161 begin
4162 while Cursor < Length loop
4163 if Is_In (Subject (Cursor + 1), Str.all) then
4164 goto Succeed;
4165 else
4166 Cursor := Cursor + 1;
4167 end if;
4168 end loop;
4170 goto Fail;
4171 end;
4173 -- BreakX_X (BreakX extension). See section on "Compound Pattern
4174 -- Structures". This node is the alternative that is stacked to
4175 -- skip past the break character and extend the break.
4177 when PC_BreakX_X =>
4178 Cursor := Cursor + 1;
4179 goto Succeed;
4181 -- Character (one character string)
4183 when PC_Char =>
4184 if Cursor < Length
4185 and then Subject (Cursor + 1) = Node.Char
4186 then
4187 Cursor := Cursor + 1;
4188 goto Succeed;
4189 else
4190 goto Fail;
4191 end if;
4193 -- End of Pattern
4195 when PC_EOP =>
4196 if Stack_Base = Stack_Init then
4197 goto Match_Succeed;
4199 -- End of recursive inner match. See separate section on
4200 -- handing of recursive pattern matches for details.
4202 else
4203 Node := Stack (Stack_Base - 1).Node;
4204 Pop_Region;
4205 goto Match;
4206 end if;
4208 -- Fail
4210 when PC_Fail =>
4211 goto Fail;
4213 -- Fence (built in pattern)
4215 when PC_Fence =>
4216 Push (CP_Cancel'Access);
4217 goto Succeed;
4219 -- Fence function node X. This is the node that gets control
4220 -- after a successful match of the fenced pattern.
4222 when PC_Fence_X =>
4223 Stack_Ptr := Stack_Ptr + 1;
4224 Stack (Stack_Ptr).Cursor := Stack_Base;
4225 Stack (Stack_Ptr).Node := CP_Fence_Y'Access;
4226 Stack_Base := Stack (Stack_Base).Cursor;
4227 goto Succeed;
4229 -- Fence function node Y. This is the node that gets control on
4230 -- a failure that occurs after the fenced pattern has matched.
4232 -- Note: the Cursor at this stage is actually the inner stack
4233 -- base value. We don't reset this, but we do use it to strip
4234 -- off all the entries made by the fenced pattern.
4236 when PC_Fence_Y =>
4237 Stack_Ptr := Cursor - 2;
4238 goto Fail;
4240 -- Len (integer case)
4242 when PC_Len_Nat =>
4243 if Cursor + Node.Nat > Length then
4244 goto Fail;
4245 else
4246 Cursor := Cursor + Node.Nat;
4247 goto Succeed;
4248 end if;
4250 -- Len (Integer function case)
4252 when PC_Len_NF => declare
4253 N : constant Natural := Node.NF.all;
4255 begin
4256 if Cursor + N > Length then
4257 goto Fail;
4258 else
4259 Cursor := Cursor + N;
4260 goto Succeed;
4261 end if;
4262 end;
4264 -- Len (integer pointer case)
4266 when PC_Len_NP =>
4267 if Cursor + Node.NP.all > Length then
4268 goto Fail;
4269 else
4270 Cursor := Cursor + Node.NP.all;
4271 goto Succeed;
4272 end if;
4274 -- NotAny (one character case)
4276 when PC_NotAny_CH =>
4277 if Cursor < Length
4278 and then Subject (Cursor + 1) /= Node.Char
4279 then
4280 Cursor := Cursor + 1;
4281 goto Succeed;
4282 else
4283 goto Fail;
4284 end if;
4286 -- NotAny (character set case)
4288 when PC_NotAny_CS =>
4289 if Cursor < Length
4290 and then not Is_In (Subject (Cursor + 1), Node.CS)
4291 then
4292 Cursor := Cursor + 1;
4293 goto Succeed;
4294 else
4295 goto Fail;
4296 end if;
4298 -- NotAny (string function case)
4300 when PC_NotAny_VF => declare
4301 U : constant VString := Node.VF.all;
4302 Str : constant String_Access := Get_String (U);
4304 begin
4305 if Cursor < Length
4306 and then
4307 not Is_In (Subject (Cursor + 1), Str.all)
4308 then
4309 Cursor := Cursor + 1;
4310 goto Succeed;
4311 else
4312 goto Fail;
4313 end if;
4314 end;
4316 -- NotAny (string pointer case)
4318 when PC_NotAny_VP => declare
4319 Str : constant String_Access := Get_String (Node.VP.all);
4321 begin
4322 if Cursor < Length
4323 and then
4324 not Is_In (Subject (Cursor + 1), Str.all)
4325 then
4326 Cursor := Cursor + 1;
4327 goto Succeed;
4328 else
4329 goto Fail;
4330 end if;
4331 end;
4333 -- NSpan (one character case)
4335 when PC_NSpan_CH =>
4336 while Cursor < Length
4337 and then Subject (Cursor + 1) = Node.Char
4338 loop
4339 Cursor := Cursor + 1;
4340 end loop;
4342 goto Succeed;
4344 -- NSpan (character set case)
4346 when PC_NSpan_CS =>
4347 while Cursor < Length
4348 and then Is_In (Subject (Cursor + 1), Node.CS)
4349 loop
4350 Cursor := Cursor + 1;
4351 end loop;
4353 goto Succeed;
4355 -- NSpan (string function case)
4357 when PC_NSpan_VF => declare
4358 U : constant VString := Node.VF.all;
4359 Str : constant String_Access := Get_String (U);
4361 begin
4362 while Cursor < Length
4363 and then Is_In (Subject (Cursor + 1), Str.all)
4364 loop
4365 Cursor := Cursor + 1;
4366 end loop;
4368 goto Succeed;
4369 end;
4371 -- NSpan (string pointer case)
4373 when PC_NSpan_VP => declare
4374 Str : constant String_Access := Get_String (Node.VP.all);
4376 begin
4377 while Cursor < Length
4378 and then Is_In (Subject (Cursor + 1), Str.all)
4379 loop
4380 Cursor := Cursor + 1;
4381 end loop;
4383 goto Succeed;
4384 end;
4386 -- Null string
4388 when PC_Null =>
4389 goto Succeed;
4391 -- Pos (integer case)
4393 when PC_Pos_Nat =>
4394 if Cursor = Node.Nat then
4395 goto Succeed;
4396 else
4397 goto Fail;
4398 end if;
4400 -- Pos (Integer function case)
4402 when PC_Pos_NF => declare
4403 N : constant Natural := Node.NF.all;
4405 begin
4406 if Cursor = N then
4407 goto Succeed;
4408 else
4409 goto Fail;
4410 end if;
4411 end;
4413 -- Pos (integer pointer case)
4415 when PC_Pos_NP =>
4416 if Cursor = Node.NP.all then
4417 goto Succeed;
4418 else
4419 goto Fail;
4420 end if;
4422 -- Predicate function
4424 when PC_Pred_Func =>
4425 if Node.BF.all then
4426 goto Succeed;
4427 else
4428 goto Fail;
4429 end if;
4431 -- Region Enter. Initiate new pattern history stack region
4433 when PC_R_Enter =>
4434 Stack (Stack_Ptr + 1).Cursor := Cursor;
4435 Push_Region;
4436 goto Succeed;
4438 -- Region Remove node. This is the node stacked by an R_Enter.
4439 -- It removes the special format stack entry right underneath, and
4440 -- then restores the outer level stack base and signals failure.
4442 -- Note: the cursor value at this stage is actually the (negative)
4443 -- stack base value for the outer level.
4445 when PC_R_Remove =>
4446 Stack_Base := Cursor;
4447 Stack_Ptr := Stack_Ptr - 1;
4448 goto Fail;
4450 -- Region restore node. This is the node stacked at the end of an
4451 -- inner level match. Its function is to restore the inner level
4452 -- region, so that alternatives in this region can be sought.
4454 -- Note: the Cursor at this stage is actually the negative of the
4455 -- inner stack base value, which we use to restore the inner region.
4457 when PC_R_Restore =>
4458 Stack_Base := Cursor;
4459 goto Fail;
4461 -- Rest
4463 when PC_Rest =>
4464 Cursor := Length;
4465 goto Succeed;
4467 -- Initiate recursive match (pattern pointer case)
4469 when PC_Rpat =>
4470 Stack (Stack_Ptr + 1).Node := Node.Pthen;
4471 Push_Region;
4473 if Stack_Ptr + Node.PP.all.Stk >= Stack_Size then
4474 raise Pattern_Stack_Overflow;
4475 else
4476 Node := Node.PP.all.P;
4477 goto Match;
4478 end if;
4480 -- RPos (integer case)
4482 when PC_RPos_Nat =>
4483 if Cursor = (Length - Node.Nat) then
4484 goto Succeed;
4485 else
4486 goto Fail;
4487 end if;
4489 -- RPos (integer function case)
4491 when PC_RPos_NF => declare
4492 N : constant Natural := Node.NF.all;
4494 begin
4495 if Length - Cursor = N then
4496 goto Succeed;
4497 else
4498 goto Fail;
4499 end if;
4500 end;
4502 -- RPos (integer pointer case)
4504 when PC_RPos_NP =>
4505 if Cursor = (Length - Node.NP.all) then
4506 goto Succeed;
4507 else
4508 goto Fail;
4509 end if;
4511 -- RTab (integer case)
4513 when PC_RTab_Nat =>
4514 if Cursor <= (Length - Node.Nat) then
4515 Cursor := Length - Node.Nat;
4516 goto Succeed;
4517 else
4518 goto Fail;
4519 end if;
4521 -- RTab (integer function case)
4523 when PC_RTab_NF => declare
4524 N : constant Natural := Node.NF.all;
4526 begin
4527 if Length - Cursor >= N then
4528 Cursor := Length - N;
4529 goto Succeed;
4530 else
4531 goto Fail;
4532 end if;
4533 end;
4535 -- RTab (integer pointer case)
4537 when PC_RTab_NP =>
4538 if Cursor <= (Length - Node.NP.all) then
4539 Cursor := Length - Node.NP.all;
4540 goto Succeed;
4541 else
4542 goto Fail;
4543 end if;
4545 -- Cursor assignment
4547 when PC_Setcur =>
4548 Node.Var.all := Cursor;
4549 goto Succeed;
4551 -- Span (one character case)
4553 when PC_Span_CH => declare
4554 P : Natural := Cursor;
4556 begin
4557 while P < Length
4558 and then Subject (P + 1) = Node.Char
4559 loop
4560 P := P + 1;
4561 end loop;
4563 if P /= Cursor then
4564 Cursor := P;
4565 goto Succeed;
4566 else
4567 goto Fail;
4568 end if;
4569 end;
4571 -- Span (character set case)
4573 when PC_Span_CS => declare
4574 P : Natural := Cursor;
4576 begin
4577 while P < Length
4578 and then Is_In (Subject (P + 1), Node.CS)
4579 loop
4580 P := P + 1;
4581 end loop;
4583 if P /= Cursor then
4584 Cursor := P;
4585 goto Succeed;
4586 else
4587 goto Fail;
4588 end if;
4589 end;
4591 -- Span (string function case)
4593 when PC_Span_VF => declare
4594 U : constant VString := Node.VF.all;
4595 Str : constant String_Access := Get_String (U);
4596 P : Natural := Cursor;
4598 begin
4599 while P < Length
4600 and then Is_In (Subject (P + 1), Str.all)
4601 loop
4602 P := P + 1;
4603 end loop;
4605 if P /= Cursor then
4606 Cursor := P;
4607 goto Succeed;
4608 else
4609 goto Fail;
4610 end if;
4611 end;
4613 -- Span (string pointer case)
4615 when PC_Span_VP => declare
4616 Str : constant String_Access := Get_String (Node.VP.all);
4617 P : Natural := Cursor;
4619 begin
4620 while P < Length
4621 and then Is_In (Subject (P + 1), Str.all)
4622 loop
4623 P := P + 1;
4624 end loop;
4626 if P /= Cursor then
4627 Cursor := P;
4628 goto Succeed;
4629 else
4630 goto Fail;
4631 end if;
4632 end;
4634 -- String (two character case)
4636 when PC_String_2 =>
4637 if (Length - Cursor) >= 2
4638 and then Subject (Cursor + 1 .. Cursor + 2) = Node.Str2
4639 then
4640 Cursor := Cursor + 2;
4641 goto Succeed;
4642 else
4643 goto Fail;
4644 end if;
4646 -- String (three character case)
4648 when PC_String_3 =>
4649 if (Length - Cursor) >= 3
4650 and then Subject (Cursor + 1 .. Cursor + 3) = Node.Str3
4651 then
4652 Cursor := Cursor + 3;
4653 goto Succeed;
4654 else
4655 goto Fail;
4656 end if;
4658 -- String (four character case)
4660 when PC_String_4 =>
4661 if (Length - Cursor) >= 4
4662 and then Subject (Cursor + 1 .. Cursor + 4) = Node.Str4
4663 then
4664 Cursor := Cursor + 4;
4665 goto Succeed;
4666 else
4667 goto Fail;
4668 end if;
4670 -- String (five character case)
4672 when PC_String_5 =>
4673 if (Length - Cursor) >= 5
4674 and then Subject (Cursor + 1 .. Cursor + 5) = Node.Str5
4675 then
4676 Cursor := Cursor + 5;
4677 goto Succeed;
4678 else
4679 goto Fail;
4680 end if;
4682 -- String (six character case)
4684 when PC_String_6 =>
4685 if (Length - Cursor) >= 6
4686 and then Subject (Cursor + 1 .. Cursor + 6) = Node.Str6
4687 then
4688 Cursor := Cursor + 6;
4689 goto Succeed;
4690 else
4691 goto Fail;
4692 end if;
4694 -- String (case of more than six characters)
4696 when PC_String => declare
4697 Len : constant Natural := Node.Str'Length;
4699 begin
4700 if (Length - Cursor) >= Len
4701 and then Node.Str.all = Subject (Cursor + 1 .. Cursor + Len)
4702 then
4703 Cursor := Cursor + Len;
4704 goto Succeed;
4705 else
4706 goto Fail;
4707 end if;
4708 end;
4710 -- String (function case)
4712 when PC_String_VF => declare
4713 U : constant VString := Node.VF.all;
4714 Str : constant String_Access := Get_String (U);
4715 Len : constant Natural := Str'Length;
4717 begin
4718 if (Length - Cursor) >= Len
4719 and then Str.all = Subject (Cursor + 1 .. Cursor + Len)
4720 then
4721 Cursor := Cursor + Len;
4722 goto Succeed;
4723 else
4724 goto Fail;
4725 end if;
4726 end;
4728 -- String (pointer case)
4730 when PC_String_VP => declare
4731 S : constant String_Access := Get_String (Node.VP.all);
4732 Len : constant Natural := S'Length;
4734 begin
4735 if (Length - Cursor) >= Len
4736 and then S.all = Subject (Cursor + 1 .. Cursor + Len)
4737 then
4738 Cursor := Cursor + Len;
4739 goto Succeed;
4740 else
4741 goto Fail;
4742 end if;
4743 end;
4745 -- Succeed
4747 when PC_Succeed =>
4748 Push (Node);
4749 goto Succeed;
4751 -- Tab (integer case)
4753 when PC_Tab_Nat =>
4754 if Cursor <= Node.Nat then
4755 Cursor := Node.Nat;
4756 goto Succeed;
4757 else
4758 goto Fail;
4759 end if;
4761 -- Tab (integer function case)
4763 when PC_Tab_NF => declare
4764 N : constant Natural := Node.NF.all;
4766 begin
4767 if Cursor <= N then
4768 Cursor := N;
4769 goto Succeed;
4770 else
4771 goto Fail;
4772 end if;
4773 end;
4775 -- Tab (integer pointer case)
4777 when PC_Tab_NP =>
4778 if Cursor <= Node.NP.all then
4779 Cursor := Node.NP.all;
4780 goto Succeed;
4781 else
4782 goto Fail;
4783 end if;
4785 -- Unanchored movement
4787 when PC_Unanchored =>
4789 -- All done if we tried every position
4791 if Cursor > Length then
4792 goto Match_Fail;
4794 -- Otherwise extend the anchor point, and restack ourself
4796 else
4797 Cursor := Cursor + 1;
4798 Push (Node);
4799 goto Succeed;
4800 end if;
4802 -- Write immediate. This node performs the actual write
4804 when PC_Write_Imm =>
4805 Put_Line
4806 (Node.FP.all,
4807 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
4808 Pop_Region;
4809 goto Succeed;
4811 -- Write on match. This node sets up for the eventual write
4813 when PC_Write_OnM =>
4814 Stack (Stack_Base - 1).Node := Node;
4815 Push (CP_Assign'Access);
4816 Pop_Region;
4817 Assign_OnM := True;
4818 goto Succeed;
4820 end case;
4822 -- We are NOT allowed to fall though this case statement, since every
4823 -- match routine must end by executing a goto to the appropriate point
4824 -- in the finite state machine model.
4826 pragma Warnings (Off);
4827 Logic_Error;
4828 pragma Warnings (On);
4829 end XMatch;
4831 -------------
4832 -- XMatchD --
4833 -------------
4835 -- Maintenance note: There is a LOT of code duplication between XMatch
4836 -- and XMatchD. This is quite intentional, the point is to avoid any
4837 -- unnecessary debugging overhead in the XMatch case, but this does mean
4838 -- that any changes to XMatchD must be mirrored in XMatch. In case of
4839 -- any major changes, the proper approach is to delete XMatch, make the
4840 -- changes to XMatchD, and then make a copy of XMatchD, removing all
4841 -- calls to Dout, and all Put and Put_Line operations. This copy becomes
4842 -- the new XMatch.
4844 procedure XMatchD
4845 (Subject : String;
4846 Pat_P : PE_Ptr;
4847 Pat_S : Natural;
4848 Start : out Natural;
4849 Stop : out Natural)
4851 Node : PE_Ptr;
4852 -- Pointer to current pattern node. Initialized from Pat_P, and then
4853 -- updated as the match proceeds through its constituent elements.
4855 Length : constant Natural := Subject'Length;
4856 -- Length of string (= Subject'Last, since Subject'First is always 1)
4858 Cursor : Integer := 0;
4859 -- If the value is non-negative, then this value is the index showing
4860 -- the current position of the match in the subject string. The next
4861 -- character to be matched is at Subject (Cursor + 1). Note that since
4862 -- our view of the subject string in XMatch always has a lower bound
4863 -- of one, regardless of original bounds, that this definition exactly
4864 -- corresponds to the cursor value as referenced by functions like Pos.
4866 -- If the value is negative, then this is a saved stack pointer,
4867 -- typically a base pointer of an inner or outer region. Cursor
4868 -- temporarily holds such a value when it is popped from the stack
4869 -- by Fail. In all cases, Cursor is reset to a proper non-negative
4870 -- cursor value before the match proceeds (e.g. by propagating the
4871 -- failure and popping a "real" cursor value from the stack.
4873 PE_Unanchored : aliased PE := (PC_Unanchored, 0, Pat_P);
4874 -- Dummy pattern element used in the unanchored case.
4876 Region_Level : Natural := 0;
4877 -- Keeps track of recursive region level. This is used only for
4878 -- debugging, it is the number of saved history stack base values.
4880 Stack : Stack_Type;
4881 -- The pattern matching failure stack for this call to Match
4883 Stack_Ptr : Stack_Range;
4884 -- Current stack pointer. This points to the top element of the stack
4885 -- that is currently in use. At the outer level this is the special
4886 -- entry placed on the stack according to the anchor mode.
4888 Stack_Init : constant Stack_Range := Stack'First + 1;
4889 -- This is the initial value of the Stack_Ptr and Stack_Base. The
4890 -- initial (Stack'First) element of the stack is not used so that
4891 -- when we pop the last element off, Stack_Ptr is still in range.
4893 Stack_Base : Stack_Range;
4894 -- This value is the stack base value, i.e. the stack pointer for the
4895 -- first history stack entry in the current stack region. See separate
4896 -- section on handling of recursive pattern matches.
4898 Assign_OnM : Boolean := False;
4899 -- Set True if assign-on-match or write-on-match operations may be
4900 -- present in the history stack, which must then be scanned on a
4901 -- successful match.
4903 procedure Dout (Str : String);
4904 -- Output string to standard error with bars indicating region level.
4906 procedure Dout (Str : String; A : Character);
4907 -- Calls Dout with the string S ('A')
4909 procedure Dout (Str : String; A : Character_Set);
4910 -- Calls Dout with the string S ("A")
4912 procedure Dout (Str : String; A : Natural);
4913 -- Calls Dout with the string S (A)
4915 procedure Dout (Str : String; A : String);
4916 -- Calls Dout with the string S ("A")
4918 function Img (P : PE_Ptr) return String;
4919 -- Returns a string of the form #nnn where nnn is P.Index
4921 procedure Pop_Region;
4922 pragma Inline (Pop_Region);
4923 -- Used at the end of processing of an inner region. if the inner
4924 -- region left no stack entries, then all trace of it is removed.
4925 -- Otherwise a PC_Restore_Region entry is pushed to ensure proper
4926 -- handling of alternatives in the inner region.
4928 procedure Push (Node : PE_Ptr);
4929 pragma Inline (Push);
4930 -- Make entry in pattern matching stack with current cursor valeu
4932 procedure Push_Region;
4933 pragma Inline (Push_Region);
4934 -- This procedure makes a new region on the history stack. The
4935 -- caller first establishes the special entry on the stack, but
4936 -- does not push the stack pointer. Then this call stacks a
4937 -- PC_Remove_Region node, on top of this entry, using the cursor
4938 -- field of the PC_Remove_Region entry to save the outer level
4939 -- stack base value, and resets the stack base to point to this
4940 -- PC_Remove_Region node.
4942 ----------
4943 -- Dout --
4944 ----------
4946 procedure Dout (Str : String) is
4947 begin
4948 for J in 1 .. Region_Level loop
4949 Put ("| ");
4950 end loop;
4952 Put_Line (Str);
4953 end Dout;
4955 procedure Dout (Str : String; A : Character) is
4956 begin
4957 Dout (Str & " ('" & A & "')");
4958 end Dout;
4960 procedure Dout (Str : String; A : Character_Set) is
4961 begin
4962 Dout (Str & " (" & Image (To_Sequence (A)) & ')');
4963 end Dout;
4965 procedure Dout (Str : String; A : Natural) is
4966 begin
4967 Dout (Str & " (" & A & ')');
4968 end Dout;
4970 procedure Dout (Str : String; A : String) is
4971 begin
4972 Dout (Str & " (" & Image (A) & ')');
4973 end Dout;
4975 ---------
4976 -- Img --
4977 ---------
4979 function Img (P : PE_Ptr) return String is
4980 begin
4981 return "#" & Integer (P.Index) & " ";
4982 end Img;
4984 ----------------
4985 -- Pop_Region --
4986 ----------------
4988 procedure Pop_Region is
4989 begin
4990 Region_Level := Region_Level - 1;
4992 -- If nothing was pushed in the inner region, we can just get
4993 -- rid of it entirely, leaving no traces that it was ever there
4995 if Stack_Ptr = Stack_Base then
4996 Stack_Ptr := Stack_Base - 2;
4997 Stack_Base := Stack (Stack_Ptr + 2).Cursor;
4999 -- If stuff was pushed in the inner region, then we have to
5000 -- push a PC_R_Restore node so that we properly handle possible
5001 -- rematches within the region.
5003 else
5004 Stack_Ptr := Stack_Ptr + 1;
5005 Stack (Stack_Ptr).Cursor := Stack_Base;
5006 Stack (Stack_Ptr).Node := CP_R_Restore'Access;
5007 Stack_Base := Stack (Stack_Base).Cursor;
5008 end if;
5009 end Pop_Region;
5011 ----------
5012 -- Push --
5013 ----------
5015 procedure Push (Node : PE_Ptr) is
5016 begin
5017 Stack_Ptr := Stack_Ptr + 1;
5018 Stack (Stack_Ptr).Cursor := Cursor;
5019 Stack (Stack_Ptr).Node := Node;
5020 end Push;
5022 -----------------
5023 -- Push_Region --
5024 -----------------
5026 procedure Push_Region is
5027 begin
5028 Region_Level := Region_Level + 1;
5029 Stack_Ptr := Stack_Ptr + 2;
5030 Stack (Stack_Ptr).Cursor := Stack_Base;
5031 Stack (Stack_Ptr).Node := CP_R_Remove'Access;
5032 Stack_Base := Stack_Ptr;
5033 end Push_Region;
5035 -- Start of processing for XMatchD
5037 begin
5038 New_Line;
5039 Put_Line ("Initiating pattern match, subject = " & Image (Subject));
5040 Put ("--------------------------------------");
5042 for J in 1 .. Length loop
5043 Put ('-');
5044 end loop;
5046 New_Line;
5047 Put_Line ("subject length = " & Length);
5049 if Pat_P = null then
5050 Uninitialized_Pattern;
5051 end if;
5053 -- Check we have enough stack for this pattern. This check deals with
5054 -- every possibility except a match of a recursive pattern, where we
5055 -- make a check at each recursion level.
5057 if Pat_S >= Stack_Size - 1 then
5058 raise Pattern_Stack_Overflow;
5059 end if;
5061 -- In anchored mode, the bottom entry on the stack is an abort entry
5063 if Anchored_Mode then
5064 Stack (Stack_Init).Node := CP_Cancel'Access;
5065 Stack (Stack_Init).Cursor := 0;
5067 -- In unanchored more, the bottom entry on the stack references
5068 -- the special pattern element PE_Unanchored, whose Pthen field
5069 -- points to the initial pattern element. The cursor value in this
5070 -- entry is the number of anchor moves so far.
5072 else
5073 Stack (Stack_Init).Node := PE_Unanchored'Unchecked_Access;
5074 Stack (Stack_Init).Cursor := 0;
5075 end if;
5077 Stack_Ptr := Stack_Init;
5078 Stack_Base := Stack_Ptr;
5079 Cursor := 0;
5080 Node := Pat_P;
5081 goto Match;
5083 -----------------------------------------
5084 -- Main Pattern Matching State Control --
5085 -----------------------------------------
5087 -- This is a state machine which uses gotos to change state. The
5088 -- initial state is Match, to initiate the matching of the first
5089 -- element, so the goto Match above starts the match. In the
5090 -- following descriptions, we indicate the global values that
5091 -- are relevant for the state transition.
5093 -- Come here if entire match fails
5095 <<Match_Fail>>
5096 Dout ("match fails");
5097 New_Line;
5098 Start := 0;
5099 Stop := 0;
5100 return;
5102 -- Come here if entire match succeeds
5104 -- Cursor current position in subject string
5106 <<Match_Succeed>>
5107 Dout ("match succeeds");
5108 Start := Stack (Stack_Init).Cursor + 1;
5109 Stop := Cursor;
5110 Dout ("first matched character index = " & Start);
5111 Dout ("last matched character index = " & Stop);
5112 Dout ("matched substring = " & Image (Subject (Start .. Stop)));
5114 -- Scan history stack for deferred assignments or writes
5116 if Assign_OnM then
5117 for S in Stack'First .. Stack_Ptr loop
5118 if Stack (S).Node = CP_Assign'Access then
5119 declare
5120 Inner_Base : constant Stack_Range :=
5121 Stack (S + 1).Cursor;
5122 Special_Entry : constant Stack_Range :=
5123 Inner_Base - 1;
5124 Node_OnM : constant PE_Ptr :=
5125 Stack (Special_Entry).Node;
5126 Start : constant Natural :=
5127 Stack (Special_Entry).Cursor + 1;
5128 Stop : constant Natural := Stack (S).Cursor;
5130 begin
5131 if Node_OnM.Pcode = PC_Assign_OnM then
5132 Set_String (Node_OnM.VP.all, Subject (Start .. Stop));
5133 Dout
5134 (Img (Stack (S).Node) &
5135 "deferred assignment of " &
5136 Image (Subject (Start .. Stop)));
5138 elsif Node_OnM.Pcode = PC_Write_OnM then
5139 Put_Line (Node_OnM.FP.all, Subject (Start .. Stop));
5140 Dout
5141 (Img (Stack (S).Node) &
5142 "deferred write of " &
5143 Image (Subject (Start .. Stop)));
5145 else
5146 Logic_Error;
5147 end if;
5148 end;
5149 end if;
5150 end loop;
5151 end if;
5153 New_Line;
5154 return;
5156 -- Come here if attempt to match current element fails
5158 -- Stack_Base current stack base
5159 -- Stack_Ptr current stack pointer
5161 <<Fail>>
5162 Cursor := Stack (Stack_Ptr).Cursor;
5163 Node := Stack (Stack_Ptr).Node;
5164 Stack_Ptr := Stack_Ptr - 1;
5166 if Cursor >= 0 then
5167 Dout ("failure, cursor reset to " & Cursor);
5168 end if;
5170 goto Match;
5172 -- Come here if attempt to match current element succeeds
5174 -- Cursor current position in subject string
5175 -- Node pointer to node successfully matched
5176 -- Stack_Base current stack base
5177 -- Stack_Ptr current stack pointer
5179 <<Succeed>>
5180 Dout ("success, cursor = " & Cursor);
5181 Node := Node.Pthen;
5183 -- Come here to match the next pattern element
5185 -- Cursor current position in subject string
5186 -- Node pointer to node to be matched
5187 -- Stack_Base current stack base
5188 -- Stack_Ptr current stack pointer
5190 <<Match>>
5192 --------------------------------------------------
5193 -- Main Pattern Match Element Matching Routines --
5194 --------------------------------------------------
5196 -- Here is the case statement that processes the current node. The
5197 -- processing for each element does one of five things:
5199 -- goto Succeed to move to the successor
5200 -- goto Match_Succeed if the entire match succeeds
5201 -- goto Match_Fail if the entire match fails
5202 -- goto Fail to signal failure of current match
5204 -- Processing is NOT allowed to fall through
5206 case Node.Pcode is
5208 -- Cancel
5210 when PC_Cancel =>
5211 Dout (Img (Node) & "matching Cancel");
5212 goto Match_Fail;
5214 -- Alternation
5216 when PC_Alt =>
5217 Dout
5218 (Img (Node) & "setting up alternative " & Img (Node.Alt));
5219 Push (Node.Alt);
5220 Node := Node.Pthen;
5221 goto Match;
5223 -- Any (one character case)
5225 when PC_Any_CH =>
5226 Dout (Img (Node) & "matching Any", Node.Char);
5228 if Cursor < Length
5229 and then Subject (Cursor + 1) = Node.Char
5230 then
5231 Cursor := Cursor + 1;
5232 goto Succeed;
5233 else
5234 goto Fail;
5235 end if;
5237 -- Any (character set case)
5239 when PC_Any_CS =>
5240 Dout (Img (Node) & "matching Any", Node.CS);
5242 if Cursor < Length
5243 and then Is_In (Subject (Cursor + 1), Node.CS)
5244 then
5245 Cursor := Cursor + 1;
5246 goto Succeed;
5247 else
5248 goto Fail;
5249 end if;
5251 -- Any (string function case)
5253 when PC_Any_VF => declare
5254 U : constant VString := Node.VF.all;
5255 Str : constant String_Access := Get_String (U);
5257 begin
5258 Dout (Img (Node) & "matching Any", Str.all);
5260 if Cursor < Length
5261 and then Is_In (Subject (Cursor + 1), Str.all)
5262 then
5263 Cursor := Cursor + 1;
5264 goto Succeed;
5265 else
5266 goto Fail;
5267 end if;
5268 end;
5270 -- Any (string pointer case)
5272 when PC_Any_VP => declare
5273 Str : constant String_Access := Get_String (Node.VP.all);
5275 begin
5276 Dout (Img (Node) & "matching Any", Str.all);
5278 if Cursor < Length
5279 and then Is_In (Subject (Cursor + 1), Str.all)
5280 then
5281 Cursor := Cursor + 1;
5282 goto Succeed;
5283 else
5284 goto Fail;
5285 end if;
5286 end;
5288 -- Arb (initial match)
5290 when PC_Arb_X =>
5291 Dout (Img (Node) & "matching Arb");
5292 Push (Node.Alt);
5293 Node := Node.Pthen;
5294 goto Match;
5296 -- Arb (extension)
5298 when PC_Arb_Y =>
5299 Dout (Img (Node) & "extending Arb");
5301 if Cursor < Length then
5302 Cursor := Cursor + 1;
5303 Push (Node);
5304 goto Succeed;
5305 else
5306 goto Fail;
5307 end if;
5309 -- Arbno_S (simple Arbno initialize). This is the node that
5310 -- initiates the match of a simple Arbno structure.
5312 when PC_Arbno_S =>
5313 Dout (Img (Node) &
5314 "setting up Arbno alternative " & Img (Node.Alt));
5315 Push (Node.Alt);
5316 Node := Node.Pthen;
5317 goto Match;
5319 -- Arbno_X (Arbno initialize). This is the node that initiates
5320 -- the match of a complex Arbno structure.
5322 when PC_Arbno_X =>
5323 Dout (Img (Node) &
5324 "setting up Arbno alternative " & Img (Node.Alt));
5325 Push (Node.Alt);
5326 Node := Node.Pthen;
5327 goto Match;
5329 -- Arbno_Y (Arbno rematch). This is the node that is executed
5330 -- following successful matching of one instance of a complex
5331 -- Arbno pattern.
5333 when PC_Arbno_Y => declare
5334 Null_Match : constant Boolean :=
5335 Cursor = Stack (Stack_Base - 1).Cursor;
5337 begin
5338 Dout (Img (Node) & "extending Arbno");
5339 Pop_Region;
5341 -- If arbno extension matched null, then immediately fail
5343 if Null_Match then
5344 Dout ("Arbno extension matched null, so fails");
5345 goto Fail;
5346 end if;
5348 -- Here we must do a stack check to make sure enough stack
5349 -- is left. This check will happen once for each instance of
5350 -- the Arbno pattern that is matched. The Nat field of a
5351 -- PC_Arbno pattern contains the maximum stack entries needed
5352 -- for the Arbno with one instance and the successor pattern
5354 if Stack_Ptr + Node.Nat >= Stack'Last then
5355 raise Pattern_Stack_Overflow;
5356 end if;
5358 goto Succeed;
5359 end;
5361 -- Assign. If this node is executed, it means the assign-on-match
5362 -- or write-on-match operation will not happen after all, so we
5363 -- is propagate the failure, removing the PC_Assign node.
5365 when PC_Assign =>
5366 Dout (Img (Node) & "deferred assign/write cancelled");
5367 goto Fail;
5369 -- Assign immediate. This node performs the actual assignment.
5371 when PC_Assign_Imm =>
5372 Dout
5373 (Img (Node) & "executing immediate assignment of " &
5374 Image (Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor)));
5375 Set_String
5376 (Node.VP.all,
5377 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
5378 Pop_Region;
5379 goto Succeed;
5381 -- Assign on match. This node sets up for the eventual assignment
5383 when PC_Assign_OnM =>
5384 Dout (Img (Node) & "registering deferred assignment");
5385 Stack (Stack_Base - 1).Node := Node;
5386 Push (CP_Assign'Access);
5387 Pop_Region;
5388 Assign_OnM := True;
5389 goto Succeed;
5391 -- Bal
5393 when PC_Bal =>
5394 Dout (Img (Node) & "matching or extending Bal");
5395 if Cursor >= Length or else Subject (Cursor + 1) = ')' then
5396 goto Fail;
5398 elsif Subject (Cursor + 1) = '(' then
5399 declare
5400 Paren_Count : Natural := 1;
5402 begin
5403 loop
5404 Cursor := Cursor + 1;
5406 if Cursor >= Length then
5407 goto Fail;
5409 elsif Subject (Cursor + 1) = '(' then
5410 Paren_Count := Paren_Count + 1;
5412 elsif Subject (Cursor + 1) = ')' then
5413 Paren_Count := Paren_Count - 1;
5414 exit when Paren_Count = 0;
5415 end if;
5416 end loop;
5417 end;
5418 end if;
5420 Cursor := Cursor + 1;
5421 Push (Node);
5422 goto Succeed;
5424 -- Break (one character case)
5426 when PC_Break_CH =>
5427 Dout (Img (Node) & "matching Break", Node.Char);
5429 while Cursor < Length loop
5430 if Subject (Cursor + 1) = Node.Char then
5431 goto Succeed;
5432 else
5433 Cursor := Cursor + 1;
5434 end if;
5435 end loop;
5437 goto Fail;
5439 -- Break (character set case)
5441 when PC_Break_CS =>
5442 Dout (Img (Node) & "matching Break", Node.CS);
5444 while Cursor < Length loop
5445 if Is_In (Subject (Cursor + 1), Node.CS) then
5446 goto Succeed;
5447 else
5448 Cursor := Cursor + 1;
5449 end if;
5450 end loop;
5452 goto Fail;
5454 -- Break (string function case)
5456 when PC_Break_VF => declare
5457 U : constant VString := Node.VF.all;
5458 Str : constant String_Access := Get_String (U);
5460 begin
5461 Dout (Img (Node) & "matching Break", Str.all);
5463 while Cursor < Length loop
5464 if Is_In (Subject (Cursor + 1), Str.all) then
5465 goto Succeed;
5466 else
5467 Cursor := Cursor + 1;
5468 end if;
5469 end loop;
5471 goto Fail;
5472 end;
5474 -- Break (string pointer case)
5476 when PC_Break_VP => declare
5477 Str : constant String_Access := Get_String (Node.VP.all);
5479 begin
5480 Dout (Img (Node) & "matching Break", Str.all);
5482 while Cursor < Length loop
5483 if Is_In (Subject (Cursor + 1), Str.all) then
5484 goto Succeed;
5485 else
5486 Cursor := Cursor + 1;
5487 end if;
5488 end loop;
5490 goto Fail;
5491 end;
5493 -- BreakX (one character case)
5495 when PC_BreakX_CH =>
5496 Dout (Img (Node) & "matching BreakX", Node.Char);
5498 while Cursor < Length loop
5499 if Subject (Cursor + 1) = Node.Char then
5500 goto Succeed;
5501 else
5502 Cursor := Cursor + 1;
5503 end if;
5504 end loop;
5506 goto Fail;
5508 -- BreakX (character set case)
5510 when PC_BreakX_CS =>
5511 Dout (Img (Node) & "matching BreakX", Node.CS);
5513 while Cursor < Length loop
5514 if Is_In (Subject (Cursor + 1), Node.CS) then
5515 goto Succeed;
5516 else
5517 Cursor := Cursor + 1;
5518 end if;
5519 end loop;
5521 goto Fail;
5523 -- BreakX (string function case)
5525 when PC_BreakX_VF => declare
5526 U : constant VString := Node.VF.all;
5527 Str : constant String_Access := Get_String (U);
5529 begin
5530 Dout (Img (Node) & "matching BreakX", Str.all);
5532 while Cursor < Length loop
5533 if Is_In (Subject (Cursor + 1), Str.all) then
5534 goto Succeed;
5535 else
5536 Cursor := Cursor + 1;
5537 end if;
5538 end loop;
5540 goto Fail;
5541 end;
5543 -- BreakX (string pointer case)
5545 when PC_BreakX_VP => declare
5546 Str : constant String_Access := Get_String (Node.VP.all);
5548 begin
5549 Dout (Img (Node) & "matching BreakX", Str.all);
5551 while Cursor < Length loop
5552 if Is_In (Subject (Cursor + 1), Str.all) then
5553 goto Succeed;
5554 else
5555 Cursor := Cursor + 1;
5556 end if;
5557 end loop;
5559 goto Fail;
5560 end;
5562 -- BreakX_X (BreakX extension). See section on "Compound Pattern
5563 -- Structures". This node is the alternative that is stacked
5564 -- to skip past the break character and extend the break.
5566 when PC_BreakX_X =>
5567 Dout (Img (Node) & "extending BreakX");
5569 Cursor := Cursor + 1;
5570 goto Succeed;
5572 -- Character (one character string)
5574 when PC_Char =>
5575 Dout (Img (Node) & "matching '" & Node.Char & ''');
5577 if Cursor < Length
5578 and then Subject (Cursor + 1) = Node.Char
5579 then
5580 Cursor := Cursor + 1;
5581 goto Succeed;
5582 else
5583 goto Fail;
5584 end if;
5586 -- End of Pattern
5588 when PC_EOP =>
5589 if Stack_Base = Stack_Init then
5590 Dout ("end of pattern");
5591 goto Match_Succeed;
5593 -- End of recursive inner match. See separate section on
5594 -- handing of recursive pattern matches for details.
5596 else
5597 Dout ("terminating recursive match");
5598 Node := Stack (Stack_Base - 1).Node;
5599 Pop_Region;
5600 goto Match;
5601 end if;
5603 -- Fail
5605 when PC_Fail =>
5606 Dout (Img (Node) & "matching Fail");
5607 goto Fail;
5609 -- Fence (built in pattern)
5611 when PC_Fence =>
5612 Dout (Img (Node) & "matching Fence");
5613 Push (CP_Cancel'Access);
5614 goto Succeed;
5616 -- Fence function node X. This is the node that gets control
5617 -- after a successful match of the fenced pattern.
5619 when PC_Fence_X =>
5620 Dout (Img (Node) & "matching Fence function");
5621 Stack_Ptr := Stack_Ptr + 1;
5622 Stack (Stack_Ptr).Cursor := Stack_Base;
5623 Stack (Stack_Ptr).Node := CP_Fence_Y'Access;
5624 Stack_Base := Stack (Stack_Base).Cursor;
5625 Region_Level := Region_Level - 1;
5626 goto Succeed;
5628 -- Fence function node Y. This is the node that gets control on
5629 -- a failure that occurs after the fenced pattern has matched.
5631 -- Note: the Cursor at this stage is actually the inner stack
5632 -- base value. We don't reset this, but we do use it to strip
5633 -- off all the entries made by the fenced pattern.
5635 when PC_Fence_Y =>
5636 Dout (Img (Node) & "pattern matched by Fence caused failure");
5637 Stack_Ptr := Cursor - 2;
5638 goto Fail;
5640 -- Len (integer case)
5642 when PC_Len_Nat =>
5643 Dout (Img (Node) & "matching Len", Node.Nat);
5645 if Cursor + Node.Nat > Length then
5646 goto Fail;
5647 else
5648 Cursor := Cursor + Node.Nat;
5649 goto Succeed;
5650 end if;
5652 -- Len (Integer function case)
5654 when PC_Len_NF => declare
5655 N : constant Natural := Node.NF.all;
5657 begin
5658 Dout (Img (Node) & "matching Len", N);
5660 if Cursor + N > Length then
5661 goto Fail;
5662 else
5663 Cursor := Cursor + N;
5664 goto Succeed;
5665 end if;
5666 end;
5668 -- Len (integer pointer case)
5670 when PC_Len_NP =>
5671 Dout (Img (Node) & "matching Len", Node.NP.all);
5673 if Cursor + Node.NP.all > Length then
5674 goto Fail;
5675 else
5676 Cursor := Cursor + Node.NP.all;
5677 goto Succeed;
5678 end if;
5680 -- NotAny (one character case)
5682 when PC_NotAny_CH =>
5683 Dout (Img (Node) & "matching NotAny", Node.Char);
5685 if Cursor < Length
5686 and then Subject (Cursor + 1) /= Node.Char
5687 then
5688 Cursor := Cursor + 1;
5689 goto Succeed;
5690 else
5691 goto Fail;
5692 end if;
5694 -- NotAny (character set case)
5696 when PC_NotAny_CS =>
5697 Dout (Img (Node) & "matching NotAny", Node.CS);
5699 if Cursor < Length
5700 and then not Is_In (Subject (Cursor + 1), Node.CS)
5701 then
5702 Cursor := Cursor + 1;
5703 goto Succeed;
5704 else
5705 goto Fail;
5706 end if;
5708 -- NotAny (string function case)
5710 when PC_NotAny_VF => declare
5711 U : constant VString := Node.VF.all;
5712 Str : constant String_Access := Get_String (U);
5714 begin
5715 Dout (Img (Node) & "matching NotAny", Str.all);
5717 if Cursor < Length
5718 and then
5719 not Is_In (Subject (Cursor + 1), Str.all)
5720 then
5721 Cursor := Cursor + 1;
5722 goto Succeed;
5723 else
5724 goto Fail;
5725 end if;
5726 end;
5728 -- NotAny (string pointer case)
5730 when PC_NotAny_VP => declare
5731 Str : constant String_Access := Get_String (Node.VP.all);
5733 begin
5734 Dout (Img (Node) & "matching NotAny", Str.all);
5736 if Cursor < Length
5737 and then
5738 not Is_In (Subject (Cursor + 1), Str.all)
5739 then
5740 Cursor := Cursor + 1;
5741 goto Succeed;
5742 else
5743 goto Fail;
5744 end if;
5745 end;
5747 -- NSpan (one character case)
5749 when PC_NSpan_CH =>
5750 Dout (Img (Node) & "matching NSpan", Node.Char);
5752 while Cursor < Length
5753 and then Subject (Cursor + 1) = Node.Char
5754 loop
5755 Cursor := Cursor + 1;
5756 end loop;
5758 goto Succeed;
5760 -- NSpan (character set case)
5762 when PC_NSpan_CS =>
5763 Dout (Img (Node) & "matching NSpan", Node.CS);
5765 while Cursor < Length
5766 and then Is_In (Subject (Cursor + 1), Node.CS)
5767 loop
5768 Cursor := Cursor + 1;
5769 end loop;
5771 goto Succeed;
5773 -- NSpan (string function case)
5775 when PC_NSpan_VF => declare
5776 U : constant VString := Node.VF.all;
5777 Str : constant String_Access := Get_String (U);
5779 begin
5780 Dout (Img (Node) & "matching NSpan", Str.all);
5782 while Cursor < Length
5783 and then Is_In (Subject (Cursor + 1), Str.all)
5784 loop
5785 Cursor := Cursor + 1;
5786 end loop;
5788 goto Succeed;
5789 end;
5791 -- NSpan (string pointer case)
5793 when PC_NSpan_VP => declare
5794 Str : constant String_Access := Get_String (Node.VP.all);
5796 begin
5797 Dout (Img (Node) & "matching NSpan", Str.all);
5799 while Cursor < Length
5800 and then Is_In (Subject (Cursor + 1), Str.all)
5801 loop
5802 Cursor := Cursor + 1;
5803 end loop;
5805 goto Succeed;
5806 end;
5808 when PC_Null =>
5809 Dout (Img (Node) & "matching null");
5810 goto Succeed;
5812 -- Pos (integer case)
5814 when PC_Pos_Nat =>
5815 Dout (Img (Node) & "matching Pos", Node.Nat);
5817 if Cursor = Node.Nat then
5818 goto Succeed;
5819 else
5820 goto Fail;
5821 end if;
5823 -- Pos (Integer function case)
5825 when PC_Pos_NF => declare
5826 N : constant Natural := Node.NF.all;
5828 begin
5829 Dout (Img (Node) & "matching Pos", N);
5831 if Cursor = N then
5832 goto Succeed;
5833 else
5834 goto Fail;
5835 end if;
5836 end;
5838 -- Pos (integer pointer case)
5840 when PC_Pos_NP =>
5841 Dout (Img (Node) & "matching Pos", Node.NP.all);
5843 if Cursor = Node.NP.all then
5844 goto Succeed;
5845 else
5846 goto Fail;
5847 end if;
5849 -- Predicate function
5851 when PC_Pred_Func =>
5852 Dout (Img (Node) & "matching predicate function");
5854 if Node.BF.all then
5855 goto Succeed;
5856 else
5857 goto Fail;
5858 end if;
5860 -- Region Enter. Initiate new pattern history stack region
5862 when PC_R_Enter =>
5863 Dout (Img (Node) & "starting match of nested pattern");
5864 Stack (Stack_Ptr + 1).Cursor := Cursor;
5865 Push_Region;
5866 goto Succeed;
5868 -- Region Remove node. This is the node stacked by an R_Enter.
5869 -- It removes the special format stack entry right underneath, and
5870 -- then restores the outer level stack base and signals failure.
5872 -- Note: the cursor value at this stage is actually the (negative)
5873 -- stack base value for the outer level.
5875 when PC_R_Remove =>
5876 Dout ("failure, match of nested pattern terminated");
5877 Stack_Base := Cursor;
5878 Region_Level := Region_Level - 1;
5879 Stack_Ptr := Stack_Ptr - 1;
5880 goto Fail;
5882 -- Region restore node. This is the node stacked at the end of an
5883 -- inner level match. Its function is to restore the inner level
5884 -- region, so that alternatives in this region can be sought.
5886 -- Note: the Cursor at this stage is actually the negative of the
5887 -- inner stack base value, which we use to restore the inner region.
5889 when PC_R_Restore =>
5890 Dout ("failure, search for alternatives in nested pattern");
5891 Region_Level := Region_Level + 1;
5892 Stack_Base := Cursor;
5893 goto Fail;
5895 -- Rest
5897 when PC_Rest =>
5898 Dout (Img (Node) & "matching Rest");
5899 Cursor := Length;
5900 goto Succeed;
5902 -- Initiate recursive match (pattern pointer case)
5904 when PC_Rpat =>
5905 Stack (Stack_Ptr + 1).Node := Node.Pthen;
5906 Push_Region;
5907 Dout (Img (Node) & "initiating recursive match");
5909 if Stack_Ptr + Node.PP.all.Stk >= Stack_Size then
5910 raise Pattern_Stack_Overflow;
5911 else
5912 Node := Node.PP.all.P;
5913 goto Match;
5914 end if;
5916 -- RPos (integer case)
5918 when PC_RPos_Nat =>
5919 Dout (Img (Node) & "matching RPos", Node.Nat);
5921 if Cursor = (Length - Node.Nat) then
5922 goto Succeed;
5923 else
5924 goto Fail;
5925 end if;
5927 -- RPos (integer function case)
5929 when PC_RPos_NF => declare
5930 N : constant Natural := Node.NF.all;
5932 begin
5933 Dout (Img (Node) & "matching RPos", N);
5935 if Length - Cursor = N then
5936 goto Succeed;
5937 else
5938 goto Fail;
5939 end if;
5940 end;
5942 -- RPos (integer pointer case)
5944 when PC_RPos_NP =>
5945 Dout (Img (Node) & "matching RPos", Node.NP.all);
5947 if Cursor = (Length - Node.NP.all) then
5948 goto Succeed;
5949 else
5950 goto Fail;
5951 end if;
5953 -- RTab (integer case)
5955 when PC_RTab_Nat =>
5956 Dout (Img (Node) & "matching RTab", Node.Nat);
5958 if Cursor <= (Length - Node.Nat) then
5959 Cursor := Length - Node.Nat;
5960 goto Succeed;
5961 else
5962 goto Fail;
5963 end if;
5965 -- RTab (integer function case)
5967 when PC_RTab_NF => declare
5968 N : constant Natural := Node.NF.all;
5970 begin
5971 Dout (Img (Node) & "matching RPos", N);
5973 if Length - Cursor >= N then
5974 Cursor := Length - N;
5975 goto Succeed;
5976 else
5977 goto Fail;
5978 end if;
5979 end;
5981 -- RTab (integer pointer case)
5983 when PC_RTab_NP =>
5984 Dout (Img (Node) & "matching RPos", Node.NP.all);
5986 if Cursor <= (Length - Node.NP.all) then
5987 Cursor := Length - Node.NP.all;
5988 goto Succeed;
5989 else
5990 goto Fail;
5991 end if;
5993 -- Cursor assignment
5995 when PC_Setcur =>
5996 Dout (Img (Node) & "matching Setcur");
5997 Node.Var.all := Cursor;
5998 goto Succeed;
6000 -- Span (one character case)
6002 when PC_Span_CH => declare
6003 P : Natural := Cursor;
6005 begin
6006 Dout (Img (Node) & "matching Span", Node.Char);
6008 while P < Length
6009 and then Subject (P + 1) = Node.Char
6010 loop
6011 P := P + 1;
6012 end loop;
6014 if P /= Cursor then
6015 Cursor := P;
6016 goto Succeed;
6017 else
6018 goto Fail;
6019 end if;
6020 end;
6022 -- Span (character set case)
6024 when PC_Span_CS => declare
6025 P : Natural := Cursor;
6027 begin
6028 Dout (Img (Node) & "matching Span", Node.CS);
6030 while P < Length
6031 and then Is_In (Subject (P + 1), Node.CS)
6032 loop
6033 P := P + 1;
6034 end loop;
6036 if P /= Cursor then
6037 Cursor := P;
6038 goto Succeed;
6039 else
6040 goto Fail;
6041 end if;
6042 end;
6044 -- Span (string function case)
6046 when PC_Span_VF => declare
6047 U : constant VString := Node.VF.all;
6048 Str : constant String_Access := Get_String (U);
6049 P : Natural := Cursor;
6051 begin
6052 Dout (Img (Node) & "matching Span", Str.all);
6054 while P < Length
6055 and then Is_In (Subject (P + 1), Str.all)
6056 loop
6057 P := P + 1;
6058 end loop;
6060 if P /= Cursor then
6061 Cursor := P;
6062 goto Succeed;
6063 else
6064 goto Fail;
6065 end if;
6066 end;
6068 -- Span (string pointer case)
6070 when PC_Span_VP => declare
6071 Str : constant String_Access := Get_String (Node.VP.all);
6072 P : Natural := Cursor;
6074 begin
6075 Dout (Img (Node) & "matching Span", Str.all);
6077 while P < Length
6078 and then Is_In (Subject (P + 1), Str.all)
6079 loop
6080 P := P + 1;
6081 end loop;
6083 if P /= Cursor then
6084 Cursor := P;
6085 goto Succeed;
6086 else
6087 goto Fail;
6088 end if;
6089 end;
6091 -- String (two character case)
6093 when PC_String_2 =>
6094 Dout (Img (Node) & "matching " & Image (Node.Str2));
6096 if (Length - Cursor) >= 2
6097 and then Subject (Cursor + 1 .. Cursor + 2) = Node.Str2
6098 then
6099 Cursor := Cursor + 2;
6100 goto Succeed;
6101 else
6102 goto Fail;
6103 end if;
6105 -- String (three character case)
6107 when PC_String_3 =>
6108 Dout (Img (Node) & "matching " & Image (Node.Str3));
6110 if (Length - Cursor) >= 3
6111 and then Subject (Cursor + 1 .. Cursor + 3) = Node.Str3
6112 then
6113 Cursor := Cursor + 3;
6114 goto Succeed;
6115 else
6116 goto Fail;
6117 end if;
6119 -- String (four character case)
6121 when PC_String_4 =>
6122 Dout (Img (Node) & "matching " & Image (Node.Str4));
6124 if (Length - Cursor) >= 4
6125 and then Subject (Cursor + 1 .. Cursor + 4) = Node.Str4
6126 then
6127 Cursor := Cursor + 4;
6128 goto Succeed;
6129 else
6130 goto Fail;
6131 end if;
6133 -- String (five character case)
6135 when PC_String_5 =>
6136 Dout (Img (Node) & "matching " & Image (Node.Str5));
6138 if (Length - Cursor) >= 5
6139 and then Subject (Cursor + 1 .. Cursor + 5) = Node.Str5
6140 then
6141 Cursor := Cursor + 5;
6142 goto Succeed;
6143 else
6144 goto Fail;
6145 end if;
6147 -- String (six character case)
6149 when PC_String_6 =>
6150 Dout (Img (Node) & "matching " & Image (Node.Str6));
6152 if (Length - Cursor) >= 6
6153 and then Subject (Cursor + 1 .. Cursor + 6) = Node.Str6
6154 then
6155 Cursor := Cursor + 6;
6156 goto Succeed;
6157 else
6158 goto Fail;
6159 end if;
6161 -- String (case of more than six characters)
6163 when PC_String => declare
6164 Len : constant Natural := Node.Str'Length;
6166 begin
6167 Dout (Img (Node) & "matching " & Image (Node.Str.all));
6169 if (Length - Cursor) >= Len
6170 and then Node.Str.all = Subject (Cursor + 1 .. Cursor + Len)
6171 then
6172 Cursor := Cursor + Len;
6173 goto Succeed;
6174 else
6175 goto Fail;
6176 end if;
6177 end;
6179 -- String (function case)
6181 when PC_String_VF => declare
6182 U : constant VString := Node.VF.all;
6183 Str : constant String_Access := Get_String (U);
6184 Len : constant Natural := Str'Length;
6186 begin
6187 Dout (Img (Node) & "matching " & Image (Str.all));
6189 if (Length - Cursor) >= Len
6190 and then Str.all = Subject (Cursor + 1 .. Cursor + Len)
6191 then
6192 Cursor := Cursor + Len;
6193 goto Succeed;
6194 else
6195 goto Fail;
6196 end if;
6197 end;
6199 -- String (vstring pointer case)
6201 when PC_String_VP => declare
6202 S : constant String_Access := Get_String (Node.VP.all);
6203 Len : constant Natural :=
6204 Ada.Strings.Unbounded.Length (Node.VP.all);
6206 begin
6207 Dout
6208 (Img (Node) & "matching " & Image (S.all));
6210 if (Length - Cursor) >= Len
6211 and then S.all = Subject (Cursor + 1 .. Cursor + Len)
6212 then
6213 Cursor := Cursor + Len;
6214 goto Succeed;
6215 else
6216 goto Fail;
6217 end if;
6218 end;
6220 -- Succeed
6222 when PC_Succeed =>
6223 Dout (Img (Node) & "matching Succeed");
6224 Push (Node);
6225 goto Succeed;
6227 -- Tab (integer case)
6229 when PC_Tab_Nat =>
6230 Dout (Img (Node) & "matching Tab", Node.Nat);
6232 if Cursor <= Node.Nat then
6233 Cursor := Node.Nat;
6234 goto Succeed;
6235 else
6236 goto Fail;
6237 end if;
6239 -- Tab (integer function case)
6241 when PC_Tab_NF => declare
6242 N : constant Natural := Node.NF.all;
6244 begin
6245 Dout (Img (Node) & "matching Tab ", N);
6247 if Cursor <= N then
6248 Cursor := N;
6249 goto Succeed;
6250 else
6251 goto Fail;
6252 end if;
6253 end;
6255 -- Tab (integer pointer case)
6257 when PC_Tab_NP =>
6258 Dout (Img (Node) & "matching Tab ", Node.NP.all);
6260 if Cursor <= Node.NP.all then
6261 Cursor := Node.NP.all;
6262 goto Succeed;
6263 else
6264 goto Fail;
6265 end if;
6267 -- Unanchored movement
6269 when PC_Unanchored =>
6270 Dout ("attempting to move anchor point");
6272 -- All done if we tried every position
6274 if Cursor > Length then
6275 goto Match_Fail;
6277 -- Otherwise extend the anchor point, and restack ourself
6279 else
6280 Cursor := Cursor + 1;
6281 Push (Node);
6282 goto Succeed;
6283 end if;
6285 -- Write immediate. This node performs the actual write
6287 when PC_Write_Imm =>
6288 Dout (Img (Node) & "executing immediate write of " &
6289 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
6291 Put_Line
6292 (Node.FP.all,
6293 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
6294 Pop_Region;
6295 goto Succeed;
6297 -- Write on match. This node sets up for the eventual write
6299 when PC_Write_OnM =>
6300 Dout (Img (Node) & "registering deferred write");
6301 Stack (Stack_Base - 1).Node := Node;
6302 Push (CP_Assign'Access);
6303 Pop_Region;
6304 Assign_OnM := True;
6305 goto Succeed;
6307 end case;
6309 -- We are NOT allowed to fall though this case statement, since every
6310 -- match routine must end by executing a goto to the appropriate point
6311 -- in the finite state machine model.
6313 pragma Warnings (Off);
6314 Logic_Error;
6315 pragma Warnings (On);
6316 end XMatchD;
6318 end GNAT.Spitbol.Patterns;