Merge from mainline
[official-gcc.git] / gcc / ada / g-spipat.adb
blob8a5375ddd6fd0b7fedfba0383b7ec937e01abdcf
1 ------------------------------------------------------------------------------
2 -- --
3 -- GNAT LIBRARY COMPONENTS --
4 -- --
5 -- G N A T . S P I T B O L . P A T T E R N S --
6 -- --
7 -- B o d y --
8 -- --
9 -- Copyright (C) 1998-2006, AdaCore --
10 -- --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 2, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
17 -- for more details. You should have received a copy of the GNU General --
18 -- Public License distributed with GNAT; see file COPYING. If not, write --
19 -- to the Free Software Foundation, 51 Franklin Street, Fifth Floor, --
20 -- Boston, MA 02110-1301, USA. --
21 -- --
22 -- As a special exception, if other files instantiate generics from this --
23 -- unit, or you link this unit with other files to produce an executable, --
24 -- this unit does not by itself cause the resulting executable to be --
25 -- covered by the GNU General Public License. This exception does not --
26 -- however invalidate any other reasons why the executable file might be --
27 -- covered by the GNU Public License. --
28 -- --
29 -- GNAT was originally developed by the GNAT team at New York University. --
30 -- Extensive contributions were provided by Ada Core Technologies Inc. --
31 -- --
32 ------------------------------------------------------------------------------
34 -- Note: the data structures and general approach used in this implementation
35 -- are derived from the original MINIMAL sources for SPITBOL. The code is not
36 -- a direct translation, but the approach is followed closely. In particular,
37 -- we use the one stack approach developed in the SPITBOL implementation.
39 with Ada.Exceptions; use Ada.Exceptions;
40 with Ada.Strings.Unbounded.Aux; use Ada.Strings.Unbounded.Aux;
42 with GNAT.Debug_Utilities; use GNAT.Debug_Utilities;
44 with System; use System;
46 with Unchecked_Conversion;
47 with Unchecked_Deallocation;
49 package body GNAT.Spitbol.Patterns is
51 ------------------------
52 -- Internal Debugging --
53 ------------------------
55 Internal_Debug : constant Boolean := False;
56 -- Set this flag to True to activate some built-in debugging traceback
57 -- These are all lines output with PutD and Put_LineD.
59 procedure New_LineD;
60 pragma Inline (New_LineD);
61 -- Output new blank line with New_Line if Internal_Debug is True
63 procedure PutD (Str : String);
64 pragma Inline (PutD);
65 -- Output string with Put if Internal_Debug is True
67 procedure Put_LineD (Str : String);
68 pragma Inline (Put_LineD);
69 -- Output string with Put_Line if Internal_Debug is True
71 -----------------------------
72 -- Local Type Declarations --
73 -----------------------------
75 subtype String_Ptr is Ada.Strings.Unbounded.String_Access;
76 subtype File_Ptr is Ada.Text_IO.File_Access;
78 function To_Address is new Unchecked_Conversion (PE_Ptr, Address);
79 -- Used only for debugging output purposes
81 subtype AFC is Ada.Finalization.Controlled;
83 N : constant PE_Ptr := null;
84 -- Shorthand used to initialize Copy fields to null
86 type Natural_Ptr is access all Natural;
87 type Pattern_Ptr is access all Pattern;
89 --------------------------------------------------
90 -- Description of Algorithm and Data Structures --
91 --------------------------------------------------
93 -- A pattern structure is represented as a linked graph of nodes
94 -- with the following structure:
96 -- +------------------------------------+
97 -- I Pcode I
98 -- +------------------------------------+
99 -- I Index I
100 -- +------------------------------------+
101 -- I Pthen I
102 -- +------------------------------------+
103 -- I parameter(s) I
104 -- +------------------------------------+
106 -- Pcode is a code value indicating the type of the patterm node. This
107 -- code is used both as the discriminant value for the record, and as
108 -- the case index in the main match routine that branches to the proper
109 -- match code for the given element.
111 -- Index is a serial index number. The use of these serial index
112 -- numbers is described in a separate section.
114 -- Pthen is a pointer to the successor node, i.e the node to be matched
115 -- if the attempt to match the node succeeds. If this is the last node
116 -- of the pattern to be matched, then Pthen points to a dummy node
117 -- of kind PC_EOP (end of pattern), which initiales pattern exit.
119 -- The parameter or parameters are present for certain node types,
120 -- and the type varies with the pattern code.
122 type Pattern_Code is (
123 PC_Arb_Y,
124 PC_Assign,
125 PC_Bal,
126 PC_BreakX_X,
127 PC_Cancel,
128 PC_EOP,
129 PC_Fail,
130 PC_Fence,
131 PC_Fence_X,
132 PC_Fence_Y,
133 PC_R_Enter,
134 PC_R_Remove,
135 PC_R_Restore,
136 PC_Rest,
137 PC_Succeed,
138 PC_Unanchored,
140 PC_Alt,
141 PC_Arb_X,
142 PC_Arbno_S,
143 PC_Arbno_X,
145 PC_Rpat,
147 PC_Pred_Func,
149 PC_Assign_Imm,
150 PC_Assign_OnM,
151 PC_Any_VP,
152 PC_Break_VP,
153 PC_BreakX_VP,
154 PC_NotAny_VP,
155 PC_NSpan_VP,
156 PC_Span_VP,
157 PC_String_VP,
159 PC_Write_Imm,
160 PC_Write_OnM,
162 PC_Null,
163 PC_String,
165 PC_String_2,
166 PC_String_3,
167 PC_String_4,
168 PC_String_5,
169 PC_String_6,
171 PC_Setcur,
173 PC_Any_CH,
174 PC_Break_CH,
175 PC_BreakX_CH,
176 PC_Char,
177 PC_NotAny_CH,
178 PC_NSpan_CH,
179 PC_Span_CH,
181 PC_Any_CS,
182 PC_Break_CS,
183 PC_BreakX_CS,
184 PC_NotAny_CS,
185 PC_NSpan_CS,
186 PC_Span_CS,
188 PC_Arbno_Y,
189 PC_Len_Nat,
190 PC_Pos_Nat,
191 PC_RPos_Nat,
192 PC_RTab_Nat,
193 PC_Tab_Nat,
195 PC_Pos_NF,
196 PC_Len_NF,
197 PC_RPos_NF,
198 PC_RTab_NF,
199 PC_Tab_NF,
201 PC_Pos_NP,
202 PC_Len_NP,
203 PC_RPos_NP,
204 PC_RTab_NP,
205 PC_Tab_NP,
207 PC_Any_VF,
208 PC_Break_VF,
209 PC_BreakX_VF,
210 PC_NotAny_VF,
211 PC_NSpan_VF,
212 PC_Span_VF,
213 PC_String_VF);
215 type IndexT is range 0 .. +(2 **15 - 1);
217 type PE (Pcode : Pattern_Code) is record
219 Index : IndexT;
220 -- Serial index number of pattern element within pattern
222 Pthen : PE_Ptr;
223 -- Successor element, to be matched after this one
225 case Pcode is
227 when PC_Arb_Y |
228 PC_Assign |
229 PC_Bal |
230 PC_BreakX_X |
231 PC_Cancel |
232 PC_EOP |
233 PC_Fail |
234 PC_Fence |
235 PC_Fence_X |
236 PC_Fence_Y |
237 PC_Null |
238 PC_R_Enter |
239 PC_R_Remove |
240 PC_R_Restore |
241 PC_Rest |
242 PC_Succeed |
243 PC_Unanchored => null;
245 when PC_Alt |
246 PC_Arb_X |
247 PC_Arbno_S |
248 PC_Arbno_X => Alt : PE_Ptr;
250 when PC_Rpat => PP : Pattern_Ptr;
252 when PC_Pred_Func => BF : Boolean_Func;
254 when PC_Assign_Imm |
255 PC_Assign_OnM |
256 PC_Any_VP |
257 PC_Break_VP |
258 PC_BreakX_VP |
259 PC_NotAny_VP |
260 PC_NSpan_VP |
261 PC_Span_VP |
262 PC_String_VP => VP : VString_Ptr;
264 when PC_Write_Imm |
265 PC_Write_OnM => FP : File_Ptr;
267 when PC_String => Str : String_Ptr;
269 when PC_String_2 => Str2 : String (1 .. 2);
271 when PC_String_3 => Str3 : String (1 .. 3);
273 when PC_String_4 => Str4 : String (1 .. 4);
275 when PC_String_5 => Str5 : String (1 .. 5);
277 when PC_String_6 => Str6 : String (1 .. 6);
279 when PC_Setcur => Var : Natural_Ptr;
281 when PC_Any_CH |
282 PC_Break_CH |
283 PC_BreakX_CH |
284 PC_Char |
285 PC_NotAny_CH |
286 PC_NSpan_CH |
287 PC_Span_CH => Char : Character;
289 when PC_Any_CS |
290 PC_Break_CS |
291 PC_BreakX_CS |
292 PC_NotAny_CS |
293 PC_NSpan_CS |
294 PC_Span_CS => CS : Character_Set;
296 when PC_Arbno_Y |
297 PC_Len_Nat |
298 PC_Pos_Nat |
299 PC_RPos_Nat |
300 PC_RTab_Nat |
301 PC_Tab_Nat => Nat : Natural;
303 when PC_Pos_NF |
304 PC_Len_NF |
305 PC_RPos_NF |
306 PC_RTab_NF |
307 PC_Tab_NF => NF : Natural_Func;
309 when PC_Pos_NP |
310 PC_Len_NP |
311 PC_RPos_NP |
312 PC_RTab_NP |
313 PC_Tab_NP => NP : Natural_Ptr;
315 when PC_Any_VF |
316 PC_Break_VF |
317 PC_BreakX_VF |
318 PC_NotAny_VF |
319 PC_NSpan_VF |
320 PC_Span_VF |
321 PC_String_VF => VF : VString_Func;
323 end case;
324 end record;
326 subtype PC_Has_Alt is Pattern_Code range PC_Alt .. PC_Arbno_X;
327 -- Range of pattern codes that has an Alt field. This is used in the
328 -- recursive traversals, since these links must be followed.
330 EOP_Element : aliased constant PE := (PC_EOP, 0, N);
331 -- This is the end of pattern element, and is thus the representation of
332 -- a null pattern. It has a zero index element since it is never placed
333 -- inside a pattern. Furthermore it does not need a successor, since it
334 -- marks the end of the pattern, so that no more successors are needed.
336 EOP : constant PE_Ptr := EOP_Element'Unrestricted_Access;
337 -- This is the end of pattern pointer, that is used in the Pthen pointer
338 -- of other nodes to signal end of pattern.
340 -- The following array is used to determine if a pattern used as an
341 -- argument for Arbno is eligible for treatment using the simple Arbno
342 -- structure (i.e. it is a pattern that is guaranteed to match at least
343 -- one character on success, and not to make any entries on the stack.
345 OK_For_Simple_Arbno : constant array (Pattern_Code) of Boolean :=
346 (PC_Any_CS |
347 PC_Any_CH |
348 PC_Any_VF |
349 PC_Any_VP |
350 PC_Char |
351 PC_Len_Nat |
352 PC_NotAny_CS |
353 PC_NotAny_CH |
354 PC_NotAny_VF |
355 PC_NotAny_VP |
356 PC_Span_CS |
357 PC_Span_CH |
358 PC_Span_VF |
359 PC_Span_VP |
360 PC_String |
361 PC_String_2 |
362 PC_String_3 |
363 PC_String_4 |
364 PC_String_5 |
365 PC_String_6 => True,
366 others => False);
368 -------------------------------
369 -- The Pattern History Stack --
370 -------------------------------
372 -- The pattern history stack is used for controlling backtracking when
373 -- a match fails. The idea is to stack entries that give a cursor value
374 -- to be restored, and a node to be reestablished as the current node to
375 -- attempt an appropriate rematch operation. The processing for a pattern
376 -- element that has rematch alternatives pushes an appropriate entry or
377 -- entry on to the stack, and the proceeds. If a match fails at any point,
378 -- the top element of the stack is popped off, resetting the cursor and
379 -- the match continues by accessing the node stored with this entry.
381 type Stack_Entry is record
383 Cursor : Integer;
384 -- Saved cursor value that is restored when this entry is popped
385 -- from the stack if a match attempt fails. Occasionally, this
386 -- field is used to store a history stack pointer instead of a
387 -- cursor. Such cases are noted in the documentation and the value
388 -- stored is negative since stack pointer values are always negative.
390 Node : PE_Ptr;
391 -- This pattern element reference is reestablished as the current
392 -- Node to be matched (which will attempt an appropriate rematch).
394 end record;
396 subtype Stack_Range is Integer range -Stack_Size .. -1;
398 type Stack_Type is array (Stack_Range) of Stack_Entry;
399 -- The type used for a history stack. The actual instance of the stack
400 -- is declared as a local variable in the Match routine, to properly
401 -- handle recursive calls to Match. All stack pointer values are negative
402 -- to distinguish them from normal cursor values.
404 -- Note: the pattern matching stack is used only to handle backtracking.
405 -- If no backtracking occurs, its entries are never accessed, and never
406 -- popped off, and in particular it is normal for a successful match
407 -- to terminate with entries on the stack that are simply discarded.
409 -- Note: in subsequent diagrams of the stack, we always place element
410 -- zero (the deepest element) at the top of the page, then build the
411 -- stack down on the page with the most recent (top of stack) element
412 -- being the bottom-most entry on the page.
414 -- Stack checking is handled by labeling every pattern with the maximum
415 -- number of stack entries that are required, so a single check at the
416 -- start of matching the pattern suffices. There are two exceptions.
418 -- First, the count does not include entries for recursive pattern
419 -- references. Such recursions must therefore perform a specific
420 -- stack check with respect to the number of stack entries required
421 -- by the recursive pattern that is accessed and the amount of stack
422 -- that remains unused.
424 -- Second, the count includes only one iteration of an Arbno pattern,
425 -- so a specific check must be made on subsequent iterations that there
426 -- is still enough stack space left. The Arbno node has a field that
427 -- records the number of stack entries required by its argument for
428 -- this purpose.
430 ---------------------------------------------------
431 -- Use of Serial Index Field in Pattern Elements --
432 ---------------------------------------------------
434 -- The serial index numbers for the pattern elements are assigned as
435 -- a pattern is consructed from its constituent elements. Note that there
436 -- is never any sharing of pattern elements between patterns (copies are
437 -- always made), so the serial index numbers are unique to a particular
438 -- pattern as referenced from the P field of a value of type Pattern.
440 -- The index numbers meet three separate invariants, which are used for
441 -- various purposes as described in this section.
443 -- First, the numbers uniquely identify the pattern elements within a
444 -- pattern. If Num is the number of elements in a given pattern, then
445 -- the serial index numbers for the elements of this pattern will range
446 -- from 1 .. Num, so that each element has a separate value.
448 -- The purpose of this assignment is to provide a convenient auxiliary
449 -- data structure mechanism during operations which must traverse a
450 -- pattern (e.g. copy and finalization processing). Once constructed
451 -- patterns are strictly read only. This is necessary to allow sharing
452 -- of patterns between tasks. This means that we cannot go marking the
453 -- pattern (e.g. with a visited bit). Instead we cosntuct a separate
454 -- vector that contains the necessary information indexed by the Index
455 -- values in the pattern elements. For this purpose the only requirement
456 -- is that they be uniquely assigned.
458 -- Second, the pattern element referenced directly, i.e. the leading
459 -- pattern element, is always the maximum numbered element and therefore
460 -- indicates the total number of elements in the pattern. More precisely,
461 -- the element referenced by the P field of a pattern value, or the
462 -- element returned by any of the internal pattern construction routines
463 -- in the body (that return a value of type PE_Ptr) always is this
464 -- maximum element,
466 -- The purpose of this requirement is to allow an immediate determination
467 -- of the number of pattern elements within a pattern. This is used to
468 -- properly size the vectors used to contain auxiliary information for
469 -- traversal as described above.
471 -- Third, as compound pattern structures are constructed, the way in which
472 -- constituent parts of the pattern are constructed is stylized. This is
473 -- an automatic consequence of the way that these compounjd structures
474 -- are constructed, and basically what we are doing is simply documenting
475 -- and specifying the natural result of the pattern construction. The
476 -- section describing compound pattern structures gives details of the
477 -- numbering of each compound pattern structure.
479 -- The purpose of specifying the stylized numbering structures for the
480 -- compound patterns is to help simplify the processing in the Image
481 -- function, since it eases the task of retrieving the original recursive
482 -- structure of the pattern from the flat graph structure of elements.
483 -- This use in the Image function is the only point at which the code
484 -- makes use of the stylized structures.
486 type Ref_Array is array (IndexT range <>) of PE_Ptr;
487 -- This type is used to build an array whose N'th entry references the
488 -- element in a pattern whose Index value is N. See Build_Ref_Array.
490 procedure Build_Ref_Array (E : PE_Ptr; RA : out Ref_Array);
491 -- Given a pattern element which is the leading element of a pattern
492 -- structure, and a Ref_Array with bounds 1 .. E.Index, fills in the
493 -- Ref_Array so that its N'th entry references the element of the
494 -- referenced pattern whose Index value is N.
496 -------------------------------
497 -- Recursive Pattern Matches --
498 -------------------------------
500 -- The pattern primitive (+P) where P is a Pattern_Ptr or Pattern_Func
501 -- causes a recursive pattern match. This cannot be handled by an actual
502 -- recursive call to the outer level Match routine, since this would not
503 -- allow for possible backtracking into the region matched by the inner
504 -- pattern. Indeed this is the classical clash between recursion and
505 -- backtracking, and a simple recursive stack structure does not suffice.
507 -- This section describes how this recursion and the possible associated
508 -- backtracking is handled. We still use a single stack, but we establish
509 -- the concept of nested regions on this stack, each of which has a stack
510 -- base value pointing to the deepest stack entry of the region. The base
511 -- value for the outer level is zero.
513 -- When a recursive match is established, two special stack entries are
514 -- made. The first entry is used to save the original node that starts
515 -- the recursive match. This is saved so that the successor field of
516 -- this node is accessible at the end of the match, but it is never
517 -- popped and executed.
519 -- The second entry corresponds to a standard new region action. A
520 -- PC_R_Remove node is stacked, whose cursor field is used to store
521 -- the outer stack base, and the stack base is reset to point to
522 -- this PC_R_Remove node. Then the recursive pattern is matched and
523 -- it can make history stack entries in the normal matter, so now
524 -- the stack looks like:
526 -- (stack entries made by outer level)
528 -- (Special entry, node is (+P) successor
529 -- cursor entry is not used)
531 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack base
532 -- saved base value for the enclosing region)
534 -- (stack entries made by inner level)
536 -- If a subsequent failure occurs and pops the PC_R_Remove node, it
537 -- removes itself and the special entry immediately underneath it,
538 -- restores the stack base value for the enclosing region, and then
539 -- again signals failure to look for alternatives that were stacked
540 -- before the recursion was initiated.
542 -- Now we need to consider what happens if the inner pattern succeeds, as
543 -- signalled by accessing the special PC_EOP pattern primitive. First we
544 -- recognize the nested case by looking at the Base value. If this Base
545 -- value is Stack'First, then the entire match has succeeded, but if the
546 -- base value is greater than Stack'First, then we have successfully
547 -- matched an inner pattern, and processing continues at the outer level.
549 -- There are two cases. The simple case is when the inner pattern has made
550 -- no stack entries, as recognized by the fact that the current stack
551 -- pointer is equal to the current base value. In this case it is fine to
552 -- remove all trace of the recursion by restoring the outer base value and
553 -- using the special entry to find the appropriate successor node.
555 -- The more complex case arises when the inner match does make stack
556 -- entries. In this case, the PC_EOP processing stacks a special entry
557 -- whose cursor value saves the saved inner base value (the one that
558 -- references the corresponding PC_R_Remove value), and whose node
559 -- pointer references a PC_R_Restore node, so the stack looks like:
561 -- (stack entries made by outer level)
563 -- (Special entry, node is (+P) successor,
564 -- cursor entry is not used)
566 -- (PC_R_Remove entry, "cursor" value is (negative)
567 -- saved base value for the enclosing region)
569 -- (stack entries made by inner level)
571 -- (PC_Region_Replace entry, "cursor" value is (negative)
572 -- stack pointer value referencing the PC_R_Remove entry).
574 -- If the entire match succeeds, then these stack entries are, as usual,
575 -- ignored and abandoned. If on the other hand a subsequent failure
576 -- causes the PC_Region_Replace entry to be popped, it restores the
577 -- inner base value from its saved "cursor" value and then fails again.
578 -- Note that it is OK that the cursor is temporarily clobbered by this
579 -- pop, since the second failure will reestablish a proper cursor value.
581 ---------------------------------
582 -- Compound Pattern Structures --
583 ---------------------------------
585 -- This section discusses the compound structures used to represent
586 -- constructed patterns. It shows the graph structures of pattern
587 -- elements that are constructed, and in the case of patterns that
588 -- provide backtracking possibilities, describes how the history
589 -- stack is used to control the backtracking. Finally, it notes the
590 -- way in which the Index numbers are assigned to the structure.
592 -- In all diagrams, solid lines (built witth minus signs or vertical
593 -- bars, represent successor pointers (Pthen fields) with > or V used
594 -- to indicate the direction of the pointer. The initial node of the
595 -- structure is in the upper left of the diagram. A dotted line is an
596 -- alternative pointer from the element above it to the element below
597 -- it. See individual sections for details on how alternatives are used.
599 -------------------
600 -- Concatenation --
601 -------------------
603 -- In the pattern structures listed in this section, a line that looks
604 -- lile ----> with nothing to the right indicates an end of pattern
605 -- (EOP) pointer that represents the end of the match.
607 -- When a pattern concatenation (L & R) occurs, the resulting structure
608 -- is obtained by finding all such EOP pointers in L, and replacing
609 -- them to point to R. This is the most important flattening that
610 -- occurs in constructing a pattern, and it means that the pattern
611 -- matching circuitry does not have to keep track of the structure
612 -- of a pattern with respect to concatenation, since the appropriate
613 -- succesor is always at hand.
615 -- Concatenation itself generates no additional possibilities for
616 -- backtracking, but the constituent patterns of the concatenated
617 -- structure will make stack entries as usual. The maximum amount
618 -- of stack required by the structure is thus simply the sum of the
619 -- maximums required by L and R.
621 -- The index numbering of a concatenation structure works by leaving
622 -- the numbering of the right hand pattern, R, unchanged and adjusting
623 -- the numbers in the left hand pattern, L up by the count of elements
624 -- in R. This ensures that the maximum numbered element is the leading
625 -- element as required (given that it was the leading element in L).
627 -----------------
628 -- Alternation --
629 -----------------
631 -- A pattern (L or R) constructs the structure:
633 -- +---+ +---+
634 -- | A |---->| L |---->
635 -- +---+ +---+
636 -- .
637 -- .
638 -- +---+
639 -- | R |---->
640 -- +---+
642 -- The A element here is a PC_Alt node, and the dotted line represents
643 -- the contents of the Alt field. When the PC_Alt element is matched,
644 -- it stacks a pointer to the leading element of R on the history stack
645 -- so that on subsequent failure, a match of R is attempted.
647 -- The A node is the higest numbered element in the pattern. The
648 -- original index numbers of R are unchanged, but the index numbers
649 -- of the L pattern are adjusted up by the count of elements in R.
651 -- Note that the difference between the index of the L leading element
652 -- the index of the R leading element (after building the alt structure)
653 -- indicates the number of nodes in L, and this is true even after the
654 -- structure is incorporated into some larger structure. For example,
655 -- if the A node has index 16, and L has index 15 and R has index
656 -- 5, then we know that L has 10 (15-5) elements in it.
658 -- Suppose that we now concatenate this structure to another pattern
659 -- with 9 elements in it. We will now have the A node with an index
660 -- of 25, L with an index of 24 and R with an index of 14. We still
661 -- know that L has 10 (24-14) elements in it, numbered 15-24, and
662 -- consequently the successor of the alternation structure has an
663 -- index with a value less than 15. This is used in Image to figure
664 -- out the original recursive structure of a pattern.
666 -- To clarify the interaction of the alternation and concatenation
667 -- structures, here is a more complex example of the structure built
668 -- for the pattern:
670 -- (V or W or X) (Y or Z)
672 -- where A,B,C,D,E are all single element patterns:
674 -- +---+ +---+ +---+ +---+
675 -- I A I---->I V I---+-->I A I---->I Y I---->
676 -- +---+ +---+ I +---+ +---+
677 -- . I .
678 -- . I .
679 -- +---+ +---+ I +---+
680 -- I A I---->I W I-->I I Z I---->
681 -- +---+ +---+ I +---+
682 -- . I
683 -- . I
684 -- +---+ I
685 -- I X I------------>+
686 -- +---+
688 -- The numbering of the nodes would be as follows:
690 -- +---+ +---+ +---+ +---+
691 -- I 8 I---->I 7 I---+-->I 3 I---->I 2 I---->
692 -- +---+ +---+ I +---+ +---+
693 -- . I .
694 -- . I .
695 -- +---+ +---+ I +---+
696 -- I 6 I---->I 5 I-->I I 1 I---->
697 -- +---+ +---+ I +---+
698 -- . I
699 -- . I
700 -- +---+ I
701 -- I 4 I------------>+
702 -- +---+
704 -- Note: The above structure actually corresponds to
706 -- (A or (B or C)) (D or E)
708 -- rather than
710 -- ((A or B) or C) (D or E)
712 -- which is the more natural interpretation, but in fact alternation
713 -- is associative, and the construction of an alternative changes the
714 -- left grouped pattern to the right grouped pattern in any case, so
715 -- that the Image function produces a more natural looking output.
717 ---------
718 -- Arb --
719 ---------
721 -- An Arb pattern builds the structure
723 -- +---+
724 -- | X |---->
725 -- +---+
726 -- .
727 -- .
728 -- +---+
729 -- | Y |---->
730 -- +---+
732 -- The X node is a PC_Arb_X node, which matches null, and stacks a
733 -- pointer to Y node, which is the PC_Arb_Y node that matches one
734 -- extra character and restacks itself.
736 -- The PC_Arb_X node is numbered 2, and the PC_Arb_Y node is 1
738 -------------------------
739 -- Arbno (simple case) --
740 -------------------------
742 -- The simple form of Arbno can be used where the pattern always
743 -- matches at least one character if it succeeds, and it is known
744 -- not to make any history stack entries. In this case, Arbno (P)
745 -- can construct the following structure:
747 -- +-------------+
748 -- | ^
749 -- V |
750 -- +---+ |
751 -- | S |----> |
752 -- +---+ |
753 -- . |
754 -- . |
755 -- +---+ |
756 -- | P |---------->+
757 -- +---+
759 -- The S (PC_Arbno_S) node matches null stacking a pointer to the
760 -- pattern P. If a subsequent failure causes P to be matched and
761 -- this match succeeds, then node A gets restacked to try another
762 -- instance if needed by a subsequent failure.
764 -- The node numbering of the constituent pattern P is not affected.
765 -- The S node has a node number of P.Index + 1.
767 --------------------------
768 -- Arbno (complex case) --
769 --------------------------
771 -- A call to Arbno (P), where P can match null (or at least is not
772 -- known to require a non-null string) and/or P requires pattern stack
773 -- entries, constructs the following structure:
775 -- +--------------------------+
776 -- | ^
777 -- V |
778 -- +---+ |
779 -- | X |----> |
780 -- +---+ |
781 -- . |
782 -- . |
783 -- +---+ +---+ +---+ |
784 -- | E |---->| P |---->| Y |--->+
785 -- +---+ +---+ +---+
787 -- The node X (PC_Arbno_X) matches null, stacking a pointer to the
788 -- E-P-X structure used to match one Arbno instance.
790 -- Here E is the PC_R_Enter node which matches null and creates two
791 -- stack entries. The first is a special entry whose node field is
792 -- not used at all, and whose cursor field has the initial cursor.
794 -- The second entry corresponds to a standard new region action. A
795 -- PC_R_Remove node is stacked, whose cursor field is used to store
796 -- the outer stack base, and the stack base is reset to point to
797 -- this PC_R_Remove node. Then the pattern P is matched, and it can
798 -- make history stack entries in the normal manner, so now the stack
799 -- looks like:
801 -- (stack entries made before assign pattern)
803 -- (Special entry, node field not used,
804 -- used only to save initial cursor)
806 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base
807 -- saved base value for the enclosing region)
809 -- (stack entries made by matching P)
811 -- If the match of P fails, then the PC_R_Remove entry is popped and
812 -- it removes both itself and the special entry underneath it,
813 -- restores the outer stack base, and signals failure.
815 -- If the match of P succeeds, then node Y, the PC_Arbno_Y node, pops
816 -- the inner region. There are two possibilities. If matching P left
817 -- no stack entries, then all traces of the inner region can be removed.
818 -- If there are stack entries, then we push an PC_Region_Replace stack
819 -- entry whose "cursor" value is the inner stack base value, and then
820 -- restore the outer stack base value, so the stack looks like:
822 -- (stack entries made before assign pattern)
824 -- (Special entry, node field not used,
825 -- used only to save initial cursor)
827 -- (PC_R_Remove entry, "cursor" value is (negative)
828 -- saved base value for the enclosing region)
830 -- (stack entries made by matching P)
832 -- (PC_Region_Replace entry, "cursor" value is (negative)
833 -- stack pointer value referencing the PC_R_Remove entry).
835 -- Now that we have matched another instance of the Arbno pattern,
836 -- we need to move to the successor. There are two cases. If the
837 -- Arbno pattern matched null, then there is no point in seeking
838 -- alternatives, since we would just match a whole bunch of nulls.
839 -- In this case we look through the alternative node, and move
840 -- directly to its successor (i.e. the successor of the Arbno
841 -- pattern). If on the other hand a non-null string was matched,
842 -- we simply follow the successor to the alternative node, which
843 -- sets up for another possible match of the Arbno pattern.
845 -- As noted in the section on stack checking, the stack count (and
846 -- hence the stack check) for a pattern includes only one iteration
847 -- of the Arbno pattern. To make sure that multiple iterations do not
848 -- overflow the stack, the Arbno node saves the stack count required
849 -- by a single iteration, and the Concat function increments this to
850 -- include stack entries required by any successor. The PC_Arbno_Y
851 -- node uses this count to ensure that sufficient stack remains
852 -- before proceeding after matching each new instance.
854 -- The node numbering of the constituent pattern P is not affected.
855 -- Where N is the number of nodes in P, the Y node is numbered N + 1,
856 -- the E node is N + 2, and the X node is N + 3.
858 ----------------------
859 -- Assign Immediate --
860 ----------------------
862 -- Immediate assignment (P * V) constructs the following structure
864 -- +---+ +---+ +---+
865 -- | E |---->| P |---->| A |---->
866 -- +---+ +---+ +---+
868 -- Here E is the PC_R_Enter node which matches null and creates two
869 -- stack entries. The first is a special entry whose node field is
870 -- not used at all, and whose cursor field has the initial cursor.
872 -- The second entry corresponds to a standard new region action. A
873 -- PC_R_Remove node is stacked, whose cursor field is used to store
874 -- the outer stack base, and the stack base is reset to point to
875 -- this PC_R_Remove node. Then the pattern P is matched, and it can
876 -- make history stack entries in the normal manner, so now the stack
877 -- looks like:
879 -- (stack entries made before assign pattern)
881 -- (Special entry, node field not used,
882 -- used only to save initial cursor)
884 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base
885 -- saved base value for the enclosing region)
887 -- (stack entries made by matching P)
889 -- If the match of P fails, then the PC_R_Remove entry is popped
890 -- and it removes both itself and the special entry underneath it,
891 -- restores the outer stack base, and signals failure.
893 -- If the match of P succeeds, then node A, which is the actual
894 -- PC_Assign_Imm node, executes the assignment (using the stack
895 -- base to locate the entry with the saved starting cursor value),
896 -- and the pops the inner region. There are two possibilities, if
897 -- matching P left no stack entries, then all traces of the inner
898 -- region can be removed. If there are stack entries, then we push
899 -- an PC_Region_Replace stack entry whose "cursor" value is the
900 -- inner stack base value, and then restore the outer stack base
901 -- value, so the stack looks like:
903 -- (stack entries made before assign pattern)
905 -- (Special entry, node field not used,
906 -- used only to save initial cursor)
908 -- (PC_R_Remove entry, "cursor" value is (negative)
909 -- saved base value for the enclosing region)
911 -- (stack entries made by matching P)
913 -- (PC_Region_Replace entry, "cursor" value is the (negative)
914 -- stack pointer value referencing the PC_R_Remove entry).
916 -- If a subsequent failure occurs, the PC_Region_Replace node restores
917 -- the inner stack base value and signals failure to explore rematches
918 -- of the pattern P.
920 -- The node numbering of the constituent pattern P is not affected.
921 -- Where N is the number of nodes in P, the A node is numbered N + 1,
922 -- and the E node is N + 2.
924 ---------------------
925 -- Assign On Match --
926 ---------------------
928 -- The assign on match (**) pattern is quite similar to the assign
929 -- immediate pattern, except that the actual assignment has to be
930 -- delayed. The following structure is constructed:
932 -- +---+ +---+ +---+
933 -- | E |---->| P |---->| A |---->
934 -- +---+ +---+ +---+
936 -- The operation of this pattern is identical to that described above
937 -- for deferred assignment, up to the point where P has been matched.
939 -- The A node, which is the PC_Assign_OnM node first pushes a
940 -- PC_Assign node onto the history stack. This node saves the ending
941 -- cursor and acts as a flag for the final assignment, as further
942 -- described below.
944 -- It then stores a pointer to itself in the special entry node field.
945 -- This was otherwise unused, and is now used to retrive the address
946 -- of the variable to be assigned at the end of the pattern.
948 -- After that the inner region is terminated in the usual manner,
949 -- by stacking a PC_R_Restore entry as described for the assign
950 -- immediate case. Note that the optimization of completely
951 -- removing the inner region does not happen in this case, since
952 -- we have at least one stack entry (the PC_Assign one we just made).
953 -- The stack now looks like:
955 -- (stack entries made before assign pattern)
957 -- (Special entry, node points to copy of
958 -- the PC_Assign_OnM node, and the
959 -- cursor field saves the initial cursor).
961 -- (PC_R_Remove entry, "cursor" value is (negative)
962 -- saved base value for the enclosing region)
964 -- (stack entries made by matching P)
966 -- (PC_Assign entry, saves final cursor)
968 -- (PC_Region_Replace entry, "cursor" value is (negative)
969 -- stack pointer value referencing the PC_R_Remove entry).
971 -- If a subsequent failure causes the PC_Assign node to execute it
972 -- simply removes itself and propagates the failure.
974 -- If the match succeeds, then the history stack is scanned for
975 -- PC_Assign nodes, and the assignments are executed (examination
976 -- of the above diagram will show that all the necessary data is
977 -- at hand for the assignment).
979 -- To optimize the common case where no assign-on-match operations
980 -- are present, a global flag Assign_OnM is maintained which is
981 -- initialize to False, and gets set True as part of the execution
982 -- of the PC_Assign_OnM node. The scan of the history stack for
983 -- PC_Assign entries is done only if this flag is set.
985 -- The node numbering of the constituent pattern P is not affected.
986 -- Where N is the number of nodes in P, the A node is numbered N + 1,
987 -- and the E node is N + 2.
989 ---------
990 -- Bal --
991 ---------
993 -- Bal builds a single node:
995 -- +---+
996 -- | B |---->
997 -- +---+
999 -- The node B is the PC_Bal node which matches a parentheses balanced
1000 -- string, starting at the current cursor position. It then updates
1001 -- the cursor past this matched string, and stacks a pointer to itself
1002 -- with this updated cursor value on the history stack, to extend the
1003 -- matched string on a subequent failure.
1005 -- Since this is a single node it is numbered 1 (the reason we include
1006 -- it in the compound patterns section is that it backtracks).
1008 ------------
1009 -- BreakX --
1010 ------------
1012 -- BreakX builds the structure
1014 -- +---+ +---+
1015 -- | B |---->| A |---->
1016 -- +---+ +---+
1017 -- ^ .
1018 -- | .
1019 -- | +---+
1020 -- +<------| X |
1021 -- +---+
1023 -- Here the B node is the BreakX_xx node that performs a normal Break
1024 -- function. The A node is an alternative (PC_Alt) node that matches
1025 -- null, but stacks a pointer to node X (the PC_BreakX_X node) which
1026 -- extends the match one character (to eat up the previously detected
1027 -- break character), and then rematches the break.
1029 -- The B node is numbered 3, the alternative node is 1, and the X
1030 -- node is 2.
1032 -----------
1033 -- Fence --
1034 -----------
1036 -- Fence builds a single node:
1038 -- +---+
1039 -- | F |---->
1040 -- +---+
1042 -- The element F, PC_Fence, matches null, and stacks a pointer to a
1043 -- PC_Cancel element which will abort the match on a subsequent failure.
1045 -- Since this is a single element it is numbered 1 (the reason we
1046 -- include it in the compound patterns section is that it backtracks).
1048 --------------------
1049 -- Fence Function --
1050 --------------------
1052 -- A call to the Fence function builds the structure:
1054 -- +---+ +---+ +---+
1055 -- | E |---->| P |---->| X |---->
1056 -- +---+ +---+ +---+
1058 -- Here E is the PC_R_Enter node which matches null and creates two
1059 -- stack entries. The first is a special entry which is not used at
1060 -- all in the fence case (it is present merely for uniformity with
1061 -- other cases of region enter operations).
1063 -- The second entry corresponds to a standard new region action. A
1064 -- PC_R_Remove node is stacked, whose cursor field is used to store
1065 -- the outer stack base, and the stack base is reset to point to
1066 -- this PC_R_Remove node. Then the pattern P is matched, and it can
1067 -- make history stack entries in the normal manner, so now the stack
1068 -- looks like:
1070 -- (stack entries made before fence pattern)
1072 -- (Special entry, not used at all)
1074 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base
1075 -- saved base value for the enclosing region)
1077 -- (stack entries made by matching P)
1079 -- If the match of P fails, then the PC_R_Remove entry is popped
1080 -- and it removes both itself and the special entry underneath it,
1081 -- restores the outer stack base, and signals failure.
1083 -- If the match of P succeeds, then node X, the PC_Fence_X node, gets
1084 -- control. One might be tempted to think that at this point, the
1085 -- history stack entries made by matching P can just be removed since
1086 -- they certainly are not going to be used for rematching (that is
1087 -- whole point of Fence after all!) However, this is wrong, because
1088 -- it would result in the loss of possible assign-on-match entries
1089 -- for deferred pattern assignments.
1091 -- Instead what we do is to make a special entry whose node references
1092 -- PC_Fence_Y, and whose cursor saves the inner stack base value, i.e.
1093 -- the pointer to the PC_R_Remove entry. Then the outer stack base
1094 -- pointer is restored, so the stack looks like:
1096 -- (stack entries made before assign pattern)
1098 -- (Special entry, not used at all)
1100 -- (PC_R_Remove entry, "cursor" value is (negative)
1101 -- saved base value for the enclosing region)
1103 -- (stack entries made by matching P)
1105 -- (PC_Fence_Y entry, "cursor" value is (negative) stack
1106 -- pointer value referencing the PC_R_Remove entry).
1108 -- If a subsequent failure occurs, then the PC_Fence_Y entry removes
1109 -- the entire inner region, including all entries made by matching P,
1110 -- and alternatives prior to the Fence pattern are sought.
1112 -- The node numbering of the constituent pattern P is not affected.
1113 -- Where N is the number of nodes in P, the X node is numbered N + 1,
1114 -- and the E node is N + 2.
1116 -------------
1117 -- Succeed --
1118 -------------
1120 -- Succeed builds a single node:
1122 -- +---+
1123 -- | S |---->
1124 -- +---+
1126 -- The node S is the PC_Succeed node which matches null, and stacks
1127 -- a pointer to itself on the history stack, so that a subsequent
1128 -- failure repeats the same match.
1130 -- Since this is a single node it is numbered 1 (the reason we include
1131 -- it in the compound patterns section is that it backtracks).
1133 ---------------------
1134 -- Write Immediate --
1135 ---------------------
1137 -- The structure built for a write immediate operation (P * F, where
1138 -- F is a file access value) is:
1140 -- +---+ +---+ +---+
1141 -- | E |---->| P |---->| W |---->
1142 -- +---+ +---+ +---+
1144 -- Here E is the PC_R_Enter node and W is the PC_Write_Imm node. The
1145 -- handling is identical to that described above for Assign Immediate,
1146 -- except that at the point where a successful match occurs, the matched
1147 -- substring is written to the referenced file.
1149 -- The node numbering of the constituent pattern P is not affected.
1150 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1151 -- and the E node is N + 2.
1153 --------------------
1154 -- Write On Match --
1155 --------------------
1157 -- The structure built for a write on match operation (P ** F, where
1158 -- F is a file access value) is:
1160 -- +---+ +---+ +---+
1161 -- | E |---->| P |---->| W |---->
1162 -- +---+ +---+ +---+
1164 -- Here E is the PC_R_Enter node and W is the PC_Write_OnM node. The
1165 -- handling is identical to that described above for Assign On Match,
1166 -- except that at the point where a successful match has completed,
1167 -- the matched substring is written to the referenced file.
1169 -- The node numbering of the constituent pattern P is not affected.
1170 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1171 -- and the E node is N + 2.
1172 -----------------------
1173 -- Constant Patterns --
1174 -----------------------
1176 -- The following pattern elements are referenced only from the pattern
1177 -- history stack. In each case the processing for the pattern element
1178 -- results in pattern match abort, or futher failure, so there is no
1179 -- need for a successor and no need for a node number
1181 CP_Assign : aliased PE := (PC_Assign, 0, N);
1182 CP_Cancel : aliased PE := (PC_Cancel, 0, N);
1183 CP_Fence_Y : aliased PE := (PC_Fence_Y, 0, N);
1184 CP_R_Remove : aliased PE := (PC_R_Remove, 0, N);
1185 CP_R_Restore : aliased PE := (PC_R_Restore, 0, N);
1187 -----------------------
1188 -- Local Subprograms --
1189 -----------------------
1191 function Alternate (L, R : PE_Ptr) return PE_Ptr;
1192 function "or" (L, R : PE_Ptr) return PE_Ptr renames Alternate;
1193 -- Build pattern structure corresponding to the alternation of L, R.
1194 -- (i.e. try to match L, and if that fails, try to match R).
1196 function Arbno_Simple (P : PE_Ptr) return PE_Ptr;
1197 -- Build simple Arbno pattern, P is a pattern that is guaranteed to
1198 -- match at least one character if it succeeds and to require no
1199 -- stack entries under all circumstances. The result returned is
1200 -- a simple Arbno structure as previously described.
1202 function Bracket (E, P, A : PE_Ptr) return PE_Ptr;
1203 -- Given two single node pattern elements E and A, and a (possible
1204 -- complex) pattern P, construct the concatenation E-->P-->A and
1205 -- return a pointer to E. The concatenation does not affect the
1206 -- node numbering in P. A has a number one higher than the maximum
1207 -- number in P, and E has a number two higher than the maximum
1208 -- number in P (see for example the Assign_Immediate structure to
1209 -- understand a typical use of this function).
1211 function BreakX_Make (B : PE_Ptr) return Pattern;
1212 -- Given a pattern element for a Break patternx, returns the
1213 -- corresponding BreakX compound pattern structure.
1215 function Concat (L, R : PE_Ptr; Incr : Natural) return PE_Ptr;
1216 -- Creates a pattern eelement that represents a concatenation of the
1217 -- two given pattern elements (i.e. the pattern L followed by R).
1218 -- The result returned is always the same as L, but the pattern
1219 -- referenced by L is modified to have R as a successor. This
1220 -- procedure does not copy L or R, so if a copy is required, it
1221 -- is the responsibility of the caller. The Incr parameter is an
1222 -- amount to be added to the Nat field of any P_Arbno_Y node that is
1223 -- in the left operand, it represents the additional stack space
1224 -- required by the right operand.
1226 function C_To_PE (C : PChar) return PE_Ptr;
1227 -- Given a character, constructs a pattern element that matches
1228 -- the single character.
1230 function Copy (P : PE_Ptr) return PE_Ptr;
1231 -- Creates a copy of the pattern element referenced by the given
1232 -- pattern element reference. This is a deep copy, which means that
1233 -- it follows the Next and Alt pointers.
1235 function Image (P : PE_Ptr) return String;
1236 -- Returns the image of the address of the referenced pattern element.
1237 -- This is equivalent to Image (To_Address (P));
1239 function Is_In (C : Character; Str : String) return Boolean;
1240 pragma Inline (Is_In);
1241 -- Determines if the character C is in string Str
1243 procedure Logic_Error;
1244 -- Called to raise Program_Error with an appropriate message if an
1245 -- internal logic error is detected.
1247 function Str_BF (A : Boolean_Func) return String;
1248 function Str_FP (A : File_Ptr) return String;
1249 function Str_NF (A : Natural_Func) return String;
1250 function Str_NP (A : Natural_Ptr) return String;
1251 function Str_PP (A : Pattern_Ptr) return String;
1252 function Str_VF (A : VString_Func) return String;
1253 function Str_VP (A : VString_Ptr) return String;
1254 -- These are debugging routines, which return a representation of the
1255 -- given access value (they are called only by Image and Dump)
1257 procedure Set_Successor (Pat : PE_Ptr; Succ : PE_Ptr);
1258 -- Adjusts all EOP pointers in Pat to point to Succ. No other changes
1259 -- are made. In particular, Succ is unchanged, and no index numbers
1260 -- are modified. Note that Pat may not be equal to EOP on entry.
1262 function S_To_PE (Str : PString) return PE_Ptr;
1263 -- Given a string, constructs a pattern element that matches the string
1265 procedure Uninitialized_Pattern;
1266 pragma No_Return (Uninitialized_Pattern);
1267 -- Called to raise Program_Error with an appropriate error message if
1268 -- an uninitialized pattern is used in any pattern construction or
1269 -- pattern matching operation.
1271 procedure XMatch
1272 (Subject : String;
1273 Pat_P : PE_Ptr;
1274 Pat_S : Natural;
1275 Start : out Natural;
1276 Stop : out Natural);
1277 -- This is the common pattern match routine. It is passed a string and
1278 -- a pattern, and it indicates success or failure, and on success the
1279 -- section of the string matched. It does not perform any assignments
1280 -- to the subject string, so pattern replacement is for the caller.
1282 -- Subject The subject string. The lower bound is always one. In the
1283 -- Match procedures, it is fine to use strings whose lower bound
1284 -- is not one, but we perform a one time conversion before the
1285 -- call to XMatch, so that XMatch does not have to be bothered
1286 -- with strange lower bounds.
1288 -- Pat_P Points to initial pattern element of pattern to be matched
1290 -- Pat_S Maximum required stack entries for pattern to be matched
1292 -- Start If match is successful, starting index of matched section.
1293 -- This value is always non-zero. A value of zero is used to
1294 -- indicate a failed match.
1296 -- Stop If match is successful, ending index of matched section.
1297 -- This can be zero if we match the null string at the start,
1298 -- in which case Start is set to zero, and Stop to one. If the
1299 -- Match fails, then the contents of Stop is undefined.
1301 procedure XMatchD
1302 (Subject : String;
1303 Pat_P : PE_Ptr;
1304 Pat_S : Natural;
1305 Start : out Natural;
1306 Stop : out Natural);
1307 -- Identical in all respects to XMatch, except that trace information is
1308 -- output on Standard_Ouput during execution of the match. This is the
1309 -- version that is called if the original Match call has Debug => True.
1311 ---------
1312 -- "&" --
1313 ---------
1315 function "&" (L : PString; R : Pattern) return Pattern is
1316 begin
1317 return (AFC with R.Stk, Concat (S_To_PE (L), Copy (R.P), R.Stk));
1318 end "&";
1320 function "&" (L : Pattern; R : PString) return Pattern is
1321 begin
1322 return (AFC with L.Stk, Concat (Copy (L.P), S_To_PE (R), 0));
1323 end "&";
1325 function "&" (L : PChar; R : Pattern) return Pattern is
1326 begin
1327 return (AFC with R.Stk, Concat (C_To_PE (L), Copy (R.P), R.Stk));
1328 end "&";
1330 function "&" (L : Pattern; R : PChar) return Pattern is
1331 begin
1332 return (AFC with L.Stk, Concat (Copy (L.P), C_To_PE (R), 0));
1333 end "&";
1335 function "&" (L : Pattern; R : Pattern) return Pattern is
1336 begin
1337 return (AFC with L.Stk + R.Stk, Concat (Copy (L.P), Copy (R.P), R.Stk));
1338 end "&";
1340 ---------
1341 -- "*" --
1342 ---------
1344 -- Assign immediate
1346 -- +---+ +---+ +---+
1347 -- | E |---->| P |---->| A |---->
1348 -- +---+ +---+ +---+
1350 -- The node numbering of the constituent pattern P is not affected.
1351 -- Where N is the number of nodes in P, the A node is numbered N + 1,
1352 -- and the E node is N + 2.
1354 function "*" (P : Pattern; Var : VString_Var) return Pattern is
1355 Pat : constant PE_Ptr := Copy (P.P);
1356 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1357 A : constant PE_Ptr :=
1358 new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access);
1360 begin
1361 return (AFC with P.Stk + 3, Bracket (E, Pat, A));
1362 end "*";
1364 function "*" (P : PString; Var : VString_Var) return Pattern is
1365 Pat : constant PE_Ptr := S_To_PE (P);
1366 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1367 A : constant PE_Ptr :=
1368 new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access);
1370 begin
1371 return (AFC with 3, Bracket (E, Pat, A));
1372 end "*";
1374 function "*" (P : PChar; Var : VString_Var) return Pattern is
1375 Pat : constant PE_Ptr := C_To_PE (P);
1376 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1377 A : constant PE_Ptr :=
1378 new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access);
1380 begin
1381 return (AFC with 3, Bracket (E, Pat, A));
1382 end "*";
1384 -- Write immediate
1386 -- +---+ +---+ +---+
1387 -- | E |---->| P |---->| W |---->
1388 -- +---+ +---+ +---+
1390 -- The node numbering of the constituent pattern P is not affected.
1391 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1392 -- and the E node is N + 2.
1394 function "*" (P : Pattern; Fil : File_Access) return Pattern is
1395 Pat : constant PE_Ptr := Copy (P.P);
1396 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1397 W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil);
1399 begin
1400 return (AFC with 3, Bracket (E, Pat, W));
1401 end "*";
1403 function "*" (P : PString; Fil : File_Access) return Pattern is
1404 Pat : constant PE_Ptr := S_To_PE (P);
1405 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1406 W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil);
1408 begin
1409 return (AFC with 3, Bracket (E, Pat, W));
1410 end "*";
1412 function "*" (P : PChar; Fil : File_Access) return Pattern is
1413 Pat : constant PE_Ptr := C_To_PE (P);
1414 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1415 W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil);
1417 begin
1418 return (AFC with 3, Bracket (E, Pat, W));
1419 end "*";
1421 ----------
1422 -- "**" --
1423 ----------
1425 -- Assign on match
1427 -- +---+ +---+ +---+
1428 -- | E |---->| P |---->| A |---->
1429 -- +---+ +---+ +---+
1431 -- The node numbering of the constituent pattern P is not affected.
1432 -- Where N is the number of nodes in P, the A node is numbered N + 1,
1433 -- and the E node is N + 2.
1435 function "**" (P : Pattern; Var : VString_Var) return Pattern is
1436 Pat : constant PE_Ptr := Copy (P.P);
1437 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1438 A : constant PE_Ptr :=
1439 new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access);
1441 begin
1442 return (AFC with P.Stk + 3, Bracket (E, Pat, A));
1443 end "**";
1445 function "**" (P : PString; Var : VString_Var) return Pattern is
1446 Pat : constant PE_Ptr := S_To_PE (P);
1447 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1448 A : constant PE_Ptr :=
1449 new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access);
1451 begin
1452 return (AFC with 3, Bracket (E, Pat, A));
1453 end "**";
1455 function "**" (P : PChar; Var : VString_Var) return Pattern is
1456 Pat : constant PE_Ptr := C_To_PE (P);
1457 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1458 A : constant PE_Ptr :=
1459 new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access);
1461 begin
1462 return (AFC with 3, Bracket (E, Pat, A));
1463 end "**";
1465 -- Write on match
1467 -- +---+ +---+ +---+
1468 -- | E |---->| P |---->| W |---->
1469 -- +---+ +---+ +---+
1471 -- The node numbering of the constituent pattern P is not affected.
1472 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1473 -- and the E node is N + 2.
1475 function "**" (P : Pattern; Fil : File_Access) return Pattern is
1476 Pat : constant PE_Ptr := Copy (P.P);
1477 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1478 W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil);
1480 begin
1481 return (AFC with P.Stk + 3, Bracket (E, Pat, W));
1482 end "**";
1484 function "**" (P : PString; Fil : File_Access) return Pattern is
1485 Pat : constant PE_Ptr := S_To_PE (P);
1486 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1487 W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil);
1489 begin
1490 return (AFC with 3, Bracket (E, Pat, W));
1491 end "**";
1493 function "**" (P : PChar; Fil : File_Access) return Pattern is
1494 Pat : constant PE_Ptr := C_To_PE (P);
1495 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1496 W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil);
1498 begin
1499 return (AFC with 3, Bracket (E, Pat, W));
1500 end "**";
1502 ---------
1503 -- "+" --
1504 ---------
1506 function "+" (Str : VString_Var) return Pattern is
1507 begin
1508 return
1509 (AFC with 0,
1510 new PE'(PC_String_VP, 1, EOP, Str'Unrestricted_Access));
1511 end "+";
1513 function "+" (Str : VString_Func) return Pattern is
1514 begin
1515 return (AFC with 0, new PE'(PC_String_VF, 1, EOP, Str));
1516 end "+";
1518 function "+" (P : Pattern_Var) return Pattern is
1519 begin
1520 return
1521 (AFC with 3,
1522 new PE'(PC_Rpat, 1, EOP, P'Unrestricted_Access));
1523 end "+";
1525 function "+" (P : Boolean_Func) return Pattern is
1526 begin
1527 return (AFC with 3, new PE'(PC_Pred_Func, 1, EOP, P));
1528 end "+";
1530 ----------
1531 -- "or" --
1532 ----------
1534 function "or" (L : PString; R : Pattern) return Pattern is
1535 begin
1536 return (AFC with R.Stk + 1, S_To_PE (L) or Copy (R.P));
1537 end "or";
1539 function "or" (L : Pattern; R : PString) return Pattern is
1540 begin
1541 return (AFC with L.Stk + 1, Copy (L.P) or S_To_PE (R));
1542 end "or";
1544 function "or" (L : PString; R : PString) return Pattern is
1545 begin
1546 return (AFC with 1, S_To_PE (L) or S_To_PE (R));
1547 end "or";
1549 function "or" (L : Pattern; R : Pattern) return Pattern is
1550 begin
1551 return (AFC with
1552 Natural'Max (L.Stk, R.Stk) + 1, Copy (L.P) or Copy (R.P));
1553 end "or";
1555 function "or" (L : PChar; R : Pattern) return Pattern is
1556 begin
1557 return (AFC with 1, C_To_PE (L) or Copy (R.P));
1558 end "or";
1560 function "or" (L : Pattern; R : PChar) return Pattern is
1561 begin
1562 return (AFC with 1, Copy (L.P) or C_To_PE (R));
1563 end "or";
1565 function "or" (L : PChar; R : PChar) return Pattern is
1566 begin
1567 return (AFC with 1, C_To_PE (L) or C_To_PE (R));
1568 end "or";
1570 function "or" (L : PString; R : PChar) return Pattern is
1571 begin
1572 return (AFC with 1, S_To_PE (L) or C_To_PE (R));
1573 end "or";
1575 function "or" (L : PChar; R : PString) return Pattern is
1576 begin
1577 return (AFC with 1, C_To_PE (L) or S_To_PE (R));
1578 end "or";
1580 ------------
1581 -- Adjust --
1582 ------------
1584 -- No two patterns share the same pattern elements, so the adjust
1585 -- procedure for a Pattern assignment must do a deep copy of the
1586 -- pattern element structure.
1588 procedure Adjust (Object : in out Pattern) is
1589 begin
1590 Object.P := Copy (Object.P);
1591 end Adjust;
1593 ---------------
1594 -- Alternate --
1595 ---------------
1597 function Alternate (L, R : PE_Ptr) return PE_Ptr is
1598 begin
1599 -- If the left pattern is null, then we just add the alternation
1600 -- node with an index one greater than the right hand pattern.
1602 if L = EOP then
1603 return new PE'(PC_Alt, R.Index + 1, EOP, R);
1605 -- If the left pattern is non-null, then build a reference vector
1606 -- for its elements, and adjust their index values to acccomodate
1607 -- the right hand elements. Then add the alternation node.
1609 else
1610 declare
1611 Refs : Ref_Array (1 .. L.Index);
1613 begin
1614 Build_Ref_Array (L, Refs);
1616 for J in Refs'Range loop
1617 Refs (J).Index := Refs (J).Index + R.Index;
1618 end loop;
1619 end;
1621 return new PE'(PC_Alt, L.Index + 1, L, R);
1622 end if;
1623 end Alternate;
1625 ---------
1626 -- Any --
1627 ---------
1629 function Any (Str : String) return Pattern is
1630 begin
1631 return (AFC with 0, new PE'(PC_Any_CS, 1, EOP, To_Set (Str)));
1632 end Any;
1634 function Any (Str : VString) return Pattern is
1635 begin
1636 return Any (S (Str));
1637 end Any;
1639 function Any (Str : Character) return Pattern is
1640 begin
1641 return (AFC with 0, new PE'(PC_Any_CH, 1, EOP, Str));
1642 end Any;
1644 function Any (Str : Character_Set) return Pattern is
1645 begin
1646 return (AFC with 0, new PE'(PC_Any_CS, 1, EOP, Str));
1647 end Any;
1649 function Any (Str : access VString) return Pattern is
1650 begin
1651 return (AFC with 0, new PE'(PC_Any_VP, 1, EOP, VString_Ptr (Str)));
1652 end Any;
1654 function Any (Str : VString_Func) return Pattern is
1655 begin
1656 return (AFC with 0, new PE'(PC_Any_VF, 1, EOP, Str));
1657 end Any;
1659 ---------
1660 -- Arb --
1661 ---------
1663 -- +---+
1664 -- | X |---->
1665 -- +---+
1666 -- .
1667 -- .
1668 -- +---+
1669 -- | Y |---->
1670 -- +---+
1672 -- The PC_Arb_X element is numbered 2, and the PC_Arb_Y element is 1
1674 function Arb return Pattern is
1675 Y : constant PE_Ptr := new PE'(PC_Arb_Y, 1, EOP);
1676 X : constant PE_Ptr := new PE'(PC_Arb_X, 2, EOP, Y);
1678 begin
1679 return (AFC with 1, X);
1680 end Arb;
1682 -----------
1683 -- Arbno --
1684 -----------
1686 function Arbno (P : PString) return Pattern is
1687 begin
1688 if P'Length = 0 then
1689 return (AFC with 0, EOP);
1691 else
1692 return (AFC with 0, Arbno_Simple (S_To_PE (P)));
1693 end if;
1694 end Arbno;
1696 function Arbno (P : PChar) return Pattern is
1697 begin
1698 return (AFC with 0, Arbno_Simple (C_To_PE (P)));
1699 end Arbno;
1701 function Arbno (P : Pattern) return Pattern is
1702 Pat : constant PE_Ptr := Copy (P.P);
1704 begin
1705 if P.Stk = 0
1706 and then OK_For_Simple_Arbno (Pat.Pcode)
1707 then
1708 return (AFC with 0, Arbno_Simple (Pat));
1709 end if;
1711 -- This is the complex case, either the pattern makes stack entries
1712 -- or it is possible for the pattern to match the null string (more
1713 -- accurately, we don't know that this is not the case).
1715 -- +--------------------------+
1716 -- | ^
1717 -- V |
1718 -- +---+ |
1719 -- | X |----> |
1720 -- +---+ |
1721 -- . |
1722 -- . |
1723 -- +---+ +---+ +---+ |
1724 -- | E |---->| P |---->| Y |--->+
1725 -- +---+ +---+ +---+
1727 -- The node numbering of the constituent pattern P is not affected.
1728 -- Where N is the number of nodes in P, the Y node is numbered N + 1,
1729 -- the E node is N + 2, and the X node is N + 3.
1731 declare
1732 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1733 X : constant PE_Ptr := new PE'(PC_Arbno_X, 0, EOP, E);
1734 Y : constant PE_Ptr := new PE'(PC_Arbno_Y, 0, X, P.Stk + 3);
1735 EPY : constant PE_Ptr := Bracket (E, Pat, Y);
1737 begin
1738 X.Alt := EPY;
1739 X.Index := EPY.Index + 1;
1740 return (AFC with P.Stk + 3, X);
1741 end;
1742 end Arbno;
1744 ------------------
1745 -- Arbno_Simple --
1746 ------------------
1748 -- +-------------+
1749 -- | ^
1750 -- V |
1751 -- +---+ |
1752 -- | S |----> |
1753 -- +---+ |
1754 -- . |
1755 -- . |
1756 -- +---+ |
1757 -- | P |---------->+
1758 -- +---+
1760 -- The node numbering of the constituent pattern P is not affected.
1761 -- The S node has a node number of P.Index + 1.
1763 -- Note that we know that P cannot be EOP, because a null pattern
1764 -- does not meet the requirements for simple Arbno.
1766 function Arbno_Simple (P : PE_Ptr) return PE_Ptr is
1767 S : constant PE_Ptr := new PE'(PC_Arbno_S, P.Index + 1, EOP, P);
1769 begin
1770 Set_Successor (P, S);
1771 return S;
1772 end Arbno_Simple;
1774 ---------
1775 -- Bal --
1776 ---------
1778 function Bal return Pattern is
1779 begin
1780 return (AFC with 1, new PE'(PC_Bal, 1, EOP));
1781 end Bal;
1783 -------------
1784 -- Bracket --
1785 -------------
1787 function Bracket (E, P, A : PE_Ptr) return PE_Ptr is
1788 begin
1789 if P = EOP then
1790 E.Pthen := A;
1791 E.Index := 2;
1792 A.Index := 1;
1794 else
1795 E.Pthen := P;
1796 Set_Successor (P, A);
1797 E.Index := P.Index + 2;
1798 A.Index := P.Index + 1;
1799 end if;
1801 return E;
1802 end Bracket;
1804 -----------
1805 -- Break --
1806 -----------
1808 function Break (Str : String) return Pattern is
1809 begin
1810 return (AFC with 0, new PE'(PC_Break_CS, 1, EOP, To_Set (Str)));
1811 end Break;
1813 function Break (Str : VString) return Pattern is
1814 begin
1815 return Break (S (Str));
1816 end Break;
1818 function Break (Str : Character) return Pattern is
1819 begin
1820 return (AFC with 0, new PE'(PC_Break_CH, 1, EOP, Str));
1821 end Break;
1823 function Break (Str : Character_Set) return Pattern is
1824 begin
1825 return (AFC with 0, new PE'(PC_Break_CS, 1, EOP, Str));
1826 end Break;
1828 function Break (Str : access VString) return Pattern is
1829 begin
1830 return (AFC with 0, new PE'(PC_Break_VP, 1, EOP, VString_Ptr (Str)));
1831 end Break;
1833 function Break (Str : VString_Func) return Pattern is
1834 begin
1835 return (AFC with 0, new PE'(PC_Break_VF, 1, EOP, Str));
1836 end Break;
1838 ------------
1839 -- BreakX --
1840 ------------
1842 function BreakX (Str : String) return Pattern is
1843 begin
1844 return BreakX_Make (new PE'(PC_BreakX_CS, 3, N, To_Set (Str)));
1845 end BreakX;
1847 function BreakX (Str : VString) return Pattern is
1848 begin
1849 return BreakX (S (Str));
1850 end BreakX;
1852 function BreakX (Str : Character) return Pattern is
1853 begin
1854 return BreakX_Make (new PE'(PC_BreakX_CH, 3, N, Str));
1855 end BreakX;
1857 function BreakX (Str : Character_Set) return Pattern is
1858 begin
1859 return BreakX_Make (new PE'(PC_BreakX_CS, 3, N, Str));
1860 end BreakX;
1862 function BreakX (Str : access VString) return Pattern is
1863 begin
1864 return BreakX_Make (new PE'(PC_BreakX_VP, 3, N, VString_Ptr (Str)));
1865 end BreakX;
1867 function BreakX (Str : VString_Func) return Pattern is
1868 begin
1869 return BreakX_Make (new PE'(PC_BreakX_VF, 3, N, Str));
1870 end BreakX;
1872 -----------------
1873 -- BreakX_Make --
1874 -----------------
1876 -- +---+ +---+
1877 -- | B |---->| A |---->
1878 -- +---+ +---+
1879 -- ^ .
1880 -- | .
1881 -- | +---+
1882 -- +<------| X |
1883 -- +---+
1885 -- The B node is numbered 3, the alternative node is 1, and the X
1886 -- node is 2.
1888 function BreakX_Make (B : PE_Ptr) return Pattern is
1889 X : constant PE_Ptr := new PE'(PC_BreakX_X, 2, B);
1890 A : constant PE_Ptr := new PE'(PC_Alt, 1, EOP, X);
1892 begin
1893 B.Pthen := A;
1894 return (AFC with 2, B);
1895 end BreakX_Make;
1897 ---------------------
1898 -- Build_Ref_Array --
1899 ---------------------
1901 procedure Build_Ref_Array (E : PE_Ptr; RA : out Ref_Array) is
1903 procedure Record_PE (E : PE_Ptr);
1904 -- Record given pattern element if not already recorded in RA,
1905 -- and also record any referenced pattern elements recursively.
1907 procedure Record_PE (E : PE_Ptr) is
1908 begin
1909 PutD (" Record_PE called with PE_Ptr = " & Image (E));
1911 if E = EOP or else RA (E.Index) /= null then
1912 Put_LineD (", nothing to do");
1913 return;
1915 else
1916 Put_LineD (", recording" & IndexT'Image (E.Index));
1917 RA (E.Index) := E;
1918 Record_PE (E.Pthen);
1920 if E.Pcode in PC_Has_Alt then
1921 Record_PE (E.Alt);
1922 end if;
1923 end if;
1924 end Record_PE;
1926 -- Start of processing for Build_Ref_Array
1928 begin
1929 New_LineD;
1930 Put_LineD ("Entering Build_Ref_Array");
1931 Record_PE (E);
1932 New_LineD;
1933 end Build_Ref_Array;
1935 -------------
1936 -- C_To_PE --
1937 -------------
1939 function C_To_PE (C : PChar) return PE_Ptr is
1940 begin
1941 return new PE'(PC_Char, 1, EOP, C);
1942 end C_To_PE;
1944 ------------
1945 -- Cancel --
1946 ------------
1948 function Cancel return Pattern is
1949 begin
1950 return (AFC with 0, new PE'(PC_Cancel, 1, EOP));
1951 end Cancel;
1953 ------------
1954 -- Concat --
1955 ------------
1957 -- Concat needs to traverse the left operand performing the following
1958 -- set of fixups:
1960 -- a) Any successor pointers (Pthen fields) that are set to EOP are
1961 -- reset to point to the second operand.
1963 -- b) Any PC_Arbno_Y node has its stack count field incremented
1964 -- by the parameter Incr provided for this purpose.
1966 -- d) Num fields of all pattern elements in the left operand are
1967 -- adjusted to include the elements of the right operand.
1969 -- Note: we do not use Set_Successor in the processing for Concat, since
1970 -- there is no point in doing two traversals, we may as well do everything
1971 -- at the same time.
1973 function Concat (L, R : PE_Ptr; Incr : Natural) return PE_Ptr is
1974 begin
1975 if L = EOP then
1976 return R;
1978 elsif R = EOP then
1979 return L;
1981 else
1982 declare
1983 Refs : Ref_Array (1 .. L.Index);
1984 -- We build a reference array for L whose N'th element points to
1985 -- the pattern element of L whose original Index value is N.
1987 P : PE_Ptr;
1989 begin
1990 Build_Ref_Array (L, Refs);
1992 for J in Refs'Range loop
1993 P := Refs (J);
1995 P.Index := P.Index + R.Index;
1997 if P.Pcode = PC_Arbno_Y then
1998 P.Nat := P.Nat + Incr;
1999 end if;
2001 if P.Pthen = EOP then
2002 P.Pthen := R;
2003 end if;
2005 if P.Pcode in PC_Has_Alt and then P.Alt = EOP then
2006 P.Alt := R;
2007 end if;
2008 end loop;
2009 end;
2011 return L;
2012 end if;
2013 end Concat;
2015 ----------
2016 -- Copy --
2017 ----------
2019 function Copy (P : PE_Ptr) return PE_Ptr is
2020 begin
2021 if P = null then
2022 Uninitialized_Pattern;
2024 else
2025 declare
2026 Refs : Ref_Array (1 .. P.Index);
2027 -- References to elements in P, indexed by Index field
2029 Copy : Ref_Array (1 .. P.Index);
2030 -- Holds copies of elements of P, indexed by Index field
2032 E : PE_Ptr;
2034 begin
2035 Build_Ref_Array (P, Refs);
2037 -- Now copy all nodes
2039 for J in Refs'Range loop
2040 Copy (J) := new PE'(Refs (J).all);
2041 end loop;
2043 -- Adjust all internal references
2045 for J in Copy'Range loop
2046 E := Copy (J);
2048 -- Adjust successor pointer to point to copy
2050 if E.Pthen /= EOP then
2051 E.Pthen := Copy (E.Pthen.Index);
2052 end if;
2054 -- Adjust Alt pointer if there is one to point to copy
2056 if E.Pcode in PC_Has_Alt and then E.Alt /= EOP then
2057 E.Alt := Copy (E.Alt.Index);
2058 end if;
2060 -- Copy referenced string
2062 if E.Pcode = PC_String then
2063 E.Str := new String'(E.Str.all);
2064 end if;
2065 end loop;
2067 return Copy (P.Index);
2068 end;
2069 end if;
2070 end Copy;
2072 ----------
2073 -- Dump --
2074 ----------
2076 procedure Dump (P : Pattern) is
2078 subtype Count is Ada.Text_IO.Count;
2079 Scol : Count;
2080 -- Used to keep track of column in dump output
2082 Refs : Ref_Array (1 .. P.P.Index);
2083 -- We build a reference array whose N'th element points to the
2084 -- pattern element whose Index value is N.
2086 Cols : Natural := 2;
2087 -- Number of columns used for pattern numbers, minimum is 2
2089 E : PE_Ptr;
2091 procedure Write_Node_Id (E : PE_Ptr);
2092 -- Writes out a string identifying the given pattern element
2094 procedure Write_Node_Id (E : PE_Ptr) is
2095 begin
2096 if E = EOP then
2097 Put ("EOP");
2099 for J in 4 .. Cols loop
2100 Put (' ');
2101 end loop;
2103 else
2104 declare
2105 Str : String (1 .. Cols);
2106 N : Natural := Natural (E.Index);
2108 begin
2109 Put ("#");
2111 for J in reverse Str'Range loop
2112 Str (J) := Character'Val (48 + N mod 10);
2113 N := N / 10;
2114 end loop;
2116 Put (Str);
2117 end;
2118 end if;
2119 end Write_Node_Id;
2121 begin
2122 New_Line;
2123 Put ("Pattern Dump Output (pattern at " &
2124 Image (P'Address) &
2125 ", S = " & Natural'Image (P.Stk) & ')');
2127 Scol := Col;
2128 New_Line;
2130 while Col < Scol loop
2131 Put ('-');
2132 end loop;
2134 New_Line;
2136 -- If uninitialized pattern, dump line and we are done
2138 if P.P = null then
2139 Put_Line ("Uninitialized pattern value");
2140 return;
2141 end if;
2143 -- If null pattern, just dump it and we are all done
2145 if P.P = EOP then
2146 Put_Line ("EOP (null pattern)");
2147 return;
2148 end if;
2150 Build_Ref_Array (P.P, Refs);
2152 -- Set number of columns required for node numbers
2154 while 10 ** Cols - 1 < Integer (P.P.Index) loop
2155 Cols := Cols + 1;
2156 end loop;
2158 -- Now dump the nodes in reverse sequence. We output them in reverse
2159 -- sequence since this corresponds to the natural order used to
2160 -- construct the patterns.
2162 for J in reverse Refs'Range loop
2163 E := Refs (J);
2164 Write_Node_Id (E);
2165 Set_Col (Count (Cols) + 4);
2166 Put (Image (E));
2167 Put (" ");
2168 Put (Pattern_Code'Image (E.Pcode));
2169 Put (" ");
2170 Set_Col (21 + Count (Cols) + Address_Image_Length);
2171 Write_Node_Id (E.Pthen);
2172 Set_Col (24 + 2 * Count (Cols) + Address_Image_Length);
2174 case E.Pcode is
2176 when PC_Alt |
2177 PC_Arb_X |
2178 PC_Arbno_S |
2179 PC_Arbno_X =>
2180 Write_Node_Id (E.Alt);
2182 when PC_Rpat =>
2183 Put (Str_PP (E.PP));
2185 when PC_Pred_Func =>
2186 Put (Str_BF (E.BF));
2188 when PC_Assign_Imm |
2189 PC_Assign_OnM |
2190 PC_Any_VP |
2191 PC_Break_VP |
2192 PC_BreakX_VP |
2193 PC_NotAny_VP |
2194 PC_NSpan_VP |
2195 PC_Span_VP |
2196 PC_String_VP =>
2197 Put (Str_VP (E.VP));
2199 when PC_Write_Imm |
2200 PC_Write_OnM =>
2201 Put (Str_FP (E.FP));
2203 when PC_String =>
2204 Put (Image (E.Str.all));
2206 when PC_String_2 =>
2207 Put (Image (E.Str2));
2209 when PC_String_3 =>
2210 Put (Image (E.Str3));
2212 when PC_String_4 =>
2213 Put (Image (E.Str4));
2215 when PC_String_5 =>
2216 Put (Image (E.Str5));
2218 when PC_String_6 =>
2219 Put (Image (E.Str6));
2221 when PC_Setcur =>
2222 Put (Str_NP (E.Var));
2224 when PC_Any_CH |
2225 PC_Break_CH |
2226 PC_BreakX_CH |
2227 PC_Char |
2228 PC_NotAny_CH |
2229 PC_NSpan_CH |
2230 PC_Span_CH =>
2231 Put (''' & E.Char & ''');
2233 when PC_Any_CS |
2234 PC_Break_CS |
2235 PC_BreakX_CS |
2236 PC_NotAny_CS |
2237 PC_NSpan_CS |
2238 PC_Span_CS =>
2239 Put ('"' & To_Sequence (E.CS) & '"');
2241 when PC_Arbno_Y |
2242 PC_Len_Nat |
2243 PC_Pos_Nat |
2244 PC_RPos_Nat |
2245 PC_RTab_Nat |
2246 PC_Tab_Nat =>
2247 Put (S (E.Nat));
2249 when PC_Pos_NF |
2250 PC_Len_NF |
2251 PC_RPos_NF |
2252 PC_RTab_NF |
2253 PC_Tab_NF =>
2254 Put (Str_NF (E.NF));
2256 when PC_Pos_NP |
2257 PC_Len_NP |
2258 PC_RPos_NP |
2259 PC_RTab_NP |
2260 PC_Tab_NP =>
2261 Put (Str_NP (E.NP));
2263 when PC_Any_VF |
2264 PC_Break_VF |
2265 PC_BreakX_VF |
2266 PC_NotAny_VF |
2267 PC_NSpan_VF |
2268 PC_Span_VF |
2269 PC_String_VF =>
2270 Put (Str_VF (E.VF));
2272 when others => null;
2274 end case;
2276 New_Line;
2277 end loop;
2279 New_Line;
2280 end Dump;
2282 ----------
2283 -- Fail --
2284 ----------
2286 function Fail return Pattern is
2287 begin
2288 return (AFC with 0, new PE'(PC_Fail, 1, EOP));
2289 end Fail;
2291 -----------
2292 -- Fence --
2293 -----------
2295 -- Simple case
2297 function Fence return Pattern is
2298 begin
2299 return (AFC with 1, new PE'(PC_Fence, 1, EOP));
2300 end Fence;
2302 -- Function case
2304 -- +---+ +---+ +---+
2305 -- | E |---->| P |---->| X |---->
2306 -- +---+ +---+ +---+
2308 -- The node numbering of the constituent pattern P is not affected.
2309 -- Where N is the number of nodes in P, the X node is numbered N + 1,
2310 -- and the E node is N + 2.
2312 function Fence (P : Pattern) return Pattern is
2313 Pat : constant PE_Ptr := Copy (P.P);
2314 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
2315 X : constant PE_Ptr := new PE'(PC_Fence_X, 0, EOP);
2317 begin
2318 return (AFC with P.Stk + 1, Bracket (E, Pat, X));
2319 end Fence;
2321 --------------
2322 -- Finalize --
2323 --------------
2325 procedure Finalize (Object : in out Pattern) is
2327 procedure Free is new Unchecked_Deallocation (PE, PE_Ptr);
2328 procedure Free is new Unchecked_Deallocation (String, String_Ptr);
2330 begin
2331 -- Nothing to do if already freed
2333 if Object.P = null then
2334 return;
2336 -- Otherwise we must free all elements
2338 else
2339 declare
2340 Refs : Ref_Array (1 .. Object.P.Index);
2341 -- References to elements in pattern to be finalized
2343 begin
2344 Build_Ref_Array (Object.P, Refs);
2346 for J in Refs'Range loop
2347 if Refs (J).Pcode = PC_String then
2348 Free (Refs (J).Str);
2349 end if;
2351 Free (Refs (J));
2352 end loop;
2354 Object.P := null;
2355 end;
2356 end if;
2357 end Finalize;
2359 -----------
2360 -- Image --
2361 -----------
2363 function Image (P : PE_Ptr) return String is
2364 begin
2365 return Image (To_Address (P));
2366 end Image;
2368 function Image (P : Pattern) return String is
2369 begin
2370 return S (Image (P));
2371 end Image;
2373 function Image (P : Pattern) return VString is
2375 Kill_Ampersand : Boolean := False;
2376 -- Set True to delete next & to be output to Result
2378 Result : VString := Nul;
2379 -- The result is accumulated here, using Append
2381 Refs : Ref_Array (1 .. P.P.Index);
2382 -- We build a reference array whose N'th element points to the
2383 -- pattern element whose Index value is N.
2385 procedure Delete_Ampersand;
2386 -- Deletes the ampersand at the end of Result
2388 procedure Image_Seq (E : PE_Ptr; Succ : PE_Ptr; Paren : Boolean);
2389 -- E refers to a pattern structure whose successor is given by Succ.
2390 -- This procedure appends to Result a representation of this pattern.
2391 -- The Paren parameter indicates whether parentheses are required if
2392 -- the output is more than one element.
2394 procedure Image_One (E : in out PE_Ptr);
2395 -- E refers to a pattern structure. This procedure appends to Result
2396 -- a representation of the single simple or compound pattern structure
2397 -- at the start of E and updates E to point to its successor.
2399 ----------------------
2400 -- Delete_Ampersand --
2401 ----------------------
2403 procedure Delete_Ampersand is
2404 L : constant Natural := Length (Result);
2406 begin
2407 if L > 2 then
2408 Delete (Result, L - 1, L);
2409 end if;
2410 end Delete_Ampersand;
2412 ---------------
2413 -- Image_One --
2414 ---------------
2416 procedure Image_One (E : in out PE_Ptr) is
2418 ER : PE_Ptr := E.Pthen;
2419 -- Successor set as result in E unless reset
2421 begin
2422 case E.Pcode is
2424 when PC_Cancel =>
2425 Append (Result, "Cancel");
2427 when PC_Alt => Alt : declare
2429 Elmts_In_L : constant IndexT := E.Pthen.Index - E.Alt.Index;
2430 -- Number of elements in left pattern of alternation
2432 Lowest_In_L : constant IndexT := E.Index - Elmts_In_L;
2433 -- Number of lowest index in elements of left pattern
2435 E1 : PE_Ptr;
2437 begin
2438 -- The successor of the alternation node must have a lower
2439 -- index than any node that is in the left pattern or a
2440 -- higher index than the alternation node itself.
2442 while ER /= EOP
2443 and then ER.Index >= Lowest_In_L
2444 and then ER.Index < E.Index
2445 loop
2446 ER := ER.Pthen;
2447 end loop;
2449 Append (Result, '(');
2451 E1 := E;
2452 loop
2453 Image_Seq (E1.Pthen, ER, False);
2454 Append (Result, " or ");
2455 E1 := E1.Alt;
2456 exit when E1.Pcode /= PC_Alt;
2457 end loop;
2459 Image_Seq (E1, ER, False);
2460 Append (Result, ')');
2461 end Alt;
2463 when PC_Any_CS =>
2464 Append (Result, "Any (" & Image (To_Sequence (E.CS)) & ')');
2466 when PC_Any_VF =>
2467 Append (Result, "Any (" & Str_VF (E.VF) & ')');
2469 when PC_Any_VP =>
2470 Append (Result, "Any (" & Str_VP (E.VP) & ')');
2472 when PC_Arb_X =>
2473 Append (Result, "Arb");
2475 when PC_Arbno_S =>
2476 Append (Result, "Arbno (");
2477 Image_Seq (E.Alt, E, False);
2478 Append (Result, ')');
2480 when PC_Arbno_X =>
2481 Append (Result, "Arbno (");
2482 Image_Seq (E.Alt.Pthen, Refs (E.Index - 2), False);
2483 Append (Result, ')');
2485 when PC_Assign_Imm =>
2486 Delete_Ampersand;
2487 Append (Result, "* " & Str_VP (Refs (E.Index).VP));
2489 when PC_Assign_OnM =>
2490 Delete_Ampersand;
2491 Append (Result, "** " & Str_VP (Refs (E.Index).VP));
2493 when PC_Any_CH =>
2494 Append (Result, "Any ('" & E.Char & "')");
2496 when PC_Bal =>
2497 Append (Result, "Bal");
2499 when PC_Break_CH =>
2500 Append (Result, "Break ('" & E.Char & "')");
2502 when PC_Break_CS =>
2503 Append (Result, "Break (" & Image (To_Sequence (E.CS)) & ')');
2505 when PC_Break_VF =>
2506 Append (Result, "Break (" & Str_VF (E.VF) & ')');
2508 when PC_Break_VP =>
2509 Append (Result, "Break (" & Str_VP (E.VP) & ')');
2511 when PC_BreakX_CH =>
2512 Append (Result, "BreakX ('" & E.Char & "')");
2513 ER := ER.Pthen;
2515 when PC_BreakX_CS =>
2516 Append (Result, "BreakX (" & Image (To_Sequence (E.CS)) & ')');
2517 ER := ER.Pthen;
2519 when PC_BreakX_VF =>
2520 Append (Result, "BreakX (" & Str_VF (E.VF) & ')');
2521 ER := ER.Pthen;
2523 when PC_BreakX_VP =>
2524 Append (Result, "BreakX (" & Str_VP (E.VP) & ')');
2525 ER := ER.Pthen;
2527 when PC_Char =>
2528 Append (Result, ''' & E.Char & ''');
2530 when PC_Fail =>
2531 Append (Result, "Fail");
2533 when PC_Fence =>
2534 Append (Result, "Fence");
2536 when PC_Fence_X =>
2537 Append (Result, "Fence (");
2538 Image_Seq (E.Pthen, Refs (E.Index - 1), False);
2539 Append (Result, ")");
2540 ER := Refs (E.Index - 1).Pthen;
2542 when PC_Len_Nat =>
2543 Append (Result, "Len (" & E.Nat & ')');
2545 when PC_Len_NF =>
2546 Append (Result, "Len (" & Str_NF (E.NF) & ')');
2548 when PC_Len_NP =>
2549 Append (Result, "Len (" & Str_NP (E.NP) & ')');
2551 when PC_NotAny_CH =>
2552 Append (Result, "NotAny ('" & E.Char & "')");
2554 when PC_NotAny_CS =>
2555 Append (Result, "NotAny (" & Image (To_Sequence (E.CS)) & ')');
2557 when PC_NotAny_VF =>
2558 Append (Result, "NotAny (" & Str_VF (E.VF) & ')');
2560 when PC_NotAny_VP =>
2561 Append (Result, "NotAny (" & Str_VP (E.VP) & ')');
2563 when PC_NSpan_CH =>
2564 Append (Result, "NSpan ('" & E.Char & "')");
2566 when PC_NSpan_CS =>
2567 Append (Result, "NSpan (" & Image (To_Sequence (E.CS)) & ')');
2569 when PC_NSpan_VF =>
2570 Append (Result, "NSpan (" & Str_VF (E.VF) & ')');
2572 when PC_NSpan_VP =>
2573 Append (Result, "NSpan (" & Str_VP (E.VP) & ')');
2575 when PC_Null =>
2576 Append (Result, """""");
2578 when PC_Pos_Nat =>
2579 Append (Result, "Pos (" & E.Nat & ')');
2581 when PC_Pos_NF =>
2582 Append (Result, "Pos (" & Str_NF (E.NF) & ')');
2584 when PC_Pos_NP =>
2585 Append (Result, "Pos (" & Str_NP (E.NP) & ')');
2587 when PC_R_Enter =>
2588 Kill_Ampersand := True;
2590 when PC_Rest =>
2591 Append (Result, "Rest");
2593 when PC_Rpat =>
2594 Append (Result, "(+ " & Str_PP (E.PP) & ')');
2596 when PC_Pred_Func =>
2597 Append (Result, "(+ " & Str_BF (E.BF) & ')');
2599 when PC_RPos_Nat =>
2600 Append (Result, "RPos (" & E.Nat & ')');
2602 when PC_RPos_NF =>
2603 Append (Result, "RPos (" & Str_NF (E.NF) & ')');
2605 when PC_RPos_NP =>
2606 Append (Result, "RPos (" & Str_NP (E.NP) & ')');
2608 when PC_RTab_Nat =>
2609 Append (Result, "RTab (" & E.Nat & ')');
2611 when PC_RTab_NF =>
2612 Append (Result, "RTab (" & Str_NF (E.NF) & ')');
2614 when PC_RTab_NP =>
2615 Append (Result, "RTab (" & Str_NP (E.NP) & ')');
2617 when PC_Setcur =>
2618 Append (Result, "Setcur (" & Str_NP (E.Var) & ')');
2620 when PC_Span_CH =>
2621 Append (Result, "Span ('" & E.Char & "')");
2623 when PC_Span_CS =>
2624 Append (Result, "Span (" & Image (To_Sequence (E.CS)) & ')');
2626 when PC_Span_VF =>
2627 Append (Result, "Span (" & Str_VF (E.VF) & ')');
2629 when PC_Span_VP =>
2630 Append (Result, "Span (" & Str_VP (E.VP) & ')');
2632 when PC_String =>
2633 Append (Result, Image (E.Str.all));
2635 when PC_String_2 =>
2636 Append (Result, Image (E.Str2));
2638 when PC_String_3 =>
2639 Append (Result, Image (E.Str3));
2641 when PC_String_4 =>
2642 Append (Result, Image (E.Str4));
2644 when PC_String_5 =>
2645 Append (Result, Image (E.Str5));
2647 when PC_String_6 =>
2648 Append (Result, Image (E.Str6));
2650 when PC_String_VF =>
2651 Append (Result, "(+" & Str_VF (E.VF) & ')');
2653 when PC_String_VP =>
2654 Append (Result, "(+" & Str_VP (E.VP) & ')');
2656 when PC_Succeed =>
2657 Append (Result, "Succeed");
2659 when PC_Tab_Nat =>
2660 Append (Result, "Tab (" & E.Nat & ')');
2662 when PC_Tab_NF =>
2663 Append (Result, "Tab (" & Str_NF (E.NF) & ')');
2665 when PC_Tab_NP =>
2666 Append (Result, "Tab (" & Str_NP (E.NP) & ')');
2668 when PC_Write_Imm =>
2669 Append (Result, '(');
2670 Image_Seq (E, Refs (E.Index - 1), True);
2671 Append (Result, " * " & Str_FP (Refs (E.Index - 1).FP));
2672 ER := Refs (E.Index - 1).Pthen;
2674 when PC_Write_OnM =>
2675 Append (Result, '(');
2676 Image_Seq (E.Pthen, Refs (E.Index - 1), True);
2677 Append (Result, " ** " & Str_FP (Refs (E.Index - 1).FP));
2678 ER := Refs (E.Index - 1).Pthen;
2680 -- Other pattern codes should not appear as leading elements
2682 when PC_Arb_Y |
2683 PC_Arbno_Y |
2684 PC_Assign |
2685 PC_BreakX_X |
2686 PC_EOP |
2687 PC_Fence_Y |
2688 PC_R_Remove |
2689 PC_R_Restore |
2690 PC_Unanchored =>
2691 Append (Result, "???");
2693 end case;
2695 E := ER;
2696 end Image_One;
2698 ---------------
2699 -- Image_Seq --
2700 ---------------
2702 procedure Image_Seq (E : PE_Ptr; Succ : PE_Ptr; Paren : Boolean) is
2703 Indx : constant Natural := Length (Result);
2704 E1 : PE_Ptr := E;
2705 Mult : Boolean := False;
2707 begin
2708 -- The image of EOP is "" (the null string)
2710 if E = EOP then
2711 Append (Result, """""");
2713 -- Else generate appropriate concatenation sequence
2715 else
2716 loop
2717 Image_One (E1);
2718 exit when E1 = Succ;
2719 exit when E1 = EOP;
2720 Mult := True;
2722 if Kill_Ampersand then
2723 Kill_Ampersand := False;
2724 else
2725 Append (Result, " & ");
2726 end if;
2727 end loop;
2728 end if;
2730 if Mult and Paren then
2731 Insert (Result, Indx + 1, "(");
2732 Append (Result, ")");
2733 end if;
2734 end Image_Seq;
2736 -- Start of processing for Image
2738 begin
2739 Build_Ref_Array (P.P, Refs);
2740 Image_Seq (P.P, EOP, False);
2741 return Result;
2742 end Image;
2744 -----------
2745 -- Is_In --
2746 -----------
2748 function Is_In (C : Character; Str : String) return Boolean is
2749 begin
2750 for J in Str'Range loop
2751 if Str (J) = C then
2752 return True;
2753 end if;
2754 end loop;
2756 return False;
2757 end Is_In;
2759 ---------
2760 -- Len --
2761 ---------
2763 function Len (Count : Natural) return Pattern is
2764 begin
2765 -- Note, the following is not just an optimization, it is needed
2766 -- to ensure that Arbno (Len (0)) does not generate an infinite
2767 -- matching loop (since PC_Len_Nat is OK_For_Simple_Arbno).
2769 if Count = 0 then
2770 return (AFC with 0, new PE'(PC_Null, 1, EOP));
2772 else
2773 return (AFC with 0, new PE'(PC_Len_Nat, 1, EOP, Count));
2774 end if;
2775 end Len;
2777 function Len (Count : Natural_Func) return Pattern is
2778 begin
2779 return (AFC with 0, new PE'(PC_Len_NF, 1, EOP, Count));
2780 end Len;
2782 function Len (Count : access Natural) return Pattern is
2783 begin
2784 return (AFC with 0, new PE'(PC_Len_NP, 1, EOP, Natural_Ptr (Count)));
2785 end Len;
2787 -----------------
2788 -- Logic_Error --
2789 -----------------
2791 procedure Logic_Error is
2792 begin
2793 Raise_Exception
2794 (Program_Error'Identity,
2795 "Internal logic error in GNAT.Spitbol.Patterns");
2796 end Logic_Error;
2798 -----------
2799 -- Match --
2800 -----------
2802 function Match
2803 (Subject : VString;
2804 Pat : Pattern) return Boolean
2806 Start : Natural;
2807 Stop : Natural;
2808 S : String_Access;
2809 L : Natural;
2811 begin
2812 Get_String (Subject, S, L);
2814 if Debug_Mode then
2815 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2816 else
2817 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2818 end if;
2820 return Start /= 0;
2821 end Match;
2823 function Match
2824 (Subject : String;
2825 Pat : Pattern) return Boolean
2827 Start, Stop : Natural;
2828 subtype String1 is String (1 .. Subject'Length);
2830 begin
2831 if Debug_Mode then
2832 XMatchD (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2833 else
2834 XMatch (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2835 end if;
2837 return Start /= 0;
2838 end Match;
2840 function Match
2841 (Subject : VString_Var;
2842 Pat : Pattern;
2843 Replace : VString) return Boolean
2845 Start : Natural;
2846 Stop : Natural;
2847 S : String_Access;
2848 L : Natural;
2850 begin
2851 Get_String (Subject, S, L);
2853 if Debug_Mode then
2854 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2855 else
2856 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2857 end if;
2859 if Start = 0 then
2860 return False;
2861 else
2862 Get_String (Replace, S, L);
2863 Replace_Slice
2864 (Subject'Unrestricted_Access.all, Start, Stop, S (1 .. L));
2865 return True;
2866 end if;
2867 end Match;
2869 function Match
2870 (Subject : VString_Var;
2871 Pat : Pattern;
2872 Replace : String) return Boolean
2874 Start : Natural;
2875 Stop : Natural;
2876 S : String_Access;
2877 L : Natural;
2879 begin
2880 Get_String (Subject, S, L);
2882 if Debug_Mode then
2883 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2884 else
2885 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2886 end if;
2888 if Start = 0 then
2889 return False;
2890 else
2891 Replace_Slice
2892 (Subject'Unrestricted_Access.all, Start, Stop, Replace);
2893 return True;
2894 end if;
2895 end Match;
2897 procedure Match
2898 (Subject : VString;
2899 Pat : Pattern)
2901 Start : Natural;
2902 Stop : Natural;
2903 S : String_Access;
2904 L : Natural;
2906 begin
2907 Get_String (Subject, S, L);
2909 if Debug_Mode then
2910 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2911 else
2912 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2913 end if;
2914 end Match;
2916 procedure Match
2917 (Subject : String;
2918 Pat : Pattern)
2920 Start, Stop : Natural;
2921 subtype String1 is String (1 .. Subject'Length);
2922 begin
2923 if Debug_Mode then
2924 XMatchD (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2925 else
2926 XMatch (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2927 end if;
2928 end Match;
2930 procedure Match
2931 (Subject : in out VString;
2932 Pat : Pattern;
2933 Replace : VString)
2935 Start : Natural;
2936 Stop : Natural;
2937 S : String_Access;
2938 L : Natural;
2940 begin
2941 Get_String (Subject, S, L);
2943 if Debug_Mode then
2944 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2945 else
2946 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2947 end if;
2949 if Start /= 0 then
2950 Get_String (Replace, S, L);
2951 Replace_Slice (Subject, Start, Stop, S (1 .. L));
2952 end if;
2953 end Match;
2955 procedure Match
2956 (Subject : in out VString;
2957 Pat : Pattern;
2958 Replace : String)
2960 Start : Natural;
2961 Stop : Natural;
2962 S : String_Access;
2963 L : Natural;
2965 begin
2966 Get_String (Subject, S, L);
2968 if Debug_Mode then
2969 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2970 else
2971 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
2972 end if;
2974 if Start /= 0 then
2975 Replace_Slice (Subject, Start, Stop, Replace);
2976 end if;
2977 end Match;
2979 function Match
2980 (Subject : VString;
2981 Pat : PString) return Boolean
2983 Pat_Len : constant Natural := Pat'Length;
2984 S : String_Access;
2985 L : Natural;
2987 begin
2988 Get_String (Subject, S, L);
2990 if Anchored_Mode then
2991 if Pat_Len > L then
2992 return False;
2993 else
2994 return Pat = S (1 .. Pat_Len);
2995 end if;
2997 else
2998 for J in 1 .. L - Pat_Len + 1 loop
2999 if Pat = S (J .. J + (Pat_Len - 1)) then
3000 return True;
3001 end if;
3002 end loop;
3004 return False;
3005 end if;
3006 end Match;
3008 function Match
3009 (Subject : String;
3010 Pat : PString) return Boolean
3012 Pat_Len : constant Natural := Pat'Length;
3013 Sub_Len : constant Natural := Subject'Length;
3014 SFirst : constant Natural := Subject'First;
3016 begin
3017 if Anchored_Mode then
3018 if Pat_Len > Sub_Len then
3019 return False;
3020 else
3021 return Pat = Subject (SFirst .. SFirst + Pat_Len - 1);
3022 end if;
3024 else
3025 for J in SFirst .. SFirst + Sub_Len - Pat_Len loop
3026 if Pat = Subject (J .. J + (Pat_Len - 1)) then
3027 return True;
3028 end if;
3029 end loop;
3031 return False;
3032 end if;
3033 end Match;
3035 function Match
3036 (Subject : VString_Var;
3037 Pat : PString;
3038 Replace : VString) return Boolean
3040 Start : Natural;
3041 Stop : Natural;
3042 S : String_Access;
3043 L : Natural;
3045 begin
3046 Get_String (Subject, S, L);
3048 if Debug_Mode then
3049 XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3050 else
3051 XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3052 end if;
3054 if Start = 0 then
3055 return False;
3056 else
3057 Get_String (Replace, S, L);
3058 Replace_Slice
3059 (Subject'Unrestricted_Access.all, Start, Stop, S (1 .. L));
3060 return True;
3061 end if;
3062 end Match;
3064 function Match
3065 (Subject : VString_Var;
3066 Pat : PString;
3067 Replace : String) return Boolean
3069 Start : Natural;
3070 Stop : Natural;
3071 S : String_Access;
3072 L : Natural;
3074 begin
3075 Get_String (Subject, S, L);
3077 if Debug_Mode then
3078 XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3079 else
3080 XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3081 end if;
3083 if Start = 0 then
3084 return False;
3085 else
3086 Replace_Slice
3087 (Subject'Unrestricted_Access.all, Start, Stop, Replace);
3088 return True;
3089 end if;
3090 end Match;
3092 procedure Match
3093 (Subject : VString;
3094 Pat : PString)
3096 Start : Natural;
3097 Stop : Natural;
3098 S : String_Access;
3099 L : Natural;
3101 begin
3102 Get_String (Subject, S, L);
3104 if Debug_Mode then
3105 XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3106 else
3107 XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3108 end if;
3109 end Match;
3111 procedure Match
3112 (Subject : String;
3113 Pat : PString)
3115 Start, Stop : Natural;
3116 subtype String1 is String (1 .. Subject'Length);
3118 begin
3119 if Debug_Mode then
3120 XMatchD (String1 (Subject), S_To_PE (Pat), 0, Start, Stop);
3121 else
3122 XMatch (String1 (Subject), S_To_PE (Pat), 0, Start, Stop);
3123 end if;
3124 end Match;
3126 procedure Match
3127 (Subject : in out VString;
3128 Pat : PString;
3129 Replace : VString)
3131 Start : Natural;
3132 Stop : Natural;
3133 S : String_Access;
3134 L : Natural;
3136 begin
3137 Get_String (Subject, S, L);
3139 if Debug_Mode then
3140 XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3141 else
3142 XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3143 end if;
3145 if Start /= 0 then
3146 Get_String (Replace, S, L);
3147 Replace_Slice (Subject, Start, Stop, S (1 .. L));
3148 end if;
3149 end Match;
3151 procedure Match
3152 (Subject : in out VString;
3153 Pat : PString;
3154 Replace : String)
3156 Start : Natural;
3157 Stop : Natural;
3158 S : String_Access;
3159 L : Natural;
3161 begin
3162 Get_String (Subject, S, L);
3164 if Debug_Mode then
3165 XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3166 else
3167 XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop);
3168 end if;
3170 if Start /= 0 then
3171 Replace_Slice (Subject, Start, Stop, Replace);
3172 end if;
3173 end Match;
3175 function Match
3176 (Subject : VString_Var;
3177 Pat : Pattern;
3178 Result : Match_Result_Var) return Boolean
3180 Start : Natural;
3181 Stop : Natural;
3182 S : String_Access;
3183 L : Natural;
3185 begin
3186 Get_String (Subject, S, L);
3188 if Debug_Mode then
3189 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
3190 else
3191 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
3192 end if;
3194 if Start = 0 then
3195 Result'Unrestricted_Access.all.Var := null;
3196 return False;
3198 else
3199 Result'Unrestricted_Access.all.Var := Subject'Unrestricted_Access;
3200 Result'Unrestricted_Access.all.Start := Start;
3201 Result'Unrestricted_Access.all.Stop := Stop;
3202 return True;
3203 end if;
3204 end Match;
3206 procedure Match
3207 (Subject : in out VString;
3208 Pat : Pattern;
3209 Result : out Match_Result)
3211 Start : Natural;
3212 Stop : Natural;
3213 S : String_Access;
3214 L : Natural;
3216 begin
3217 Get_String (Subject, S, L);
3219 if Debug_Mode then
3220 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
3221 else
3222 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop);
3223 end if;
3225 if Start = 0 then
3226 Result.Var := null;
3227 else
3228 Result.Var := Subject'Unrestricted_Access;
3229 Result.Start := Start;
3230 Result.Stop := Stop;
3231 end if;
3232 end Match;
3234 ---------------
3235 -- New_LineD --
3236 ---------------
3238 procedure New_LineD is
3239 begin
3240 if Internal_Debug then
3241 New_Line;
3242 end if;
3243 end New_LineD;
3245 ------------
3246 -- NotAny --
3247 ------------
3249 function NotAny (Str : String) return Pattern is
3250 begin
3251 return (AFC with 0, new PE'(PC_NotAny_CS, 1, EOP, To_Set (Str)));
3252 end NotAny;
3254 function NotAny (Str : VString) return Pattern is
3255 begin
3256 return NotAny (S (Str));
3257 end NotAny;
3259 function NotAny (Str : Character) return Pattern is
3260 begin
3261 return (AFC with 0, new PE'(PC_NotAny_CH, 1, EOP, Str));
3262 end NotAny;
3264 function NotAny (Str : Character_Set) return Pattern is
3265 begin
3266 return (AFC with 0, new PE'(PC_NotAny_CS, 1, EOP, Str));
3267 end NotAny;
3269 function NotAny (Str : access VString) return Pattern is
3270 begin
3271 return (AFC with 0, new PE'(PC_NotAny_VP, 1, EOP, VString_Ptr (Str)));
3272 end NotAny;
3274 function NotAny (Str : VString_Func) return Pattern is
3275 begin
3276 return (AFC with 0, new PE'(PC_NotAny_VF, 1, EOP, Str));
3277 end NotAny;
3279 -----------
3280 -- NSpan --
3281 -----------
3283 function NSpan (Str : String) return Pattern is
3284 begin
3285 return (AFC with 0, new PE'(PC_NSpan_CS, 1, EOP, To_Set (Str)));
3286 end NSpan;
3288 function NSpan (Str : VString) return Pattern is
3289 begin
3290 return NSpan (S (Str));
3291 end NSpan;
3293 function NSpan (Str : Character) return Pattern is
3294 begin
3295 return (AFC with 0, new PE'(PC_NSpan_CH, 1, EOP, Str));
3296 end NSpan;
3298 function NSpan (Str : Character_Set) return Pattern is
3299 begin
3300 return (AFC with 0, new PE'(PC_NSpan_CS, 1, EOP, Str));
3301 end NSpan;
3303 function NSpan (Str : access VString) return Pattern is
3304 begin
3305 return (AFC with 0, new PE'(PC_NSpan_VP, 1, EOP, VString_Ptr (Str)));
3306 end NSpan;
3308 function NSpan (Str : VString_Func) return Pattern is
3309 begin
3310 return (AFC with 0, new PE'(PC_NSpan_VF, 1, EOP, Str));
3311 end NSpan;
3313 ---------
3314 -- Pos --
3315 ---------
3317 function Pos (Count : Natural) return Pattern is
3318 begin
3319 return (AFC with 0, new PE'(PC_Pos_Nat, 1, EOP, Count));
3320 end Pos;
3322 function Pos (Count : Natural_Func) return Pattern is
3323 begin
3324 return (AFC with 0, new PE'(PC_Pos_NF, 1, EOP, Count));
3325 end Pos;
3327 function Pos (Count : access Natural) return Pattern is
3328 begin
3329 return (AFC with 0, new PE'(PC_Pos_NP, 1, EOP, Natural_Ptr (Count)));
3330 end Pos;
3332 ----------
3333 -- PutD --
3334 ----------
3336 procedure PutD (Str : String) is
3337 begin
3338 if Internal_Debug then
3339 Put (Str);
3340 end if;
3341 end PutD;
3343 ---------------
3344 -- Put_LineD --
3345 ---------------
3347 procedure Put_LineD (Str : String) is
3348 begin
3349 if Internal_Debug then
3350 Put_Line (Str);
3351 end if;
3352 end Put_LineD;
3354 -------------
3355 -- Replace --
3356 -------------
3358 procedure Replace
3359 (Result : in out Match_Result;
3360 Replace : VString)
3362 S : String_Access;
3363 L : Natural;
3365 begin
3366 Get_String (Replace, S, L);
3368 if Result.Var /= null then
3369 Replace_Slice (Result.Var.all, Result.Start, Result.Stop, S (1 .. L));
3370 Result.Var := null;
3371 end if;
3372 end Replace;
3374 ----------
3375 -- Rest --
3376 ----------
3378 function Rest return Pattern is
3379 begin
3380 return (AFC with 0, new PE'(PC_Rest, 1, EOP));
3381 end Rest;
3383 ----------
3384 -- Rpos --
3385 ----------
3387 function Rpos (Count : Natural) return Pattern is
3388 begin
3389 return (AFC with 0, new PE'(PC_RPos_Nat, 1, EOP, Count));
3390 end Rpos;
3392 function Rpos (Count : Natural_Func) return Pattern is
3393 begin
3394 return (AFC with 0, new PE'(PC_RPos_NF, 1, EOP, Count));
3395 end Rpos;
3397 function Rpos (Count : access Natural) return Pattern is
3398 begin
3399 return (AFC with 0, new PE'(PC_RPos_NP, 1, EOP, Natural_Ptr (Count)));
3400 end Rpos;
3402 ----------
3403 -- Rtab --
3404 ----------
3406 function Rtab (Count : Natural) return Pattern is
3407 begin
3408 return (AFC with 0, new PE'(PC_RTab_Nat, 1, EOP, Count));
3409 end Rtab;
3411 function Rtab (Count : Natural_Func) return Pattern is
3412 begin
3413 return (AFC with 0, new PE'(PC_RTab_NF, 1, EOP, Count));
3414 end Rtab;
3416 function Rtab (Count : access Natural) return Pattern is
3417 begin
3418 return (AFC with 0, new PE'(PC_RTab_NP, 1, EOP, Natural_Ptr (Count)));
3419 end Rtab;
3421 -------------
3422 -- S_To_PE --
3423 -------------
3425 function S_To_PE (Str : PString) return PE_Ptr is
3426 Len : constant Natural := Str'Length;
3428 begin
3429 case Len is
3430 when 0 =>
3431 return new PE'(PC_Null, 1, EOP);
3433 when 1 =>
3434 return new PE'(PC_Char, 1, EOP, Str (1));
3436 when 2 =>
3437 return new PE'(PC_String_2, 1, EOP, Str);
3439 when 3 =>
3440 return new PE'(PC_String_3, 1, EOP, Str);
3442 when 4 =>
3443 return new PE'(PC_String_4, 1, EOP, Str);
3445 when 5 =>
3446 return new PE'(PC_String_5, 1, EOP, Str);
3448 when 6 =>
3449 return new PE'(PC_String_6, 1, EOP, Str);
3451 when others =>
3452 return new PE'(PC_String, 1, EOP, new String'(Str));
3454 end case;
3455 end S_To_PE;
3457 -------------------
3458 -- Set_Successor --
3459 -------------------
3461 -- Note: this procedure is not used by the normal concatenation circuit,
3462 -- since other fixups are required on the left operand in this case, and
3463 -- they might as well be done all together.
3465 procedure Set_Successor (Pat : PE_Ptr; Succ : PE_Ptr) is
3466 begin
3467 if Pat = null then
3468 Uninitialized_Pattern;
3470 elsif Pat = EOP then
3471 Logic_Error;
3473 else
3474 declare
3475 Refs : Ref_Array (1 .. Pat.Index);
3476 -- We build a reference array for L whose N'th element points to
3477 -- the pattern element of L whose original Index value is N.
3479 P : PE_Ptr;
3481 begin
3482 Build_Ref_Array (Pat, Refs);
3484 for J in Refs'Range loop
3485 P := Refs (J);
3487 if P.Pthen = EOP then
3488 P.Pthen := Succ;
3489 end if;
3491 if P.Pcode in PC_Has_Alt and then P.Alt = EOP then
3492 P.Alt := Succ;
3493 end if;
3494 end loop;
3495 end;
3496 end if;
3497 end Set_Successor;
3499 ------------
3500 -- Setcur --
3501 ------------
3503 function Setcur (Var : access Natural) return Pattern is
3504 begin
3505 return (AFC with 0, new PE'(PC_Setcur, 1, EOP, Natural_Ptr (Var)));
3506 end Setcur;
3508 ----------
3509 -- Span --
3510 ----------
3512 function Span (Str : String) return Pattern is
3513 begin
3514 return (AFC with 0, new PE'(PC_Span_CS, 1, EOP, To_Set (Str)));
3515 end Span;
3517 function Span (Str : VString) return Pattern is
3518 begin
3519 return Span (S (Str));
3520 end Span;
3522 function Span (Str : Character) return Pattern is
3523 begin
3524 return (AFC with 0, new PE'(PC_Span_CH, 1, EOP, Str));
3525 end Span;
3527 function Span (Str : Character_Set) return Pattern is
3528 begin
3529 return (AFC with 0, new PE'(PC_Span_CS, 1, EOP, Str));
3530 end Span;
3532 function Span (Str : access VString) return Pattern is
3533 begin
3534 return (AFC with 0, new PE'(PC_Span_VP, 1, EOP, VString_Ptr (Str)));
3535 end Span;
3537 function Span (Str : VString_Func) return Pattern is
3538 begin
3539 return (AFC with 0, new PE'(PC_Span_VF, 1, EOP, Str));
3540 end Span;
3542 ------------
3543 -- Str_BF --
3544 ------------
3546 function Str_BF (A : Boolean_Func) return String is
3547 function To_A is new Unchecked_Conversion (Boolean_Func, Address);
3548 begin
3549 return "BF(" & Image (To_A (A)) & ')';
3550 end Str_BF;
3552 ------------
3553 -- Str_FP --
3554 ------------
3556 function Str_FP (A : File_Ptr) return String is
3557 begin
3558 return "FP(" & Image (A.all'Address) & ')';
3559 end Str_FP;
3561 ------------
3562 -- Str_NF --
3563 ------------
3565 function Str_NF (A : Natural_Func) return String is
3566 function To_A is new Unchecked_Conversion (Natural_Func, Address);
3567 begin
3568 return "NF(" & Image (To_A (A)) & ')';
3569 end Str_NF;
3571 ------------
3572 -- Str_NP --
3573 ------------
3575 function Str_NP (A : Natural_Ptr) return String is
3576 begin
3577 return "NP(" & Image (A.all'Address) & ')';
3578 end Str_NP;
3580 ------------
3581 -- Str_PP --
3582 ------------
3584 function Str_PP (A : Pattern_Ptr) return String is
3585 begin
3586 return "PP(" & Image (A.all'Address) & ')';
3587 end Str_PP;
3589 ------------
3590 -- Str_VF --
3591 ------------
3593 function Str_VF (A : VString_Func) return String is
3594 function To_A is new Unchecked_Conversion (VString_Func, Address);
3595 begin
3596 return "VF(" & Image (To_A (A)) & ')';
3597 end Str_VF;
3599 ------------
3600 -- Str_VP --
3601 ------------
3603 function Str_VP (A : VString_Ptr) return String is
3604 begin
3605 return "VP(" & Image (A.all'Address) & ')';
3606 end Str_VP;
3608 -------------
3609 -- Succeed --
3610 -------------
3612 function Succeed return Pattern is
3613 begin
3614 return (AFC with 1, new PE'(PC_Succeed, 1, EOP));
3615 end Succeed;
3617 ---------
3618 -- Tab --
3619 ---------
3621 function Tab (Count : Natural) return Pattern is
3622 begin
3623 return (AFC with 0, new PE'(PC_Tab_Nat, 1, EOP, Count));
3624 end Tab;
3626 function Tab (Count : Natural_Func) return Pattern is
3627 begin
3628 return (AFC with 0, new PE'(PC_Tab_NF, 1, EOP, Count));
3629 end Tab;
3631 function Tab (Count : access Natural) return Pattern is
3632 begin
3633 return (AFC with 0, new PE'(PC_Tab_NP, 1, EOP, Natural_Ptr (Count)));
3634 end Tab;
3636 ---------------------------
3637 -- Uninitialized_Pattern --
3638 ---------------------------
3640 procedure Uninitialized_Pattern is
3641 begin
3642 Raise_Exception
3643 (Program_Error'Identity,
3644 "uninitialized value of type GNAT.Spitbol.Patterns.Pattern");
3645 end Uninitialized_Pattern;
3647 ------------
3648 -- XMatch --
3649 ------------
3651 procedure XMatch
3652 (Subject : String;
3653 Pat_P : PE_Ptr;
3654 Pat_S : Natural;
3655 Start : out Natural;
3656 Stop : out Natural)
3658 Node : PE_Ptr;
3659 -- Pointer to current pattern node. Initialized from Pat_P, and then
3660 -- updated as the match proceeds through its constituent elements.
3662 Length : constant Natural := Subject'Length;
3663 -- Length of string (= Subject'Last, since Subject'First is always 1)
3665 Cursor : Integer := 0;
3666 -- If the value is non-negative, then this value is the index showing
3667 -- the current position of the match in the subject string. The next
3668 -- character to be matched is at Subject (Cursor + 1). Note that since
3669 -- our view of the subject string in XMatch always has a lower bound
3670 -- of one, regardless of original bounds, that this definition exactly
3671 -- corresponds to the cursor value as referenced by functions like Pos.
3673 -- If the value is negative, then this is a saved stack pointer,
3674 -- typically a base pointer of an inner or outer region. Cursor
3675 -- temporarily holds such a value when it is popped from the stack
3676 -- by Fail. In all cases, Cursor is reset to a proper non-negative
3677 -- cursor value before the match proceeds (e.g. by propagating the
3678 -- failure and popping a "real" cursor value from the stack.
3680 PE_Unanchored : aliased PE := (PC_Unanchored, 0, Pat_P);
3681 -- Dummy pattern element used in the unanchored case
3683 Stack : Stack_Type;
3684 -- The pattern matching failure stack for this call to Match
3686 Stack_Ptr : Stack_Range;
3687 -- Current stack pointer. This points to the top element of the stack
3688 -- that is currently in use. At the outer level this is the special
3689 -- entry placed on the stack according to the anchor mode.
3691 Stack_Init : constant Stack_Range := Stack'First + 1;
3692 -- This is the initial value of the Stack_Ptr and Stack_Base. The
3693 -- initial (Stack'First) element of the stack is not used so that
3694 -- when we pop the last element off, Stack_Ptr is still in range.
3696 Stack_Base : Stack_Range;
3697 -- This value is the stack base value, i.e. the stack pointer for the
3698 -- first history stack entry in the current stack region. See separate
3699 -- section on handling of recursive pattern matches.
3701 Assign_OnM : Boolean := False;
3702 -- Set True if assign-on-match or write-on-match operations may be
3703 -- present in the history stack, which must then be scanned on a
3704 -- successful match.
3706 procedure Pop_Region;
3707 pragma Inline (Pop_Region);
3708 -- Used at the end of processing of an inner region. if the inner
3709 -- region left no stack entries, then all trace of it is removed.
3710 -- Otherwise a PC_Restore_Region entry is pushed to ensure proper
3711 -- handling of alternatives in the inner region.
3713 procedure Push (Node : PE_Ptr);
3714 pragma Inline (Push);
3715 -- Make entry in pattern matching stack with current cursor valeu
3717 procedure Push_Region;
3718 pragma Inline (Push_Region);
3719 -- This procedure makes a new region on the history stack. The
3720 -- caller first establishes the special entry on the stack, but
3721 -- does not push the stack pointer. Then this call stacks a
3722 -- PC_Remove_Region node, on top of this entry, using the cursor
3723 -- field of the PC_Remove_Region entry to save the outer level
3724 -- stack base value, and resets the stack base to point to this
3725 -- PC_Remove_Region node.
3727 ----------------
3728 -- Pop_Region --
3729 ----------------
3731 procedure Pop_Region is
3732 begin
3733 -- If nothing was pushed in the inner region, we can just get
3734 -- rid of it entirely, leaving no traces that it was ever there
3736 if Stack_Ptr = Stack_Base then
3737 Stack_Ptr := Stack_Base - 2;
3738 Stack_Base := Stack (Stack_Ptr + 2).Cursor;
3740 -- If stuff was pushed in the inner region, then we have to
3741 -- push a PC_R_Restore node so that we properly handle possible
3742 -- rematches within the region.
3744 else
3745 Stack_Ptr := Stack_Ptr + 1;
3746 Stack (Stack_Ptr).Cursor := Stack_Base;
3747 Stack (Stack_Ptr).Node := CP_R_Restore'Access;
3748 Stack_Base := Stack (Stack_Base).Cursor;
3749 end if;
3750 end Pop_Region;
3752 ----------
3753 -- Push --
3754 ----------
3756 procedure Push (Node : PE_Ptr) is
3757 begin
3758 Stack_Ptr := Stack_Ptr + 1;
3759 Stack (Stack_Ptr).Cursor := Cursor;
3760 Stack (Stack_Ptr).Node := Node;
3761 end Push;
3763 -----------------
3764 -- Push_Region --
3765 -----------------
3767 procedure Push_Region is
3768 begin
3769 Stack_Ptr := Stack_Ptr + 2;
3770 Stack (Stack_Ptr).Cursor := Stack_Base;
3771 Stack (Stack_Ptr).Node := CP_R_Remove'Access;
3772 Stack_Base := Stack_Ptr;
3773 end Push_Region;
3775 -- Start of processing for XMatch
3777 begin
3778 if Pat_P = null then
3779 Uninitialized_Pattern;
3780 end if;
3782 -- Check we have enough stack for this pattern. This check deals with
3783 -- every possibility except a match of a recursive pattern, where we
3784 -- make a check at each recursion level.
3786 if Pat_S >= Stack_Size - 1 then
3787 raise Pattern_Stack_Overflow;
3788 end if;
3790 -- In anchored mode, the bottom entry on the stack is an abort entry
3792 if Anchored_Mode then
3793 Stack (Stack_Init).Node := CP_Cancel'Access;
3794 Stack (Stack_Init).Cursor := 0;
3796 -- In unanchored more, the bottom entry on the stack references
3797 -- the special pattern element PE_Unanchored, whose Pthen field
3798 -- points to the initial pattern element. The cursor value in this
3799 -- entry is the number of anchor moves so far.
3801 else
3802 Stack (Stack_Init).Node := PE_Unanchored'Unchecked_Access;
3803 Stack (Stack_Init).Cursor := 0;
3804 end if;
3806 Stack_Ptr := Stack_Init;
3807 Stack_Base := Stack_Ptr;
3808 Cursor := 0;
3809 Node := Pat_P;
3810 goto Match;
3812 -----------------------------------------
3813 -- Main Pattern Matching State Control --
3814 -----------------------------------------
3816 -- This is a state machine which uses gotos to change state. The
3817 -- initial state is Match, to initiate the matching of the first
3818 -- element, so the goto Match above starts the match. In the
3819 -- following descriptions, we indicate the global values that
3820 -- are relevant for the state transition.
3822 -- Come here if entire match fails
3824 <<Match_Fail>>
3825 Start := 0;
3826 Stop := 0;
3827 return;
3829 -- Come here if entire match succeeds
3831 -- Cursor current position in subject string
3833 <<Match_Succeed>>
3834 Start := Stack (Stack_Init).Cursor + 1;
3835 Stop := Cursor;
3837 -- Scan history stack for deferred assignments or writes
3839 if Assign_OnM then
3840 for S in Stack_Init .. Stack_Ptr loop
3841 if Stack (S).Node = CP_Assign'Access then
3842 declare
3843 Inner_Base : constant Stack_Range :=
3844 Stack (S + 1).Cursor;
3845 Special_Entry : constant Stack_Range :=
3846 Inner_Base - 1;
3847 Node_OnM : constant PE_Ptr :=
3848 Stack (Special_Entry).Node;
3849 Start : constant Natural :=
3850 Stack (Special_Entry).Cursor + 1;
3851 Stop : constant Natural := Stack (S).Cursor;
3853 begin
3854 if Node_OnM.Pcode = PC_Assign_OnM then
3855 Set_String (Node_OnM.VP.all, Subject (Start .. Stop));
3857 elsif Node_OnM.Pcode = PC_Write_OnM then
3858 Put_Line (Node_OnM.FP.all, Subject (Start .. Stop));
3860 else
3861 Logic_Error;
3862 end if;
3863 end;
3864 end if;
3865 end loop;
3866 end if;
3868 return;
3870 -- Come here if attempt to match current element fails
3872 -- Stack_Base current stack base
3873 -- Stack_Ptr current stack pointer
3875 <<Fail>>
3876 Cursor := Stack (Stack_Ptr).Cursor;
3877 Node := Stack (Stack_Ptr).Node;
3878 Stack_Ptr := Stack_Ptr - 1;
3879 goto Match;
3881 -- Come here if attempt to match current element succeeds
3883 -- Cursor current position in subject string
3884 -- Node pointer to node successfully matched
3885 -- Stack_Base current stack base
3886 -- Stack_Ptr current stack pointer
3888 <<Succeed>>
3889 Node := Node.Pthen;
3891 -- Come here to match the next pattern element
3893 -- Cursor current position in subject string
3894 -- Node pointer to node to be matched
3895 -- Stack_Base current stack base
3896 -- Stack_Ptr current stack pointer
3898 <<Match>>
3900 --------------------------------------------------
3901 -- Main Pattern Match Element Matching Routines --
3902 --------------------------------------------------
3904 -- Here is the case statement that processes the current node. The
3905 -- processing for each element does one of five things:
3907 -- goto Succeed to move to the successor
3908 -- goto Match_Succeed if the entire match succeeds
3909 -- goto Match_Fail if the entire match fails
3910 -- goto Fail to signal failure of current match
3912 -- Processing is NOT allowed to fall through
3914 case Node.Pcode is
3916 -- Cancel
3918 when PC_Cancel =>
3919 goto Match_Fail;
3921 -- Alternation
3923 when PC_Alt =>
3924 Push (Node.Alt);
3925 Node := Node.Pthen;
3926 goto Match;
3928 -- Any (one character case)
3930 when PC_Any_CH =>
3931 if Cursor < Length
3932 and then Subject (Cursor + 1) = Node.Char
3933 then
3934 Cursor := Cursor + 1;
3935 goto Succeed;
3936 else
3937 goto Fail;
3938 end if;
3940 -- Any (character set case)
3942 when PC_Any_CS =>
3943 if Cursor < Length
3944 and then Is_In (Subject (Cursor + 1), Node.CS)
3945 then
3946 Cursor := Cursor + 1;
3947 goto Succeed;
3948 else
3949 goto Fail;
3950 end if;
3952 -- Any (string function case)
3954 when PC_Any_VF => declare
3955 U : constant VString := Node.VF.all;
3956 S : String_Access;
3957 L : Natural;
3959 begin
3960 Get_String (U, S, L);
3962 if Cursor < Length
3963 and then Is_In (Subject (Cursor + 1), S (1 .. L))
3964 then
3965 Cursor := Cursor + 1;
3966 goto Succeed;
3967 else
3968 goto Fail;
3969 end if;
3970 end;
3972 -- Any (string pointer case)
3974 when PC_Any_VP => declare
3975 U : constant VString := Node.VP.all;
3976 S : String_Access;
3977 L : Natural;
3979 begin
3980 Get_String (U, S, L);
3982 if Cursor < Length
3983 and then Is_In (Subject (Cursor + 1), S (1 .. L))
3984 then
3985 Cursor := Cursor + 1;
3986 goto Succeed;
3987 else
3988 goto Fail;
3989 end if;
3990 end;
3992 -- Arb (initial match)
3994 when PC_Arb_X =>
3995 Push (Node.Alt);
3996 Node := Node.Pthen;
3997 goto Match;
3999 -- Arb (extension)
4001 when PC_Arb_Y =>
4002 if Cursor < Length then
4003 Cursor := Cursor + 1;
4004 Push (Node);
4005 goto Succeed;
4006 else
4007 goto Fail;
4008 end if;
4010 -- Arbno_S (simple Arbno initialize). This is the node that
4011 -- initiates the match of a simple Arbno structure.
4013 when PC_Arbno_S =>
4014 Push (Node.Alt);
4015 Node := Node.Pthen;
4016 goto Match;
4018 -- Arbno_X (Arbno initialize). This is the node that initiates
4019 -- the match of a complex Arbno structure.
4021 when PC_Arbno_X =>
4022 Push (Node.Alt);
4023 Node := Node.Pthen;
4024 goto Match;
4026 -- Arbno_Y (Arbno rematch). This is the node that is executed
4027 -- following successful matching of one instance of a complex
4028 -- Arbno pattern.
4030 when PC_Arbno_Y => declare
4031 Null_Match : constant Boolean :=
4032 Cursor = Stack (Stack_Base - 1).Cursor;
4034 begin
4035 Pop_Region;
4037 -- If arbno extension matched null, then immediately fail
4039 if Null_Match then
4040 goto Fail;
4041 end if;
4043 -- Here we must do a stack check to make sure enough stack
4044 -- is left. This check will happen once for each instance of
4045 -- the Arbno pattern that is matched. The Nat field of a
4046 -- PC_Arbno pattern contains the maximum stack entries needed
4047 -- for the Arbno with one instance and the successor pattern
4049 if Stack_Ptr + Node.Nat >= Stack'Last then
4050 raise Pattern_Stack_Overflow;
4051 end if;
4053 goto Succeed;
4054 end;
4056 -- Assign. If this node is executed, it means the assign-on-match
4057 -- or write-on-match operation will not happen after all, so we
4058 -- is propagate the failure, removing the PC_Assign node.
4060 when PC_Assign =>
4061 goto Fail;
4063 -- Assign immediate. This node performs the actual assignment
4065 when PC_Assign_Imm =>
4066 Set_String
4067 (Node.VP.all,
4068 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
4069 Pop_Region;
4070 goto Succeed;
4072 -- Assign on match. This node sets up for the eventual assignment
4074 when PC_Assign_OnM =>
4075 Stack (Stack_Base - 1).Node := Node;
4076 Push (CP_Assign'Access);
4077 Pop_Region;
4078 Assign_OnM := True;
4079 goto Succeed;
4081 -- Bal
4083 when PC_Bal =>
4084 if Cursor >= Length or else Subject (Cursor + 1) = ')' then
4085 goto Fail;
4087 elsif Subject (Cursor + 1) = '(' then
4088 declare
4089 Paren_Count : Natural := 1;
4091 begin
4092 loop
4093 Cursor := Cursor + 1;
4095 if Cursor >= Length then
4096 goto Fail;
4098 elsif Subject (Cursor + 1) = '(' then
4099 Paren_Count := Paren_Count + 1;
4101 elsif Subject (Cursor + 1) = ')' then
4102 Paren_Count := Paren_Count - 1;
4103 exit when Paren_Count = 0;
4104 end if;
4105 end loop;
4106 end;
4107 end if;
4109 Cursor := Cursor + 1;
4110 Push (Node);
4111 goto Succeed;
4113 -- Break (one character case)
4115 when PC_Break_CH =>
4116 while Cursor < Length loop
4117 if Subject (Cursor + 1) = Node.Char then
4118 goto Succeed;
4119 else
4120 Cursor := Cursor + 1;
4121 end if;
4122 end loop;
4124 goto Fail;
4126 -- Break (character set case)
4128 when PC_Break_CS =>
4129 while Cursor < Length loop
4130 if Is_In (Subject (Cursor + 1), Node.CS) then
4131 goto Succeed;
4132 else
4133 Cursor := Cursor + 1;
4134 end if;
4135 end loop;
4137 goto Fail;
4139 -- Break (string function case)
4141 when PC_Break_VF => declare
4142 U : constant VString := Node.VF.all;
4143 S : String_Access;
4144 L : Natural;
4146 begin
4147 Get_String (U, S, L);
4149 while Cursor < Length loop
4150 if Is_In (Subject (Cursor + 1), S (1 .. L)) then
4151 goto Succeed;
4152 else
4153 Cursor := Cursor + 1;
4154 end if;
4155 end loop;
4157 goto Fail;
4158 end;
4160 -- Break (string pointer case)
4162 when PC_Break_VP => declare
4163 U : constant VString := Node.VP.all;
4164 S : String_Access;
4165 L : Natural;
4167 begin
4168 Get_String (U, S, L);
4170 while Cursor < Length loop
4171 if Is_In (Subject (Cursor + 1), S (1 .. L)) then
4172 goto Succeed;
4173 else
4174 Cursor := Cursor + 1;
4175 end if;
4176 end loop;
4178 goto Fail;
4179 end;
4181 -- BreakX (one character case)
4183 when PC_BreakX_CH =>
4184 while Cursor < Length loop
4185 if Subject (Cursor + 1) = Node.Char then
4186 goto Succeed;
4187 else
4188 Cursor := Cursor + 1;
4189 end if;
4190 end loop;
4192 goto Fail;
4194 -- BreakX (character set case)
4196 when PC_BreakX_CS =>
4197 while Cursor < Length loop
4198 if Is_In (Subject (Cursor + 1), Node.CS) then
4199 goto Succeed;
4200 else
4201 Cursor := Cursor + 1;
4202 end if;
4203 end loop;
4205 goto Fail;
4207 -- BreakX (string function case)
4209 when PC_BreakX_VF => declare
4210 U : constant VString := Node.VF.all;
4211 S : String_Access;
4212 L : Natural;
4214 begin
4215 Get_String (U, S, L);
4217 while Cursor < Length loop
4218 if Is_In (Subject (Cursor + 1), S (1 .. L)) then
4219 goto Succeed;
4220 else
4221 Cursor := Cursor + 1;
4222 end if;
4223 end loop;
4225 goto Fail;
4226 end;
4228 -- BreakX (string pointer case)
4230 when PC_BreakX_VP => declare
4231 U : constant VString := Node.VP.all;
4232 S : String_Access;
4233 L : Natural;
4235 begin
4236 Get_String (U, S, L);
4238 while Cursor < Length loop
4239 if Is_In (Subject (Cursor + 1), S (1 .. L)) then
4240 goto Succeed;
4241 else
4242 Cursor := Cursor + 1;
4243 end if;
4244 end loop;
4246 goto Fail;
4247 end;
4249 -- BreakX_X (BreakX extension). See section on "Compound Pattern
4250 -- Structures". This node is the alternative that is stacked to
4251 -- skip past the break character and extend the break.
4253 when PC_BreakX_X =>
4254 Cursor := Cursor + 1;
4255 goto Succeed;
4257 -- Character (one character string)
4259 when PC_Char =>
4260 if Cursor < Length
4261 and then Subject (Cursor + 1) = Node.Char
4262 then
4263 Cursor := Cursor + 1;
4264 goto Succeed;
4265 else
4266 goto Fail;
4267 end if;
4269 -- End of Pattern
4271 when PC_EOP =>
4272 if Stack_Base = Stack_Init then
4273 goto Match_Succeed;
4275 -- End of recursive inner match. See separate section on
4276 -- handing of recursive pattern matches for details.
4278 else
4279 Node := Stack (Stack_Base - 1).Node;
4280 Pop_Region;
4281 goto Match;
4282 end if;
4284 -- Fail
4286 when PC_Fail =>
4287 goto Fail;
4289 -- Fence (built in pattern)
4291 when PC_Fence =>
4292 Push (CP_Cancel'Access);
4293 goto Succeed;
4295 -- Fence function node X. This is the node that gets control
4296 -- after a successful match of the fenced pattern.
4298 when PC_Fence_X =>
4299 Stack_Ptr := Stack_Ptr + 1;
4300 Stack (Stack_Ptr).Cursor := Stack_Base;
4301 Stack (Stack_Ptr).Node := CP_Fence_Y'Access;
4302 Stack_Base := Stack (Stack_Base).Cursor;
4303 goto Succeed;
4305 -- Fence function node Y. This is the node that gets control on
4306 -- a failure that occurs after the fenced pattern has matched.
4308 -- Note: the Cursor at this stage is actually the inner stack
4309 -- base value. We don't reset this, but we do use it to strip
4310 -- off all the entries made by the fenced pattern.
4312 when PC_Fence_Y =>
4313 Stack_Ptr := Cursor - 2;
4314 goto Fail;
4316 -- Len (integer case)
4318 when PC_Len_Nat =>
4319 if Cursor + Node.Nat > Length then
4320 goto Fail;
4321 else
4322 Cursor := Cursor + Node.Nat;
4323 goto Succeed;
4324 end if;
4326 -- Len (Integer function case)
4328 when PC_Len_NF => declare
4329 N : constant Natural := Node.NF.all;
4331 begin
4332 if Cursor + N > Length then
4333 goto Fail;
4334 else
4335 Cursor := Cursor + N;
4336 goto Succeed;
4337 end if;
4338 end;
4340 -- Len (integer pointer case)
4342 when PC_Len_NP =>
4343 if Cursor + Node.NP.all > Length then
4344 goto Fail;
4345 else
4346 Cursor := Cursor + Node.NP.all;
4347 goto Succeed;
4348 end if;
4350 -- NotAny (one character case)
4352 when PC_NotAny_CH =>
4353 if Cursor < Length
4354 and then Subject (Cursor + 1) /= Node.Char
4355 then
4356 Cursor := Cursor + 1;
4357 goto Succeed;
4358 else
4359 goto Fail;
4360 end if;
4362 -- NotAny (character set case)
4364 when PC_NotAny_CS =>
4365 if Cursor < Length
4366 and then not Is_In (Subject (Cursor + 1), Node.CS)
4367 then
4368 Cursor := Cursor + 1;
4369 goto Succeed;
4370 else
4371 goto Fail;
4372 end if;
4374 -- NotAny (string function case)
4376 when PC_NotAny_VF => declare
4377 U : constant VString := Node.VF.all;
4378 S : String_Access;
4379 L : Natural;
4381 begin
4382 Get_String (U, S, L);
4384 if Cursor < Length
4385 and then
4386 not Is_In (Subject (Cursor + 1), S (1 .. L))
4387 then
4388 Cursor := Cursor + 1;
4389 goto Succeed;
4390 else
4391 goto Fail;
4392 end if;
4393 end;
4395 -- NotAny (string pointer case)
4397 when PC_NotAny_VP => declare
4398 U : constant VString := Node.VP.all;
4399 S : String_Access;
4400 L : Natural;
4402 begin
4403 Get_String (U, S, L);
4405 if Cursor < Length
4406 and then
4407 not Is_In (Subject (Cursor + 1), S (1 .. L))
4408 then
4409 Cursor := Cursor + 1;
4410 goto Succeed;
4411 else
4412 goto Fail;
4413 end if;
4414 end;
4416 -- NSpan (one character case)
4418 when PC_NSpan_CH =>
4419 while Cursor < Length
4420 and then Subject (Cursor + 1) = Node.Char
4421 loop
4422 Cursor := Cursor + 1;
4423 end loop;
4425 goto Succeed;
4427 -- NSpan (character set case)
4429 when PC_NSpan_CS =>
4430 while Cursor < Length
4431 and then Is_In (Subject (Cursor + 1), Node.CS)
4432 loop
4433 Cursor := Cursor + 1;
4434 end loop;
4436 goto Succeed;
4438 -- NSpan (string function case)
4440 when PC_NSpan_VF => declare
4441 U : constant VString := Node.VF.all;
4442 S : String_Access;
4443 L : Natural;
4445 begin
4446 Get_String (U, S, L);
4448 while Cursor < Length
4449 and then Is_In (Subject (Cursor + 1), S (1 .. L))
4450 loop
4451 Cursor := Cursor + 1;
4452 end loop;
4454 goto Succeed;
4455 end;
4457 -- NSpan (string pointer case)
4459 when PC_NSpan_VP => declare
4460 U : constant VString := Node.VP.all;
4461 S : String_Access;
4462 L : Natural;
4464 begin
4465 Get_String (U, S, L);
4467 while Cursor < Length
4468 and then Is_In (Subject (Cursor + 1), S (1 .. L))
4469 loop
4470 Cursor := Cursor + 1;
4471 end loop;
4473 goto Succeed;
4474 end;
4476 -- Null string
4478 when PC_Null =>
4479 goto Succeed;
4481 -- Pos (integer case)
4483 when PC_Pos_Nat =>
4484 if Cursor = Node.Nat then
4485 goto Succeed;
4486 else
4487 goto Fail;
4488 end if;
4490 -- Pos (Integer function case)
4492 when PC_Pos_NF => declare
4493 N : constant Natural := Node.NF.all;
4495 begin
4496 if Cursor = N then
4497 goto Succeed;
4498 else
4499 goto Fail;
4500 end if;
4501 end;
4503 -- Pos (integer pointer case)
4505 when PC_Pos_NP =>
4506 if Cursor = Node.NP.all then
4507 goto Succeed;
4508 else
4509 goto Fail;
4510 end if;
4512 -- Predicate function
4514 when PC_Pred_Func =>
4515 if Node.BF.all then
4516 goto Succeed;
4517 else
4518 goto Fail;
4519 end if;
4521 -- Region Enter. Initiate new pattern history stack region
4523 when PC_R_Enter =>
4524 Stack (Stack_Ptr + 1).Cursor := Cursor;
4525 Push_Region;
4526 goto Succeed;
4528 -- Region Remove node. This is the node stacked by an R_Enter.
4529 -- It removes the special format stack entry right underneath, and
4530 -- then restores the outer level stack base and signals failure.
4532 -- Note: the cursor value at this stage is actually the (negative)
4533 -- stack base value for the outer level.
4535 when PC_R_Remove =>
4536 Stack_Base := Cursor;
4537 Stack_Ptr := Stack_Ptr - 1;
4538 goto Fail;
4540 -- Region restore node. This is the node stacked at the end of an
4541 -- inner level match. Its function is to restore the inner level
4542 -- region, so that alternatives in this region can be sought.
4544 -- Note: the Cursor at this stage is actually the negative of the
4545 -- inner stack base value, which we use to restore the inner region.
4547 when PC_R_Restore =>
4548 Stack_Base := Cursor;
4549 goto Fail;
4551 -- Rest
4553 when PC_Rest =>
4554 Cursor := Length;
4555 goto Succeed;
4557 -- Initiate recursive match (pattern pointer case)
4559 when PC_Rpat =>
4560 Stack (Stack_Ptr + 1).Node := Node.Pthen;
4561 Push_Region;
4563 if Stack_Ptr + Node.PP.all.Stk >= Stack_Size then
4564 raise Pattern_Stack_Overflow;
4565 else
4566 Node := Node.PP.all.P;
4567 goto Match;
4568 end if;
4570 -- RPos (integer case)
4572 when PC_RPos_Nat =>
4573 if Cursor = (Length - Node.Nat) then
4574 goto Succeed;
4575 else
4576 goto Fail;
4577 end if;
4579 -- RPos (integer function case)
4581 when PC_RPos_NF => declare
4582 N : constant Natural := Node.NF.all;
4584 begin
4585 if Length - Cursor = N then
4586 goto Succeed;
4587 else
4588 goto Fail;
4589 end if;
4590 end;
4592 -- RPos (integer pointer case)
4594 when PC_RPos_NP =>
4595 if Cursor = (Length - Node.NP.all) then
4596 goto Succeed;
4597 else
4598 goto Fail;
4599 end if;
4601 -- RTab (integer case)
4603 when PC_RTab_Nat =>
4604 if Cursor <= (Length - Node.Nat) then
4605 Cursor := Length - Node.Nat;
4606 goto Succeed;
4607 else
4608 goto Fail;
4609 end if;
4611 -- RTab (integer function case)
4613 when PC_RTab_NF => declare
4614 N : constant Natural := Node.NF.all;
4616 begin
4617 if Length - Cursor >= N then
4618 Cursor := Length - N;
4619 goto Succeed;
4620 else
4621 goto Fail;
4622 end if;
4623 end;
4625 -- RTab (integer pointer case)
4627 when PC_RTab_NP =>
4628 if Cursor <= (Length - Node.NP.all) then
4629 Cursor := Length - Node.NP.all;
4630 goto Succeed;
4631 else
4632 goto Fail;
4633 end if;
4635 -- Cursor assignment
4637 when PC_Setcur =>
4638 Node.Var.all := Cursor;
4639 goto Succeed;
4641 -- Span (one character case)
4643 when PC_Span_CH => declare
4644 P : Natural := Cursor;
4646 begin
4647 while P < Length
4648 and then Subject (P + 1) = Node.Char
4649 loop
4650 P := P + 1;
4651 end loop;
4653 if P /= Cursor then
4654 Cursor := P;
4655 goto Succeed;
4656 else
4657 goto Fail;
4658 end if;
4659 end;
4661 -- Span (character set case)
4663 when PC_Span_CS => declare
4664 P : Natural := Cursor;
4666 begin
4667 while P < Length
4668 and then Is_In (Subject (P + 1), Node.CS)
4669 loop
4670 P := P + 1;
4671 end loop;
4673 if P /= Cursor then
4674 Cursor := P;
4675 goto Succeed;
4676 else
4677 goto Fail;
4678 end if;
4679 end;
4681 -- Span (string function case)
4683 when PC_Span_VF => declare
4684 U : constant VString := Node.VF.all;
4685 S : String_Access;
4686 L : Natural;
4687 P : Natural;
4689 begin
4690 Get_String (U, S, L);
4692 P := Cursor;
4693 while P < Length
4694 and then Is_In (Subject (P + 1), S (1 .. L))
4695 loop
4696 P := P + 1;
4697 end loop;
4699 if P /= Cursor then
4700 Cursor := P;
4701 goto Succeed;
4702 else
4703 goto Fail;
4704 end if;
4705 end;
4707 -- Span (string pointer case)
4709 when PC_Span_VP => declare
4710 U : constant VString := Node.VP.all;
4711 S : String_Access;
4712 L : Natural;
4713 P : Natural;
4715 begin
4716 Get_String (U, S, L);
4718 P := Cursor;
4719 while P < Length
4720 and then Is_In (Subject (P + 1), S (1 .. L))
4721 loop
4722 P := P + 1;
4723 end loop;
4725 if P /= Cursor then
4726 Cursor := P;
4727 goto Succeed;
4728 else
4729 goto Fail;
4730 end if;
4731 end;
4733 -- String (two character case)
4735 when PC_String_2 =>
4736 if (Length - Cursor) >= 2
4737 and then Subject (Cursor + 1 .. Cursor + 2) = Node.Str2
4738 then
4739 Cursor := Cursor + 2;
4740 goto Succeed;
4741 else
4742 goto Fail;
4743 end if;
4745 -- String (three character case)
4747 when PC_String_3 =>
4748 if (Length - Cursor) >= 3
4749 and then Subject (Cursor + 1 .. Cursor + 3) = Node.Str3
4750 then
4751 Cursor := Cursor + 3;
4752 goto Succeed;
4753 else
4754 goto Fail;
4755 end if;
4757 -- String (four character case)
4759 when PC_String_4 =>
4760 if (Length - Cursor) >= 4
4761 and then Subject (Cursor + 1 .. Cursor + 4) = Node.Str4
4762 then
4763 Cursor := Cursor + 4;
4764 goto Succeed;
4765 else
4766 goto Fail;
4767 end if;
4769 -- String (five character case)
4771 when PC_String_5 =>
4772 if (Length - Cursor) >= 5
4773 and then Subject (Cursor + 1 .. Cursor + 5) = Node.Str5
4774 then
4775 Cursor := Cursor + 5;
4776 goto Succeed;
4777 else
4778 goto Fail;
4779 end if;
4781 -- String (six character case)
4783 when PC_String_6 =>
4784 if (Length - Cursor) >= 6
4785 and then Subject (Cursor + 1 .. Cursor + 6) = Node.Str6
4786 then
4787 Cursor := Cursor + 6;
4788 goto Succeed;
4789 else
4790 goto Fail;
4791 end if;
4793 -- String (case of more than six characters)
4795 when PC_String => declare
4796 Len : constant Natural := Node.Str'Length;
4798 begin
4799 if (Length - Cursor) >= Len
4800 and then Node.Str.all = Subject (Cursor + 1 .. Cursor + Len)
4801 then
4802 Cursor := Cursor + Len;
4803 goto Succeed;
4804 else
4805 goto Fail;
4806 end if;
4807 end;
4809 -- String (function case)
4811 when PC_String_VF => declare
4812 U : constant VString := Node.VF.all;
4813 S : String_Access;
4814 L : Natural;
4816 begin
4817 Get_String (U, S, L);
4819 if (Length - Cursor) >= L
4820 and then S (1 .. L) = Subject (Cursor + 1 .. Cursor + L)
4821 then
4822 Cursor := Cursor + L;
4823 goto Succeed;
4824 else
4825 goto Fail;
4826 end if;
4827 end;
4829 -- String (pointer case)
4831 when PC_String_VP => declare
4832 U : constant VString := Node.VP.all;
4833 S : String_Access;
4834 L : Natural;
4836 begin
4837 Get_String (U, S, L);
4839 if (Length - Cursor) >= L
4840 and then S (1 .. L) = Subject (Cursor + 1 .. Cursor + L)
4841 then
4842 Cursor := Cursor + L;
4843 goto Succeed;
4844 else
4845 goto Fail;
4846 end if;
4847 end;
4849 -- Succeed
4851 when PC_Succeed =>
4852 Push (Node);
4853 goto Succeed;
4855 -- Tab (integer case)
4857 when PC_Tab_Nat =>
4858 if Cursor <= Node.Nat then
4859 Cursor := Node.Nat;
4860 goto Succeed;
4861 else
4862 goto Fail;
4863 end if;
4865 -- Tab (integer function case)
4867 when PC_Tab_NF => declare
4868 N : constant Natural := Node.NF.all;
4870 begin
4871 if Cursor <= N then
4872 Cursor := N;
4873 goto Succeed;
4874 else
4875 goto Fail;
4876 end if;
4877 end;
4879 -- Tab (integer pointer case)
4881 when PC_Tab_NP =>
4882 if Cursor <= Node.NP.all then
4883 Cursor := Node.NP.all;
4884 goto Succeed;
4885 else
4886 goto Fail;
4887 end if;
4889 -- Unanchored movement
4891 when PC_Unanchored =>
4893 -- All done if we tried every position
4895 if Cursor > Length then
4896 goto Match_Fail;
4898 -- Otherwise extend the anchor point, and restack ourself
4900 else
4901 Cursor := Cursor + 1;
4902 Push (Node);
4903 goto Succeed;
4904 end if;
4906 -- Write immediate. This node performs the actual write
4908 when PC_Write_Imm =>
4909 Put_Line
4910 (Node.FP.all,
4911 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
4912 Pop_Region;
4913 goto Succeed;
4915 -- Write on match. This node sets up for the eventual write
4917 when PC_Write_OnM =>
4918 Stack (Stack_Base - 1).Node := Node;
4919 Push (CP_Assign'Access);
4920 Pop_Region;
4921 Assign_OnM := True;
4922 goto Succeed;
4924 end case;
4926 -- We are NOT allowed to fall though this case statement, since every
4927 -- match routine must end by executing a goto to the appropriate point
4928 -- in the finite state machine model.
4930 pragma Warnings (Off);
4931 Logic_Error;
4932 pragma Warnings (On);
4933 end XMatch;
4935 -------------
4936 -- XMatchD --
4937 -------------
4939 -- Maintenance note: There is a LOT of code duplication between XMatch
4940 -- and XMatchD. This is quite intentional, the point is to avoid any
4941 -- unnecessary debugging overhead in the XMatch case, but this does mean
4942 -- that any changes to XMatchD must be mirrored in XMatch. In case of
4943 -- any major changes, the proper approach is to delete XMatch, make the
4944 -- changes to XMatchD, and then make a copy of XMatchD, removing all
4945 -- calls to Dout, and all Put and Put_Line operations. This copy becomes
4946 -- the new XMatch.
4948 procedure XMatchD
4949 (Subject : String;
4950 Pat_P : PE_Ptr;
4951 Pat_S : Natural;
4952 Start : out Natural;
4953 Stop : out Natural)
4955 Node : PE_Ptr;
4956 -- Pointer to current pattern node. Initialized from Pat_P, and then
4957 -- updated as the match proceeds through its constituent elements.
4959 Length : constant Natural := Subject'Length;
4960 -- Length of string (= Subject'Last, since Subject'First is always 1)
4962 Cursor : Integer := 0;
4963 -- If the value is non-negative, then this value is the index showing
4964 -- the current position of the match in the subject string. The next
4965 -- character to be matched is at Subject (Cursor + 1). Note that since
4966 -- our view of the subject string in XMatch always has a lower bound
4967 -- of one, regardless of original bounds, that this definition exactly
4968 -- corresponds to the cursor value as referenced by functions like Pos.
4970 -- If the value is negative, then this is a saved stack pointer,
4971 -- typically a base pointer of an inner or outer region. Cursor
4972 -- temporarily holds such a value when it is popped from the stack
4973 -- by Fail. In all cases, Cursor is reset to a proper non-negative
4974 -- cursor value before the match proceeds (e.g. by propagating the
4975 -- failure and popping a "real" cursor value from the stack.
4977 PE_Unanchored : aliased PE := (PC_Unanchored, 0, Pat_P);
4978 -- Dummy pattern element used in the unanchored case
4980 Region_Level : Natural := 0;
4981 -- Keeps track of recursive region level. This is used only for
4982 -- debugging, it is the number of saved history stack base values.
4984 Stack : Stack_Type;
4985 -- The pattern matching failure stack for this call to Match
4987 Stack_Ptr : Stack_Range;
4988 -- Current stack pointer. This points to the top element of the stack
4989 -- that is currently in use. At the outer level this is the special
4990 -- entry placed on the stack according to the anchor mode.
4992 Stack_Init : constant Stack_Range := Stack'First + 1;
4993 -- This is the initial value of the Stack_Ptr and Stack_Base. The
4994 -- initial (Stack'First) element of the stack is not used so that
4995 -- when we pop the last element off, Stack_Ptr is still in range.
4997 Stack_Base : Stack_Range;
4998 -- This value is the stack base value, i.e. the stack pointer for the
4999 -- first history stack entry in the current stack region. See separate
5000 -- section on handling of recursive pattern matches.
5002 Assign_OnM : Boolean := False;
5003 -- Set True if assign-on-match or write-on-match operations may be
5004 -- present in the history stack, which must then be scanned on a
5005 -- successful match.
5007 procedure Dout (Str : String);
5008 -- Output string to standard error with bars indicating region level
5010 procedure Dout (Str : String; A : Character);
5011 -- Calls Dout with the string S ('A')
5013 procedure Dout (Str : String; A : Character_Set);
5014 -- Calls Dout with the string S ("A")
5016 procedure Dout (Str : String; A : Natural);
5017 -- Calls Dout with the string S (A)
5019 procedure Dout (Str : String; A : String);
5020 -- Calls Dout with the string S ("A")
5022 function Img (P : PE_Ptr) return String;
5023 -- Returns a string of the form #nnn where nnn is P.Index
5025 procedure Pop_Region;
5026 pragma Inline (Pop_Region);
5027 -- Used at the end of processing of an inner region. if the inner
5028 -- region left no stack entries, then all trace of it is removed.
5029 -- Otherwise a PC_Restore_Region entry is pushed to ensure proper
5030 -- handling of alternatives in the inner region.
5032 procedure Push (Node : PE_Ptr);
5033 pragma Inline (Push);
5034 -- Make entry in pattern matching stack with current cursor valeu
5036 procedure Push_Region;
5037 pragma Inline (Push_Region);
5038 -- This procedure makes a new region on the history stack. The
5039 -- caller first establishes the special entry on the stack, but
5040 -- does not push the stack pointer. Then this call stacks a
5041 -- PC_Remove_Region node, on top of this entry, using the cursor
5042 -- field of the PC_Remove_Region entry to save the outer level
5043 -- stack base value, and resets the stack base to point to this
5044 -- PC_Remove_Region node.
5046 ----------
5047 -- Dout --
5048 ----------
5050 procedure Dout (Str : String) is
5051 begin
5052 for J in 1 .. Region_Level loop
5053 Put ("| ");
5054 end loop;
5056 Put_Line (Str);
5057 end Dout;
5059 procedure Dout (Str : String; A : Character) is
5060 begin
5061 Dout (Str & " ('" & A & "')");
5062 end Dout;
5064 procedure Dout (Str : String; A : Character_Set) is
5065 begin
5066 Dout (Str & " (" & Image (To_Sequence (A)) & ')');
5067 end Dout;
5069 procedure Dout (Str : String; A : Natural) is
5070 begin
5071 Dout (Str & " (" & A & ')');
5072 end Dout;
5074 procedure Dout (Str : String; A : String) is
5075 begin
5076 Dout (Str & " (" & Image (A) & ')');
5077 end Dout;
5079 ---------
5080 -- Img --
5081 ---------
5083 function Img (P : PE_Ptr) return String is
5084 begin
5085 return "#" & Integer (P.Index) & " ";
5086 end Img;
5088 ----------------
5089 -- Pop_Region --
5090 ----------------
5092 procedure Pop_Region is
5093 begin
5094 Region_Level := Region_Level - 1;
5096 -- If nothing was pushed in the inner region, we can just get
5097 -- rid of it entirely, leaving no traces that it was ever there
5099 if Stack_Ptr = Stack_Base then
5100 Stack_Ptr := Stack_Base - 2;
5101 Stack_Base := Stack (Stack_Ptr + 2).Cursor;
5103 -- If stuff was pushed in the inner region, then we have to
5104 -- push a PC_R_Restore node so that we properly handle possible
5105 -- rematches within the region.
5107 else
5108 Stack_Ptr := Stack_Ptr + 1;
5109 Stack (Stack_Ptr).Cursor := Stack_Base;
5110 Stack (Stack_Ptr).Node := CP_R_Restore'Access;
5111 Stack_Base := Stack (Stack_Base).Cursor;
5112 end if;
5113 end Pop_Region;
5115 ----------
5116 -- Push --
5117 ----------
5119 procedure Push (Node : PE_Ptr) is
5120 begin
5121 Stack_Ptr := Stack_Ptr + 1;
5122 Stack (Stack_Ptr).Cursor := Cursor;
5123 Stack (Stack_Ptr).Node := Node;
5124 end Push;
5126 -----------------
5127 -- Push_Region --
5128 -----------------
5130 procedure Push_Region is
5131 begin
5132 Region_Level := Region_Level + 1;
5133 Stack_Ptr := Stack_Ptr + 2;
5134 Stack (Stack_Ptr).Cursor := Stack_Base;
5135 Stack (Stack_Ptr).Node := CP_R_Remove'Access;
5136 Stack_Base := Stack_Ptr;
5137 end Push_Region;
5139 -- Start of processing for XMatchD
5141 begin
5142 New_Line;
5143 Put_Line ("Initiating pattern match, subject = " & Image (Subject));
5144 Put ("--------------------------------------");
5146 for J in 1 .. Length loop
5147 Put ('-');
5148 end loop;
5150 New_Line;
5151 Put_Line ("subject length = " & Length);
5153 if Pat_P = null then
5154 Uninitialized_Pattern;
5155 end if;
5157 -- Check we have enough stack for this pattern. This check deals with
5158 -- every possibility except a match of a recursive pattern, where we
5159 -- make a check at each recursion level.
5161 if Pat_S >= Stack_Size - 1 then
5162 raise Pattern_Stack_Overflow;
5163 end if;
5165 -- In anchored mode, the bottom entry on the stack is an abort entry
5167 if Anchored_Mode then
5168 Stack (Stack_Init).Node := CP_Cancel'Access;
5169 Stack (Stack_Init).Cursor := 0;
5171 -- In unanchored more, the bottom entry on the stack references
5172 -- the special pattern element PE_Unanchored, whose Pthen field
5173 -- points to the initial pattern element. The cursor value in this
5174 -- entry is the number of anchor moves so far.
5176 else
5177 Stack (Stack_Init).Node := PE_Unanchored'Unchecked_Access;
5178 Stack (Stack_Init).Cursor := 0;
5179 end if;
5181 Stack_Ptr := Stack_Init;
5182 Stack_Base := Stack_Ptr;
5183 Cursor := 0;
5184 Node := Pat_P;
5185 goto Match;
5187 -----------------------------------------
5188 -- Main Pattern Matching State Control --
5189 -----------------------------------------
5191 -- This is a state machine which uses gotos to change state. The
5192 -- initial state is Match, to initiate the matching of the first
5193 -- element, so the goto Match above starts the match. In the
5194 -- following descriptions, we indicate the global values that
5195 -- are relevant for the state transition.
5197 -- Come here if entire match fails
5199 <<Match_Fail>>
5200 Dout ("match fails");
5201 New_Line;
5202 Start := 0;
5203 Stop := 0;
5204 return;
5206 -- Come here if entire match succeeds
5208 -- Cursor current position in subject string
5210 <<Match_Succeed>>
5211 Dout ("match succeeds");
5212 Start := Stack (Stack_Init).Cursor + 1;
5213 Stop := Cursor;
5214 Dout ("first matched character index = " & Start);
5215 Dout ("last matched character index = " & Stop);
5216 Dout ("matched substring = " & Image (Subject (Start .. Stop)));
5218 -- Scan history stack for deferred assignments or writes
5220 if Assign_OnM then
5221 for S in Stack'First .. Stack_Ptr loop
5222 if Stack (S).Node = CP_Assign'Access then
5223 declare
5224 Inner_Base : constant Stack_Range :=
5225 Stack (S + 1).Cursor;
5226 Special_Entry : constant Stack_Range :=
5227 Inner_Base - 1;
5228 Node_OnM : constant PE_Ptr :=
5229 Stack (Special_Entry).Node;
5230 Start : constant Natural :=
5231 Stack (Special_Entry).Cursor + 1;
5232 Stop : constant Natural := Stack (S).Cursor;
5234 begin
5235 if Node_OnM.Pcode = PC_Assign_OnM then
5236 Set_String (Node_OnM.VP.all, Subject (Start .. Stop));
5237 Dout
5238 (Img (Stack (S).Node) &
5239 "deferred assignment of " &
5240 Image (Subject (Start .. Stop)));
5242 elsif Node_OnM.Pcode = PC_Write_OnM then
5243 Put_Line (Node_OnM.FP.all, Subject (Start .. Stop));
5244 Dout
5245 (Img (Stack (S).Node) &
5246 "deferred write of " &
5247 Image (Subject (Start .. Stop)));
5249 else
5250 Logic_Error;
5251 end if;
5252 end;
5253 end if;
5254 end loop;
5255 end if;
5257 New_Line;
5258 return;
5260 -- Come here if attempt to match current element fails
5262 -- Stack_Base current stack base
5263 -- Stack_Ptr current stack pointer
5265 <<Fail>>
5266 Cursor := Stack (Stack_Ptr).Cursor;
5267 Node := Stack (Stack_Ptr).Node;
5268 Stack_Ptr := Stack_Ptr - 1;
5270 if Cursor >= 0 then
5271 Dout ("failure, cursor reset to " & Cursor);
5272 end if;
5274 goto Match;
5276 -- Come here if attempt to match current element succeeds
5278 -- Cursor current position in subject string
5279 -- Node pointer to node successfully matched
5280 -- Stack_Base current stack base
5281 -- Stack_Ptr current stack pointer
5283 <<Succeed>>
5284 Dout ("success, cursor = " & Cursor);
5285 Node := Node.Pthen;
5287 -- Come here to match the next pattern element
5289 -- Cursor current position in subject string
5290 -- Node pointer to node to be matched
5291 -- Stack_Base current stack base
5292 -- Stack_Ptr current stack pointer
5294 <<Match>>
5296 --------------------------------------------------
5297 -- Main Pattern Match Element Matching Routines --
5298 --------------------------------------------------
5300 -- Here is the case statement that processes the current node. The
5301 -- processing for each element does one of five things:
5303 -- goto Succeed to move to the successor
5304 -- goto Match_Succeed if the entire match succeeds
5305 -- goto Match_Fail if the entire match fails
5306 -- goto Fail to signal failure of current match
5308 -- Processing is NOT allowed to fall through
5310 case Node.Pcode is
5312 -- Cancel
5314 when PC_Cancel =>
5315 Dout (Img (Node) & "matching Cancel");
5316 goto Match_Fail;
5318 -- Alternation
5320 when PC_Alt =>
5321 Dout
5322 (Img (Node) & "setting up alternative " & Img (Node.Alt));
5323 Push (Node.Alt);
5324 Node := Node.Pthen;
5325 goto Match;
5327 -- Any (one character case)
5329 when PC_Any_CH =>
5330 Dout (Img (Node) & "matching Any", Node.Char);
5332 if Cursor < Length
5333 and then Subject (Cursor + 1) = Node.Char
5334 then
5335 Cursor := Cursor + 1;
5336 goto Succeed;
5337 else
5338 goto Fail;
5339 end if;
5341 -- Any (character set case)
5343 when PC_Any_CS =>
5344 Dout (Img (Node) & "matching Any", Node.CS);
5346 if Cursor < Length
5347 and then Is_In (Subject (Cursor + 1), Node.CS)
5348 then
5349 Cursor := Cursor + 1;
5350 goto Succeed;
5351 else
5352 goto Fail;
5353 end if;
5355 -- Any (string function case)
5357 when PC_Any_VF => declare
5358 U : constant VString := Node.VF.all;
5359 S : String_Access;
5360 L : Natural;
5362 begin
5363 Get_String (U, S, L);
5365 Dout (Img (Node) & "matching Any", S (1 .. L));
5367 if Cursor < Length
5368 and then Is_In (Subject (Cursor + 1), S (1 .. L))
5369 then
5370 Cursor := Cursor + 1;
5371 goto Succeed;
5372 else
5373 goto Fail;
5374 end if;
5375 end;
5377 -- Any (string pointer case)
5379 when PC_Any_VP => declare
5380 U : constant VString := Node.VP.all;
5381 S : String_Access;
5382 L : Natural;
5384 begin
5385 Get_String (U, S, L);
5386 Dout (Img (Node) & "matching Any", S (1 .. L));
5388 if Cursor < Length
5389 and then Is_In (Subject (Cursor + 1), S (1 .. L))
5390 then
5391 Cursor := Cursor + 1;
5392 goto Succeed;
5393 else
5394 goto Fail;
5395 end if;
5396 end;
5398 -- Arb (initial match)
5400 when PC_Arb_X =>
5401 Dout (Img (Node) & "matching Arb");
5402 Push (Node.Alt);
5403 Node := Node.Pthen;
5404 goto Match;
5406 -- Arb (extension)
5408 when PC_Arb_Y =>
5409 Dout (Img (Node) & "extending Arb");
5411 if Cursor < Length then
5412 Cursor := Cursor + 1;
5413 Push (Node);
5414 goto Succeed;
5415 else
5416 goto Fail;
5417 end if;
5419 -- Arbno_S (simple Arbno initialize). This is the node that
5420 -- initiates the match of a simple Arbno structure.
5422 when PC_Arbno_S =>
5423 Dout (Img (Node) &
5424 "setting up Arbno alternative " & Img (Node.Alt));
5425 Push (Node.Alt);
5426 Node := Node.Pthen;
5427 goto Match;
5429 -- Arbno_X (Arbno initialize). This is the node that initiates
5430 -- the match of a complex Arbno structure.
5432 when PC_Arbno_X =>
5433 Dout (Img (Node) &
5434 "setting up Arbno alternative " & Img (Node.Alt));
5435 Push (Node.Alt);
5436 Node := Node.Pthen;
5437 goto Match;
5439 -- Arbno_Y (Arbno rematch). This is the node that is executed
5440 -- following successful matching of one instance of a complex
5441 -- Arbno pattern.
5443 when PC_Arbno_Y => declare
5444 Null_Match : constant Boolean :=
5445 Cursor = Stack (Stack_Base - 1).Cursor;
5447 begin
5448 Dout (Img (Node) & "extending Arbno");
5449 Pop_Region;
5451 -- If arbno extension matched null, then immediately fail
5453 if Null_Match then
5454 Dout ("Arbno extension matched null, so fails");
5455 goto Fail;
5456 end if;
5458 -- Here we must do a stack check to make sure enough stack
5459 -- is left. This check will happen once for each instance of
5460 -- the Arbno pattern that is matched. The Nat field of a
5461 -- PC_Arbno pattern contains the maximum stack entries needed
5462 -- for the Arbno with one instance and the successor pattern
5464 if Stack_Ptr + Node.Nat >= Stack'Last then
5465 raise Pattern_Stack_Overflow;
5466 end if;
5468 goto Succeed;
5469 end;
5471 -- Assign. If this node is executed, it means the assign-on-match
5472 -- or write-on-match operation will not happen after all, so we
5473 -- is propagate the failure, removing the PC_Assign node.
5475 when PC_Assign =>
5476 Dout (Img (Node) & "deferred assign/write cancelled");
5477 goto Fail;
5479 -- Assign immediate. This node performs the actual assignment
5481 when PC_Assign_Imm =>
5482 Dout
5483 (Img (Node) & "executing immediate assignment of " &
5484 Image (Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor)));
5485 Set_String
5486 (Node.VP.all,
5487 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
5488 Pop_Region;
5489 goto Succeed;
5491 -- Assign on match. This node sets up for the eventual assignment
5493 when PC_Assign_OnM =>
5494 Dout (Img (Node) & "registering deferred assignment");
5495 Stack (Stack_Base - 1).Node := Node;
5496 Push (CP_Assign'Access);
5497 Pop_Region;
5498 Assign_OnM := True;
5499 goto Succeed;
5501 -- Bal
5503 when PC_Bal =>
5504 Dout (Img (Node) & "matching or extending Bal");
5505 if Cursor >= Length or else Subject (Cursor + 1) = ')' then
5506 goto Fail;
5508 elsif Subject (Cursor + 1) = '(' then
5509 declare
5510 Paren_Count : Natural := 1;
5512 begin
5513 loop
5514 Cursor := Cursor + 1;
5516 if Cursor >= Length then
5517 goto Fail;
5519 elsif Subject (Cursor + 1) = '(' then
5520 Paren_Count := Paren_Count + 1;
5522 elsif Subject (Cursor + 1) = ')' then
5523 Paren_Count := Paren_Count - 1;
5524 exit when Paren_Count = 0;
5525 end if;
5526 end loop;
5527 end;
5528 end if;
5530 Cursor := Cursor + 1;
5531 Push (Node);
5532 goto Succeed;
5534 -- Break (one character case)
5536 when PC_Break_CH =>
5537 Dout (Img (Node) & "matching Break", Node.Char);
5539 while Cursor < Length loop
5540 if Subject (Cursor + 1) = Node.Char then
5541 goto Succeed;
5542 else
5543 Cursor := Cursor + 1;
5544 end if;
5545 end loop;
5547 goto Fail;
5549 -- Break (character set case)
5551 when PC_Break_CS =>
5552 Dout (Img (Node) & "matching Break", Node.CS);
5554 while Cursor < Length loop
5555 if Is_In (Subject (Cursor + 1), Node.CS) then
5556 goto Succeed;
5557 else
5558 Cursor := Cursor + 1;
5559 end if;
5560 end loop;
5562 goto Fail;
5564 -- Break (string function case)
5566 when PC_Break_VF => declare
5567 U : constant VString := Node.VF.all;
5568 S : String_Access;
5569 L : Natural;
5571 begin
5572 Get_String (U, S, L);
5573 Dout (Img (Node) & "matching Break", S (1 .. L));
5575 while Cursor < Length loop
5576 if Is_In (Subject (Cursor + 1), S (1 .. L)) then
5577 goto Succeed;
5578 else
5579 Cursor := Cursor + 1;
5580 end if;
5581 end loop;
5583 goto Fail;
5584 end;
5586 -- Break (string pointer case)
5588 when PC_Break_VP => declare
5589 U : constant VString := Node.VP.all;
5590 S : String_Access;
5591 L : Natural;
5593 begin
5594 Get_String (U, S, L);
5595 Dout (Img (Node) & "matching Break", S (1 .. L));
5597 while Cursor < Length loop
5598 if Is_In (Subject (Cursor + 1), S (1 .. L)) then
5599 goto Succeed;
5600 else
5601 Cursor := Cursor + 1;
5602 end if;
5603 end loop;
5605 goto Fail;
5606 end;
5608 -- BreakX (one character case)
5610 when PC_BreakX_CH =>
5611 Dout (Img (Node) & "matching BreakX", Node.Char);
5613 while Cursor < Length loop
5614 if Subject (Cursor + 1) = Node.Char then
5615 goto Succeed;
5616 else
5617 Cursor := Cursor + 1;
5618 end if;
5619 end loop;
5621 goto Fail;
5623 -- BreakX (character set case)
5625 when PC_BreakX_CS =>
5626 Dout (Img (Node) & "matching BreakX", Node.CS);
5628 while Cursor < Length loop
5629 if Is_In (Subject (Cursor + 1), Node.CS) then
5630 goto Succeed;
5631 else
5632 Cursor := Cursor + 1;
5633 end if;
5634 end loop;
5636 goto Fail;
5638 -- BreakX (string function case)
5640 when PC_BreakX_VF => declare
5641 U : constant VString := Node.VF.all;
5642 S : String_Access;
5643 L : Natural;
5645 begin
5646 Get_String (U, S, L);
5647 Dout (Img (Node) & "matching BreakX", S (1 .. L));
5649 while Cursor < Length loop
5650 if Is_In (Subject (Cursor + 1), S (1 .. L)) then
5651 goto Succeed;
5652 else
5653 Cursor := Cursor + 1;
5654 end if;
5655 end loop;
5657 goto Fail;
5658 end;
5660 -- BreakX (string pointer case)
5662 when PC_BreakX_VP => declare
5663 U : constant VString := Node.VP.all;
5664 S : String_Access;
5665 L : Natural;
5667 begin
5668 Get_String (U, S, L);
5669 Dout (Img (Node) & "matching BreakX", S (1 .. L));
5671 while Cursor < Length loop
5672 if Is_In (Subject (Cursor + 1), S (1 .. L)) then
5673 goto Succeed;
5674 else
5675 Cursor := Cursor + 1;
5676 end if;
5677 end loop;
5679 goto Fail;
5680 end;
5682 -- BreakX_X (BreakX extension). See section on "Compound Pattern
5683 -- Structures". This node is the alternative that is stacked
5684 -- to skip past the break character and extend the break.
5686 when PC_BreakX_X =>
5687 Dout (Img (Node) & "extending BreakX");
5688 Cursor := Cursor + 1;
5689 goto Succeed;
5691 -- Character (one character string)
5693 when PC_Char =>
5694 Dout (Img (Node) & "matching '" & Node.Char & ''');
5696 if Cursor < Length
5697 and then Subject (Cursor + 1) = Node.Char
5698 then
5699 Cursor := Cursor + 1;
5700 goto Succeed;
5701 else
5702 goto Fail;
5703 end if;
5705 -- End of Pattern
5707 when PC_EOP =>
5708 if Stack_Base = Stack_Init then
5709 Dout ("end of pattern");
5710 goto Match_Succeed;
5712 -- End of recursive inner match. See separate section on
5713 -- handing of recursive pattern matches for details.
5715 else
5716 Dout ("terminating recursive match");
5717 Node := Stack (Stack_Base - 1).Node;
5718 Pop_Region;
5719 goto Match;
5720 end if;
5722 -- Fail
5724 when PC_Fail =>
5725 Dout (Img (Node) & "matching Fail");
5726 goto Fail;
5728 -- Fence (built in pattern)
5730 when PC_Fence =>
5731 Dout (Img (Node) & "matching Fence");
5732 Push (CP_Cancel'Access);
5733 goto Succeed;
5735 -- Fence function node X. This is the node that gets control
5736 -- after a successful match of the fenced pattern.
5738 when PC_Fence_X =>
5739 Dout (Img (Node) & "matching Fence function");
5740 Stack_Ptr := Stack_Ptr + 1;
5741 Stack (Stack_Ptr).Cursor := Stack_Base;
5742 Stack (Stack_Ptr).Node := CP_Fence_Y'Access;
5743 Stack_Base := Stack (Stack_Base).Cursor;
5744 Region_Level := Region_Level - 1;
5745 goto Succeed;
5747 -- Fence function node Y. This is the node that gets control on
5748 -- a failure that occurs after the fenced pattern has matched.
5750 -- Note: the Cursor at this stage is actually the inner stack
5751 -- base value. We don't reset this, but we do use it to strip
5752 -- off all the entries made by the fenced pattern.
5754 when PC_Fence_Y =>
5755 Dout (Img (Node) & "pattern matched by Fence caused failure");
5756 Stack_Ptr := Cursor - 2;
5757 goto Fail;
5759 -- Len (integer case)
5761 when PC_Len_Nat =>
5762 Dout (Img (Node) & "matching Len", Node.Nat);
5764 if Cursor + Node.Nat > Length then
5765 goto Fail;
5766 else
5767 Cursor := Cursor + Node.Nat;
5768 goto Succeed;
5769 end if;
5771 -- Len (Integer function case)
5773 when PC_Len_NF => declare
5774 N : constant Natural := Node.NF.all;
5776 begin
5777 Dout (Img (Node) & "matching Len", N);
5779 if Cursor + N > Length then
5780 goto Fail;
5781 else
5782 Cursor := Cursor + N;
5783 goto Succeed;
5784 end if;
5785 end;
5787 -- Len (integer pointer case)
5789 when PC_Len_NP =>
5790 Dout (Img (Node) & "matching Len", Node.NP.all);
5792 if Cursor + Node.NP.all > Length then
5793 goto Fail;
5794 else
5795 Cursor := Cursor + Node.NP.all;
5796 goto Succeed;
5797 end if;
5799 -- NotAny (one character case)
5801 when PC_NotAny_CH =>
5802 Dout (Img (Node) & "matching NotAny", Node.Char);
5804 if Cursor < Length
5805 and then Subject (Cursor + 1) /= Node.Char
5806 then
5807 Cursor := Cursor + 1;
5808 goto Succeed;
5809 else
5810 goto Fail;
5811 end if;
5813 -- NotAny (character set case)
5815 when PC_NotAny_CS =>
5816 Dout (Img (Node) & "matching NotAny", Node.CS);
5818 if Cursor < Length
5819 and then not Is_In (Subject (Cursor + 1), Node.CS)
5820 then
5821 Cursor := Cursor + 1;
5822 goto Succeed;
5823 else
5824 goto Fail;
5825 end if;
5827 -- NotAny (string function case)
5829 when PC_NotAny_VF => declare
5830 U : constant VString := Node.VF.all;
5831 S : String_Access;
5832 L : Natural;
5834 begin
5835 Get_String (U, S, L);
5836 Dout (Img (Node) & "matching NotAny", S (1 .. L));
5838 if Cursor < Length
5839 and then
5840 not Is_In (Subject (Cursor + 1), S (1 .. L))
5841 then
5842 Cursor := Cursor + 1;
5843 goto Succeed;
5844 else
5845 goto Fail;
5846 end if;
5847 end;
5849 -- NotAny (string pointer case)
5851 when PC_NotAny_VP => declare
5852 U : constant VString := Node.VP.all;
5853 S : String_Access;
5854 L : Natural;
5856 begin
5857 Get_String (U, S, L);
5858 Dout (Img (Node) & "matching NotAny", S (1 .. L));
5860 if Cursor < Length
5861 and then
5862 not Is_In (Subject (Cursor + 1), S (1 .. L))
5863 then
5864 Cursor := Cursor + 1;
5865 goto Succeed;
5866 else
5867 goto Fail;
5868 end if;
5869 end;
5871 -- NSpan (one character case)
5873 when PC_NSpan_CH =>
5874 Dout (Img (Node) & "matching NSpan", Node.Char);
5876 while Cursor < Length
5877 and then Subject (Cursor + 1) = Node.Char
5878 loop
5879 Cursor := Cursor + 1;
5880 end loop;
5882 goto Succeed;
5884 -- NSpan (character set case)
5886 when PC_NSpan_CS =>
5887 Dout (Img (Node) & "matching NSpan", Node.CS);
5889 while Cursor < Length
5890 and then Is_In (Subject (Cursor + 1), Node.CS)
5891 loop
5892 Cursor := Cursor + 1;
5893 end loop;
5895 goto Succeed;
5897 -- NSpan (string function case)
5899 when PC_NSpan_VF => declare
5900 U : constant VString := Node.VF.all;
5901 S : String_Access;
5902 L : Natural;
5904 begin
5905 Get_String (U, S, L);
5906 Dout (Img (Node) & "matching NSpan", S (1 .. L));
5908 while Cursor < Length
5909 and then Is_In (Subject (Cursor + 1), S (1 .. L))
5910 loop
5911 Cursor := Cursor + 1;
5912 end loop;
5914 goto Succeed;
5915 end;
5917 -- NSpan (string pointer case)
5919 when PC_NSpan_VP => declare
5920 U : constant VString := Node.VP.all;
5921 S : String_Access;
5922 L : Natural;
5924 begin
5925 Get_String (U, S, L);
5926 Dout (Img (Node) & "matching NSpan", S (1 .. L));
5928 while Cursor < Length
5929 and then Is_In (Subject (Cursor + 1), S (1 .. L))
5930 loop
5931 Cursor := Cursor + 1;
5932 end loop;
5934 goto Succeed;
5935 end;
5937 when PC_Null =>
5938 Dout (Img (Node) & "matching null");
5939 goto Succeed;
5941 -- Pos (integer case)
5943 when PC_Pos_Nat =>
5944 Dout (Img (Node) & "matching Pos", Node.Nat);
5946 if Cursor = Node.Nat then
5947 goto Succeed;
5948 else
5949 goto Fail;
5950 end if;
5952 -- Pos (Integer function case)
5954 when PC_Pos_NF => declare
5955 N : constant Natural := Node.NF.all;
5957 begin
5958 Dout (Img (Node) & "matching Pos", N);
5960 if Cursor = N then
5961 goto Succeed;
5962 else
5963 goto Fail;
5964 end if;
5965 end;
5967 -- Pos (integer pointer case)
5969 when PC_Pos_NP =>
5970 Dout (Img (Node) & "matching Pos", Node.NP.all);
5972 if Cursor = Node.NP.all then
5973 goto Succeed;
5974 else
5975 goto Fail;
5976 end if;
5978 -- Predicate function
5980 when PC_Pred_Func =>
5981 Dout (Img (Node) & "matching predicate function");
5983 if Node.BF.all then
5984 goto Succeed;
5985 else
5986 goto Fail;
5987 end if;
5989 -- Region Enter. Initiate new pattern history stack region
5991 when PC_R_Enter =>
5992 Dout (Img (Node) & "starting match of nested pattern");
5993 Stack (Stack_Ptr + 1).Cursor := Cursor;
5994 Push_Region;
5995 goto Succeed;
5997 -- Region Remove node. This is the node stacked by an R_Enter.
5998 -- It removes the special format stack entry right underneath, and
5999 -- then restores the outer level stack base and signals failure.
6001 -- Note: the cursor value at this stage is actually the (negative)
6002 -- stack base value for the outer level.
6004 when PC_R_Remove =>
6005 Dout ("failure, match of nested pattern terminated");
6006 Stack_Base := Cursor;
6007 Region_Level := Region_Level - 1;
6008 Stack_Ptr := Stack_Ptr - 1;
6009 goto Fail;
6011 -- Region restore node. This is the node stacked at the end of an
6012 -- inner level match. Its function is to restore the inner level
6013 -- region, so that alternatives in this region can be sought.
6015 -- Note: the Cursor at this stage is actually the negative of the
6016 -- inner stack base value, which we use to restore the inner region.
6018 when PC_R_Restore =>
6019 Dout ("failure, search for alternatives in nested pattern");
6020 Region_Level := Region_Level + 1;
6021 Stack_Base := Cursor;
6022 goto Fail;
6024 -- Rest
6026 when PC_Rest =>
6027 Dout (Img (Node) & "matching Rest");
6028 Cursor := Length;
6029 goto Succeed;
6031 -- Initiate recursive match (pattern pointer case)
6033 when PC_Rpat =>
6034 Stack (Stack_Ptr + 1).Node := Node.Pthen;
6035 Push_Region;
6036 Dout (Img (Node) & "initiating recursive match");
6038 if Stack_Ptr + Node.PP.all.Stk >= Stack_Size then
6039 raise Pattern_Stack_Overflow;
6040 else
6041 Node := Node.PP.all.P;
6042 goto Match;
6043 end if;
6045 -- RPos (integer case)
6047 when PC_RPos_Nat =>
6048 Dout (Img (Node) & "matching RPos", Node.Nat);
6050 if Cursor = (Length - Node.Nat) then
6051 goto Succeed;
6052 else
6053 goto Fail;
6054 end if;
6056 -- RPos (integer function case)
6058 when PC_RPos_NF => declare
6059 N : constant Natural := Node.NF.all;
6061 begin
6062 Dout (Img (Node) & "matching RPos", N);
6064 if Length - Cursor = N then
6065 goto Succeed;
6066 else
6067 goto Fail;
6068 end if;
6069 end;
6071 -- RPos (integer pointer case)
6073 when PC_RPos_NP =>
6074 Dout (Img (Node) & "matching RPos", Node.NP.all);
6076 if Cursor = (Length - Node.NP.all) then
6077 goto Succeed;
6078 else
6079 goto Fail;
6080 end if;
6082 -- RTab (integer case)
6084 when PC_RTab_Nat =>
6085 Dout (Img (Node) & "matching RTab", Node.Nat);
6087 if Cursor <= (Length - Node.Nat) then
6088 Cursor := Length - Node.Nat;
6089 goto Succeed;
6090 else
6091 goto Fail;
6092 end if;
6094 -- RTab (integer function case)
6096 when PC_RTab_NF => declare
6097 N : constant Natural := Node.NF.all;
6099 begin
6100 Dout (Img (Node) & "matching RPos", N);
6102 if Length - Cursor >= N then
6103 Cursor := Length - N;
6104 goto Succeed;
6105 else
6106 goto Fail;
6107 end if;
6108 end;
6110 -- RTab (integer pointer case)
6112 when PC_RTab_NP =>
6113 Dout (Img (Node) & "matching RPos", Node.NP.all);
6115 if Cursor <= (Length - Node.NP.all) then
6116 Cursor := Length - Node.NP.all;
6117 goto Succeed;
6118 else
6119 goto Fail;
6120 end if;
6122 -- Cursor assignment
6124 when PC_Setcur =>
6125 Dout (Img (Node) & "matching Setcur");
6126 Node.Var.all := Cursor;
6127 goto Succeed;
6129 -- Span (one character case)
6131 when PC_Span_CH => declare
6132 P : Natural := Cursor;
6134 begin
6135 Dout (Img (Node) & "matching Span", Node.Char);
6137 while P < Length
6138 and then Subject (P + 1) = Node.Char
6139 loop
6140 P := P + 1;
6141 end loop;
6143 if P /= Cursor then
6144 Cursor := P;
6145 goto Succeed;
6146 else
6147 goto Fail;
6148 end if;
6149 end;
6151 -- Span (character set case)
6153 when PC_Span_CS => declare
6154 P : Natural := Cursor;
6156 begin
6157 Dout (Img (Node) & "matching Span", Node.CS);
6159 while P < Length
6160 and then Is_In (Subject (P + 1), Node.CS)
6161 loop
6162 P := P + 1;
6163 end loop;
6165 if P /= Cursor then
6166 Cursor := P;
6167 goto Succeed;
6168 else
6169 goto Fail;
6170 end if;
6171 end;
6173 -- Span (string function case)
6175 when PC_Span_VF => declare
6176 U : constant VString := Node.VF.all;
6177 S : String_Access;
6178 L : Natural;
6179 P : Natural;
6181 begin
6182 Get_String (U, S, L);
6183 Dout (Img (Node) & "matching Span", S (1 .. L));
6185 P := Cursor;
6186 while P < Length
6187 and then Is_In (Subject (P + 1), S (1 .. L))
6188 loop
6189 P := P + 1;
6190 end loop;
6192 if P /= Cursor then
6193 Cursor := P;
6194 goto Succeed;
6195 else
6196 goto Fail;
6197 end if;
6198 end;
6200 -- Span (string pointer case)
6202 when PC_Span_VP => declare
6203 U : constant VString := Node.VP.all;
6204 S : String_Access;
6205 L : Natural;
6206 P : Natural;
6208 begin
6209 Get_String (U, S, L);
6210 Dout (Img (Node) & "matching Span", S (1 .. L));
6212 P := Cursor;
6213 while P < Length
6214 and then Is_In (Subject (P + 1), S (1 .. L))
6215 loop
6216 P := P + 1;
6217 end loop;
6219 if P /= Cursor then
6220 Cursor := P;
6221 goto Succeed;
6222 else
6223 goto Fail;
6224 end if;
6225 end;
6227 -- String (two character case)
6229 when PC_String_2 =>
6230 Dout (Img (Node) & "matching " & Image (Node.Str2));
6232 if (Length - Cursor) >= 2
6233 and then Subject (Cursor + 1 .. Cursor + 2) = Node.Str2
6234 then
6235 Cursor := Cursor + 2;
6236 goto Succeed;
6237 else
6238 goto Fail;
6239 end if;
6241 -- String (three character case)
6243 when PC_String_3 =>
6244 Dout (Img (Node) & "matching " & Image (Node.Str3));
6246 if (Length - Cursor) >= 3
6247 and then Subject (Cursor + 1 .. Cursor + 3) = Node.Str3
6248 then
6249 Cursor := Cursor + 3;
6250 goto Succeed;
6251 else
6252 goto Fail;
6253 end if;
6255 -- String (four character case)
6257 when PC_String_4 =>
6258 Dout (Img (Node) & "matching " & Image (Node.Str4));
6260 if (Length - Cursor) >= 4
6261 and then Subject (Cursor + 1 .. Cursor + 4) = Node.Str4
6262 then
6263 Cursor := Cursor + 4;
6264 goto Succeed;
6265 else
6266 goto Fail;
6267 end if;
6269 -- String (five character case)
6271 when PC_String_5 =>
6272 Dout (Img (Node) & "matching " & Image (Node.Str5));
6274 if (Length - Cursor) >= 5
6275 and then Subject (Cursor + 1 .. Cursor + 5) = Node.Str5
6276 then
6277 Cursor := Cursor + 5;
6278 goto Succeed;
6279 else
6280 goto Fail;
6281 end if;
6283 -- String (six character case)
6285 when PC_String_6 =>
6286 Dout (Img (Node) & "matching " & Image (Node.Str6));
6288 if (Length - Cursor) >= 6
6289 and then Subject (Cursor + 1 .. Cursor + 6) = Node.Str6
6290 then
6291 Cursor := Cursor + 6;
6292 goto Succeed;
6293 else
6294 goto Fail;
6295 end if;
6297 -- String (case of more than six characters)
6299 when PC_String => declare
6300 Len : constant Natural := Node.Str'Length;
6302 begin
6303 Dout (Img (Node) & "matching " & Image (Node.Str.all));
6305 if (Length - Cursor) >= Len
6306 and then Node.Str.all = Subject (Cursor + 1 .. Cursor + Len)
6307 then
6308 Cursor := Cursor + Len;
6309 goto Succeed;
6310 else
6311 goto Fail;
6312 end if;
6313 end;
6315 -- String (function case)
6317 when PC_String_VF => declare
6318 U : constant VString := Node.VF.all;
6319 S : String_Access;
6320 L : Natural;
6322 begin
6323 Get_String (U, S, L);
6324 Dout (Img (Node) & "matching " & Image (S (1 .. L)));
6326 if (Length - Cursor) >= L
6327 and then S (1 .. L) = Subject (Cursor + 1 .. Cursor + L)
6328 then
6329 Cursor := Cursor + L;
6330 goto Succeed;
6331 else
6332 goto Fail;
6333 end if;
6334 end;
6336 -- String (vstring pointer case)
6338 when PC_String_VP => declare
6339 U : constant VString := Node.VP.all;
6340 S : String_Access;
6341 L : Natural;
6343 begin
6344 Get_String (U, S, L);
6345 Dout (Img (Node) & "matching " & Image (S (1 .. L)));
6347 if (Length - Cursor) >= L
6348 and then S (1 .. L) = Subject (Cursor + 1 .. Cursor + L)
6349 then
6350 Cursor := Cursor + L;
6351 goto Succeed;
6352 else
6353 goto Fail;
6354 end if;
6355 end;
6357 -- Succeed
6359 when PC_Succeed =>
6360 Dout (Img (Node) & "matching Succeed");
6361 Push (Node);
6362 goto Succeed;
6364 -- Tab (integer case)
6366 when PC_Tab_Nat =>
6367 Dout (Img (Node) & "matching Tab", Node.Nat);
6369 if Cursor <= Node.Nat then
6370 Cursor := Node.Nat;
6371 goto Succeed;
6372 else
6373 goto Fail;
6374 end if;
6376 -- Tab (integer function case)
6378 when PC_Tab_NF => declare
6379 N : constant Natural := Node.NF.all;
6381 begin
6382 Dout (Img (Node) & "matching Tab ", N);
6384 if Cursor <= N then
6385 Cursor := N;
6386 goto Succeed;
6387 else
6388 goto Fail;
6389 end if;
6390 end;
6392 -- Tab (integer pointer case)
6394 when PC_Tab_NP =>
6395 Dout (Img (Node) & "matching Tab ", Node.NP.all);
6397 if Cursor <= Node.NP.all then
6398 Cursor := Node.NP.all;
6399 goto Succeed;
6400 else
6401 goto Fail;
6402 end if;
6404 -- Unanchored movement
6406 when PC_Unanchored =>
6407 Dout ("attempting to move anchor point");
6409 -- All done if we tried every position
6411 if Cursor > Length then
6412 goto Match_Fail;
6414 -- Otherwise extend the anchor point, and restack ourself
6416 else
6417 Cursor := Cursor + 1;
6418 Push (Node);
6419 goto Succeed;
6420 end if;
6422 -- Write immediate. This node performs the actual write
6424 when PC_Write_Imm =>
6425 Dout (Img (Node) & "executing immediate write of " &
6426 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
6428 Put_Line
6429 (Node.FP.all,
6430 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
6431 Pop_Region;
6432 goto Succeed;
6434 -- Write on match. This node sets up for the eventual write
6436 when PC_Write_OnM =>
6437 Dout (Img (Node) & "registering deferred write");
6438 Stack (Stack_Base - 1).Node := Node;
6439 Push (CP_Assign'Access);
6440 Pop_Region;
6441 Assign_OnM := True;
6442 goto Succeed;
6444 end case;
6446 -- We are NOT allowed to fall though this case statement, since every
6447 -- match routine must end by executing a goto to the appropriate point
6448 -- in the finite state machine model.
6450 pragma Warnings (Off);
6451 Logic_Error;
6452 pragma Warnings (On);
6453 end XMatchD;
6455 end GNAT.Spitbol.Patterns;