config/sparc/sol2-bi.h: Revert previous delta.
[official-gcc.git] / gcc / ada / g-spipat.adb
blobff6da0c1ce0397c9e89e7bfb4ca6dff86598c872
1 ------------------------------------------------------------------------------
2 -- --
3 -- GNAT LIBRARY COMPONENTS --
4 -- --
5 -- G N A T . S P I T B O L . P A T T E R N S --
6 -- --
7 -- B o d y --
8 -- --
9 -- --
10 -- Copyright (C) 1998-2002, Ada Core Technologies, Inc. --
11 -- --
12 -- GNAT is free software; you can redistribute it and/or modify it under --
13 -- terms of the GNU General Public License as published by the Free Soft- --
14 -- ware Foundation; either version 2, or (at your option) any later ver- --
15 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
16 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
17 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
18 -- for more details. You should have received a copy of the GNU General --
19 -- Public License distributed with GNAT; see file COPYING. If not, write --
20 -- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, --
21 -- MA 02111-1307, USA. --
22 -- --
23 -- As a special exception, if other files instantiate generics from this --
24 -- unit, or you link this unit with other files to produce an executable, --
25 -- this unit does not by itself cause the resulting executable to be --
26 -- covered by the GNU General Public License. This exception does not --
27 -- however invalidate any other reasons why the executable file might be --
28 -- covered by the GNU Public License. --
29 -- --
30 -- GNAT is maintained by Ada Core Technologies Inc (http://www.gnat.com). --
31 -- --
32 ------------------------------------------------------------------------------
34 -- Note: the data structures and general approach used in this implementation
35 -- are derived from the original MINIMAL sources for SPITBOL. The code is not
36 -- a direct translation, but the approach is followed closely. In particular,
37 -- we use the one stack approach developed in the SPITBOL implementation.
39 with Ada.Exceptions; use Ada.Exceptions;
40 with Ada.Strings.Maps; use Ada.Strings.Maps;
41 with Ada.Strings.Unbounded.Aux; use Ada.Strings.Unbounded.Aux;
43 with GNAT.Debug_Utilities; use GNAT.Debug_Utilities;
45 with System; use System;
47 with Unchecked_Conversion;
48 with Unchecked_Deallocation;
50 package body GNAT.Spitbol.Patterns is
52 ------------------------
53 -- Internal Debugging --
54 ------------------------
56 Internal_Debug : constant Boolean := False;
57 -- Set this flag to True to activate some built-in debugging traceback
58 -- These are all lines output with PutD and Put_LineD.
60 procedure New_LineD;
61 pragma Inline (New_LineD);
62 -- Output new blank line with New_Line if Internal_Debug is True
64 procedure PutD (Str : String);
65 pragma Inline (PutD);
66 -- Output string with Put if Internal_Debug is True
68 procedure Put_LineD (Str : String);
69 pragma Inline (Put_LineD);
70 -- Output string with Put_Line if Internal_Debug is True
72 -----------------------------
73 -- Local Type Declarations --
74 -----------------------------
76 subtype String_Ptr is Ada.Strings.Unbounded.String_Access;
77 subtype File_Ptr is Ada.Text_IO.File_Access;
79 function To_Address is new Unchecked_Conversion (PE_Ptr, Address);
80 -- Used only for debugging output purposes
82 subtype AFC is Ada.Finalization.Controlled;
84 N : constant PE_Ptr := null;
85 -- Shorthand used to initialize Copy fields to null
87 type Natural_Ptr is access all Natural;
88 type Pattern_Ptr is access all Pattern;
90 --------------------------------------------------
91 -- Description of Algorithm and Data Structures --
92 --------------------------------------------------
94 -- A pattern structure is represented as a linked graph of nodes
95 -- with the following structure:
97 -- +------------------------------------+
98 -- I Pcode I
99 -- +------------------------------------+
100 -- I Index I
101 -- +------------------------------------+
102 -- I Pthen I
103 -- +------------------------------------+
104 -- I parameter(s) I
105 -- +------------------------------------+
107 -- Pcode is a code value indicating the type of the patterm node. This
108 -- code is used both as the discriminant value for the record, and as
109 -- the case index in the main match routine that branches to the proper
110 -- match code for the given element.
112 -- Index is a serial index number. The use of these serial index
113 -- numbers is described in a separate section.
115 -- Pthen is a pointer to the successor node, i.e the node to be matched
116 -- if the attempt to match the node succeeds. If this is the last node
117 -- of the pattern to be matched, then Pthen points to a dummy node
118 -- of kind PC_EOP (end of pattern), which initiales pattern exit.
120 -- The parameter or parameters are present for certain node types,
121 -- and the type varies with the pattern code.
123 type Pattern_Code is (
124 PC_Arb_Y,
125 PC_Assign,
126 PC_Bal,
127 PC_BreakX_X,
128 PC_Cancel,
129 PC_EOP,
130 PC_Fail,
131 PC_Fence,
132 PC_Fence_X,
133 PC_Fence_Y,
134 PC_R_Enter,
135 PC_R_Remove,
136 PC_R_Restore,
137 PC_Rest,
138 PC_Succeed,
139 PC_Unanchored,
141 PC_Alt,
142 PC_Arb_X,
143 PC_Arbno_S,
144 PC_Arbno_X,
146 PC_Rpat,
148 PC_Pred_Func,
150 PC_Assign_Imm,
151 PC_Assign_OnM,
152 PC_Any_VP,
153 PC_Break_VP,
154 PC_BreakX_VP,
155 PC_NotAny_VP,
156 PC_NSpan_VP,
157 PC_Span_VP,
158 PC_String_VP,
160 PC_Write_Imm,
161 PC_Write_OnM,
163 PC_Null,
164 PC_String,
166 PC_String_2,
167 PC_String_3,
168 PC_String_4,
169 PC_String_5,
170 PC_String_6,
172 PC_Setcur,
174 PC_Any_CH,
175 PC_Break_CH,
176 PC_BreakX_CH,
177 PC_Char,
178 PC_NotAny_CH,
179 PC_NSpan_CH,
180 PC_Span_CH,
182 PC_Any_CS,
183 PC_Break_CS,
184 PC_BreakX_CS,
185 PC_NotAny_CS,
186 PC_NSpan_CS,
187 PC_Span_CS,
189 PC_Arbno_Y,
190 PC_Len_Nat,
191 PC_Pos_Nat,
192 PC_RPos_Nat,
193 PC_RTab_Nat,
194 PC_Tab_Nat,
196 PC_Pos_NF,
197 PC_Len_NF,
198 PC_RPos_NF,
199 PC_RTab_NF,
200 PC_Tab_NF,
202 PC_Pos_NP,
203 PC_Len_NP,
204 PC_RPos_NP,
205 PC_RTab_NP,
206 PC_Tab_NP,
208 PC_Any_VF,
209 PC_Break_VF,
210 PC_BreakX_VF,
211 PC_NotAny_VF,
212 PC_NSpan_VF,
213 PC_Span_VF,
214 PC_String_VF);
216 type IndexT is range 0 .. +(2 **15 - 1);
218 type PE (Pcode : Pattern_Code) is record
220 Index : IndexT;
221 -- Serial index number of pattern element within pattern.
223 Pthen : PE_Ptr;
224 -- Successor element, to be matched after this one
226 case Pcode is
228 when PC_Arb_Y |
229 PC_Assign |
230 PC_Bal |
231 PC_BreakX_X |
232 PC_Cancel |
233 PC_EOP |
234 PC_Fail |
235 PC_Fence |
236 PC_Fence_X |
237 PC_Fence_Y |
238 PC_Null |
239 PC_R_Enter |
240 PC_R_Remove |
241 PC_R_Restore |
242 PC_Rest |
243 PC_Succeed |
244 PC_Unanchored => null;
246 when PC_Alt |
247 PC_Arb_X |
248 PC_Arbno_S |
249 PC_Arbno_X => Alt : PE_Ptr;
251 when PC_Rpat => PP : Pattern_Ptr;
253 when PC_Pred_Func => BF : Boolean_Func;
255 when PC_Assign_Imm |
256 PC_Assign_OnM |
257 PC_Any_VP |
258 PC_Break_VP |
259 PC_BreakX_VP |
260 PC_NotAny_VP |
261 PC_NSpan_VP |
262 PC_Span_VP |
263 PC_String_VP => VP : VString_Ptr;
265 when PC_Write_Imm |
266 PC_Write_OnM => FP : File_Ptr;
268 when PC_String => Str : String_Ptr;
270 when PC_String_2 => Str2 : String (1 .. 2);
272 when PC_String_3 => Str3 : String (1 .. 3);
274 when PC_String_4 => Str4 : String (1 .. 4);
276 when PC_String_5 => Str5 : String (1 .. 5);
278 when PC_String_6 => Str6 : String (1 .. 6);
280 when PC_Setcur => Var : Natural_Ptr;
282 when PC_Any_CH |
283 PC_Break_CH |
284 PC_BreakX_CH |
285 PC_Char |
286 PC_NotAny_CH |
287 PC_NSpan_CH |
288 PC_Span_CH => Char : Character;
290 when PC_Any_CS |
291 PC_Break_CS |
292 PC_BreakX_CS |
293 PC_NotAny_CS |
294 PC_NSpan_CS |
295 PC_Span_CS => CS : Character_Set;
297 when PC_Arbno_Y |
298 PC_Len_Nat |
299 PC_Pos_Nat |
300 PC_RPos_Nat |
301 PC_RTab_Nat |
302 PC_Tab_Nat => Nat : Natural;
304 when PC_Pos_NF |
305 PC_Len_NF |
306 PC_RPos_NF |
307 PC_RTab_NF |
308 PC_Tab_NF => NF : Natural_Func;
310 when PC_Pos_NP |
311 PC_Len_NP |
312 PC_RPos_NP |
313 PC_RTab_NP |
314 PC_Tab_NP => NP : Natural_Ptr;
316 when PC_Any_VF |
317 PC_Break_VF |
318 PC_BreakX_VF |
319 PC_NotAny_VF |
320 PC_NSpan_VF |
321 PC_Span_VF |
322 PC_String_VF => VF : VString_Func;
324 end case;
325 end record;
327 subtype PC_Has_Alt is Pattern_Code range PC_Alt .. PC_Arbno_X;
328 -- Range of pattern codes that has an Alt field. This is used in the
329 -- recursive traversals, since these links must be followed.
331 EOP_Element : aliased constant PE := (PC_EOP, 0, N);
332 -- This is the end of pattern element, and is thus the representation of
333 -- a null pattern. It has a zero index element since it is never placed
334 -- inside a pattern. Furthermore it does not need a successor, since it
335 -- marks the end of the pattern, so that no more successors are needed.
337 EOP : constant PE_Ptr := EOP_Element'Unrestricted_Access;
338 -- This is the end of pattern pointer, that is used in the Pthen pointer
339 -- of other nodes to signal end of pattern.
341 -- The following array is used to determine if a pattern used as an
342 -- argument for Arbno is eligible for treatment using the simple Arbno
343 -- structure (i.e. it is a pattern that is guaranteed to match at least
344 -- one character on success, and not to make any entries on the stack.
346 OK_For_Simple_Arbno :
347 array (Pattern_Code) of Boolean := (
348 PC_Any_CS |
349 PC_Any_CH |
350 PC_Any_VF |
351 PC_Any_VP |
352 PC_Char |
353 PC_Len_Nat |
354 PC_NotAny_CS |
355 PC_NotAny_CH |
356 PC_NotAny_VF |
357 PC_NotAny_VP |
358 PC_Span_CS |
359 PC_Span_CH |
360 PC_Span_VF |
361 PC_Span_VP |
362 PC_String |
363 PC_String_2 |
364 PC_String_3 |
365 PC_String_4 |
366 PC_String_5 |
367 PC_String_6 => True,
369 others => False);
371 -------------------------------
372 -- The Pattern History Stack --
373 -------------------------------
375 -- The pattern history stack is used for controlling backtracking when
376 -- a match fails. The idea is to stack entries that give a cursor value
377 -- to be restored, and a node to be reestablished as the current node to
378 -- attempt an appropriate rematch operation. The processing for a pattern
379 -- element that has rematch alternatives pushes an appropriate entry or
380 -- entry on to the stack, and the proceeds. If a match fails at any point,
381 -- the top element of the stack is popped off, resetting the cursor and
382 -- the match continues by accessing the node stored with this entry.
384 type Stack_Entry is record
386 Cursor : Integer;
387 -- Saved cursor value that is restored when this entry is popped
388 -- from the stack if a match attempt fails. Occasionally, this
389 -- field is used to store a history stack pointer instead of a
390 -- cursor. Such cases are noted in the documentation and the value
391 -- stored is negative since stack pointer values are always negative.
393 Node : PE_Ptr;
394 -- This pattern element reference is reestablished as the current
395 -- Node to be matched (which will attempt an appropriate rematch).
397 end record;
399 subtype Stack_Range is Integer range -Stack_Size .. -1;
401 type Stack_Type is array (Stack_Range) of Stack_Entry;
402 -- The type used for a history stack. The actual instance of the stack
403 -- is declared as a local variable in the Match routine, to properly
404 -- handle recursive calls to Match. All stack pointer values are negative
405 -- to distinguish them from normal cursor values.
407 -- Note: the pattern matching stack is used only to handle backtracking.
408 -- If no backtracking occurs, its entries are never accessed, and never
409 -- popped off, and in particular it is normal for a successful match
410 -- to terminate with entries on the stack that are simply discarded.
412 -- Note: in subsequent diagrams of the stack, we always place element
413 -- zero (the deepest element) at the top of the page, then build the
414 -- stack down on the page with the most recent (top of stack) element
415 -- being the bottom-most entry on the page.
417 -- Stack checking is handled by labeling every pattern with the maximum
418 -- number of stack entries that are required, so a single check at the
419 -- start of matching the pattern suffices. There are two exceptions.
421 -- First, the count does not include entries for recursive pattern
422 -- references. Such recursions must therefore perform a specific
423 -- stack check with respect to the number of stack entries required
424 -- by the recursive pattern that is accessed and the amount of stack
425 -- that remains unused.
427 -- Second, the count includes only one iteration of an Arbno pattern,
428 -- so a specific check must be made on subsequent iterations that there
429 -- is still enough stack space left. The Arbno node has a field that
430 -- records the number of stack entries required by its argument for
431 -- this purpose.
433 ---------------------------------------------------
434 -- Use of Serial Index Field in Pattern Elements --
435 ---------------------------------------------------
437 -- The serial index numbers for the pattern elements are assigned as
438 -- a pattern is consructed from its constituent elements. Note that there
439 -- is never any sharing of pattern elements between patterns (copies are
440 -- always made), so the serial index numbers are unique to a particular
441 -- pattern as referenced from the P field of a value of type Pattern.
443 -- The index numbers meet three separate invariants, which are used for
444 -- various purposes as described in this section.
446 -- First, the numbers uniquely identify the pattern elements within a
447 -- pattern. If Num is the number of elements in a given pattern, then
448 -- the serial index numbers for the elements of this pattern will range
449 -- from 1 .. Num, so that each element has a separate value.
451 -- The purpose of this assignment is to provide a convenient auxiliary
452 -- data structure mechanism during operations which must traverse a
453 -- pattern (e.g. copy and finalization processing). Once constructed
454 -- patterns are strictly read only. This is necessary to allow sharing
455 -- of patterns between tasks. This means that we cannot go marking the
456 -- pattern (e.g. with a visited bit). Instead we cosntuct a separate
457 -- vector that contains the necessary information indexed by the Index
458 -- values in the pattern elements. For this purpose the only requirement
459 -- is that they be uniquely assigned.
461 -- Second, the pattern element referenced directly, i.e. the leading
462 -- pattern element, is always the maximum numbered element and therefore
463 -- indicates the total number of elements in the pattern. More precisely,
464 -- the element referenced by the P field of a pattern value, or the
465 -- element returned by any of the internal pattern construction routines
466 -- in the body (that return a value of type PE_Ptr) always is this
467 -- maximum element,
469 -- The purpose of this requirement is to allow an immediate determination
470 -- of the number of pattern elements within a pattern. This is used to
471 -- properly size the vectors used to contain auxiliary information for
472 -- traversal as described above.
474 -- Third, as compound pattern structures are constructed, the way in which
475 -- constituent parts of the pattern are constructed is stylized. This is
476 -- an automatic consequence of the way that these compounjd structures
477 -- are constructed, and basically what we are doing is simply documenting
478 -- and specifying the natural result of the pattern construction. The
479 -- section describing compound pattern structures gives details of the
480 -- numbering of each compound pattern structure.
482 -- The purpose of specifying the stylized numbering structures for the
483 -- compound patterns is to help simplify the processing in the Image
484 -- function, since it eases the task of retrieving the original recursive
485 -- structure of the pattern from the flat graph structure of elements.
486 -- This use in the Image function is the only point at which the code
487 -- makes use of the stylized structures.
489 type Ref_Array is array (IndexT range <>) of PE_Ptr;
490 -- This type is used to build an array whose N'th entry references the
491 -- element in a pattern whose Index value is N. See Build_Ref_Array.
493 procedure Build_Ref_Array (E : PE_Ptr; RA : out Ref_Array);
494 -- Given a pattern element which is the leading element of a pattern
495 -- structure, and a Ref_Array with bounds 1 .. E.Index, fills in the
496 -- Ref_Array so that its N'th entry references the element of the
497 -- referenced pattern whose Index value is N.
499 -------------------------------
500 -- Recursive Pattern Matches --
501 -------------------------------
503 -- The pattern primitive (+P) where P is a Pattern_Ptr or Pattern_Func
504 -- causes a recursive pattern match. This cannot be handled by an actual
505 -- recursive call to the outer level Match routine, since this would not
506 -- allow for possible backtracking into the region matched by the inner
507 -- pattern. Indeed this is the classical clash between recursion and
508 -- backtracking, and a simple recursive stack structure does not suffice.
510 -- This section describes how this recursion and the possible associated
511 -- backtracking is handled. We still use a single stack, but we establish
512 -- the concept of nested regions on this stack, each of which has a stack
513 -- base value pointing to the deepest stack entry of the region. The base
514 -- value for the outer level is zero.
516 -- When a recursive match is established, two special stack entries are
517 -- made. The first entry is used to save the original node that starts
518 -- the recursive match. This is saved so that the successor field of
519 -- this node is accessible at the end of the match, but it is never
520 -- popped and executed.
522 -- The second entry corresponds to a standard new region action. A
523 -- PC_R_Remove node is stacked, whose cursor field is used to store
524 -- the outer stack base, and the stack base is reset to point to
525 -- this PC_R_Remove node. Then the recursive pattern is matched and
526 -- it can make history stack entries in the normal matter, so now
527 -- the stack looks like:
529 -- (stack entries made by outer level)
531 -- (Special entry, node is (+P) successor
532 -- cursor entry is not used)
534 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack base
535 -- saved base value for the enclosing region)
537 -- (stack entries made by inner level)
539 -- If a subsequent failure occurs and pops the PC_R_Remove node, it
540 -- removes itself and the special entry immediately underneath it,
541 -- restores the stack base value for the enclosing region, and then
542 -- again signals failure to look for alternatives that were stacked
543 -- before the recursion was initiated.
545 -- Now we need to consider what happens if the inner pattern succeeds, as
546 -- signalled by accessing the special PC_EOP pattern primitive. First we
547 -- recognize the nested case by looking at the Base value. If this Base
548 -- value is Stack'First, then the entire match has succeeded, but if the
549 -- base value is greater than Stack'First, then we have successfully
550 -- matched an inner pattern, and processing continues at the outer level.
552 -- There are two cases. The simple case is when the inner pattern has made
553 -- no stack entries, as recognized by the fact that the current stack
554 -- pointer is equal to the current base value. In this case it is fine to
555 -- remove all trace of the recursion by restoring the outer base value and
556 -- using the special entry to find the appropriate successor node.
558 -- The more complex case arises when the inner match does make stack
559 -- entries. In this case, the PC_EOP processing stacks a special entry
560 -- whose cursor value saves the saved inner base value (the one that
561 -- references the corresponding PC_R_Remove value), and whose node
562 -- pointer references a PC_R_Restore node, so the stack looks like:
564 -- (stack entries made by outer level)
566 -- (Special entry, node is (+P) successor,
567 -- cursor entry is not used)
569 -- (PC_R_Remove entry, "cursor" value is (negative)
570 -- saved base value for the enclosing region)
572 -- (stack entries made by inner level)
574 -- (PC_Region_Replace entry, "cursor" value is (negative)
575 -- stack pointer value referencing the PC_R_Remove entry).
577 -- If the entire match succeeds, then these stack entries are, as usual,
578 -- ignored and abandoned. If on the other hand a subsequent failure
579 -- causes the PC_Region_Replace entry to be popped, it restores the
580 -- inner base value from its saved "cursor" value and then fails again.
581 -- Note that it is OK that the cursor is temporarily clobbered by this
582 -- pop, since the second failure will reestablish a proper cursor value.
584 ---------------------------------
585 -- Compound Pattern Structures --
586 ---------------------------------
588 -- This section discusses the compound structures used to represent
589 -- constructed patterns. It shows the graph structures of pattern
590 -- elements that are constructed, and in the case of patterns that
591 -- provide backtracking possibilities, describes how the history
592 -- stack is used to control the backtracking. Finally, it notes the
593 -- way in which the Index numbers are assigned to the structure.
595 -- In all diagrams, solid lines (built witth minus signs or vertical
596 -- bars, represent successor pointers (Pthen fields) with > or V used
597 -- to indicate the direction of the pointer. The initial node of the
598 -- structure is in the upper left of the diagram. A dotted line is an
599 -- alternative pointer from the element above it to the element below
600 -- it. See individual sections for details on how alternatives are used.
602 -------------------
603 -- Concatenation --
604 -------------------
606 -- In the pattern structures listed in this section, a line that looks
607 -- lile ----> with nothing to the right indicates an end of pattern
608 -- (EOP) pointer that represents the end of the match.
610 -- When a pattern concatenation (L & R) occurs, the resulting structure
611 -- is obtained by finding all such EOP pointers in L, and replacing
612 -- them to point to R. This is the most important flattening that
613 -- occurs in constructing a pattern, and it means that the pattern
614 -- matching circuitry does not have to keep track of the structure
615 -- of a pattern with respect to concatenation, since the appropriate
616 -- successor is always at hand.
618 -- Concatenation itself generates no additional possibilities for
619 -- backtracking, but the constituent patterns of the concatenated
620 -- structure will make stack entries as usual. The maximum amount
621 -- of stack required by the structure is thus simply the sum of the
622 -- maximums required by L and R.
624 -- The index numbering of a concatenation structure works by leaving
625 -- the numbering of the right hand pattern, R, unchanged and adjusting
626 -- the numbers in the left hand pattern, L up by the count of elements
627 -- in R. This ensures that the maximum numbered element is the leading
628 -- element as required (given that it was the leading element in L).
630 -----------------
631 -- Alternation --
632 -----------------
634 -- A pattern (L or R) constructs the structure:
636 -- +---+ +---+
637 -- | A |---->| L |---->
638 -- +---+ +---+
639 -- .
640 -- .
641 -- +---+
642 -- | R |---->
643 -- +---+
645 -- The A element here is a PC_Alt node, and the dotted line represents
646 -- the contents of the Alt field. When the PC_Alt element is matched,
647 -- it stacks a pointer to the leading element of R on the history stack
648 -- so that on subsequent failure, a match of R is attempted.
650 -- The A node is the higest numbered element in the pattern. The
651 -- original index numbers of R are unchanged, but the index numbers
652 -- of the L pattern are adjusted up by the count of elements in R.
654 -- Note that the difference between the index of the L leading element
655 -- the index of the R leading element (after building the alt structure)
656 -- indicates the number of nodes in L, and this is true even after the
657 -- structure is incorporated into some larger structure. For example,
658 -- if the A node has index 16, and L has index 15 and R has index
659 -- 5, then we know that L has 10 (15-5) elements in it.
661 -- Suppose that we now concatenate this structure to another pattern
662 -- with 9 elements in it. We will now have the A node with an index
663 -- of 25, L with an index of 24 and R with an index of 14. We still
664 -- know that L has 10 (24-14) elements in it, numbered 15-24, and
665 -- consequently the successor of the alternation structure has an
666 -- index with a value less than 15. This is used in Image to figure
667 -- out the original recursive structure of a pattern.
669 -- To clarify the interaction of the alternation and concatenation
670 -- structures, here is a more complex example of the structure built
671 -- for the pattern:
673 -- (V or W or X) (Y or Z)
675 -- where A,B,C,D,E are all single element patterns:
677 -- +---+ +---+ +---+ +---+
678 -- I A I---->I V I---+-->I A I---->I Y I---->
679 -- +---+ +---+ I +---+ +---+
680 -- . I .
681 -- . I .
682 -- +---+ +---+ I +---+
683 -- I A I---->I W I-->I I Z I---->
684 -- +---+ +---+ I +---+
685 -- . I
686 -- . I
687 -- +---+ I
688 -- I X I------------>+
689 -- +---+
691 -- The numbering of the nodes would be as follows:
693 -- +---+ +---+ +---+ +---+
694 -- I 8 I---->I 7 I---+-->I 3 I---->I 2 I---->
695 -- +---+ +---+ I +---+ +---+
696 -- . I .
697 -- . I .
698 -- +---+ +---+ I +---+
699 -- I 6 I---->I 5 I-->I I 1 I---->
700 -- +---+ +---+ I +---+
701 -- . I
702 -- . I
703 -- +---+ I
704 -- I 4 I------------>+
705 -- +---+
707 -- Note: The above structure actually corresponds to
709 -- (A or (B or C)) (D or E)
711 -- rather than
713 -- ((A or B) or C) (D or E)
715 -- which is the more natural interpretation, but in fact alternation
716 -- is associative, and the construction of an alternative changes the
717 -- left grouped pattern to the right grouped pattern in any case, so
718 -- that the Image function produces a more natural looking output.
720 ---------
721 -- Arb --
722 ---------
724 -- An Arb pattern builds the structure
726 -- +---+
727 -- | X |---->
728 -- +---+
729 -- .
730 -- .
731 -- +---+
732 -- | Y |---->
733 -- +---+
735 -- The X node is a PC_Arb_X node, which matches null, and stacks a
736 -- pointer to Y node, which is the PC_Arb_Y node that matches one
737 -- extra character and restacks itself.
739 -- The PC_Arb_X node is numbered 2, and the PC_Arb_Y node is 1.
741 -------------------------
742 -- Arbno (simple case) --
743 -------------------------
745 -- The simple form of Arbno can be used where the pattern always
746 -- matches at least one character if it succeeds, and it is known
747 -- not to make any history stack entries. In this case, Arbno (P)
748 -- can construct the following structure:
750 -- +-------------+
751 -- | ^
752 -- V |
753 -- +---+ |
754 -- | S |----> |
755 -- +---+ |
756 -- . |
757 -- . |
758 -- +---+ |
759 -- | P |---------->+
760 -- +---+
762 -- The S (PC_Arbno_S) node matches null stacking a pointer to the
763 -- pattern P. If a subsequent failure causes P to be matched and
764 -- this match succeeds, then node A gets restacked to try another
765 -- instance if needed by a subsequent failure.
767 -- The node numbering of the constituent pattern P is not affected.
768 -- The S node has a node number of P.Index + 1.
770 --------------------------
771 -- Arbno (complex case) --
772 --------------------------
774 -- A call to Arbno (P), where P can match null (or at least is not
775 -- known to require a non-null string) and/or P requires pattern stack
776 -- entries, constructs the following structure:
778 -- +--------------------------+
779 -- | ^
780 -- V |
781 -- +---+ |
782 -- | X |----> |
783 -- +---+ |
784 -- . |
785 -- . |
786 -- +---+ +---+ +---+ |
787 -- | E |---->| P |---->| Y |--->+
788 -- +---+ +---+ +---+
790 -- The node X (PC_Arbno_X) matches null, stacking a pointer to the
791 -- E-P-X structure used to match one Arbno instance.
793 -- Here E is the PC_R_Enter node which matches null and creates two
794 -- stack entries. The first is a special entry whose node field is
795 -- not used at all, and whose cursor field has the initial cursor.
797 -- The second entry corresponds to a standard new region action. A
798 -- PC_R_Remove node is stacked, whose cursor field is used to store
799 -- the outer stack base, and the stack base is reset to point to
800 -- this PC_R_Remove node. Then the pattern P is matched, and it can
801 -- make history stack entries in the normal manner, so now the stack
802 -- looks like:
804 -- (stack entries made before assign pattern)
806 -- (Special entry, node field not used,
807 -- used only to save initial cursor)
809 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base
810 -- saved base value for the enclosing region)
812 -- (stack entries made by matching P)
814 -- If the match of P fails, then the PC_R_Remove entry is popped and
815 -- it removes both itself and the special entry underneath it,
816 -- restores the outer stack base, and signals failure.
818 -- If the match of P succeeds, then node Y, the PC_Arbno_Y node, pops
819 -- the inner region. There are two possibilities. If matching P left
820 -- no stack entries, then all traces of the inner region can be removed.
821 -- If there are stack entries, then we push an PC_Region_Replace stack
822 -- entry whose "cursor" value is the inner stack base value, and then
823 -- restore the outer stack base value, so the stack looks like:
825 -- (stack entries made before assign pattern)
827 -- (Special entry, node field not used,
828 -- used only to save initial cursor)
830 -- (PC_R_Remove entry, "cursor" value is (negative)
831 -- saved base value for the enclosing region)
833 -- (stack entries made by matching P)
835 -- (PC_Region_Replace entry, "cursor" value is (negative)
836 -- stack pointer value referencing the PC_R_Remove entry).
838 -- Now that we have matched another instance of the Arbno pattern,
839 -- we need to move to the successor. There are two cases. If the
840 -- Arbno pattern matched null, then there is no point in seeking
841 -- alternatives, since we would just match a whole bunch of nulls.
842 -- In this case we look through the alternative node, and move
843 -- directly to its successor (i.e. the successor of the Arbno
844 -- pattern). If on the other hand a non-null string was matched,
845 -- we simply follow the successor to the alternative node, which
846 -- sets up for another possible match of the Arbno pattern.
848 -- As noted in the section on stack checking, the stack count (and
849 -- hence the stack check) for a pattern includes only one iteration
850 -- of the Arbno pattern. To make sure that multiple iterations do not
851 -- overflow the stack, the Arbno node saves the stack count required
852 -- by a single iteration, and the Concat function increments this to
853 -- include stack entries required by any successor. The PC_Arbno_Y
854 -- node uses this count to ensure that sufficient stack remains
855 -- before proceeding after matching each new instance.
857 -- The node numbering of the constituent pattern P is not affected.
858 -- Where N is the number of nodes in P, the Y node is numbered N + 1,
859 -- the E node is N + 2, and the X node is N + 3.
861 ----------------------
862 -- Assign Immediate --
863 ----------------------
865 -- Immediate assignment (P * V) constructs the following structure
867 -- +---+ +---+ +---+
868 -- | E |---->| P |---->| A |---->
869 -- +---+ +---+ +---+
871 -- Here E is the PC_R_Enter node which matches null and creates two
872 -- stack entries. The first is a special entry whose node field is
873 -- not used at all, and whose cursor field has the initial cursor.
875 -- The second entry corresponds to a standard new region action. A
876 -- PC_R_Remove node is stacked, whose cursor field is used to store
877 -- the outer stack base, and the stack base is reset to point to
878 -- this PC_R_Remove node. Then the pattern P is matched, and it can
879 -- make history stack entries in the normal manner, so now the stack
880 -- looks like:
882 -- (stack entries made before assign pattern)
884 -- (Special entry, node field not used,
885 -- used only to save initial cursor)
887 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base
888 -- saved base value for the enclosing region)
890 -- (stack entries made by matching P)
892 -- If the match of P fails, then the PC_R_Remove entry is popped
893 -- and it removes both itself and the special entry underneath it,
894 -- restores the outer stack base, and signals failure.
896 -- If the match of P succeeds, then node A, which is the actual
897 -- PC_Assign_Imm node, executes the assignment (using the stack
898 -- base to locate the entry with the saved starting cursor value),
899 -- and the pops the inner region. There are two possibilities, if
900 -- matching P left no stack entries, then all traces of the inner
901 -- region can be removed. If there are stack entries, then we push
902 -- an PC_Region_Replace stack entry whose "cursor" value is the
903 -- inner stack base value, and then restore the outer stack base
904 -- value, so the stack looks like:
906 -- (stack entries made before assign pattern)
908 -- (Special entry, node field not used,
909 -- used only to save initial cursor)
911 -- (PC_R_Remove entry, "cursor" value is (negative)
912 -- saved base value for the enclosing region)
914 -- (stack entries made by matching P)
916 -- (PC_Region_Replace entry, "cursor" value is the (negative)
917 -- stack pointer value referencing the PC_R_Remove entry).
919 -- If a subsequent failure occurs, the PC_Region_Replace node restores
920 -- the inner stack base value and signals failure to explore rematches
921 -- of the pattern P.
923 -- The node numbering of the constituent pattern P is not affected.
924 -- Where N is the number of nodes in P, the A node is numbered N + 1,
925 -- and the E node is N + 2.
927 ---------------------
928 -- Assign On Match --
929 ---------------------
931 -- The assign on match (**) pattern is quite similar to the assign
932 -- immediate pattern, except that the actual assignment has to be
933 -- delayed. The following structure is constructed:
935 -- +---+ +---+ +---+
936 -- | E |---->| P |---->| A |---->
937 -- +---+ +---+ +---+
939 -- The operation of this pattern is identical to that described above
940 -- for deferred assignment, up to the point where P has been matched.
942 -- The A node, which is the PC_Assign_OnM node first pushes a
943 -- PC_Assign node onto the history stack. This node saves the ending
944 -- cursor and acts as a flag for the final assignment, as further
945 -- described below.
947 -- It then stores a pointer to itself in the special entry node field.
948 -- This was otherwise unused, and is now used to retrive the address
949 -- of the variable to be assigned at the end of the pattern.
951 -- After that the inner region is terminated in the usual manner,
952 -- by stacking a PC_R_Restore entry as described for the assign
953 -- immediate case. Note that the optimization of completely
954 -- removing the inner region does not happen in this case, since
955 -- we have at least one stack entry (the PC_Assign one we just made).
956 -- The stack now looks like:
958 -- (stack entries made before assign pattern)
960 -- (Special entry, node points to copy of
961 -- the PC_Assign_OnM node, and the
962 -- cursor field saves the initial cursor).
964 -- (PC_R_Remove entry, "cursor" value is (negative)
965 -- saved base value for the enclosing region)
967 -- (stack entries made by matching P)
969 -- (PC_Assign entry, saves final cursor)
971 -- (PC_Region_Replace entry, "cursor" value is (negative)
972 -- stack pointer value referencing the PC_R_Remove entry).
974 -- If a subsequent failure causes the PC_Assign node to execute it
975 -- simply removes itself and propagates the failure.
977 -- If the match succeeds, then the history stack is scanned for
978 -- PC_Assign nodes, and the assignments are executed (examination
979 -- of the above diagram will show that all the necessary data is
980 -- at hand for the assignment).
982 -- To optimize the common case where no assign-on-match operations
983 -- are present, a global flag Assign_OnM is maintained which is
984 -- initialize to False, and gets set True as part of the execution
985 -- of the PC_Assign_OnM node. The scan of the history stack for
986 -- PC_Assign entries is done only if this flag is set.
988 -- The node numbering of the constituent pattern P is not affected.
989 -- Where N is the number of nodes in P, the A node is numbered N + 1,
990 -- and the E node is N + 2.
992 ---------
993 -- Bal --
994 ---------
996 -- Bal builds a single node:
998 -- +---+
999 -- | B |---->
1000 -- +---+
1002 -- The node B is the PC_Bal node which matches a parentheses balanced
1003 -- string, starting at the current cursor position. It then updates
1004 -- the cursor past this matched string, and stacks a pointer to itself
1005 -- with this updated cursor value on the history stack, to extend the
1006 -- matched string on a subequent failure.
1008 -- Since this is a single node it is numbered 1 (the reason we include
1009 -- it in the compound patterns section is that it backtracks).
1011 ------------
1012 -- BreakX --
1013 ------------
1015 -- BreakX builds the structure
1017 -- +---+ +---+
1018 -- | B |---->| A |---->
1019 -- +---+ +---+
1020 -- ^ .
1021 -- | .
1022 -- | +---+
1023 -- +<------| X |
1024 -- +---+
1026 -- Here the B node is the BreakX_xx node that performs a normal Break
1027 -- function. The A node is an alternative (PC_Alt) node that matches
1028 -- null, but stacks a pointer to node X (the PC_BreakX_X node) which
1029 -- extends the match one character (to eat up the previously detected
1030 -- break character), and then rematches the break.
1032 -- The B node is numbered 3, the alternative node is 1, and the X
1033 -- node is 2.
1035 -----------
1036 -- Fence --
1037 -----------
1039 -- Fence builds a single node:
1041 -- +---+
1042 -- | F |---->
1043 -- +---+
1045 -- The element F, PC_Fence, matches null, and stacks a pointer to a
1046 -- PC_Cancel element which will abort the match on a subsequent failure.
1048 -- Since this is a single element it is numbered 1 (the reason we
1049 -- include it in the compound patterns section is that it backtracks).
1051 --------------------
1052 -- Fence Function --
1053 --------------------
1055 -- A call to the Fence function builds the structure:
1057 -- +---+ +---+ +---+
1058 -- | E |---->| P |---->| X |---->
1059 -- +---+ +---+ +---+
1061 -- Here E is the PC_R_Enter node which matches null and creates two
1062 -- stack entries. The first is a special entry which is not used at
1063 -- all in the fence case (it is present merely for uniformity with
1064 -- other cases of region enter operations).
1066 -- The second entry corresponds to a standard new region action. A
1067 -- PC_R_Remove node is stacked, whose cursor field is used to store
1068 -- the outer stack base, and the stack base is reset to point to
1069 -- this PC_R_Remove node. Then the pattern P is matched, and it can
1070 -- make history stack entries in the normal manner, so now the stack
1071 -- looks like:
1073 -- (stack entries made before fence pattern)
1075 -- (Special entry, not used at all)
1077 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base
1078 -- saved base value for the enclosing region)
1080 -- (stack entries made by matching P)
1082 -- If the match of P fails, then the PC_R_Remove entry is popped
1083 -- and it removes both itself and the special entry underneath it,
1084 -- restores the outer stack base, and signals failure.
1086 -- If the match of P succeeds, then node X, the PC_Fence_X node, gets
1087 -- control. One might be tempted to think that at this point, the
1088 -- history stack entries made by matching P can just be removed since
1089 -- they certainly are not going to be used for rematching (that is
1090 -- whole point of Fence after all!) However, this is wrong, because
1091 -- it would result in the loss of possible assign-on-match entries
1092 -- for deferred pattern assignments.
1094 -- Instead what we do is to make a special entry whose node references
1095 -- PC_Fence_Y, and whose cursor saves the inner stack base value, i.e.
1096 -- the pointer to the PC_R_Remove entry. Then the outer stack base
1097 -- pointer is restored, so the stack looks like:
1099 -- (stack entries made before assign pattern)
1101 -- (Special entry, not used at all)
1103 -- (PC_R_Remove entry, "cursor" value is (negative)
1104 -- saved base value for the enclosing region)
1106 -- (stack entries made by matching P)
1108 -- (PC_Fence_Y entry, "cursor" value is (negative) stack
1109 -- pointer value referencing the PC_R_Remove entry).
1111 -- If a subsequent failure occurs, then the PC_Fence_Y entry removes
1112 -- the entire inner region, including all entries made by matching P,
1113 -- and alternatives prior to the Fence pattern are sought.
1115 -- The node numbering of the constituent pattern P is not affected.
1116 -- Where N is the number of nodes in P, the X node is numbered N + 1,
1117 -- and the E node is N + 2.
1119 -------------
1120 -- Succeed --
1121 -------------
1123 -- Succeed builds a single node:
1125 -- +---+
1126 -- | S |---->
1127 -- +---+
1129 -- The node S is the PC_Succeed node which matches null, and stacks
1130 -- a pointer to itself on the history stack, so that a subsequent
1131 -- failure repeats the same match.
1133 -- Since this is a single node it is numbered 1 (the reason we include
1134 -- it in the compound patterns section is that it backtracks).
1136 ---------------------
1137 -- Write Immediate --
1138 ---------------------
1140 -- The structure built for a write immediate operation (P * F, where
1141 -- F is a file access value) is:
1143 -- +---+ +---+ +---+
1144 -- | E |---->| P |---->| W |---->
1145 -- +---+ +---+ +---+
1147 -- Here E is the PC_R_Enter node and W is the PC_Write_Imm node. The
1148 -- handling is identical to that described above for Assign Immediate,
1149 -- except that at the point where a successful match occurs, the matched
1150 -- substring is written to the referenced file.
1152 -- The node numbering of the constituent pattern P is not affected.
1153 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1154 -- and the E node is N + 2.
1156 --------------------
1157 -- Write On Match --
1158 --------------------
1160 -- The structure built for a write on match operation (P ** F, where
1161 -- F is a file access value) is:
1163 -- +---+ +---+ +---+
1164 -- | E |---->| P |---->| W |---->
1165 -- +---+ +---+ +---+
1167 -- Here E is the PC_R_Enter node and W is the PC_Write_OnM node. The
1168 -- handling is identical to that described above for Assign On Match,
1169 -- except that at the point where a successful match has completed,
1170 -- the matched substring is written to the referenced file.
1172 -- The node numbering of the constituent pattern P is not affected.
1173 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1174 -- and the E node is N + 2.
1175 -----------------------
1176 -- Constant Patterns --
1177 -----------------------
1179 -- The following pattern elements are referenced only from the pattern
1180 -- history stack. In each case the processing for the pattern element
1181 -- results in pattern match abort, or futher failure, so there is no
1182 -- need for a successor and no need for a node number
1184 CP_Assign : aliased PE := (PC_Assign, 0, N);
1185 CP_Cancel : aliased PE := (PC_Cancel, 0, N);
1186 CP_Fence_Y : aliased PE := (PC_Fence_Y, 0, N);
1187 CP_R_Remove : aliased PE := (PC_R_Remove, 0, N);
1188 CP_R_Restore : aliased PE := (PC_R_Restore, 0, N);
1190 -----------------------
1191 -- Local Subprograms --
1192 -----------------------
1194 function Alternate (L, R : PE_Ptr) return PE_Ptr;
1195 function "or" (L, R : PE_Ptr) return PE_Ptr renames Alternate;
1196 -- Build pattern structure corresponding to the alternation of L, R.
1197 -- (i.e. try to match L, and if that fails, try to match R).
1199 function Arbno_Simple (P : PE_Ptr) return PE_Ptr;
1200 -- Build simple Arbno pattern, P is a pattern that is guaranteed to
1201 -- match at least one character if it succeeds and to require no
1202 -- stack entries under all circumstances. The result returned is
1203 -- a simple Arbno structure as previously described.
1205 function Bracket (E, P, A : PE_Ptr) return PE_Ptr;
1206 -- Given two single node pattern elements E and A, and a (possible
1207 -- complex) pattern P, construct the concatenation E-->P-->A and
1208 -- return a pointer to E. The concatenation does not affect the
1209 -- node numbering in P. A has a number one higher than the maximum
1210 -- number in P, and E has a number two higher than the maximum
1211 -- number in P (see for example the Assign_Immediate structure to
1212 -- understand a typical use of this function).
1214 function BreakX_Make (B : PE_Ptr) return Pattern;
1215 -- Given a pattern element for a Break patternx, returns the
1216 -- corresponding BreakX compound pattern structure.
1218 function Concat (L, R : PE_Ptr; Incr : Natural) return PE_Ptr;
1219 -- Creates a pattern eelement that represents a concatenation of the
1220 -- two given pattern elements (i.e. the pattern L followed by R).
1221 -- The result returned is always the same as L, but the pattern
1222 -- referenced by L is modified to have R as a successor. This
1223 -- procedure does not copy L or R, so if a copy is required, it
1224 -- is the responsibility of the caller. The Incr parameter is an
1225 -- amount to be added to the Nat field of any P_Arbno_Y node that is
1226 -- in the left operand, it represents the additional stack space
1227 -- required by the right operand.
1229 function C_To_PE (C : PChar) return PE_Ptr;
1230 -- Given a character, constructs a pattern element that matches
1231 -- the single character.
1233 function Copy (P : PE_Ptr) return PE_Ptr;
1234 -- Creates a copy of the pattern element referenced by the given
1235 -- pattern element reference. This is a deep copy, which means that
1236 -- it follows the Next and Alt pointers.
1238 function Image (P : PE_Ptr) return String;
1239 -- Returns the image of the address of the referenced pattern element.
1240 -- This is equivalent to Image (To_Address (P));
1242 function Is_In (C : Character; Str : String) return Boolean;
1243 pragma Inline (Is_In);
1244 -- Determines if the character C is in string Str.
1246 procedure Logic_Error;
1247 -- Called to raise Program_Error with an appropriate message if an
1248 -- internal logic error is detected.
1250 function Str_BF (A : Boolean_Func) return String;
1251 function Str_FP (A : File_Ptr) return String;
1252 function Str_NF (A : Natural_Func) return String;
1253 function Str_NP (A : Natural_Ptr) return String;
1254 function Str_PP (A : Pattern_Ptr) return String;
1255 function Str_VF (A : VString_Func) return String;
1256 function Str_VP (A : VString_Ptr) return String;
1257 -- These are debugging routines, which return a representation of the
1258 -- given access value (they are called only by Image and Dump)
1260 procedure Set_Successor (Pat : PE_Ptr; Succ : PE_Ptr);
1261 -- Adjusts all EOP pointers in Pat to point to Succ. No other changes
1262 -- are made. In particular, Succ is unchanged, and no index numbers
1263 -- are modified. Note that Pat may not be equal to EOP on entry.
1265 function S_To_PE (Str : PString) return PE_Ptr;
1266 -- Given a string, constructs a pattern element that matches the string
1268 procedure Uninitialized_Pattern;
1269 pragma No_Return (Uninitialized_Pattern);
1270 -- Called to raise Program_Error with an appropriate error message if
1271 -- an uninitialized pattern is used in any pattern construction or
1272 -- pattern matching operation.
1274 procedure XMatch
1275 (Subject : String;
1276 Pat_P : PE_Ptr;
1277 Pat_S : Natural;
1278 Start : out Natural;
1279 Stop : out Natural);
1280 -- This is the common pattern match routine. It is passed a string and
1281 -- a pattern, and it indicates success or failure, and on success the
1282 -- section of the string matched. It does not perform any assignments
1283 -- to the subject string, so pattern replacement is for the caller.
1285 -- Subject The subject string. The lower bound is always one. In the
1286 -- Match procedures, it is fine to use strings whose lower bound
1287 -- is not one, but we perform a one time conversion before the
1288 -- call to XMatch, so that XMatch does not have to be bothered
1289 -- with strange lower bounds.
1291 -- Pat_P Points to initial pattern element of pattern to be matched
1293 -- Pat_S Maximum required stack entries for pattern to be matched
1295 -- Start If match is successful, starting index of matched section.
1296 -- This value is always non-zero. A value of zero is used to
1297 -- indicate a failed match.
1299 -- Stop If match is successful, ending index of matched section.
1300 -- This can be zero if we match the null string at the start,
1301 -- in which case Start is set to zero, and Stop to one. If the
1302 -- Match fails, then the contents of Stop is undefined.
1304 procedure XMatchD
1305 (Subject : String;
1306 Pat_P : PE_Ptr;
1307 Pat_S : Natural;
1308 Start : out Natural;
1309 Stop : out Natural);
1310 -- Identical in all respects to XMatch, except that trace information is
1311 -- output on Standard_Output during execution of the match. This is the
1312 -- version that is called if the original Match call has Debug => True.
1314 ---------
1315 -- "&" --
1316 ---------
1318 function "&" (L : PString; R : Pattern) return Pattern is
1319 begin
1320 return (AFC with R.Stk, Concat (S_To_PE (L), Copy (R.P), R.Stk));
1321 end "&";
1323 function "&" (L : Pattern; R : PString) return Pattern is
1324 begin
1325 return (AFC with L.Stk, Concat (Copy (L.P), S_To_PE (R), 0));
1326 end "&";
1328 function "&" (L : PChar; R : Pattern) return Pattern is
1329 begin
1330 return (AFC with R.Stk, Concat (C_To_PE (L), Copy (R.P), R.Stk));
1331 end "&";
1333 function "&" (L : Pattern; R : PChar) return Pattern is
1334 begin
1335 return (AFC with L.Stk, Concat (Copy (L.P), C_To_PE (R), 0));
1336 end "&";
1338 function "&" (L : Pattern; R : Pattern) return Pattern is
1339 begin
1340 return (AFC with L.Stk + R.Stk, Concat (Copy (L.P), Copy (R.P), R.Stk));
1341 end "&";
1343 ---------
1344 -- "*" --
1345 ---------
1347 -- Assign immediate
1349 -- +---+ +---+ +---+
1350 -- | E |---->| P |---->| A |---->
1351 -- +---+ +---+ +---+
1353 -- The node numbering of the constituent pattern P is not affected.
1354 -- Where N is the number of nodes in P, the A node is numbered N + 1,
1355 -- and the E node is N + 2.
1357 function "*" (P : Pattern; Var : VString_Var) return Pattern is
1358 Pat : constant PE_Ptr := Copy (P.P);
1359 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1360 A : constant PE_Ptr :=
1361 new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access);
1363 begin
1364 return (AFC with P.Stk + 3, Bracket (E, Pat, A));
1365 end "*";
1367 function "*" (P : PString; Var : VString_Var) return Pattern is
1368 Pat : constant PE_Ptr := S_To_PE (P);
1369 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1370 A : constant PE_Ptr :=
1371 new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access);
1373 begin
1374 return (AFC with 3, Bracket (E, Pat, A));
1375 end "*";
1377 function "*" (P : PChar; Var : VString_Var) return Pattern is
1378 Pat : constant PE_Ptr := C_To_PE (P);
1379 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1380 A : constant PE_Ptr :=
1381 new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access);
1383 begin
1384 return (AFC with 3, Bracket (E, Pat, A));
1385 end "*";
1387 -- Write immediate
1389 -- +---+ +---+ +---+
1390 -- | E |---->| P |---->| W |---->
1391 -- +---+ +---+ +---+
1393 -- The node numbering of the constituent pattern P is not affected.
1394 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1395 -- and the E node is N + 2.
1397 function "*" (P : Pattern; Fil : File_Access) return Pattern is
1398 Pat : constant PE_Ptr := Copy (P.P);
1399 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1400 W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil);
1402 begin
1403 return (AFC with 3, Bracket (E, Pat, W));
1404 end "*";
1406 function "*" (P : PString; Fil : File_Access) return Pattern is
1407 Pat : constant PE_Ptr := S_To_PE (P);
1408 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1409 W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil);
1411 begin
1412 return (AFC with 3, Bracket (E, Pat, W));
1413 end "*";
1415 function "*" (P : PChar; Fil : File_Access) return Pattern is
1416 Pat : constant PE_Ptr := C_To_PE (P);
1417 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1418 W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil);
1420 begin
1421 return (AFC with 3, Bracket (E, Pat, W));
1422 end "*";
1424 ----------
1425 -- "**" --
1426 ----------
1428 -- Assign on match
1430 -- +---+ +---+ +---+
1431 -- | E |---->| P |---->| A |---->
1432 -- +---+ +---+ +---+
1434 -- The node numbering of the constituent pattern P is not affected.
1435 -- Where N is the number of nodes in P, the A node is numbered N + 1,
1436 -- and the E node is N + 2.
1438 function "**" (P : Pattern; Var : VString_Var) return Pattern is
1439 Pat : constant PE_Ptr := Copy (P.P);
1440 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1441 A : constant PE_Ptr :=
1442 new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access);
1444 begin
1445 return (AFC with P.Stk + 3, Bracket (E, Pat, A));
1446 end "**";
1448 function "**" (P : PString; Var : VString_Var) return Pattern is
1449 Pat : constant PE_Ptr := S_To_PE (P);
1450 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1451 A : constant PE_Ptr :=
1452 new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access);
1454 begin
1455 return (AFC with 3, Bracket (E, Pat, A));
1456 end "**";
1458 function "**" (P : PChar; Var : VString_Var) return Pattern is
1459 Pat : constant PE_Ptr := C_To_PE (P);
1460 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1461 A : constant PE_Ptr :=
1462 new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access);
1464 begin
1465 return (AFC with 3, Bracket (E, Pat, A));
1466 end "**";
1468 -- Write on match
1470 -- +---+ +---+ +---+
1471 -- | E |---->| P |---->| W |---->
1472 -- +---+ +---+ +---+
1474 -- The node numbering of the constituent pattern P is not affected.
1475 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1476 -- and the E node is N + 2.
1478 function "**" (P : Pattern; Fil : File_Access) return Pattern is
1479 Pat : constant PE_Ptr := Copy (P.P);
1480 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1481 W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil);
1483 begin
1484 return (AFC with P.Stk + 3, Bracket (E, Pat, W));
1485 end "**";
1487 function "**" (P : PString; Fil : File_Access) return Pattern is
1488 Pat : constant PE_Ptr := S_To_PE (P);
1489 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1490 W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil);
1492 begin
1493 return (AFC with 3, Bracket (E, Pat, W));
1494 end "**";
1496 function "**" (P : PChar; Fil : File_Access) return Pattern is
1497 Pat : constant PE_Ptr := C_To_PE (P);
1498 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1499 W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil);
1501 begin
1502 return (AFC with 3, Bracket (E, Pat, W));
1503 end "**";
1505 ---------
1506 -- "+" --
1507 ---------
1509 function "+" (Str : VString_Var) return Pattern is
1510 begin
1511 return
1512 (AFC with 0,
1513 new PE'(PC_String_VP, 1, EOP, Str'Unrestricted_Access));
1514 end "+";
1516 function "+" (Str : VString_Func) return Pattern is
1517 begin
1518 return (AFC with 0, new PE'(PC_String_VF, 1, EOP, Str));
1519 end "+";
1521 function "+" (P : Pattern_Var) return Pattern is
1522 begin
1523 return
1524 (AFC with 3,
1525 new PE'(PC_Rpat, 1, EOP, P'Unrestricted_Access));
1526 end "+";
1528 function "+" (P : Boolean_Func) return Pattern is
1529 begin
1530 return (AFC with 3, new PE'(PC_Pred_Func, 1, EOP, P));
1531 end "+";
1533 ----------
1534 -- "or" --
1535 ----------
1537 function "or" (L : PString; R : Pattern) return Pattern is
1538 begin
1539 return (AFC with R.Stk + 1, S_To_PE (L) or Copy (R.P));
1540 end "or";
1542 function "or" (L : Pattern; R : PString) return Pattern is
1543 begin
1544 return (AFC with L.Stk + 1, Copy (L.P) or S_To_PE (R));
1545 end "or";
1547 function "or" (L : PString; R : PString) return Pattern is
1548 begin
1549 return (AFC with 1, S_To_PE (L) or S_To_PE (R));
1550 end "or";
1552 function "or" (L : Pattern; R : Pattern) return Pattern is
1553 begin
1554 return (AFC with
1555 Natural'Max (L.Stk, R.Stk) + 1, Copy (L.P) or Copy (R.P));
1556 end "or";
1558 function "or" (L : PChar; R : Pattern) return Pattern is
1559 begin
1560 return (AFC with 1, C_To_PE (L) or Copy (R.P));
1561 end "or";
1563 function "or" (L : Pattern; R : PChar) return Pattern is
1564 begin
1565 return (AFC with 1, Copy (L.P) or C_To_PE (R));
1566 end "or";
1568 function "or" (L : PChar; R : PChar) return Pattern is
1569 begin
1570 return (AFC with 1, C_To_PE (L) or C_To_PE (R));
1571 end "or";
1573 function "or" (L : PString; R : PChar) return Pattern is
1574 begin
1575 return (AFC with 1, S_To_PE (L) or C_To_PE (R));
1576 end "or";
1578 function "or" (L : PChar; R : PString) return Pattern is
1579 begin
1580 return (AFC with 1, C_To_PE (L) or S_To_PE (R));
1581 end "or";
1583 ------------
1584 -- Adjust --
1585 ------------
1587 -- No two patterns share the same pattern elements, so the adjust
1588 -- procedure for a Pattern assignment must do a deep copy of the
1589 -- pattern element structure.
1591 procedure Adjust (Object : in out Pattern) is
1592 begin
1593 Object.P := Copy (Object.P);
1594 end Adjust;
1596 ---------------
1597 -- Alternate --
1598 ---------------
1600 function Alternate (L, R : PE_Ptr) return PE_Ptr is
1601 begin
1602 -- If the left pattern is null, then we just add the alternation
1603 -- node with an index one greater than the right hand pattern.
1605 if L = EOP then
1606 return new PE'(PC_Alt, R.Index + 1, EOP, R);
1608 -- If the left pattern is non-null, then build a reference vector
1609 -- for its elements, and adjust their index values to acccomodate
1610 -- the right hand elements. Then add the alternation node.
1612 else
1613 declare
1614 Refs : Ref_Array (1 .. L.Index);
1616 begin
1617 Build_Ref_Array (L, Refs);
1619 for J in Refs'Range loop
1620 Refs (J).Index := Refs (J).Index + R.Index;
1621 end loop;
1622 end;
1624 return new PE'(PC_Alt, L.Index + 1, L, R);
1625 end if;
1626 end Alternate;
1628 ---------
1629 -- Any --
1630 ---------
1632 function Any (Str : String) return Pattern is
1633 begin
1634 return (AFC with 0, new PE'(PC_Any_CS, 1, EOP, To_Set (Str)));
1635 end Any;
1637 function Any (Str : VString) return Pattern is
1638 begin
1639 return Any (S (Str));
1640 end Any;
1642 function Any (Str : Character) return Pattern is
1643 begin
1644 return (AFC with 0, new PE'(PC_Any_CH, 1, EOP, Str));
1645 end Any;
1647 function Any (Str : Character_Set) return Pattern is
1648 begin
1649 return (AFC with 0, new PE'(PC_Any_CS, 1, EOP, Str));
1650 end Any;
1652 function Any (Str : access VString) return Pattern is
1653 begin
1654 return (AFC with 0, new PE'(PC_Any_VP, 1, EOP, VString_Ptr (Str)));
1655 end Any;
1657 function Any (Str : VString_Func) return Pattern is
1658 begin
1659 return (AFC with 0, new PE'(PC_Any_VF, 1, EOP, Str));
1660 end Any;
1662 ---------
1663 -- Arb --
1664 ---------
1666 -- +---+
1667 -- | X |---->
1668 -- +---+
1669 -- .
1670 -- .
1671 -- +---+
1672 -- | Y |---->
1673 -- +---+
1675 -- The PC_Arb_X element is numbered 2, and the PC_Arb_Y element is 1.
1677 function Arb return Pattern is
1678 Y : constant PE_Ptr := new PE'(PC_Arb_Y, 1, EOP);
1679 X : constant PE_Ptr := new PE'(PC_Arb_X, 2, EOP, Y);
1681 begin
1682 return (AFC with 1, X);
1683 end Arb;
1685 -----------
1686 -- Arbno --
1687 -----------
1689 function Arbno (P : PString) return Pattern is
1690 begin
1691 if P'Length = 0 then
1692 return (AFC with 0, EOP);
1694 else
1695 return (AFC with 0, Arbno_Simple (S_To_PE (P)));
1696 end if;
1697 end Arbno;
1699 function Arbno (P : PChar) return Pattern is
1700 begin
1701 return (AFC with 0, Arbno_Simple (C_To_PE (P)));
1702 end Arbno;
1704 function Arbno (P : Pattern) return Pattern is
1705 Pat : constant PE_Ptr := Copy (P.P);
1707 begin
1708 if P.Stk = 0
1709 and then OK_For_Simple_Arbno (Pat.Pcode)
1710 then
1711 return (AFC with 0, Arbno_Simple (Pat));
1712 end if;
1714 -- This is the complex case, either the pattern makes stack entries
1715 -- or it is possible for the pattern to match the null string (more
1716 -- accurately, we don't know that this is not the case).
1718 -- +--------------------------+
1719 -- | ^
1720 -- V |
1721 -- +---+ |
1722 -- | X |----> |
1723 -- +---+ |
1724 -- . |
1725 -- . |
1726 -- +---+ +---+ +---+ |
1727 -- | E |---->| P |---->| Y |--->+
1728 -- +---+ +---+ +---+
1730 -- The node numbering of the constituent pattern P is not affected.
1731 -- Where N is the number of nodes in P, the Y node is numbered N + 1,
1732 -- the E node is N + 2, and the X node is N + 3.
1734 declare
1735 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1736 X : constant PE_Ptr := new PE'(PC_Arbno_X, 0, EOP, E);
1737 Y : constant PE_Ptr := new PE'(PC_Arbno_Y, 0, X, P.Stk + 3);
1738 EPY : constant PE_Ptr := Bracket (E, Pat, Y);
1740 begin
1741 X.Alt := EPY;
1742 X.Index := EPY.Index + 1;
1743 return (AFC with P.Stk + 3, X);
1744 end;
1745 end Arbno;
1747 ------------------
1748 -- Arbno_Simple --
1749 ------------------
1751 -- +-------------+
1752 -- | ^
1753 -- V |
1754 -- +---+ |
1755 -- | S |----> |
1756 -- +---+ |
1757 -- . |
1758 -- . |
1759 -- +---+ |
1760 -- | P |---------->+
1761 -- +---+
1763 -- The node numbering of the constituent pattern P is not affected.
1764 -- The S node has a node number of P.Index + 1.
1766 -- Note that we know that P cannot be EOP, because a null pattern
1767 -- does not meet the requirements for simple Arbno.
1769 function Arbno_Simple (P : PE_Ptr) return PE_Ptr is
1770 S : constant PE_Ptr := new PE'(PC_Arbno_S, P.Index + 1, EOP, P);
1772 begin
1773 Set_Successor (P, S);
1774 return S;
1775 end Arbno_Simple;
1777 ---------
1778 -- Bal --
1779 ---------
1781 function Bal return Pattern is
1782 begin
1783 return (AFC with 1, new PE'(PC_Bal, 1, EOP));
1784 end Bal;
1786 -------------
1787 -- Bracket --
1788 -------------
1790 function Bracket (E, P, A : PE_Ptr) return PE_Ptr is
1791 begin
1792 if P = EOP then
1793 E.Pthen := A;
1794 E.Index := 2;
1795 A.Index := 1;
1797 else
1798 E.Pthen := P;
1799 Set_Successor (P, A);
1800 E.Index := P.Index + 2;
1801 A.Index := P.Index + 1;
1802 end if;
1804 return E;
1805 end Bracket;
1807 -----------
1808 -- Break --
1809 -----------
1811 function Break (Str : String) return Pattern is
1812 begin
1813 return (AFC with 0, new PE'(PC_Break_CS, 1, EOP, To_Set (Str)));
1814 end Break;
1816 function Break (Str : VString) return Pattern is
1817 begin
1818 return Break (S (Str));
1819 end Break;
1821 function Break (Str : Character) return Pattern is
1822 begin
1823 return (AFC with 0, new PE'(PC_Break_CH, 1, EOP, Str));
1824 end Break;
1826 function Break (Str : Character_Set) return Pattern is
1827 begin
1828 return (AFC with 0, new PE'(PC_Break_CS, 1, EOP, Str));
1829 end Break;
1831 function Break (Str : access VString) return Pattern is
1832 begin
1833 return (AFC with 0, new PE'(PC_Break_VP, 1, EOP, VString_Ptr (Str)));
1834 end Break;
1836 function Break (Str : VString_Func) return Pattern is
1837 begin
1838 return (AFC with 0, new PE'(PC_Break_VF, 1, EOP, Str));
1839 end Break;
1841 ------------
1842 -- BreakX --
1843 ------------
1845 function BreakX (Str : String) return Pattern is
1846 begin
1847 return BreakX_Make (new PE'(PC_BreakX_CS, 3, N, To_Set (Str)));
1848 end BreakX;
1850 function BreakX (Str : VString) return Pattern is
1851 begin
1852 return BreakX (S (Str));
1853 end BreakX;
1855 function BreakX (Str : Character) return Pattern is
1856 begin
1857 return BreakX_Make (new PE'(PC_BreakX_CH, 3, N, Str));
1858 end BreakX;
1860 function BreakX (Str : Character_Set) return Pattern is
1861 begin
1862 return BreakX_Make (new PE'(PC_BreakX_CS, 3, N, Str));
1863 end BreakX;
1865 function BreakX (Str : access VString) return Pattern is
1866 begin
1867 return BreakX_Make (new PE'(PC_BreakX_VP, 3, N, VString_Ptr (Str)));
1868 end BreakX;
1870 function BreakX (Str : VString_Func) return Pattern is
1871 begin
1872 return BreakX_Make (new PE'(PC_BreakX_VF, 3, N, Str));
1873 end BreakX;
1875 -----------------
1876 -- BreakX_Make --
1877 -----------------
1879 -- +---+ +---+
1880 -- | B |---->| A |---->
1881 -- +---+ +---+
1882 -- ^ .
1883 -- | .
1884 -- | +---+
1885 -- +<------| X |
1886 -- +---+
1888 -- The B node is numbered 3, the alternative node is 1, and the X
1889 -- node is 2.
1891 function BreakX_Make (B : PE_Ptr) return Pattern is
1892 X : constant PE_Ptr := new PE'(PC_BreakX_X, 2, B);
1893 A : constant PE_Ptr := new PE'(PC_Alt, 1, EOP, X);
1895 begin
1896 B.Pthen := A;
1897 return (AFC with 2, B);
1898 end BreakX_Make;
1900 ---------------------
1901 -- Build_Ref_Array --
1902 ---------------------
1904 procedure Build_Ref_Array (E : PE_Ptr; RA : out Ref_Array) is
1906 procedure Record_PE (E : PE_Ptr);
1907 -- Record given pattern element if not already recorded in RA,
1908 -- and also record any referenced pattern elements recursively.
1910 procedure Record_PE (E : PE_Ptr) is
1911 begin
1912 PutD (" Record_PE called with PE_Ptr = " & Image (E));
1914 if E = EOP or else RA (E.Index) /= null then
1915 Put_LineD (", nothing to do");
1916 return;
1918 else
1919 Put_LineD (", recording" & IndexT'Image (E.Index));
1920 RA (E.Index) := E;
1921 Record_PE (E.Pthen);
1923 if E.Pcode in PC_Has_Alt then
1924 Record_PE (E.Alt);
1925 end if;
1926 end if;
1927 end Record_PE;
1929 -- Start of processing for Build_Ref_Array
1931 begin
1932 New_LineD;
1933 Put_LineD ("Entering Build_Ref_Array");
1934 Record_PE (E);
1935 New_LineD;
1936 end Build_Ref_Array;
1938 -------------
1939 -- C_To_PE --
1940 -------------
1942 function C_To_PE (C : PChar) return PE_Ptr is
1943 begin
1944 return new PE'(PC_Char, 1, EOP, C);
1945 end C_To_PE;
1947 ------------
1948 -- Cancel --
1949 ------------
1951 function Cancel return Pattern is
1952 begin
1953 return (AFC with 0, new PE'(PC_Cancel, 1, EOP));
1954 end Cancel;
1956 ------------
1957 -- Concat --
1958 ------------
1960 -- Concat needs to traverse the left operand performing the following
1961 -- set of fixups:
1963 -- a) Any successor pointers (Pthen fields) that are set to EOP are
1964 -- reset to point to the second operand.
1966 -- b) Any PC_Arbno_Y node has its stack count field incremented
1967 -- by the parameter Incr provided for this purpose.
1969 -- d) Num fields of all pattern elements in the left operand are
1970 -- adjusted to include the elements of the right operand.
1972 -- Note: we do not use Set_Successor in the processing for Concat, since
1973 -- there is no point in doing two traversals, we may as well do everything
1974 -- at the same time.
1976 function Concat (L, R : PE_Ptr; Incr : Natural) return PE_Ptr is
1977 begin
1978 if L = EOP then
1979 return R;
1981 elsif R = EOP then
1982 return L;
1984 else
1985 declare
1986 Refs : Ref_Array (1 .. L.Index);
1987 -- We build a reference array for L whose N'th element points to
1988 -- the pattern element of L whose original Index value is N.
1990 P : PE_Ptr;
1992 begin
1993 Build_Ref_Array (L, Refs);
1995 for J in Refs'Range loop
1996 P := Refs (J);
1998 P.Index := P.Index + R.Index;
2000 if P.Pcode = PC_Arbno_Y then
2001 P.Nat := P.Nat + Incr;
2002 end if;
2004 if P.Pthen = EOP then
2005 P.Pthen := R;
2006 end if;
2008 if P.Pcode in PC_Has_Alt and then P.Alt = EOP then
2009 P.Alt := R;
2010 end if;
2011 end loop;
2012 end;
2014 return L;
2015 end if;
2016 end Concat;
2018 ----------
2019 -- Copy --
2020 ----------
2022 function Copy (P : PE_Ptr) return PE_Ptr is
2023 begin
2024 if P = null then
2025 Uninitialized_Pattern;
2027 else
2028 declare
2029 Refs : Ref_Array (1 .. P.Index);
2030 -- References to elements in P, indexed by Index field
2032 Copy : Ref_Array (1 .. P.Index);
2033 -- Holds copies of elements of P, indexed by Index field.
2035 E : PE_Ptr;
2037 begin
2038 Build_Ref_Array (P, Refs);
2040 -- Now copy all nodes
2042 for J in Refs'Range loop
2043 Copy (J) := new PE'(Refs (J).all);
2044 end loop;
2046 -- Adjust all internal references
2048 for J in Copy'Range loop
2049 E := Copy (J);
2051 -- Adjust successor pointer to point to copy
2053 if E.Pthen /= EOP then
2054 E.Pthen := Copy (E.Pthen.Index);
2055 end if;
2057 -- Adjust Alt pointer if there is one to point to copy
2059 if E.Pcode in PC_Has_Alt and then E.Alt /= EOP then
2060 E.Alt := Copy (E.Alt.Index);
2061 end if;
2063 -- Copy referenced string
2065 if E.Pcode = PC_String then
2066 E.Str := new String'(E.Str.all);
2067 end if;
2068 end loop;
2070 return Copy (P.Index);
2071 end;
2072 end if;
2073 end Copy;
2075 ----------
2076 -- Dump --
2077 ----------
2079 procedure Dump (P : Pattern) is
2081 subtype Count is Ada.Text_IO.Count;
2082 Scol : Count;
2083 -- Used to keep track of column in dump output
2085 Refs : Ref_Array (1 .. P.P.Index);
2086 -- We build a reference array whose N'th element points to the
2087 -- pattern element whose Index value is N.
2089 Cols : Natural := 2;
2090 -- Number of columns used for pattern numbers, minimum is 2
2092 E : PE_Ptr;
2094 procedure Write_Node_Id (E : PE_Ptr);
2095 -- Writes out a string identifying the given pattern element.
2097 procedure Write_Node_Id (E : PE_Ptr) is
2098 begin
2099 if E = EOP then
2100 Put ("EOP");
2102 for J in 4 .. Cols loop
2103 Put (' ');
2104 end loop;
2106 else
2107 declare
2108 Str : String (1 .. Cols);
2109 N : Natural := Natural (E.Index);
2111 begin
2112 Put ("#");
2114 for J in reverse Str'Range loop
2115 Str (J) := Character'Val (48 + N mod 10);
2116 N := N / 10;
2117 end loop;
2119 Put (Str);
2120 end;
2121 end if;
2122 end Write_Node_Id;
2124 begin
2125 New_Line;
2126 Put ("Pattern Dump Output (pattern at " &
2127 Image (P'Address) &
2128 ", S = " & Natural'Image (P.Stk) & ')');
2130 Scol := Col;
2131 New_Line;
2133 while Col < Scol loop
2134 Put ('-');
2135 end loop;
2137 New_Line;
2139 -- If uninitialized pattern, dump line and we are done
2141 if P.P = null then
2142 Put_Line ("Uninitialized pattern value");
2143 return;
2144 end if;
2146 -- If null pattern, just dump it and we are all done
2148 if P.P = EOP then
2149 Put_Line ("EOP (null pattern)");
2150 return;
2151 end if;
2153 Build_Ref_Array (P.P, Refs);
2155 -- Set number of columns required for node numbers
2157 while 10 ** Cols - 1 < Integer (P.P.Index) loop
2158 Cols := Cols + 1;
2159 end loop;
2161 -- Now dump the nodes in reverse sequence. We output them in reverse
2162 -- sequence since this corresponds to the natural order used to
2163 -- construct the patterns.
2165 for J in reverse Refs'Range loop
2166 E := Refs (J);
2167 Write_Node_Id (E);
2168 Set_Col (Count (Cols) + 4);
2169 Put (Image (E));
2170 Put (" ");
2171 Put (Pattern_Code'Image (E.Pcode));
2172 Put (" ");
2173 Set_Col (21 + Count (Cols) + Address_Image_Length);
2174 Write_Node_Id (E.Pthen);
2175 Set_Col (24 + 2 * Count (Cols) + Address_Image_Length);
2177 case E.Pcode is
2179 when PC_Alt |
2180 PC_Arb_X |
2181 PC_Arbno_S |
2182 PC_Arbno_X =>
2183 Write_Node_Id (E.Alt);
2185 when PC_Rpat =>
2186 Put (Str_PP (E.PP));
2188 when PC_Pred_Func =>
2189 Put (Str_BF (E.BF));
2191 when PC_Assign_Imm |
2192 PC_Assign_OnM |
2193 PC_Any_VP |
2194 PC_Break_VP |
2195 PC_BreakX_VP |
2196 PC_NotAny_VP |
2197 PC_NSpan_VP |
2198 PC_Span_VP |
2199 PC_String_VP =>
2200 Put (Str_VP (E.VP));
2202 when PC_Write_Imm |
2203 PC_Write_OnM =>
2204 Put (Str_FP (E.FP));
2206 when PC_String =>
2207 Put (Image (E.Str.all));
2209 when PC_String_2 =>
2210 Put (Image (E.Str2));
2212 when PC_String_3 =>
2213 Put (Image (E.Str3));
2215 when PC_String_4 =>
2216 Put (Image (E.Str4));
2218 when PC_String_5 =>
2219 Put (Image (E.Str5));
2221 when PC_String_6 =>
2222 Put (Image (E.Str6));
2224 when PC_Setcur =>
2225 Put (Str_NP (E.Var));
2227 when PC_Any_CH |
2228 PC_Break_CH |
2229 PC_BreakX_CH |
2230 PC_Char |
2231 PC_NotAny_CH |
2232 PC_NSpan_CH |
2233 PC_Span_CH =>
2234 Put (''' & E.Char & ''');
2236 when PC_Any_CS |
2237 PC_Break_CS |
2238 PC_BreakX_CS |
2239 PC_NotAny_CS |
2240 PC_NSpan_CS |
2241 PC_Span_CS =>
2242 Put ('"' & To_Sequence (E.CS) & '"');
2244 when PC_Arbno_Y |
2245 PC_Len_Nat |
2246 PC_Pos_Nat |
2247 PC_RPos_Nat |
2248 PC_RTab_Nat |
2249 PC_Tab_Nat =>
2250 Put (S (E.Nat));
2252 when PC_Pos_NF |
2253 PC_Len_NF |
2254 PC_RPos_NF |
2255 PC_RTab_NF |
2256 PC_Tab_NF =>
2257 Put (Str_NF (E.NF));
2259 when PC_Pos_NP |
2260 PC_Len_NP |
2261 PC_RPos_NP |
2262 PC_RTab_NP |
2263 PC_Tab_NP =>
2264 Put (Str_NP (E.NP));
2266 when PC_Any_VF |
2267 PC_Break_VF |
2268 PC_BreakX_VF |
2269 PC_NotAny_VF |
2270 PC_NSpan_VF |
2271 PC_Span_VF |
2272 PC_String_VF =>
2273 Put (Str_VF (E.VF));
2275 when others => null;
2277 end case;
2279 New_Line;
2280 end loop;
2282 New_Line;
2283 end Dump;
2285 ----------
2286 -- Fail --
2287 ----------
2289 function Fail return Pattern is
2290 begin
2291 return (AFC with 0, new PE'(PC_Fail, 1, EOP));
2292 end Fail;
2294 -----------
2295 -- Fence --
2296 -----------
2298 -- Simple case
2300 function Fence return Pattern is
2301 begin
2302 return (AFC with 1, new PE'(PC_Fence, 1, EOP));
2303 end Fence;
2305 -- Function case
2307 -- +---+ +---+ +---+
2308 -- | E |---->| P |---->| X |---->
2309 -- +---+ +---+ +---+
2311 -- The node numbering of the constituent pattern P is not affected.
2312 -- Where N is the number of nodes in P, the X node is numbered N + 1,
2313 -- and the E node is N + 2.
2315 function Fence (P : Pattern) return Pattern is
2316 Pat : constant PE_Ptr := Copy (P.P);
2317 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
2318 X : constant PE_Ptr := new PE'(PC_Fence_X, 0, EOP);
2320 begin
2321 return (AFC with P.Stk + 1, Bracket (E, Pat, X));
2322 end Fence;
2324 --------------
2325 -- Finalize --
2326 --------------
2328 procedure Finalize (Object : in out Pattern) is
2330 procedure Free is new Unchecked_Deallocation (PE, PE_Ptr);
2331 procedure Free is new Unchecked_Deallocation (String, String_Ptr);
2333 begin
2334 -- Nothing to do if already freed
2336 if Object.P = null then
2337 return;
2339 -- Otherwise we must free all elements
2341 else
2342 declare
2343 Refs : Ref_Array (1 .. Object.P.Index);
2344 -- References to elements in pattern to be finalized
2346 begin
2347 Build_Ref_Array (Object.P, Refs);
2349 for J in Refs'Range loop
2350 if Refs (J).Pcode = PC_String then
2351 Free (Refs (J).Str);
2352 end if;
2354 Free (Refs (J));
2355 end loop;
2357 Object.P := null;
2358 end;
2359 end if;
2360 end Finalize;
2362 -----------
2363 -- Image --
2364 -----------
2366 function Image (P : PE_Ptr) return String is
2367 begin
2368 return Image (To_Address (P));
2369 end Image;
2371 function Image (P : Pattern) return String is
2372 begin
2373 return S (Image (P));
2374 end Image;
2376 function Image (P : Pattern) return VString is
2378 Kill_Ampersand : Boolean := False;
2379 -- Set True to delete next & to be output to Result
2381 Result : VString := Nul;
2382 -- The result is accumulated here, using Append
2384 Refs : Ref_Array (1 .. P.P.Index);
2385 -- We build a reference array whose N'th element points to the
2386 -- pattern element whose Index value is N.
2388 procedure Delete_Ampersand;
2389 -- Deletes the ampersand at the end of Result
2391 procedure Image_Seq (E : PE_Ptr; Succ : PE_Ptr; Paren : Boolean);
2392 -- E refers to a pattern structure whose successor is given by Succ.
2393 -- This procedure appends to Result a representation of this pattern.
2394 -- The Paren parameter indicates whether parentheses are required if
2395 -- the output is more than one element.
2397 procedure Image_One (E : in out PE_Ptr);
2398 -- E refers to a pattern structure. This procedure appends to Result
2399 -- a representation of the single simple or compound pattern structure
2400 -- at the start of E and updates E to point to its successor.
2402 ----------------------
2403 -- Delete_Ampersand --
2404 ----------------------
2406 procedure Delete_Ampersand is
2407 L : Natural := Length (Result);
2409 begin
2410 if L > 2 then
2411 Delete (Result, L - 1, L);
2412 end if;
2413 end Delete_Ampersand;
2415 ---------------
2416 -- Image_One --
2417 ---------------
2419 procedure Image_One (E : in out PE_Ptr) is
2421 ER : PE_Ptr := E.Pthen;
2422 -- Successor set as result in E unless reset
2424 begin
2425 case E.Pcode is
2427 when PC_Cancel =>
2428 Append (Result, "Cancel");
2430 when PC_Alt => Alt : declare
2432 Elmts_In_L : constant IndexT := E.Pthen.Index - E.Alt.Index;
2433 -- Number of elements in left pattern of alternation.
2435 Lowest_In_L : constant IndexT := E.Index - Elmts_In_L;
2436 -- Number of lowest index in elements of left pattern
2438 E1 : PE_Ptr;
2440 begin
2441 -- The successor of the alternation node must have a lower
2442 -- index than any node that is in the left pattern or a
2443 -- higher index than the alternation node itself.
2445 while ER /= EOP
2446 and then ER.Index >= Lowest_In_L
2447 and then ER.Index < E.Index
2448 loop
2449 ER := ER.Pthen;
2450 end loop;
2452 Append (Result, '(');
2454 E1 := E;
2455 loop
2456 Image_Seq (E1.Pthen, ER, False);
2457 Append (Result, " or ");
2458 E1 := E1.Alt;
2459 exit when E1.Pcode /= PC_Alt;
2460 end loop;
2462 Image_Seq (E1, ER, False);
2463 Append (Result, ')');
2464 end Alt;
2466 when PC_Any_CS =>
2467 Append (Result, "Any (" & Image (To_Sequence (E.CS)) & ')');
2469 when PC_Any_VF =>
2470 Append (Result, "Any (" & Str_VF (E.VF) & ')');
2472 when PC_Any_VP =>
2473 Append (Result, "Any (" & Str_VP (E.VP) & ')');
2475 when PC_Arb_X =>
2476 Append (Result, "Arb");
2478 when PC_Arbno_S =>
2479 Append (Result, "Arbno (");
2480 Image_Seq (E.Alt, E, False);
2481 Append (Result, ')');
2483 when PC_Arbno_X =>
2484 Append (Result, "Arbno (");
2485 Image_Seq (E.Alt.Pthen, Refs (E.Index - 2), False);
2486 Append (Result, ')');
2488 when PC_Assign_Imm =>
2489 Delete_Ampersand;
2490 Append (Result, "* " & Str_VP (Refs (E.Index - 1).VP));
2492 when PC_Assign_OnM =>
2493 Delete_Ampersand;
2494 Append (Result, "** " & Str_VP (Refs (E.Index - 1).VP));
2496 when PC_Any_CH =>
2497 Append (Result, "Any ('" & E.Char & "')");
2499 when PC_Bal =>
2500 Append (Result, "Bal");
2502 when PC_Break_CH =>
2503 Append (Result, "Break ('" & E.Char & "')");
2505 when PC_Break_CS =>
2506 Append (Result, "Break (" & Image (To_Sequence (E.CS)) & ')');
2508 when PC_Break_VF =>
2509 Append (Result, "Break (" & Str_VF (E.VF) & ')');
2511 when PC_Break_VP =>
2512 Append (Result, "Break (" & Str_VP (E.VP) & ')');
2514 when PC_BreakX_CH =>
2515 Append (Result, "BreakX ('" & E.Char & "')");
2516 ER := ER.Pthen;
2518 when PC_BreakX_CS =>
2519 Append (Result, "BreakX (" & Image (To_Sequence (E.CS)) & ')');
2520 ER := ER.Pthen;
2522 when PC_BreakX_VF =>
2523 Append (Result, "BreakX (" & Str_VF (E.VF) & ')');
2524 ER := ER.Pthen;
2526 when PC_BreakX_VP =>
2527 Append (Result, "BreakX (" & Str_VP (E.VP) & ')');
2528 ER := ER.Pthen;
2530 when PC_Char =>
2531 Append (Result, ''' & E.Char & ''');
2533 when PC_Fail =>
2534 Append (Result, "Fail");
2536 when PC_Fence =>
2537 Append (Result, "Fence");
2539 when PC_Fence_X =>
2540 Append (Result, "Fence (");
2541 Image_Seq (E.Pthen, Refs (E.Index - 1), False);
2542 Append (Result, ")");
2543 ER := Refs (E.Index - 1).Pthen;
2545 when PC_Len_Nat =>
2546 Append (Result, "Len (" & E.Nat & ')');
2548 when PC_Len_NF =>
2549 Append (Result, "Len (" & Str_NF (E.NF) & ')');
2551 when PC_Len_NP =>
2552 Append (Result, "Len (" & Str_NP (E.NP) & ')');
2554 when PC_NotAny_CH =>
2555 Append (Result, "NotAny ('" & E.Char & "')");
2557 when PC_NotAny_CS =>
2558 Append (Result, "NotAny (" & Image (To_Sequence (E.CS)) & ')');
2560 when PC_NotAny_VF =>
2561 Append (Result, "NotAny (" & Str_VF (E.VF) & ')');
2563 when PC_NotAny_VP =>
2564 Append (Result, "NotAny (" & Str_VP (E.VP) & ')');
2566 when PC_NSpan_CH =>
2567 Append (Result, "NSpan ('" & E.Char & "')");
2569 when PC_NSpan_CS =>
2570 Append (Result, "NSpan (" & Image (To_Sequence (E.CS)) & ')');
2572 when PC_NSpan_VF =>
2573 Append (Result, "NSpan (" & Str_VF (E.VF) & ')');
2575 when PC_NSpan_VP =>
2576 Append (Result, "NSpan (" & Str_VP (E.VP) & ')');
2578 when PC_Null =>
2579 Append (Result, """""");
2581 when PC_Pos_Nat =>
2582 Append (Result, "Pos (" & E.Nat & ')');
2584 when PC_Pos_NF =>
2585 Append (Result, "Pos (" & Str_NF (E.NF) & ')');
2587 when PC_Pos_NP =>
2588 Append (Result, "Pos (" & Str_NP (E.NP) & ')');
2590 when PC_R_Enter =>
2591 Kill_Ampersand := True;
2593 when PC_Rest =>
2594 Append (Result, "Rest");
2596 when PC_Rpat =>
2597 Append (Result, "(+ " & Str_PP (E.PP) & ')');
2599 when PC_Pred_Func =>
2600 Append (Result, "(+ " & Str_BF (E.BF) & ')');
2602 when PC_RPos_Nat =>
2603 Append (Result, "RPos (" & E.Nat & ')');
2605 when PC_RPos_NF =>
2606 Append (Result, "RPos (" & Str_NF (E.NF) & ')');
2608 when PC_RPos_NP =>
2609 Append (Result, "RPos (" & Str_NP (E.NP) & ')');
2611 when PC_RTab_Nat =>
2612 Append (Result, "RTab (" & E.Nat & ')');
2614 when PC_RTab_NF =>
2615 Append (Result, "RTab (" & Str_NF (E.NF) & ')');
2617 when PC_RTab_NP =>
2618 Append (Result, "RTab (" & Str_NP (E.NP) & ')');
2620 when PC_Setcur =>
2621 Append (Result, "Setcur (" & Str_NP (E.Var) & ')');
2623 when PC_Span_CH =>
2624 Append (Result, "Span ('" & E.Char & "')");
2626 when PC_Span_CS =>
2627 Append (Result, "Span (" & Image (To_Sequence (E.CS)) & ')');
2629 when PC_Span_VF =>
2630 Append (Result, "Span (" & Str_VF (E.VF) & ')');
2632 when PC_Span_VP =>
2633 Append (Result, "Span (" & Str_VP (E.VP) & ')');
2635 when PC_String =>
2636 Append (Result, Image (E.Str.all));
2638 when PC_String_2 =>
2639 Append (Result, Image (E.Str2));
2641 when PC_String_3 =>
2642 Append (Result, Image (E.Str3));
2644 when PC_String_4 =>
2645 Append (Result, Image (E.Str4));
2647 when PC_String_5 =>
2648 Append (Result, Image (E.Str5));
2650 when PC_String_6 =>
2651 Append (Result, Image (E.Str6));
2653 when PC_String_VF =>
2654 Append (Result, "(+" & Str_VF (E.VF) & ')');
2656 when PC_String_VP =>
2657 Append (Result, "(+" & Str_VP (E.VP) & ')');
2659 when PC_Succeed =>
2660 Append (Result, "Succeed");
2662 when PC_Tab_Nat =>
2663 Append (Result, "Tab (" & E.Nat & ')');
2665 when PC_Tab_NF =>
2666 Append (Result, "Tab (" & Str_NF (E.NF) & ')');
2668 when PC_Tab_NP =>
2669 Append (Result, "Tab (" & Str_NP (E.NP) & ')');
2671 when PC_Write_Imm =>
2672 Append (Result, '(');
2673 Image_Seq (E, Refs (E.Index - 1), True);
2674 Append (Result, " * " & Str_FP (Refs (E.Index - 1).FP));
2675 ER := Refs (E.Index - 1).Pthen;
2677 when PC_Write_OnM =>
2678 Append (Result, '(');
2679 Image_Seq (E.Pthen, Refs (E.Index - 1), True);
2680 Append (Result, " ** " & Str_FP (Refs (E.Index - 1).FP));
2681 ER := Refs (E.Index - 1).Pthen;
2683 -- Other pattern codes should not appear as leading elements
2685 when PC_Arb_Y |
2686 PC_Arbno_Y |
2687 PC_Assign |
2688 PC_BreakX_X |
2689 PC_EOP |
2690 PC_Fence_Y |
2691 PC_R_Remove |
2692 PC_R_Restore |
2693 PC_Unanchored =>
2694 Append (Result, "???");
2696 end case;
2698 E := ER;
2699 end Image_One;
2701 ---------------
2702 -- Image_Seq --
2703 ---------------
2705 procedure Image_Seq (E : PE_Ptr; Succ : PE_Ptr; Paren : Boolean) is
2706 E1 : PE_Ptr := E;
2707 Mult : Boolean := False;
2708 Indx : Natural := Length (Result);
2710 begin
2711 -- The image of EOP is "" (the null string)
2713 if E = EOP then
2714 Append (Result, """""");
2716 -- Else generate appropriate concatenation sequence
2718 else
2719 loop
2720 Image_One (E1);
2721 exit when E1 = Succ;
2722 exit when E1 = EOP;
2723 Mult := True;
2725 if Kill_Ampersand then
2726 Kill_Ampersand := False;
2727 else
2728 Append (Result, " & ");
2729 end if;
2730 end loop;
2731 end if;
2733 if Mult and Paren then
2734 Insert (Result, Indx + 1, "(");
2735 Append (Result, ")");
2736 end if;
2737 end Image_Seq;
2739 -- Start of processing for Image
2741 begin
2742 Build_Ref_Array (P.P, Refs);
2743 Image_Seq (P.P, EOP, False);
2744 return Result;
2745 end Image;
2747 -----------
2748 -- Is_In --
2749 -----------
2751 function Is_In (C : Character; Str : String) return Boolean is
2752 begin
2753 for J in Str'Range loop
2754 if Str (J) = C then
2755 return True;
2756 end if;
2757 end loop;
2759 return False;
2760 end Is_In;
2762 ---------
2763 -- Len --
2764 ---------
2766 function Len (Count : Natural) return Pattern is
2767 begin
2768 -- Note, the following is not just an optimization, it is needed
2769 -- to ensure that Arbno (Len (0)) does not generate an infinite
2770 -- matching loop (since PC_Len_Nat is OK_For_Simple_Arbno).
2772 if Count = 0 then
2773 return (AFC with 0, new PE'(PC_Null, 1, EOP));
2775 else
2776 return (AFC with 0, new PE'(PC_Len_Nat, 1, EOP, Count));
2777 end if;
2778 end Len;
2780 function Len (Count : Natural_Func) return Pattern is
2781 begin
2782 return (AFC with 0, new PE'(PC_Len_NF, 1, EOP, Count));
2783 end Len;
2785 function Len (Count : access Natural) return Pattern is
2786 begin
2787 return (AFC with 0, new PE'(PC_Len_NP, 1, EOP, Natural_Ptr (Count)));
2788 end Len;
2790 -----------------
2791 -- Logic_Error --
2792 -----------------
2794 procedure Logic_Error is
2795 begin
2796 Raise_Exception
2797 (Program_Error'Identity,
2798 "Internal logic error in GNAT.Spitbol.Patterns");
2799 end Logic_Error;
2801 -----------
2802 -- Match --
2803 -----------
2805 function Match
2806 (Subject : VString;
2807 Pat : Pattern)
2808 return Boolean
2810 Start, Stop : Natural;
2812 begin
2813 if Debug_Mode then
2814 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2815 else
2816 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2817 end if;
2819 return Start /= 0;
2820 end Match;
2822 function Match
2823 (Subject : String;
2824 Pat : Pattern)
2825 return Boolean
2827 Start, Stop : Natural;
2828 subtype String1 is String (1 .. Subject'Length);
2830 begin
2831 if Debug_Mode then
2832 XMatchD (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2833 else
2834 XMatch (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2835 end if;
2837 return Start /= 0;
2838 end Match;
2840 function Match
2841 (Subject : VString_Var;
2842 Pat : Pattern;
2843 Replace : VString)
2844 return Boolean
2846 Start, Stop : Natural;
2848 begin
2849 if Debug_Mode then
2850 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2851 else
2852 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2853 end if;
2855 if Start = 0 then
2856 return False;
2857 else
2858 Replace_Slice
2859 (Subject'Unrestricted_Access.all,
2860 Start, Stop, Get_String (Replace).all);
2861 return True;
2862 end if;
2863 end Match;
2865 function Match
2866 (Subject : VString_Var;
2867 Pat : Pattern;
2868 Replace : String)
2869 return Boolean
2871 Start, Stop : Natural;
2873 begin
2874 if Debug_Mode then
2875 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2876 else
2877 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2878 end if;
2880 if Start = 0 then
2881 return False;
2882 else
2883 Replace_Slice
2884 (Subject'Unrestricted_Access.all, Start, Stop, Replace);
2885 return True;
2886 end if;
2887 end Match;
2889 procedure Match
2890 (Subject : VString;
2891 Pat : Pattern)
2893 Start, Stop : Natural;
2895 begin
2896 if Debug_Mode then
2897 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2898 else
2899 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2900 end if;
2902 end Match;
2904 procedure Match
2905 (Subject : String;
2906 Pat : Pattern)
2908 Start, Stop : Natural;
2909 subtype String1 is String (1 .. Subject'Length);
2910 begin
2911 if Debug_Mode then
2912 XMatchD (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2913 else
2914 XMatch (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2915 end if;
2916 end Match;
2918 procedure Match
2919 (Subject : in out VString;
2920 Pat : Pattern;
2921 Replace : VString)
2923 Start, Stop : Natural;
2925 begin
2926 if Debug_Mode then
2927 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2928 else
2929 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2930 end if;
2932 if Start /= 0 then
2933 Replace_Slice (Subject, Start, Stop, Get_String (Replace).all);
2934 end if;
2935 end Match;
2937 procedure Match
2938 (Subject : in out VString;
2939 Pat : Pattern;
2940 Replace : String)
2942 Start, Stop : Natural;
2944 begin
2945 if Debug_Mode then
2946 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2947 else
2948 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2949 end if;
2951 if Start /= 0 then
2952 Replace_Slice (Subject, Start, Stop, Replace);
2953 end if;
2954 end Match;
2956 function Match
2957 (Subject : VString;
2958 Pat : PString)
2959 return Boolean
2961 Pat_Len : constant Natural := Pat'Length;
2962 Sub_Len : constant Natural := Length (Subject);
2963 Sub_Str : constant String_Access := Get_String (Subject);
2965 begin
2966 if Anchored_Mode then
2967 if Pat_Len > Sub_Len then
2968 return False;
2969 else
2970 return Pat = Sub_Str.all (1 .. Pat_Len);
2971 end if;
2973 else
2974 for J in 1 .. Sub_Len - Pat_Len + 1 loop
2975 if Pat = Sub_Str.all (J .. J + (Pat_Len - 1)) then
2976 return True;
2977 end if;
2978 end loop;
2980 return False;
2981 end if;
2982 end Match;
2984 function Match
2985 (Subject : String;
2986 Pat : PString)
2987 return Boolean
2989 Pat_Len : constant Natural := Pat'Length;
2990 Sub_Len : constant Natural := Subject'Length;
2991 SFirst : constant Natural := Subject'First;
2993 begin
2994 if Anchored_Mode then
2995 if Pat_Len > Sub_Len then
2996 return False;
2997 else
2998 return Pat = Subject (SFirst .. SFirst + Pat_Len - 1);
2999 end if;
3001 else
3002 for J in SFirst .. SFirst + Sub_Len - Pat_Len loop
3003 if Pat = Subject (J .. J + (Pat_Len - 1)) then
3004 return True;
3005 end if;
3006 end loop;
3008 return False;
3009 end if;
3010 end Match;
3012 function Match
3013 (Subject : VString_Var;
3014 Pat : PString;
3015 Replace : VString)
3016 return Boolean
3018 Start, Stop : Natural;
3020 begin
3021 if Debug_Mode then
3022 XMatchD (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3023 else
3024 XMatch (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3025 end if;
3027 if Start = 0 then
3028 return False;
3029 else
3030 Replace_Slice
3031 (Subject'Unrestricted_Access.all,
3032 Start, Stop, Get_String (Replace).all);
3033 return True;
3034 end if;
3035 end Match;
3037 function Match
3038 (Subject : VString_Var;
3039 Pat : PString;
3040 Replace : String)
3041 return Boolean
3043 Start, Stop : Natural;
3045 begin
3046 if Debug_Mode then
3047 XMatchD (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3048 else
3049 XMatch (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3050 end if;
3052 if Start = 0 then
3053 return False;
3054 else
3055 Replace_Slice
3056 (Subject'Unrestricted_Access.all, Start, Stop, Replace);
3057 return True;
3058 end if;
3059 end Match;
3061 procedure Match
3062 (Subject : VString;
3063 Pat : PString)
3065 Start, Stop : Natural;
3067 begin
3068 if Debug_Mode then
3069 XMatchD (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3070 else
3071 XMatch (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3072 end if;
3073 end Match;
3075 procedure Match
3076 (Subject : String;
3077 Pat : PString)
3079 Start, Stop : Natural;
3080 subtype String1 is String (1 .. Subject'Length);
3082 begin
3083 if Debug_Mode then
3084 XMatchD (String1 (Subject), S_To_PE (Pat), 0, Start, Stop);
3085 else
3086 XMatch (String1 (Subject), S_To_PE (Pat), 0, Start, Stop);
3087 end if;
3088 end Match;
3090 procedure Match
3091 (Subject : in out VString;
3092 Pat : PString;
3093 Replace : VString)
3095 Start, Stop : Natural;
3097 begin
3098 if Debug_Mode then
3099 XMatchD (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3100 else
3101 XMatch (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3102 end if;
3104 if Start /= 0 then
3105 Replace_Slice (Subject, Start, Stop, Get_String (Replace).all);
3106 end if;
3107 end Match;
3109 procedure Match
3110 (Subject : in out VString;
3111 Pat : PString;
3112 Replace : String)
3114 Start, Stop : Natural;
3116 begin
3117 if Debug_Mode then
3118 XMatchD (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3119 else
3120 XMatch (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3121 end if;
3123 if Start /= 0 then
3124 Replace_Slice (Subject, Start, Stop, Replace);
3125 end if;
3126 end Match;
3128 function Match
3129 (Subject : VString_Var;
3130 Pat : Pattern;
3131 Result : Match_Result_Var)
3132 return Boolean
3134 Start, Stop : Natural;
3136 begin
3137 if Debug_Mode then
3138 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
3139 else
3140 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
3141 end if;
3143 if Start = 0 then
3144 Result'Unrestricted_Access.all.Var := null;
3145 return False;
3147 else
3148 Result'Unrestricted_Access.all.Var := Subject'Unrestricted_Access;
3149 Result'Unrestricted_Access.all.Start := Start;
3150 Result'Unrestricted_Access.all.Stop := Stop;
3151 return True;
3152 end if;
3153 end Match;
3155 procedure Match
3156 (Subject : in out VString;
3157 Pat : Pattern;
3158 Result : out Match_Result)
3160 Start, Stop : Natural;
3162 begin
3163 if Debug_Mode then
3164 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
3165 else
3166 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
3167 end if;
3169 if Start = 0 then
3170 Result.Var := null;
3172 else
3173 Result.Var := Subject'Unrestricted_Access;
3174 Result.Start := Start;
3175 Result.Stop := Stop;
3176 end if;
3177 end Match;
3179 ---------------
3180 -- New_LineD --
3181 ---------------
3183 procedure New_LineD is
3184 begin
3185 if Internal_Debug then
3186 New_Line;
3187 end if;
3188 end New_LineD;
3190 ------------
3191 -- NotAny --
3192 ------------
3194 function NotAny (Str : String) return Pattern is
3195 begin
3196 return (AFC with 0, new PE'(PC_NotAny_CS, 1, EOP, To_Set (Str)));
3197 end NotAny;
3199 function NotAny (Str : VString) return Pattern is
3200 begin
3201 return NotAny (S (Str));
3202 end NotAny;
3204 function NotAny (Str : Character) return Pattern is
3205 begin
3206 return (AFC with 0, new PE'(PC_NotAny_CH, 1, EOP, Str));
3207 end NotAny;
3209 function NotAny (Str : Character_Set) return Pattern is
3210 begin
3211 return (AFC with 0, new PE'(PC_NotAny_CS, 1, EOP, Str));
3212 end NotAny;
3214 function NotAny (Str : access VString) return Pattern is
3215 begin
3216 return (AFC with 0, new PE'(PC_NotAny_VP, 1, EOP, VString_Ptr (Str)));
3217 end NotAny;
3219 function NotAny (Str : VString_Func) return Pattern is
3220 begin
3221 return (AFC with 0, new PE'(PC_NotAny_VF, 1, EOP, Str));
3222 end NotAny;
3224 -----------
3225 -- NSpan --
3226 -----------
3228 function NSpan (Str : String) return Pattern is
3229 begin
3230 return (AFC with 0, new PE'(PC_NSpan_CS, 1, EOP, To_Set (Str)));
3231 end NSpan;
3233 function NSpan (Str : VString) return Pattern is
3234 begin
3235 return NSpan (S (Str));
3236 end NSpan;
3238 function NSpan (Str : Character) return Pattern is
3239 begin
3240 return (AFC with 0, new PE'(PC_NSpan_CH, 1, EOP, Str));
3241 end NSpan;
3243 function NSpan (Str : Character_Set) return Pattern is
3244 begin
3245 return (AFC with 0, new PE'(PC_NSpan_CS, 1, EOP, Str));
3246 end NSpan;
3248 function NSpan (Str : access VString) return Pattern is
3249 begin
3250 return (AFC with 0, new PE'(PC_NSpan_VP, 1, EOP, VString_Ptr (Str)));
3251 end NSpan;
3253 function NSpan (Str : VString_Func) return Pattern is
3254 begin
3255 return (AFC with 0, new PE'(PC_NSpan_VF, 1, EOP, Str));
3256 end NSpan;
3258 ---------
3259 -- Pos --
3260 ---------
3262 function Pos (Count : Natural) return Pattern is
3263 begin
3264 return (AFC with 0, new PE'(PC_Pos_Nat, 1, EOP, Count));
3265 end Pos;
3267 function Pos (Count : Natural_Func) return Pattern is
3268 begin
3269 return (AFC with 0, new PE'(PC_Pos_NF, 1, EOP, Count));
3270 end Pos;
3272 function Pos (Count : access Natural) return Pattern is
3273 begin
3274 return (AFC with 0, new PE'(PC_Pos_NP, 1, EOP, Natural_Ptr (Count)));
3275 end Pos;
3277 ----------
3278 -- PutD --
3279 ----------
3281 procedure PutD (Str : String) is
3282 begin
3283 if Internal_Debug then
3284 Put (Str);
3285 end if;
3286 end PutD;
3288 ---------------
3289 -- Put_LineD --
3290 ---------------
3292 procedure Put_LineD (Str : String) is
3293 begin
3294 if Internal_Debug then
3295 Put_Line (Str);
3296 end if;
3297 end Put_LineD;
3299 -------------
3300 -- Replace --
3301 -------------
3303 procedure Replace
3304 (Result : in out Match_Result;
3305 Replace : VString)
3307 begin
3308 if Result.Var /= null then
3309 Replace_Slice
3310 (Result.Var.all,
3311 Result.Start,
3312 Result.Stop,
3313 Get_String (Replace).all);
3314 Result.Var := null;
3315 end if;
3316 end Replace;
3318 ----------
3319 -- Rest --
3320 ----------
3322 function Rest return Pattern is
3323 begin
3324 return (AFC with 0, new PE'(PC_Rest, 1, EOP));
3325 end Rest;
3327 ----------
3328 -- Rpos --
3329 ----------
3331 function Rpos (Count : Natural) return Pattern is
3332 begin
3333 return (AFC with 0, new PE'(PC_RPos_Nat, 1, EOP, Count));
3334 end Rpos;
3336 function Rpos (Count : Natural_Func) return Pattern is
3337 begin
3338 return (AFC with 0, new PE'(PC_RPos_NF, 1, EOP, Count));
3339 end Rpos;
3341 function Rpos (Count : access Natural) return Pattern is
3342 begin
3343 return (AFC with 0, new PE'(PC_RPos_NP, 1, EOP, Natural_Ptr (Count)));
3344 end Rpos;
3346 ----------
3347 -- Rtab --
3348 ----------
3350 function Rtab (Count : Natural) return Pattern is
3351 begin
3352 return (AFC with 0, new PE'(PC_RTab_Nat, 1, EOP, Count));
3353 end Rtab;
3355 function Rtab (Count : Natural_Func) return Pattern is
3356 begin
3357 return (AFC with 0, new PE'(PC_RTab_NF, 1, EOP, Count));
3358 end Rtab;
3360 function Rtab (Count : access Natural) return Pattern is
3361 begin
3362 return (AFC with 0, new PE'(PC_RTab_NP, 1, EOP, Natural_Ptr (Count)));
3363 end Rtab;
3365 -------------
3366 -- S_To_PE --
3367 -------------
3369 function S_To_PE (Str : PString) return PE_Ptr is
3370 Len : constant Natural := Str'Length;
3372 begin
3373 case Len is
3374 when 0 =>
3375 return new PE'(PC_Null, 1, EOP);
3377 when 1 =>
3378 return new PE'(PC_Char, 1, EOP, Str (1));
3380 when 2 =>
3381 return new PE'(PC_String_2, 1, EOP, Str);
3383 when 3 =>
3384 return new PE'(PC_String_3, 1, EOP, Str);
3386 when 4 =>
3387 return new PE'(PC_String_4, 1, EOP, Str);
3389 when 5 =>
3390 return new PE'(PC_String_5, 1, EOP, Str);
3392 when 6 =>
3393 return new PE'(PC_String_6, 1, EOP, Str);
3395 when others =>
3396 return new PE'(PC_String, 1, EOP, new String'(Str));
3398 end case;
3399 end S_To_PE;
3401 -------------------
3402 -- Set_Successor --
3403 -------------------
3405 -- Note: this procedure is not used by the normal concatenation circuit,
3406 -- since other fixups are required on the left operand in this case, and
3407 -- they might as well be done all together.
3409 procedure Set_Successor (Pat : PE_Ptr; Succ : PE_Ptr) is
3410 begin
3411 if Pat = null then
3412 Uninitialized_Pattern;
3414 elsif Pat = EOP then
3415 Logic_Error;
3417 else
3418 declare
3419 Refs : Ref_Array (1 .. Pat.Index);
3420 -- We build a reference array for L whose N'th element points to
3421 -- the pattern element of L whose original Index value is N.
3423 P : PE_Ptr;
3425 begin
3426 Build_Ref_Array (Pat, Refs);
3428 for J in Refs'Range loop
3429 P := Refs (J);
3431 if P.Pthen = EOP then
3432 P.Pthen := Succ;
3433 end if;
3435 if P.Pcode in PC_Has_Alt and then P.Alt = EOP then
3436 P.Alt := Succ;
3437 end if;
3438 end loop;
3439 end;
3440 end if;
3441 end Set_Successor;
3443 ------------
3444 -- Setcur --
3445 ------------
3447 function Setcur (Var : access Natural) return Pattern is
3448 begin
3449 return (AFC with 0, new PE'(PC_Setcur, 1, EOP, Natural_Ptr (Var)));
3450 end Setcur;
3452 ----------
3453 -- Span --
3454 ----------
3456 function Span (Str : String) return Pattern is
3457 begin
3458 return (AFC with 0, new PE'(PC_Span_CS, 1, EOP, To_Set (Str)));
3459 end Span;
3461 function Span (Str : VString) return Pattern is
3462 begin
3463 return Span (S (Str));
3464 end Span;
3466 function Span (Str : Character) return Pattern is
3467 begin
3468 return (AFC with 0, new PE'(PC_Span_CH, 1, EOP, Str));
3469 end Span;
3471 function Span (Str : Character_Set) return Pattern is
3472 begin
3473 return (AFC with 0, new PE'(PC_Span_CS, 1, EOP, Str));
3474 end Span;
3476 function Span (Str : access VString) return Pattern is
3477 begin
3478 return (AFC with 0, new PE'(PC_Span_VP, 1, EOP, VString_Ptr (Str)));
3479 end Span;
3481 function Span (Str : VString_Func) return Pattern is
3482 begin
3483 return (AFC with 0, new PE'(PC_Span_VF, 1, EOP, Str));
3484 end Span;
3486 ------------
3487 -- Str_BF --
3488 ------------
3490 function Str_BF (A : Boolean_Func) return String is
3491 function To_A is new Unchecked_Conversion (Boolean_Func, Address);
3493 begin
3494 return "BF(" & Image (To_A (A)) & ')';
3495 end Str_BF;
3497 ------------
3498 -- Str_FP --
3499 ------------
3501 function Str_FP (A : File_Ptr) return String is
3502 begin
3503 return "FP(" & Image (A.all'Address) & ')';
3504 end Str_FP;
3506 ------------
3507 -- Str_NF --
3508 ------------
3510 function Str_NF (A : Natural_Func) return String is
3511 function To_A is new Unchecked_Conversion (Natural_Func, Address);
3513 begin
3514 return "NF(" & Image (To_A (A)) & ')';
3515 end Str_NF;
3517 ------------
3518 -- Str_NP --
3519 ------------
3521 function Str_NP (A : Natural_Ptr) return String is
3522 begin
3523 return "NP(" & Image (A.all'Address) & ')';
3524 end Str_NP;
3526 ------------
3527 -- Str_PP --
3528 ------------
3530 function Str_PP (A : Pattern_Ptr) return String is
3531 begin
3532 return "PP(" & Image (A.all'Address) & ')';
3533 end Str_PP;
3535 ------------
3536 -- Str_VF --
3537 ------------
3539 function Str_VF (A : VString_Func) return String is
3540 function To_A is new Unchecked_Conversion (VString_Func, Address);
3542 begin
3543 return "VF(" & Image (To_A (A)) & ')';
3544 end Str_VF;
3546 ------------
3547 -- Str_VP --
3548 ------------
3550 function Str_VP (A : VString_Ptr) return String is
3551 begin
3552 return "VP(" & Image (A.all'Address) & ')';
3553 end Str_VP;
3555 -------------
3556 -- Succeed --
3557 -------------
3559 function Succeed return Pattern is
3560 begin
3561 return (AFC with 1, new PE'(PC_Succeed, 1, EOP));
3562 end Succeed;
3564 ---------
3565 -- Tab --
3566 ---------
3568 function Tab (Count : Natural) return Pattern is
3569 begin
3570 return (AFC with 0, new PE'(PC_Tab_Nat, 1, EOP, Count));
3571 end Tab;
3573 function Tab (Count : Natural_Func) return Pattern is
3574 begin
3575 return (AFC with 0, new PE'(PC_Tab_NF, 1, EOP, Count));
3576 end Tab;
3578 function Tab (Count : access Natural) return Pattern is
3579 begin
3580 return (AFC with 0, new PE'(PC_Tab_NP, 1, EOP, Natural_Ptr (Count)));
3581 end Tab;
3583 ---------------------------
3584 -- Uninitialized_Pattern --
3585 ---------------------------
3587 procedure Uninitialized_Pattern is
3588 begin
3589 Raise_Exception
3590 (Program_Error'Identity,
3591 "uninitialized value of type GNAT.Spitbol.Patterns.Pattern");
3592 end Uninitialized_Pattern;
3594 ------------
3595 -- XMatch --
3596 ------------
3598 procedure XMatch
3599 (Subject : String;
3600 Pat_P : PE_Ptr;
3601 Pat_S : Natural;
3602 Start : out Natural;
3603 Stop : out Natural)
3605 Node : PE_Ptr;
3606 -- Pointer to current pattern node. Initialized from Pat_P, and then
3607 -- updated as the match proceeds through its constituent elements.
3609 Length : constant Natural := Subject'Length;
3610 -- Length of string (= Subject'Last, since Subject'First is always 1)
3612 Cursor : Integer := 0;
3613 -- If the value is non-negative, then this value is the index showing
3614 -- the current position of the match in the subject string. The next
3615 -- character to be matched is at Subject (Cursor + 1). Note that since
3616 -- our view of the subject string in XMatch always has a lower bound
3617 -- of one, regardless of original bounds, that this definition exactly
3618 -- corresponds to the cursor value as referenced by functions like Pos.
3620 -- If the value is negative, then this is a saved stack pointer,
3621 -- typically a base pointer of an inner or outer region. Cursor
3622 -- temporarily holds such a value when it is popped from the stack
3623 -- by Fail. In all cases, Cursor is reset to a proper non-negative
3624 -- cursor value before the match proceeds (e.g. by propagating the
3625 -- failure and popping a "real" cursor value from the stack.
3627 PE_Unanchored : aliased PE := (PC_Unanchored, 0, Pat_P);
3628 -- Dummy pattern element used in the unanchored case.
3630 Stack : Stack_Type;
3631 -- The pattern matching failure stack for this call to Match
3633 Stack_Ptr : Stack_Range;
3634 -- Current stack pointer. This points to the top element of the stack
3635 -- that is currently in use. At the outer level this is the special
3636 -- entry placed on the stack according to the anchor mode.
3638 Stack_Init : constant Stack_Range := Stack'First + 1;
3639 -- This is the initial value of the Stack_Ptr and Stack_Base. The
3640 -- initial (Stack'First) element of the stack is not used so that
3641 -- when we pop the last element off, Stack_Ptr is still in range.
3643 Stack_Base : Stack_Range;
3644 -- This value is the stack base value, i.e. the stack pointer for the
3645 -- first history stack entry in the current stack region. See separate
3646 -- section on handling of recursive pattern matches.
3648 Assign_OnM : Boolean := False;
3649 -- Set True if assign-on-match or write-on-match operations may be
3650 -- present in the history stack, which must then be scanned on a
3651 -- successful match.
3653 procedure Pop_Region;
3654 pragma Inline (Pop_Region);
3655 -- Used at the end of processing of an inner region. if the inner
3656 -- region left no stack entries, then all trace of it is removed.
3657 -- Otherwise a PC_Restore_Region entry is pushed to ensure proper
3658 -- handling of alternatives in the inner region.
3660 procedure Push (Node : PE_Ptr);
3661 pragma Inline (Push);
3662 -- Make entry in pattern matching stack with current cursor valeu
3664 procedure Push_Region;
3665 pragma Inline (Push_Region);
3666 -- This procedure makes a new region on the history stack. The
3667 -- caller first establishes the special entry on the stack, but
3668 -- does not push the stack pointer. Then this call stacks a
3669 -- PC_Remove_Region node, on top of this entry, using the cursor
3670 -- field of the PC_Remove_Region entry to save the outer level
3671 -- stack base value, and resets the stack base to point to this
3672 -- PC_Remove_Region node.
3674 ----------------
3675 -- Pop_Region --
3676 ----------------
3678 procedure Pop_Region is
3679 begin
3680 -- If nothing was pushed in the inner region, we can just get
3681 -- rid of it entirely, leaving no traces that it was ever there
3683 if Stack_Ptr = Stack_Base then
3684 Stack_Ptr := Stack_Base - 2;
3685 Stack_Base := Stack (Stack_Ptr + 2).Cursor;
3687 -- If stuff was pushed in the inner region, then we have to
3688 -- push a PC_R_Restore node so that we properly handle possible
3689 -- rematches within the region.
3691 else
3692 Stack_Ptr := Stack_Ptr + 1;
3693 Stack (Stack_Ptr).Cursor := Stack_Base;
3694 Stack (Stack_Ptr).Node := CP_R_Restore'Access;
3695 Stack_Base := Stack (Stack_Base).Cursor;
3696 end if;
3697 end Pop_Region;
3699 ----------
3700 -- Push --
3701 ----------
3703 procedure Push (Node : PE_Ptr) is
3704 begin
3705 Stack_Ptr := Stack_Ptr + 1;
3706 Stack (Stack_Ptr).Cursor := Cursor;
3707 Stack (Stack_Ptr).Node := Node;
3708 end Push;
3710 -----------------
3711 -- Push_Region --
3712 -----------------
3714 procedure Push_Region is
3715 begin
3716 Stack_Ptr := Stack_Ptr + 2;
3717 Stack (Stack_Ptr).Cursor := Stack_Base;
3718 Stack (Stack_Ptr).Node := CP_R_Remove'Access;
3719 Stack_Base := Stack_Ptr;
3720 end Push_Region;
3722 -- Start of processing for XMatch
3724 begin
3725 if Pat_P = null then
3726 Uninitialized_Pattern;
3727 end if;
3729 -- Check we have enough stack for this pattern. This check deals with
3730 -- every possibility except a match of a recursive pattern, where we
3731 -- make a check at each recursion level.
3733 if Pat_S >= Stack_Size - 1 then
3734 raise Pattern_Stack_Overflow;
3735 end if;
3737 -- In anchored mode, the bottom entry on the stack is an abort entry
3739 if Anchored_Mode then
3740 Stack (Stack_Init).Node := CP_Cancel'Access;
3741 Stack (Stack_Init).Cursor := 0;
3743 -- In unanchored more, the bottom entry on the stack references
3744 -- the special pattern element PE_Unanchored, whose Pthen field
3745 -- points to the initial pattern element. The cursor value in this
3746 -- entry is the number of anchor moves so far.
3748 else
3749 Stack (Stack_Init).Node := PE_Unanchored'Unchecked_Access;
3750 Stack (Stack_Init).Cursor := 0;
3751 end if;
3753 Stack_Ptr := Stack_Init;
3754 Stack_Base := Stack_Ptr;
3755 Cursor := 0;
3756 Node := Pat_P;
3757 goto Match;
3759 -----------------------------------------
3760 -- Main Pattern Matching State Control --
3761 -----------------------------------------
3763 -- This is a state machine which uses gotos to change state. The
3764 -- initial state is Match, to initiate the matching of the first
3765 -- element, so the goto Match above starts the match. In the
3766 -- following descriptions, we indicate the global values that
3767 -- are relevant for the state transition.
3769 -- Come here if entire match fails
3771 <<Match_Fail>>
3772 Start := 0;
3773 Stop := 0;
3774 return;
3776 -- Come here if entire match succeeds
3778 -- Cursor current position in subject string
3780 <<Match_Succeed>>
3781 Start := Stack (Stack_Init).Cursor + 1;
3782 Stop := Cursor;
3784 -- Scan history stack for deferred assignments or writes
3786 if Assign_OnM then
3787 for S in Stack_Init .. Stack_Ptr loop
3788 if Stack (S).Node = CP_Assign'Access then
3789 declare
3790 Inner_Base : constant Stack_Range :=
3791 Stack (S + 1).Cursor;
3792 Special_Entry : constant Stack_Range :=
3793 Inner_Base - 1;
3794 Node_OnM : constant PE_Ptr :=
3795 Stack (Special_Entry).Node;
3796 Start : constant Natural :=
3797 Stack (Special_Entry).Cursor + 1;
3798 Stop : constant Natural := Stack (S).Cursor;
3800 begin
3801 if Node_OnM.Pcode = PC_Assign_OnM then
3802 Set_String (Node_OnM.VP.all, Subject (Start .. Stop));
3804 elsif Node_OnM.Pcode = PC_Write_OnM then
3805 Put_Line (Node_OnM.FP.all, Subject (Start .. Stop));
3807 else
3808 Logic_Error;
3809 end if;
3810 end;
3811 end if;
3812 end loop;
3813 end if;
3815 return;
3817 -- Come here if attempt to match current element fails
3819 -- Stack_Base current stack base
3820 -- Stack_Ptr current stack pointer
3822 <<Fail>>
3823 Cursor := Stack (Stack_Ptr).Cursor;
3824 Node := Stack (Stack_Ptr).Node;
3825 Stack_Ptr := Stack_Ptr - 1;
3826 goto Match;
3828 -- Come here if attempt to match current element succeeds
3830 -- Cursor current position in subject string
3831 -- Node pointer to node successfully matched
3832 -- Stack_Base current stack base
3833 -- Stack_Ptr current stack pointer
3835 <<Succeed>>
3836 Node := Node.Pthen;
3838 -- Come here to match the next pattern element
3840 -- Cursor current position in subject string
3841 -- Node pointer to node to be matched
3842 -- Stack_Base current stack base
3843 -- Stack_Ptr current stack pointer
3845 <<Match>>
3847 --------------------------------------------------
3848 -- Main Pattern Match Element Matching Routines --
3849 --------------------------------------------------
3851 -- Here is the case statement that processes the current node. The
3852 -- processing for each element does one of five things:
3854 -- goto Succeed to move to the successor
3855 -- goto Match_Succeed if the entire match succeeds
3856 -- goto Match_Fail if the entire match fails
3857 -- goto Fail to signal failure of current match
3859 -- Processing is NOT allowed to fall through
3861 case Node.Pcode is
3863 -- Cancel
3865 when PC_Cancel =>
3866 goto Match_Fail;
3868 -- Alternation
3870 when PC_Alt =>
3871 Push (Node.Alt);
3872 Node := Node.Pthen;
3873 goto Match;
3875 -- Any (one character case)
3877 when PC_Any_CH =>
3878 if Cursor < Length
3879 and then Subject (Cursor + 1) = Node.Char
3880 then
3881 Cursor := Cursor + 1;
3882 goto Succeed;
3883 else
3884 goto Fail;
3885 end if;
3887 -- Any (character set case)
3889 when PC_Any_CS =>
3890 if Cursor < Length
3891 and then Is_In (Subject (Cursor + 1), Node.CS)
3892 then
3893 Cursor := Cursor + 1;
3894 goto Succeed;
3895 else
3896 goto Fail;
3897 end if;
3899 -- Any (string function case)
3901 when PC_Any_VF => declare
3902 U : constant VString := Node.VF.all;
3903 Str : constant String_Access := Get_String (U);
3905 begin
3906 if Cursor < Length
3907 and then Is_In (Subject (Cursor + 1), Str.all)
3908 then
3909 Cursor := Cursor + 1;
3910 goto Succeed;
3911 else
3912 goto Fail;
3913 end if;
3914 end;
3916 -- Any (string pointer case)
3918 when PC_Any_VP => declare
3919 Str : constant String_Access := Get_String (Node.VP.all);
3921 begin
3922 if Cursor < Length
3923 and then Is_In (Subject (Cursor + 1), Str.all)
3924 then
3925 Cursor := Cursor + 1;
3926 goto Succeed;
3927 else
3928 goto Fail;
3929 end if;
3930 end;
3932 -- Arb (initial match)
3934 when PC_Arb_X =>
3935 Push (Node.Alt);
3936 Node := Node.Pthen;
3937 goto Match;
3939 -- Arb (extension)
3941 when PC_Arb_Y =>
3942 if Cursor < Length then
3943 Cursor := Cursor + 1;
3944 Push (Node);
3945 goto Succeed;
3946 else
3947 goto Fail;
3948 end if;
3950 -- Arbno_S (simple Arbno initialize). This is the node that
3951 -- initiates the match of a simple Arbno structure.
3953 when PC_Arbno_S =>
3954 Push (Node.Alt);
3955 Node := Node.Pthen;
3956 goto Match;
3958 -- Arbno_X (Arbno initialize). This is the node that initiates
3959 -- the match of a complex Arbno structure.
3961 when PC_Arbno_X =>
3962 Push (Node.Alt);
3963 Node := Node.Pthen;
3964 goto Match;
3966 -- Arbno_Y (Arbno rematch). This is the node that is executed
3967 -- following successful matching of one instance of a complex
3968 -- Arbno pattern.
3970 when PC_Arbno_Y => declare
3971 Null_Match : Boolean := (Cursor = Stack (Stack_Base - 1).Cursor);
3973 begin
3974 Pop_Region;
3976 -- If arbno extension matched null, then immediately fail
3978 if Null_Match then
3979 goto Fail;
3980 end if;
3982 -- Here we must do a stack check to make sure enough stack
3983 -- is left. This check will happen once for each instance of
3984 -- the Arbno pattern that is matched. The Nat field of a
3985 -- PC_Arbno pattern contains the maximum stack entries needed
3986 -- for the Arbno with one instance and the successor pattern
3988 if Stack_Ptr + Node.Nat >= Stack'Last then
3989 raise Pattern_Stack_Overflow;
3990 end if;
3992 goto Succeed;
3993 end;
3995 -- Assign. If this node is executed, it means the assign-on-match
3996 -- or write-on-match operation will not happen after all, so we
3997 -- is propagate the failure, removing the PC_Assign node.
3999 when PC_Assign =>
4000 goto Fail;
4002 -- Assign immediate. This node performs the actual assignment.
4004 when PC_Assign_Imm =>
4005 Set_String
4006 (Node.VP.all,
4007 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
4008 Pop_Region;
4009 goto Succeed;
4011 -- Assign on match. This node sets up for the eventual assignment
4013 when PC_Assign_OnM =>
4014 Stack (Stack_Base - 1).Node := Node;
4015 Push (CP_Assign'Access);
4016 Pop_Region;
4017 Assign_OnM := True;
4018 goto Succeed;
4020 -- Bal
4022 when PC_Bal =>
4023 if Cursor >= Length or else Subject (Cursor + 1) = ')' then
4024 goto Fail;
4026 elsif Subject (Cursor + 1) = '(' then
4027 declare
4028 Paren_Count : Natural := 1;
4030 begin
4031 loop
4032 Cursor := Cursor + 1;
4034 if Cursor >= Length then
4035 goto Fail;
4037 elsif Subject (Cursor + 1) = '(' then
4038 Paren_Count := Paren_Count + 1;
4040 elsif Subject (Cursor + 1) = ')' then
4041 Paren_Count := Paren_Count - 1;
4042 exit when Paren_Count = 0;
4043 end if;
4044 end loop;
4045 end;
4046 end if;
4048 Cursor := Cursor + 1;
4049 Push (Node);
4050 goto Succeed;
4052 -- Break (one character case)
4054 when PC_Break_CH =>
4055 while Cursor < Length loop
4056 if Subject (Cursor + 1) = Node.Char then
4057 goto Succeed;
4058 else
4059 Cursor := Cursor + 1;
4060 end if;
4061 end loop;
4063 goto Fail;
4065 -- Break (character set case)
4067 when PC_Break_CS =>
4068 while Cursor < Length loop
4069 if Is_In (Subject (Cursor + 1), Node.CS) then
4070 goto Succeed;
4071 else
4072 Cursor := Cursor + 1;
4073 end if;
4074 end loop;
4076 goto Fail;
4078 -- Break (string function case)
4080 when PC_Break_VF => declare
4081 U : constant VString := Node.VF.all;
4082 Str : constant String_Access := Get_String (U);
4084 begin
4085 while Cursor < Length loop
4086 if Is_In (Subject (Cursor + 1), Str.all) then
4087 goto Succeed;
4088 else
4089 Cursor := Cursor + 1;
4090 end if;
4091 end loop;
4093 goto Fail;
4094 end;
4096 -- Break (string pointer case)
4098 when PC_Break_VP => declare
4099 Str : String_Access := Get_String (Node.VP.all);
4101 begin
4102 while Cursor < Length loop
4103 if Is_In (Subject (Cursor + 1), Str.all) then
4104 goto Succeed;
4105 else
4106 Cursor := Cursor + 1;
4107 end if;
4108 end loop;
4110 goto Fail;
4111 end;
4113 -- BreakX (one character case)
4115 when PC_BreakX_CH =>
4116 while Cursor < Length loop
4117 if Subject (Cursor + 1) = Node.Char then
4118 goto Succeed;
4119 else
4120 Cursor := Cursor + 1;
4121 end if;
4122 end loop;
4124 goto Fail;
4126 -- BreakX (character set case)
4128 when PC_BreakX_CS =>
4129 while Cursor < Length loop
4130 if Is_In (Subject (Cursor + 1), Node.CS) then
4131 goto Succeed;
4132 else
4133 Cursor := Cursor + 1;
4134 end if;
4135 end loop;
4137 goto Fail;
4139 -- BreakX (string function case)
4141 when PC_BreakX_VF => declare
4142 U : constant VString := Node.VF.all;
4143 Str : constant String_Access := Get_String (U);
4145 begin
4146 while Cursor < Length loop
4147 if Is_In (Subject (Cursor + 1), Str.all) then
4148 goto Succeed;
4149 else
4150 Cursor := Cursor + 1;
4151 end if;
4152 end loop;
4154 goto Fail;
4155 end;
4157 -- BreakX (string pointer case)
4159 when PC_BreakX_VP => declare
4160 Str : String_Access := Get_String (Node.VP.all);
4162 begin
4163 while Cursor < Length loop
4164 if Is_In (Subject (Cursor + 1), Str.all) then
4165 goto Succeed;
4166 else
4167 Cursor := Cursor + 1;
4168 end if;
4169 end loop;
4171 goto Fail;
4172 end;
4174 -- BreakX_X (BreakX extension). See section on "Compound Pattern
4175 -- Structures". This node is the alternative that is stacked to
4176 -- skip past the break character and extend the break.
4178 when PC_BreakX_X =>
4179 Cursor := Cursor + 1;
4180 goto Succeed;
4182 -- Character (one character string)
4184 when PC_Char =>
4185 if Cursor < Length
4186 and then Subject (Cursor + 1) = Node.Char
4187 then
4188 Cursor := Cursor + 1;
4189 goto Succeed;
4190 else
4191 goto Fail;
4192 end if;
4194 -- End of Pattern
4196 when PC_EOP =>
4197 if Stack_Base = Stack_Init then
4198 goto Match_Succeed;
4200 -- End of recursive inner match. See separate section on
4201 -- handing of recursive pattern matches for details.
4203 else
4204 Node := Stack (Stack_Base - 1).Node;
4205 Pop_Region;
4206 goto Match;
4207 end if;
4209 -- Fail
4211 when PC_Fail =>
4212 goto Fail;
4214 -- Fence (built in pattern)
4216 when PC_Fence =>
4217 Push (CP_Cancel'Access);
4218 goto Succeed;
4220 -- Fence function node X. This is the node that gets control
4221 -- after a successful match of the fenced pattern.
4223 when PC_Fence_X =>
4224 Stack_Ptr := Stack_Ptr + 1;
4225 Stack (Stack_Ptr).Cursor := Stack_Base;
4226 Stack (Stack_Ptr).Node := CP_Fence_Y'Access;
4227 Stack_Base := Stack (Stack_Base).Cursor;
4228 goto Succeed;
4230 -- Fence function node Y. This is the node that gets control on
4231 -- a failure that occurs after the fenced pattern has matched.
4233 -- Note: the Cursor at this stage is actually the inner stack
4234 -- base value. We don't reset this, but we do use it to strip
4235 -- off all the entries made by the fenced pattern.
4237 when PC_Fence_Y =>
4238 Stack_Ptr := Cursor - 2;
4239 goto Fail;
4241 -- Len (integer case)
4243 when PC_Len_Nat =>
4244 if Cursor + Node.Nat > Length then
4245 goto Fail;
4246 else
4247 Cursor := Cursor + Node.Nat;
4248 goto Succeed;
4249 end if;
4251 -- Len (Integer function case)
4253 when PC_Len_NF => declare
4254 N : constant Natural := Node.NF.all;
4256 begin
4257 if Cursor + N > Length then
4258 goto Fail;
4259 else
4260 Cursor := Cursor + N;
4261 goto Succeed;
4262 end if;
4263 end;
4265 -- Len (integer pointer case)
4267 when PC_Len_NP =>
4268 if Cursor + Node.NP.all > Length then
4269 goto Fail;
4270 else
4271 Cursor := Cursor + Node.NP.all;
4272 goto Succeed;
4273 end if;
4275 -- NotAny (one character case)
4277 when PC_NotAny_CH =>
4278 if Cursor < Length
4279 and then Subject (Cursor + 1) /= Node.Char
4280 then
4281 Cursor := Cursor + 1;
4282 goto Succeed;
4283 else
4284 goto Fail;
4285 end if;
4287 -- NotAny (character set case)
4289 when PC_NotAny_CS =>
4290 if Cursor < Length
4291 and then not Is_In (Subject (Cursor + 1), Node.CS)
4292 then
4293 Cursor := Cursor + 1;
4294 goto Succeed;
4295 else
4296 goto Fail;
4297 end if;
4299 -- NotAny (string function case)
4301 when PC_NotAny_VF => declare
4302 U : constant VString := Node.VF.all;
4303 Str : constant String_Access := Get_String (U);
4305 begin
4306 if Cursor < Length
4307 and then
4308 not Is_In (Subject (Cursor + 1), Str.all)
4309 then
4310 Cursor := Cursor + 1;
4311 goto Succeed;
4312 else
4313 goto Fail;
4314 end if;
4315 end;
4317 -- NotAny (string pointer case)
4319 when PC_NotAny_VP => declare
4320 Str : String_Access := Get_String (Node.VP.all);
4322 begin
4323 if Cursor < Length
4324 and then
4325 not Is_In (Subject (Cursor + 1), Str.all)
4326 then
4327 Cursor := Cursor + 1;
4328 goto Succeed;
4329 else
4330 goto Fail;
4331 end if;
4332 end;
4334 -- NSpan (one character case)
4336 when PC_NSpan_CH =>
4337 while Cursor < Length
4338 and then Subject (Cursor + 1) = Node.Char
4339 loop
4340 Cursor := Cursor + 1;
4341 end loop;
4343 goto Succeed;
4345 -- NSpan (character set case)
4347 when PC_NSpan_CS =>
4348 while Cursor < Length
4349 and then Is_In (Subject (Cursor + 1), Node.CS)
4350 loop
4351 Cursor := Cursor + 1;
4352 end loop;
4354 goto Succeed;
4356 -- NSpan (string function case)
4358 when PC_NSpan_VF => declare
4359 U : constant VString := Node.VF.all;
4360 Str : constant String_Access := Get_String (U);
4362 begin
4363 while Cursor < Length
4364 and then Is_In (Subject (Cursor + 1), Str.all)
4365 loop
4366 Cursor := Cursor + 1;
4367 end loop;
4369 goto Succeed;
4370 end;
4372 -- NSpan (string pointer case)
4374 when PC_NSpan_VP => declare
4375 Str : String_Access := Get_String (Node.VP.all);
4377 begin
4378 while Cursor < Length
4379 and then Is_In (Subject (Cursor + 1), Str.all)
4380 loop
4381 Cursor := Cursor + 1;
4382 end loop;
4384 goto Succeed;
4385 end;
4387 -- Null string
4389 when PC_Null =>
4390 goto Succeed;
4392 -- Pos (integer case)
4394 when PC_Pos_Nat =>
4395 if Cursor = Node.Nat then
4396 goto Succeed;
4397 else
4398 goto Fail;
4399 end if;
4401 -- Pos (Integer function case)
4403 when PC_Pos_NF => declare
4404 N : constant Natural := Node.NF.all;
4406 begin
4407 if Cursor = N then
4408 goto Succeed;
4409 else
4410 goto Fail;
4411 end if;
4412 end;
4414 -- Pos (integer pointer case)
4416 when PC_Pos_NP =>
4417 if Cursor = Node.NP.all then
4418 goto Succeed;
4419 else
4420 goto Fail;
4421 end if;
4423 -- Predicate function
4425 when PC_Pred_Func =>
4426 if Node.BF.all then
4427 goto Succeed;
4428 else
4429 goto Fail;
4430 end if;
4432 -- Region Enter. Initiate new pattern history stack region
4434 when PC_R_Enter =>
4435 Stack (Stack_Ptr + 1).Cursor := Cursor;
4436 Push_Region;
4437 goto Succeed;
4439 -- Region Remove node. This is the node stacked by an R_Enter.
4440 -- It removes the special format stack entry right underneath, and
4441 -- then restores the outer level stack base and signals failure.
4443 -- Note: the cursor value at this stage is actually the (negative)
4444 -- stack base value for the outer level.
4446 when PC_R_Remove =>
4447 Stack_Base := Cursor;
4448 Stack_Ptr := Stack_Ptr - 1;
4449 goto Fail;
4451 -- Region restore node. This is the node stacked at the end of an
4452 -- inner level match. Its function is to restore the inner level
4453 -- region, so that alternatives in this region can be sought.
4455 -- Note: the Cursor at this stage is actually the negative of the
4456 -- inner stack base value, which we use to restore the inner region.
4458 when PC_R_Restore =>
4459 Stack_Base := Cursor;
4460 goto Fail;
4462 -- Rest
4464 when PC_Rest =>
4465 Cursor := Length;
4466 goto Succeed;
4468 -- Initiate recursive match (pattern pointer case)
4470 when PC_Rpat =>
4471 Stack (Stack_Ptr + 1).Node := Node.Pthen;
4472 Push_Region;
4474 if Stack_Ptr + Node.PP.all.Stk >= Stack_Size then
4475 raise Pattern_Stack_Overflow;
4476 else
4477 Node := Node.PP.all.P;
4478 goto Match;
4479 end if;
4481 -- RPos (integer case)
4483 when PC_RPos_Nat =>
4484 if Cursor = (Length - Node.Nat) then
4485 goto Succeed;
4486 else
4487 goto Fail;
4488 end if;
4490 -- RPos (integer function case)
4492 when PC_RPos_NF => declare
4493 N : constant Natural := Node.NF.all;
4495 begin
4496 if Length - Cursor = N then
4497 goto Succeed;
4498 else
4499 goto Fail;
4500 end if;
4501 end;
4503 -- RPos (integer pointer case)
4505 when PC_RPos_NP =>
4506 if Cursor = (Length - Node.NP.all) then
4507 goto Succeed;
4508 else
4509 goto Fail;
4510 end if;
4512 -- RTab (integer case)
4514 when PC_RTab_Nat =>
4515 if Cursor <= (Length - Node.Nat) then
4516 Cursor := Length - Node.Nat;
4517 goto Succeed;
4518 else
4519 goto Fail;
4520 end if;
4522 -- RTab (integer function case)
4524 when PC_RTab_NF => declare
4525 N : constant Natural := Node.NF.all;
4527 begin
4528 if Length - Cursor >= N then
4529 Cursor := Length - N;
4530 goto Succeed;
4531 else
4532 goto Fail;
4533 end if;
4534 end;
4536 -- RTab (integer pointer case)
4538 when PC_RTab_NP =>
4539 if Cursor <= (Length - Node.NP.all) then
4540 Cursor := Length - Node.NP.all;
4541 goto Succeed;
4542 else
4543 goto Fail;
4544 end if;
4546 -- Cursor assignment
4548 when PC_Setcur =>
4549 Node.Var.all := Cursor;
4550 goto Succeed;
4552 -- Span (one character case)
4554 when PC_Span_CH => declare
4555 P : Natural := Cursor;
4557 begin
4558 while P < Length
4559 and then Subject (P + 1) = Node.Char
4560 loop
4561 P := P + 1;
4562 end loop;
4564 if P /= Cursor then
4565 Cursor := P;
4566 goto Succeed;
4567 else
4568 goto Fail;
4569 end if;
4570 end;
4572 -- Span (character set case)
4574 when PC_Span_CS => declare
4575 P : Natural := Cursor;
4577 begin
4578 while P < Length
4579 and then Is_In (Subject (P + 1), Node.CS)
4580 loop
4581 P := P + 1;
4582 end loop;
4584 if P /= Cursor then
4585 Cursor := P;
4586 goto Succeed;
4587 else
4588 goto Fail;
4589 end if;
4590 end;
4592 -- Span (string function case)
4594 when PC_Span_VF => declare
4595 U : constant VString := Node.VF.all;
4596 Str : constant String_Access := Get_String (U);
4597 P : Natural := Cursor;
4599 begin
4600 while P < Length
4601 and then Is_In (Subject (P + 1), Str.all)
4602 loop
4603 P := P + 1;
4604 end loop;
4606 if P /= Cursor then
4607 Cursor := P;
4608 goto Succeed;
4609 else
4610 goto Fail;
4611 end if;
4612 end;
4614 -- Span (string pointer case)
4616 when PC_Span_VP => declare
4617 Str : String_Access := Get_String (Node.VP.all);
4618 P : Natural := Cursor;
4620 begin
4621 while P < Length
4622 and then Is_In (Subject (P + 1), Str.all)
4623 loop
4624 P := P + 1;
4625 end loop;
4627 if P /= Cursor then
4628 Cursor := P;
4629 goto Succeed;
4630 else
4631 goto Fail;
4632 end if;
4633 end;
4635 -- String (two character case)
4637 when PC_String_2 =>
4638 if (Length - Cursor) >= 2
4639 and then Subject (Cursor + 1 .. Cursor + 2) = Node.Str2
4640 then
4641 Cursor := Cursor + 2;
4642 goto Succeed;
4643 else
4644 goto Fail;
4645 end if;
4647 -- String (three character case)
4649 when PC_String_3 =>
4650 if (Length - Cursor) >= 3
4651 and then Subject (Cursor + 1 .. Cursor + 3) = Node.Str3
4652 then
4653 Cursor := Cursor + 3;
4654 goto Succeed;
4655 else
4656 goto Fail;
4657 end if;
4659 -- String (four character case)
4661 when PC_String_4 =>
4662 if (Length - Cursor) >= 4
4663 and then Subject (Cursor + 1 .. Cursor + 4) = Node.Str4
4664 then
4665 Cursor := Cursor + 4;
4666 goto Succeed;
4667 else
4668 goto Fail;
4669 end if;
4671 -- String (five character case)
4673 when PC_String_5 =>
4674 if (Length - Cursor) >= 5
4675 and then Subject (Cursor + 1 .. Cursor + 5) = Node.Str5
4676 then
4677 Cursor := Cursor + 5;
4678 goto Succeed;
4679 else
4680 goto Fail;
4681 end if;
4683 -- String (six character case)
4685 when PC_String_6 =>
4686 if (Length - Cursor) >= 6
4687 and then Subject (Cursor + 1 .. Cursor + 6) = Node.Str6
4688 then
4689 Cursor := Cursor + 6;
4690 goto Succeed;
4691 else
4692 goto Fail;
4693 end if;
4695 -- String (case of more than six characters)
4697 when PC_String => declare
4698 Len : constant Natural := Node.Str'Length;
4700 begin
4701 if (Length - Cursor) >= Len
4702 and then Node.Str.all = Subject (Cursor + 1 .. Cursor + Len)
4703 then
4704 Cursor := Cursor + Len;
4705 goto Succeed;
4706 else
4707 goto Fail;
4708 end if;
4709 end;
4711 -- String (function case)
4713 when PC_String_VF => declare
4714 U : constant VString := Node.VF.all;
4715 Str : constant String_Access := Get_String (U);
4716 Len : constant Natural := Str'Length;
4718 begin
4719 if (Length - Cursor) >= Len
4720 and then Str.all = Subject (Cursor + 1 .. Cursor + Len)
4721 then
4722 Cursor := Cursor + Len;
4723 goto Succeed;
4724 else
4725 goto Fail;
4726 end if;
4727 end;
4729 -- String (pointer case)
4731 when PC_String_VP => declare
4732 S : String_Access := Get_String (Node.VP.all);
4733 Len : constant Natural := S'Length;
4735 begin
4736 if (Length - Cursor) >= Len
4737 and then S.all = Subject (Cursor + 1 .. Cursor + Len)
4738 then
4739 Cursor := Cursor + Len;
4740 goto Succeed;
4741 else
4742 goto Fail;
4743 end if;
4744 end;
4746 -- Succeed
4748 when PC_Succeed =>
4749 Push (Node);
4750 goto Succeed;
4752 -- Tab (integer case)
4754 when PC_Tab_Nat =>
4755 if Cursor <= Node.Nat then
4756 Cursor := Node.Nat;
4757 goto Succeed;
4758 else
4759 goto Fail;
4760 end if;
4762 -- Tab (integer function case)
4764 when PC_Tab_NF => declare
4765 N : constant Natural := Node.NF.all;
4767 begin
4768 if Cursor <= N then
4769 Cursor := N;
4770 goto Succeed;
4771 else
4772 goto Fail;
4773 end if;
4774 end;
4776 -- Tab (integer pointer case)
4778 when PC_Tab_NP =>
4779 if Cursor <= Node.NP.all then
4780 Cursor := Node.NP.all;
4781 goto Succeed;
4782 else
4783 goto Fail;
4784 end if;
4786 -- Unanchored movement
4788 when PC_Unanchored =>
4790 -- All done if we tried every position
4792 if Cursor > Length then
4793 goto Match_Fail;
4795 -- Otherwise extend the anchor point, and restack ourself
4797 else
4798 Cursor := Cursor + 1;
4799 Push (Node);
4800 goto Succeed;
4801 end if;
4803 -- Write immediate. This node performs the actual write
4805 when PC_Write_Imm =>
4806 Put_Line
4807 (Node.FP.all,
4808 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
4809 Pop_Region;
4810 goto Succeed;
4812 -- Write on match. This node sets up for the eventual write
4814 when PC_Write_OnM =>
4815 Stack (Stack_Base - 1).Node := Node;
4816 Push (CP_Assign'Access);
4817 Pop_Region;
4818 Assign_OnM := True;
4819 goto Succeed;
4821 end case;
4823 -- We are NOT allowed to fall though this case statement, since every
4824 -- match routine must end by executing a goto to the appropriate point
4825 -- in the finite state machine model.
4827 Logic_Error;
4829 end XMatch;
4831 -------------
4832 -- XMatchD --
4833 -------------
4835 -- Maintenance note: There is a LOT of code duplication between XMatch
4836 -- and XMatchD. This is quite intentional, the point is to avoid any
4837 -- unnecessary debugging overhead in the XMatch case, but this does mean
4838 -- that any changes to XMatchD must be mirrored in XMatch. In case of
4839 -- any major changes, the proper approach is to delete XMatch, make the
4840 -- changes to XMatchD, and then make a copy of XMatchD, removing all
4841 -- calls to Dout, and all Put and Put_Line operations. This copy becomes
4842 -- the new XMatch.
4844 procedure XMatchD
4845 (Subject : String;
4846 Pat_P : PE_Ptr;
4847 Pat_S : Natural;
4848 Start : out Natural;
4849 Stop : out Natural)
4851 Node : PE_Ptr;
4852 -- Pointer to current pattern node. Initialized from Pat_P, and then
4853 -- updated as the match proceeds through its constituent elements.
4855 Length : constant Natural := Subject'Length;
4856 -- Length of string (= Subject'Last, since Subject'First is always 1)
4858 Cursor : Integer := 0;
4859 -- If the value is non-negative, then this value is the index showing
4860 -- the current position of the match in the subject string. The next
4861 -- character to be matched is at Subject (Cursor + 1). Note that since
4862 -- our view of the subject string in XMatch always has a lower bound
4863 -- of one, regardless of original bounds, that this definition exactly
4864 -- corresponds to the cursor value as referenced by functions like Pos.
4866 -- If the value is negative, then this is a saved stack pointer,
4867 -- typically a base pointer of an inner or outer region. Cursor
4868 -- temporarily holds such a value when it is popped from the stack
4869 -- by Fail. In all cases, Cursor is reset to a proper non-negative
4870 -- cursor value before the match proceeds (e.g. by propagating the
4871 -- failure and popping a "real" cursor value from the stack.
4873 PE_Unanchored : aliased PE := (PC_Unanchored, 0, Pat_P);
4874 -- Dummy pattern element used in the unanchored case.
4876 Region_Level : Natural := 0;
4877 -- Keeps track of recursive region level. This is used only for
4878 -- debugging, it is the number of saved history stack base values.
4880 Stack : Stack_Type;
4881 -- The pattern matching failure stack for this call to Match
4883 Stack_Ptr : Stack_Range;
4884 -- Current stack pointer. This points to the top element of the stack
4885 -- that is currently in use. At the outer level this is the special
4886 -- entry placed on the stack according to the anchor mode.
4888 Stack_Init : constant Stack_Range := Stack'First + 1;
4889 -- This is the initial value of the Stack_Ptr and Stack_Base. The
4890 -- initial (Stack'First) element of the stack is not used so that
4891 -- when we pop the last element off, Stack_Ptr is still in range.
4893 Stack_Base : Stack_Range;
4894 -- This value is the stack base value, i.e. the stack pointer for the
4895 -- first history stack entry in the current stack region. See separate
4896 -- section on handling of recursive pattern matches.
4898 Assign_OnM : Boolean := False;
4899 -- Set True if assign-on-match or write-on-match operations may be
4900 -- present in the history stack, which must then be scanned on a
4901 -- successful match.
4903 procedure Dout (Str : String);
4904 -- Output string to standard error with bars indicating region level.
4906 procedure Dout (Str : String; A : Character);
4907 -- Calls Dout with the string S ('A')
4909 procedure Dout (Str : String; A : Character_Set);
4910 -- Calls Dout with the string S ("A")
4912 procedure Dout (Str : String; A : Natural);
4913 -- Calls Dout with the string S (A)
4915 procedure Dout (Str : String; A : String);
4916 -- Calls Dout with the string S ("A")
4918 function Img (P : PE_Ptr) return String;
4919 -- Returns a string of the form #nnn where nnn is P.Index
4921 procedure Pop_Region;
4922 pragma Inline (Pop_Region);
4923 -- Used at the end of processing of an inner region. if the inner
4924 -- region left no stack entries, then all trace of it is removed.
4925 -- Otherwise a PC_Restore_Region entry is pushed to ensure proper
4926 -- handling of alternatives in the inner region.
4928 procedure Push (Node : PE_Ptr);
4929 pragma Inline (Push);
4930 -- Make entry in pattern matching stack with current cursor valeu
4932 procedure Push_Region;
4933 pragma Inline (Push_Region);
4934 -- This procedure makes a new region on the history stack. The
4935 -- caller first establishes the special entry on the stack, but
4936 -- does not push the stack pointer. Then this call stacks a
4937 -- PC_Remove_Region node, on top of this entry, using the cursor
4938 -- field of the PC_Remove_Region entry to save the outer level
4939 -- stack base value, and resets the stack base to point to this
4940 -- PC_Remove_Region node.
4942 ----------
4943 -- Dout --
4944 ----------
4946 procedure Dout (Str : String) is
4947 begin
4948 for J in 1 .. Region_Level loop
4949 Put ("| ");
4950 end loop;
4952 Put_Line (Str);
4953 end Dout;
4955 procedure Dout (Str : String; A : Character) is
4956 begin
4957 Dout (Str & " ('" & A & "')");
4958 end Dout;
4960 procedure Dout (Str : String; A : Character_Set) is
4961 begin
4962 Dout (Str & " (" & Image (To_Sequence (A)) & ')');
4963 end Dout;
4965 procedure Dout (Str : String; A : Natural) is
4966 begin
4967 Dout (Str & " (" & A & ')');
4968 end Dout;
4970 procedure Dout (Str : String; A : String) is
4971 begin
4972 Dout (Str & " (" & Image (A) & ')');
4973 end Dout;
4975 ---------
4976 -- Img --
4977 ---------
4979 function Img (P : PE_Ptr) return String is
4980 begin
4981 return "#" & Integer (P.Index) & " ";
4982 end Img;
4984 ----------------
4985 -- Pop_Region --
4986 ----------------
4988 procedure Pop_Region is
4989 begin
4990 Region_Level := Region_Level - 1;
4992 -- If nothing was pushed in the inner region, we can just get
4993 -- rid of it entirely, leaving no traces that it was ever there
4995 if Stack_Ptr = Stack_Base then
4996 Stack_Ptr := Stack_Base - 2;
4997 Stack_Base := Stack (Stack_Ptr + 2).Cursor;
4999 -- If stuff was pushed in the inner region, then we have to
5000 -- push a PC_R_Restore node so that we properly handle possible
5001 -- rematches within the region.
5003 else
5004 Stack_Ptr := Stack_Ptr + 1;
5005 Stack (Stack_Ptr).Cursor := Stack_Base;
5006 Stack (Stack_Ptr).Node := CP_R_Restore'Access;
5007 Stack_Base := Stack (Stack_Base).Cursor;
5008 end if;
5009 end Pop_Region;
5011 ----------
5012 -- Push --
5013 ----------
5015 procedure Push (Node : PE_Ptr) is
5016 begin
5017 Stack_Ptr := Stack_Ptr + 1;
5018 Stack (Stack_Ptr).Cursor := Cursor;
5019 Stack (Stack_Ptr).Node := Node;
5020 end Push;
5022 -----------------
5023 -- Push_Region --
5024 -----------------
5026 procedure Push_Region is
5027 begin
5028 Region_Level := Region_Level + 1;
5029 Stack_Ptr := Stack_Ptr + 2;
5030 Stack (Stack_Ptr).Cursor := Stack_Base;
5031 Stack (Stack_Ptr).Node := CP_R_Remove'Access;
5032 Stack_Base := Stack_Ptr;
5033 end Push_Region;
5035 -- Start of processing for XMatchD
5037 begin
5038 New_Line;
5039 Put_Line ("Initiating pattern match, subject = " & Image (Subject));
5040 Put ("--------------------------------------");
5042 for J in 1 .. Length loop
5043 Put ('-');
5044 end loop;
5046 New_Line;
5047 Put_Line ("subject length = " & Length);
5049 if Pat_P = null then
5050 Uninitialized_Pattern;
5051 end if;
5053 -- Check we have enough stack for this pattern. This check deals with
5054 -- every possibility except a match of a recursive pattern, where we
5055 -- make a check at each recursion level.
5057 if Pat_S >= Stack_Size - 1 then
5058 raise Pattern_Stack_Overflow;
5059 end if;
5061 -- In anchored mode, the bottom entry on the stack is an abort entry
5063 if Anchored_Mode then
5064 Stack (Stack_Init).Node := CP_Cancel'Access;
5065 Stack (Stack_Init).Cursor := 0;
5067 -- In unanchored more, the bottom entry on the stack references
5068 -- the special pattern element PE_Unanchored, whose Pthen field
5069 -- points to the initial pattern element. The cursor value in this
5070 -- entry is the number of anchor moves so far.
5072 else
5073 Stack (Stack_Init).Node := PE_Unanchored'Unchecked_Access;
5074 Stack (Stack_Init).Cursor := 0;
5075 end if;
5077 Stack_Ptr := Stack_Init;
5078 Stack_Base := Stack_Ptr;
5079 Cursor := 0;
5080 Node := Pat_P;
5081 goto Match;
5083 -----------------------------------------
5084 -- Main Pattern Matching State Control --
5085 -----------------------------------------
5087 -- This is a state machine which uses gotos to change state. The
5088 -- initial state is Match, to initiate the matching of the first
5089 -- element, so the goto Match above starts the match. In the
5090 -- following descriptions, we indicate the global values that
5091 -- are relevant for the state transition.
5093 -- Come here if entire match fails
5095 <<Match_Fail>>
5096 Dout ("match fails");
5097 New_Line;
5098 Start := 0;
5099 Stop := 0;
5100 return;
5102 -- Come here if entire match succeeds
5104 -- Cursor current position in subject string
5106 <<Match_Succeed>>
5107 Dout ("match succeeds");
5108 Start := Stack (Stack_Init).Cursor + 1;
5109 Stop := Cursor;
5110 Dout ("first matched character index = " & Start);
5111 Dout ("last matched character index = " & Stop);
5112 Dout ("matched substring = " & Image (Subject (Start .. Stop)));
5114 -- Scan history stack for deferred assignments or writes
5116 if Assign_OnM then
5117 for S in Stack'First .. Stack_Ptr loop
5118 if Stack (S).Node = CP_Assign'Access then
5119 declare
5120 Inner_Base : constant Stack_Range :=
5121 Stack (S + 1).Cursor;
5122 Special_Entry : constant Stack_Range :=
5123 Inner_Base - 1;
5124 Node_OnM : constant PE_Ptr :=
5125 Stack (Special_Entry).Node;
5126 Start : constant Natural :=
5127 Stack (Special_Entry).Cursor + 1;
5128 Stop : constant Natural := Stack (S).Cursor;
5130 begin
5131 if Node_OnM.Pcode = PC_Assign_OnM then
5132 Set_String (Node_OnM.VP.all, Subject (Start .. Stop));
5133 Dout
5134 (Img (Stack (S).Node) &
5135 "deferred assignment of " &
5136 Image (Subject (Start .. Stop)));
5138 elsif Node_OnM.Pcode = PC_Write_OnM then
5139 Put_Line (Node_OnM.FP.all, Subject (Start .. Stop));
5140 Dout
5141 (Img (Stack (S).Node) &
5142 "deferred write of " &
5143 Image (Subject (Start .. Stop)));
5145 else
5146 Logic_Error;
5147 end if;
5148 end;
5149 end if;
5150 end loop;
5151 end if;
5153 New_Line;
5154 return;
5156 -- Come here if attempt to match current element fails
5158 -- Stack_Base current stack base
5159 -- Stack_Ptr current stack pointer
5161 <<Fail>>
5162 Cursor := Stack (Stack_Ptr).Cursor;
5163 Node := Stack (Stack_Ptr).Node;
5164 Stack_Ptr := Stack_Ptr - 1;
5166 if Cursor >= 0 then
5167 Dout ("failure, cursor reset to " & Cursor);
5168 end if;
5170 goto Match;
5172 -- Come here if attempt to match current element succeeds
5174 -- Cursor current position in subject string
5175 -- Node pointer to node successfully matched
5176 -- Stack_Base current stack base
5177 -- Stack_Ptr current stack pointer
5179 <<Succeed>>
5180 Dout ("success, cursor = " & Cursor);
5181 Node := Node.Pthen;
5183 -- Come here to match the next pattern element
5185 -- Cursor current position in subject string
5186 -- Node pointer to node to be matched
5187 -- Stack_Base current stack base
5188 -- Stack_Ptr current stack pointer
5190 <<Match>>
5192 --------------------------------------------------
5193 -- Main Pattern Match Element Matching Routines --
5194 --------------------------------------------------
5196 -- Here is the case statement that processes the current node. The
5197 -- processing for each element does one of five things:
5199 -- goto Succeed to move to the successor
5200 -- goto Match_Succeed if the entire match succeeds
5201 -- goto Match_Fail if the entire match fails
5202 -- goto Fail to signal failure of current match
5204 -- Processing is NOT allowed to fall through
5206 case Node.Pcode is
5208 -- Cancel
5210 when PC_Cancel =>
5211 Dout (Img (Node) & "matching Cancel");
5212 goto Match_Fail;
5214 -- Alternation
5216 when PC_Alt =>
5217 Dout
5218 (Img (Node) & "setting up alternative " & Img (Node.Alt));
5219 Push (Node.Alt);
5220 Node := Node.Pthen;
5221 goto Match;
5223 -- Any (one character case)
5225 when PC_Any_CH =>
5226 Dout (Img (Node) & "matching Any", Node.Char);
5228 if Cursor < Length
5229 and then Subject (Cursor + 1) = Node.Char
5230 then
5231 Cursor := Cursor + 1;
5232 goto Succeed;
5233 else
5234 goto Fail;
5235 end if;
5237 -- Any (character set case)
5239 when PC_Any_CS =>
5240 Dout (Img (Node) & "matching Any", Node.CS);
5242 if Cursor < Length
5243 and then Is_In (Subject (Cursor + 1), Node.CS)
5244 then
5245 Cursor := Cursor + 1;
5246 goto Succeed;
5247 else
5248 goto Fail;
5249 end if;
5251 -- Any (string function case)
5253 when PC_Any_VF => declare
5254 U : constant VString := Node.VF.all;
5255 Str : constant String_Access := Get_String (U);
5257 begin
5258 Dout (Img (Node) & "matching Any", Str.all);
5260 if Cursor < Length
5261 and then Is_In (Subject (Cursor + 1), Str.all)
5262 then
5263 Cursor := Cursor + 1;
5264 goto Succeed;
5265 else
5266 goto Fail;
5267 end if;
5268 end;
5270 -- Any (string pointer case)
5272 when PC_Any_VP => declare
5273 Str : String_Access := Get_String (Node.VP.all);
5275 begin
5276 Dout (Img (Node) & "matching Any", Str.all);
5278 if Cursor < Length
5279 and then Is_In (Subject (Cursor + 1), Str.all)
5280 then
5281 Cursor := Cursor + 1;
5282 goto Succeed;
5283 else
5284 goto Fail;
5285 end if;
5286 end;
5288 -- Arb (initial match)
5290 when PC_Arb_X =>
5291 Dout (Img (Node) & "matching Arb");
5292 Push (Node.Alt);
5293 Node := Node.Pthen;
5294 goto Match;
5296 -- Arb (extension)
5298 when PC_Arb_Y =>
5299 Dout (Img (Node) & "extending Arb");
5301 if Cursor < Length then
5302 Cursor := Cursor + 1;
5303 Push (Node);
5304 goto Succeed;
5305 else
5306 goto Fail;
5307 end if;
5309 -- Arbno_S (simple Arbno initialize). This is the node that
5310 -- initiates the match of a simple Arbno structure.
5312 when PC_Arbno_S =>
5313 Dout (Img (Node) &
5314 "setting up Arbno alternative " & Img (Node.Alt));
5315 Push (Node.Alt);
5316 Node := Node.Pthen;
5317 goto Match;
5319 -- Arbno_X (Arbno initialize). This is the node that initiates
5320 -- the match of a complex Arbno structure.
5322 when PC_Arbno_X =>
5323 Dout (Img (Node) &
5324 "setting up Arbno alternative " & Img (Node.Alt));
5325 Push (Node.Alt);
5326 Node := Node.Pthen;
5327 goto Match;
5329 -- Arbno_Y (Arbno rematch). This is the node that is executed
5330 -- following successful matching of one instance of a complex
5331 -- Arbno pattern.
5333 when PC_Arbno_Y => declare
5334 Null_Match : Boolean := (Cursor = Stack (Stack_Base - 1).Cursor);
5336 begin
5337 Dout (Img (Node) & "extending Arbno");
5338 Pop_Region;
5340 -- If arbno extension matched null, then immediately fail
5342 if Null_Match then
5343 Dout ("Arbno extension matched null, so fails");
5344 goto Fail;
5345 end if;
5347 -- Here we must do a stack check to make sure enough stack
5348 -- is left. This check will happen once for each instance of
5349 -- the Arbno pattern that is matched. The Nat field of a
5350 -- PC_Arbno pattern contains the maximum stack entries needed
5351 -- for the Arbno with one instance and the successor pattern
5353 if Stack_Ptr + Node.Nat >= Stack'Last then
5354 raise Pattern_Stack_Overflow;
5355 end if;
5357 goto Succeed;
5358 end;
5360 -- Assign. If this node is executed, it means the assign-on-match
5361 -- or write-on-match operation will not happen after all, so we
5362 -- is propagate the failure, removing the PC_Assign node.
5364 when PC_Assign =>
5365 Dout (Img (Node) & "deferred assign/write cancelled");
5366 goto Fail;
5368 -- Assign immediate. This node performs the actual assignment.
5370 when PC_Assign_Imm =>
5371 Dout
5372 (Img (Node) & "executing immediate assignment of " &
5373 Image (Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor)));
5374 Set_String
5375 (Node.VP.all,
5376 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
5377 Pop_Region;
5378 goto Succeed;
5380 -- Assign on match. This node sets up for the eventual assignment
5382 when PC_Assign_OnM =>
5383 Dout (Img (Node) & "registering deferred assignment");
5384 Stack (Stack_Base - 1).Node := Node;
5385 Push (CP_Assign'Access);
5386 Pop_Region;
5387 Assign_OnM := True;
5388 goto Succeed;
5390 -- Bal
5392 when PC_Bal =>
5393 Dout (Img (Node) & "matching or extending Bal");
5394 if Cursor >= Length or else Subject (Cursor + 1) = ')' then
5395 goto Fail;
5397 elsif Subject (Cursor + 1) = '(' then
5398 declare
5399 Paren_Count : Natural := 1;
5401 begin
5402 loop
5403 Cursor := Cursor + 1;
5405 if Cursor >= Length then
5406 goto Fail;
5408 elsif Subject (Cursor + 1) = '(' then
5409 Paren_Count := Paren_Count + 1;
5411 elsif Subject (Cursor + 1) = ')' then
5412 Paren_Count := Paren_Count - 1;
5413 exit when Paren_Count = 0;
5414 end if;
5415 end loop;
5416 end;
5417 end if;
5419 Cursor := Cursor + 1;
5420 Push (Node);
5421 goto Succeed;
5423 -- Break (one character case)
5425 when PC_Break_CH =>
5426 Dout (Img (Node) & "matching Break", Node.Char);
5428 while Cursor < Length loop
5429 if Subject (Cursor + 1) = Node.Char then
5430 goto Succeed;
5431 else
5432 Cursor := Cursor + 1;
5433 end if;
5434 end loop;
5436 goto Fail;
5438 -- Break (character set case)
5440 when PC_Break_CS =>
5441 Dout (Img (Node) & "matching Break", Node.CS);
5443 while Cursor < Length loop
5444 if Is_In (Subject (Cursor + 1), Node.CS) then
5445 goto Succeed;
5446 else
5447 Cursor := Cursor + 1;
5448 end if;
5449 end loop;
5451 goto Fail;
5453 -- Break (string function case)
5455 when PC_Break_VF => declare
5456 U : constant VString := Node.VF.all;
5457 Str : constant String_Access := Get_String (U);
5459 begin
5460 Dout (Img (Node) & "matching Break", Str.all);
5462 while Cursor < Length loop
5463 if Is_In (Subject (Cursor + 1), Str.all) then
5464 goto Succeed;
5465 else
5466 Cursor := Cursor + 1;
5467 end if;
5468 end loop;
5470 goto Fail;
5471 end;
5473 -- Break (string pointer case)
5475 when PC_Break_VP => declare
5476 Str : String_Access := Get_String (Node.VP.all);
5478 begin
5479 Dout (Img (Node) & "matching Break", Str.all);
5481 while Cursor < Length loop
5482 if Is_In (Subject (Cursor + 1), Str.all) then
5483 goto Succeed;
5484 else
5485 Cursor := Cursor + 1;
5486 end if;
5487 end loop;
5489 goto Fail;
5490 end;
5492 -- BreakX (one character case)
5494 when PC_BreakX_CH =>
5495 Dout (Img (Node) & "matching BreakX", Node.Char);
5497 while Cursor < Length loop
5498 if Subject (Cursor + 1) = Node.Char then
5499 goto Succeed;
5500 else
5501 Cursor := Cursor + 1;
5502 end if;
5503 end loop;
5505 goto Fail;
5507 -- BreakX (character set case)
5509 when PC_BreakX_CS =>
5510 Dout (Img (Node) & "matching BreakX", Node.CS);
5512 while Cursor < Length loop
5513 if Is_In (Subject (Cursor + 1), Node.CS) then
5514 goto Succeed;
5515 else
5516 Cursor := Cursor + 1;
5517 end if;
5518 end loop;
5520 goto Fail;
5522 -- BreakX (string function case)
5524 when PC_BreakX_VF => declare
5525 U : constant VString := Node.VF.all;
5526 Str : constant String_Access := Get_String (U);
5528 begin
5529 Dout (Img (Node) & "matching BreakX", Str.all);
5531 while Cursor < Length loop
5532 if Is_In (Subject (Cursor + 1), Str.all) then
5533 goto Succeed;
5534 else
5535 Cursor := Cursor + 1;
5536 end if;
5537 end loop;
5539 goto Fail;
5540 end;
5542 -- BreakX (string pointer case)
5544 when PC_BreakX_VP => declare
5545 Str : String_Access := Get_String (Node.VP.all);
5547 begin
5548 Dout (Img (Node) & "matching BreakX", Str.all);
5550 while Cursor < Length loop
5551 if Is_In (Subject (Cursor + 1), Str.all) then
5552 goto Succeed;
5553 else
5554 Cursor := Cursor + 1;
5555 end if;
5556 end loop;
5558 goto Fail;
5559 end;
5561 -- BreakX_X (BreakX extension). See section on "Compound Pattern
5562 -- Structures". This node is the alternative that is stacked
5563 -- to skip past the break character and extend the break.
5565 when PC_BreakX_X =>
5566 Dout (Img (Node) & "extending BreakX");
5568 Cursor := Cursor + 1;
5569 goto Succeed;
5571 -- Character (one character string)
5573 when PC_Char =>
5574 Dout (Img (Node) & "matching '" & Node.Char & ''');
5576 if Cursor < Length
5577 and then Subject (Cursor + 1) = Node.Char
5578 then
5579 Cursor := Cursor + 1;
5580 goto Succeed;
5581 else
5582 goto Fail;
5583 end if;
5585 -- End of Pattern
5587 when PC_EOP =>
5588 if Stack_Base = Stack_Init then
5589 Dout ("end of pattern");
5590 goto Match_Succeed;
5592 -- End of recursive inner match. See separate section on
5593 -- handing of recursive pattern matches for details.
5595 else
5596 Dout ("terminating recursive match");
5597 Node := Stack (Stack_Base - 1).Node;
5598 Pop_Region;
5599 goto Match;
5600 end if;
5602 -- Fail
5604 when PC_Fail =>
5605 Dout (Img (Node) & "matching Fail");
5606 goto Fail;
5608 -- Fence (built in pattern)
5610 when PC_Fence =>
5611 Dout (Img (Node) & "matching Fence");
5612 Push (CP_Cancel'Access);
5613 goto Succeed;
5615 -- Fence function node X. This is the node that gets control
5616 -- after a successful match of the fenced pattern.
5618 when PC_Fence_X =>
5619 Dout (Img (Node) & "matching Fence function");
5620 Stack_Ptr := Stack_Ptr + 1;
5621 Stack (Stack_Ptr).Cursor := Stack_Base;
5622 Stack (Stack_Ptr).Node := CP_Fence_Y'Access;
5623 Stack_Base := Stack (Stack_Base).Cursor;
5624 Region_Level := Region_Level - 1;
5625 goto Succeed;
5627 -- Fence function node Y. This is the node that gets control on
5628 -- a failure that occurs after the fenced pattern has matched.
5630 -- Note: the Cursor at this stage is actually the inner stack
5631 -- base value. We don't reset this, but we do use it to strip
5632 -- off all the entries made by the fenced pattern.
5634 when PC_Fence_Y =>
5635 Dout (Img (Node) & "pattern matched by Fence caused failure");
5636 Stack_Ptr := Cursor - 2;
5637 goto Fail;
5639 -- Len (integer case)
5641 when PC_Len_Nat =>
5642 Dout (Img (Node) & "matching Len", Node.Nat);
5644 if Cursor + Node.Nat > Length then
5645 goto Fail;
5646 else
5647 Cursor := Cursor + Node.Nat;
5648 goto Succeed;
5649 end if;
5651 -- Len (Integer function case)
5653 when PC_Len_NF => declare
5654 N : constant Natural := Node.NF.all;
5656 begin
5657 Dout (Img (Node) & "matching Len", N);
5659 if Cursor + N > Length then
5660 goto Fail;
5661 else
5662 Cursor := Cursor + N;
5663 goto Succeed;
5664 end if;
5665 end;
5667 -- Len (integer pointer case)
5669 when PC_Len_NP =>
5670 Dout (Img (Node) & "matching Len", Node.NP.all);
5672 if Cursor + Node.NP.all > Length then
5673 goto Fail;
5674 else
5675 Cursor := Cursor + Node.NP.all;
5676 goto Succeed;
5677 end if;
5679 -- NotAny (one character case)
5681 when PC_NotAny_CH =>
5682 Dout (Img (Node) & "matching NotAny", Node.Char);
5684 if Cursor < Length
5685 and then Subject (Cursor + 1) /= Node.Char
5686 then
5687 Cursor := Cursor + 1;
5688 goto Succeed;
5689 else
5690 goto Fail;
5691 end if;
5693 -- NotAny (character set case)
5695 when PC_NotAny_CS =>
5696 Dout (Img (Node) & "matching NotAny", Node.CS);
5698 if Cursor < Length
5699 and then not Is_In (Subject (Cursor + 1), Node.CS)
5700 then
5701 Cursor := Cursor + 1;
5702 goto Succeed;
5703 else
5704 goto Fail;
5705 end if;
5707 -- NotAny (string function case)
5709 when PC_NotAny_VF => declare
5710 U : constant VString := Node.VF.all;
5711 Str : constant String_Access := Get_String (U);
5713 begin
5714 Dout (Img (Node) & "matching NotAny", Str.all);
5716 if Cursor < Length
5717 and then
5718 not Is_In (Subject (Cursor + 1), Str.all)
5719 then
5720 Cursor := Cursor + 1;
5721 goto Succeed;
5722 else
5723 goto Fail;
5724 end if;
5725 end;
5727 -- NotAny (string pointer case)
5729 when PC_NotAny_VP => declare
5730 Str : String_Access := Get_String (Node.VP.all);
5732 begin
5733 Dout (Img (Node) & "matching NotAny", Str.all);
5735 if Cursor < Length
5736 and then
5737 not Is_In (Subject (Cursor + 1), Str.all)
5738 then
5739 Cursor := Cursor + 1;
5740 goto Succeed;
5741 else
5742 goto Fail;
5743 end if;
5744 end;
5746 -- NSpan (one character case)
5748 when PC_NSpan_CH =>
5749 Dout (Img (Node) & "matching NSpan", Node.Char);
5751 while Cursor < Length
5752 and then Subject (Cursor + 1) = Node.Char
5753 loop
5754 Cursor := Cursor + 1;
5755 end loop;
5757 goto Succeed;
5759 -- NSpan (character set case)
5761 when PC_NSpan_CS =>
5762 Dout (Img (Node) & "matching NSpan", Node.CS);
5764 while Cursor < Length
5765 and then Is_In (Subject (Cursor + 1), Node.CS)
5766 loop
5767 Cursor := Cursor + 1;
5768 end loop;
5770 goto Succeed;
5772 -- NSpan (string function case)
5774 when PC_NSpan_VF => declare
5775 U : constant VString := Node.VF.all;
5776 Str : constant String_Access := Get_String (U);
5778 begin
5779 Dout (Img (Node) & "matching NSpan", Str.all);
5781 while Cursor < Length
5782 and then Is_In (Subject (Cursor + 1), Str.all)
5783 loop
5784 Cursor := Cursor + 1;
5785 end loop;
5787 goto Succeed;
5788 end;
5790 -- NSpan (string pointer case)
5792 when PC_NSpan_VP => declare
5793 Str : String_Access := Get_String (Node.VP.all);
5795 begin
5796 Dout (Img (Node) & "matching NSpan", Str.all);
5798 while Cursor < Length
5799 and then Is_In (Subject (Cursor + 1), Str.all)
5800 loop
5801 Cursor := Cursor + 1;
5802 end loop;
5804 goto Succeed;
5805 end;
5807 when PC_Null =>
5808 Dout (Img (Node) & "matching null");
5809 goto Succeed;
5811 -- Pos (integer case)
5813 when PC_Pos_Nat =>
5814 Dout (Img (Node) & "matching Pos", Node.Nat);
5816 if Cursor = Node.Nat then
5817 goto Succeed;
5818 else
5819 goto Fail;
5820 end if;
5822 -- Pos (Integer function case)
5824 when PC_Pos_NF => declare
5825 N : constant Natural := Node.NF.all;
5827 begin
5828 Dout (Img (Node) & "matching Pos", N);
5830 if Cursor = N then
5831 goto Succeed;
5832 else
5833 goto Fail;
5834 end if;
5835 end;
5837 -- Pos (integer pointer case)
5839 when PC_Pos_NP =>
5840 Dout (Img (Node) & "matching Pos", Node.NP.all);
5842 if Cursor = Node.NP.all then
5843 goto Succeed;
5844 else
5845 goto Fail;
5846 end if;
5848 -- Predicate function
5850 when PC_Pred_Func =>
5851 Dout (Img (Node) & "matching predicate function");
5853 if Node.BF.all then
5854 goto Succeed;
5855 else
5856 goto Fail;
5857 end if;
5859 -- Region Enter. Initiate new pattern history stack region
5861 when PC_R_Enter =>
5862 Dout (Img (Node) & "starting match of nested pattern");
5863 Stack (Stack_Ptr + 1).Cursor := Cursor;
5864 Push_Region;
5865 goto Succeed;
5867 -- Region Remove node. This is the node stacked by an R_Enter.
5868 -- It removes the special format stack entry right underneath, and
5869 -- then restores the outer level stack base and signals failure.
5871 -- Note: the cursor value at this stage is actually the (negative)
5872 -- stack base value for the outer level.
5874 when PC_R_Remove =>
5875 Dout ("failure, match of nested pattern terminated");
5876 Stack_Base := Cursor;
5877 Region_Level := Region_Level - 1;
5878 Stack_Ptr := Stack_Ptr - 1;
5879 goto Fail;
5881 -- Region restore node. This is the node stacked at the end of an
5882 -- inner level match. Its function is to restore the inner level
5883 -- region, so that alternatives in this region can be sought.
5885 -- Note: the Cursor at this stage is actually the negative of the
5886 -- inner stack base value, which we use to restore the inner region.
5888 when PC_R_Restore =>
5889 Dout ("failure, search for alternatives in nested pattern");
5890 Region_Level := Region_Level + 1;
5891 Stack_Base := Cursor;
5892 goto Fail;
5894 -- Rest
5896 when PC_Rest =>
5897 Dout (Img (Node) & "matching Rest");
5898 Cursor := Length;
5899 goto Succeed;
5901 -- Initiate recursive match (pattern pointer case)
5903 when PC_Rpat =>
5904 Stack (Stack_Ptr + 1).Node := Node.Pthen;
5905 Push_Region;
5906 Dout (Img (Node) & "initiating recursive match");
5908 if Stack_Ptr + Node.PP.all.Stk >= Stack_Size then
5909 raise Pattern_Stack_Overflow;
5910 else
5911 Node := Node.PP.all.P;
5912 goto Match;
5913 end if;
5915 -- RPos (integer case)
5917 when PC_RPos_Nat =>
5918 Dout (Img (Node) & "matching RPos", Node.Nat);
5920 if Cursor = (Length - Node.Nat) then
5921 goto Succeed;
5922 else
5923 goto Fail;
5924 end if;
5926 -- RPos (integer function case)
5928 when PC_RPos_NF => declare
5929 N : constant Natural := Node.NF.all;
5931 begin
5932 Dout (Img (Node) & "matching RPos", N);
5934 if Length - Cursor = N then
5935 goto Succeed;
5936 else
5937 goto Fail;
5938 end if;
5939 end;
5941 -- RPos (integer pointer case)
5943 when PC_RPos_NP =>
5944 Dout (Img (Node) & "matching RPos", Node.NP.all);
5946 if Cursor = (Length - Node.NP.all) then
5947 goto Succeed;
5948 else
5949 goto Fail;
5950 end if;
5952 -- RTab (integer case)
5954 when PC_RTab_Nat =>
5955 Dout (Img (Node) & "matching RTab", Node.Nat);
5957 if Cursor <= (Length - Node.Nat) then
5958 Cursor := Length - Node.Nat;
5959 goto Succeed;
5960 else
5961 goto Fail;
5962 end if;
5964 -- RTab (integer function case)
5966 when PC_RTab_NF => declare
5967 N : constant Natural := Node.NF.all;
5969 begin
5970 Dout (Img (Node) & "matching RPos", N);
5972 if Length - Cursor >= N then
5973 Cursor := Length - N;
5974 goto Succeed;
5975 else
5976 goto Fail;
5977 end if;
5978 end;
5980 -- RTab (integer pointer case)
5982 when PC_RTab_NP =>
5983 Dout (Img (Node) & "matching RPos", Node.NP.all);
5985 if Cursor <= (Length - Node.NP.all) then
5986 Cursor := Length - Node.NP.all;
5987 goto Succeed;
5988 else
5989 goto Fail;
5990 end if;
5992 -- Cursor assignment
5994 when PC_Setcur =>
5995 Dout (Img (Node) & "matching Setcur");
5996 Node.Var.all := Cursor;
5997 goto Succeed;
5999 -- Span (one character case)
6001 when PC_Span_CH => declare
6002 P : Natural := Cursor;
6004 begin
6005 Dout (Img (Node) & "matching Span", Node.Char);
6007 while P < Length
6008 and then Subject (P + 1) = Node.Char
6009 loop
6010 P := P + 1;
6011 end loop;
6013 if P /= Cursor then
6014 Cursor := P;
6015 goto Succeed;
6016 else
6017 goto Fail;
6018 end if;
6019 end;
6021 -- Span (character set case)
6023 when PC_Span_CS => declare
6024 P : Natural := Cursor;
6026 begin
6027 Dout (Img (Node) & "matching Span", Node.CS);
6029 while P < Length
6030 and then Is_In (Subject (P + 1), Node.CS)
6031 loop
6032 P := P + 1;
6033 end loop;
6035 if P /= Cursor then
6036 Cursor := P;
6037 goto Succeed;
6038 else
6039 goto Fail;
6040 end if;
6041 end;
6043 -- Span (string function case)
6045 when PC_Span_VF => declare
6046 U : constant VString := Node.VF.all;
6047 Str : constant String_Access := Get_String (U);
6048 P : Natural := Cursor;
6050 begin
6051 Dout (Img (Node) & "matching Span", Str.all);
6053 while P < Length
6054 and then Is_In (Subject (P + 1), Str.all)
6055 loop
6056 P := P + 1;
6057 end loop;
6059 if P /= Cursor then
6060 Cursor := P;
6061 goto Succeed;
6062 else
6063 goto Fail;
6064 end if;
6065 end;
6067 -- Span (string pointer case)
6069 when PC_Span_VP => declare
6070 Str : String_Access := Get_String (Node.VP.all);
6071 P : Natural := Cursor;
6073 begin
6074 Dout (Img (Node) & "matching Span", Str.all);
6076 while P < Length
6077 and then Is_In (Subject (P + 1), Str.all)
6078 loop
6079 P := P + 1;
6080 end loop;
6082 if P /= Cursor then
6083 Cursor := P;
6084 goto Succeed;
6085 else
6086 goto Fail;
6087 end if;
6088 end;
6090 -- String (two character case)
6092 when PC_String_2 =>
6093 Dout (Img (Node) & "matching " & Image (Node.Str2));
6095 if (Length - Cursor) >= 2
6096 and then Subject (Cursor + 1 .. Cursor + 2) = Node.Str2
6097 then
6098 Cursor := Cursor + 2;
6099 goto Succeed;
6100 else
6101 goto Fail;
6102 end if;
6104 -- String (three character case)
6106 when PC_String_3 =>
6107 Dout (Img (Node) & "matching " & Image (Node.Str3));
6109 if (Length - Cursor) >= 3
6110 and then Subject (Cursor + 1 .. Cursor + 3) = Node.Str3
6111 then
6112 Cursor := Cursor + 3;
6113 goto Succeed;
6114 else
6115 goto Fail;
6116 end if;
6118 -- String (four character case)
6120 when PC_String_4 =>
6121 Dout (Img (Node) & "matching " & Image (Node.Str4));
6123 if (Length - Cursor) >= 4
6124 and then Subject (Cursor + 1 .. Cursor + 4) = Node.Str4
6125 then
6126 Cursor := Cursor + 4;
6127 goto Succeed;
6128 else
6129 goto Fail;
6130 end if;
6132 -- String (five character case)
6134 when PC_String_5 =>
6135 Dout (Img (Node) & "matching " & Image (Node.Str5));
6137 if (Length - Cursor) >= 5
6138 and then Subject (Cursor + 1 .. Cursor + 5) = Node.Str5
6139 then
6140 Cursor := Cursor + 5;
6141 goto Succeed;
6142 else
6143 goto Fail;
6144 end if;
6146 -- String (six character case)
6148 when PC_String_6 =>
6149 Dout (Img (Node) & "matching " & Image (Node.Str6));
6151 if (Length - Cursor) >= 6
6152 and then Subject (Cursor + 1 .. Cursor + 6) = Node.Str6
6153 then
6154 Cursor := Cursor + 6;
6155 goto Succeed;
6156 else
6157 goto Fail;
6158 end if;
6160 -- String (case of more than six characters)
6162 when PC_String => declare
6163 Len : constant Natural := Node.Str'Length;
6165 begin
6166 Dout (Img (Node) & "matching " & Image (Node.Str.all));
6168 if (Length - Cursor) >= Len
6169 and then Node.Str.all = Subject (Cursor + 1 .. Cursor + Len)
6170 then
6171 Cursor := Cursor + Len;
6172 goto Succeed;
6173 else
6174 goto Fail;
6175 end if;
6176 end;
6178 -- String (function case)
6180 when PC_String_VF => declare
6181 U : constant VString := Node.VF.all;
6182 Str : constant String_Access := Get_String (U);
6183 Len : constant Natural := Str'Length;
6185 begin
6186 Dout (Img (Node) & "matching " & Image (Str.all));
6188 if (Length - Cursor) >= Len
6189 and then Str.all = Subject (Cursor + 1 .. Cursor + Len)
6190 then
6191 Cursor := Cursor + Len;
6192 goto Succeed;
6193 else
6194 goto Fail;
6195 end if;
6196 end;
6198 -- String (vstring pointer case)
6200 when PC_String_VP => declare
6201 S : String_Access := Get_String (Node.VP.all);
6202 Len : constant Natural :=
6203 Ada.Strings.Unbounded.Length (Node.VP.all);
6205 begin
6206 Dout
6207 (Img (Node) & "matching " & Image (S.all));
6209 if (Length - Cursor) >= Len
6210 and then S.all = Subject (Cursor + 1 .. Cursor + Len)
6211 then
6212 Cursor := Cursor + Len;
6213 goto Succeed;
6214 else
6215 goto Fail;
6216 end if;
6217 end;
6219 -- Succeed
6221 when PC_Succeed =>
6222 Dout (Img (Node) & "matching Succeed");
6223 Push (Node);
6224 goto Succeed;
6226 -- Tab (integer case)
6228 when PC_Tab_Nat =>
6229 Dout (Img (Node) & "matching Tab", Node.Nat);
6231 if Cursor <= Node.Nat then
6232 Cursor := Node.Nat;
6233 goto Succeed;
6234 else
6235 goto Fail;
6236 end if;
6238 -- Tab (integer function case)
6240 when PC_Tab_NF => declare
6241 N : constant Natural := Node.NF.all;
6243 begin
6244 Dout (Img (Node) & "matching Tab ", N);
6246 if Cursor <= N then
6247 Cursor := N;
6248 goto Succeed;
6249 else
6250 goto Fail;
6251 end if;
6252 end;
6254 -- Tab (integer pointer case)
6256 when PC_Tab_NP =>
6257 Dout (Img (Node) & "matching Tab ", Node.NP.all);
6259 if Cursor <= Node.NP.all then
6260 Cursor := Node.NP.all;
6261 goto Succeed;
6262 else
6263 goto Fail;
6264 end if;
6266 -- Unanchored movement
6268 when PC_Unanchored =>
6269 Dout ("attempting to move anchor point");
6271 -- All done if we tried every position
6273 if Cursor > Length then
6274 goto Match_Fail;
6276 -- Otherwise extend the anchor point, and restack ourself
6278 else
6279 Cursor := Cursor + 1;
6280 Push (Node);
6281 goto Succeed;
6282 end if;
6284 -- Write immediate. This node performs the actual write
6286 when PC_Write_Imm =>
6287 Dout (Img (Node) & "executing immediate write of " &
6288 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
6290 Put_Line
6291 (Node.FP.all,
6292 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
6293 Pop_Region;
6294 goto Succeed;
6296 -- Write on match. This node sets up for the eventual write
6298 when PC_Write_OnM =>
6299 Dout (Img (Node) & "registering deferred write");
6300 Stack (Stack_Base - 1).Node := Node;
6301 Push (CP_Assign'Access);
6302 Pop_Region;
6303 Assign_OnM := True;
6304 goto Succeed;
6306 end case;
6308 -- We are NOT allowed to fall though this case statement, since every
6309 -- match routine must end by executing a goto to the appropriate point
6310 -- in the finite state machine model.
6312 Logic_Error;
6314 end XMatchD;
6316 end GNAT.Spitbol.Patterns;