Fix thinko
[official-gcc.git] / gcc / ada / g-spipat.adb
blobe0fa74a41fa68f182277a9c86a74f972782b14ec
1 ------------------------------------------------------------------------------
2 -- --
3 -- GNAT LIBRARY COMPONENTS --
4 -- --
5 -- G N A T . S P I T B O L . P A T T E R N S --
6 -- --
7 -- B o d y --
8 -- --
9 -- Copyright (C) 1998-2002, Ada Core Technologies, Inc. --
10 -- --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 2, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
17 -- for more details. You should have received a copy of the GNU General --
18 -- Public License distributed with GNAT; see file COPYING. If not, write --
19 -- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, --
20 -- MA 02111-1307, USA. --
21 -- --
22 -- As a special exception, if other files instantiate generics from this --
23 -- unit, or you link this unit with other files to produce an executable, --
24 -- this unit does not by itself cause the resulting executable to be --
25 -- covered by the GNU General Public License. This exception does not --
26 -- however invalidate any other reasons why the executable file might be --
27 -- covered by the GNU Public License. --
28 -- --
29 -- GNAT is maintained by Ada Core Technologies Inc (http://www.gnat.com). --
30 -- --
31 ------------------------------------------------------------------------------
33 -- Note: the data structures and general approach used in this implementation
34 -- are derived from the original MINIMAL sources for SPITBOL. The code is not
35 -- a direct translation, but the approach is followed closely. In particular,
36 -- we use the one stack approach developed in the SPITBOL implementation.
38 with Ada.Exceptions; use Ada.Exceptions;
39 with Ada.Strings.Maps; use Ada.Strings.Maps;
40 with Ada.Strings.Unbounded.Aux; use Ada.Strings.Unbounded.Aux;
42 with GNAT.Debug_Utilities; use GNAT.Debug_Utilities;
44 with System; use System;
46 with Unchecked_Conversion;
47 with Unchecked_Deallocation;
49 package body GNAT.Spitbol.Patterns is
51 ------------------------
52 -- Internal Debugging --
53 ------------------------
55 Internal_Debug : constant Boolean := False;
56 -- Set this flag to True to activate some built-in debugging traceback
57 -- These are all lines output with PutD and Put_LineD.
59 procedure New_LineD;
60 pragma Inline (New_LineD);
61 -- Output new blank line with New_Line if Internal_Debug is True
63 procedure PutD (Str : String);
64 pragma Inline (PutD);
65 -- Output string with Put if Internal_Debug is True
67 procedure Put_LineD (Str : String);
68 pragma Inline (Put_LineD);
69 -- Output string with Put_Line if Internal_Debug is True
71 -----------------------------
72 -- Local Type Declarations --
73 -----------------------------
75 subtype String_Ptr is Ada.Strings.Unbounded.String_Access;
76 subtype File_Ptr is Ada.Text_IO.File_Access;
78 function To_Address is new Unchecked_Conversion (PE_Ptr, Address);
79 -- Used only for debugging output purposes
81 subtype AFC is Ada.Finalization.Controlled;
83 N : constant PE_Ptr := null;
84 -- Shorthand used to initialize Copy fields to null
86 type Natural_Ptr is access all Natural;
87 type Pattern_Ptr is access all Pattern;
89 --------------------------------------------------
90 -- Description of Algorithm and Data Structures --
91 --------------------------------------------------
93 -- A pattern structure is represented as a linked graph of nodes
94 -- with the following structure:
96 -- +------------------------------------+
97 -- I Pcode I
98 -- +------------------------------------+
99 -- I Index I
100 -- +------------------------------------+
101 -- I Pthen I
102 -- +------------------------------------+
103 -- I parameter(s) I
104 -- +------------------------------------+
106 -- Pcode is a code value indicating the type of the patterm node. This
107 -- code is used both as the discriminant value for the record, and as
108 -- the case index in the main match routine that branches to the proper
109 -- match code for the given element.
111 -- Index is a serial index number. The use of these serial index
112 -- numbers is described in a separate section.
114 -- Pthen is a pointer to the successor node, i.e the node to be matched
115 -- if the attempt to match the node succeeds. If this is the last node
116 -- of the pattern to be matched, then Pthen points to a dummy node
117 -- of kind PC_EOP (end of pattern), which initiales pattern exit.
119 -- The parameter or parameters are present for certain node types,
120 -- and the type varies with the pattern code.
122 type Pattern_Code is (
123 PC_Arb_Y,
124 PC_Assign,
125 PC_Bal,
126 PC_BreakX_X,
127 PC_Cancel,
128 PC_EOP,
129 PC_Fail,
130 PC_Fence,
131 PC_Fence_X,
132 PC_Fence_Y,
133 PC_R_Enter,
134 PC_R_Remove,
135 PC_R_Restore,
136 PC_Rest,
137 PC_Succeed,
138 PC_Unanchored,
140 PC_Alt,
141 PC_Arb_X,
142 PC_Arbno_S,
143 PC_Arbno_X,
145 PC_Rpat,
147 PC_Pred_Func,
149 PC_Assign_Imm,
150 PC_Assign_OnM,
151 PC_Any_VP,
152 PC_Break_VP,
153 PC_BreakX_VP,
154 PC_NotAny_VP,
155 PC_NSpan_VP,
156 PC_Span_VP,
157 PC_String_VP,
159 PC_Write_Imm,
160 PC_Write_OnM,
162 PC_Null,
163 PC_String,
165 PC_String_2,
166 PC_String_3,
167 PC_String_4,
168 PC_String_5,
169 PC_String_6,
171 PC_Setcur,
173 PC_Any_CH,
174 PC_Break_CH,
175 PC_BreakX_CH,
176 PC_Char,
177 PC_NotAny_CH,
178 PC_NSpan_CH,
179 PC_Span_CH,
181 PC_Any_CS,
182 PC_Break_CS,
183 PC_BreakX_CS,
184 PC_NotAny_CS,
185 PC_NSpan_CS,
186 PC_Span_CS,
188 PC_Arbno_Y,
189 PC_Len_Nat,
190 PC_Pos_Nat,
191 PC_RPos_Nat,
192 PC_RTab_Nat,
193 PC_Tab_Nat,
195 PC_Pos_NF,
196 PC_Len_NF,
197 PC_RPos_NF,
198 PC_RTab_NF,
199 PC_Tab_NF,
201 PC_Pos_NP,
202 PC_Len_NP,
203 PC_RPos_NP,
204 PC_RTab_NP,
205 PC_Tab_NP,
207 PC_Any_VF,
208 PC_Break_VF,
209 PC_BreakX_VF,
210 PC_NotAny_VF,
211 PC_NSpan_VF,
212 PC_Span_VF,
213 PC_String_VF);
215 type IndexT is range 0 .. +(2 **15 - 1);
217 type PE (Pcode : Pattern_Code) is record
219 Index : IndexT;
220 -- Serial index number of pattern element within pattern.
222 Pthen : PE_Ptr;
223 -- Successor element, to be matched after this one
225 case Pcode is
227 when PC_Arb_Y |
228 PC_Assign |
229 PC_Bal |
230 PC_BreakX_X |
231 PC_Cancel |
232 PC_EOP |
233 PC_Fail |
234 PC_Fence |
235 PC_Fence_X |
236 PC_Fence_Y |
237 PC_Null |
238 PC_R_Enter |
239 PC_R_Remove |
240 PC_R_Restore |
241 PC_Rest |
242 PC_Succeed |
243 PC_Unanchored => null;
245 when PC_Alt |
246 PC_Arb_X |
247 PC_Arbno_S |
248 PC_Arbno_X => Alt : PE_Ptr;
250 when PC_Rpat => PP : Pattern_Ptr;
252 when PC_Pred_Func => BF : Boolean_Func;
254 when PC_Assign_Imm |
255 PC_Assign_OnM |
256 PC_Any_VP |
257 PC_Break_VP |
258 PC_BreakX_VP |
259 PC_NotAny_VP |
260 PC_NSpan_VP |
261 PC_Span_VP |
262 PC_String_VP => VP : VString_Ptr;
264 when PC_Write_Imm |
265 PC_Write_OnM => FP : File_Ptr;
267 when PC_String => Str : String_Ptr;
269 when PC_String_2 => Str2 : String (1 .. 2);
271 when PC_String_3 => Str3 : String (1 .. 3);
273 when PC_String_4 => Str4 : String (1 .. 4);
275 when PC_String_5 => Str5 : String (1 .. 5);
277 when PC_String_6 => Str6 : String (1 .. 6);
279 when PC_Setcur => Var : Natural_Ptr;
281 when PC_Any_CH |
282 PC_Break_CH |
283 PC_BreakX_CH |
284 PC_Char |
285 PC_NotAny_CH |
286 PC_NSpan_CH |
287 PC_Span_CH => Char : Character;
289 when PC_Any_CS |
290 PC_Break_CS |
291 PC_BreakX_CS |
292 PC_NotAny_CS |
293 PC_NSpan_CS |
294 PC_Span_CS => CS : Character_Set;
296 when PC_Arbno_Y |
297 PC_Len_Nat |
298 PC_Pos_Nat |
299 PC_RPos_Nat |
300 PC_RTab_Nat |
301 PC_Tab_Nat => Nat : Natural;
303 when PC_Pos_NF |
304 PC_Len_NF |
305 PC_RPos_NF |
306 PC_RTab_NF |
307 PC_Tab_NF => NF : Natural_Func;
309 when PC_Pos_NP |
310 PC_Len_NP |
311 PC_RPos_NP |
312 PC_RTab_NP |
313 PC_Tab_NP => NP : Natural_Ptr;
315 when PC_Any_VF |
316 PC_Break_VF |
317 PC_BreakX_VF |
318 PC_NotAny_VF |
319 PC_NSpan_VF |
320 PC_Span_VF |
321 PC_String_VF => VF : VString_Func;
323 end case;
324 end record;
326 subtype PC_Has_Alt is Pattern_Code range PC_Alt .. PC_Arbno_X;
327 -- Range of pattern codes that has an Alt field. This is used in the
328 -- recursive traversals, since these links must be followed.
330 EOP_Element : aliased constant PE := (PC_EOP, 0, N);
331 -- This is the end of pattern element, and is thus the representation of
332 -- a null pattern. It has a zero index element since it is never placed
333 -- inside a pattern. Furthermore it does not need a successor, since it
334 -- marks the end of the pattern, so that no more successors are needed.
336 EOP : constant PE_Ptr := EOP_Element'Unrestricted_Access;
337 -- This is the end of pattern pointer, that is used in the Pthen pointer
338 -- of other nodes to signal end of pattern.
340 -- The following array is used to determine if a pattern used as an
341 -- argument for Arbno is eligible for treatment using the simple Arbno
342 -- structure (i.e. it is a pattern that is guaranteed to match at least
343 -- one character on success, and not to make any entries on the stack.
345 OK_For_Simple_Arbno :
346 array (Pattern_Code) of Boolean := (
347 PC_Any_CS |
348 PC_Any_CH |
349 PC_Any_VF |
350 PC_Any_VP |
351 PC_Char |
352 PC_Len_Nat |
353 PC_NotAny_CS |
354 PC_NotAny_CH |
355 PC_NotAny_VF |
356 PC_NotAny_VP |
357 PC_Span_CS |
358 PC_Span_CH |
359 PC_Span_VF |
360 PC_Span_VP |
361 PC_String |
362 PC_String_2 |
363 PC_String_3 |
364 PC_String_4 |
365 PC_String_5 |
366 PC_String_6 => True,
368 others => False);
370 -------------------------------
371 -- The Pattern History Stack --
372 -------------------------------
374 -- The pattern history stack is used for controlling backtracking when
375 -- a match fails. The idea is to stack entries that give a cursor value
376 -- to be restored, and a node to be reestablished as the current node to
377 -- attempt an appropriate rematch operation. The processing for a pattern
378 -- element that has rematch alternatives pushes an appropriate entry or
379 -- entry on to the stack, and the proceeds. If a match fails at any point,
380 -- the top element of the stack is popped off, resetting the cursor and
381 -- the match continues by accessing the node stored with this entry.
383 type Stack_Entry is record
385 Cursor : Integer;
386 -- Saved cursor value that is restored when this entry is popped
387 -- from the stack if a match attempt fails. Occasionally, this
388 -- field is used to store a history stack pointer instead of a
389 -- cursor. Such cases are noted in the documentation and the value
390 -- stored is negative since stack pointer values are always negative.
392 Node : PE_Ptr;
393 -- This pattern element reference is reestablished as the current
394 -- Node to be matched (which will attempt an appropriate rematch).
396 end record;
398 subtype Stack_Range is Integer range -Stack_Size .. -1;
400 type Stack_Type is array (Stack_Range) of Stack_Entry;
401 -- The type used for a history stack. The actual instance of the stack
402 -- is declared as a local variable in the Match routine, to properly
403 -- handle recursive calls to Match. All stack pointer values are negative
404 -- to distinguish them from normal cursor values.
406 -- Note: the pattern matching stack is used only to handle backtracking.
407 -- If no backtracking occurs, its entries are never accessed, and never
408 -- popped off, and in particular it is normal for a successful match
409 -- to terminate with entries on the stack that are simply discarded.
411 -- Note: in subsequent diagrams of the stack, we always place element
412 -- zero (the deepest element) at the top of the page, then build the
413 -- stack down on the page with the most recent (top of stack) element
414 -- being the bottom-most entry on the page.
416 -- Stack checking is handled by labeling every pattern with the maximum
417 -- number of stack entries that are required, so a single check at the
418 -- start of matching the pattern suffices. There are two exceptions.
420 -- First, the count does not include entries for recursive pattern
421 -- references. Such recursions must therefore perform a specific
422 -- stack check with respect to the number of stack entries required
423 -- by the recursive pattern that is accessed and the amount of stack
424 -- that remains unused.
426 -- Second, the count includes only one iteration of an Arbno pattern,
427 -- so a specific check must be made on subsequent iterations that there
428 -- is still enough stack space left. The Arbno node has a field that
429 -- records the number of stack entries required by its argument for
430 -- this purpose.
432 ---------------------------------------------------
433 -- Use of Serial Index Field in Pattern Elements --
434 ---------------------------------------------------
436 -- The serial index numbers for the pattern elements are assigned as
437 -- a pattern is consructed from its constituent elements. Note that there
438 -- is never any sharing of pattern elements between patterns (copies are
439 -- always made), so the serial index numbers are unique to a particular
440 -- pattern as referenced from the P field of a value of type Pattern.
442 -- The index numbers meet three separate invariants, which are used for
443 -- various purposes as described in this section.
445 -- First, the numbers uniquely identify the pattern elements within a
446 -- pattern. If Num is the number of elements in a given pattern, then
447 -- the serial index numbers for the elements of this pattern will range
448 -- from 1 .. Num, so that each element has a separate value.
450 -- The purpose of this assignment is to provide a convenient auxiliary
451 -- data structure mechanism during operations which must traverse a
452 -- pattern (e.g. copy and finalization processing). Once constructed
453 -- patterns are strictly read only. This is necessary to allow sharing
454 -- of patterns between tasks. This means that we cannot go marking the
455 -- pattern (e.g. with a visited bit). Instead we cosntuct a separate
456 -- vector that contains the necessary information indexed by the Index
457 -- values in the pattern elements. For this purpose the only requirement
458 -- is that they be uniquely assigned.
460 -- Second, the pattern element referenced directly, i.e. the leading
461 -- pattern element, is always the maximum numbered element and therefore
462 -- indicates the total number of elements in the pattern. More precisely,
463 -- the element referenced by the P field of a pattern value, or the
464 -- element returned by any of the internal pattern construction routines
465 -- in the body (that return a value of type PE_Ptr) always is this
466 -- maximum element,
468 -- The purpose of this requirement is to allow an immediate determination
469 -- of the number of pattern elements within a pattern. This is used to
470 -- properly size the vectors used to contain auxiliary information for
471 -- traversal as described above.
473 -- Third, as compound pattern structures are constructed, the way in which
474 -- constituent parts of the pattern are constructed is stylized. This is
475 -- an automatic consequence of the way that these compounjd structures
476 -- are constructed, and basically what we are doing is simply documenting
477 -- and specifying the natural result of the pattern construction. The
478 -- section describing compound pattern structures gives details of the
479 -- numbering of each compound pattern structure.
481 -- The purpose of specifying the stylized numbering structures for the
482 -- compound patterns is to help simplify the processing in the Image
483 -- function, since it eases the task of retrieving the original recursive
484 -- structure of the pattern from the flat graph structure of elements.
485 -- This use in the Image function is the only point at which the code
486 -- makes use of the stylized structures.
488 type Ref_Array is array (IndexT range <>) of PE_Ptr;
489 -- This type is used to build an array whose N'th entry references the
490 -- element in a pattern whose Index value is N. See Build_Ref_Array.
492 procedure Build_Ref_Array (E : PE_Ptr; RA : out Ref_Array);
493 -- Given a pattern element which is the leading element of a pattern
494 -- structure, and a Ref_Array with bounds 1 .. E.Index, fills in the
495 -- Ref_Array so that its N'th entry references the element of the
496 -- referenced pattern whose Index value is N.
498 -------------------------------
499 -- Recursive Pattern Matches --
500 -------------------------------
502 -- The pattern primitive (+P) where P is a Pattern_Ptr or Pattern_Func
503 -- causes a recursive pattern match. This cannot be handled by an actual
504 -- recursive call to the outer level Match routine, since this would not
505 -- allow for possible backtracking into the region matched by the inner
506 -- pattern. Indeed this is the classical clash between recursion and
507 -- backtracking, and a simple recursive stack structure does not suffice.
509 -- This section describes how this recursion and the possible associated
510 -- backtracking is handled. We still use a single stack, but we establish
511 -- the concept of nested regions on this stack, each of which has a stack
512 -- base value pointing to the deepest stack entry of the region. The base
513 -- value for the outer level is zero.
515 -- When a recursive match is established, two special stack entries are
516 -- made. The first entry is used to save the original node that starts
517 -- the recursive match. This is saved so that the successor field of
518 -- this node is accessible at the end of the match, but it is never
519 -- popped and executed.
521 -- The second entry corresponds to a standard new region action. A
522 -- PC_R_Remove node is stacked, whose cursor field is used to store
523 -- the outer stack base, and the stack base is reset to point to
524 -- this PC_R_Remove node. Then the recursive pattern is matched and
525 -- it can make history stack entries in the normal matter, so now
526 -- the stack looks like:
528 -- (stack entries made by outer level)
530 -- (Special entry, node is (+P) successor
531 -- cursor entry is not used)
533 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack base
534 -- saved base value for the enclosing region)
536 -- (stack entries made by inner level)
538 -- If a subsequent failure occurs and pops the PC_R_Remove node, it
539 -- removes itself and the special entry immediately underneath it,
540 -- restores the stack base value for the enclosing region, and then
541 -- again signals failure to look for alternatives that were stacked
542 -- before the recursion was initiated.
544 -- Now we need to consider what happens if the inner pattern succeeds, as
545 -- signalled by accessing the special PC_EOP pattern primitive. First we
546 -- recognize the nested case by looking at the Base value. If this Base
547 -- value is Stack'First, then the entire match has succeeded, but if the
548 -- base value is greater than Stack'First, then we have successfully
549 -- matched an inner pattern, and processing continues at the outer level.
551 -- There are two cases. The simple case is when the inner pattern has made
552 -- no stack entries, as recognized by the fact that the current stack
553 -- pointer is equal to the current base value. In this case it is fine to
554 -- remove all trace of the recursion by restoring the outer base value and
555 -- using the special entry to find the appropriate successor node.
557 -- The more complex case arises when the inner match does make stack
558 -- entries. In this case, the PC_EOP processing stacks a special entry
559 -- whose cursor value saves the saved inner base value (the one that
560 -- references the corresponding PC_R_Remove value), and whose node
561 -- pointer references a PC_R_Restore node, so the stack looks like:
563 -- (stack entries made by outer level)
565 -- (Special entry, node is (+P) successor,
566 -- cursor entry is not used)
568 -- (PC_R_Remove entry, "cursor" value is (negative)
569 -- saved base value for the enclosing region)
571 -- (stack entries made by inner level)
573 -- (PC_Region_Replace entry, "cursor" value is (negative)
574 -- stack pointer value referencing the PC_R_Remove entry).
576 -- If the entire match succeeds, then these stack entries are, as usual,
577 -- ignored and abandoned. If on the other hand a subsequent failure
578 -- causes the PC_Region_Replace entry to be popped, it restores the
579 -- inner base value from its saved "cursor" value and then fails again.
580 -- Note that it is OK that the cursor is temporarily clobbered by this
581 -- pop, since the second failure will reestablish a proper cursor value.
583 ---------------------------------
584 -- Compound Pattern Structures --
585 ---------------------------------
587 -- This section discusses the compound structures used to represent
588 -- constructed patterns. It shows the graph structures of pattern
589 -- elements that are constructed, and in the case of patterns that
590 -- provide backtracking possibilities, describes how the history
591 -- stack is used to control the backtracking. Finally, it notes the
592 -- way in which the Index numbers are assigned to the structure.
594 -- In all diagrams, solid lines (built witth minus signs or vertical
595 -- bars, represent successor pointers (Pthen fields) with > or V used
596 -- to indicate the direction of the pointer. The initial node of the
597 -- structure is in the upper left of the diagram. A dotted line is an
598 -- alternative pointer from the element above it to the element below
599 -- it. See individual sections for details on how alternatives are used.
601 -------------------
602 -- Concatenation --
603 -------------------
605 -- In the pattern structures listed in this section, a line that looks
606 -- lile ----> with nothing to the right indicates an end of pattern
607 -- (EOP) pointer that represents the end of the match.
609 -- When a pattern concatenation (L & R) occurs, the resulting structure
610 -- is obtained by finding all such EOP pointers in L, and replacing
611 -- them to point to R. This is the most important flattening that
612 -- occurs in constructing a pattern, and it means that the pattern
613 -- matching circuitry does not have to keep track of the structure
614 -- of a pattern with respect to concatenation, since the appropriate
615 -- successor is always at hand.
617 -- Concatenation itself generates no additional possibilities for
618 -- backtracking, but the constituent patterns of the concatenated
619 -- structure will make stack entries as usual. The maximum amount
620 -- of stack required by the structure is thus simply the sum of the
621 -- maximums required by L and R.
623 -- The index numbering of a concatenation structure works by leaving
624 -- the numbering of the right hand pattern, R, unchanged and adjusting
625 -- the numbers in the left hand pattern, L up by the count of elements
626 -- in R. This ensures that the maximum numbered element is the leading
627 -- element as required (given that it was the leading element in L).
629 -----------------
630 -- Alternation --
631 -----------------
633 -- A pattern (L or R) constructs the structure:
635 -- +---+ +---+
636 -- | A |---->| L |---->
637 -- +---+ +---+
638 -- .
639 -- .
640 -- +---+
641 -- | R |---->
642 -- +---+
644 -- The A element here is a PC_Alt node, and the dotted line represents
645 -- the contents of the Alt field. When the PC_Alt element is matched,
646 -- it stacks a pointer to the leading element of R on the history stack
647 -- so that on subsequent failure, a match of R is attempted.
649 -- The A node is the higest numbered element in the pattern. The
650 -- original index numbers of R are unchanged, but the index numbers
651 -- of the L pattern are adjusted up by the count of elements in R.
653 -- Note that the difference between the index of the L leading element
654 -- the index of the R leading element (after building the alt structure)
655 -- indicates the number of nodes in L, and this is true even after the
656 -- structure is incorporated into some larger structure. For example,
657 -- if the A node has index 16, and L has index 15 and R has index
658 -- 5, then we know that L has 10 (15-5) elements in it.
660 -- Suppose that we now concatenate this structure to another pattern
661 -- with 9 elements in it. We will now have the A node with an index
662 -- of 25, L with an index of 24 and R with an index of 14. We still
663 -- know that L has 10 (24-14) elements in it, numbered 15-24, and
664 -- consequently the successor of the alternation structure has an
665 -- index with a value less than 15. This is used in Image to figure
666 -- out the original recursive structure of a pattern.
668 -- To clarify the interaction of the alternation and concatenation
669 -- structures, here is a more complex example of the structure built
670 -- for the pattern:
672 -- (V or W or X) (Y or Z)
674 -- where A,B,C,D,E are all single element patterns:
676 -- +---+ +---+ +---+ +---+
677 -- I A I---->I V I---+-->I A I---->I Y I---->
678 -- +---+ +---+ I +---+ +---+
679 -- . I .
680 -- . I .
681 -- +---+ +---+ I +---+
682 -- I A I---->I W I-->I I Z I---->
683 -- +---+ +---+ I +---+
684 -- . I
685 -- . I
686 -- +---+ I
687 -- I X I------------>+
688 -- +---+
690 -- The numbering of the nodes would be as follows:
692 -- +---+ +---+ +---+ +---+
693 -- I 8 I---->I 7 I---+-->I 3 I---->I 2 I---->
694 -- +---+ +---+ I +---+ +---+
695 -- . I .
696 -- . I .
697 -- +---+ +---+ I +---+
698 -- I 6 I---->I 5 I-->I I 1 I---->
699 -- +---+ +---+ I +---+
700 -- . I
701 -- . I
702 -- +---+ I
703 -- I 4 I------------>+
704 -- +---+
706 -- Note: The above structure actually corresponds to
708 -- (A or (B or C)) (D or E)
710 -- rather than
712 -- ((A or B) or C) (D or E)
714 -- which is the more natural interpretation, but in fact alternation
715 -- is associative, and the construction of an alternative changes the
716 -- left grouped pattern to the right grouped pattern in any case, so
717 -- that the Image function produces a more natural looking output.
719 ---------
720 -- Arb --
721 ---------
723 -- An Arb pattern builds the structure
725 -- +---+
726 -- | X |---->
727 -- +---+
728 -- .
729 -- .
730 -- +---+
731 -- | Y |---->
732 -- +---+
734 -- The X node is a PC_Arb_X node, which matches null, and stacks a
735 -- pointer to Y node, which is the PC_Arb_Y node that matches one
736 -- extra character and restacks itself.
738 -- The PC_Arb_X node is numbered 2, and the PC_Arb_Y node is 1.
740 -------------------------
741 -- Arbno (simple case) --
742 -------------------------
744 -- The simple form of Arbno can be used where the pattern always
745 -- matches at least one character if it succeeds, and it is known
746 -- not to make any history stack entries. In this case, Arbno (P)
747 -- can construct the following structure:
749 -- +-------------+
750 -- | ^
751 -- V |
752 -- +---+ |
753 -- | S |----> |
754 -- +---+ |
755 -- . |
756 -- . |
757 -- +---+ |
758 -- | P |---------->+
759 -- +---+
761 -- The S (PC_Arbno_S) node matches null stacking a pointer to the
762 -- pattern P. If a subsequent failure causes P to be matched and
763 -- this match succeeds, then node A gets restacked to try another
764 -- instance if needed by a subsequent failure.
766 -- The node numbering of the constituent pattern P is not affected.
767 -- The S node has a node number of P.Index + 1.
769 --------------------------
770 -- Arbno (complex case) --
771 --------------------------
773 -- A call to Arbno (P), where P can match null (or at least is not
774 -- known to require a non-null string) and/or P requires pattern stack
775 -- entries, constructs the following structure:
777 -- +--------------------------+
778 -- | ^
779 -- V |
780 -- +---+ |
781 -- | X |----> |
782 -- +---+ |
783 -- . |
784 -- . |
785 -- +---+ +---+ +---+ |
786 -- | E |---->| P |---->| Y |--->+
787 -- +---+ +---+ +---+
789 -- The node X (PC_Arbno_X) matches null, stacking a pointer to the
790 -- E-P-X structure used to match one Arbno instance.
792 -- Here E is the PC_R_Enter node which matches null and creates two
793 -- stack entries. The first is a special entry whose node field is
794 -- not used at all, and whose cursor field has the initial cursor.
796 -- The second entry corresponds to a standard new region action. A
797 -- PC_R_Remove node is stacked, whose cursor field is used to store
798 -- the outer stack base, and the stack base is reset to point to
799 -- this PC_R_Remove node. Then the pattern P is matched, and it can
800 -- make history stack entries in the normal manner, so now the stack
801 -- looks like:
803 -- (stack entries made before assign pattern)
805 -- (Special entry, node field not used,
806 -- used only to save initial cursor)
808 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base
809 -- saved base value for the enclosing region)
811 -- (stack entries made by matching P)
813 -- If the match of P fails, then the PC_R_Remove entry is popped and
814 -- it removes both itself and the special entry underneath it,
815 -- restores the outer stack base, and signals failure.
817 -- If the match of P succeeds, then node Y, the PC_Arbno_Y node, pops
818 -- the inner region. There are two possibilities. If matching P left
819 -- no stack entries, then all traces of the inner region can be removed.
820 -- If there are stack entries, then we push an PC_Region_Replace stack
821 -- entry whose "cursor" value is the inner stack base value, and then
822 -- restore the outer stack base value, so the stack looks like:
824 -- (stack entries made before assign pattern)
826 -- (Special entry, node field not used,
827 -- used only to save initial cursor)
829 -- (PC_R_Remove entry, "cursor" value is (negative)
830 -- saved base value for the enclosing region)
832 -- (stack entries made by matching P)
834 -- (PC_Region_Replace entry, "cursor" value is (negative)
835 -- stack pointer value referencing the PC_R_Remove entry).
837 -- Now that we have matched another instance of the Arbno pattern,
838 -- we need to move to the successor. There are two cases. If the
839 -- Arbno pattern matched null, then there is no point in seeking
840 -- alternatives, since we would just match a whole bunch of nulls.
841 -- In this case we look through the alternative node, and move
842 -- directly to its successor (i.e. the successor of the Arbno
843 -- pattern). If on the other hand a non-null string was matched,
844 -- we simply follow the successor to the alternative node, which
845 -- sets up for another possible match of the Arbno pattern.
847 -- As noted in the section on stack checking, the stack count (and
848 -- hence the stack check) for a pattern includes only one iteration
849 -- of the Arbno pattern. To make sure that multiple iterations do not
850 -- overflow the stack, the Arbno node saves the stack count required
851 -- by a single iteration, and the Concat function increments this to
852 -- include stack entries required by any successor. The PC_Arbno_Y
853 -- node uses this count to ensure that sufficient stack remains
854 -- before proceeding after matching each new instance.
856 -- The node numbering of the constituent pattern P is not affected.
857 -- Where N is the number of nodes in P, the Y node is numbered N + 1,
858 -- the E node is N + 2, and the X node is N + 3.
860 ----------------------
861 -- Assign Immediate --
862 ----------------------
864 -- Immediate assignment (P * V) constructs the following structure
866 -- +---+ +---+ +---+
867 -- | E |---->| P |---->| A |---->
868 -- +---+ +---+ +---+
870 -- Here E is the PC_R_Enter node which matches null and creates two
871 -- stack entries. The first is a special entry whose node field is
872 -- not used at all, and whose cursor field has the initial cursor.
874 -- The second entry corresponds to a standard new region action. A
875 -- PC_R_Remove node is stacked, whose cursor field is used to store
876 -- the outer stack base, and the stack base is reset to point to
877 -- this PC_R_Remove node. Then the pattern P is matched, and it can
878 -- make history stack entries in the normal manner, so now the stack
879 -- looks like:
881 -- (stack entries made before assign pattern)
883 -- (Special entry, node field not used,
884 -- used only to save initial cursor)
886 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base
887 -- saved base value for the enclosing region)
889 -- (stack entries made by matching P)
891 -- If the match of P fails, then the PC_R_Remove entry is popped
892 -- and it removes both itself and the special entry underneath it,
893 -- restores the outer stack base, and signals failure.
895 -- If the match of P succeeds, then node A, which is the actual
896 -- PC_Assign_Imm node, executes the assignment (using the stack
897 -- base to locate the entry with the saved starting cursor value),
898 -- and the pops the inner region. There are two possibilities, if
899 -- matching P left no stack entries, then all traces of the inner
900 -- region can be removed. If there are stack entries, then we push
901 -- an PC_Region_Replace stack entry whose "cursor" value is the
902 -- inner stack base value, and then restore the outer stack base
903 -- value, so the stack looks like:
905 -- (stack entries made before assign pattern)
907 -- (Special entry, node field not used,
908 -- used only to save initial cursor)
910 -- (PC_R_Remove entry, "cursor" value is (negative)
911 -- saved base value for the enclosing region)
913 -- (stack entries made by matching P)
915 -- (PC_Region_Replace entry, "cursor" value is the (negative)
916 -- stack pointer value referencing the PC_R_Remove entry).
918 -- If a subsequent failure occurs, the PC_Region_Replace node restores
919 -- the inner stack base value and signals failure to explore rematches
920 -- of the pattern P.
922 -- The node numbering of the constituent pattern P is not affected.
923 -- Where N is the number of nodes in P, the A node is numbered N + 1,
924 -- and the E node is N + 2.
926 ---------------------
927 -- Assign On Match --
928 ---------------------
930 -- The assign on match (**) pattern is quite similar to the assign
931 -- immediate pattern, except that the actual assignment has to be
932 -- delayed. The following structure is constructed:
934 -- +---+ +---+ +---+
935 -- | E |---->| P |---->| A |---->
936 -- +---+ +---+ +---+
938 -- The operation of this pattern is identical to that described above
939 -- for deferred assignment, up to the point where P has been matched.
941 -- The A node, which is the PC_Assign_OnM node first pushes a
942 -- PC_Assign node onto the history stack. This node saves the ending
943 -- cursor and acts as a flag for the final assignment, as further
944 -- described below.
946 -- It then stores a pointer to itself in the special entry node field.
947 -- This was otherwise unused, and is now used to retrive the address
948 -- of the variable to be assigned at the end of the pattern.
950 -- After that the inner region is terminated in the usual manner,
951 -- by stacking a PC_R_Restore entry as described for the assign
952 -- immediate case. Note that the optimization of completely
953 -- removing the inner region does not happen in this case, since
954 -- we have at least one stack entry (the PC_Assign one we just made).
955 -- The stack now looks like:
957 -- (stack entries made before assign pattern)
959 -- (Special entry, node points to copy of
960 -- the PC_Assign_OnM node, and the
961 -- cursor field saves the initial cursor).
963 -- (PC_R_Remove entry, "cursor" value is (negative)
964 -- saved base value for the enclosing region)
966 -- (stack entries made by matching P)
968 -- (PC_Assign entry, saves final cursor)
970 -- (PC_Region_Replace entry, "cursor" value is (negative)
971 -- stack pointer value referencing the PC_R_Remove entry).
973 -- If a subsequent failure causes the PC_Assign node to execute it
974 -- simply removes itself and propagates the failure.
976 -- If the match succeeds, then the history stack is scanned for
977 -- PC_Assign nodes, and the assignments are executed (examination
978 -- of the above diagram will show that all the necessary data is
979 -- at hand for the assignment).
981 -- To optimize the common case where no assign-on-match operations
982 -- are present, a global flag Assign_OnM is maintained which is
983 -- initialize to False, and gets set True as part of the execution
984 -- of the PC_Assign_OnM node. The scan of the history stack for
985 -- PC_Assign entries is done only if this flag is set.
987 -- The node numbering of the constituent pattern P is not affected.
988 -- Where N is the number of nodes in P, the A node is numbered N + 1,
989 -- and the E node is N + 2.
991 ---------
992 -- Bal --
993 ---------
995 -- Bal builds a single node:
997 -- +---+
998 -- | B |---->
999 -- +---+
1001 -- The node B is the PC_Bal node which matches a parentheses balanced
1002 -- string, starting at the current cursor position. It then updates
1003 -- the cursor past this matched string, and stacks a pointer to itself
1004 -- with this updated cursor value on the history stack, to extend the
1005 -- matched string on a subequent failure.
1007 -- Since this is a single node it is numbered 1 (the reason we include
1008 -- it in the compound patterns section is that it backtracks).
1010 ------------
1011 -- BreakX --
1012 ------------
1014 -- BreakX builds the structure
1016 -- +---+ +---+
1017 -- | B |---->| A |---->
1018 -- +---+ +---+
1019 -- ^ .
1020 -- | .
1021 -- | +---+
1022 -- +<------| X |
1023 -- +---+
1025 -- Here the B node is the BreakX_xx node that performs a normal Break
1026 -- function. The A node is an alternative (PC_Alt) node that matches
1027 -- null, but stacks a pointer to node X (the PC_BreakX_X node) which
1028 -- extends the match one character (to eat up the previously detected
1029 -- break character), and then rematches the break.
1031 -- The B node is numbered 3, the alternative node is 1, and the X
1032 -- node is 2.
1034 -----------
1035 -- Fence --
1036 -----------
1038 -- Fence builds a single node:
1040 -- +---+
1041 -- | F |---->
1042 -- +---+
1044 -- The element F, PC_Fence, matches null, and stacks a pointer to a
1045 -- PC_Cancel element which will abort the match on a subsequent failure.
1047 -- Since this is a single element it is numbered 1 (the reason we
1048 -- include it in the compound patterns section is that it backtracks).
1050 --------------------
1051 -- Fence Function --
1052 --------------------
1054 -- A call to the Fence function builds the structure:
1056 -- +---+ +---+ +---+
1057 -- | E |---->| P |---->| X |---->
1058 -- +---+ +---+ +---+
1060 -- Here E is the PC_R_Enter node which matches null and creates two
1061 -- stack entries. The first is a special entry which is not used at
1062 -- all in the fence case (it is present merely for uniformity with
1063 -- other cases of region enter operations).
1065 -- The second entry corresponds to a standard new region action. A
1066 -- PC_R_Remove node is stacked, whose cursor field is used to store
1067 -- the outer stack base, and the stack base is reset to point to
1068 -- this PC_R_Remove node. Then the pattern P is matched, and it can
1069 -- make history stack entries in the normal manner, so now the stack
1070 -- looks like:
1072 -- (stack entries made before fence pattern)
1074 -- (Special entry, not used at all)
1076 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base
1077 -- saved base value for the enclosing region)
1079 -- (stack entries made by matching P)
1081 -- If the match of P fails, then the PC_R_Remove entry is popped
1082 -- and it removes both itself and the special entry underneath it,
1083 -- restores the outer stack base, and signals failure.
1085 -- If the match of P succeeds, then node X, the PC_Fence_X node, gets
1086 -- control. One might be tempted to think that at this point, the
1087 -- history stack entries made by matching P can just be removed since
1088 -- they certainly are not going to be used for rematching (that is
1089 -- whole point of Fence after all!) However, this is wrong, because
1090 -- it would result in the loss of possible assign-on-match entries
1091 -- for deferred pattern assignments.
1093 -- Instead what we do is to make a special entry whose node references
1094 -- PC_Fence_Y, and whose cursor saves the inner stack base value, i.e.
1095 -- the pointer to the PC_R_Remove entry. Then the outer stack base
1096 -- pointer is restored, so the stack looks like:
1098 -- (stack entries made before assign pattern)
1100 -- (Special entry, not used at all)
1102 -- (PC_R_Remove entry, "cursor" value is (negative)
1103 -- saved base value for the enclosing region)
1105 -- (stack entries made by matching P)
1107 -- (PC_Fence_Y entry, "cursor" value is (negative) stack
1108 -- pointer value referencing the PC_R_Remove entry).
1110 -- If a subsequent failure occurs, then the PC_Fence_Y entry removes
1111 -- the entire inner region, including all entries made by matching P,
1112 -- and alternatives prior to the Fence pattern are sought.
1114 -- The node numbering of the constituent pattern P is not affected.
1115 -- Where N is the number of nodes in P, the X node is numbered N + 1,
1116 -- and the E node is N + 2.
1118 -------------
1119 -- Succeed --
1120 -------------
1122 -- Succeed builds a single node:
1124 -- +---+
1125 -- | S |---->
1126 -- +---+
1128 -- The node S is the PC_Succeed node which matches null, and stacks
1129 -- a pointer to itself on the history stack, so that a subsequent
1130 -- failure repeats the same match.
1132 -- Since this is a single node it is numbered 1 (the reason we include
1133 -- it in the compound patterns section is that it backtracks).
1135 ---------------------
1136 -- Write Immediate --
1137 ---------------------
1139 -- The structure built for a write immediate operation (P * F, where
1140 -- F is a file access value) is:
1142 -- +---+ +---+ +---+
1143 -- | E |---->| P |---->| W |---->
1144 -- +---+ +---+ +---+
1146 -- Here E is the PC_R_Enter node and W is the PC_Write_Imm node. The
1147 -- handling is identical to that described above for Assign Immediate,
1148 -- except that at the point where a successful match occurs, the matched
1149 -- substring is written to the referenced file.
1151 -- The node numbering of the constituent pattern P is not affected.
1152 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1153 -- and the E node is N + 2.
1155 --------------------
1156 -- Write On Match --
1157 --------------------
1159 -- The structure built for a write on match operation (P ** F, where
1160 -- F is a file access value) is:
1162 -- +---+ +---+ +---+
1163 -- | E |---->| P |---->| W |---->
1164 -- +---+ +---+ +---+
1166 -- Here E is the PC_R_Enter node and W is the PC_Write_OnM node. The
1167 -- handling is identical to that described above for Assign On Match,
1168 -- except that at the point where a successful match has completed,
1169 -- the matched substring is written to the referenced file.
1171 -- The node numbering of the constituent pattern P is not affected.
1172 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1173 -- and the E node is N + 2.
1174 -----------------------
1175 -- Constant Patterns --
1176 -----------------------
1178 -- The following pattern elements are referenced only from the pattern
1179 -- history stack. In each case the processing for the pattern element
1180 -- results in pattern match abort, or futher failure, so there is no
1181 -- need for a successor and no need for a node number
1183 CP_Assign : aliased PE := (PC_Assign, 0, N);
1184 CP_Cancel : aliased PE := (PC_Cancel, 0, N);
1185 CP_Fence_Y : aliased PE := (PC_Fence_Y, 0, N);
1186 CP_R_Remove : aliased PE := (PC_R_Remove, 0, N);
1187 CP_R_Restore : aliased PE := (PC_R_Restore, 0, N);
1189 -----------------------
1190 -- Local Subprograms --
1191 -----------------------
1193 function Alternate (L, R : PE_Ptr) return PE_Ptr;
1194 function "or" (L, R : PE_Ptr) return PE_Ptr renames Alternate;
1195 -- Build pattern structure corresponding to the alternation of L, R.
1196 -- (i.e. try to match L, and if that fails, try to match R).
1198 function Arbno_Simple (P : PE_Ptr) return PE_Ptr;
1199 -- Build simple Arbno pattern, P is a pattern that is guaranteed to
1200 -- match at least one character if it succeeds and to require no
1201 -- stack entries under all circumstances. The result returned is
1202 -- a simple Arbno structure as previously described.
1204 function Bracket (E, P, A : PE_Ptr) return PE_Ptr;
1205 -- Given two single node pattern elements E and A, and a (possible
1206 -- complex) pattern P, construct the concatenation E-->P-->A and
1207 -- return a pointer to E. The concatenation does not affect the
1208 -- node numbering in P. A has a number one higher than the maximum
1209 -- number in P, and E has a number two higher than the maximum
1210 -- number in P (see for example the Assign_Immediate structure to
1211 -- understand a typical use of this function).
1213 function BreakX_Make (B : PE_Ptr) return Pattern;
1214 -- Given a pattern element for a Break patternx, returns the
1215 -- corresponding BreakX compound pattern structure.
1217 function Concat (L, R : PE_Ptr; Incr : Natural) return PE_Ptr;
1218 -- Creates a pattern eelement that represents a concatenation of the
1219 -- two given pattern elements (i.e. the pattern L followed by R).
1220 -- The result returned is always the same as L, but the pattern
1221 -- referenced by L is modified to have R as a successor. This
1222 -- procedure does not copy L or R, so if a copy is required, it
1223 -- is the responsibility of the caller. The Incr parameter is an
1224 -- amount to be added to the Nat field of any P_Arbno_Y node that is
1225 -- in the left operand, it represents the additional stack space
1226 -- required by the right operand.
1228 function C_To_PE (C : PChar) return PE_Ptr;
1229 -- Given a character, constructs a pattern element that matches
1230 -- the single character.
1232 function Copy (P : PE_Ptr) return PE_Ptr;
1233 -- Creates a copy of the pattern element referenced by the given
1234 -- pattern element reference. This is a deep copy, which means that
1235 -- it follows the Next and Alt pointers.
1237 function Image (P : PE_Ptr) return String;
1238 -- Returns the image of the address of the referenced pattern element.
1239 -- This is equivalent to Image (To_Address (P));
1241 function Is_In (C : Character; Str : String) return Boolean;
1242 pragma Inline (Is_In);
1243 -- Determines if the character C is in string Str.
1245 procedure Logic_Error;
1246 -- Called to raise Program_Error with an appropriate message if an
1247 -- internal logic error is detected.
1249 function Str_BF (A : Boolean_Func) return String;
1250 function Str_FP (A : File_Ptr) return String;
1251 function Str_NF (A : Natural_Func) return String;
1252 function Str_NP (A : Natural_Ptr) return String;
1253 function Str_PP (A : Pattern_Ptr) return String;
1254 function Str_VF (A : VString_Func) return String;
1255 function Str_VP (A : VString_Ptr) return String;
1256 -- These are debugging routines, which return a representation of the
1257 -- given access value (they are called only by Image and Dump)
1259 procedure Set_Successor (Pat : PE_Ptr; Succ : PE_Ptr);
1260 -- Adjusts all EOP pointers in Pat to point to Succ. No other changes
1261 -- are made. In particular, Succ is unchanged, and no index numbers
1262 -- are modified. Note that Pat may not be equal to EOP on entry.
1264 function S_To_PE (Str : PString) return PE_Ptr;
1265 -- Given a string, constructs a pattern element that matches the string
1267 procedure Uninitialized_Pattern;
1268 pragma No_Return (Uninitialized_Pattern);
1269 -- Called to raise Program_Error with an appropriate error message if
1270 -- an uninitialized pattern is used in any pattern construction or
1271 -- pattern matching operation.
1273 procedure XMatch
1274 (Subject : String;
1275 Pat_P : PE_Ptr;
1276 Pat_S : Natural;
1277 Start : out Natural;
1278 Stop : out Natural);
1279 -- This is the common pattern match routine. It is passed a string and
1280 -- a pattern, and it indicates success or failure, and on success the
1281 -- section of the string matched. It does not perform any assignments
1282 -- to the subject string, so pattern replacement is for the caller.
1284 -- Subject The subject string. The lower bound is always one. In the
1285 -- Match procedures, it is fine to use strings whose lower bound
1286 -- is not one, but we perform a one time conversion before the
1287 -- call to XMatch, so that XMatch does not have to be bothered
1288 -- with strange lower bounds.
1290 -- Pat_P Points to initial pattern element of pattern to be matched
1292 -- Pat_S Maximum required stack entries for pattern to be matched
1294 -- Start If match is successful, starting index of matched section.
1295 -- This value is always non-zero. A value of zero is used to
1296 -- indicate a failed match.
1298 -- Stop If match is successful, ending index of matched section.
1299 -- This can be zero if we match the null string at the start,
1300 -- in which case Start is set to zero, and Stop to one. If the
1301 -- Match fails, then the contents of Stop is undefined.
1303 procedure XMatchD
1304 (Subject : String;
1305 Pat_P : PE_Ptr;
1306 Pat_S : Natural;
1307 Start : out Natural;
1308 Stop : out Natural);
1309 -- Identical in all respects to XMatch, except that trace information is
1310 -- output on Standard_Output during execution of the match. This is the
1311 -- version that is called if the original Match call has Debug => True.
1313 ---------
1314 -- "&" --
1315 ---------
1317 function "&" (L : PString; R : Pattern) return Pattern is
1318 begin
1319 return (AFC with R.Stk, Concat (S_To_PE (L), Copy (R.P), R.Stk));
1320 end "&";
1322 function "&" (L : Pattern; R : PString) return Pattern is
1323 begin
1324 return (AFC with L.Stk, Concat (Copy (L.P), S_To_PE (R), 0));
1325 end "&";
1327 function "&" (L : PChar; R : Pattern) return Pattern is
1328 begin
1329 return (AFC with R.Stk, Concat (C_To_PE (L), Copy (R.P), R.Stk));
1330 end "&";
1332 function "&" (L : Pattern; R : PChar) return Pattern is
1333 begin
1334 return (AFC with L.Stk, Concat (Copy (L.P), C_To_PE (R), 0));
1335 end "&";
1337 function "&" (L : Pattern; R : Pattern) return Pattern is
1338 begin
1339 return (AFC with L.Stk + R.Stk, Concat (Copy (L.P), Copy (R.P), R.Stk));
1340 end "&";
1342 ---------
1343 -- "*" --
1344 ---------
1346 -- Assign immediate
1348 -- +---+ +---+ +---+
1349 -- | E |---->| P |---->| A |---->
1350 -- +---+ +---+ +---+
1352 -- The node numbering of the constituent pattern P is not affected.
1353 -- Where N is the number of nodes in P, the A node is numbered N + 1,
1354 -- and the E node is N + 2.
1356 function "*" (P : Pattern; Var : VString_Var) return Pattern is
1357 Pat : constant PE_Ptr := Copy (P.P);
1358 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1359 A : constant PE_Ptr :=
1360 new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access);
1362 begin
1363 return (AFC with P.Stk + 3, Bracket (E, Pat, A));
1364 end "*";
1366 function "*" (P : PString; Var : VString_Var) return Pattern is
1367 Pat : constant PE_Ptr := S_To_PE (P);
1368 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1369 A : constant PE_Ptr :=
1370 new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access);
1372 begin
1373 return (AFC with 3, Bracket (E, Pat, A));
1374 end "*";
1376 function "*" (P : PChar; Var : VString_Var) return Pattern is
1377 Pat : constant PE_Ptr := C_To_PE (P);
1378 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1379 A : constant PE_Ptr :=
1380 new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access);
1382 begin
1383 return (AFC with 3, Bracket (E, Pat, A));
1384 end "*";
1386 -- Write immediate
1388 -- +---+ +---+ +---+
1389 -- | E |---->| P |---->| W |---->
1390 -- +---+ +---+ +---+
1392 -- The node numbering of the constituent pattern P is not affected.
1393 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1394 -- and the E node is N + 2.
1396 function "*" (P : Pattern; Fil : File_Access) return Pattern is
1397 Pat : constant PE_Ptr := Copy (P.P);
1398 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1399 W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil);
1401 begin
1402 return (AFC with 3, Bracket (E, Pat, W));
1403 end "*";
1405 function "*" (P : PString; Fil : File_Access) return Pattern is
1406 Pat : constant PE_Ptr := S_To_PE (P);
1407 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1408 W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil);
1410 begin
1411 return (AFC with 3, Bracket (E, Pat, W));
1412 end "*";
1414 function "*" (P : PChar; Fil : File_Access) return Pattern is
1415 Pat : constant PE_Ptr := C_To_PE (P);
1416 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1417 W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil);
1419 begin
1420 return (AFC with 3, Bracket (E, Pat, W));
1421 end "*";
1423 ----------
1424 -- "**" --
1425 ----------
1427 -- Assign on match
1429 -- +---+ +---+ +---+
1430 -- | E |---->| P |---->| A |---->
1431 -- +---+ +---+ +---+
1433 -- The node numbering of the constituent pattern P is not affected.
1434 -- Where N is the number of nodes in P, the A node is numbered N + 1,
1435 -- and the E node is N + 2.
1437 function "**" (P : Pattern; Var : VString_Var) return Pattern is
1438 Pat : constant PE_Ptr := Copy (P.P);
1439 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1440 A : constant PE_Ptr :=
1441 new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access);
1443 begin
1444 return (AFC with P.Stk + 3, Bracket (E, Pat, A));
1445 end "**";
1447 function "**" (P : PString; Var : VString_Var) return Pattern is
1448 Pat : constant PE_Ptr := S_To_PE (P);
1449 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1450 A : constant PE_Ptr :=
1451 new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access);
1453 begin
1454 return (AFC with 3, Bracket (E, Pat, A));
1455 end "**";
1457 function "**" (P : PChar; Var : VString_Var) return Pattern is
1458 Pat : constant PE_Ptr := C_To_PE (P);
1459 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1460 A : constant PE_Ptr :=
1461 new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access);
1463 begin
1464 return (AFC with 3, Bracket (E, Pat, A));
1465 end "**";
1467 -- Write on match
1469 -- +---+ +---+ +---+
1470 -- | E |---->| P |---->| W |---->
1471 -- +---+ +---+ +---+
1473 -- The node numbering of the constituent pattern P is not affected.
1474 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1475 -- and the E node is N + 2.
1477 function "**" (P : Pattern; Fil : File_Access) return Pattern is
1478 Pat : constant PE_Ptr := Copy (P.P);
1479 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1480 W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil);
1482 begin
1483 return (AFC with P.Stk + 3, Bracket (E, Pat, W));
1484 end "**";
1486 function "**" (P : PString; Fil : File_Access) return Pattern is
1487 Pat : constant PE_Ptr := S_To_PE (P);
1488 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1489 W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil);
1491 begin
1492 return (AFC with 3, Bracket (E, Pat, W));
1493 end "**";
1495 function "**" (P : PChar; Fil : File_Access) return Pattern is
1496 Pat : constant PE_Ptr := C_To_PE (P);
1497 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1498 W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil);
1500 begin
1501 return (AFC with 3, Bracket (E, Pat, W));
1502 end "**";
1504 ---------
1505 -- "+" --
1506 ---------
1508 function "+" (Str : VString_Var) return Pattern is
1509 begin
1510 return
1511 (AFC with 0,
1512 new PE'(PC_String_VP, 1, EOP, Str'Unrestricted_Access));
1513 end "+";
1515 function "+" (Str : VString_Func) return Pattern is
1516 begin
1517 return (AFC with 0, new PE'(PC_String_VF, 1, EOP, Str));
1518 end "+";
1520 function "+" (P : Pattern_Var) return Pattern is
1521 begin
1522 return
1523 (AFC with 3,
1524 new PE'(PC_Rpat, 1, EOP, P'Unrestricted_Access));
1525 end "+";
1527 function "+" (P : Boolean_Func) return Pattern is
1528 begin
1529 return (AFC with 3, new PE'(PC_Pred_Func, 1, EOP, P));
1530 end "+";
1532 ----------
1533 -- "or" --
1534 ----------
1536 function "or" (L : PString; R : Pattern) return Pattern is
1537 begin
1538 return (AFC with R.Stk + 1, S_To_PE (L) or Copy (R.P));
1539 end "or";
1541 function "or" (L : Pattern; R : PString) return Pattern is
1542 begin
1543 return (AFC with L.Stk + 1, Copy (L.P) or S_To_PE (R));
1544 end "or";
1546 function "or" (L : PString; R : PString) return Pattern is
1547 begin
1548 return (AFC with 1, S_To_PE (L) or S_To_PE (R));
1549 end "or";
1551 function "or" (L : Pattern; R : Pattern) return Pattern is
1552 begin
1553 return (AFC with
1554 Natural'Max (L.Stk, R.Stk) + 1, Copy (L.P) or Copy (R.P));
1555 end "or";
1557 function "or" (L : PChar; R : Pattern) return Pattern is
1558 begin
1559 return (AFC with 1, C_To_PE (L) or Copy (R.P));
1560 end "or";
1562 function "or" (L : Pattern; R : PChar) return Pattern is
1563 begin
1564 return (AFC with 1, Copy (L.P) or C_To_PE (R));
1565 end "or";
1567 function "or" (L : PChar; R : PChar) return Pattern is
1568 begin
1569 return (AFC with 1, C_To_PE (L) or C_To_PE (R));
1570 end "or";
1572 function "or" (L : PString; R : PChar) return Pattern is
1573 begin
1574 return (AFC with 1, S_To_PE (L) or C_To_PE (R));
1575 end "or";
1577 function "or" (L : PChar; R : PString) return Pattern is
1578 begin
1579 return (AFC with 1, C_To_PE (L) or S_To_PE (R));
1580 end "or";
1582 ------------
1583 -- Adjust --
1584 ------------
1586 -- No two patterns share the same pattern elements, so the adjust
1587 -- procedure for a Pattern assignment must do a deep copy of the
1588 -- pattern element structure.
1590 procedure Adjust (Object : in out Pattern) is
1591 begin
1592 Object.P := Copy (Object.P);
1593 end Adjust;
1595 ---------------
1596 -- Alternate --
1597 ---------------
1599 function Alternate (L, R : PE_Ptr) return PE_Ptr is
1600 begin
1601 -- If the left pattern is null, then we just add the alternation
1602 -- node with an index one greater than the right hand pattern.
1604 if L = EOP then
1605 return new PE'(PC_Alt, R.Index + 1, EOP, R);
1607 -- If the left pattern is non-null, then build a reference vector
1608 -- for its elements, and adjust their index values to acccomodate
1609 -- the right hand elements. Then add the alternation node.
1611 else
1612 declare
1613 Refs : Ref_Array (1 .. L.Index);
1615 begin
1616 Build_Ref_Array (L, Refs);
1618 for J in Refs'Range loop
1619 Refs (J).Index := Refs (J).Index + R.Index;
1620 end loop;
1621 end;
1623 return new PE'(PC_Alt, L.Index + 1, L, R);
1624 end if;
1625 end Alternate;
1627 ---------
1628 -- Any --
1629 ---------
1631 function Any (Str : String) return Pattern is
1632 begin
1633 return (AFC with 0, new PE'(PC_Any_CS, 1, EOP, To_Set (Str)));
1634 end Any;
1636 function Any (Str : VString) return Pattern is
1637 begin
1638 return Any (S (Str));
1639 end Any;
1641 function Any (Str : Character) return Pattern is
1642 begin
1643 return (AFC with 0, new PE'(PC_Any_CH, 1, EOP, Str));
1644 end Any;
1646 function Any (Str : Character_Set) return Pattern is
1647 begin
1648 return (AFC with 0, new PE'(PC_Any_CS, 1, EOP, Str));
1649 end Any;
1651 function Any (Str : access VString) return Pattern is
1652 begin
1653 return (AFC with 0, new PE'(PC_Any_VP, 1, EOP, VString_Ptr (Str)));
1654 end Any;
1656 function Any (Str : VString_Func) return Pattern is
1657 begin
1658 return (AFC with 0, new PE'(PC_Any_VF, 1, EOP, Str));
1659 end Any;
1661 ---------
1662 -- Arb --
1663 ---------
1665 -- +---+
1666 -- | X |---->
1667 -- +---+
1668 -- .
1669 -- .
1670 -- +---+
1671 -- | Y |---->
1672 -- +---+
1674 -- The PC_Arb_X element is numbered 2, and the PC_Arb_Y element is 1.
1676 function Arb return Pattern is
1677 Y : constant PE_Ptr := new PE'(PC_Arb_Y, 1, EOP);
1678 X : constant PE_Ptr := new PE'(PC_Arb_X, 2, EOP, Y);
1680 begin
1681 return (AFC with 1, X);
1682 end Arb;
1684 -----------
1685 -- Arbno --
1686 -----------
1688 function Arbno (P : PString) return Pattern is
1689 begin
1690 if P'Length = 0 then
1691 return (AFC with 0, EOP);
1693 else
1694 return (AFC with 0, Arbno_Simple (S_To_PE (P)));
1695 end if;
1696 end Arbno;
1698 function Arbno (P : PChar) return Pattern is
1699 begin
1700 return (AFC with 0, Arbno_Simple (C_To_PE (P)));
1701 end Arbno;
1703 function Arbno (P : Pattern) return Pattern is
1704 Pat : constant PE_Ptr := Copy (P.P);
1706 begin
1707 if P.Stk = 0
1708 and then OK_For_Simple_Arbno (Pat.Pcode)
1709 then
1710 return (AFC with 0, Arbno_Simple (Pat));
1711 end if;
1713 -- This is the complex case, either the pattern makes stack entries
1714 -- or it is possible for the pattern to match the null string (more
1715 -- accurately, we don't know that this is not the case).
1717 -- +--------------------------+
1718 -- | ^
1719 -- V |
1720 -- +---+ |
1721 -- | X |----> |
1722 -- +---+ |
1723 -- . |
1724 -- . |
1725 -- +---+ +---+ +---+ |
1726 -- | E |---->| P |---->| Y |--->+
1727 -- +---+ +---+ +---+
1729 -- The node numbering of the constituent pattern P is not affected.
1730 -- Where N is the number of nodes in P, the Y node is numbered N + 1,
1731 -- the E node is N + 2, and the X node is N + 3.
1733 declare
1734 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1735 X : constant PE_Ptr := new PE'(PC_Arbno_X, 0, EOP, E);
1736 Y : constant PE_Ptr := new PE'(PC_Arbno_Y, 0, X, P.Stk + 3);
1737 EPY : constant PE_Ptr := Bracket (E, Pat, Y);
1739 begin
1740 X.Alt := EPY;
1741 X.Index := EPY.Index + 1;
1742 return (AFC with P.Stk + 3, X);
1743 end;
1744 end Arbno;
1746 ------------------
1747 -- Arbno_Simple --
1748 ------------------
1750 -- +-------------+
1751 -- | ^
1752 -- V |
1753 -- +---+ |
1754 -- | S |----> |
1755 -- +---+ |
1756 -- . |
1757 -- . |
1758 -- +---+ |
1759 -- | P |---------->+
1760 -- +---+
1762 -- The node numbering of the constituent pattern P is not affected.
1763 -- The S node has a node number of P.Index + 1.
1765 -- Note that we know that P cannot be EOP, because a null pattern
1766 -- does not meet the requirements for simple Arbno.
1768 function Arbno_Simple (P : PE_Ptr) return PE_Ptr is
1769 S : constant PE_Ptr := new PE'(PC_Arbno_S, P.Index + 1, EOP, P);
1771 begin
1772 Set_Successor (P, S);
1773 return S;
1774 end Arbno_Simple;
1776 ---------
1777 -- Bal --
1778 ---------
1780 function Bal return Pattern is
1781 begin
1782 return (AFC with 1, new PE'(PC_Bal, 1, EOP));
1783 end Bal;
1785 -------------
1786 -- Bracket --
1787 -------------
1789 function Bracket (E, P, A : PE_Ptr) return PE_Ptr is
1790 begin
1791 if P = EOP then
1792 E.Pthen := A;
1793 E.Index := 2;
1794 A.Index := 1;
1796 else
1797 E.Pthen := P;
1798 Set_Successor (P, A);
1799 E.Index := P.Index + 2;
1800 A.Index := P.Index + 1;
1801 end if;
1803 return E;
1804 end Bracket;
1806 -----------
1807 -- Break --
1808 -----------
1810 function Break (Str : String) return Pattern is
1811 begin
1812 return (AFC with 0, new PE'(PC_Break_CS, 1, EOP, To_Set (Str)));
1813 end Break;
1815 function Break (Str : VString) return Pattern is
1816 begin
1817 return Break (S (Str));
1818 end Break;
1820 function Break (Str : Character) return Pattern is
1821 begin
1822 return (AFC with 0, new PE'(PC_Break_CH, 1, EOP, Str));
1823 end Break;
1825 function Break (Str : Character_Set) return Pattern is
1826 begin
1827 return (AFC with 0, new PE'(PC_Break_CS, 1, EOP, Str));
1828 end Break;
1830 function Break (Str : access VString) return Pattern is
1831 begin
1832 return (AFC with 0, new PE'(PC_Break_VP, 1, EOP, VString_Ptr (Str)));
1833 end Break;
1835 function Break (Str : VString_Func) return Pattern is
1836 begin
1837 return (AFC with 0, new PE'(PC_Break_VF, 1, EOP, Str));
1838 end Break;
1840 ------------
1841 -- BreakX --
1842 ------------
1844 function BreakX (Str : String) return Pattern is
1845 begin
1846 return BreakX_Make (new PE'(PC_BreakX_CS, 3, N, To_Set (Str)));
1847 end BreakX;
1849 function BreakX (Str : VString) return Pattern is
1850 begin
1851 return BreakX (S (Str));
1852 end BreakX;
1854 function BreakX (Str : Character) return Pattern is
1855 begin
1856 return BreakX_Make (new PE'(PC_BreakX_CH, 3, N, Str));
1857 end BreakX;
1859 function BreakX (Str : Character_Set) return Pattern is
1860 begin
1861 return BreakX_Make (new PE'(PC_BreakX_CS, 3, N, Str));
1862 end BreakX;
1864 function BreakX (Str : access VString) return Pattern is
1865 begin
1866 return BreakX_Make (new PE'(PC_BreakX_VP, 3, N, VString_Ptr (Str)));
1867 end BreakX;
1869 function BreakX (Str : VString_Func) return Pattern is
1870 begin
1871 return BreakX_Make (new PE'(PC_BreakX_VF, 3, N, Str));
1872 end BreakX;
1874 -----------------
1875 -- BreakX_Make --
1876 -----------------
1878 -- +---+ +---+
1879 -- | B |---->| A |---->
1880 -- +---+ +---+
1881 -- ^ .
1882 -- | .
1883 -- | +---+
1884 -- +<------| X |
1885 -- +---+
1887 -- The B node is numbered 3, the alternative node is 1, and the X
1888 -- node is 2.
1890 function BreakX_Make (B : PE_Ptr) return Pattern is
1891 X : constant PE_Ptr := new PE'(PC_BreakX_X, 2, B);
1892 A : constant PE_Ptr := new PE'(PC_Alt, 1, EOP, X);
1894 begin
1895 B.Pthen := A;
1896 return (AFC with 2, B);
1897 end BreakX_Make;
1899 ---------------------
1900 -- Build_Ref_Array --
1901 ---------------------
1903 procedure Build_Ref_Array (E : PE_Ptr; RA : out Ref_Array) is
1905 procedure Record_PE (E : PE_Ptr);
1906 -- Record given pattern element if not already recorded in RA,
1907 -- and also record any referenced pattern elements recursively.
1909 procedure Record_PE (E : PE_Ptr) is
1910 begin
1911 PutD (" Record_PE called with PE_Ptr = " & Image (E));
1913 if E = EOP or else RA (E.Index) /= null then
1914 Put_LineD (", nothing to do");
1915 return;
1917 else
1918 Put_LineD (", recording" & IndexT'Image (E.Index));
1919 RA (E.Index) := E;
1920 Record_PE (E.Pthen);
1922 if E.Pcode in PC_Has_Alt then
1923 Record_PE (E.Alt);
1924 end if;
1925 end if;
1926 end Record_PE;
1928 -- Start of processing for Build_Ref_Array
1930 begin
1931 New_LineD;
1932 Put_LineD ("Entering Build_Ref_Array");
1933 Record_PE (E);
1934 New_LineD;
1935 end Build_Ref_Array;
1937 -------------
1938 -- C_To_PE --
1939 -------------
1941 function C_To_PE (C : PChar) return PE_Ptr is
1942 begin
1943 return new PE'(PC_Char, 1, EOP, C);
1944 end C_To_PE;
1946 ------------
1947 -- Cancel --
1948 ------------
1950 function Cancel return Pattern is
1951 begin
1952 return (AFC with 0, new PE'(PC_Cancel, 1, EOP));
1953 end Cancel;
1955 ------------
1956 -- Concat --
1957 ------------
1959 -- Concat needs to traverse the left operand performing the following
1960 -- set of fixups:
1962 -- a) Any successor pointers (Pthen fields) that are set to EOP are
1963 -- reset to point to the second operand.
1965 -- b) Any PC_Arbno_Y node has its stack count field incremented
1966 -- by the parameter Incr provided for this purpose.
1968 -- d) Num fields of all pattern elements in the left operand are
1969 -- adjusted to include the elements of the right operand.
1971 -- Note: we do not use Set_Successor in the processing for Concat, since
1972 -- there is no point in doing two traversals, we may as well do everything
1973 -- at the same time.
1975 function Concat (L, R : PE_Ptr; Incr : Natural) return PE_Ptr is
1976 begin
1977 if L = EOP then
1978 return R;
1980 elsif R = EOP then
1981 return L;
1983 else
1984 declare
1985 Refs : Ref_Array (1 .. L.Index);
1986 -- We build a reference array for L whose N'th element points to
1987 -- the pattern element of L whose original Index value is N.
1989 P : PE_Ptr;
1991 begin
1992 Build_Ref_Array (L, Refs);
1994 for J in Refs'Range loop
1995 P := Refs (J);
1997 P.Index := P.Index + R.Index;
1999 if P.Pcode = PC_Arbno_Y then
2000 P.Nat := P.Nat + Incr;
2001 end if;
2003 if P.Pthen = EOP then
2004 P.Pthen := R;
2005 end if;
2007 if P.Pcode in PC_Has_Alt and then P.Alt = EOP then
2008 P.Alt := R;
2009 end if;
2010 end loop;
2011 end;
2013 return L;
2014 end if;
2015 end Concat;
2017 ----------
2018 -- Copy --
2019 ----------
2021 function Copy (P : PE_Ptr) return PE_Ptr is
2022 begin
2023 if P = null then
2024 Uninitialized_Pattern;
2026 else
2027 declare
2028 Refs : Ref_Array (1 .. P.Index);
2029 -- References to elements in P, indexed by Index field
2031 Copy : Ref_Array (1 .. P.Index);
2032 -- Holds copies of elements of P, indexed by Index field.
2034 E : PE_Ptr;
2036 begin
2037 Build_Ref_Array (P, Refs);
2039 -- Now copy all nodes
2041 for J in Refs'Range loop
2042 Copy (J) := new PE'(Refs (J).all);
2043 end loop;
2045 -- Adjust all internal references
2047 for J in Copy'Range loop
2048 E := Copy (J);
2050 -- Adjust successor pointer to point to copy
2052 if E.Pthen /= EOP then
2053 E.Pthen := Copy (E.Pthen.Index);
2054 end if;
2056 -- Adjust Alt pointer if there is one to point to copy
2058 if E.Pcode in PC_Has_Alt and then E.Alt /= EOP then
2059 E.Alt := Copy (E.Alt.Index);
2060 end if;
2062 -- Copy referenced string
2064 if E.Pcode = PC_String then
2065 E.Str := new String'(E.Str.all);
2066 end if;
2067 end loop;
2069 return Copy (P.Index);
2070 end;
2071 end if;
2072 end Copy;
2074 ----------
2075 -- Dump --
2076 ----------
2078 procedure Dump (P : Pattern) is
2080 subtype Count is Ada.Text_IO.Count;
2081 Scol : Count;
2082 -- Used to keep track of column in dump output
2084 Refs : Ref_Array (1 .. P.P.Index);
2085 -- We build a reference array whose N'th element points to the
2086 -- pattern element whose Index value is N.
2088 Cols : Natural := 2;
2089 -- Number of columns used for pattern numbers, minimum is 2
2091 E : PE_Ptr;
2093 procedure Write_Node_Id (E : PE_Ptr);
2094 -- Writes out a string identifying the given pattern element.
2096 procedure Write_Node_Id (E : PE_Ptr) is
2097 begin
2098 if E = EOP then
2099 Put ("EOP");
2101 for J in 4 .. Cols loop
2102 Put (' ');
2103 end loop;
2105 else
2106 declare
2107 Str : String (1 .. Cols);
2108 N : Natural := Natural (E.Index);
2110 begin
2111 Put ("#");
2113 for J in reverse Str'Range loop
2114 Str (J) := Character'Val (48 + N mod 10);
2115 N := N / 10;
2116 end loop;
2118 Put (Str);
2119 end;
2120 end if;
2121 end Write_Node_Id;
2123 begin
2124 New_Line;
2125 Put ("Pattern Dump Output (pattern at " &
2126 Image (P'Address) &
2127 ", S = " & Natural'Image (P.Stk) & ')');
2129 Scol := Col;
2130 New_Line;
2132 while Col < Scol loop
2133 Put ('-');
2134 end loop;
2136 New_Line;
2138 -- If uninitialized pattern, dump line and we are done
2140 if P.P = null then
2141 Put_Line ("Uninitialized pattern value");
2142 return;
2143 end if;
2145 -- If null pattern, just dump it and we are all done
2147 if P.P = EOP then
2148 Put_Line ("EOP (null pattern)");
2149 return;
2150 end if;
2152 Build_Ref_Array (P.P, Refs);
2154 -- Set number of columns required for node numbers
2156 while 10 ** Cols - 1 < Integer (P.P.Index) loop
2157 Cols := Cols + 1;
2158 end loop;
2160 -- Now dump the nodes in reverse sequence. We output them in reverse
2161 -- sequence since this corresponds to the natural order used to
2162 -- construct the patterns.
2164 for J in reverse Refs'Range loop
2165 E := Refs (J);
2166 Write_Node_Id (E);
2167 Set_Col (Count (Cols) + 4);
2168 Put (Image (E));
2169 Put (" ");
2170 Put (Pattern_Code'Image (E.Pcode));
2171 Put (" ");
2172 Set_Col (21 + Count (Cols) + Address_Image_Length);
2173 Write_Node_Id (E.Pthen);
2174 Set_Col (24 + 2 * Count (Cols) + Address_Image_Length);
2176 case E.Pcode is
2178 when PC_Alt |
2179 PC_Arb_X |
2180 PC_Arbno_S |
2181 PC_Arbno_X =>
2182 Write_Node_Id (E.Alt);
2184 when PC_Rpat =>
2185 Put (Str_PP (E.PP));
2187 when PC_Pred_Func =>
2188 Put (Str_BF (E.BF));
2190 when PC_Assign_Imm |
2191 PC_Assign_OnM |
2192 PC_Any_VP |
2193 PC_Break_VP |
2194 PC_BreakX_VP |
2195 PC_NotAny_VP |
2196 PC_NSpan_VP |
2197 PC_Span_VP |
2198 PC_String_VP =>
2199 Put (Str_VP (E.VP));
2201 when PC_Write_Imm |
2202 PC_Write_OnM =>
2203 Put (Str_FP (E.FP));
2205 when PC_String =>
2206 Put (Image (E.Str.all));
2208 when PC_String_2 =>
2209 Put (Image (E.Str2));
2211 when PC_String_3 =>
2212 Put (Image (E.Str3));
2214 when PC_String_4 =>
2215 Put (Image (E.Str4));
2217 when PC_String_5 =>
2218 Put (Image (E.Str5));
2220 when PC_String_6 =>
2221 Put (Image (E.Str6));
2223 when PC_Setcur =>
2224 Put (Str_NP (E.Var));
2226 when PC_Any_CH |
2227 PC_Break_CH |
2228 PC_BreakX_CH |
2229 PC_Char |
2230 PC_NotAny_CH |
2231 PC_NSpan_CH |
2232 PC_Span_CH =>
2233 Put (''' & E.Char & ''');
2235 when PC_Any_CS |
2236 PC_Break_CS |
2237 PC_BreakX_CS |
2238 PC_NotAny_CS |
2239 PC_NSpan_CS |
2240 PC_Span_CS =>
2241 Put ('"' & To_Sequence (E.CS) & '"');
2243 when PC_Arbno_Y |
2244 PC_Len_Nat |
2245 PC_Pos_Nat |
2246 PC_RPos_Nat |
2247 PC_RTab_Nat |
2248 PC_Tab_Nat =>
2249 Put (S (E.Nat));
2251 when PC_Pos_NF |
2252 PC_Len_NF |
2253 PC_RPos_NF |
2254 PC_RTab_NF |
2255 PC_Tab_NF =>
2256 Put (Str_NF (E.NF));
2258 when PC_Pos_NP |
2259 PC_Len_NP |
2260 PC_RPos_NP |
2261 PC_RTab_NP |
2262 PC_Tab_NP =>
2263 Put (Str_NP (E.NP));
2265 when PC_Any_VF |
2266 PC_Break_VF |
2267 PC_BreakX_VF |
2268 PC_NotAny_VF |
2269 PC_NSpan_VF |
2270 PC_Span_VF |
2271 PC_String_VF =>
2272 Put (Str_VF (E.VF));
2274 when others => null;
2276 end case;
2278 New_Line;
2279 end loop;
2281 New_Line;
2282 end Dump;
2284 ----------
2285 -- Fail --
2286 ----------
2288 function Fail return Pattern is
2289 begin
2290 return (AFC with 0, new PE'(PC_Fail, 1, EOP));
2291 end Fail;
2293 -----------
2294 -- Fence --
2295 -----------
2297 -- Simple case
2299 function Fence return Pattern is
2300 begin
2301 return (AFC with 1, new PE'(PC_Fence, 1, EOP));
2302 end Fence;
2304 -- Function case
2306 -- +---+ +---+ +---+
2307 -- | E |---->| P |---->| X |---->
2308 -- +---+ +---+ +---+
2310 -- The node numbering of the constituent pattern P is not affected.
2311 -- Where N is the number of nodes in P, the X node is numbered N + 1,
2312 -- and the E node is N + 2.
2314 function Fence (P : Pattern) return Pattern is
2315 Pat : constant PE_Ptr := Copy (P.P);
2316 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
2317 X : constant PE_Ptr := new PE'(PC_Fence_X, 0, EOP);
2319 begin
2320 return (AFC with P.Stk + 1, Bracket (E, Pat, X));
2321 end Fence;
2323 --------------
2324 -- Finalize --
2325 --------------
2327 procedure Finalize (Object : in out Pattern) is
2329 procedure Free is new Unchecked_Deallocation (PE, PE_Ptr);
2330 procedure Free is new Unchecked_Deallocation (String, String_Ptr);
2332 begin
2333 -- Nothing to do if already freed
2335 if Object.P = null then
2336 return;
2338 -- Otherwise we must free all elements
2340 else
2341 declare
2342 Refs : Ref_Array (1 .. Object.P.Index);
2343 -- References to elements in pattern to be finalized
2345 begin
2346 Build_Ref_Array (Object.P, Refs);
2348 for J in Refs'Range loop
2349 if Refs (J).Pcode = PC_String then
2350 Free (Refs (J).Str);
2351 end if;
2353 Free (Refs (J));
2354 end loop;
2356 Object.P := null;
2357 end;
2358 end if;
2359 end Finalize;
2361 -----------
2362 -- Image --
2363 -----------
2365 function Image (P : PE_Ptr) return String is
2366 begin
2367 return Image (To_Address (P));
2368 end Image;
2370 function Image (P : Pattern) return String is
2371 begin
2372 return S (Image (P));
2373 end Image;
2375 function Image (P : Pattern) return VString is
2377 Kill_Ampersand : Boolean := False;
2378 -- Set True to delete next & to be output to Result
2380 Result : VString := Nul;
2381 -- The result is accumulated here, using Append
2383 Refs : Ref_Array (1 .. P.P.Index);
2384 -- We build a reference array whose N'th element points to the
2385 -- pattern element whose Index value is N.
2387 procedure Delete_Ampersand;
2388 -- Deletes the ampersand at the end of Result
2390 procedure Image_Seq (E : PE_Ptr; Succ : PE_Ptr; Paren : Boolean);
2391 -- E refers to a pattern structure whose successor is given by Succ.
2392 -- This procedure appends to Result a representation of this pattern.
2393 -- The Paren parameter indicates whether parentheses are required if
2394 -- the output is more than one element.
2396 procedure Image_One (E : in out PE_Ptr);
2397 -- E refers to a pattern structure. This procedure appends to Result
2398 -- a representation of the single simple or compound pattern structure
2399 -- at the start of E and updates E to point to its successor.
2401 ----------------------
2402 -- Delete_Ampersand --
2403 ----------------------
2405 procedure Delete_Ampersand is
2406 L : Natural := Length (Result);
2408 begin
2409 if L > 2 then
2410 Delete (Result, L - 1, L);
2411 end if;
2412 end Delete_Ampersand;
2414 ---------------
2415 -- Image_One --
2416 ---------------
2418 procedure Image_One (E : in out PE_Ptr) is
2420 ER : PE_Ptr := E.Pthen;
2421 -- Successor set as result in E unless reset
2423 begin
2424 case E.Pcode is
2426 when PC_Cancel =>
2427 Append (Result, "Cancel");
2429 when PC_Alt => Alt : declare
2431 Elmts_In_L : constant IndexT := E.Pthen.Index - E.Alt.Index;
2432 -- Number of elements in left pattern of alternation.
2434 Lowest_In_L : constant IndexT := E.Index - Elmts_In_L;
2435 -- Number of lowest index in elements of left pattern
2437 E1 : PE_Ptr;
2439 begin
2440 -- The successor of the alternation node must have a lower
2441 -- index than any node that is in the left pattern or a
2442 -- higher index than the alternation node itself.
2444 while ER /= EOP
2445 and then ER.Index >= Lowest_In_L
2446 and then ER.Index < E.Index
2447 loop
2448 ER := ER.Pthen;
2449 end loop;
2451 Append (Result, '(');
2453 E1 := E;
2454 loop
2455 Image_Seq (E1.Pthen, ER, False);
2456 Append (Result, " or ");
2457 E1 := E1.Alt;
2458 exit when E1.Pcode /= PC_Alt;
2459 end loop;
2461 Image_Seq (E1, ER, False);
2462 Append (Result, ')');
2463 end Alt;
2465 when PC_Any_CS =>
2466 Append (Result, "Any (" & Image (To_Sequence (E.CS)) & ')');
2468 when PC_Any_VF =>
2469 Append (Result, "Any (" & Str_VF (E.VF) & ')');
2471 when PC_Any_VP =>
2472 Append (Result, "Any (" & Str_VP (E.VP) & ')');
2474 when PC_Arb_X =>
2475 Append (Result, "Arb");
2477 when PC_Arbno_S =>
2478 Append (Result, "Arbno (");
2479 Image_Seq (E.Alt, E, False);
2480 Append (Result, ')');
2482 when PC_Arbno_X =>
2483 Append (Result, "Arbno (");
2484 Image_Seq (E.Alt.Pthen, Refs (E.Index - 2), False);
2485 Append (Result, ')');
2487 when PC_Assign_Imm =>
2488 Delete_Ampersand;
2489 Append (Result, "* " & Str_VP (Refs (E.Index - 1).VP));
2491 when PC_Assign_OnM =>
2492 Delete_Ampersand;
2493 Append (Result, "** " & Str_VP (Refs (E.Index - 1).VP));
2495 when PC_Any_CH =>
2496 Append (Result, "Any ('" & E.Char & "')");
2498 when PC_Bal =>
2499 Append (Result, "Bal");
2501 when PC_Break_CH =>
2502 Append (Result, "Break ('" & E.Char & "')");
2504 when PC_Break_CS =>
2505 Append (Result, "Break (" & Image (To_Sequence (E.CS)) & ')');
2507 when PC_Break_VF =>
2508 Append (Result, "Break (" & Str_VF (E.VF) & ')');
2510 when PC_Break_VP =>
2511 Append (Result, "Break (" & Str_VP (E.VP) & ')');
2513 when PC_BreakX_CH =>
2514 Append (Result, "BreakX ('" & E.Char & "')");
2515 ER := ER.Pthen;
2517 when PC_BreakX_CS =>
2518 Append (Result, "BreakX (" & Image (To_Sequence (E.CS)) & ')');
2519 ER := ER.Pthen;
2521 when PC_BreakX_VF =>
2522 Append (Result, "BreakX (" & Str_VF (E.VF) & ')');
2523 ER := ER.Pthen;
2525 when PC_BreakX_VP =>
2526 Append (Result, "BreakX (" & Str_VP (E.VP) & ')');
2527 ER := ER.Pthen;
2529 when PC_Char =>
2530 Append (Result, ''' & E.Char & ''');
2532 when PC_Fail =>
2533 Append (Result, "Fail");
2535 when PC_Fence =>
2536 Append (Result, "Fence");
2538 when PC_Fence_X =>
2539 Append (Result, "Fence (");
2540 Image_Seq (E.Pthen, Refs (E.Index - 1), False);
2541 Append (Result, ")");
2542 ER := Refs (E.Index - 1).Pthen;
2544 when PC_Len_Nat =>
2545 Append (Result, "Len (" & E.Nat & ')');
2547 when PC_Len_NF =>
2548 Append (Result, "Len (" & Str_NF (E.NF) & ')');
2550 when PC_Len_NP =>
2551 Append (Result, "Len (" & Str_NP (E.NP) & ')');
2553 when PC_NotAny_CH =>
2554 Append (Result, "NotAny ('" & E.Char & "')");
2556 when PC_NotAny_CS =>
2557 Append (Result, "NotAny (" & Image (To_Sequence (E.CS)) & ')');
2559 when PC_NotAny_VF =>
2560 Append (Result, "NotAny (" & Str_VF (E.VF) & ')');
2562 when PC_NotAny_VP =>
2563 Append (Result, "NotAny (" & Str_VP (E.VP) & ')');
2565 when PC_NSpan_CH =>
2566 Append (Result, "NSpan ('" & E.Char & "')");
2568 when PC_NSpan_CS =>
2569 Append (Result, "NSpan (" & Image (To_Sequence (E.CS)) & ')');
2571 when PC_NSpan_VF =>
2572 Append (Result, "NSpan (" & Str_VF (E.VF) & ')');
2574 when PC_NSpan_VP =>
2575 Append (Result, "NSpan (" & Str_VP (E.VP) & ')');
2577 when PC_Null =>
2578 Append (Result, """""");
2580 when PC_Pos_Nat =>
2581 Append (Result, "Pos (" & E.Nat & ')');
2583 when PC_Pos_NF =>
2584 Append (Result, "Pos (" & Str_NF (E.NF) & ')');
2586 when PC_Pos_NP =>
2587 Append (Result, "Pos (" & Str_NP (E.NP) & ')');
2589 when PC_R_Enter =>
2590 Kill_Ampersand := True;
2592 when PC_Rest =>
2593 Append (Result, "Rest");
2595 when PC_Rpat =>
2596 Append (Result, "(+ " & Str_PP (E.PP) & ')');
2598 when PC_Pred_Func =>
2599 Append (Result, "(+ " & Str_BF (E.BF) & ')');
2601 when PC_RPos_Nat =>
2602 Append (Result, "RPos (" & E.Nat & ')');
2604 when PC_RPos_NF =>
2605 Append (Result, "RPos (" & Str_NF (E.NF) & ')');
2607 when PC_RPos_NP =>
2608 Append (Result, "RPos (" & Str_NP (E.NP) & ')');
2610 when PC_RTab_Nat =>
2611 Append (Result, "RTab (" & E.Nat & ')');
2613 when PC_RTab_NF =>
2614 Append (Result, "RTab (" & Str_NF (E.NF) & ')');
2616 when PC_RTab_NP =>
2617 Append (Result, "RTab (" & Str_NP (E.NP) & ')');
2619 when PC_Setcur =>
2620 Append (Result, "Setcur (" & Str_NP (E.Var) & ')');
2622 when PC_Span_CH =>
2623 Append (Result, "Span ('" & E.Char & "')");
2625 when PC_Span_CS =>
2626 Append (Result, "Span (" & Image (To_Sequence (E.CS)) & ')');
2628 when PC_Span_VF =>
2629 Append (Result, "Span (" & Str_VF (E.VF) & ')');
2631 when PC_Span_VP =>
2632 Append (Result, "Span (" & Str_VP (E.VP) & ')');
2634 when PC_String =>
2635 Append (Result, Image (E.Str.all));
2637 when PC_String_2 =>
2638 Append (Result, Image (E.Str2));
2640 when PC_String_3 =>
2641 Append (Result, Image (E.Str3));
2643 when PC_String_4 =>
2644 Append (Result, Image (E.Str4));
2646 when PC_String_5 =>
2647 Append (Result, Image (E.Str5));
2649 when PC_String_6 =>
2650 Append (Result, Image (E.Str6));
2652 when PC_String_VF =>
2653 Append (Result, "(+" & Str_VF (E.VF) & ')');
2655 when PC_String_VP =>
2656 Append (Result, "(+" & Str_VP (E.VP) & ')');
2658 when PC_Succeed =>
2659 Append (Result, "Succeed");
2661 when PC_Tab_Nat =>
2662 Append (Result, "Tab (" & E.Nat & ')');
2664 when PC_Tab_NF =>
2665 Append (Result, "Tab (" & Str_NF (E.NF) & ')');
2667 when PC_Tab_NP =>
2668 Append (Result, "Tab (" & Str_NP (E.NP) & ')');
2670 when PC_Write_Imm =>
2671 Append (Result, '(');
2672 Image_Seq (E, Refs (E.Index - 1), True);
2673 Append (Result, " * " & Str_FP (Refs (E.Index - 1).FP));
2674 ER := Refs (E.Index - 1).Pthen;
2676 when PC_Write_OnM =>
2677 Append (Result, '(');
2678 Image_Seq (E.Pthen, Refs (E.Index - 1), True);
2679 Append (Result, " ** " & Str_FP (Refs (E.Index - 1).FP));
2680 ER := Refs (E.Index - 1).Pthen;
2682 -- Other pattern codes should not appear as leading elements
2684 when PC_Arb_Y |
2685 PC_Arbno_Y |
2686 PC_Assign |
2687 PC_BreakX_X |
2688 PC_EOP |
2689 PC_Fence_Y |
2690 PC_R_Remove |
2691 PC_R_Restore |
2692 PC_Unanchored =>
2693 Append (Result, "???");
2695 end case;
2697 E := ER;
2698 end Image_One;
2700 ---------------
2701 -- Image_Seq --
2702 ---------------
2704 procedure Image_Seq (E : PE_Ptr; Succ : PE_Ptr; Paren : Boolean) is
2705 E1 : PE_Ptr := E;
2706 Mult : Boolean := False;
2707 Indx : Natural := Length (Result);
2709 begin
2710 -- The image of EOP is "" (the null string)
2712 if E = EOP then
2713 Append (Result, """""");
2715 -- Else generate appropriate concatenation sequence
2717 else
2718 loop
2719 Image_One (E1);
2720 exit when E1 = Succ;
2721 exit when E1 = EOP;
2722 Mult := True;
2724 if Kill_Ampersand then
2725 Kill_Ampersand := False;
2726 else
2727 Append (Result, " & ");
2728 end if;
2729 end loop;
2730 end if;
2732 if Mult and Paren then
2733 Insert (Result, Indx + 1, "(");
2734 Append (Result, ")");
2735 end if;
2736 end Image_Seq;
2738 -- Start of processing for Image
2740 begin
2741 Build_Ref_Array (P.P, Refs);
2742 Image_Seq (P.P, EOP, False);
2743 return Result;
2744 end Image;
2746 -----------
2747 -- Is_In --
2748 -----------
2750 function Is_In (C : Character; Str : String) return Boolean is
2751 begin
2752 for J in Str'Range loop
2753 if Str (J) = C then
2754 return True;
2755 end if;
2756 end loop;
2758 return False;
2759 end Is_In;
2761 ---------
2762 -- Len --
2763 ---------
2765 function Len (Count : Natural) return Pattern is
2766 begin
2767 -- Note, the following is not just an optimization, it is needed
2768 -- to ensure that Arbno (Len (0)) does not generate an infinite
2769 -- matching loop (since PC_Len_Nat is OK_For_Simple_Arbno).
2771 if Count = 0 then
2772 return (AFC with 0, new PE'(PC_Null, 1, EOP));
2774 else
2775 return (AFC with 0, new PE'(PC_Len_Nat, 1, EOP, Count));
2776 end if;
2777 end Len;
2779 function Len (Count : Natural_Func) return Pattern is
2780 begin
2781 return (AFC with 0, new PE'(PC_Len_NF, 1, EOP, Count));
2782 end Len;
2784 function Len (Count : access Natural) return Pattern is
2785 begin
2786 return (AFC with 0, new PE'(PC_Len_NP, 1, EOP, Natural_Ptr (Count)));
2787 end Len;
2789 -----------------
2790 -- Logic_Error --
2791 -----------------
2793 procedure Logic_Error is
2794 begin
2795 Raise_Exception
2796 (Program_Error'Identity,
2797 "Internal logic error in GNAT.Spitbol.Patterns");
2798 end Logic_Error;
2800 -----------
2801 -- Match --
2802 -----------
2804 function Match
2805 (Subject : VString;
2806 Pat : Pattern)
2807 return Boolean
2809 Start, Stop : Natural;
2811 begin
2812 if Debug_Mode then
2813 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2814 else
2815 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2816 end if;
2818 return Start /= 0;
2819 end Match;
2821 function Match
2822 (Subject : String;
2823 Pat : Pattern)
2824 return Boolean
2826 Start, Stop : Natural;
2827 subtype String1 is String (1 .. Subject'Length);
2829 begin
2830 if Debug_Mode then
2831 XMatchD (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2832 else
2833 XMatch (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2834 end if;
2836 return Start /= 0;
2837 end Match;
2839 function Match
2840 (Subject : VString_Var;
2841 Pat : Pattern;
2842 Replace : VString)
2843 return Boolean
2845 Start, Stop : Natural;
2847 begin
2848 if Debug_Mode then
2849 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2850 else
2851 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2852 end if;
2854 if Start = 0 then
2855 return False;
2856 else
2857 Replace_Slice
2858 (Subject'Unrestricted_Access.all,
2859 Start, Stop, Get_String (Replace).all);
2860 return True;
2861 end if;
2862 end Match;
2864 function Match
2865 (Subject : VString_Var;
2866 Pat : Pattern;
2867 Replace : String)
2868 return Boolean
2870 Start, Stop : Natural;
2872 begin
2873 if Debug_Mode then
2874 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2875 else
2876 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2877 end if;
2879 if Start = 0 then
2880 return False;
2881 else
2882 Replace_Slice
2883 (Subject'Unrestricted_Access.all, Start, Stop, Replace);
2884 return True;
2885 end if;
2886 end Match;
2888 procedure Match
2889 (Subject : VString;
2890 Pat : Pattern)
2892 Start, Stop : Natural;
2894 begin
2895 if Debug_Mode then
2896 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2897 else
2898 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2899 end if;
2901 end Match;
2903 procedure Match
2904 (Subject : String;
2905 Pat : Pattern)
2907 Start, Stop : Natural;
2908 subtype String1 is String (1 .. Subject'Length);
2909 begin
2910 if Debug_Mode then
2911 XMatchD (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2912 else
2913 XMatch (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2914 end if;
2915 end Match;
2917 procedure Match
2918 (Subject : in out VString;
2919 Pat : Pattern;
2920 Replace : VString)
2922 Start, Stop : Natural;
2924 begin
2925 if Debug_Mode then
2926 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2927 else
2928 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2929 end if;
2931 if Start /= 0 then
2932 Replace_Slice (Subject, Start, Stop, Get_String (Replace).all);
2933 end if;
2934 end Match;
2936 procedure Match
2937 (Subject : in out VString;
2938 Pat : Pattern;
2939 Replace : String)
2941 Start, Stop : Natural;
2943 begin
2944 if Debug_Mode then
2945 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2946 else
2947 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2948 end if;
2950 if Start /= 0 then
2951 Replace_Slice (Subject, Start, Stop, Replace);
2952 end if;
2953 end Match;
2955 function Match
2956 (Subject : VString;
2957 Pat : PString)
2958 return Boolean
2960 Pat_Len : constant Natural := Pat'Length;
2961 Sub_Len : constant Natural := Length (Subject);
2962 Sub_Str : constant String_Access := Get_String (Subject);
2964 begin
2965 if Anchored_Mode then
2966 if Pat_Len > Sub_Len then
2967 return False;
2968 else
2969 return Pat = Sub_Str.all (1 .. Pat_Len);
2970 end if;
2972 else
2973 for J in 1 .. Sub_Len - Pat_Len + 1 loop
2974 if Pat = Sub_Str.all (J .. J + (Pat_Len - 1)) then
2975 return True;
2976 end if;
2977 end loop;
2979 return False;
2980 end if;
2981 end Match;
2983 function Match
2984 (Subject : String;
2985 Pat : PString)
2986 return Boolean
2988 Pat_Len : constant Natural := Pat'Length;
2989 Sub_Len : constant Natural := Subject'Length;
2990 SFirst : constant Natural := Subject'First;
2992 begin
2993 if Anchored_Mode then
2994 if Pat_Len > Sub_Len then
2995 return False;
2996 else
2997 return Pat = Subject (SFirst .. SFirst + Pat_Len - 1);
2998 end if;
3000 else
3001 for J in SFirst .. SFirst + Sub_Len - Pat_Len loop
3002 if Pat = Subject (J .. J + (Pat_Len - 1)) then
3003 return True;
3004 end if;
3005 end loop;
3007 return False;
3008 end if;
3009 end Match;
3011 function Match
3012 (Subject : VString_Var;
3013 Pat : PString;
3014 Replace : VString)
3015 return Boolean
3017 Start, Stop : Natural;
3019 begin
3020 if Debug_Mode then
3021 XMatchD (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3022 else
3023 XMatch (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3024 end if;
3026 if Start = 0 then
3027 return False;
3028 else
3029 Replace_Slice
3030 (Subject'Unrestricted_Access.all,
3031 Start, Stop, Get_String (Replace).all);
3032 return True;
3033 end if;
3034 end Match;
3036 function Match
3037 (Subject : VString_Var;
3038 Pat : PString;
3039 Replace : String)
3040 return Boolean
3042 Start, Stop : Natural;
3044 begin
3045 if Debug_Mode then
3046 XMatchD (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3047 else
3048 XMatch (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3049 end if;
3051 if Start = 0 then
3052 return False;
3053 else
3054 Replace_Slice
3055 (Subject'Unrestricted_Access.all, Start, Stop, Replace);
3056 return True;
3057 end if;
3058 end Match;
3060 procedure Match
3061 (Subject : VString;
3062 Pat : PString)
3064 Start, Stop : Natural;
3066 begin
3067 if Debug_Mode then
3068 XMatchD (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3069 else
3070 XMatch (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3071 end if;
3072 end Match;
3074 procedure Match
3075 (Subject : String;
3076 Pat : PString)
3078 Start, Stop : Natural;
3079 subtype String1 is String (1 .. Subject'Length);
3081 begin
3082 if Debug_Mode then
3083 XMatchD (String1 (Subject), S_To_PE (Pat), 0, Start, Stop);
3084 else
3085 XMatch (String1 (Subject), S_To_PE (Pat), 0, Start, Stop);
3086 end if;
3087 end Match;
3089 procedure Match
3090 (Subject : in out VString;
3091 Pat : PString;
3092 Replace : VString)
3094 Start, Stop : Natural;
3096 begin
3097 if Debug_Mode then
3098 XMatchD (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3099 else
3100 XMatch (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3101 end if;
3103 if Start /= 0 then
3104 Replace_Slice (Subject, Start, Stop, Get_String (Replace).all);
3105 end if;
3106 end Match;
3108 procedure Match
3109 (Subject : in out VString;
3110 Pat : PString;
3111 Replace : String)
3113 Start, Stop : Natural;
3115 begin
3116 if Debug_Mode then
3117 XMatchD (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3118 else
3119 XMatch (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3120 end if;
3122 if Start /= 0 then
3123 Replace_Slice (Subject, Start, Stop, Replace);
3124 end if;
3125 end Match;
3127 function Match
3128 (Subject : VString_Var;
3129 Pat : Pattern;
3130 Result : Match_Result_Var)
3131 return Boolean
3133 Start, Stop : Natural;
3135 begin
3136 if Debug_Mode then
3137 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
3138 else
3139 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
3140 end if;
3142 if Start = 0 then
3143 Result'Unrestricted_Access.all.Var := null;
3144 return False;
3146 else
3147 Result'Unrestricted_Access.all.Var := Subject'Unrestricted_Access;
3148 Result'Unrestricted_Access.all.Start := Start;
3149 Result'Unrestricted_Access.all.Stop := Stop;
3150 return True;
3151 end if;
3152 end Match;
3154 procedure Match
3155 (Subject : in out VString;
3156 Pat : Pattern;
3157 Result : out Match_Result)
3159 Start, Stop : Natural;
3161 begin
3162 if Debug_Mode then
3163 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
3164 else
3165 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
3166 end if;
3168 if Start = 0 then
3169 Result.Var := null;
3171 else
3172 Result.Var := Subject'Unrestricted_Access;
3173 Result.Start := Start;
3174 Result.Stop := Stop;
3175 end if;
3176 end Match;
3178 ---------------
3179 -- New_LineD --
3180 ---------------
3182 procedure New_LineD is
3183 begin
3184 if Internal_Debug then
3185 New_Line;
3186 end if;
3187 end New_LineD;
3189 ------------
3190 -- NotAny --
3191 ------------
3193 function NotAny (Str : String) return Pattern is
3194 begin
3195 return (AFC with 0, new PE'(PC_NotAny_CS, 1, EOP, To_Set (Str)));
3196 end NotAny;
3198 function NotAny (Str : VString) return Pattern is
3199 begin
3200 return NotAny (S (Str));
3201 end NotAny;
3203 function NotAny (Str : Character) return Pattern is
3204 begin
3205 return (AFC with 0, new PE'(PC_NotAny_CH, 1, EOP, Str));
3206 end NotAny;
3208 function NotAny (Str : Character_Set) return Pattern is
3209 begin
3210 return (AFC with 0, new PE'(PC_NotAny_CS, 1, EOP, Str));
3211 end NotAny;
3213 function NotAny (Str : access VString) return Pattern is
3214 begin
3215 return (AFC with 0, new PE'(PC_NotAny_VP, 1, EOP, VString_Ptr (Str)));
3216 end NotAny;
3218 function NotAny (Str : VString_Func) return Pattern is
3219 begin
3220 return (AFC with 0, new PE'(PC_NotAny_VF, 1, EOP, Str));
3221 end NotAny;
3223 -----------
3224 -- NSpan --
3225 -----------
3227 function NSpan (Str : String) return Pattern is
3228 begin
3229 return (AFC with 0, new PE'(PC_NSpan_CS, 1, EOP, To_Set (Str)));
3230 end NSpan;
3232 function NSpan (Str : VString) return Pattern is
3233 begin
3234 return NSpan (S (Str));
3235 end NSpan;
3237 function NSpan (Str : Character) return Pattern is
3238 begin
3239 return (AFC with 0, new PE'(PC_NSpan_CH, 1, EOP, Str));
3240 end NSpan;
3242 function NSpan (Str : Character_Set) return Pattern is
3243 begin
3244 return (AFC with 0, new PE'(PC_NSpan_CS, 1, EOP, Str));
3245 end NSpan;
3247 function NSpan (Str : access VString) return Pattern is
3248 begin
3249 return (AFC with 0, new PE'(PC_NSpan_VP, 1, EOP, VString_Ptr (Str)));
3250 end NSpan;
3252 function NSpan (Str : VString_Func) return Pattern is
3253 begin
3254 return (AFC with 0, new PE'(PC_NSpan_VF, 1, EOP, Str));
3255 end NSpan;
3257 ---------
3258 -- Pos --
3259 ---------
3261 function Pos (Count : Natural) return Pattern is
3262 begin
3263 return (AFC with 0, new PE'(PC_Pos_Nat, 1, EOP, Count));
3264 end Pos;
3266 function Pos (Count : Natural_Func) return Pattern is
3267 begin
3268 return (AFC with 0, new PE'(PC_Pos_NF, 1, EOP, Count));
3269 end Pos;
3271 function Pos (Count : access Natural) return Pattern is
3272 begin
3273 return (AFC with 0, new PE'(PC_Pos_NP, 1, EOP, Natural_Ptr (Count)));
3274 end Pos;
3276 ----------
3277 -- PutD --
3278 ----------
3280 procedure PutD (Str : String) is
3281 begin
3282 if Internal_Debug then
3283 Put (Str);
3284 end if;
3285 end PutD;
3287 ---------------
3288 -- Put_LineD --
3289 ---------------
3291 procedure Put_LineD (Str : String) is
3292 begin
3293 if Internal_Debug then
3294 Put_Line (Str);
3295 end if;
3296 end Put_LineD;
3298 -------------
3299 -- Replace --
3300 -------------
3302 procedure Replace
3303 (Result : in out Match_Result;
3304 Replace : VString)
3306 begin
3307 if Result.Var /= null then
3308 Replace_Slice
3309 (Result.Var.all,
3310 Result.Start,
3311 Result.Stop,
3312 Get_String (Replace).all);
3313 Result.Var := null;
3314 end if;
3315 end Replace;
3317 ----------
3318 -- Rest --
3319 ----------
3321 function Rest return Pattern is
3322 begin
3323 return (AFC with 0, new PE'(PC_Rest, 1, EOP));
3324 end Rest;
3326 ----------
3327 -- Rpos --
3328 ----------
3330 function Rpos (Count : Natural) return Pattern is
3331 begin
3332 return (AFC with 0, new PE'(PC_RPos_Nat, 1, EOP, Count));
3333 end Rpos;
3335 function Rpos (Count : Natural_Func) return Pattern is
3336 begin
3337 return (AFC with 0, new PE'(PC_RPos_NF, 1, EOP, Count));
3338 end Rpos;
3340 function Rpos (Count : access Natural) return Pattern is
3341 begin
3342 return (AFC with 0, new PE'(PC_RPos_NP, 1, EOP, Natural_Ptr (Count)));
3343 end Rpos;
3345 ----------
3346 -- Rtab --
3347 ----------
3349 function Rtab (Count : Natural) return Pattern is
3350 begin
3351 return (AFC with 0, new PE'(PC_RTab_Nat, 1, EOP, Count));
3352 end Rtab;
3354 function Rtab (Count : Natural_Func) return Pattern is
3355 begin
3356 return (AFC with 0, new PE'(PC_RTab_NF, 1, EOP, Count));
3357 end Rtab;
3359 function Rtab (Count : access Natural) return Pattern is
3360 begin
3361 return (AFC with 0, new PE'(PC_RTab_NP, 1, EOP, Natural_Ptr (Count)));
3362 end Rtab;
3364 -------------
3365 -- S_To_PE --
3366 -------------
3368 function S_To_PE (Str : PString) return PE_Ptr is
3369 Len : constant Natural := Str'Length;
3371 begin
3372 case Len is
3373 when 0 =>
3374 return new PE'(PC_Null, 1, EOP);
3376 when 1 =>
3377 return new PE'(PC_Char, 1, EOP, Str (1));
3379 when 2 =>
3380 return new PE'(PC_String_2, 1, EOP, Str);
3382 when 3 =>
3383 return new PE'(PC_String_3, 1, EOP, Str);
3385 when 4 =>
3386 return new PE'(PC_String_4, 1, EOP, Str);
3388 when 5 =>
3389 return new PE'(PC_String_5, 1, EOP, Str);
3391 when 6 =>
3392 return new PE'(PC_String_6, 1, EOP, Str);
3394 when others =>
3395 return new PE'(PC_String, 1, EOP, new String'(Str));
3397 end case;
3398 end S_To_PE;
3400 -------------------
3401 -- Set_Successor --
3402 -------------------
3404 -- Note: this procedure is not used by the normal concatenation circuit,
3405 -- since other fixups are required on the left operand in this case, and
3406 -- they might as well be done all together.
3408 procedure Set_Successor (Pat : PE_Ptr; Succ : PE_Ptr) is
3409 begin
3410 if Pat = null then
3411 Uninitialized_Pattern;
3413 elsif Pat = EOP then
3414 Logic_Error;
3416 else
3417 declare
3418 Refs : Ref_Array (1 .. Pat.Index);
3419 -- We build a reference array for L whose N'th element points to
3420 -- the pattern element of L whose original Index value is N.
3422 P : PE_Ptr;
3424 begin
3425 Build_Ref_Array (Pat, Refs);
3427 for J in Refs'Range loop
3428 P := Refs (J);
3430 if P.Pthen = EOP then
3431 P.Pthen := Succ;
3432 end if;
3434 if P.Pcode in PC_Has_Alt and then P.Alt = EOP then
3435 P.Alt := Succ;
3436 end if;
3437 end loop;
3438 end;
3439 end if;
3440 end Set_Successor;
3442 ------------
3443 -- Setcur --
3444 ------------
3446 function Setcur (Var : access Natural) return Pattern is
3447 begin
3448 return (AFC with 0, new PE'(PC_Setcur, 1, EOP, Natural_Ptr (Var)));
3449 end Setcur;
3451 ----------
3452 -- Span --
3453 ----------
3455 function Span (Str : String) return Pattern is
3456 begin
3457 return (AFC with 0, new PE'(PC_Span_CS, 1, EOP, To_Set (Str)));
3458 end Span;
3460 function Span (Str : VString) return Pattern is
3461 begin
3462 return Span (S (Str));
3463 end Span;
3465 function Span (Str : Character) return Pattern is
3466 begin
3467 return (AFC with 0, new PE'(PC_Span_CH, 1, EOP, Str));
3468 end Span;
3470 function Span (Str : Character_Set) return Pattern is
3471 begin
3472 return (AFC with 0, new PE'(PC_Span_CS, 1, EOP, Str));
3473 end Span;
3475 function Span (Str : access VString) return Pattern is
3476 begin
3477 return (AFC with 0, new PE'(PC_Span_VP, 1, EOP, VString_Ptr (Str)));
3478 end Span;
3480 function Span (Str : VString_Func) return Pattern is
3481 begin
3482 return (AFC with 0, new PE'(PC_Span_VF, 1, EOP, Str));
3483 end Span;
3485 ------------
3486 -- Str_BF --
3487 ------------
3489 function Str_BF (A : Boolean_Func) return String is
3490 function To_A is new Unchecked_Conversion (Boolean_Func, Address);
3492 begin
3493 return "BF(" & Image (To_A (A)) & ')';
3494 end Str_BF;
3496 ------------
3497 -- Str_FP --
3498 ------------
3500 function Str_FP (A : File_Ptr) return String is
3501 begin
3502 return "FP(" & Image (A.all'Address) & ')';
3503 end Str_FP;
3505 ------------
3506 -- Str_NF --
3507 ------------
3509 function Str_NF (A : Natural_Func) return String is
3510 function To_A is new Unchecked_Conversion (Natural_Func, Address);
3512 begin
3513 return "NF(" & Image (To_A (A)) & ')';
3514 end Str_NF;
3516 ------------
3517 -- Str_NP --
3518 ------------
3520 function Str_NP (A : Natural_Ptr) return String is
3521 begin
3522 return "NP(" & Image (A.all'Address) & ')';
3523 end Str_NP;
3525 ------------
3526 -- Str_PP --
3527 ------------
3529 function Str_PP (A : Pattern_Ptr) return String is
3530 begin
3531 return "PP(" & Image (A.all'Address) & ')';
3532 end Str_PP;
3534 ------------
3535 -- Str_VF --
3536 ------------
3538 function Str_VF (A : VString_Func) return String is
3539 function To_A is new Unchecked_Conversion (VString_Func, Address);
3541 begin
3542 return "VF(" & Image (To_A (A)) & ')';
3543 end Str_VF;
3545 ------------
3546 -- Str_VP --
3547 ------------
3549 function Str_VP (A : VString_Ptr) return String is
3550 begin
3551 return "VP(" & Image (A.all'Address) & ')';
3552 end Str_VP;
3554 -------------
3555 -- Succeed --
3556 -------------
3558 function Succeed return Pattern is
3559 begin
3560 return (AFC with 1, new PE'(PC_Succeed, 1, EOP));
3561 end Succeed;
3563 ---------
3564 -- Tab --
3565 ---------
3567 function Tab (Count : Natural) return Pattern is
3568 begin
3569 return (AFC with 0, new PE'(PC_Tab_Nat, 1, EOP, Count));
3570 end Tab;
3572 function Tab (Count : Natural_Func) return Pattern is
3573 begin
3574 return (AFC with 0, new PE'(PC_Tab_NF, 1, EOP, Count));
3575 end Tab;
3577 function Tab (Count : access Natural) return Pattern is
3578 begin
3579 return (AFC with 0, new PE'(PC_Tab_NP, 1, EOP, Natural_Ptr (Count)));
3580 end Tab;
3582 ---------------------------
3583 -- Uninitialized_Pattern --
3584 ---------------------------
3586 procedure Uninitialized_Pattern is
3587 begin
3588 Raise_Exception
3589 (Program_Error'Identity,
3590 "uninitialized value of type GNAT.Spitbol.Patterns.Pattern");
3591 end Uninitialized_Pattern;
3593 ------------
3594 -- XMatch --
3595 ------------
3597 procedure XMatch
3598 (Subject : String;
3599 Pat_P : PE_Ptr;
3600 Pat_S : Natural;
3601 Start : out Natural;
3602 Stop : out Natural)
3604 Node : PE_Ptr;
3605 -- Pointer to current pattern node. Initialized from Pat_P, and then
3606 -- updated as the match proceeds through its constituent elements.
3608 Length : constant Natural := Subject'Length;
3609 -- Length of string (= Subject'Last, since Subject'First is always 1)
3611 Cursor : Integer := 0;
3612 -- If the value is non-negative, then this value is the index showing
3613 -- the current position of the match in the subject string. The next
3614 -- character to be matched is at Subject (Cursor + 1). Note that since
3615 -- our view of the subject string in XMatch always has a lower bound
3616 -- of one, regardless of original bounds, that this definition exactly
3617 -- corresponds to the cursor value as referenced by functions like Pos.
3619 -- If the value is negative, then this is a saved stack pointer,
3620 -- typically a base pointer of an inner or outer region. Cursor
3621 -- temporarily holds such a value when it is popped from the stack
3622 -- by Fail. In all cases, Cursor is reset to a proper non-negative
3623 -- cursor value before the match proceeds (e.g. by propagating the
3624 -- failure and popping a "real" cursor value from the stack.
3626 PE_Unanchored : aliased PE := (PC_Unanchored, 0, Pat_P);
3627 -- Dummy pattern element used in the unanchored case.
3629 Stack : Stack_Type;
3630 -- The pattern matching failure stack for this call to Match
3632 Stack_Ptr : Stack_Range;
3633 -- Current stack pointer. This points to the top element of the stack
3634 -- that is currently in use. At the outer level this is the special
3635 -- entry placed on the stack according to the anchor mode.
3637 Stack_Init : constant Stack_Range := Stack'First + 1;
3638 -- This is the initial value of the Stack_Ptr and Stack_Base. The
3639 -- initial (Stack'First) element of the stack is not used so that
3640 -- when we pop the last element off, Stack_Ptr is still in range.
3642 Stack_Base : Stack_Range;
3643 -- This value is the stack base value, i.e. the stack pointer for the
3644 -- first history stack entry in the current stack region. See separate
3645 -- section on handling of recursive pattern matches.
3647 Assign_OnM : Boolean := False;
3648 -- Set True if assign-on-match or write-on-match operations may be
3649 -- present in the history stack, which must then be scanned on a
3650 -- successful match.
3652 procedure Pop_Region;
3653 pragma Inline (Pop_Region);
3654 -- Used at the end of processing of an inner region. if the inner
3655 -- region left no stack entries, then all trace of it is removed.
3656 -- Otherwise a PC_Restore_Region entry is pushed to ensure proper
3657 -- handling of alternatives in the inner region.
3659 procedure Push (Node : PE_Ptr);
3660 pragma Inline (Push);
3661 -- Make entry in pattern matching stack with current cursor valeu
3663 procedure Push_Region;
3664 pragma Inline (Push_Region);
3665 -- This procedure makes a new region on the history stack. The
3666 -- caller first establishes the special entry on the stack, but
3667 -- does not push the stack pointer. Then this call stacks a
3668 -- PC_Remove_Region node, on top of this entry, using the cursor
3669 -- field of the PC_Remove_Region entry to save the outer level
3670 -- stack base value, and resets the stack base to point to this
3671 -- PC_Remove_Region node.
3673 ----------------
3674 -- Pop_Region --
3675 ----------------
3677 procedure Pop_Region is
3678 begin
3679 -- If nothing was pushed in the inner region, we can just get
3680 -- rid of it entirely, leaving no traces that it was ever there
3682 if Stack_Ptr = Stack_Base then
3683 Stack_Ptr := Stack_Base - 2;
3684 Stack_Base := Stack (Stack_Ptr + 2).Cursor;
3686 -- If stuff was pushed in the inner region, then we have to
3687 -- push a PC_R_Restore node so that we properly handle possible
3688 -- rematches within the region.
3690 else
3691 Stack_Ptr := Stack_Ptr + 1;
3692 Stack (Stack_Ptr).Cursor := Stack_Base;
3693 Stack (Stack_Ptr).Node := CP_R_Restore'Access;
3694 Stack_Base := Stack (Stack_Base).Cursor;
3695 end if;
3696 end Pop_Region;
3698 ----------
3699 -- Push --
3700 ----------
3702 procedure Push (Node : PE_Ptr) is
3703 begin
3704 Stack_Ptr := Stack_Ptr + 1;
3705 Stack (Stack_Ptr).Cursor := Cursor;
3706 Stack (Stack_Ptr).Node := Node;
3707 end Push;
3709 -----------------
3710 -- Push_Region --
3711 -----------------
3713 procedure Push_Region is
3714 begin
3715 Stack_Ptr := Stack_Ptr + 2;
3716 Stack (Stack_Ptr).Cursor := Stack_Base;
3717 Stack (Stack_Ptr).Node := CP_R_Remove'Access;
3718 Stack_Base := Stack_Ptr;
3719 end Push_Region;
3721 -- Start of processing for XMatch
3723 begin
3724 if Pat_P = null then
3725 Uninitialized_Pattern;
3726 end if;
3728 -- Check we have enough stack for this pattern. This check deals with
3729 -- every possibility except a match of a recursive pattern, where we
3730 -- make a check at each recursion level.
3732 if Pat_S >= Stack_Size - 1 then
3733 raise Pattern_Stack_Overflow;
3734 end if;
3736 -- In anchored mode, the bottom entry on the stack is an abort entry
3738 if Anchored_Mode then
3739 Stack (Stack_Init).Node := CP_Cancel'Access;
3740 Stack (Stack_Init).Cursor := 0;
3742 -- In unanchored more, the bottom entry on the stack references
3743 -- the special pattern element PE_Unanchored, whose Pthen field
3744 -- points to the initial pattern element. The cursor value in this
3745 -- entry is the number of anchor moves so far.
3747 else
3748 Stack (Stack_Init).Node := PE_Unanchored'Unchecked_Access;
3749 Stack (Stack_Init).Cursor := 0;
3750 end if;
3752 Stack_Ptr := Stack_Init;
3753 Stack_Base := Stack_Ptr;
3754 Cursor := 0;
3755 Node := Pat_P;
3756 goto Match;
3758 -----------------------------------------
3759 -- Main Pattern Matching State Control --
3760 -----------------------------------------
3762 -- This is a state machine which uses gotos to change state. The
3763 -- initial state is Match, to initiate the matching of the first
3764 -- element, so the goto Match above starts the match. In the
3765 -- following descriptions, we indicate the global values that
3766 -- are relevant for the state transition.
3768 -- Come here if entire match fails
3770 <<Match_Fail>>
3771 Start := 0;
3772 Stop := 0;
3773 return;
3775 -- Come here if entire match succeeds
3777 -- Cursor current position in subject string
3779 <<Match_Succeed>>
3780 Start := Stack (Stack_Init).Cursor + 1;
3781 Stop := Cursor;
3783 -- Scan history stack for deferred assignments or writes
3785 if Assign_OnM then
3786 for S in Stack_Init .. Stack_Ptr loop
3787 if Stack (S).Node = CP_Assign'Access then
3788 declare
3789 Inner_Base : constant Stack_Range :=
3790 Stack (S + 1).Cursor;
3791 Special_Entry : constant Stack_Range :=
3792 Inner_Base - 1;
3793 Node_OnM : constant PE_Ptr :=
3794 Stack (Special_Entry).Node;
3795 Start : constant Natural :=
3796 Stack (Special_Entry).Cursor + 1;
3797 Stop : constant Natural := Stack (S).Cursor;
3799 begin
3800 if Node_OnM.Pcode = PC_Assign_OnM then
3801 Set_String (Node_OnM.VP.all, Subject (Start .. Stop));
3803 elsif Node_OnM.Pcode = PC_Write_OnM then
3804 Put_Line (Node_OnM.FP.all, Subject (Start .. Stop));
3806 else
3807 Logic_Error;
3808 end if;
3809 end;
3810 end if;
3811 end loop;
3812 end if;
3814 return;
3816 -- Come here if attempt to match current element fails
3818 -- Stack_Base current stack base
3819 -- Stack_Ptr current stack pointer
3821 <<Fail>>
3822 Cursor := Stack (Stack_Ptr).Cursor;
3823 Node := Stack (Stack_Ptr).Node;
3824 Stack_Ptr := Stack_Ptr - 1;
3825 goto Match;
3827 -- Come here if attempt to match current element succeeds
3829 -- Cursor current position in subject string
3830 -- Node pointer to node successfully matched
3831 -- Stack_Base current stack base
3832 -- Stack_Ptr current stack pointer
3834 <<Succeed>>
3835 Node := Node.Pthen;
3837 -- Come here to match the next pattern element
3839 -- Cursor current position in subject string
3840 -- Node pointer to node to be matched
3841 -- Stack_Base current stack base
3842 -- Stack_Ptr current stack pointer
3844 <<Match>>
3846 --------------------------------------------------
3847 -- Main Pattern Match Element Matching Routines --
3848 --------------------------------------------------
3850 -- Here is the case statement that processes the current node. The
3851 -- processing for each element does one of five things:
3853 -- goto Succeed to move to the successor
3854 -- goto Match_Succeed if the entire match succeeds
3855 -- goto Match_Fail if the entire match fails
3856 -- goto Fail to signal failure of current match
3858 -- Processing is NOT allowed to fall through
3860 case Node.Pcode is
3862 -- Cancel
3864 when PC_Cancel =>
3865 goto Match_Fail;
3867 -- Alternation
3869 when PC_Alt =>
3870 Push (Node.Alt);
3871 Node := Node.Pthen;
3872 goto Match;
3874 -- Any (one character case)
3876 when PC_Any_CH =>
3877 if Cursor < Length
3878 and then Subject (Cursor + 1) = Node.Char
3879 then
3880 Cursor := Cursor + 1;
3881 goto Succeed;
3882 else
3883 goto Fail;
3884 end if;
3886 -- Any (character set case)
3888 when PC_Any_CS =>
3889 if Cursor < Length
3890 and then Is_In (Subject (Cursor + 1), Node.CS)
3891 then
3892 Cursor := Cursor + 1;
3893 goto Succeed;
3894 else
3895 goto Fail;
3896 end if;
3898 -- Any (string function case)
3900 when PC_Any_VF => declare
3901 U : constant VString := Node.VF.all;
3902 Str : constant String_Access := Get_String (U);
3904 begin
3905 if Cursor < Length
3906 and then Is_In (Subject (Cursor + 1), Str.all)
3907 then
3908 Cursor := Cursor + 1;
3909 goto Succeed;
3910 else
3911 goto Fail;
3912 end if;
3913 end;
3915 -- Any (string pointer case)
3917 when PC_Any_VP => declare
3918 Str : constant String_Access := Get_String (Node.VP.all);
3920 begin
3921 if Cursor < Length
3922 and then Is_In (Subject (Cursor + 1), Str.all)
3923 then
3924 Cursor := Cursor + 1;
3925 goto Succeed;
3926 else
3927 goto Fail;
3928 end if;
3929 end;
3931 -- Arb (initial match)
3933 when PC_Arb_X =>
3934 Push (Node.Alt);
3935 Node := Node.Pthen;
3936 goto Match;
3938 -- Arb (extension)
3940 when PC_Arb_Y =>
3941 if Cursor < Length then
3942 Cursor := Cursor + 1;
3943 Push (Node);
3944 goto Succeed;
3945 else
3946 goto Fail;
3947 end if;
3949 -- Arbno_S (simple Arbno initialize). This is the node that
3950 -- initiates the match of a simple Arbno structure.
3952 when PC_Arbno_S =>
3953 Push (Node.Alt);
3954 Node := Node.Pthen;
3955 goto Match;
3957 -- Arbno_X (Arbno initialize). This is the node that initiates
3958 -- the match of a complex Arbno structure.
3960 when PC_Arbno_X =>
3961 Push (Node.Alt);
3962 Node := Node.Pthen;
3963 goto Match;
3965 -- Arbno_Y (Arbno rematch). This is the node that is executed
3966 -- following successful matching of one instance of a complex
3967 -- Arbno pattern.
3969 when PC_Arbno_Y => declare
3970 Null_Match : Boolean := (Cursor = Stack (Stack_Base - 1).Cursor);
3972 begin
3973 Pop_Region;
3975 -- If arbno extension matched null, then immediately fail
3977 if Null_Match then
3978 goto Fail;
3979 end if;
3981 -- Here we must do a stack check to make sure enough stack
3982 -- is left. This check will happen once for each instance of
3983 -- the Arbno pattern that is matched. The Nat field of a
3984 -- PC_Arbno pattern contains the maximum stack entries needed
3985 -- for the Arbno with one instance and the successor pattern
3987 if Stack_Ptr + Node.Nat >= Stack'Last then
3988 raise Pattern_Stack_Overflow;
3989 end if;
3991 goto Succeed;
3992 end;
3994 -- Assign. If this node is executed, it means the assign-on-match
3995 -- or write-on-match operation will not happen after all, so we
3996 -- is propagate the failure, removing the PC_Assign node.
3998 when PC_Assign =>
3999 goto Fail;
4001 -- Assign immediate. This node performs the actual assignment.
4003 when PC_Assign_Imm =>
4004 Set_String
4005 (Node.VP.all,
4006 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
4007 Pop_Region;
4008 goto Succeed;
4010 -- Assign on match. This node sets up for the eventual assignment
4012 when PC_Assign_OnM =>
4013 Stack (Stack_Base - 1).Node := Node;
4014 Push (CP_Assign'Access);
4015 Pop_Region;
4016 Assign_OnM := True;
4017 goto Succeed;
4019 -- Bal
4021 when PC_Bal =>
4022 if Cursor >= Length or else Subject (Cursor + 1) = ')' then
4023 goto Fail;
4025 elsif Subject (Cursor + 1) = '(' then
4026 declare
4027 Paren_Count : Natural := 1;
4029 begin
4030 loop
4031 Cursor := Cursor + 1;
4033 if Cursor >= Length then
4034 goto Fail;
4036 elsif Subject (Cursor + 1) = '(' then
4037 Paren_Count := Paren_Count + 1;
4039 elsif Subject (Cursor + 1) = ')' then
4040 Paren_Count := Paren_Count - 1;
4041 exit when Paren_Count = 0;
4042 end if;
4043 end loop;
4044 end;
4045 end if;
4047 Cursor := Cursor + 1;
4048 Push (Node);
4049 goto Succeed;
4051 -- Break (one character case)
4053 when PC_Break_CH =>
4054 while Cursor < Length loop
4055 if Subject (Cursor + 1) = Node.Char then
4056 goto Succeed;
4057 else
4058 Cursor := Cursor + 1;
4059 end if;
4060 end loop;
4062 goto Fail;
4064 -- Break (character set case)
4066 when PC_Break_CS =>
4067 while Cursor < Length loop
4068 if Is_In (Subject (Cursor + 1), Node.CS) then
4069 goto Succeed;
4070 else
4071 Cursor := Cursor + 1;
4072 end if;
4073 end loop;
4075 goto Fail;
4077 -- Break (string function case)
4079 when PC_Break_VF => declare
4080 U : constant VString := Node.VF.all;
4081 Str : constant String_Access := Get_String (U);
4083 begin
4084 while Cursor < Length loop
4085 if Is_In (Subject (Cursor + 1), Str.all) then
4086 goto Succeed;
4087 else
4088 Cursor := Cursor + 1;
4089 end if;
4090 end loop;
4092 goto Fail;
4093 end;
4095 -- Break (string pointer case)
4097 when PC_Break_VP => declare
4098 Str : String_Access := Get_String (Node.VP.all);
4100 begin
4101 while Cursor < Length loop
4102 if Is_In (Subject (Cursor + 1), Str.all) then
4103 goto Succeed;
4104 else
4105 Cursor := Cursor + 1;
4106 end if;
4107 end loop;
4109 goto Fail;
4110 end;
4112 -- BreakX (one character case)
4114 when PC_BreakX_CH =>
4115 while Cursor < Length loop
4116 if Subject (Cursor + 1) = Node.Char then
4117 goto Succeed;
4118 else
4119 Cursor := Cursor + 1;
4120 end if;
4121 end loop;
4123 goto Fail;
4125 -- BreakX (character set case)
4127 when PC_BreakX_CS =>
4128 while Cursor < Length loop
4129 if Is_In (Subject (Cursor + 1), Node.CS) then
4130 goto Succeed;
4131 else
4132 Cursor := Cursor + 1;
4133 end if;
4134 end loop;
4136 goto Fail;
4138 -- BreakX (string function case)
4140 when PC_BreakX_VF => declare
4141 U : constant VString := Node.VF.all;
4142 Str : constant String_Access := Get_String (U);
4144 begin
4145 while Cursor < Length loop
4146 if Is_In (Subject (Cursor + 1), Str.all) then
4147 goto Succeed;
4148 else
4149 Cursor := Cursor + 1;
4150 end if;
4151 end loop;
4153 goto Fail;
4154 end;
4156 -- BreakX (string pointer case)
4158 when PC_BreakX_VP => declare
4159 Str : String_Access := Get_String (Node.VP.all);
4161 begin
4162 while Cursor < Length loop
4163 if Is_In (Subject (Cursor + 1), Str.all) then
4164 goto Succeed;
4165 else
4166 Cursor := Cursor + 1;
4167 end if;
4168 end loop;
4170 goto Fail;
4171 end;
4173 -- BreakX_X (BreakX extension). See section on "Compound Pattern
4174 -- Structures". This node is the alternative that is stacked to
4175 -- skip past the break character and extend the break.
4177 when PC_BreakX_X =>
4178 Cursor := Cursor + 1;
4179 goto Succeed;
4181 -- Character (one character string)
4183 when PC_Char =>
4184 if Cursor < Length
4185 and then Subject (Cursor + 1) = Node.Char
4186 then
4187 Cursor := Cursor + 1;
4188 goto Succeed;
4189 else
4190 goto Fail;
4191 end if;
4193 -- End of Pattern
4195 when PC_EOP =>
4196 if Stack_Base = Stack_Init then
4197 goto Match_Succeed;
4199 -- End of recursive inner match. See separate section on
4200 -- handing of recursive pattern matches for details.
4202 else
4203 Node := Stack (Stack_Base - 1).Node;
4204 Pop_Region;
4205 goto Match;
4206 end if;
4208 -- Fail
4210 when PC_Fail =>
4211 goto Fail;
4213 -- Fence (built in pattern)
4215 when PC_Fence =>
4216 Push (CP_Cancel'Access);
4217 goto Succeed;
4219 -- Fence function node X. This is the node that gets control
4220 -- after a successful match of the fenced pattern.
4222 when PC_Fence_X =>
4223 Stack_Ptr := Stack_Ptr + 1;
4224 Stack (Stack_Ptr).Cursor := Stack_Base;
4225 Stack (Stack_Ptr).Node := CP_Fence_Y'Access;
4226 Stack_Base := Stack (Stack_Base).Cursor;
4227 goto Succeed;
4229 -- Fence function node Y. This is the node that gets control on
4230 -- a failure that occurs after the fenced pattern has matched.
4232 -- Note: the Cursor at this stage is actually the inner stack
4233 -- base value. We don't reset this, but we do use it to strip
4234 -- off all the entries made by the fenced pattern.
4236 when PC_Fence_Y =>
4237 Stack_Ptr := Cursor - 2;
4238 goto Fail;
4240 -- Len (integer case)
4242 when PC_Len_Nat =>
4243 if Cursor + Node.Nat > Length then
4244 goto Fail;
4245 else
4246 Cursor := Cursor + Node.Nat;
4247 goto Succeed;
4248 end if;
4250 -- Len (Integer function case)
4252 when PC_Len_NF => declare
4253 N : constant Natural := Node.NF.all;
4255 begin
4256 if Cursor + N > Length then
4257 goto Fail;
4258 else
4259 Cursor := Cursor + N;
4260 goto Succeed;
4261 end if;
4262 end;
4264 -- Len (integer pointer case)
4266 when PC_Len_NP =>
4267 if Cursor + Node.NP.all > Length then
4268 goto Fail;
4269 else
4270 Cursor := Cursor + Node.NP.all;
4271 goto Succeed;
4272 end if;
4274 -- NotAny (one character case)
4276 when PC_NotAny_CH =>
4277 if Cursor < Length
4278 and then Subject (Cursor + 1) /= Node.Char
4279 then
4280 Cursor := Cursor + 1;
4281 goto Succeed;
4282 else
4283 goto Fail;
4284 end if;
4286 -- NotAny (character set case)
4288 when PC_NotAny_CS =>
4289 if Cursor < Length
4290 and then not Is_In (Subject (Cursor + 1), Node.CS)
4291 then
4292 Cursor := Cursor + 1;
4293 goto Succeed;
4294 else
4295 goto Fail;
4296 end if;
4298 -- NotAny (string function case)
4300 when PC_NotAny_VF => declare
4301 U : constant VString := Node.VF.all;
4302 Str : constant String_Access := Get_String (U);
4304 begin
4305 if Cursor < Length
4306 and then
4307 not Is_In (Subject (Cursor + 1), Str.all)
4308 then
4309 Cursor := Cursor + 1;
4310 goto Succeed;
4311 else
4312 goto Fail;
4313 end if;
4314 end;
4316 -- NotAny (string pointer case)
4318 when PC_NotAny_VP => declare
4319 Str : String_Access := Get_String (Node.VP.all);
4321 begin
4322 if Cursor < Length
4323 and then
4324 not Is_In (Subject (Cursor + 1), Str.all)
4325 then
4326 Cursor := Cursor + 1;
4327 goto Succeed;
4328 else
4329 goto Fail;
4330 end if;
4331 end;
4333 -- NSpan (one character case)
4335 when PC_NSpan_CH =>
4336 while Cursor < Length
4337 and then Subject (Cursor + 1) = Node.Char
4338 loop
4339 Cursor := Cursor + 1;
4340 end loop;
4342 goto Succeed;
4344 -- NSpan (character set case)
4346 when PC_NSpan_CS =>
4347 while Cursor < Length
4348 and then Is_In (Subject (Cursor + 1), Node.CS)
4349 loop
4350 Cursor := Cursor + 1;
4351 end loop;
4353 goto Succeed;
4355 -- NSpan (string function case)
4357 when PC_NSpan_VF => declare
4358 U : constant VString := Node.VF.all;
4359 Str : constant String_Access := Get_String (U);
4361 begin
4362 while Cursor < Length
4363 and then Is_In (Subject (Cursor + 1), Str.all)
4364 loop
4365 Cursor := Cursor + 1;
4366 end loop;
4368 goto Succeed;
4369 end;
4371 -- NSpan (string pointer case)
4373 when PC_NSpan_VP => declare
4374 Str : String_Access := Get_String (Node.VP.all);
4376 begin
4377 while Cursor < Length
4378 and then Is_In (Subject (Cursor + 1), Str.all)
4379 loop
4380 Cursor := Cursor + 1;
4381 end loop;
4383 goto Succeed;
4384 end;
4386 -- Null string
4388 when PC_Null =>
4389 goto Succeed;
4391 -- Pos (integer case)
4393 when PC_Pos_Nat =>
4394 if Cursor = Node.Nat then
4395 goto Succeed;
4396 else
4397 goto Fail;
4398 end if;
4400 -- Pos (Integer function case)
4402 when PC_Pos_NF => declare
4403 N : constant Natural := Node.NF.all;
4405 begin
4406 if Cursor = N then
4407 goto Succeed;
4408 else
4409 goto Fail;
4410 end if;
4411 end;
4413 -- Pos (integer pointer case)
4415 when PC_Pos_NP =>
4416 if Cursor = Node.NP.all then
4417 goto Succeed;
4418 else
4419 goto Fail;
4420 end if;
4422 -- Predicate function
4424 when PC_Pred_Func =>
4425 if Node.BF.all then
4426 goto Succeed;
4427 else
4428 goto Fail;
4429 end if;
4431 -- Region Enter. Initiate new pattern history stack region
4433 when PC_R_Enter =>
4434 Stack (Stack_Ptr + 1).Cursor := Cursor;
4435 Push_Region;
4436 goto Succeed;
4438 -- Region Remove node. This is the node stacked by an R_Enter.
4439 -- It removes the special format stack entry right underneath, and
4440 -- then restores the outer level stack base and signals failure.
4442 -- Note: the cursor value at this stage is actually the (negative)
4443 -- stack base value for the outer level.
4445 when PC_R_Remove =>
4446 Stack_Base := Cursor;
4447 Stack_Ptr := Stack_Ptr - 1;
4448 goto Fail;
4450 -- Region restore node. This is the node stacked at the end of an
4451 -- inner level match. Its function is to restore the inner level
4452 -- region, so that alternatives in this region can be sought.
4454 -- Note: the Cursor at this stage is actually the negative of the
4455 -- inner stack base value, which we use to restore the inner region.
4457 when PC_R_Restore =>
4458 Stack_Base := Cursor;
4459 goto Fail;
4461 -- Rest
4463 when PC_Rest =>
4464 Cursor := Length;
4465 goto Succeed;
4467 -- Initiate recursive match (pattern pointer case)
4469 when PC_Rpat =>
4470 Stack (Stack_Ptr + 1).Node := Node.Pthen;
4471 Push_Region;
4473 if Stack_Ptr + Node.PP.all.Stk >= Stack_Size then
4474 raise Pattern_Stack_Overflow;
4475 else
4476 Node := Node.PP.all.P;
4477 goto Match;
4478 end if;
4480 -- RPos (integer case)
4482 when PC_RPos_Nat =>
4483 if Cursor = (Length - Node.Nat) then
4484 goto Succeed;
4485 else
4486 goto Fail;
4487 end if;
4489 -- RPos (integer function case)
4491 when PC_RPos_NF => declare
4492 N : constant Natural := Node.NF.all;
4494 begin
4495 if Length - Cursor = N then
4496 goto Succeed;
4497 else
4498 goto Fail;
4499 end if;
4500 end;
4502 -- RPos (integer pointer case)
4504 when PC_RPos_NP =>
4505 if Cursor = (Length - Node.NP.all) then
4506 goto Succeed;
4507 else
4508 goto Fail;
4509 end if;
4511 -- RTab (integer case)
4513 when PC_RTab_Nat =>
4514 if Cursor <= (Length - Node.Nat) then
4515 Cursor := Length - Node.Nat;
4516 goto Succeed;
4517 else
4518 goto Fail;
4519 end if;
4521 -- RTab (integer function case)
4523 when PC_RTab_NF => declare
4524 N : constant Natural := Node.NF.all;
4526 begin
4527 if Length - Cursor >= N then
4528 Cursor := Length - N;
4529 goto Succeed;
4530 else
4531 goto Fail;
4532 end if;
4533 end;
4535 -- RTab (integer pointer case)
4537 when PC_RTab_NP =>
4538 if Cursor <= (Length - Node.NP.all) then
4539 Cursor := Length - Node.NP.all;
4540 goto Succeed;
4541 else
4542 goto Fail;
4543 end if;
4545 -- Cursor assignment
4547 when PC_Setcur =>
4548 Node.Var.all := Cursor;
4549 goto Succeed;
4551 -- Span (one character case)
4553 when PC_Span_CH => declare
4554 P : Natural := Cursor;
4556 begin
4557 while P < Length
4558 and then Subject (P + 1) = Node.Char
4559 loop
4560 P := P + 1;
4561 end loop;
4563 if P /= Cursor then
4564 Cursor := P;
4565 goto Succeed;
4566 else
4567 goto Fail;
4568 end if;
4569 end;
4571 -- Span (character set case)
4573 when PC_Span_CS => declare
4574 P : Natural := Cursor;
4576 begin
4577 while P < Length
4578 and then Is_In (Subject (P + 1), Node.CS)
4579 loop
4580 P := P + 1;
4581 end loop;
4583 if P /= Cursor then
4584 Cursor := P;
4585 goto Succeed;
4586 else
4587 goto Fail;
4588 end if;
4589 end;
4591 -- Span (string function case)
4593 when PC_Span_VF => declare
4594 U : constant VString := Node.VF.all;
4595 Str : constant String_Access := Get_String (U);
4596 P : Natural := Cursor;
4598 begin
4599 while P < Length
4600 and then Is_In (Subject (P + 1), Str.all)
4601 loop
4602 P := P + 1;
4603 end loop;
4605 if P /= Cursor then
4606 Cursor := P;
4607 goto Succeed;
4608 else
4609 goto Fail;
4610 end if;
4611 end;
4613 -- Span (string pointer case)
4615 when PC_Span_VP => declare
4616 Str : String_Access := Get_String (Node.VP.all);
4617 P : Natural := Cursor;
4619 begin
4620 while P < Length
4621 and then Is_In (Subject (P + 1), Str.all)
4622 loop
4623 P := P + 1;
4624 end loop;
4626 if P /= Cursor then
4627 Cursor := P;
4628 goto Succeed;
4629 else
4630 goto Fail;
4631 end if;
4632 end;
4634 -- String (two character case)
4636 when PC_String_2 =>
4637 if (Length - Cursor) >= 2
4638 and then Subject (Cursor + 1 .. Cursor + 2) = Node.Str2
4639 then
4640 Cursor := Cursor + 2;
4641 goto Succeed;
4642 else
4643 goto Fail;
4644 end if;
4646 -- String (three character case)
4648 when PC_String_3 =>
4649 if (Length - Cursor) >= 3
4650 and then Subject (Cursor + 1 .. Cursor + 3) = Node.Str3
4651 then
4652 Cursor := Cursor + 3;
4653 goto Succeed;
4654 else
4655 goto Fail;
4656 end if;
4658 -- String (four character case)
4660 when PC_String_4 =>
4661 if (Length - Cursor) >= 4
4662 and then Subject (Cursor + 1 .. Cursor + 4) = Node.Str4
4663 then
4664 Cursor := Cursor + 4;
4665 goto Succeed;
4666 else
4667 goto Fail;
4668 end if;
4670 -- String (five character case)
4672 when PC_String_5 =>
4673 if (Length - Cursor) >= 5
4674 and then Subject (Cursor + 1 .. Cursor + 5) = Node.Str5
4675 then
4676 Cursor := Cursor + 5;
4677 goto Succeed;
4678 else
4679 goto Fail;
4680 end if;
4682 -- String (six character case)
4684 when PC_String_6 =>
4685 if (Length - Cursor) >= 6
4686 and then Subject (Cursor + 1 .. Cursor + 6) = Node.Str6
4687 then
4688 Cursor := Cursor + 6;
4689 goto Succeed;
4690 else
4691 goto Fail;
4692 end if;
4694 -- String (case of more than six characters)
4696 when PC_String => declare
4697 Len : constant Natural := Node.Str'Length;
4699 begin
4700 if (Length - Cursor) >= Len
4701 and then Node.Str.all = Subject (Cursor + 1 .. Cursor + Len)
4702 then
4703 Cursor := Cursor + Len;
4704 goto Succeed;
4705 else
4706 goto Fail;
4707 end if;
4708 end;
4710 -- String (function case)
4712 when PC_String_VF => declare
4713 U : constant VString := Node.VF.all;
4714 Str : constant String_Access := Get_String (U);
4715 Len : constant Natural := Str'Length;
4717 begin
4718 if (Length - Cursor) >= Len
4719 and then Str.all = Subject (Cursor + 1 .. Cursor + Len)
4720 then
4721 Cursor := Cursor + Len;
4722 goto Succeed;
4723 else
4724 goto Fail;
4725 end if;
4726 end;
4728 -- String (pointer case)
4730 when PC_String_VP => declare
4731 S : String_Access := Get_String (Node.VP.all);
4732 Len : constant Natural := S'Length;
4734 begin
4735 if (Length - Cursor) >= Len
4736 and then S.all = Subject (Cursor + 1 .. Cursor + Len)
4737 then
4738 Cursor := Cursor + Len;
4739 goto Succeed;
4740 else
4741 goto Fail;
4742 end if;
4743 end;
4745 -- Succeed
4747 when PC_Succeed =>
4748 Push (Node);
4749 goto Succeed;
4751 -- Tab (integer case)
4753 when PC_Tab_Nat =>
4754 if Cursor <= Node.Nat then
4755 Cursor := Node.Nat;
4756 goto Succeed;
4757 else
4758 goto Fail;
4759 end if;
4761 -- Tab (integer function case)
4763 when PC_Tab_NF => declare
4764 N : constant Natural := Node.NF.all;
4766 begin
4767 if Cursor <= N then
4768 Cursor := N;
4769 goto Succeed;
4770 else
4771 goto Fail;
4772 end if;
4773 end;
4775 -- Tab (integer pointer case)
4777 when PC_Tab_NP =>
4778 if Cursor <= Node.NP.all then
4779 Cursor := Node.NP.all;
4780 goto Succeed;
4781 else
4782 goto Fail;
4783 end if;
4785 -- Unanchored movement
4787 when PC_Unanchored =>
4789 -- All done if we tried every position
4791 if Cursor > Length then
4792 goto Match_Fail;
4794 -- Otherwise extend the anchor point, and restack ourself
4796 else
4797 Cursor := Cursor + 1;
4798 Push (Node);
4799 goto Succeed;
4800 end if;
4802 -- Write immediate. This node performs the actual write
4804 when PC_Write_Imm =>
4805 Put_Line
4806 (Node.FP.all,
4807 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
4808 Pop_Region;
4809 goto Succeed;
4811 -- Write on match. This node sets up for the eventual write
4813 when PC_Write_OnM =>
4814 Stack (Stack_Base - 1).Node := Node;
4815 Push (CP_Assign'Access);
4816 Pop_Region;
4817 Assign_OnM := True;
4818 goto Succeed;
4820 end case;
4822 -- We are NOT allowed to fall though this case statement, since every
4823 -- match routine must end by executing a goto to the appropriate point
4824 -- in the finite state machine model.
4826 Logic_Error;
4828 end XMatch;
4830 -------------
4831 -- XMatchD --
4832 -------------
4834 -- Maintenance note: There is a LOT of code duplication between XMatch
4835 -- and XMatchD. This is quite intentional, the point is to avoid any
4836 -- unnecessary debugging overhead in the XMatch case, but this does mean
4837 -- that any changes to XMatchD must be mirrored in XMatch. In case of
4838 -- any major changes, the proper approach is to delete XMatch, make the
4839 -- changes to XMatchD, and then make a copy of XMatchD, removing all
4840 -- calls to Dout, and all Put and Put_Line operations. This copy becomes
4841 -- the new XMatch.
4843 procedure XMatchD
4844 (Subject : String;
4845 Pat_P : PE_Ptr;
4846 Pat_S : Natural;
4847 Start : out Natural;
4848 Stop : out Natural)
4850 Node : PE_Ptr;
4851 -- Pointer to current pattern node. Initialized from Pat_P, and then
4852 -- updated as the match proceeds through its constituent elements.
4854 Length : constant Natural := Subject'Length;
4855 -- Length of string (= Subject'Last, since Subject'First is always 1)
4857 Cursor : Integer := 0;
4858 -- If the value is non-negative, then this value is the index showing
4859 -- the current position of the match in the subject string. The next
4860 -- character to be matched is at Subject (Cursor + 1). Note that since
4861 -- our view of the subject string in XMatch always has a lower bound
4862 -- of one, regardless of original bounds, that this definition exactly
4863 -- corresponds to the cursor value as referenced by functions like Pos.
4865 -- If the value is negative, then this is a saved stack pointer,
4866 -- typically a base pointer of an inner or outer region. Cursor
4867 -- temporarily holds such a value when it is popped from the stack
4868 -- by Fail. In all cases, Cursor is reset to a proper non-negative
4869 -- cursor value before the match proceeds (e.g. by propagating the
4870 -- failure and popping a "real" cursor value from the stack.
4872 PE_Unanchored : aliased PE := (PC_Unanchored, 0, Pat_P);
4873 -- Dummy pattern element used in the unanchored case.
4875 Region_Level : Natural := 0;
4876 -- Keeps track of recursive region level. This is used only for
4877 -- debugging, it is the number of saved history stack base values.
4879 Stack : Stack_Type;
4880 -- The pattern matching failure stack for this call to Match
4882 Stack_Ptr : Stack_Range;
4883 -- Current stack pointer. This points to the top element of the stack
4884 -- that is currently in use. At the outer level this is the special
4885 -- entry placed on the stack according to the anchor mode.
4887 Stack_Init : constant Stack_Range := Stack'First + 1;
4888 -- This is the initial value of the Stack_Ptr and Stack_Base. The
4889 -- initial (Stack'First) element of the stack is not used so that
4890 -- when we pop the last element off, Stack_Ptr is still in range.
4892 Stack_Base : Stack_Range;
4893 -- This value is the stack base value, i.e. the stack pointer for the
4894 -- first history stack entry in the current stack region. See separate
4895 -- section on handling of recursive pattern matches.
4897 Assign_OnM : Boolean := False;
4898 -- Set True if assign-on-match or write-on-match operations may be
4899 -- present in the history stack, which must then be scanned on a
4900 -- successful match.
4902 procedure Dout (Str : String);
4903 -- Output string to standard error with bars indicating region level.
4905 procedure Dout (Str : String; A : Character);
4906 -- Calls Dout with the string S ('A')
4908 procedure Dout (Str : String; A : Character_Set);
4909 -- Calls Dout with the string S ("A")
4911 procedure Dout (Str : String; A : Natural);
4912 -- Calls Dout with the string S (A)
4914 procedure Dout (Str : String; A : String);
4915 -- Calls Dout with the string S ("A")
4917 function Img (P : PE_Ptr) return String;
4918 -- Returns a string of the form #nnn where nnn is P.Index
4920 procedure Pop_Region;
4921 pragma Inline (Pop_Region);
4922 -- Used at the end of processing of an inner region. if the inner
4923 -- region left no stack entries, then all trace of it is removed.
4924 -- Otherwise a PC_Restore_Region entry is pushed to ensure proper
4925 -- handling of alternatives in the inner region.
4927 procedure Push (Node : PE_Ptr);
4928 pragma Inline (Push);
4929 -- Make entry in pattern matching stack with current cursor valeu
4931 procedure Push_Region;
4932 pragma Inline (Push_Region);
4933 -- This procedure makes a new region on the history stack. The
4934 -- caller first establishes the special entry on the stack, but
4935 -- does not push the stack pointer. Then this call stacks a
4936 -- PC_Remove_Region node, on top of this entry, using the cursor
4937 -- field of the PC_Remove_Region entry to save the outer level
4938 -- stack base value, and resets the stack base to point to this
4939 -- PC_Remove_Region node.
4941 ----------
4942 -- Dout --
4943 ----------
4945 procedure Dout (Str : String) is
4946 begin
4947 for J in 1 .. Region_Level loop
4948 Put ("| ");
4949 end loop;
4951 Put_Line (Str);
4952 end Dout;
4954 procedure Dout (Str : String; A : Character) is
4955 begin
4956 Dout (Str & " ('" & A & "')");
4957 end Dout;
4959 procedure Dout (Str : String; A : Character_Set) is
4960 begin
4961 Dout (Str & " (" & Image (To_Sequence (A)) & ')');
4962 end Dout;
4964 procedure Dout (Str : String; A : Natural) is
4965 begin
4966 Dout (Str & " (" & A & ')');
4967 end Dout;
4969 procedure Dout (Str : String; A : String) is
4970 begin
4971 Dout (Str & " (" & Image (A) & ')');
4972 end Dout;
4974 ---------
4975 -- Img --
4976 ---------
4978 function Img (P : PE_Ptr) return String is
4979 begin
4980 return "#" & Integer (P.Index) & " ";
4981 end Img;
4983 ----------------
4984 -- Pop_Region --
4985 ----------------
4987 procedure Pop_Region is
4988 begin
4989 Region_Level := Region_Level - 1;
4991 -- If nothing was pushed in the inner region, we can just get
4992 -- rid of it entirely, leaving no traces that it was ever there
4994 if Stack_Ptr = Stack_Base then
4995 Stack_Ptr := Stack_Base - 2;
4996 Stack_Base := Stack (Stack_Ptr + 2).Cursor;
4998 -- If stuff was pushed in the inner region, then we have to
4999 -- push a PC_R_Restore node so that we properly handle possible
5000 -- rematches within the region.
5002 else
5003 Stack_Ptr := Stack_Ptr + 1;
5004 Stack (Stack_Ptr).Cursor := Stack_Base;
5005 Stack (Stack_Ptr).Node := CP_R_Restore'Access;
5006 Stack_Base := Stack (Stack_Base).Cursor;
5007 end if;
5008 end Pop_Region;
5010 ----------
5011 -- Push --
5012 ----------
5014 procedure Push (Node : PE_Ptr) is
5015 begin
5016 Stack_Ptr := Stack_Ptr + 1;
5017 Stack (Stack_Ptr).Cursor := Cursor;
5018 Stack (Stack_Ptr).Node := Node;
5019 end Push;
5021 -----------------
5022 -- Push_Region --
5023 -----------------
5025 procedure Push_Region is
5026 begin
5027 Region_Level := Region_Level + 1;
5028 Stack_Ptr := Stack_Ptr + 2;
5029 Stack (Stack_Ptr).Cursor := Stack_Base;
5030 Stack (Stack_Ptr).Node := CP_R_Remove'Access;
5031 Stack_Base := Stack_Ptr;
5032 end Push_Region;
5034 -- Start of processing for XMatchD
5036 begin
5037 New_Line;
5038 Put_Line ("Initiating pattern match, subject = " & Image (Subject));
5039 Put ("--------------------------------------");
5041 for J in 1 .. Length loop
5042 Put ('-');
5043 end loop;
5045 New_Line;
5046 Put_Line ("subject length = " & Length);
5048 if Pat_P = null then
5049 Uninitialized_Pattern;
5050 end if;
5052 -- Check we have enough stack for this pattern. This check deals with
5053 -- every possibility except a match of a recursive pattern, where we
5054 -- make a check at each recursion level.
5056 if Pat_S >= Stack_Size - 1 then
5057 raise Pattern_Stack_Overflow;
5058 end if;
5060 -- In anchored mode, the bottom entry on the stack is an abort entry
5062 if Anchored_Mode then
5063 Stack (Stack_Init).Node := CP_Cancel'Access;
5064 Stack (Stack_Init).Cursor := 0;
5066 -- In unanchored more, the bottom entry on the stack references
5067 -- the special pattern element PE_Unanchored, whose Pthen field
5068 -- points to the initial pattern element. The cursor value in this
5069 -- entry is the number of anchor moves so far.
5071 else
5072 Stack (Stack_Init).Node := PE_Unanchored'Unchecked_Access;
5073 Stack (Stack_Init).Cursor := 0;
5074 end if;
5076 Stack_Ptr := Stack_Init;
5077 Stack_Base := Stack_Ptr;
5078 Cursor := 0;
5079 Node := Pat_P;
5080 goto Match;
5082 -----------------------------------------
5083 -- Main Pattern Matching State Control --
5084 -----------------------------------------
5086 -- This is a state machine which uses gotos to change state. The
5087 -- initial state is Match, to initiate the matching of the first
5088 -- element, so the goto Match above starts the match. In the
5089 -- following descriptions, we indicate the global values that
5090 -- are relevant for the state transition.
5092 -- Come here if entire match fails
5094 <<Match_Fail>>
5095 Dout ("match fails");
5096 New_Line;
5097 Start := 0;
5098 Stop := 0;
5099 return;
5101 -- Come here if entire match succeeds
5103 -- Cursor current position in subject string
5105 <<Match_Succeed>>
5106 Dout ("match succeeds");
5107 Start := Stack (Stack_Init).Cursor + 1;
5108 Stop := Cursor;
5109 Dout ("first matched character index = " & Start);
5110 Dout ("last matched character index = " & Stop);
5111 Dout ("matched substring = " & Image (Subject (Start .. Stop)));
5113 -- Scan history stack for deferred assignments or writes
5115 if Assign_OnM then
5116 for S in Stack'First .. Stack_Ptr loop
5117 if Stack (S).Node = CP_Assign'Access then
5118 declare
5119 Inner_Base : constant Stack_Range :=
5120 Stack (S + 1).Cursor;
5121 Special_Entry : constant Stack_Range :=
5122 Inner_Base - 1;
5123 Node_OnM : constant PE_Ptr :=
5124 Stack (Special_Entry).Node;
5125 Start : constant Natural :=
5126 Stack (Special_Entry).Cursor + 1;
5127 Stop : constant Natural := Stack (S).Cursor;
5129 begin
5130 if Node_OnM.Pcode = PC_Assign_OnM then
5131 Set_String (Node_OnM.VP.all, Subject (Start .. Stop));
5132 Dout
5133 (Img (Stack (S).Node) &
5134 "deferred assignment of " &
5135 Image (Subject (Start .. Stop)));
5137 elsif Node_OnM.Pcode = PC_Write_OnM then
5138 Put_Line (Node_OnM.FP.all, Subject (Start .. Stop));
5139 Dout
5140 (Img (Stack (S).Node) &
5141 "deferred write of " &
5142 Image (Subject (Start .. Stop)));
5144 else
5145 Logic_Error;
5146 end if;
5147 end;
5148 end if;
5149 end loop;
5150 end if;
5152 New_Line;
5153 return;
5155 -- Come here if attempt to match current element fails
5157 -- Stack_Base current stack base
5158 -- Stack_Ptr current stack pointer
5160 <<Fail>>
5161 Cursor := Stack (Stack_Ptr).Cursor;
5162 Node := Stack (Stack_Ptr).Node;
5163 Stack_Ptr := Stack_Ptr - 1;
5165 if Cursor >= 0 then
5166 Dout ("failure, cursor reset to " & Cursor);
5167 end if;
5169 goto Match;
5171 -- Come here if attempt to match current element succeeds
5173 -- Cursor current position in subject string
5174 -- Node pointer to node successfully matched
5175 -- Stack_Base current stack base
5176 -- Stack_Ptr current stack pointer
5178 <<Succeed>>
5179 Dout ("success, cursor = " & Cursor);
5180 Node := Node.Pthen;
5182 -- Come here to match the next pattern element
5184 -- Cursor current position in subject string
5185 -- Node pointer to node to be matched
5186 -- Stack_Base current stack base
5187 -- Stack_Ptr current stack pointer
5189 <<Match>>
5191 --------------------------------------------------
5192 -- Main Pattern Match Element Matching Routines --
5193 --------------------------------------------------
5195 -- Here is the case statement that processes the current node. The
5196 -- processing for each element does one of five things:
5198 -- goto Succeed to move to the successor
5199 -- goto Match_Succeed if the entire match succeeds
5200 -- goto Match_Fail if the entire match fails
5201 -- goto Fail to signal failure of current match
5203 -- Processing is NOT allowed to fall through
5205 case Node.Pcode is
5207 -- Cancel
5209 when PC_Cancel =>
5210 Dout (Img (Node) & "matching Cancel");
5211 goto Match_Fail;
5213 -- Alternation
5215 when PC_Alt =>
5216 Dout
5217 (Img (Node) & "setting up alternative " & Img (Node.Alt));
5218 Push (Node.Alt);
5219 Node := Node.Pthen;
5220 goto Match;
5222 -- Any (one character case)
5224 when PC_Any_CH =>
5225 Dout (Img (Node) & "matching Any", Node.Char);
5227 if Cursor < Length
5228 and then Subject (Cursor + 1) = Node.Char
5229 then
5230 Cursor := Cursor + 1;
5231 goto Succeed;
5232 else
5233 goto Fail;
5234 end if;
5236 -- Any (character set case)
5238 when PC_Any_CS =>
5239 Dout (Img (Node) & "matching Any", Node.CS);
5241 if Cursor < Length
5242 and then Is_In (Subject (Cursor + 1), Node.CS)
5243 then
5244 Cursor := Cursor + 1;
5245 goto Succeed;
5246 else
5247 goto Fail;
5248 end if;
5250 -- Any (string function case)
5252 when PC_Any_VF => declare
5253 U : constant VString := Node.VF.all;
5254 Str : constant String_Access := Get_String (U);
5256 begin
5257 Dout (Img (Node) & "matching Any", Str.all);
5259 if Cursor < Length
5260 and then Is_In (Subject (Cursor + 1), Str.all)
5261 then
5262 Cursor := Cursor + 1;
5263 goto Succeed;
5264 else
5265 goto Fail;
5266 end if;
5267 end;
5269 -- Any (string pointer case)
5271 when PC_Any_VP => declare
5272 Str : String_Access := Get_String (Node.VP.all);
5274 begin
5275 Dout (Img (Node) & "matching Any", Str.all);
5277 if Cursor < Length
5278 and then Is_In (Subject (Cursor + 1), Str.all)
5279 then
5280 Cursor := Cursor + 1;
5281 goto Succeed;
5282 else
5283 goto Fail;
5284 end if;
5285 end;
5287 -- Arb (initial match)
5289 when PC_Arb_X =>
5290 Dout (Img (Node) & "matching Arb");
5291 Push (Node.Alt);
5292 Node := Node.Pthen;
5293 goto Match;
5295 -- Arb (extension)
5297 when PC_Arb_Y =>
5298 Dout (Img (Node) & "extending Arb");
5300 if Cursor < Length then
5301 Cursor := Cursor + 1;
5302 Push (Node);
5303 goto Succeed;
5304 else
5305 goto Fail;
5306 end if;
5308 -- Arbno_S (simple Arbno initialize). This is the node that
5309 -- initiates the match of a simple Arbno structure.
5311 when PC_Arbno_S =>
5312 Dout (Img (Node) &
5313 "setting up Arbno alternative " & Img (Node.Alt));
5314 Push (Node.Alt);
5315 Node := Node.Pthen;
5316 goto Match;
5318 -- Arbno_X (Arbno initialize). This is the node that initiates
5319 -- the match of a complex Arbno structure.
5321 when PC_Arbno_X =>
5322 Dout (Img (Node) &
5323 "setting up Arbno alternative " & Img (Node.Alt));
5324 Push (Node.Alt);
5325 Node := Node.Pthen;
5326 goto Match;
5328 -- Arbno_Y (Arbno rematch). This is the node that is executed
5329 -- following successful matching of one instance of a complex
5330 -- Arbno pattern.
5332 when PC_Arbno_Y => declare
5333 Null_Match : Boolean := (Cursor = Stack (Stack_Base - 1).Cursor);
5335 begin
5336 Dout (Img (Node) & "extending Arbno");
5337 Pop_Region;
5339 -- If arbno extension matched null, then immediately fail
5341 if Null_Match then
5342 Dout ("Arbno extension matched null, so fails");
5343 goto Fail;
5344 end if;
5346 -- Here we must do a stack check to make sure enough stack
5347 -- is left. This check will happen once for each instance of
5348 -- the Arbno pattern that is matched. The Nat field of a
5349 -- PC_Arbno pattern contains the maximum stack entries needed
5350 -- for the Arbno with one instance and the successor pattern
5352 if Stack_Ptr + Node.Nat >= Stack'Last then
5353 raise Pattern_Stack_Overflow;
5354 end if;
5356 goto Succeed;
5357 end;
5359 -- Assign. If this node is executed, it means the assign-on-match
5360 -- or write-on-match operation will not happen after all, so we
5361 -- is propagate the failure, removing the PC_Assign node.
5363 when PC_Assign =>
5364 Dout (Img (Node) & "deferred assign/write cancelled");
5365 goto Fail;
5367 -- Assign immediate. This node performs the actual assignment.
5369 when PC_Assign_Imm =>
5370 Dout
5371 (Img (Node) & "executing immediate assignment of " &
5372 Image (Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor)));
5373 Set_String
5374 (Node.VP.all,
5375 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
5376 Pop_Region;
5377 goto Succeed;
5379 -- Assign on match. This node sets up for the eventual assignment
5381 when PC_Assign_OnM =>
5382 Dout (Img (Node) & "registering deferred assignment");
5383 Stack (Stack_Base - 1).Node := Node;
5384 Push (CP_Assign'Access);
5385 Pop_Region;
5386 Assign_OnM := True;
5387 goto Succeed;
5389 -- Bal
5391 when PC_Bal =>
5392 Dout (Img (Node) & "matching or extending Bal");
5393 if Cursor >= Length or else Subject (Cursor + 1) = ')' then
5394 goto Fail;
5396 elsif Subject (Cursor + 1) = '(' then
5397 declare
5398 Paren_Count : Natural := 1;
5400 begin
5401 loop
5402 Cursor := Cursor + 1;
5404 if Cursor >= Length then
5405 goto Fail;
5407 elsif Subject (Cursor + 1) = '(' then
5408 Paren_Count := Paren_Count + 1;
5410 elsif Subject (Cursor + 1) = ')' then
5411 Paren_Count := Paren_Count - 1;
5412 exit when Paren_Count = 0;
5413 end if;
5414 end loop;
5415 end;
5416 end if;
5418 Cursor := Cursor + 1;
5419 Push (Node);
5420 goto Succeed;
5422 -- Break (one character case)
5424 when PC_Break_CH =>
5425 Dout (Img (Node) & "matching Break", Node.Char);
5427 while Cursor < Length loop
5428 if Subject (Cursor + 1) = Node.Char then
5429 goto Succeed;
5430 else
5431 Cursor := Cursor + 1;
5432 end if;
5433 end loop;
5435 goto Fail;
5437 -- Break (character set case)
5439 when PC_Break_CS =>
5440 Dout (Img (Node) & "matching Break", Node.CS);
5442 while Cursor < Length loop
5443 if Is_In (Subject (Cursor + 1), Node.CS) then
5444 goto Succeed;
5445 else
5446 Cursor := Cursor + 1;
5447 end if;
5448 end loop;
5450 goto Fail;
5452 -- Break (string function case)
5454 when PC_Break_VF => declare
5455 U : constant VString := Node.VF.all;
5456 Str : constant String_Access := Get_String (U);
5458 begin
5459 Dout (Img (Node) & "matching Break", Str.all);
5461 while Cursor < Length loop
5462 if Is_In (Subject (Cursor + 1), Str.all) then
5463 goto Succeed;
5464 else
5465 Cursor := Cursor + 1;
5466 end if;
5467 end loop;
5469 goto Fail;
5470 end;
5472 -- Break (string pointer case)
5474 when PC_Break_VP => declare
5475 Str : String_Access := Get_String (Node.VP.all);
5477 begin
5478 Dout (Img (Node) & "matching Break", Str.all);
5480 while Cursor < Length loop
5481 if Is_In (Subject (Cursor + 1), Str.all) then
5482 goto Succeed;
5483 else
5484 Cursor := Cursor + 1;
5485 end if;
5486 end loop;
5488 goto Fail;
5489 end;
5491 -- BreakX (one character case)
5493 when PC_BreakX_CH =>
5494 Dout (Img (Node) & "matching BreakX", Node.Char);
5496 while Cursor < Length loop
5497 if Subject (Cursor + 1) = Node.Char then
5498 goto Succeed;
5499 else
5500 Cursor := Cursor + 1;
5501 end if;
5502 end loop;
5504 goto Fail;
5506 -- BreakX (character set case)
5508 when PC_BreakX_CS =>
5509 Dout (Img (Node) & "matching BreakX", Node.CS);
5511 while Cursor < Length loop
5512 if Is_In (Subject (Cursor + 1), Node.CS) then
5513 goto Succeed;
5514 else
5515 Cursor := Cursor + 1;
5516 end if;
5517 end loop;
5519 goto Fail;
5521 -- BreakX (string function case)
5523 when PC_BreakX_VF => declare
5524 U : constant VString := Node.VF.all;
5525 Str : constant String_Access := Get_String (U);
5527 begin
5528 Dout (Img (Node) & "matching BreakX", Str.all);
5530 while Cursor < Length loop
5531 if Is_In (Subject (Cursor + 1), Str.all) then
5532 goto Succeed;
5533 else
5534 Cursor := Cursor + 1;
5535 end if;
5536 end loop;
5538 goto Fail;
5539 end;
5541 -- BreakX (string pointer case)
5543 when PC_BreakX_VP => declare
5544 Str : String_Access := Get_String (Node.VP.all);
5546 begin
5547 Dout (Img (Node) & "matching BreakX", Str.all);
5549 while Cursor < Length loop
5550 if Is_In (Subject (Cursor + 1), Str.all) then
5551 goto Succeed;
5552 else
5553 Cursor := Cursor + 1;
5554 end if;
5555 end loop;
5557 goto Fail;
5558 end;
5560 -- BreakX_X (BreakX extension). See section on "Compound Pattern
5561 -- Structures". This node is the alternative that is stacked
5562 -- to skip past the break character and extend the break.
5564 when PC_BreakX_X =>
5565 Dout (Img (Node) & "extending BreakX");
5567 Cursor := Cursor + 1;
5568 goto Succeed;
5570 -- Character (one character string)
5572 when PC_Char =>
5573 Dout (Img (Node) & "matching '" & Node.Char & ''');
5575 if Cursor < Length
5576 and then Subject (Cursor + 1) = Node.Char
5577 then
5578 Cursor := Cursor + 1;
5579 goto Succeed;
5580 else
5581 goto Fail;
5582 end if;
5584 -- End of Pattern
5586 when PC_EOP =>
5587 if Stack_Base = Stack_Init then
5588 Dout ("end of pattern");
5589 goto Match_Succeed;
5591 -- End of recursive inner match. See separate section on
5592 -- handing of recursive pattern matches for details.
5594 else
5595 Dout ("terminating recursive match");
5596 Node := Stack (Stack_Base - 1).Node;
5597 Pop_Region;
5598 goto Match;
5599 end if;
5601 -- Fail
5603 when PC_Fail =>
5604 Dout (Img (Node) & "matching Fail");
5605 goto Fail;
5607 -- Fence (built in pattern)
5609 when PC_Fence =>
5610 Dout (Img (Node) & "matching Fence");
5611 Push (CP_Cancel'Access);
5612 goto Succeed;
5614 -- Fence function node X. This is the node that gets control
5615 -- after a successful match of the fenced pattern.
5617 when PC_Fence_X =>
5618 Dout (Img (Node) & "matching Fence function");
5619 Stack_Ptr := Stack_Ptr + 1;
5620 Stack (Stack_Ptr).Cursor := Stack_Base;
5621 Stack (Stack_Ptr).Node := CP_Fence_Y'Access;
5622 Stack_Base := Stack (Stack_Base).Cursor;
5623 Region_Level := Region_Level - 1;
5624 goto Succeed;
5626 -- Fence function node Y. This is the node that gets control on
5627 -- a failure that occurs after the fenced pattern has matched.
5629 -- Note: the Cursor at this stage is actually the inner stack
5630 -- base value. We don't reset this, but we do use it to strip
5631 -- off all the entries made by the fenced pattern.
5633 when PC_Fence_Y =>
5634 Dout (Img (Node) & "pattern matched by Fence caused failure");
5635 Stack_Ptr := Cursor - 2;
5636 goto Fail;
5638 -- Len (integer case)
5640 when PC_Len_Nat =>
5641 Dout (Img (Node) & "matching Len", Node.Nat);
5643 if Cursor + Node.Nat > Length then
5644 goto Fail;
5645 else
5646 Cursor := Cursor + Node.Nat;
5647 goto Succeed;
5648 end if;
5650 -- Len (Integer function case)
5652 when PC_Len_NF => declare
5653 N : constant Natural := Node.NF.all;
5655 begin
5656 Dout (Img (Node) & "matching Len", N);
5658 if Cursor + N > Length then
5659 goto Fail;
5660 else
5661 Cursor := Cursor + N;
5662 goto Succeed;
5663 end if;
5664 end;
5666 -- Len (integer pointer case)
5668 when PC_Len_NP =>
5669 Dout (Img (Node) & "matching Len", Node.NP.all);
5671 if Cursor + Node.NP.all > Length then
5672 goto Fail;
5673 else
5674 Cursor := Cursor + Node.NP.all;
5675 goto Succeed;
5676 end if;
5678 -- NotAny (one character case)
5680 when PC_NotAny_CH =>
5681 Dout (Img (Node) & "matching NotAny", Node.Char);
5683 if Cursor < Length
5684 and then Subject (Cursor + 1) /= Node.Char
5685 then
5686 Cursor := Cursor + 1;
5687 goto Succeed;
5688 else
5689 goto Fail;
5690 end if;
5692 -- NotAny (character set case)
5694 when PC_NotAny_CS =>
5695 Dout (Img (Node) & "matching NotAny", Node.CS);
5697 if Cursor < Length
5698 and then not Is_In (Subject (Cursor + 1), Node.CS)
5699 then
5700 Cursor := Cursor + 1;
5701 goto Succeed;
5702 else
5703 goto Fail;
5704 end if;
5706 -- NotAny (string function case)
5708 when PC_NotAny_VF => declare
5709 U : constant VString := Node.VF.all;
5710 Str : constant String_Access := Get_String (U);
5712 begin
5713 Dout (Img (Node) & "matching NotAny", Str.all);
5715 if Cursor < Length
5716 and then
5717 not Is_In (Subject (Cursor + 1), Str.all)
5718 then
5719 Cursor := Cursor + 1;
5720 goto Succeed;
5721 else
5722 goto Fail;
5723 end if;
5724 end;
5726 -- NotAny (string pointer case)
5728 when PC_NotAny_VP => declare
5729 Str : String_Access := Get_String (Node.VP.all);
5731 begin
5732 Dout (Img (Node) & "matching NotAny", Str.all);
5734 if Cursor < Length
5735 and then
5736 not Is_In (Subject (Cursor + 1), Str.all)
5737 then
5738 Cursor := Cursor + 1;
5739 goto Succeed;
5740 else
5741 goto Fail;
5742 end if;
5743 end;
5745 -- NSpan (one character case)
5747 when PC_NSpan_CH =>
5748 Dout (Img (Node) & "matching NSpan", Node.Char);
5750 while Cursor < Length
5751 and then Subject (Cursor + 1) = Node.Char
5752 loop
5753 Cursor := Cursor + 1;
5754 end loop;
5756 goto Succeed;
5758 -- NSpan (character set case)
5760 when PC_NSpan_CS =>
5761 Dout (Img (Node) & "matching NSpan", Node.CS);
5763 while Cursor < Length
5764 and then Is_In (Subject (Cursor + 1), Node.CS)
5765 loop
5766 Cursor := Cursor + 1;
5767 end loop;
5769 goto Succeed;
5771 -- NSpan (string function case)
5773 when PC_NSpan_VF => declare
5774 U : constant VString := Node.VF.all;
5775 Str : constant String_Access := Get_String (U);
5777 begin
5778 Dout (Img (Node) & "matching NSpan", Str.all);
5780 while Cursor < Length
5781 and then Is_In (Subject (Cursor + 1), Str.all)
5782 loop
5783 Cursor := Cursor + 1;
5784 end loop;
5786 goto Succeed;
5787 end;
5789 -- NSpan (string pointer case)
5791 when PC_NSpan_VP => declare
5792 Str : String_Access := Get_String (Node.VP.all);
5794 begin
5795 Dout (Img (Node) & "matching NSpan", Str.all);
5797 while Cursor < Length
5798 and then Is_In (Subject (Cursor + 1), Str.all)
5799 loop
5800 Cursor := Cursor + 1;
5801 end loop;
5803 goto Succeed;
5804 end;
5806 when PC_Null =>
5807 Dout (Img (Node) & "matching null");
5808 goto Succeed;
5810 -- Pos (integer case)
5812 when PC_Pos_Nat =>
5813 Dout (Img (Node) & "matching Pos", Node.Nat);
5815 if Cursor = Node.Nat then
5816 goto Succeed;
5817 else
5818 goto Fail;
5819 end if;
5821 -- Pos (Integer function case)
5823 when PC_Pos_NF => declare
5824 N : constant Natural := Node.NF.all;
5826 begin
5827 Dout (Img (Node) & "matching Pos", N);
5829 if Cursor = N then
5830 goto Succeed;
5831 else
5832 goto Fail;
5833 end if;
5834 end;
5836 -- Pos (integer pointer case)
5838 when PC_Pos_NP =>
5839 Dout (Img (Node) & "matching Pos", Node.NP.all);
5841 if Cursor = Node.NP.all then
5842 goto Succeed;
5843 else
5844 goto Fail;
5845 end if;
5847 -- Predicate function
5849 when PC_Pred_Func =>
5850 Dout (Img (Node) & "matching predicate function");
5852 if Node.BF.all then
5853 goto Succeed;
5854 else
5855 goto Fail;
5856 end if;
5858 -- Region Enter. Initiate new pattern history stack region
5860 when PC_R_Enter =>
5861 Dout (Img (Node) & "starting match of nested pattern");
5862 Stack (Stack_Ptr + 1).Cursor := Cursor;
5863 Push_Region;
5864 goto Succeed;
5866 -- Region Remove node. This is the node stacked by an R_Enter.
5867 -- It removes the special format stack entry right underneath, and
5868 -- then restores the outer level stack base and signals failure.
5870 -- Note: the cursor value at this stage is actually the (negative)
5871 -- stack base value for the outer level.
5873 when PC_R_Remove =>
5874 Dout ("failure, match of nested pattern terminated");
5875 Stack_Base := Cursor;
5876 Region_Level := Region_Level - 1;
5877 Stack_Ptr := Stack_Ptr - 1;
5878 goto Fail;
5880 -- Region restore node. This is the node stacked at the end of an
5881 -- inner level match. Its function is to restore the inner level
5882 -- region, so that alternatives in this region can be sought.
5884 -- Note: the Cursor at this stage is actually the negative of the
5885 -- inner stack base value, which we use to restore the inner region.
5887 when PC_R_Restore =>
5888 Dout ("failure, search for alternatives in nested pattern");
5889 Region_Level := Region_Level + 1;
5890 Stack_Base := Cursor;
5891 goto Fail;
5893 -- Rest
5895 when PC_Rest =>
5896 Dout (Img (Node) & "matching Rest");
5897 Cursor := Length;
5898 goto Succeed;
5900 -- Initiate recursive match (pattern pointer case)
5902 when PC_Rpat =>
5903 Stack (Stack_Ptr + 1).Node := Node.Pthen;
5904 Push_Region;
5905 Dout (Img (Node) & "initiating recursive match");
5907 if Stack_Ptr + Node.PP.all.Stk >= Stack_Size then
5908 raise Pattern_Stack_Overflow;
5909 else
5910 Node := Node.PP.all.P;
5911 goto Match;
5912 end if;
5914 -- RPos (integer case)
5916 when PC_RPos_Nat =>
5917 Dout (Img (Node) & "matching RPos", Node.Nat);
5919 if Cursor = (Length - Node.Nat) then
5920 goto Succeed;
5921 else
5922 goto Fail;
5923 end if;
5925 -- RPos (integer function case)
5927 when PC_RPos_NF => declare
5928 N : constant Natural := Node.NF.all;
5930 begin
5931 Dout (Img (Node) & "matching RPos", N);
5933 if Length - Cursor = N then
5934 goto Succeed;
5935 else
5936 goto Fail;
5937 end if;
5938 end;
5940 -- RPos (integer pointer case)
5942 when PC_RPos_NP =>
5943 Dout (Img (Node) & "matching RPos", Node.NP.all);
5945 if Cursor = (Length - Node.NP.all) then
5946 goto Succeed;
5947 else
5948 goto Fail;
5949 end if;
5951 -- RTab (integer case)
5953 when PC_RTab_Nat =>
5954 Dout (Img (Node) & "matching RTab", Node.Nat);
5956 if Cursor <= (Length - Node.Nat) then
5957 Cursor := Length - Node.Nat;
5958 goto Succeed;
5959 else
5960 goto Fail;
5961 end if;
5963 -- RTab (integer function case)
5965 when PC_RTab_NF => declare
5966 N : constant Natural := Node.NF.all;
5968 begin
5969 Dout (Img (Node) & "matching RPos", N);
5971 if Length - Cursor >= N then
5972 Cursor := Length - N;
5973 goto Succeed;
5974 else
5975 goto Fail;
5976 end if;
5977 end;
5979 -- RTab (integer pointer case)
5981 when PC_RTab_NP =>
5982 Dout (Img (Node) & "matching RPos", Node.NP.all);
5984 if Cursor <= (Length - Node.NP.all) then
5985 Cursor := Length - Node.NP.all;
5986 goto Succeed;
5987 else
5988 goto Fail;
5989 end if;
5991 -- Cursor assignment
5993 when PC_Setcur =>
5994 Dout (Img (Node) & "matching Setcur");
5995 Node.Var.all := Cursor;
5996 goto Succeed;
5998 -- Span (one character case)
6000 when PC_Span_CH => declare
6001 P : Natural := Cursor;
6003 begin
6004 Dout (Img (Node) & "matching Span", Node.Char);
6006 while P < Length
6007 and then Subject (P + 1) = Node.Char
6008 loop
6009 P := P + 1;
6010 end loop;
6012 if P /= Cursor then
6013 Cursor := P;
6014 goto Succeed;
6015 else
6016 goto Fail;
6017 end if;
6018 end;
6020 -- Span (character set case)
6022 when PC_Span_CS => declare
6023 P : Natural := Cursor;
6025 begin
6026 Dout (Img (Node) & "matching Span", Node.CS);
6028 while P < Length
6029 and then Is_In (Subject (P + 1), Node.CS)
6030 loop
6031 P := P + 1;
6032 end loop;
6034 if P /= Cursor then
6035 Cursor := P;
6036 goto Succeed;
6037 else
6038 goto Fail;
6039 end if;
6040 end;
6042 -- Span (string function case)
6044 when PC_Span_VF => declare
6045 U : constant VString := Node.VF.all;
6046 Str : constant String_Access := Get_String (U);
6047 P : Natural := Cursor;
6049 begin
6050 Dout (Img (Node) & "matching Span", Str.all);
6052 while P < Length
6053 and then Is_In (Subject (P + 1), Str.all)
6054 loop
6055 P := P + 1;
6056 end loop;
6058 if P /= Cursor then
6059 Cursor := P;
6060 goto Succeed;
6061 else
6062 goto Fail;
6063 end if;
6064 end;
6066 -- Span (string pointer case)
6068 when PC_Span_VP => declare
6069 Str : String_Access := Get_String (Node.VP.all);
6070 P : Natural := Cursor;
6072 begin
6073 Dout (Img (Node) & "matching Span", Str.all);
6075 while P < Length
6076 and then Is_In (Subject (P + 1), Str.all)
6077 loop
6078 P := P + 1;
6079 end loop;
6081 if P /= Cursor then
6082 Cursor := P;
6083 goto Succeed;
6084 else
6085 goto Fail;
6086 end if;
6087 end;
6089 -- String (two character case)
6091 when PC_String_2 =>
6092 Dout (Img (Node) & "matching " & Image (Node.Str2));
6094 if (Length - Cursor) >= 2
6095 and then Subject (Cursor + 1 .. Cursor + 2) = Node.Str2
6096 then
6097 Cursor := Cursor + 2;
6098 goto Succeed;
6099 else
6100 goto Fail;
6101 end if;
6103 -- String (three character case)
6105 when PC_String_3 =>
6106 Dout (Img (Node) & "matching " & Image (Node.Str3));
6108 if (Length - Cursor) >= 3
6109 and then Subject (Cursor + 1 .. Cursor + 3) = Node.Str3
6110 then
6111 Cursor := Cursor + 3;
6112 goto Succeed;
6113 else
6114 goto Fail;
6115 end if;
6117 -- String (four character case)
6119 when PC_String_4 =>
6120 Dout (Img (Node) & "matching " & Image (Node.Str4));
6122 if (Length - Cursor) >= 4
6123 and then Subject (Cursor + 1 .. Cursor + 4) = Node.Str4
6124 then
6125 Cursor := Cursor + 4;
6126 goto Succeed;
6127 else
6128 goto Fail;
6129 end if;
6131 -- String (five character case)
6133 when PC_String_5 =>
6134 Dout (Img (Node) & "matching " & Image (Node.Str5));
6136 if (Length - Cursor) >= 5
6137 and then Subject (Cursor + 1 .. Cursor + 5) = Node.Str5
6138 then
6139 Cursor := Cursor + 5;
6140 goto Succeed;
6141 else
6142 goto Fail;
6143 end if;
6145 -- String (six character case)
6147 when PC_String_6 =>
6148 Dout (Img (Node) & "matching " & Image (Node.Str6));
6150 if (Length - Cursor) >= 6
6151 and then Subject (Cursor + 1 .. Cursor + 6) = Node.Str6
6152 then
6153 Cursor := Cursor + 6;
6154 goto Succeed;
6155 else
6156 goto Fail;
6157 end if;
6159 -- String (case of more than six characters)
6161 when PC_String => declare
6162 Len : constant Natural := Node.Str'Length;
6164 begin
6165 Dout (Img (Node) & "matching " & Image (Node.Str.all));
6167 if (Length - Cursor) >= Len
6168 and then Node.Str.all = Subject (Cursor + 1 .. Cursor + Len)
6169 then
6170 Cursor := Cursor + Len;
6171 goto Succeed;
6172 else
6173 goto Fail;
6174 end if;
6175 end;
6177 -- String (function case)
6179 when PC_String_VF => declare
6180 U : constant VString := Node.VF.all;
6181 Str : constant String_Access := Get_String (U);
6182 Len : constant Natural := Str'Length;
6184 begin
6185 Dout (Img (Node) & "matching " & Image (Str.all));
6187 if (Length - Cursor) >= Len
6188 and then Str.all = Subject (Cursor + 1 .. Cursor + Len)
6189 then
6190 Cursor := Cursor + Len;
6191 goto Succeed;
6192 else
6193 goto Fail;
6194 end if;
6195 end;
6197 -- String (vstring pointer case)
6199 when PC_String_VP => declare
6200 S : String_Access := Get_String (Node.VP.all);
6201 Len : constant Natural :=
6202 Ada.Strings.Unbounded.Length (Node.VP.all);
6204 begin
6205 Dout
6206 (Img (Node) & "matching " & Image (S.all));
6208 if (Length - Cursor) >= Len
6209 and then S.all = Subject (Cursor + 1 .. Cursor + Len)
6210 then
6211 Cursor := Cursor + Len;
6212 goto Succeed;
6213 else
6214 goto Fail;
6215 end if;
6216 end;
6218 -- Succeed
6220 when PC_Succeed =>
6221 Dout (Img (Node) & "matching Succeed");
6222 Push (Node);
6223 goto Succeed;
6225 -- Tab (integer case)
6227 when PC_Tab_Nat =>
6228 Dout (Img (Node) & "matching Tab", Node.Nat);
6230 if Cursor <= Node.Nat then
6231 Cursor := Node.Nat;
6232 goto Succeed;
6233 else
6234 goto Fail;
6235 end if;
6237 -- Tab (integer function case)
6239 when PC_Tab_NF => declare
6240 N : constant Natural := Node.NF.all;
6242 begin
6243 Dout (Img (Node) & "matching Tab ", N);
6245 if Cursor <= N then
6246 Cursor := N;
6247 goto Succeed;
6248 else
6249 goto Fail;
6250 end if;
6251 end;
6253 -- Tab (integer pointer case)
6255 when PC_Tab_NP =>
6256 Dout (Img (Node) & "matching Tab ", Node.NP.all);
6258 if Cursor <= Node.NP.all then
6259 Cursor := Node.NP.all;
6260 goto Succeed;
6261 else
6262 goto Fail;
6263 end if;
6265 -- Unanchored movement
6267 when PC_Unanchored =>
6268 Dout ("attempting to move anchor point");
6270 -- All done if we tried every position
6272 if Cursor > Length then
6273 goto Match_Fail;
6275 -- Otherwise extend the anchor point, and restack ourself
6277 else
6278 Cursor := Cursor + 1;
6279 Push (Node);
6280 goto Succeed;
6281 end if;
6283 -- Write immediate. This node performs the actual write
6285 when PC_Write_Imm =>
6286 Dout (Img (Node) & "executing immediate write of " &
6287 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
6289 Put_Line
6290 (Node.FP.all,
6291 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
6292 Pop_Region;
6293 goto Succeed;
6295 -- Write on match. This node sets up for the eventual write
6297 when PC_Write_OnM =>
6298 Dout (Img (Node) & "registering deferred write");
6299 Stack (Stack_Base - 1).Node := Node;
6300 Push (CP_Assign'Access);
6301 Pop_Region;
6302 Assign_OnM := True;
6303 goto Succeed;
6305 end case;
6307 -- We are NOT allowed to fall though this case statement, since every
6308 -- match routine must end by executing a goto to the appropriate point
6309 -- in the finite state machine model.
6311 Logic_Error;
6313 end XMatchD;
6315 end GNAT.Spitbol.Patterns;