PR c++/3637
[official-gcc.git] / gcc / ada / g-spipat.adb
blobfbacdb600c92b0c38f369b867314560c415476fe
1 ------------------------------------------------------------------------------
2 -- --
3 -- GNAT LIBRARY COMPONENTS --
4 -- --
5 -- G N A T . S P I T B O L . P A T T E R N S --
6 -- --
7 -- B o d y --
8 -- --
9 -- $Revision: 1.21 $
10 -- --
11 -- Copyright (C) 1998-2001, Ada Core Technologies, Inc. --
12 -- --
13 -- GNAT is free software; you can redistribute it and/or modify it under --
14 -- terms of the GNU General Public License as published by the Free Soft- --
15 -- ware Foundation; either version 2, or (at your option) any later ver- --
16 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
17 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
18 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
19 -- for more details. You should have received a copy of the GNU General --
20 -- Public License distributed with GNAT; see file COPYING. If not, write --
21 -- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, --
22 -- MA 02111-1307, USA. --
23 -- --
24 -- As a special exception, if other files instantiate generics from this --
25 -- unit, or you link this unit with other files to produce an executable, --
26 -- this unit does not by itself cause the resulting executable to be --
27 -- covered by the GNU General Public License. This exception does not --
28 -- however invalidate any other reasons why the executable file might be --
29 -- covered by the GNU Public License. --
30 -- --
31 -- GNAT is maintained by Ada Core Technologies Inc (http://www.gnat.com). --
32 -- --
33 ------------------------------------------------------------------------------
35 -- Note: the data structures and general approach used in this implementation
36 -- are derived from the original MINIMAL sources for SPITBOL. The code is not
37 -- a direct translation, but the approach is followed closely. In particular,
38 -- we use the one stack approach developed in the SPITBOL implementation.
40 with Ada.Exceptions; use Ada.Exceptions;
41 with Ada.Strings.Maps; use Ada.Strings.Maps;
42 with Ada.Strings.Unbounded.Aux; use Ada.Strings.Unbounded.Aux;
44 with GNAT.Debug_Utilities; use GNAT.Debug_Utilities;
46 with System; use System;
48 with Unchecked_Conversion;
49 with Unchecked_Deallocation;
51 package body GNAT.Spitbol.Patterns is
53 ------------------------
54 -- Internal Debugging --
55 ------------------------
57 Internal_Debug : constant Boolean := False;
58 -- Set this flag to True to activate some built-in debugging traceback
59 -- These are all lines output with PutD and Put_LineD.
61 procedure New_LineD;
62 pragma Inline (New_LineD);
63 -- Output new blank line with New_Line if Internal_Debug is True
65 procedure PutD (Str : String);
66 pragma Inline (PutD);
67 -- Output string with Put if Internal_Debug is True
69 procedure Put_LineD (Str : String);
70 pragma Inline (Put_LineD);
71 -- Output string with Put_Line if Internal_Debug is True
73 -----------------------------
74 -- Local Type Declarations --
75 -----------------------------
77 subtype String_Ptr is Ada.Strings.Unbounded.String_Access;
78 subtype File_Ptr is Ada.Text_IO.File_Access;
80 function To_PE_Ptr is new Unchecked_Conversion (Address, PE_Ptr);
81 function To_Address is new Unchecked_Conversion (PE_Ptr, Address);
82 -- Used only for debugging output purposes
84 subtype AFC is Ada.Finalization.Controlled;
86 N : constant PE_Ptr := null;
87 -- Shorthand used to initialize Copy fields to null
89 type Character_Ptr is access all Character;
90 type Natural_Ptr is access all Natural;
91 type Pattern_Ptr is access all Pattern;
93 --------------------------------------------------
94 -- Description of Algorithm and Data Structures --
95 --------------------------------------------------
97 -- A pattern structure is represented as a linked graph of nodes
98 -- with the following structure:
100 -- +------------------------------------+
101 -- I Pcode I
102 -- +------------------------------------+
103 -- I Index I
104 -- +------------------------------------+
105 -- I Pthen I
106 -- +------------------------------------+
107 -- I parameter(s) I
108 -- +------------------------------------+
110 -- Pcode is a code value indicating the type of the patterm node. This
111 -- code is used both as the discriminant value for the record, and as
112 -- the case index in the main match routine that branches to the proper
113 -- match code for the given element.
115 -- Index is a serial index number. The use of these serial index
116 -- numbers is described in a separate section.
118 -- Pthen is a pointer to the successor node, i.e the node to be matched
119 -- if the attempt to match the node succeeds. If this is the last node
120 -- of the pattern to be matched, then Pthen points to a dummy node
121 -- of kind PC_EOP (end of pattern), which initiales pattern exit.
123 -- The parameter or parameters are present for certain node types,
124 -- and the type varies with the pattern code.
126 type Pattern_Code is (
127 PC_Arb_Y,
128 PC_Assign,
129 PC_Bal,
130 PC_BreakX_X,
131 PC_Cancel,
132 PC_EOP,
133 PC_Fail,
134 PC_Fence,
135 PC_Fence_X,
136 PC_Fence_Y,
137 PC_R_Enter,
138 PC_R_Remove,
139 PC_R_Restore,
140 PC_Rest,
141 PC_Succeed,
142 PC_Unanchored,
144 PC_Alt,
145 PC_Arb_X,
146 PC_Arbno_S,
147 PC_Arbno_X,
149 PC_Rpat,
151 PC_Pred_Func,
153 PC_Assign_Imm,
154 PC_Assign_OnM,
155 PC_Any_VP,
156 PC_Break_VP,
157 PC_BreakX_VP,
158 PC_NotAny_VP,
159 PC_NSpan_VP,
160 PC_Span_VP,
161 PC_String_VP,
163 PC_Write_Imm,
164 PC_Write_OnM,
166 PC_Null,
167 PC_String,
169 PC_String_2,
170 PC_String_3,
171 PC_String_4,
172 PC_String_5,
173 PC_String_6,
175 PC_Setcur,
177 PC_Any_CH,
178 PC_Break_CH,
179 PC_BreakX_CH,
180 PC_Char,
181 PC_NotAny_CH,
182 PC_NSpan_CH,
183 PC_Span_CH,
185 PC_Any_CS,
186 PC_Break_CS,
187 PC_BreakX_CS,
188 PC_NotAny_CS,
189 PC_NSpan_CS,
190 PC_Span_CS,
192 PC_Arbno_Y,
193 PC_Len_Nat,
194 PC_Pos_Nat,
195 PC_RPos_Nat,
196 PC_RTab_Nat,
197 PC_Tab_Nat,
199 PC_Pos_NF,
200 PC_Len_NF,
201 PC_RPos_NF,
202 PC_RTab_NF,
203 PC_Tab_NF,
205 PC_Pos_NP,
206 PC_Len_NP,
207 PC_RPos_NP,
208 PC_RTab_NP,
209 PC_Tab_NP,
211 PC_Any_VF,
212 PC_Break_VF,
213 PC_BreakX_VF,
214 PC_NotAny_VF,
215 PC_NSpan_VF,
216 PC_Span_VF,
217 PC_String_VF);
219 type IndexT is range 0 .. +(2 **15 - 1);
221 type PE (Pcode : Pattern_Code) is record
223 Index : IndexT;
224 -- Serial index number of pattern element within pattern.
226 Pthen : PE_Ptr;
227 -- Successor element, to be matched after this one
229 case Pcode is
231 when PC_Arb_Y |
232 PC_Assign |
233 PC_Bal |
234 PC_BreakX_X |
235 PC_Cancel |
236 PC_EOP |
237 PC_Fail |
238 PC_Fence |
239 PC_Fence_X |
240 PC_Fence_Y |
241 PC_Null |
242 PC_R_Enter |
243 PC_R_Remove |
244 PC_R_Restore |
245 PC_Rest |
246 PC_Succeed |
247 PC_Unanchored => null;
249 when PC_Alt |
250 PC_Arb_X |
251 PC_Arbno_S |
252 PC_Arbno_X => Alt : PE_Ptr;
254 when PC_Rpat => PP : Pattern_Ptr;
256 when PC_Pred_Func => BF : Boolean_Func;
258 when PC_Assign_Imm |
259 PC_Assign_OnM |
260 PC_Any_VP |
261 PC_Break_VP |
262 PC_BreakX_VP |
263 PC_NotAny_VP |
264 PC_NSpan_VP |
265 PC_Span_VP |
266 PC_String_VP => VP : VString_Ptr;
268 when PC_Write_Imm |
269 PC_Write_OnM => FP : File_Ptr;
271 when PC_String => Str : String_Ptr;
273 when PC_String_2 => Str2 : String (1 .. 2);
275 when PC_String_3 => Str3 : String (1 .. 3);
277 when PC_String_4 => Str4 : String (1 .. 4);
279 when PC_String_5 => Str5 : String (1 .. 5);
281 when PC_String_6 => Str6 : String (1 .. 6);
283 when PC_Setcur => Var : Natural_Ptr;
285 when PC_Any_CH |
286 PC_Break_CH |
287 PC_BreakX_CH |
288 PC_Char |
289 PC_NotAny_CH |
290 PC_NSpan_CH |
291 PC_Span_CH => Char : Character;
293 when PC_Any_CS |
294 PC_Break_CS |
295 PC_BreakX_CS |
296 PC_NotAny_CS |
297 PC_NSpan_CS |
298 PC_Span_CS => CS : Character_Set;
300 when PC_Arbno_Y |
301 PC_Len_Nat |
302 PC_Pos_Nat |
303 PC_RPos_Nat |
304 PC_RTab_Nat |
305 PC_Tab_Nat => Nat : Natural;
307 when PC_Pos_NF |
308 PC_Len_NF |
309 PC_RPos_NF |
310 PC_RTab_NF |
311 PC_Tab_NF => NF : Natural_Func;
313 when PC_Pos_NP |
314 PC_Len_NP |
315 PC_RPos_NP |
316 PC_RTab_NP |
317 PC_Tab_NP => NP : Natural_Ptr;
319 when PC_Any_VF |
320 PC_Break_VF |
321 PC_BreakX_VF |
322 PC_NotAny_VF |
323 PC_NSpan_VF |
324 PC_Span_VF |
325 PC_String_VF => VF : VString_Func;
327 end case;
328 end record;
330 subtype PC_Has_Alt is Pattern_Code range PC_Alt .. PC_Arbno_X;
331 -- Range of pattern codes that has an Alt field. This is used in the
332 -- recursive traversals, since these links must be followed.
334 EOP_Element : aliased constant PE := (PC_EOP, 0, N);
335 -- This is the end of pattern element, and is thus the representation of
336 -- a null pattern. It has a zero index element since it is never placed
337 -- inside a pattern. Furthermore it does not need a successor, since it
338 -- marks the end of the pattern, so that no more successors are needed.
340 EOP : constant PE_Ptr := EOP_Element'Unrestricted_Access;
341 -- This is the end of pattern pointer, that is used in the Pthen pointer
342 -- of other nodes to signal end of pattern.
344 -- The following array is used to determine if a pattern used as an
345 -- argument for Arbno is eligible for treatment using the simple Arbno
346 -- structure (i.e. it is a pattern that is guaranteed to match at least
347 -- one character on success, and not to make any entries on the stack.
349 OK_For_Simple_Arbno :
350 array (Pattern_Code) of Boolean := (
351 PC_Any_CS |
352 PC_Any_CH |
353 PC_Any_VF |
354 PC_Any_VP |
355 PC_Char |
356 PC_Len_Nat |
357 PC_NotAny_CS |
358 PC_NotAny_CH |
359 PC_NotAny_VF |
360 PC_NotAny_VP |
361 PC_Span_CS |
362 PC_Span_CH |
363 PC_Span_VF |
364 PC_Span_VP |
365 PC_String |
366 PC_String_2 |
367 PC_String_3 |
368 PC_String_4 |
369 PC_String_5 |
370 PC_String_6 => True,
372 others => False);
374 -------------------------------
375 -- The Pattern History Stack --
376 -------------------------------
378 -- The pattern history stack is used for controlling backtracking when
379 -- a match fails. The idea is to stack entries that give a cursor value
380 -- to be restored, and a node to be reestablished as the current node to
381 -- attempt an appropriate rematch operation. The processing for a pattern
382 -- element that has rematch alternatives pushes an appropriate entry or
383 -- entry on to the stack, and the proceeds. If a match fails at any point,
384 -- the top element of the stack is popped off, resetting the cursor and
385 -- the match continues by accessing the node stored with this entry.
387 type Stack_Entry is record
389 Cursor : Integer;
390 -- Saved cursor value that is restored when this entry is popped
391 -- from the stack if a match attempt fails. Occasionally, this
392 -- field is used to store a history stack pointer instead of a
393 -- cursor. Such cases are noted in the documentation and the value
394 -- stored is negative since stack pointer values are always negative.
396 Node : PE_Ptr;
397 -- This pattern element reference is reestablished as the current
398 -- Node to be matched (which will attempt an appropriate rematch).
400 end record;
402 subtype Stack_Range is Integer range -Stack_Size .. -1;
404 type Stack_Type is array (Stack_Range) of Stack_Entry;
405 -- The type used for a history stack. The actual instance of the stack
406 -- is declared as a local variable in the Match routine, to properly
407 -- handle recursive calls to Match. All stack pointer values are negative
408 -- to distinguish them from normal cursor values.
410 -- Note: the pattern matching stack is used only to handle backtracking.
411 -- If no backtracking occurs, its entries are never accessed, and never
412 -- popped off, and in particular it is normal for a successful match
413 -- to terminate with entries on the stack that are simply discarded.
415 -- Note: in subsequent diagrams of the stack, we always place element
416 -- zero (the deepest element) at the top of the page, then build the
417 -- stack down on the page with the most recent (top of stack) element
418 -- being the bottom-most entry on the page.
420 -- Stack checking is handled by labeling every pattern with the maximum
421 -- number of stack entries that are required, so a single check at the
422 -- start of matching the pattern suffices. There are two exceptions.
424 -- First, the count does not include entries for recursive pattern
425 -- references. Such recursions must therefore perform a specific
426 -- stack check with respect to the number of stack entries required
427 -- by the recursive pattern that is accessed and the amount of stack
428 -- that remains unused.
430 -- Second, the count includes only one iteration of an Arbno pattern,
431 -- so a specific check must be made on subsequent iterations that there
432 -- is still enough stack space left. The Arbno node has a field that
433 -- records the number of stack entries required by its argument for
434 -- this purpose.
436 ---------------------------------------------------
437 -- Use of Serial Index Field in Pattern Elements --
438 ---------------------------------------------------
440 -- The serial index numbers for the pattern elements are assigned as
441 -- a pattern is consructed from its constituent elements. Note that there
442 -- is never any sharing of pattern elements between patterns (copies are
443 -- always made), so the serial index numbers are unique to a particular
444 -- pattern as referenced from the P field of a value of type Pattern.
446 -- The index numbers meet three separate invariants, which are used for
447 -- various purposes as described in this section.
449 -- First, the numbers uniquely identify the pattern elements within a
450 -- pattern. If Num is the number of elements in a given pattern, then
451 -- the serial index numbers for the elements of this pattern will range
452 -- from 1 .. Num, so that each element has a separate value.
454 -- The purpose of this assignment is to provide a convenient auxiliary
455 -- data structure mechanism during operations which must traverse a
456 -- pattern (e.g. copy and finalization processing). Once constructed
457 -- patterns are strictly read only. This is necessary to allow sharing
458 -- of patterns between tasks. This means that we cannot go marking the
459 -- pattern (e.g. with a visited bit). Instead we cosntuct a separate
460 -- vector that contains the necessary information indexed by the Index
461 -- values in the pattern elements. For this purpose the only requirement
462 -- is that they be uniquely assigned.
464 -- Second, the pattern element referenced directly, i.e. the leading
465 -- pattern element, is always the maximum numbered element and therefore
466 -- indicates the total number of elements in the pattern. More precisely,
467 -- the element referenced by the P field of a pattern value, or the
468 -- element returned by any of the internal pattern construction routines
469 -- in the body (that return a value of type PE_Ptr) always is this
470 -- maximum element,
472 -- The purpose of this requirement is to allow an immediate determination
473 -- of the number of pattern elements within a pattern. This is used to
474 -- properly size the vectors used to contain auxiliary information for
475 -- traversal as described above.
477 -- Third, as compound pattern structures are constructed, the way in which
478 -- constituent parts of the pattern are constructed is stylized. This is
479 -- an automatic consequence of the way that these compounjd structures
480 -- are constructed, and basically what we are doing is simply documenting
481 -- and specifying the natural result of the pattern construction. The
482 -- section describing compound pattern structures gives details of the
483 -- numbering of each compound pattern structure.
485 -- The purpose of specifying the stylized numbering structures for the
486 -- compound patterns is to help simplify the processing in the Image
487 -- function, since it eases the task of retrieving the original recursive
488 -- structure of the pattern from the flat graph structure of elements.
489 -- This use in the Image function is the only point at which the code
490 -- makes use of the stylized structures.
492 type Ref_Array is array (IndexT range <>) of PE_Ptr;
493 -- This type is used to build an array whose N'th entry references the
494 -- element in a pattern whose Index value is N. See Build_Ref_Array.
496 procedure Build_Ref_Array (E : PE_Ptr; RA : out Ref_Array);
497 -- Given a pattern element which is the leading element of a pattern
498 -- structure, and a Ref_Array with bounds 1 .. E.Index, fills in the
499 -- Ref_Array so that its N'th entry references the element of the
500 -- referenced pattern whose Index value is N.
502 -------------------------------
503 -- Recursive Pattern Matches --
504 -------------------------------
506 -- The pattern primitive (+P) where P is a Pattern_Ptr or Pattern_Func
507 -- causes a recursive pattern match. This cannot be handled by an actual
508 -- recursive call to the outer level Match routine, since this would not
509 -- allow for possible backtracking into the region matched by the inner
510 -- pattern. Indeed this is the classical clash between recursion and
511 -- backtracking, and a simple recursive stack structure does not suffice.
513 -- This section describes how this recursion and the possible associated
514 -- backtracking is handled. We still use a single stack, but we establish
515 -- the concept of nested regions on this stack, each of which has a stack
516 -- base value pointing to the deepest stack entry of the region. The base
517 -- value for the outer level is zero.
519 -- When a recursive match is established, two special stack entries are
520 -- made. The first entry is used to save the original node that starts
521 -- the recursive match. This is saved so that the successor field of
522 -- this node is accessible at the end of the match, but it is never
523 -- popped and executed.
525 -- The second entry corresponds to a standard new region action. A
526 -- PC_R_Remove node is stacked, whose cursor field is used to store
527 -- the outer stack base, and the stack base is reset to point to
528 -- this PC_R_Remove node. Then the recursive pattern is matched and
529 -- it can make history stack entries in the normal matter, so now
530 -- the stack looks like:
532 -- (stack entries made by outer level)
534 -- (Special entry, node is (+P) successor
535 -- cursor entry is not used)
537 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack base
538 -- saved base value for the enclosing region)
540 -- (stack entries made by inner level)
542 -- If a subsequent failure occurs and pops the PC_R_Remove node, it
543 -- removes itself and the special entry immediately underneath it,
544 -- restores the stack base value for the enclosing region, and then
545 -- again signals failure to look for alternatives that were stacked
546 -- before the recursion was initiated.
548 -- Now we need to consider what happens if the inner pattern succeeds, as
549 -- signalled by accessing the special PC_EOP pattern primitive. First we
550 -- recognize the nested case by looking at the Base value. If this Base
551 -- value is Stack'First, then the entire match has succeeded, but if the
552 -- base value is greater than Stack'First, then we have successfully
553 -- matched an inner pattern, and processing continues at the outer level.
555 -- There are two cases. The simple case is when the inner pattern has made
556 -- no stack entries, as recognized by the fact that the current stack
557 -- pointer is equal to the current base value. In this case it is fine to
558 -- remove all trace of the recursion by restoring the outer base value and
559 -- using the special entry to find the appropriate successor node.
561 -- The more complex case arises when the inner match does make stack
562 -- entries. In this case, the PC_EOP processing stacks a special entry
563 -- whose cursor value saves the saved inner base value (the one that
564 -- references the corresponding PC_R_Remove value), and whose node
565 -- pointer references a PC_R_Restore node, so the stack looks like:
567 -- (stack entries made by outer level)
569 -- (Special entry, node is (+P) successor,
570 -- cursor entry is not used)
572 -- (PC_R_Remove entry, "cursor" value is (negative)
573 -- saved base value for the enclosing region)
575 -- (stack entries made by inner level)
577 -- (PC_Region_Replace entry, "cursor" value is (negative)
578 -- stack pointer value referencing the PC_R_Remove entry).
580 -- If the entire match succeeds, then these stack entries are, as usual,
581 -- ignored and abandoned. If on the other hand a subsequent failure
582 -- causes the PC_Region_Replace entry to be popped, it restores the
583 -- inner base value from its saved "cursor" value and then fails again.
584 -- Note that it is OK that the cursor is temporarily clobbered by this
585 -- pop, since the second failure will reestablish a proper cursor value.
587 ---------------------------------
588 -- Compound Pattern Structures --
589 ---------------------------------
591 -- This section discusses the compound structures used to represent
592 -- constructed patterns. It shows the graph structures of pattern
593 -- elements that are constructed, and in the case of patterns that
594 -- provide backtracking possibilities, describes how the history
595 -- stack is used to control the backtracking. Finally, it notes the
596 -- way in which the Index numbers are assigned to the structure.
598 -- In all diagrams, solid lines (built witth minus signs or vertical
599 -- bars, represent successor pointers (Pthen fields) with > or V used
600 -- to indicate the direction of the pointer. The initial node of the
601 -- structure is in the upper left of the diagram. A dotted line is an
602 -- alternative pointer from the element above it to the element below
603 -- it. See individual sections for details on how alternatives are used.
605 -------------------
606 -- Concatenation --
607 -------------------
609 -- In the pattern structures listed in this section, a line that looks
610 -- lile ----> with nothing to the right indicates an end of pattern
611 -- (EOP) pointer that represents the end of the match.
613 -- When a pattern concatenation (L & R) occurs, the resulting structure
614 -- is obtained by finding all such EOP pointers in L, and replacing
615 -- them to point to R. This is the most important flattening that
616 -- occurs in constructing a pattern, and it means that the pattern
617 -- matching circuitry does not have to keep track of the structure
618 -- of a pattern with respect to concatenation, since the appropriate
619 -- succesor is always at hand.
621 -- Concatenation itself generates no additional possibilities for
622 -- backtracking, but the constituent patterns of the concatenated
623 -- structure will make stack entries as usual. The maximum amount
624 -- of stack required by the structure is thus simply the sum of the
625 -- maximums required by L and R.
627 -- The index numbering of a concatenation structure works by leaving
628 -- the numbering of the right hand pattern, R, unchanged and adjusting
629 -- the numbers in the left hand pattern, L up by the count of elements
630 -- in R. This ensures that the maximum numbered element is the leading
631 -- element as required (given that it was the leading element in L).
633 -----------------
634 -- Alternation --
635 -----------------
637 -- A pattern (L or R) constructs the structure:
639 -- +---+ +---+
640 -- | A |---->| L |---->
641 -- +---+ +---+
642 -- .
643 -- .
644 -- +---+
645 -- | R |---->
646 -- +---+
648 -- The A element here is a PC_Alt node, and the dotted line represents
649 -- the contents of the Alt field. When the PC_Alt element is matched,
650 -- it stacks a pointer to the leading element of R on the history stack
651 -- so that on subsequent failure, a match of R is attempted.
653 -- The A node is the higest numbered element in the pattern. The
654 -- original index numbers of R are unchanged, but the index numbers
655 -- of the L pattern are adjusted up by the count of elements in R.
657 -- Note that the difference between the index of the L leading element
658 -- the index of the R leading element (after building the alt structure)
659 -- indicates the number of nodes in L, and this is true even after the
660 -- structure is incorporated into some larger structure. For example,
661 -- if the A node has index 16, and L has index 15 and R has index
662 -- 5, then we know that L has 10 (15-5) elements in it.
664 -- Suppose that we now concatenate this structure to another pattern
665 -- with 9 elements in it. We will now have the A node with an index
666 -- of 25, L with an index of 24 and R with an index of 14. We still
667 -- know that L has 10 (24-14) elements in it, numbered 15-24, and
668 -- consequently the successor of the alternation structure has an
669 -- index with a value less than 15. This is used in Image to figure
670 -- out the original recursive structure of a pattern.
672 -- To clarify the interaction of the alternation and concatenation
673 -- structures, here is a more complex example of the structure built
674 -- for the pattern:
676 -- (V or W or X) (Y or Z)
678 -- where A,B,C,D,E are all single element patterns:
680 -- +---+ +---+ +---+ +---+
681 -- I A I---->I V I---+-->I A I---->I Y I---->
682 -- +---+ +---+ I +---+ +---+
683 -- . I .
684 -- . I .
685 -- +---+ +---+ I +---+
686 -- I A I---->I W I-->I I Z I---->
687 -- +---+ +---+ I +---+
688 -- . I
689 -- . I
690 -- +---+ I
691 -- I X I------------>+
692 -- +---+
694 -- The numbering of the nodes would be as follows:
696 -- +---+ +---+ +---+ +---+
697 -- I 8 I---->I 7 I---+-->I 3 I---->I 2 I---->
698 -- +---+ +---+ I +---+ +---+
699 -- . I .
700 -- . I .
701 -- +---+ +---+ I +---+
702 -- I 6 I---->I 5 I-->I I 1 I---->
703 -- +---+ +---+ I +---+
704 -- . I
705 -- . I
706 -- +---+ I
707 -- I 4 I------------>+
708 -- +---+
710 -- Note: The above structure actually corresponds to
712 -- (A or (B or C)) (D or E)
714 -- rather than
716 -- ((A or B) or C) (D or E)
718 -- which is the more natural interpretation, but in fact alternation
719 -- is associative, and the construction of an alternative changes the
720 -- left grouped pattern to the right grouped pattern in any case, so
721 -- that the Image function produces a more natural looking output.
723 ---------
724 -- Arb --
725 ---------
727 -- An Arb pattern builds the structure
729 -- +---+
730 -- | X |---->
731 -- +---+
732 -- .
733 -- .
734 -- +---+
735 -- | Y |---->
736 -- +---+
738 -- The X node is a PC_Arb_X node, which matches null, and stacks a
739 -- pointer to Y node, which is the PC_Arb_Y node that matches one
740 -- extra character and restacks itself.
742 -- The PC_Arb_X node is numbered 2, and the PC_Arb_Y node is 1.
744 -------------------------
745 -- Arbno (simple case) --
746 -------------------------
748 -- The simple form of Arbno can be used where the pattern always
749 -- matches at least one character if it succeeds, and it is known
750 -- not to make any history stack entries. In this case, Arbno (P)
751 -- can construct the following structure:
753 -- +-------------+
754 -- | ^
755 -- V |
756 -- +---+ |
757 -- | S |----> |
758 -- +---+ |
759 -- . |
760 -- . |
761 -- +---+ |
762 -- | P |---------->+
763 -- +---+
765 -- The S (PC_Arbno_S) node matches null stacking a pointer to the
766 -- pattern P. If a subsequent failure causes P to be matched and
767 -- this match succeeds, then node A gets restacked to try another
768 -- instance if needed by a subsequent failure.
770 -- The node numbering of the constituent pattern P is not affected.
771 -- The S node has a node number of P.Index + 1.
773 --------------------------
774 -- Arbno (complex case) --
775 --------------------------
777 -- A call to Arbno (P), where P can match null (or at least is not
778 -- known to require a non-null string) and/or P requires pattern stack
779 -- entries, constructs the following structure:
781 -- +--------------------------+
782 -- | ^
783 -- V |
784 -- +---+ |
785 -- | X |----> |
786 -- +---+ |
787 -- . |
788 -- . |
789 -- +---+ +---+ +---+ |
790 -- | E |---->| P |---->| Y |--->+
791 -- +---+ +---+ +---+
793 -- The node X (PC_Arbno_X) matches null, stacking a pointer to the
794 -- E-P-X structure used to match one Arbno instance.
796 -- Here E is the PC_R_Enter node which matches null and creates two
797 -- stack entries. The first is a special entry whose node field is
798 -- not used at all, and whose cursor field has the initial cursor.
800 -- The second entry corresponds to a standard new region action. A
801 -- PC_R_Remove node is stacked, whose cursor field is used to store
802 -- the outer stack base, and the stack base is reset to point to
803 -- this PC_R_Remove node. Then the pattern P is matched, and it can
804 -- make history stack entries in the normal manner, so now the stack
805 -- looks like:
807 -- (stack entries made before assign pattern)
809 -- (Special entry, node field not used,
810 -- used only to save initial cursor)
812 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base
813 -- saved base value for the enclosing region)
815 -- (stack entries made by matching P)
817 -- If the match of P fails, then the PC_R_Remove entry is popped and
818 -- it removes both itself and the special entry underneath it,
819 -- restores the outer stack base, and signals failure.
821 -- If the match of P succeeds, then node Y, the PC_Arbno_Y node, pops
822 -- the inner region. There are two possibilities. If matching P left
823 -- no stack entries, then all traces of the inner region can be removed.
824 -- If there are stack entries, then we push an PC_Region_Replace stack
825 -- entry whose "cursor" value is the inner stack base value, and then
826 -- restore the outer stack base value, so the stack looks like:
828 -- (stack entries made before assign pattern)
830 -- (Special entry, node field not used,
831 -- used only to save initial cursor)
833 -- (PC_R_Remove entry, "cursor" value is (negative)
834 -- saved base value for the enclosing region)
836 -- (stack entries made by matching P)
838 -- (PC_Region_Replace entry, "cursor" value is (negative)
839 -- stack pointer value referencing the PC_R_Remove entry).
841 -- Now that we have matched another instance of the Arbno pattern,
842 -- we need to move to the successor. There are two cases. If the
843 -- Arbno pattern matched null, then there is no point in seeking
844 -- alternatives, since we would just match a whole bunch of nulls.
845 -- In this case we look through the alternative node, and move
846 -- directly to its successor (i.e. the successor of the Arbno
847 -- pattern). If on the other hand a non-null string was matched,
848 -- we simply follow the successor to the alternative node, which
849 -- sets up for another possible match of the Arbno pattern.
851 -- As noted in the section on stack checking, the stack count (and
852 -- hence the stack check) for a pattern includes only one iteration
853 -- of the Arbno pattern. To make sure that multiple iterations do not
854 -- overflow the stack, the Arbno node saves the stack count required
855 -- by a single iteration, and the Concat function increments this to
856 -- include stack entries required by any successor. The PC_Arbno_Y
857 -- node uses this count to ensure that sufficient stack remains
858 -- before proceeding after matching each new instance.
860 -- The node numbering of the constituent pattern P is not affected.
861 -- Where N is the number of nodes in P, the Y node is numbered N + 1,
862 -- the E node is N + 2, and the X node is N + 3.
864 ----------------------
865 -- Assign Immediate --
866 ----------------------
868 -- Immediate assignment (P * V) constructs the following structure
870 -- +---+ +---+ +---+
871 -- | E |---->| P |---->| A |---->
872 -- +---+ +---+ +---+
874 -- Here E is the PC_R_Enter node which matches null and creates two
875 -- stack entries. The first is a special entry whose node field is
876 -- not used at all, and whose cursor field has the initial cursor.
878 -- The second entry corresponds to a standard new region action. A
879 -- PC_R_Remove node is stacked, whose cursor field is used to store
880 -- the outer stack base, and the stack base is reset to point to
881 -- this PC_R_Remove node. Then the pattern P is matched, and it can
882 -- make history stack entries in the normal manner, so now the stack
883 -- looks like:
885 -- (stack entries made before assign pattern)
887 -- (Special entry, node field not used,
888 -- used only to save initial cursor)
890 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base
891 -- saved base value for the enclosing region)
893 -- (stack entries made by matching P)
895 -- If the match of P fails, then the PC_R_Remove entry is popped
896 -- and it removes both itself and the special entry underneath it,
897 -- restores the outer stack base, and signals failure.
899 -- If the match of P succeeds, then node A, which is the actual
900 -- PC_Assign_Imm node, executes the assignment (using the stack
901 -- base to locate the entry with the saved starting cursor value),
902 -- and the pops the inner region. There are two possibilities, if
903 -- matching P left no stack entries, then all traces of the inner
904 -- region can be removed. If there are stack entries, then we push
905 -- an PC_Region_Replace stack entry whose "cursor" value is the
906 -- inner stack base value, and then restore the outer stack base
907 -- value, so the stack looks like:
909 -- (stack entries made before assign pattern)
911 -- (Special entry, node field not used,
912 -- used only to save initial cursor)
914 -- (PC_R_Remove entry, "cursor" value is (negative)
915 -- saved base value for the enclosing region)
917 -- (stack entries made by matching P)
919 -- (PC_Region_Replace entry, "cursor" value is the (negative)
920 -- stack pointer value referencing the PC_R_Remove entry).
922 -- If a subsequent failure occurs, the PC_Region_Replace node restores
923 -- the inner stack base value and signals failure to explore rematches
924 -- of the pattern P.
926 -- The node numbering of the constituent pattern P is not affected.
927 -- Where N is the number of nodes in P, the A node is numbered N + 1,
928 -- and the E node is N + 2.
930 ---------------------
931 -- Assign On Match --
932 ---------------------
934 -- The assign on match (**) pattern is quite similar to the assign
935 -- immediate pattern, except that the actual assignment has to be
936 -- delayed. The following structure is constructed:
938 -- +---+ +---+ +---+
939 -- | E |---->| P |---->| A |---->
940 -- +---+ +---+ +---+
942 -- The operation of this pattern is identical to that described above
943 -- for deferred assignment, up to the point where P has been matched.
945 -- The A node, which is the PC_Assign_OnM node first pushes a
946 -- PC_Assign node onto the history stack. This node saves the ending
947 -- cursor and acts as a flag for the final assignment, as further
948 -- described below.
950 -- It then stores a pointer to itself in the special entry node field.
951 -- This was otherwise unused, and is now used to retrive the address
952 -- of the variable to be assigned at the end of the pattern.
954 -- After that the inner region is terminated in the usual manner,
955 -- by stacking a PC_R_Restore entry as described for the assign
956 -- immediate case. Note that the optimization of completely
957 -- removing the inner region does not happen in this case, since
958 -- we have at least one stack entry (the PC_Assign one we just made).
959 -- The stack now looks like:
961 -- (stack entries made before assign pattern)
963 -- (Special entry, node points to copy of
964 -- the PC_Assign_OnM node, and the
965 -- cursor field saves the initial cursor).
967 -- (PC_R_Remove entry, "cursor" value is (negative)
968 -- saved base value for the enclosing region)
970 -- (stack entries made by matching P)
972 -- (PC_Assign entry, saves final cursor)
974 -- (PC_Region_Replace entry, "cursor" value is (negative)
975 -- stack pointer value referencing the PC_R_Remove entry).
977 -- If a subsequent failure causes the PC_Assign node to execute it
978 -- simply removes itself and propagates the failure.
980 -- If the match succeeds, then the history stack is scanned for
981 -- PC_Assign nodes, and the assignments are executed (examination
982 -- of the above diagram will show that all the necessary data is
983 -- at hand for the assignment).
985 -- To optimize the common case where no assign-on-match operations
986 -- are present, a global flag Assign_OnM is maintained which is
987 -- initialize to False, and gets set True as part of the execution
988 -- of the PC_Assign_OnM node. The scan of the history stack for
989 -- PC_Assign entries is done only if this flag is set.
991 -- The node numbering of the constituent pattern P is not affected.
992 -- Where N is the number of nodes in P, the A node is numbered N + 1,
993 -- and the E node is N + 2.
995 ---------
996 -- Bal --
997 ---------
999 -- Bal builds a single node:
1001 -- +---+
1002 -- | B |---->
1003 -- +---+
1005 -- The node B is the PC_Bal node which matches a parentheses balanced
1006 -- string, starting at the current cursor position. It then updates
1007 -- the cursor past this matched string, and stacks a pointer to itself
1008 -- with this updated cursor value on the history stack, to extend the
1009 -- matched string on a subequent failure.
1011 -- Since this is a single node it is numbered 1 (the reason we include
1012 -- it in the compound patterns section is that it backtracks).
1014 ------------
1015 -- BreakX --
1016 ------------
1018 -- BreakX builds the structure
1020 -- +---+ +---+
1021 -- | B |---->| A |---->
1022 -- +---+ +---+
1023 -- ^ .
1024 -- | .
1025 -- | +---+
1026 -- +<------| X |
1027 -- +---+
1029 -- Here the B node is the BreakX_xx node that performs a normal Break
1030 -- function. The A node is an alternative (PC_Alt) node that matches
1031 -- null, but stacks a pointer to node X (the PC_BreakX_X node) which
1032 -- extends the match one character (to eat up the previously detected
1033 -- break character), and then rematches the break.
1035 -- The B node is numbered 3, the alternative node is 1, and the X
1036 -- node is 2.
1038 -----------
1039 -- Fence --
1040 -----------
1042 -- Fence builds a single node:
1044 -- +---+
1045 -- | F |---->
1046 -- +---+
1048 -- The element F, PC_Fence, matches null, and stacks a pointer to a
1049 -- PC_Cancel element which will abort the match on a subsequent failure.
1051 -- Since this is a single element it is numbered 1 (the reason we
1052 -- include it in the compound patterns section is that it backtracks).
1054 --------------------
1055 -- Fence Function --
1056 --------------------
1058 -- A call to the Fence function builds the structure:
1060 -- +---+ +---+ +---+
1061 -- | E |---->| P |---->| X |---->
1062 -- +---+ +---+ +---+
1064 -- Here E is the PC_R_Enter node which matches null and creates two
1065 -- stack entries. The first is a special entry which is not used at
1066 -- all in the fence case (it is present merely for uniformity with
1067 -- other cases of region enter operations).
1069 -- The second entry corresponds to a standard new region action. A
1070 -- PC_R_Remove node is stacked, whose cursor field is used to store
1071 -- the outer stack base, and the stack base is reset to point to
1072 -- this PC_R_Remove node. Then the pattern P is matched, and it can
1073 -- make history stack entries in the normal manner, so now the stack
1074 -- looks like:
1076 -- (stack entries made before fence pattern)
1078 -- (Special entry, not used at all)
1080 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base
1081 -- saved base value for the enclosing region)
1083 -- (stack entries made by matching P)
1085 -- If the match of P fails, then the PC_R_Remove entry is popped
1086 -- and it removes both itself and the special entry underneath it,
1087 -- restores the outer stack base, and signals failure.
1089 -- If the match of P succeeds, then node X, the PC_Fence_X node, gets
1090 -- control. One might be tempted to think that at this point, the
1091 -- history stack entries made by matching P can just be removed since
1092 -- they certainly are not going to be used for rematching (that is
1093 -- whole point of Fence after all!) However, this is wrong, because
1094 -- it would result in the loss of possible assign-on-match entries
1095 -- for deferred pattern assignments.
1097 -- Instead what we do is to make a special entry whose node references
1098 -- PC_Fence_Y, and whose cursor saves the inner stack base value, i.e.
1099 -- the pointer to the PC_R_Remove entry. Then the outer stack base
1100 -- pointer is restored, so the stack looks like:
1102 -- (stack entries made before assign pattern)
1104 -- (Special entry, not used at all)
1106 -- (PC_R_Remove entry, "cursor" value is (negative)
1107 -- saved base value for the enclosing region)
1109 -- (stack entries made by matching P)
1111 -- (PC_Fence_Y entry, "cursor" value is (negative) stack
1112 -- pointer value referencing the PC_R_Remove entry).
1114 -- If a subsequent failure occurs, then the PC_Fence_Y entry removes
1115 -- the entire inner region, including all entries made by matching P,
1116 -- and alternatives prior to the Fence pattern are sought.
1118 -- The node numbering of the constituent pattern P is not affected.
1119 -- Where N is the number of nodes in P, the X node is numbered N + 1,
1120 -- and the E node is N + 2.
1122 -------------
1123 -- Succeed --
1124 -------------
1126 -- Succeed builds a single node:
1128 -- +---+
1129 -- | S |---->
1130 -- +---+
1132 -- The node S is the PC_Succeed node which matches null, and stacks
1133 -- a pointer to itself on the history stack, so that a subsequent
1134 -- failure repeats the same match.
1136 -- Since this is a single node it is numbered 1 (the reason we include
1137 -- it in the compound patterns section is that it backtracks).
1139 ---------------------
1140 -- Write Immediate --
1141 ---------------------
1143 -- The structure built for a write immediate operation (P * F, where
1144 -- F is a file access value) is:
1146 -- +---+ +---+ +---+
1147 -- | E |---->| P |---->| W |---->
1148 -- +---+ +---+ +---+
1150 -- Here E is the PC_R_Enter node and W is the PC_Write_Imm node. The
1151 -- handling is identical to that described above for Assign Immediate,
1152 -- except that at the point where a successful match occurs, the matched
1153 -- substring is written to the referenced file.
1155 -- The node numbering of the constituent pattern P is not affected.
1156 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1157 -- and the E node is N + 2.
1159 --------------------
1160 -- Write On Match --
1161 --------------------
1163 -- The structure built for a write on match operation (P ** F, where
1164 -- F is a file access value) is:
1166 -- +---+ +---+ +---+
1167 -- | E |---->| P |---->| W |---->
1168 -- +---+ +---+ +---+
1170 -- Here E is the PC_R_Enter node and W is the PC_Write_OnM node. The
1171 -- handling is identical to that described above for Assign On Match,
1172 -- except that at the point where a successful match has completed,
1173 -- the matched substring is written to the referenced file.
1175 -- The node numbering of the constituent pattern P is not affected.
1176 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1177 -- and the E node is N + 2.
1178 -----------------------
1179 -- Constant Patterns --
1180 -----------------------
1182 -- The following pattern elements are referenced only from the pattern
1183 -- history stack. In each case the processing for the pattern element
1184 -- results in pattern match abort, or futher failure, so there is no
1185 -- need for a successor and no need for a node number
1187 CP_Assign : aliased PE := (PC_Assign, 0, N);
1188 CP_Cancel : aliased PE := (PC_Cancel, 0, N);
1189 CP_Fence_Y : aliased PE := (PC_Fence_Y, 0, N);
1190 CP_R_Remove : aliased PE := (PC_R_Remove, 0, N);
1191 CP_R_Restore : aliased PE := (PC_R_Restore, 0, N);
1193 -----------------------
1194 -- Local Subprograms --
1195 -----------------------
1197 function Alternate (L, R : PE_Ptr) return PE_Ptr;
1198 function "or" (L, R : PE_Ptr) return PE_Ptr renames Alternate;
1199 -- Build pattern structure corresponding to the alternation of L, R.
1200 -- (i.e. try to match L, and if that fails, try to match R).
1202 function Arbno_Simple (P : PE_Ptr) return PE_Ptr;
1203 -- Build simple Arbno pattern, P is a pattern that is guaranteed to
1204 -- match at least one character if it succeeds and to require no
1205 -- stack entries under all circumstances. The result returned is
1206 -- a simple Arbno structure as previously described.
1208 function Bracket (E, P, A : PE_Ptr) return PE_Ptr;
1209 -- Given two single node pattern elements E and A, and a (possible
1210 -- complex) pattern P, construct the concatenation E-->P-->A and
1211 -- return a pointer to E. The concatenation does not affect the
1212 -- node numbering in P. A has a number one higher than the maximum
1213 -- number in P, and E has a number two higher than the maximum
1214 -- number in P (see for example the Assign_Immediate structure to
1215 -- understand a typical use of this function).
1217 function BreakX_Make (B : PE_Ptr) return Pattern;
1218 -- Given a pattern element for a Break patternx, returns the
1219 -- corresponding BreakX compound pattern structure.
1221 function Concat (L, R : PE_Ptr; Incr : Natural) return PE_Ptr;
1222 -- Creates a pattern eelement that represents a concatenation of the
1223 -- two given pattern elements (i.e. the pattern L followed by R).
1224 -- The result returned is always the same as L, but the pattern
1225 -- referenced by L is modified to have R as a successor. This
1226 -- procedure does not copy L or R, so if a copy is required, it
1227 -- is the responsibility of the caller. The Incr parameter is an
1228 -- amount to be added to the Nat field of any P_Arbno_Y node that is
1229 -- in the left operand, it represents the additional stack space
1230 -- required by the right operand.
1232 function "&" (L, R : PE_Ptr) return PE_Ptr;
1233 pragma Inline ("&");
1234 -- Equivalent to Concat (L, R, 0)
1236 function C_To_PE (C : PChar) return PE_Ptr;
1237 -- Given a character, constructs a pattern element that matches
1238 -- the single character.
1240 function Copy (P : PE_Ptr) return PE_Ptr;
1241 -- Creates a copy of the pattern element referenced by the given
1242 -- pattern element reference. This is a deep copy, which means that
1243 -- it follows the Next and Alt pointers.
1245 function Image (P : PE_Ptr) return String;
1246 -- Returns the image of the address of the referenced pattern element.
1247 -- This is equivalent to Image (To_Address (P));
1249 function Is_In (C : Character; Str : String) return Boolean;
1250 pragma Inline (Is_In);
1251 -- Determines if the character C is in string Str.
1253 procedure Logic_Error;
1254 -- Called to raise Program_Error with an appropriate message if an
1255 -- internal logic error is detected.
1257 function Str_BF (A : Boolean_Func) return String;
1258 function Str_FP (A : File_Ptr) return String;
1259 function Str_NF (A : Natural_Func) return String;
1260 function Str_NP (A : Natural_Ptr) return String;
1261 function Str_PP (A : Pattern_Ptr) return String;
1262 function Str_VF (A : VString_Func) return String;
1263 function Str_VP (A : VString_Ptr) return String;
1264 -- These are debugging routines, which return a representation of the
1265 -- given access value (they are called only by Image and Dump)
1267 procedure Set_Successor (Pat : PE_Ptr; Succ : PE_Ptr);
1268 -- Adjusts all EOP pointers in Pat to point to Succ. No other changes
1269 -- are made. In particular, Succ is unchanged, and no index numbers
1270 -- are modified. Note that Pat may not be equal to EOP on entry.
1272 function S_To_PE (Str : PString) return PE_Ptr;
1273 -- Given a string, constructs a pattern element that matches the string
1275 procedure Uninitialized_Pattern;
1276 pragma No_Return (Uninitialized_Pattern);
1277 -- Called to raise Program_Error with an appropriate error message if
1278 -- an uninitialized pattern is used in any pattern construction or
1279 -- pattern matching operation.
1281 procedure XMatch
1282 (Subject : String;
1283 Pat_P : PE_Ptr;
1284 Pat_S : Natural;
1285 Start : out Natural;
1286 Stop : out Natural);
1287 -- This is the common pattern match routine. It is passed a string and
1288 -- a pattern, and it indicates success or failure, and on success the
1289 -- section of the string matched. It does not perform any assignments
1290 -- to the subject string, so pattern replacement is for the caller.
1292 -- Subject The subject string. The lower bound is always one. In the
1293 -- Match procedures, it is fine to use strings whose lower bound
1294 -- is not one, but we perform a one time conversion before the
1295 -- call to XMatch, so that XMatch does not have to be bothered
1296 -- with strange lower bounds.
1298 -- Pat_P Points to initial pattern element of pattern to be matched
1300 -- Pat_S Maximum required stack entries for pattern to be matched
1302 -- Start If match is successful, starting index of matched section.
1303 -- This value is always non-zero. A value of zero is used to
1304 -- indicate a failed match.
1306 -- Stop If match is successful, ending index of matched section.
1307 -- This can be zero if we match the null string at the start,
1308 -- in which case Start is set to zero, and Stop to one. If the
1309 -- Match fails, then the contents of Stop is undefined.
1311 procedure XMatchD
1312 (Subject : String;
1313 Pat_P : PE_Ptr;
1314 Pat_S : Natural;
1315 Start : out Natural;
1316 Stop : out Natural);
1317 -- Identical in all respects to XMatch, except that trace information is
1318 -- output on Standard_Ouput during execution of the match. This is the
1319 -- version that is called if the original Match call has Debug => True.
1321 ---------
1322 -- "&" --
1323 ---------
1325 function "&" (L : PString; R : Pattern) return Pattern is
1326 begin
1327 return (AFC with R.Stk, Concat (S_To_PE (L), Copy (R.P), R.Stk));
1328 end "&";
1330 function "&" (L : Pattern; R : PString) return Pattern is
1331 begin
1332 return (AFC with L.Stk, Concat (Copy (L.P), S_To_PE (R), 0));
1333 end "&";
1335 function "&" (L : PChar; R : Pattern) return Pattern is
1336 begin
1337 return (AFC with R.Stk, Concat (C_To_PE (L), Copy (R.P), R.Stk));
1338 end "&";
1340 function "&" (L : Pattern; R : PChar) return Pattern is
1341 begin
1342 return (AFC with L.Stk, Concat (Copy (L.P), C_To_PE (R), 0));
1343 end "&";
1345 function "&" (L : Pattern; R : Pattern) return Pattern is
1346 begin
1347 return (AFC with L.Stk + R.Stk, Concat (Copy (L.P), Copy (R.P), R.Stk));
1348 end "&";
1350 function "&" (L, R : PE_Ptr) return PE_Ptr is
1351 begin
1352 return Concat (L, R, 0);
1353 end "&";
1355 ---------
1356 -- "*" --
1357 ---------
1359 -- Assign immediate
1361 -- +---+ +---+ +---+
1362 -- | E |---->| P |---->| A |---->
1363 -- +---+ +---+ +---+
1365 -- The node numbering of the constituent pattern P is not affected.
1366 -- Where N is the number of nodes in P, the A node is numbered N + 1,
1367 -- and the E node is N + 2.
1369 function "*" (P : Pattern; Var : VString_Var) return Pattern is
1370 Pat : constant PE_Ptr := Copy (P.P);
1371 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1372 A : constant PE_Ptr :=
1373 new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access);
1375 begin
1376 return (AFC with P.Stk + 3, Bracket (E, Pat, A));
1377 end "*";
1379 function "*" (P : PString; Var : VString_Var) return Pattern is
1380 Pat : constant PE_Ptr := S_To_PE (P);
1381 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1382 A : constant PE_Ptr :=
1383 new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access);
1385 begin
1386 return (AFC with 3, Bracket (E, Pat, A));
1387 end "*";
1389 function "*" (P : PChar; Var : VString_Var) return Pattern is
1390 Pat : constant PE_Ptr := C_To_PE (P);
1391 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1392 A : constant PE_Ptr :=
1393 new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access);
1395 begin
1396 return (AFC with 3, Bracket (E, Pat, A));
1397 end "*";
1399 -- Write immediate
1401 -- +---+ +---+ +---+
1402 -- | E |---->| P |---->| W |---->
1403 -- +---+ +---+ +---+
1405 -- The node numbering of the constituent pattern P is not affected.
1406 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1407 -- and the E node is N + 2.
1409 function "*" (P : Pattern; Fil : File_Access) return Pattern is
1410 Pat : constant PE_Ptr := Copy (P.P);
1411 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1412 W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil);
1414 begin
1415 return (AFC with 3, Bracket (E, Pat, W));
1416 end "*";
1418 function "*" (P : PString; Fil : File_Access) return Pattern is
1419 Pat : constant PE_Ptr := S_To_PE (P);
1420 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1421 W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil);
1423 begin
1424 return (AFC with 3, Bracket (E, Pat, W));
1425 end "*";
1427 function "*" (P : PChar; Fil : File_Access) return Pattern is
1428 Pat : constant PE_Ptr := C_To_PE (P);
1429 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1430 W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil);
1432 begin
1433 return (AFC with 3, Bracket (E, Pat, W));
1434 end "*";
1436 ----------
1437 -- "**" --
1438 ----------
1440 -- Assign on match
1442 -- +---+ +---+ +---+
1443 -- | E |---->| P |---->| A |---->
1444 -- +---+ +---+ +---+
1446 -- The node numbering of the constituent pattern P is not affected.
1447 -- Where N is the number of nodes in P, the A node is numbered N + 1,
1448 -- and the E node is N + 2.
1450 function "**" (P : Pattern; Var : VString_Var) return Pattern is
1451 Pat : constant PE_Ptr := Copy (P.P);
1452 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1453 A : constant PE_Ptr :=
1454 new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access);
1456 begin
1457 return (AFC with P.Stk + 3, Bracket (E, Pat, A));
1458 end "**";
1460 function "**" (P : PString; Var : VString_Var) return Pattern is
1461 Pat : constant PE_Ptr := S_To_PE (P);
1462 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1463 A : constant PE_Ptr :=
1464 new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access);
1466 begin
1467 return (AFC with 3, Bracket (E, Pat, A));
1468 end "**";
1470 function "**" (P : PChar; Var : VString_Var) return Pattern is
1471 Pat : constant PE_Ptr := C_To_PE (P);
1472 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1473 A : constant PE_Ptr :=
1474 new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access);
1476 begin
1477 return (AFC with 3, Bracket (E, Pat, A));
1478 end "**";
1480 -- Write on match
1482 -- +---+ +---+ +---+
1483 -- | E |---->| P |---->| W |---->
1484 -- +---+ +---+ +---+
1486 -- The node numbering of the constituent pattern P is not affected.
1487 -- Where N is the number of nodes in P, the W node is numbered N + 1,
1488 -- and the E node is N + 2.
1490 function "**" (P : Pattern; Fil : File_Access) return Pattern is
1491 Pat : constant PE_Ptr := Copy (P.P);
1492 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1493 W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil);
1495 begin
1496 return (AFC with P.Stk + 3, Bracket (E, Pat, W));
1497 end "**";
1499 function "**" (P : PString; Fil : File_Access) return Pattern is
1500 Pat : constant PE_Ptr := S_To_PE (P);
1501 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1502 W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil);
1504 begin
1505 return (AFC with 3, Bracket (E, Pat, W));
1506 end "**";
1508 function "**" (P : PChar; Fil : File_Access) return Pattern is
1509 Pat : constant PE_Ptr := C_To_PE (P);
1510 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1511 W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil);
1513 begin
1514 return (AFC with 3, Bracket (E, Pat, W));
1515 end "**";
1517 ---------
1518 -- "+" --
1519 ---------
1521 function "+" (Str : VString_Var) return Pattern is
1522 begin
1523 return
1524 (AFC with 0,
1525 new PE'(PC_String_VP, 1, EOP, Str'Unrestricted_Access));
1526 end "+";
1528 function "+" (Str : VString_Func) return Pattern is
1529 begin
1530 return (AFC with 0, new PE'(PC_String_VF, 1, EOP, Str));
1531 end "+";
1533 function "+" (P : Pattern_Var) return Pattern is
1534 begin
1535 return
1536 (AFC with 3,
1537 new PE'(PC_Rpat, 1, EOP, P'Unrestricted_Access));
1538 end "+";
1540 function "+" (P : Boolean_Func) return Pattern is
1541 begin
1542 return (AFC with 3, new PE'(PC_Pred_Func, 1, EOP, P));
1543 end "+";
1545 ----------
1546 -- "or" --
1547 ----------
1549 function "or" (L : PString; R : Pattern) return Pattern is
1550 begin
1551 return (AFC with R.Stk + 1, S_To_PE (L) or Copy (R.P));
1552 end "or";
1554 function "or" (L : Pattern; R : PString) return Pattern is
1555 begin
1556 return (AFC with L.Stk + 1, Copy (L.P) or S_To_PE (R));
1557 end "or";
1559 function "or" (L : PString; R : PString) return Pattern is
1560 begin
1561 return (AFC with 1, S_To_PE (L) or S_To_PE (R));
1562 end "or";
1564 function "or" (L : Pattern; R : Pattern) return Pattern is
1565 begin
1566 return (AFC with
1567 Natural'Max (L.Stk, R.Stk) + 1, Copy (L.P) or Copy (R.P));
1568 end "or";
1570 function "or" (L : PChar; R : Pattern) return Pattern is
1571 begin
1572 return (AFC with 1, C_To_PE (L) or Copy (R.P));
1573 end "or";
1575 function "or" (L : Pattern; R : PChar) return Pattern is
1576 begin
1577 return (AFC with 1, Copy (L.P) or C_To_PE (R));
1578 end "or";
1580 function "or" (L : PChar; R : PChar) return Pattern is
1581 begin
1582 return (AFC with 1, C_To_PE (L) or C_To_PE (R));
1583 end "or";
1585 function "or" (L : PString; R : PChar) return Pattern is
1586 begin
1587 return (AFC with 1, S_To_PE (L) or C_To_PE (R));
1588 end "or";
1590 function "or" (L : PChar; R : PString) return Pattern is
1591 begin
1592 return (AFC with 1, C_To_PE (L) or S_To_PE (R));
1593 end "or";
1595 ------------
1596 -- Adjust --
1597 ------------
1599 -- No two patterns share the same pattern elements, so the adjust
1600 -- procedure for a Pattern assignment must do a deep copy of the
1601 -- pattern element structure.
1603 procedure Adjust (Object : in out Pattern) is
1604 begin
1605 Object.P := Copy (Object.P);
1606 end Adjust;
1608 ---------------
1609 -- Alternate --
1610 ---------------
1612 function Alternate (L, R : PE_Ptr) return PE_Ptr is
1613 begin
1614 -- If the left pattern is null, then we just add the alternation
1615 -- node with an index one greater than the right hand pattern.
1617 if L = EOP then
1618 return new PE'(PC_Alt, R.Index + 1, EOP, R);
1620 -- If the left pattern is non-null, then build a reference vector
1621 -- for its elements, and adjust their index values to acccomodate
1622 -- the right hand elements. Then add the alternation node.
1624 else
1625 declare
1626 Refs : Ref_Array (1 .. L.Index);
1628 begin
1629 Build_Ref_Array (L, Refs);
1631 for J in Refs'Range loop
1632 Refs (J).Index := Refs (J).Index + R.Index;
1633 end loop;
1634 end;
1636 return new PE'(PC_Alt, L.Index + 1, L, R);
1637 end if;
1638 end Alternate;
1640 ---------
1641 -- Any --
1642 ---------
1644 function Any (Str : String) return Pattern is
1645 begin
1646 return (AFC with 0, new PE'(PC_Any_CS, 1, EOP, To_Set (Str)));
1647 end Any;
1649 function Any (Str : VString) return Pattern is
1650 begin
1651 return Any (S (Str));
1652 end Any;
1654 function Any (Str : Character) return Pattern is
1655 begin
1656 return (AFC with 0, new PE'(PC_Any_CH, 1, EOP, Str));
1657 end Any;
1659 function Any (Str : Character_Set) return Pattern is
1660 begin
1661 return (AFC with 0, new PE'(PC_Any_CS, 1, EOP, Str));
1662 end Any;
1664 function Any (Str : access VString) return Pattern is
1665 begin
1666 return (AFC with 0, new PE'(PC_Any_VP, 1, EOP, VString_Ptr (Str)));
1667 end Any;
1669 function Any (Str : VString_Func) return Pattern is
1670 begin
1671 return (AFC with 0, new PE'(PC_Any_VF, 1, EOP, Str));
1672 end Any;
1674 ---------
1675 -- Arb --
1676 ---------
1678 -- +---+
1679 -- | X |---->
1680 -- +---+
1681 -- .
1682 -- .
1683 -- +---+
1684 -- | Y |---->
1685 -- +---+
1687 -- The PC_Arb_X element is numbered 2, and the PC_Arb_Y element is 1.
1689 function Arb return Pattern is
1690 Y : constant PE_Ptr := new PE'(PC_Arb_Y, 1, EOP);
1691 X : constant PE_Ptr := new PE'(PC_Arb_X, 2, EOP, Y);
1693 begin
1694 return (AFC with 1, X);
1695 end Arb;
1697 -----------
1698 -- Arbno --
1699 -----------
1701 function Arbno (P : PString) return Pattern is
1702 begin
1703 if P'Length = 0 then
1704 return (AFC with 0, EOP);
1706 else
1707 return (AFC with 0, Arbno_Simple (S_To_PE (P)));
1708 end if;
1709 end Arbno;
1711 function Arbno (P : PChar) return Pattern is
1712 begin
1713 return (AFC with 0, Arbno_Simple (C_To_PE (P)));
1714 end Arbno;
1716 function Arbno (P : Pattern) return Pattern is
1717 Pat : constant PE_Ptr := Copy (P.P);
1719 begin
1720 if P.Stk = 0
1721 and then OK_For_Simple_Arbno (Pat.Pcode)
1722 then
1723 return (AFC with 0, Arbno_Simple (Pat));
1724 end if;
1726 -- This is the complex case, either the pattern makes stack entries
1727 -- or it is possible for the pattern to match the null string (more
1728 -- accurately, we don't know that this is not the case).
1730 -- +--------------------------+
1731 -- | ^
1732 -- V |
1733 -- +---+ |
1734 -- | X |----> |
1735 -- +---+ |
1736 -- . |
1737 -- . |
1738 -- +---+ +---+ +---+ |
1739 -- | E |---->| P |---->| Y |--->+
1740 -- +---+ +---+ +---+
1742 -- The node numbering of the constituent pattern P is not affected.
1743 -- Where N is the number of nodes in P, the Y node is numbered N + 1,
1744 -- the E node is N + 2, and the X node is N + 3.
1746 declare
1747 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
1748 X : constant PE_Ptr := new PE'(PC_Arbno_X, 0, EOP, E);
1749 Y : constant PE_Ptr := new PE'(PC_Arbno_Y, 0, X, P.Stk + 3);
1750 EPY : constant PE_Ptr := Bracket (E, Pat, Y);
1752 begin
1753 X.Alt := EPY;
1754 X.Index := EPY.Index + 1;
1755 return (AFC with P.Stk + 3, X);
1756 end;
1757 end Arbno;
1759 ------------------
1760 -- Arbno_Simple --
1761 ------------------
1763 -- +-------------+
1764 -- | ^
1765 -- V |
1766 -- +---+ |
1767 -- | S |----> |
1768 -- +---+ |
1769 -- . |
1770 -- . |
1771 -- +---+ |
1772 -- | P |---------->+
1773 -- +---+
1775 -- The node numbering of the constituent pattern P is not affected.
1776 -- The S node has a node number of P.Index + 1.
1778 -- Note that we know that P cannot be EOP, because a null pattern
1779 -- does not meet the requirements for simple Arbno.
1781 function Arbno_Simple (P : PE_Ptr) return PE_Ptr is
1782 S : constant PE_Ptr := new PE'(PC_Arbno_S, P.Index + 1, EOP, P);
1784 begin
1785 Set_Successor (P, S);
1786 return S;
1787 end Arbno_Simple;
1789 ---------
1790 -- Bal --
1791 ---------
1793 function Bal return Pattern is
1794 begin
1795 return (AFC with 1, new PE'(PC_Bal, 1, EOP));
1796 end Bal;
1798 -------------
1799 -- Bracket --
1800 -------------
1802 function Bracket (E, P, A : PE_Ptr) return PE_Ptr is
1803 begin
1804 if P = EOP then
1805 E.Pthen := A;
1806 E.Index := 2;
1807 A.Index := 1;
1809 else
1810 E.Pthen := P;
1811 Set_Successor (P, A);
1812 E.Index := P.Index + 2;
1813 A.Index := P.Index + 1;
1814 end if;
1816 return E;
1817 end Bracket;
1819 -----------
1820 -- Break --
1821 -----------
1823 function Break (Str : String) return Pattern is
1824 begin
1825 return (AFC with 0, new PE'(PC_Break_CS, 1, EOP, To_Set (Str)));
1826 end Break;
1828 function Break (Str : VString) return Pattern is
1829 begin
1830 return Break (S (Str));
1831 end Break;
1833 function Break (Str : Character) return Pattern is
1834 begin
1835 return (AFC with 0, new PE'(PC_Break_CH, 1, EOP, Str));
1836 end Break;
1838 function Break (Str : Character_Set) return Pattern is
1839 begin
1840 return (AFC with 0, new PE'(PC_Break_CS, 1, EOP, Str));
1841 end Break;
1843 function Break (Str : access VString) return Pattern is
1844 begin
1845 return (AFC with 0, new PE'(PC_Break_VP, 1, EOP, VString_Ptr (Str)));
1846 end Break;
1848 function Break (Str : VString_Func) return Pattern is
1849 begin
1850 return (AFC with 0, new PE'(PC_Break_VF, 1, EOP, Str));
1851 end Break;
1853 ------------
1854 -- BreakX --
1855 ------------
1857 function BreakX (Str : String) return Pattern is
1858 begin
1859 return BreakX_Make (new PE'(PC_BreakX_CS, 3, N, To_Set (Str)));
1860 end BreakX;
1862 function BreakX (Str : VString) return Pattern is
1863 begin
1864 return BreakX (S (Str));
1865 end BreakX;
1867 function BreakX (Str : Character) return Pattern is
1868 begin
1869 return BreakX_Make (new PE'(PC_BreakX_CH, 3, N, Str));
1870 end BreakX;
1872 function BreakX (Str : Character_Set) return Pattern is
1873 begin
1874 return BreakX_Make (new PE'(PC_BreakX_CS, 3, N, Str));
1875 end BreakX;
1877 function BreakX (Str : access VString) return Pattern is
1878 begin
1879 return BreakX_Make (new PE'(PC_BreakX_VP, 3, N, VString_Ptr (Str)));
1880 end BreakX;
1882 function BreakX (Str : VString_Func) return Pattern is
1883 begin
1884 return BreakX_Make (new PE'(PC_BreakX_VF, 3, N, Str));
1885 end BreakX;
1887 -----------------
1888 -- BreakX_Make --
1889 -----------------
1891 -- +---+ +---+
1892 -- | B |---->| A |---->
1893 -- +---+ +---+
1894 -- ^ .
1895 -- | .
1896 -- | +---+
1897 -- +<------| X |
1898 -- +---+
1900 -- The B node is numbered 3, the alternative node is 1, and the X
1901 -- node is 2.
1903 function BreakX_Make (B : PE_Ptr) return Pattern is
1904 X : constant PE_Ptr := new PE'(PC_BreakX_X, 2, B);
1905 A : constant PE_Ptr := new PE'(PC_Alt, 1, EOP, X);
1907 begin
1908 B.Pthen := A;
1909 return (AFC with 2, B);
1910 end BreakX_Make;
1912 ---------------------
1913 -- Build_Ref_Array --
1914 ---------------------
1916 procedure Build_Ref_Array (E : PE_Ptr; RA : out Ref_Array) is
1918 procedure Record_PE (E : PE_Ptr);
1919 -- Record given pattern element if not already recorded in RA,
1920 -- and also record any referenced pattern elements recursively.
1922 procedure Record_PE (E : PE_Ptr) is
1923 begin
1924 PutD (" Record_PE called with PE_Ptr = " & Image (E));
1926 if E = EOP or else RA (E.Index) /= null then
1927 Put_LineD (", nothing to do");
1928 return;
1930 else
1931 Put_LineD (", recording" & IndexT'Image (E.Index));
1932 RA (E.Index) := E;
1933 Record_PE (E.Pthen);
1935 if E.Pcode in PC_Has_Alt then
1936 Record_PE (E.Alt);
1937 end if;
1938 end if;
1939 end Record_PE;
1941 -- Start of processing for Build_Ref_Array
1943 begin
1944 New_LineD;
1945 Put_LineD ("Entering Build_Ref_Array");
1946 Record_PE (E);
1947 New_LineD;
1948 end Build_Ref_Array;
1950 -------------
1951 -- C_To_PE --
1952 -------------
1954 function C_To_PE (C : PChar) return PE_Ptr is
1955 begin
1956 return new PE'(PC_Char, 1, EOP, C);
1957 end C_To_PE;
1959 ------------
1960 -- Cancel --
1961 ------------
1963 function Cancel return Pattern is
1964 begin
1965 return (AFC with 0, new PE'(PC_Cancel, 1, EOP));
1966 end Cancel;
1968 ------------
1969 -- Concat --
1970 ------------
1972 -- Concat needs to traverse the left operand performing the following
1973 -- set of fixups:
1975 -- a) Any successor pointers (Pthen fields) that are set to EOP are
1976 -- reset to point to the second operand.
1978 -- b) Any PC_Arbno_Y node has its stack count field incremented
1979 -- by the parameter Incr provided for this purpose.
1981 -- d) Num fields of all pattern elements in the left operand are
1982 -- adjusted to include the elements of the right operand.
1984 -- Note: we do not use Set_Successor in the processing for Concat, since
1985 -- there is no point in doing two traversals, we may as well do everything
1986 -- at the same time.
1988 function Concat (L, R : PE_Ptr; Incr : Natural) return PE_Ptr is
1989 begin
1990 if L = EOP then
1991 return R;
1993 elsif R = EOP then
1994 return L;
1996 else
1997 declare
1998 Refs : Ref_Array (1 .. L.Index);
1999 -- We build a reference array for L whose N'th element points to
2000 -- the pattern element of L whose original Index value is N.
2002 P : PE_Ptr;
2004 begin
2005 Build_Ref_Array (L, Refs);
2007 for J in Refs'Range loop
2008 P := Refs (J);
2010 P.Index := P.Index + R.Index;
2012 if P.Pcode = PC_Arbno_Y then
2013 P.Nat := P.Nat + Incr;
2014 end if;
2016 if P.Pthen = EOP then
2017 P.Pthen := R;
2018 end if;
2020 if P.Pcode in PC_Has_Alt and then P.Alt = EOP then
2021 P.Alt := R;
2022 end if;
2023 end loop;
2024 end;
2026 return L;
2027 end if;
2028 end Concat;
2030 ----------
2031 -- Copy --
2032 ----------
2034 function Copy (P : PE_Ptr) return PE_Ptr is
2035 begin
2036 if P = null then
2037 Uninitialized_Pattern;
2039 else
2040 declare
2041 Refs : Ref_Array (1 .. P.Index);
2042 -- References to elements in P, indexed by Index field
2044 Copy : Ref_Array (1 .. P.Index);
2045 -- Holds copies of elements of P, indexed by Index field.
2047 E : PE_Ptr;
2049 begin
2050 Build_Ref_Array (P, Refs);
2052 -- Now copy all nodes
2054 for J in Refs'Range loop
2055 Copy (J) := new PE'(Refs (J).all);
2056 end loop;
2058 -- Adjust all internal references
2060 for J in Copy'Range loop
2061 E := Copy (J);
2063 -- Adjust successor pointer to point to copy
2065 if E.Pthen /= EOP then
2066 E.Pthen := Copy (E.Pthen.Index);
2067 end if;
2069 -- Adjust Alt pointer if there is one to point to copy
2071 if E.Pcode in PC_Has_Alt and then E.Alt /= EOP then
2072 E.Alt := Copy (E.Alt.Index);
2073 end if;
2075 -- Copy referenced string
2077 if E.Pcode = PC_String then
2078 E.Str := new String'(E.Str.all);
2079 end if;
2080 end loop;
2082 return Copy (P.Index);
2083 end;
2084 end if;
2085 end Copy;
2087 ----------
2088 -- Dump --
2089 ----------
2091 procedure Dump (P : Pattern) is
2093 subtype Count is Ada.Text_IO.Count;
2094 Scol : Count;
2095 -- Used to keep track of column in dump output
2097 Refs : Ref_Array (1 .. P.P.Index);
2098 -- We build a reference array whose N'th element points to the
2099 -- pattern element whose Index value is N.
2101 Cols : Natural := 2;
2102 -- Number of columns used for pattern numbers, minimum is 2
2104 E : PE_Ptr;
2106 procedure Write_Node_Id (E : PE_Ptr);
2107 -- Writes out a string identifying the given pattern element.
2109 procedure Write_Node_Id (E : PE_Ptr) is
2110 begin
2111 if E = EOP then
2112 Put ("EOP");
2114 for J in 4 .. Cols loop
2115 Put (' ');
2116 end loop;
2118 else
2119 declare
2120 Str : String (1 .. Cols);
2121 N : Natural := Natural (E.Index);
2123 begin
2124 Put ("#");
2126 for J in reverse Str'Range loop
2127 Str (J) := Character'Val (48 + N mod 10);
2128 N := N / 10;
2129 end loop;
2131 Put (Str);
2132 end;
2133 end if;
2134 end Write_Node_Id;
2136 begin
2137 New_Line;
2138 Put ("Pattern Dump Output (pattern at " &
2139 Image (P'Address) &
2140 ", S = " & Natural'Image (P.Stk) & ')');
2142 Scol := Col;
2143 New_Line;
2145 while Col < Scol loop
2146 Put ('-');
2147 end loop;
2149 New_Line;
2151 -- If uninitialized pattern, dump line and we are done
2153 if P.P = null then
2154 Put_Line ("Uninitialized pattern value");
2155 return;
2156 end if;
2158 -- If null pattern, just dump it and we are all done
2160 if P.P = EOP then
2161 Put_Line ("EOP (null pattern)");
2162 return;
2163 end if;
2165 Build_Ref_Array (P.P, Refs);
2167 -- Set number of columns required for node numbers
2169 while 10 ** Cols - 1 < Integer (P.P.Index) loop
2170 Cols := Cols + 1;
2171 end loop;
2173 -- Now dump the nodes in reverse sequence. We output them in reverse
2174 -- sequence since this corresponds to the natural order used to
2175 -- construct the patterns.
2177 for J in reverse Refs'Range loop
2178 E := Refs (J);
2179 Write_Node_Id (E);
2180 Set_Col (Count (Cols) + 4);
2181 Put (Image (E));
2182 Put (" ");
2183 Put (Pattern_Code'Image (E.Pcode));
2184 Put (" ");
2185 Set_Col (21 + Count (Cols) + Address_Image_Length);
2186 Write_Node_Id (E.Pthen);
2187 Set_Col (24 + 2 * Count (Cols) + Address_Image_Length);
2189 case E.Pcode is
2191 when PC_Alt |
2192 PC_Arb_X |
2193 PC_Arbno_S |
2194 PC_Arbno_X =>
2195 Write_Node_Id (E.Alt);
2197 when PC_Rpat =>
2198 Put (Str_PP (E.PP));
2200 when PC_Pred_Func =>
2201 Put (Str_BF (E.BF));
2203 when PC_Assign_Imm |
2204 PC_Assign_OnM |
2205 PC_Any_VP |
2206 PC_Break_VP |
2207 PC_BreakX_VP |
2208 PC_NotAny_VP |
2209 PC_NSpan_VP |
2210 PC_Span_VP |
2211 PC_String_VP =>
2212 Put (Str_VP (E.VP));
2214 when PC_Write_Imm |
2215 PC_Write_OnM =>
2216 Put (Str_FP (E.FP));
2218 when PC_String =>
2219 Put (Image (E.Str.all));
2221 when PC_String_2 =>
2222 Put (Image (E.Str2));
2224 when PC_String_3 =>
2225 Put (Image (E.Str3));
2227 when PC_String_4 =>
2228 Put (Image (E.Str4));
2230 when PC_String_5 =>
2231 Put (Image (E.Str5));
2233 when PC_String_6 =>
2234 Put (Image (E.Str6));
2236 when PC_Setcur =>
2237 Put (Str_NP (E.Var));
2239 when PC_Any_CH |
2240 PC_Break_CH |
2241 PC_BreakX_CH |
2242 PC_Char |
2243 PC_NotAny_CH |
2244 PC_NSpan_CH |
2245 PC_Span_CH =>
2246 Put (''' & E.Char & ''');
2248 when PC_Any_CS |
2249 PC_Break_CS |
2250 PC_BreakX_CS |
2251 PC_NotAny_CS |
2252 PC_NSpan_CS |
2253 PC_Span_CS =>
2254 Put ('"' & To_Sequence (E.CS) & '"');
2256 when PC_Arbno_Y |
2257 PC_Len_Nat |
2258 PC_Pos_Nat |
2259 PC_RPos_Nat |
2260 PC_RTab_Nat |
2261 PC_Tab_Nat =>
2262 Put (S (E.Nat));
2264 when PC_Pos_NF |
2265 PC_Len_NF |
2266 PC_RPos_NF |
2267 PC_RTab_NF |
2268 PC_Tab_NF =>
2269 Put (Str_NF (E.NF));
2271 when PC_Pos_NP |
2272 PC_Len_NP |
2273 PC_RPos_NP |
2274 PC_RTab_NP |
2275 PC_Tab_NP =>
2276 Put (Str_NP (E.NP));
2278 when PC_Any_VF |
2279 PC_Break_VF |
2280 PC_BreakX_VF |
2281 PC_NotAny_VF |
2282 PC_NSpan_VF |
2283 PC_Span_VF |
2284 PC_String_VF =>
2285 Put (Str_VF (E.VF));
2287 when others => null;
2289 end case;
2291 New_Line;
2292 end loop;
2294 New_Line;
2295 end Dump;
2297 ----------
2298 -- Fail --
2299 ----------
2301 function Fail return Pattern is
2302 begin
2303 return (AFC with 0, new PE'(PC_Fail, 1, EOP));
2304 end Fail;
2306 -----------
2307 -- Fence --
2308 -----------
2310 -- Simple case
2312 function Fence return Pattern is
2313 begin
2314 return (AFC with 1, new PE'(PC_Fence, 1, EOP));
2315 end Fence;
2317 -- Function case
2319 -- +---+ +---+ +---+
2320 -- | E |---->| P |---->| X |---->
2321 -- +---+ +---+ +---+
2323 -- The node numbering of the constituent pattern P is not affected.
2324 -- Where N is the number of nodes in P, the X node is numbered N + 1,
2325 -- and the E node is N + 2.
2327 function Fence (P : Pattern) return Pattern is
2328 Pat : constant PE_Ptr := Copy (P.P);
2329 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP);
2330 X : constant PE_Ptr := new PE'(PC_Fence_X, 0, EOP);
2332 begin
2333 return (AFC with P.Stk + 1, Bracket (E, Pat, X));
2334 end Fence;
2336 --------------
2337 -- Finalize --
2338 --------------
2340 procedure Finalize (Object : in out Pattern) is
2342 procedure Free is new Unchecked_Deallocation (PE, PE_Ptr);
2343 procedure Free is new Unchecked_Deallocation (String, String_Ptr);
2345 begin
2346 -- Nothing to do if already freed
2348 if Object.P = null then
2349 return;
2351 -- Otherwise we must free all elements
2353 else
2354 declare
2355 Refs : Ref_Array (1 .. Object.P.Index);
2356 -- References to elements in pattern to be finalized
2358 begin
2359 Build_Ref_Array (Object.P, Refs);
2361 for J in Refs'Range loop
2362 if Refs (J).Pcode = PC_String then
2363 Free (Refs (J).Str);
2364 end if;
2366 Free (Refs (J));
2367 end loop;
2369 Object.P := null;
2370 end;
2371 end if;
2372 end Finalize;
2374 -----------
2375 -- Image --
2376 -----------
2378 function Image (P : PE_Ptr) return String is
2379 begin
2380 return Image (To_Address (P));
2381 end Image;
2383 function Image (P : Pattern) return String is
2384 begin
2385 return S (Image (P));
2386 end Image;
2388 function Image (P : Pattern) return VString is
2390 Kill_Ampersand : Boolean := False;
2391 -- Set True to delete next & to be output to Result
2393 Result : VString := Nul;
2394 -- The result is accumulated here, using Append
2396 Refs : Ref_Array (1 .. P.P.Index);
2397 -- We build a reference array whose N'th element points to the
2398 -- pattern element whose Index value is N.
2400 procedure Delete_Ampersand;
2401 -- Deletes the ampersand at the end of Result
2403 procedure Image_Seq (E : PE_Ptr; Succ : PE_Ptr; Paren : Boolean);
2404 -- E refers to a pattern structure whose successor is given by Succ.
2405 -- This procedure appends to Result a representation of this pattern.
2406 -- The Paren parameter indicates whether parentheses are required if
2407 -- the output is more than one element.
2409 procedure Image_One (E : in out PE_Ptr);
2410 -- E refers to a pattern structure. This procedure appends to Result
2411 -- a representation of the single simple or compound pattern structure
2412 -- at the start of E and updates E to point to its successor.
2414 ----------------------
2415 -- Delete_Ampersand --
2416 ----------------------
2418 procedure Delete_Ampersand is
2419 L : Natural := Length (Result);
2421 begin
2422 if L > 2 then
2423 Delete (Result, L - 1, L);
2424 end if;
2425 end Delete_Ampersand;
2427 ---------------
2428 -- Image_One --
2429 ---------------
2431 procedure Image_One (E : in out PE_Ptr) is
2433 ER : PE_Ptr := E.Pthen;
2434 -- Successor set as result in E unless reset
2436 begin
2437 case E.Pcode is
2439 when PC_Cancel =>
2440 Append (Result, "Cancel");
2442 when PC_Alt => Alt : declare
2444 Elmts_In_L : constant IndexT := E.Pthen.Index - E.Alt.Index;
2445 -- Number of elements in left pattern of alternation.
2447 Lowest_In_L : constant IndexT := E.Index - Elmts_In_L;
2448 -- Number of lowest index in elements of left pattern
2450 E1 : PE_Ptr;
2452 begin
2453 -- The successor of the alternation node must have a lower
2454 -- index than any node that is in the left pattern or a
2455 -- higher index than the alternation node itself.
2457 while ER /= EOP
2458 and then ER.Index >= Lowest_In_L
2459 and then ER.Index < E.Index
2460 loop
2461 ER := ER.Pthen;
2462 end loop;
2464 Append (Result, '(');
2466 E1 := E;
2467 loop
2468 Image_Seq (E1.Pthen, ER, False);
2469 Append (Result, " or ");
2470 E1 := E1.Alt;
2471 exit when E1.Pcode /= PC_Alt;
2472 end loop;
2474 Image_Seq (E1, ER, False);
2475 Append (Result, ')');
2476 end Alt;
2478 when PC_Any_CS =>
2479 Append (Result, "Any (" & Image (To_Sequence (E.CS)) & ')');
2481 when PC_Any_VF =>
2482 Append (Result, "Any (" & Str_VF (E.VF) & ')');
2484 when PC_Any_VP =>
2485 Append (Result, "Any (" & Str_VP (E.VP) & ')');
2487 when PC_Arb_X =>
2488 Append (Result, "Arb");
2490 when PC_Arbno_S =>
2491 Append (Result, "Arbno (");
2492 Image_Seq (E.Alt, E, False);
2493 Append (Result, ')');
2495 when PC_Arbno_X =>
2496 Append (Result, "Arbno (");
2497 Image_Seq (E.Alt.Pthen, Refs (E.Index - 2), False);
2498 Append (Result, ')');
2500 when PC_Assign_Imm =>
2501 Delete_Ampersand;
2502 Append (Result, "* " & Str_VP (Refs (E.Index - 1).VP));
2504 when PC_Assign_OnM =>
2505 Delete_Ampersand;
2506 Append (Result, "** " & Str_VP (Refs (E.Index - 1).VP));
2508 when PC_Any_CH =>
2509 Append (Result, "Any ('" & E.Char & "')");
2511 when PC_Bal =>
2512 Append (Result, "Bal");
2514 when PC_Break_CH =>
2515 Append (Result, "Break ('" & E.Char & "')");
2517 when PC_Break_CS =>
2518 Append (Result, "Break (" & Image (To_Sequence (E.CS)) & ')');
2520 when PC_Break_VF =>
2521 Append (Result, "Break (" & Str_VF (E.VF) & ')');
2523 when PC_Break_VP =>
2524 Append (Result, "Break (" & Str_VP (E.VP) & ')');
2526 when PC_BreakX_CH =>
2527 Append (Result, "BreakX ('" & E.Char & "')");
2528 ER := ER.Pthen;
2530 when PC_BreakX_CS =>
2531 Append (Result, "BreakX (" & Image (To_Sequence (E.CS)) & ')');
2532 ER := ER.Pthen;
2534 when PC_BreakX_VF =>
2535 Append (Result, "BreakX (" & Str_VF (E.VF) & ')');
2536 ER := ER.Pthen;
2538 when PC_BreakX_VP =>
2539 Append (Result, "BreakX (" & Str_VP (E.VP) & ')');
2540 ER := ER.Pthen;
2542 when PC_Char =>
2543 Append (Result, ''' & E.Char & ''');
2545 when PC_Fail =>
2546 Append (Result, "Fail");
2548 when PC_Fence =>
2549 Append (Result, "Fence");
2551 when PC_Fence_X =>
2552 Append (Result, "Fence (");
2553 Image_Seq (E.Pthen, Refs (E.Index - 1), False);
2554 Append (Result, ")");
2555 ER := Refs (E.Index - 1).Pthen;
2557 when PC_Len_Nat =>
2558 Append (Result, "Len (" & E.Nat & ')');
2560 when PC_Len_NF =>
2561 Append (Result, "Len (" & Str_NF (E.NF) & ')');
2563 when PC_Len_NP =>
2564 Append (Result, "Len (" & Str_NP (E.NP) & ')');
2566 when PC_NotAny_CH =>
2567 Append (Result, "NotAny ('" & E.Char & "')");
2569 when PC_NotAny_CS =>
2570 Append (Result, "NotAny (" & Image (To_Sequence (E.CS)) & ')');
2572 when PC_NotAny_VF =>
2573 Append (Result, "NotAny (" & Str_VF (E.VF) & ')');
2575 when PC_NotAny_VP =>
2576 Append (Result, "NotAny (" & Str_VP (E.VP) & ')');
2578 when PC_NSpan_CH =>
2579 Append (Result, "NSpan ('" & E.Char & "')");
2581 when PC_NSpan_CS =>
2582 Append (Result, "NSpan (" & Image (To_Sequence (E.CS)) & ')');
2584 when PC_NSpan_VF =>
2585 Append (Result, "NSpan (" & Str_VF (E.VF) & ')');
2587 when PC_NSpan_VP =>
2588 Append (Result, "NSpan (" & Str_VP (E.VP) & ')');
2590 when PC_Null =>
2591 Append (Result, """""");
2593 when PC_Pos_Nat =>
2594 Append (Result, "Pos (" & E.Nat & ')');
2596 when PC_Pos_NF =>
2597 Append (Result, "Pos (" & Str_NF (E.NF) & ')');
2599 when PC_Pos_NP =>
2600 Append (Result, "Pos (" & Str_NP (E.NP) & ')');
2602 when PC_R_Enter =>
2603 Kill_Ampersand := True;
2605 when PC_Rest =>
2606 Append (Result, "Rest");
2608 when PC_Rpat =>
2609 Append (Result, "(+ " & Str_PP (E.PP) & ')');
2611 when PC_Pred_Func =>
2612 Append (Result, "(+ " & Str_BF (E.BF) & ')');
2614 when PC_RPos_Nat =>
2615 Append (Result, "RPos (" & E.Nat & ')');
2617 when PC_RPos_NF =>
2618 Append (Result, "RPos (" & Str_NF (E.NF) & ')');
2620 when PC_RPos_NP =>
2621 Append (Result, "RPos (" & Str_NP (E.NP) & ')');
2623 when PC_RTab_Nat =>
2624 Append (Result, "RTab (" & E.Nat & ')');
2626 when PC_RTab_NF =>
2627 Append (Result, "RTab (" & Str_NF (E.NF) & ')');
2629 when PC_RTab_NP =>
2630 Append (Result, "RTab (" & Str_NP (E.NP) & ')');
2632 when PC_Setcur =>
2633 Append (Result, "Setcur (" & Str_NP (E.Var) & ')');
2635 when PC_Span_CH =>
2636 Append (Result, "Span ('" & E.Char & "')");
2638 when PC_Span_CS =>
2639 Append (Result, "Span (" & Image (To_Sequence (E.CS)) & ')');
2641 when PC_Span_VF =>
2642 Append (Result, "Span (" & Str_VF (E.VF) & ')');
2644 when PC_Span_VP =>
2645 Append (Result, "Span (" & Str_VP (E.VP) & ')');
2647 when PC_String =>
2648 Append (Result, Image (E.Str.all));
2650 when PC_String_2 =>
2651 Append (Result, Image (E.Str2));
2653 when PC_String_3 =>
2654 Append (Result, Image (E.Str3));
2656 when PC_String_4 =>
2657 Append (Result, Image (E.Str4));
2659 when PC_String_5 =>
2660 Append (Result, Image (E.Str5));
2662 when PC_String_6 =>
2663 Append (Result, Image (E.Str6));
2665 when PC_String_VF =>
2666 Append (Result, "(+" & Str_VF (E.VF) & ')');
2668 when PC_String_VP =>
2669 Append (Result, "(+" & Str_VP (E.VP) & ')');
2671 when PC_Succeed =>
2672 Append (Result, "Succeed");
2674 when PC_Tab_Nat =>
2675 Append (Result, "Tab (" & E.Nat & ')');
2677 when PC_Tab_NF =>
2678 Append (Result, "Tab (" & Str_NF (E.NF) & ')');
2680 when PC_Tab_NP =>
2681 Append (Result, "Tab (" & Str_NP (E.NP) & ')');
2683 when PC_Write_Imm =>
2684 Append (Result, '(');
2685 Image_Seq (E, Refs (E.Index - 1), True);
2686 Append (Result, " * " & Str_FP (Refs (E.Index - 1).FP));
2687 ER := Refs (E.Index - 1).Pthen;
2689 when PC_Write_OnM =>
2690 Append (Result, '(');
2691 Image_Seq (E.Pthen, Refs (E.Index - 1), True);
2692 Append (Result, " ** " & Str_FP (Refs (E.Index - 1).FP));
2693 ER := Refs (E.Index - 1).Pthen;
2695 -- Other pattern codes should not appear as leading elements
2697 when PC_Arb_Y |
2698 PC_Arbno_Y |
2699 PC_Assign |
2700 PC_BreakX_X |
2701 PC_EOP |
2702 PC_Fence_Y |
2703 PC_R_Remove |
2704 PC_R_Restore |
2705 PC_Unanchored =>
2706 Append (Result, "???");
2708 end case;
2710 E := ER;
2711 end Image_One;
2713 ---------------
2714 -- Image_Seq --
2715 ---------------
2717 procedure Image_Seq (E : PE_Ptr; Succ : PE_Ptr; Paren : Boolean) is
2718 E1 : PE_Ptr := E;
2719 Mult : Boolean := False;
2720 Indx : Natural := Length (Result);
2722 begin
2723 -- The image of EOP is "" (the null string)
2725 if E = EOP then
2726 Append (Result, """""");
2728 -- Else generate appropriate concatenation sequence
2730 else
2731 loop
2732 Image_One (E1);
2733 exit when E1 = Succ;
2734 exit when E1 = EOP;
2735 Mult := True;
2737 if Kill_Ampersand then
2738 Kill_Ampersand := False;
2739 else
2740 Append (Result, " & ");
2741 end if;
2742 end loop;
2743 end if;
2745 if Mult and Paren then
2746 Insert (Result, Indx + 1, "(");
2747 Append (Result, ")");
2748 end if;
2749 end Image_Seq;
2751 -- Start of processing for Image
2753 begin
2754 Build_Ref_Array (P.P, Refs);
2755 Image_Seq (P.P, EOP, False);
2756 return Result;
2757 end Image;
2759 -----------
2760 -- Is_In --
2761 -----------
2763 function Is_In (C : Character; Str : String) return Boolean is
2764 begin
2765 for J in Str'Range loop
2766 if Str (J) = C then
2767 return True;
2768 end if;
2769 end loop;
2771 return False;
2772 end Is_In;
2774 ---------
2775 -- Len --
2776 ---------
2778 function Len (Count : Natural) return Pattern is
2779 begin
2780 -- Note, the following is not just an optimization, it is needed
2781 -- to ensure that Arbno (Len (0)) does not generate an infinite
2782 -- matching loop (since PC_Len_Nat is OK_For_Simple_Arbno).
2784 if Count = 0 then
2785 return (AFC with 0, new PE'(PC_Null, 1, EOP));
2787 else
2788 return (AFC with 0, new PE'(PC_Len_Nat, 1, EOP, Count));
2789 end if;
2790 end Len;
2792 function Len (Count : Natural_Func) return Pattern is
2793 begin
2794 return (AFC with 0, new PE'(PC_Len_NF, 1, EOP, Count));
2795 end Len;
2797 function Len (Count : access Natural) return Pattern is
2798 begin
2799 return (AFC with 0, new PE'(PC_Len_NP, 1, EOP, Natural_Ptr (Count)));
2800 end Len;
2802 -----------------
2803 -- Logic_Error --
2804 -----------------
2806 procedure Logic_Error is
2807 begin
2808 Raise_Exception
2809 (Program_Error'Identity,
2810 "Internal logic error in GNAT.Spitbol.Patterns");
2811 end Logic_Error;
2813 -----------
2814 -- Match --
2815 -----------
2817 function Match
2818 (Subject : VString;
2819 Pat : Pattern)
2820 return Boolean
2822 Start, Stop : Natural;
2824 begin
2825 if Debug_Mode then
2826 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2827 else
2828 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2829 end if;
2831 return Start /= 0;
2832 end Match;
2834 function Match
2835 (Subject : String;
2836 Pat : Pattern)
2837 return Boolean
2839 Start, Stop : Natural;
2840 subtype String1 is String (1 .. Subject'Length);
2842 begin
2843 if Debug_Mode then
2844 XMatchD (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2845 else
2846 XMatch (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2847 end if;
2849 return Start /= 0;
2850 end Match;
2852 function Match
2853 (Subject : VString_Var;
2854 Pat : Pattern;
2855 Replace : VString)
2856 return Boolean
2858 Start, Stop : Natural;
2860 begin
2861 if Debug_Mode then
2862 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2863 else
2864 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2865 end if;
2867 if Start = 0 then
2868 return False;
2869 else
2870 Replace_Slice
2871 (Subject'Unrestricted_Access.all,
2872 Start, Stop, Get_String (Replace).all);
2873 return True;
2874 end if;
2875 end Match;
2877 function Match
2878 (Subject : VString_Var;
2879 Pat : Pattern;
2880 Replace : String)
2881 return Boolean
2883 Start, Stop : Natural;
2885 begin
2886 if Debug_Mode then
2887 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2888 else
2889 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2890 end if;
2892 if Start = 0 then
2893 return False;
2894 else
2895 Replace_Slice
2896 (Subject'Unrestricted_Access.all, Start, Stop, Replace);
2897 return True;
2898 end if;
2899 end Match;
2901 procedure Match
2902 (Subject : VString;
2903 Pat : Pattern)
2905 Start, Stop : Natural;
2907 begin
2908 if Debug_Mode then
2909 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2910 else
2911 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2912 end if;
2914 end Match;
2916 procedure Match
2917 (Subject : String;
2918 Pat : Pattern)
2920 Start, Stop : Natural;
2921 subtype String1 is String (1 .. Subject'Length);
2922 begin
2923 if Debug_Mode then
2924 XMatchD (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2925 else
2926 XMatch (String1 (Subject), Pat.P, Pat.Stk, Start, Stop);
2927 end if;
2928 end Match;
2930 procedure Match
2931 (Subject : in out VString;
2932 Pat : Pattern;
2933 Replace : VString)
2935 Start, Stop : Natural;
2937 begin
2938 if Debug_Mode then
2939 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2940 else
2941 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2942 end if;
2944 if Start /= 0 then
2945 Replace_Slice (Subject, Start, Stop, Get_String (Replace).all);
2946 end if;
2947 end Match;
2949 procedure Match
2950 (Subject : in out VString;
2951 Pat : Pattern;
2952 Replace : String)
2954 Start, Stop : Natural;
2956 begin
2957 if Debug_Mode then
2958 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2959 else
2960 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
2961 end if;
2963 if Start /= 0 then
2964 Replace_Slice (Subject, Start, Stop, Replace);
2965 end if;
2966 end Match;
2968 function Match
2969 (Subject : VString;
2970 Pat : PString)
2971 return Boolean
2973 Pat_Len : constant Natural := Pat'Length;
2974 Sub_Len : constant Natural := Length (Subject);
2975 Sub_Str : constant String_Access := Get_String (Subject);
2977 begin
2978 if Anchored_Mode then
2979 if Pat_Len > Sub_Len then
2980 return False;
2981 else
2982 return Pat = Sub_Str.all (1 .. Pat_Len);
2983 end if;
2985 else
2986 for J in 1 .. Sub_Len - Pat_Len + 1 loop
2987 if Pat = Sub_Str.all (J .. J + (Pat_Len - 1)) then
2988 return True;
2989 end if;
2990 end loop;
2992 return False;
2993 end if;
2994 end Match;
2996 function Match
2997 (Subject : String;
2998 Pat : PString)
2999 return Boolean
3001 Pat_Len : constant Natural := Pat'Length;
3002 Sub_Len : constant Natural := Subject'Length;
3003 SFirst : constant Natural := Subject'First;
3005 begin
3006 if Anchored_Mode then
3007 if Pat_Len > Sub_Len then
3008 return False;
3009 else
3010 return Pat = Subject (SFirst .. SFirst + Pat_Len - 1);
3011 end if;
3013 else
3014 for J in SFirst .. SFirst + Sub_Len - Pat_Len loop
3015 if Pat = Subject (J .. J + (Pat_Len - 1)) then
3016 return True;
3017 end if;
3018 end loop;
3020 return False;
3021 end if;
3022 end Match;
3024 function Match
3025 (Subject : VString_Var;
3026 Pat : PString;
3027 Replace : VString)
3028 return Boolean
3030 Start, Stop : Natural;
3032 begin
3033 if Debug_Mode then
3034 XMatchD (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3035 else
3036 XMatch (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3037 end if;
3039 if Start = 0 then
3040 return False;
3041 else
3042 Replace_Slice
3043 (Subject'Unrestricted_Access.all,
3044 Start, Stop, Get_String (Replace).all);
3045 return True;
3046 end if;
3047 end Match;
3049 function Match
3050 (Subject : VString_Var;
3051 Pat : PString;
3052 Replace : String)
3053 return Boolean
3055 Start, Stop : Natural;
3057 begin
3058 if Debug_Mode then
3059 XMatchD (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3060 else
3061 XMatch (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3062 end if;
3064 if Start = 0 then
3065 return False;
3066 else
3067 Replace_Slice
3068 (Subject'Unrestricted_Access.all, Start, Stop, Replace);
3069 return True;
3070 end if;
3071 end Match;
3073 procedure Match
3074 (Subject : VString;
3075 Pat : PString)
3077 Start, Stop : Natural;
3079 begin
3080 if Debug_Mode then
3081 XMatchD (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3082 else
3083 XMatch (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3084 end if;
3085 end Match;
3087 procedure Match
3088 (Subject : String;
3089 Pat : PString)
3091 Start, Stop : Natural;
3092 subtype String1 is String (1 .. Subject'Length);
3094 begin
3095 if Debug_Mode then
3096 XMatchD (String1 (Subject), S_To_PE (Pat), 0, Start, Stop);
3097 else
3098 XMatch (String1 (Subject), S_To_PE (Pat), 0, Start, Stop);
3099 end if;
3100 end Match;
3102 procedure Match
3103 (Subject : in out VString;
3104 Pat : PString;
3105 Replace : VString)
3107 Start, Stop : Natural;
3109 begin
3110 if Debug_Mode then
3111 XMatchD (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3112 else
3113 XMatch (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3114 end if;
3116 if Start /= 0 then
3117 Replace_Slice (Subject, Start, Stop, Get_String (Replace).all);
3118 end if;
3119 end Match;
3121 procedure Match
3122 (Subject : in out VString;
3123 Pat : PString;
3124 Replace : String)
3126 Start, Stop : Natural;
3128 begin
3129 if Debug_Mode then
3130 XMatchD (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3131 else
3132 XMatch (Get_String (Subject).all, S_To_PE (Pat), 0, Start, Stop);
3133 end if;
3135 if Start /= 0 then
3136 Replace_Slice (Subject, Start, Stop, Replace);
3137 end if;
3138 end Match;
3140 function Match
3141 (Subject : VString_Var;
3142 Pat : Pattern;
3143 Result : Match_Result_Var)
3144 return Boolean
3146 Start, Stop : Natural;
3148 begin
3149 if Debug_Mode then
3150 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
3151 else
3152 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
3153 end if;
3155 if Start = 0 then
3156 Result'Unrestricted_Access.all.Var := null;
3157 return False;
3159 else
3160 Result'Unrestricted_Access.all.Var := Subject'Unrestricted_Access;
3161 Result'Unrestricted_Access.all.Start := Start;
3162 Result'Unrestricted_Access.all.Stop := Stop;
3163 return True;
3164 end if;
3165 end Match;
3167 procedure Match
3168 (Subject : in out VString;
3169 Pat : Pattern;
3170 Result : out Match_Result)
3172 Start, Stop : Natural;
3174 begin
3175 if Debug_Mode then
3176 XMatchD (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
3177 else
3178 XMatch (Get_String (Subject).all, Pat.P, Pat.Stk, Start, Stop);
3179 end if;
3181 if Start = 0 then
3182 Result.Var := null;
3184 else
3185 Result.Var := Subject'Unrestricted_Access;
3186 Result.Start := Start;
3187 Result.Stop := Stop;
3188 end if;
3189 end Match;
3191 ---------------
3192 -- New_LineD --
3193 ---------------
3195 procedure New_LineD is
3196 begin
3197 if Internal_Debug then
3198 New_Line;
3199 end if;
3200 end New_LineD;
3202 ------------
3203 -- NotAny --
3204 ------------
3206 function NotAny (Str : String) return Pattern is
3207 begin
3208 return (AFC with 0, new PE'(PC_NotAny_CS, 1, EOP, To_Set (Str)));
3209 end NotAny;
3211 function NotAny (Str : VString) return Pattern is
3212 begin
3213 return NotAny (S (Str));
3214 end NotAny;
3216 function NotAny (Str : Character) return Pattern is
3217 begin
3218 return (AFC with 0, new PE'(PC_NotAny_CH, 1, EOP, Str));
3219 end NotAny;
3221 function NotAny (Str : Character_Set) return Pattern is
3222 begin
3223 return (AFC with 0, new PE'(PC_NotAny_CS, 1, EOP, Str));
3224 end NotAny;
3226 function NotAny (Str : access VString) return Pattern is
3227 begin
3228 return (AFC with 0, new PE'(PC_NotAny_VP, 1, EOP, VString_Ptr (Str)));
3229 end NotAny;
3231 function NotAny (Str : VString_Func) return Pattern is
3232 begin
3233 return (AFC with 0, new PE'(PC_NotAny_VF, 1, EOP, Str));
3234 end NotAny;
3236 -----------
3237 -- NSpan --
3238 -----------
3240 function NSpan (Str : String) return Pattern is
3241 begin
3242 return (AFC with 0, new PE'(PC_NSpan_CS, 1, EOP, To_Set (Str)));
3243 end NSpan;
3245 function NSpan (Str : VString) return Pattern is
3246 begin
3247 return NSpan (S (Str));
3248 end NSpan;
3250 function NSpan (Str : Character) return Pattern is
3251 begin
3252 return (AFC with 0, new PE'(PC_NSpan_CH, 1, EOP, Str));
3253 end NSpan;
3255 function NSpan (Str : Character_Set) return Pattern is
3256 begin
3257 return (AFC with 0, new PE'(PC_NSpan_CS, 1, EOP, Str));
3258 end NSpan;
3260 function NSpan (Str : access VString) return Pattern is
3261 begin
3262 return (AFC with 0, new PE'(PC_NSpan_VP, 1, EOP, VString_Ptr (Str)));
3263 end NSpan;
3265 function NSpan (Str : VString_Func) return Pattern is
3266 begin
3267 return (AFC with 0, new PE'(PC_NSpan_VF, 1, EOP, Str));
3268 end NSpan;
3270 ---------
3271 -- Pos --
3272 ---------
3274 function Pos (Count : Natural) return Pattern is
3275 begin
3276 return (AFC with 0, new PE'(PC_Pos_Nat, 1, EOP, Count));
3277 end Pos;
3279 function Pos (Count : Natural_Func) return Pattern is
3280 begin
3281 return (AFC with 0, new PE'(PC_Pos_NF, 1, EOP, Count));
3282 end Pos;
3284 function Pos (Count : access Natural) return Pattern is
3285 begin
3286 return (AFC with 0, new PE'(PC_Pos_NP, 1, EOP, Natural_Ptr (Count)));
3287 end Pos;
3289 ----------
3290 -- PutD --
3291 ----------
3293 procedure PutD (Str : String) is
3294 begin
3295 if Internal_Debug then
3296 Put (Str);
3297 end if;
3298 end PutD;
3300 ---------------
3301 -- Put_LineD --
3302 ---------------
3304 procedure Put_LineD (Str : String) is
3305 begin
3306 if Internal_Debug then
3307 Put_Line (Str);
3308 end if;
3309 end Put_LineD;
3311 -------------
3312 -- Replace --
3313 -------------
3315 procedure Replace
3316 (Result : in out Match_Result;
3317 Replace : VString)
3319 begin
3320 if Result.Var /= null then
3321 Replace_Slice
3322 (Result.Var.all,
3323 Result.Start,
3324 Result.Stop,
3325 Get_String (Replace).all);
3326 Result.Var := null;
3327 end if;
3328 end Replace;
3330 ----------
3331 -- Rest --
3332 ----------
3334 function Rest return Pattern is
3335 begin
3336 return (AFC with 0, new PE'(PC_Rest, 1, EOP));
3337 end Rest;
3339 ----------
3340 -- Rpos --
3341 ----------
3343 function Rpos (Count : Natural) return Pattern is
3344 begin
3345 return (AFC with 0, new PE'(PC_RPos_Nat, 1, EOP, Count));
3346 end Rpos;
3348 function Rpos (Count : Natural_Func) return Pattern is
3349 begin
3350 return (AFC with 0, new PE'(PC_RPos_NF, 1, EOP, Count));
3351 end Rpos;
3353 function Rpos (Count : access Natural) return Pattern is
3354 begin
3355 return (AFC with 0, new PE'(PC_RPos_NP, 1, EOP, Natural_Ptr (Count)));
3356 end Rpos;
3358 ----------
3359 -- Rtab --
3360 ----------
3362 function Rtab (Count : Natural) return Pattern is
3363 begin
3364 return (AFC with 0, new PE'(PC_RTab_Nat, 1, EOP, Count));
3365 end Rtab;
3367 function Rtab (Count : Natural_Func) return Pattern is
3368 begin
3369 return (AFC with 0, new PE'(PC_RTab_NF, 1, EOP, Count));
3370 end Rtab;
3372 function Rtab (Count : access Natural) return Pattern is
3373 begin
3374 return (AFC with 0, new PE'(PC_RTab_NP, 1, EOP, Natural_Ptr (Count)));
3375 end Rtab;
3377 -------------
3378 -- S_To_PE --
3379 -------------
3381 function S_To_PE (Str : PString) return PE_Ptr is
3382 Len : constant Natural := Str'Length;
3384 begin
3385 case Len is
3386 when 0 =>
3387 return new PE'(PC_Null, 1, EOP);
3389 when 1 =>
3390 return new PE'(PC_Char, 1, EOP, Str (1));
3392 when 2 =>
3393 return new PE'(PC_String_2, 1, EOP, Str);
3395 when 3 =>
3396 return new PE'(PC_String_3, 1, EOP, Str);
3398 when 4 =>
3399 return new PE'(PC_String_4, 1, EOP, Str);
3401 when 5 =>
3402 return new PE'(PC_String_5, 1, EOP, Str);
3404 when 6 =>
3405 return new PE'(PC_String_6, 1, EOP, Str);
3407 when others =>
3408 return new PE'(PC_String, 1, EOP, new String'(Str));
3410 end case;
3411 end S_To_PE;
3413 -------------------
3414 -- Set_Successor --
3415 -------------------
3417 -- Note: this procedure is not used by the normal concatenation circuit,
3418 -- since other fixups are required on the left operand in this case, and
3419 -- they might as well be done all together.
3421 procedure Set_Successor (Pat : PE_Ptr; Succ : PE_Ptr) is
3422 begin
3423 if Pat = null then
3424 Uninitialized_Pattern;
3426 elsif Pat = EOP then
3427 Logic_Error;
3429 else
3430 declare
3431 Refs : Ref_Array (1 .. Pat.Index);
3432 -- We build a reference array for L whose N'th element points to
3433 -- the pattern element of L whose original Index value is N.
3435 P : PE_Ptr;
3437 begin
3438 Build_Ref_Array (Pat, Refs);
3440 for J in Refs'Range loop
3441 P := Refs (J);
3443 if P.Pthen = EOP then
3444 P.Pthen := Succ;
3445 end if;
3447 if P.Pcode in PC_Has_Alt and then P.Alt = EOP then
3448 P.Alt := Succ;
3449 end if;
3450 end loop;
3451 end;
3452 end if;
3453 end Set_Successor;
3455 ------------
3456 -- Setcur --
3457 ------------
3459 function Setcur (Var : access Natural) return Pattern is
3460 begin
3461 return (AFC with 0, new PE'(PC_Setcur, 1, EOP, Natural_Ptr (Var)));
3462 end Setcur;
3464 ----------
3465 -- Span --
3466 ----------
3468 function Span (Str : String) return Pattern is
3469 begin
3470 return (AFC with 0, new PE'(PC_Span_CS, 1, EOP, To_Set (Str)));
3471 end Span;
3473 function Span (Str : VString) return Pattern is
3474 begin
3475 return Span (S (Str));
3476 end Span;
3478 function Span (Str : Character) return Pattern is
3479 begin
3480 return (AFC with 0, new PE'(PC_Span_CH, 1, EOP, Str));
3481 end Span;
3483 function Span (Str : Character_Set) return Pattern is
3484 begin
3485 return (AFC with 0, new PE'(PC_Span_CS, 1, EOP, Str));
3486 end Span;
3488 function Span (Str : access VString) return Pattern is
3489 begin
3490 return (AFC with 0, new PE'(PC_Span_VP, 1, EOP, VString_Ptr (Str)));
3491 end Span;
3493 function Span (Str : VString_Func) return Pattern is
3494 begin
3495 return (AFC with 0, new PE'(PC_Span_VF, 1, EOP, Str));
3496 end Span;
3498 ------------
3499 -- Str_BF --
3500 ------------
3502 function Str_BF (A : Boolean_Func) return String is
3503 function To_A is new Unchecked_Conversion (Boolean_Func, Address);
3505 begin
3506 return "BF(" & Image (To_A (A)) & ')';
3507 end Str_BF;
3509 ------------
3510 -- Str_FP --
3511 ------------
3513 function Str_FP (A : File_Ptr) return String is
3514 begin
3515 return "FP(" & Image (A.all'Address) & ')';
3516 end Str_FP;
3518 ------------
3519 -- Str_NF --
3520 ------------
3522 function Str_NF (A : Natural_Func) return String is
3523 function To_A is new Unchecked_Conversion (Natural_Func, Address);
3525 begin
3526 return "NF(" & Image (To_A (A)) & ')';
3527 end Str_NF;
3529 ------------
3530 -- Str_NP --
3531 ------------
3533 function Str_NP (A : Natural_Ptr) return String is
3534 begin
3535 return "NP(" & Image (A.all'Address) & ')';
3536 end Str_NP;
3538 ------------
3539 -- Str_PP --
3540 ------------
3542 function Str_PP (A : Pattern_Ptr) return String is
3543 begin
3544 return "PP(" & Image (A.all'Address) & ')';
3545 end Str_PP;
3547 ------------
3548 -- Str_VF --
3549 ------------
3551 function Str_VF (A : VString_Func) return String is
3552 function To_A is new Unchecked_Conversion (VString_Func, Address);
3554 begin
3555 return "VF(" & Image (To_A (A)) & ')';
3556 end Str_VF;
3558 ------------
3559 -- Str_VP --
3560 ------------
3562 function Str_VP (A : VString_Ptr) return String is
3563 begin
3564 return "VP(" & Image (A.all'Address) & ')';
3565 end Str_VP;
3567 -------------
3568 -- Succeed --
3569 -------------
3571 function Succeed return Pattern is
3572 begin
3573 return (AFC with 1, new PE'(PC_Succeed, 1, EOP));
3574 end Succeed;
3576 ---------
3577 -- Tab --
3578 ---------
3580 function Tab (Count : Natural) return Pattern is
3581 begin
3582 return (AFC with 0, new PE'(PC_Tab_Nat, 1, EOP, Count));
3583 end Tab;
3585 function Tab (Count : Natural_Func) return Pattern is
3586 begin
3587 return (AFC with 0, new PE'(PC_Tab_NF, 1, EOP, Count));
3588 end Tab;
3590 function Tab (Count : access Natural) return Pattern is
3591 begin
3592 return (AFC with 0, new PE'(PC_Tab_NP, 1, EOP, Natural_Ptr (Count)));
3593 end Tab;
3595 ---------------------------
3596 -- Uninitialized_Pattern --
3597 ---------------------------
3599 procedure Uninitialized_Pattern is
3600 begin
3601 Raise_Exception
3602 (Program_Error'Identity,
3603 "uninitialized value of type GNAT.Spitbol.Patterns.Pattern");
3604 end Uninitialized_Pattern;
3606 ------------
3607 -- XMatch --
3608 ------------
3610 procedure XMatch
3611 (Subject : String;
3612 Pat_P : PE_Ptr;
3613 Pat_S : Natural;
3614 Start : out Natural;
3615 Stop : out Natural)
3617 Node : PE_Ptr;
3618 -- Pointer to current pattern node. Initialized from Pat_P, and then
3619 -- updated as the match proceeds through its constituent elements.
3621 Length : constant Natural := Subject'Length;
3622 -- Length of string (= Subject'Last, since Subject'First is always 1)
3624 Cursor : Integer := 0;
3625 -- If the value is non-negative, then this value is the index showing
3626 -- the current position of the match in the subject string. The next
3627 -- character to be matched is at Subject (Cursor + 1). Note that since
3628 -- our view of the subject string in XMatch always has a lower bound
3629 -- of one, regardless of original bounds, that this definition exactly
3630 -- corresponds to the cursor value as referenced by functions like Pos.
3632 -- If the value is negative, then this is a saved stack pointer,
3633 -- typically a base pointer of an inner or outer region. Cursor
3634 -- temporarily holds such a value when it is popped from the stack
3635 -- by Fail. In all cases, Cursor is reset to a proper non-negative
3636 -- cursor value before the match proceeds (e.g. by propagating the
3637 -- failure and popping a "real" cursor value from the stack.
3639 PE_Unanchored : aliased PE := (PC_Unanchored, 0, Pat_P);
3640 -- Dummy pattern element used in the unanchored case.
3642 Stack : Stack_Type;
3643 -- The pattern matching failure stack for this call to Match
3645 Stack_Ptr : Stack_Range;
3646 -- Current stack pointer. This points to the top element of the stack
3647 -- that is currently in use. At the outer level this is the special
3648 -- entry placed on the stack according to the anchor mode.
3650 Stack_Init : constant Stack_Range := Stack'First + 1;
3651 -- This is the initial value of the Stack_Ptr and Stack_Base. The
3652 -- initial (Stack'First) element of the stack is not used so that
3653 -- when we pop the last element off, Stack_Ptr is still in range.
3655 Stack_Base : Stack_Range;
3656 -- This value is the stack base value, i.e. the stack pointer for the
3657 -- first history stack entry in the current stack region. See separate
3658 -- section on handling of recursive pattern matches.
3660 Assign_OnM : Boolean := False;
3661 -- Set True if assign-on-match or write-on-match operations may be
3662 -- present in the history stack, which must then be scanned on a
3663 -- successful match.
3665 procedure Pop_Region;
3666 pragma Inline (Pop_Region);
3667 -- Used at the end of processing of an inner region. if the inner
3668 -- region left no stack entries, then all trace of it is removed.
3669 -- Otherwise a PC_Restore_Region entry is pushed to ensure proper
3670 -- handling of alternatives in the inner region.
3672 procedure Push (Node : PE_Ptr);
3673 pragma Inline (Push);
3674 -- Make entry in pattern matching stack with current cursor valeu
3676 procedure Push_Region;
3677 pragma Inline (Push_Region);
3678 -- This procedure makes a new region on the history stack. The
3679 -- caller first establishes the special entry on the stack, but
3680 -- does not push the stack pointer. Then this call stacks a
3681 -- PC_Remove_Region node, on top of this entry, using the cursor
3682 -- field of the PC_Remove_Region entry to save the outer level
3683 -- stack base value, and resets the stack base to point to this
3684 -- PC_Remove_Region node.
3686 ----------------
3687 -- Pop_Region --
3688 ----------------
3690 procedure Pop_Region is
3691 begin
3692 -- If nothing was pushed in the inner region, we can just get
3693 -- rid of it entirely, leaving no traces that it was ever there
3695 if Stack_Ptr = Stack_Base then
3696 Stack_Ptr := Stack_Base - 2;
3697 Stack_Base := Stack (Stack_Ptr + 2).Cursor;
3699 -- If stuff was pushed in the inner region, then we have to
3700 -- push a PC_R_Restore node so that we properly handle possible
3701 -- rematches within the region.
3703 else
3704 Stack_Ptr := Stack_Ptr + 1;
3705 Stack (Stack_Ptr).Cursor := Stack_Base;
3706 Stack (Stack_Ptr).Node := CP_R_Restore'Access;
3707 Stack_Base := Stack (Stack_Base).Cursor;
3708 end if;
3709 end Pop_Region;
3711 ----------
3712 -- Push --
3713 ----------
3715 procedure Push (Node : PE_Ptr) is
3716 begin
3717 Stack_Ptr := Stack_Ptr + 1;
3718 Stack (Stack_Ptr).Cursor := Cursor;
3719 Stack (Stack_Ptr).Node := Node;
3720 end Push;
3722 -----------------
3723 -- Push_Region --
3724 -----------------
3726 procedure Push_Region is
3727 begin
3728 Stack_Ptr := Stack_Ptr + 2;
3729 Stack (Stack_Ptr).Cursor := Stack_Base;
3730 Stack (Stack_Ptr).Node := CP_R_Remove'Access;
3731 Stack_Base := Stack_Ptr;
3732 end Push_Region;
3734 -- Start of processing for XMatch
3736 begin
3737 if Pat_P = null then
3738 Uninitialized_Pattern;
3739 end if;
3741 -- Check we have enough stack for this pattern. This check deals with
3742 -- every possibility except a match of a recursive pattern, where we
3743 -- make a check at each recursion level.
3745 if Pat_S >= Stack_Size - 1 then
3746 raise Pattern_Stack_Overflow;
3747 end if;
3749 -- In anchored mode, the bottom entry on the stack is an abort entry
3751 if Anchored_Mode then
3752 Stack (Stack_Init).Node := CP_Cancel'Access;
3753 Stack (Stack_Init).Cursor := 0;
3755 -- In unanchored more, the bottom entry on the stack references
3756 -- the special pattern element PE_Unanchored, whose Pthen field
3757 -- points to the initial pattern element. The cursor value in this
3758 -- entry is the number of anchor moves so far.
3760 else
3761 Stack (Stack_Init).Node := PE_Unanchored'Unchecked_Access;
3762 Stack (Stack_Init).Cursor := 0;
3763 end if;
3765 Stack_Ptr := Stack_Init;
3766 Stack_Base := Stack_Ptr;
3767 Cursor := 0;
3768 Node := Pat_P;
3769 goto Match;
3771 -----------------------------------------
3772 -- Main Pattern Matching State Control --
3773 -----------------------------------------
3775 -- This is a state machine which uses gotos to change state. The
3776 -- initial state is Match, to initiate the matching of the first
3777 -- element, so the goto Match above starts the match. In the
3778 -- following descriptions, we indicate the global values that
3779 -- are relevant for the state transition.
3781 -- Come here if entire match fails
3783 <<Match_Fail>>
3784 Start := 0;
3785 Stop := 0;
3786 return;
3788 -- Come here if entire match succeeds
3790 -- Cursor current position in subject string
3792 <<Match_Succeed>>
3793 Start := Stack (Stack_Init).Cursor + 1;
3794 Stop := Cursor;
3796 -- Scan history stack for deferred assignments or writes
3798 if Assign_OnM then
3799 for S in Stack_Init .. Stack_Ptr loop
3800 if Stack (S).Node = CP_Assign'Access then
3801 declare
3802 Inner_Base : constant Stack_Range :=
3803 Stack (S + 1).Cursor;
3804 Special_Entry : constant Stack_Range :=
3805 Inner_Base - 1;
3806 Node_OnM : constant PE_Ptr :=
3807 Stack (Special_Entry).Node;
3808 Start : constant Natural :=
3809 Stack (Special_Entry).Cursor + 1;
3810 Stop : constant Natural := Stack (S).Cursor;
3812 begin
3813 if Node_OnM.Pcode = PC_Assign_OnM then
3814 Set_String (Node_OnM.VP.all, Subject (Start .. Stop));
3816 elsif Node_OnM.Pcode = PC_Write_OnM then
3817 Put_Line (Node_OnM.FP.all, Subject (Start .. Stop));
3819 else
3820 Logic_Error;
3821 end if;
3822 end;
3823 end if;
3824 end loop;
3825 end if;
3827 return;
3829 -- Come here if attempt to match current element fails
3831 -- Stack_Base current stack base
3832 -- Stack_Ptr current stack pointer
3834 <<Fail>>
3835 Cursor := Stack (Stack_Ptr).Cursor;
3836 Node := Stack (Stack_Ptr).Node;
3837 Stack_Ptr := Stack_Ptr - 1;
3838 goto Match;
3840 -- Come here if attempt to match current element succeeds
3842 -- Cursor current position in subject string
3843 -- Node pointer to node successfully matched
3844 -- Stack_Base current stack base
3845 -- Stack_Ptr current stack pointer
3847 <<Succeed>>
3848 Node := Node.Pthen;
3850 -- Come here to match the next pattern element
3852 -- Cursor current position in subject string
3853 -- Node pointer to node to be matched
3854 -- Stack_Base current stack base
3855 -- Stack_Ptr current stack pointer
3857 <<Match>>
3859 --------------------------------------------------
3860 -- Main Pattern Match Element Matching Routines --
3861 --------------------------------------------------
3863 -- Here is the case statement that processes the current node. The
3864 -- processing for each element does one of five things:
3866 -- goto Succeed to move to the successor
3867 -- goto Match_Succeed if the entire match succeeds
3868 -- goto Match_Fail if the entire match fails
3869 -- goto Fail to signal failure of current match
3871 -- Processing is NOT allowed to fall through
3873 case Node.Pcode is
3875 -- Cancel
3877 when PC_Cancel =>
3878 goto Match_Fail;
3880 -- Alternation
3882 when PC_Alt =>
3883 Push (Node.Alt);
3884 Node := Node.Pthen;
3885 goto Match;
3887 -- Any (one character case)
3889 when PC_Any_CH =>
3890 if Cursor < Length
3891 and then Subject (Cursor + 1) = Node.Char
3892 then
3893 Cursor := Cursor + 1;
3894 goto Succeed;
3895 else
3896 goto Fail;
3897 end if;
3899 -- Any (character set case)
3901 when PC_Any_CS =>
3902 if Cursor < Length
3903 and then Is_In (Subject (Cursor + 1), Node.CS)
3904 then
3905 Cursor := Cursor + 1;
3906 goto Succeed;
3907 else
3908 goto Fail;
3909 end if;
3911 -- Any (string function case)
3913 when PC_Any_VF => declare
3914 U : constant VString := Node.VF.all;
3915 Str : constant String_Access := Get_String (U);
3917 begin
3918 if Cursor < Length
3919 and then Is_In (Subject (Cursor + 1), Str.all)
3920 then
3921 Cursor := Cursor + 1;
3922 goto Succeed;
3923 else
3924 goto Fail;
3925 end if;
3926 end;
3928 -- Any (string pointer case)
3930 when PC_Any_VP => declare
3931 Str : constant String_Access := Get_String (Node.VP.all);
3933 begin
3934 if Cursor < Length
3935 and then Is_In (Subject (Cursor + 1), Str.all)
3936 then
3937 Cursor := Cursor + 1;
3938 goto Succeed;
3939 else
3940 goto Fail;
3941 end if;
3942 end;
3944 -- Arb (initial match)
3946 when PC_Arb_X =>
3947 Push (Node.Alt);
3948 Node := Node.Pthen;
3949 goto Match;
3951 -- Arb (extension)
3953 when PC_Arb_Y =>
3954 if Cursor < Length then
3955 Cursor := Cursor + 1;
3956 Push (Node);
3957 goto Succeed;
3958 else
3959 goto Fail;
3960 end if;
3962 -- Arbno_S (simple Arbno initialize). This is the node that
3963 -- initiates the match of a simple Arbno structure.
3965 when PC_Arbno_S =>
3966 Push (Node.Alt);
3967 Node := Node.Pthen;
3968 goto Match;
3970 -- Arbno_X (Arbno initialize). This is the node that initiates
3971 -- the match of a complex Arbno structure.
3973 when PC_Arbno_X =>
3974 Push (Node.Alt);
3975 Node := Node.Pthen;
3976 goto Match;
3978 -- Arbno_Y (Arbno rematch). This is the node that is executed
3979 -- following successful matching of one instance of a complex
3980 -- Arbno pattern.
3982 when PC_Arbno_Y => declare
3983 Null_Match : Boolean := (Cursor = Stack (Stack_Base - 1).Cursor);
3985 begin
3986 Pop_Region;
3988 -- If arbno extension matched null, then immediately fail
3990 if Null_Match then
3991 goto Fail;
3992 end if;
3994 -- Here we must do a stack check to make sure enough stack
3995 -- is left. This check will happen once for each instance of
3996 -- the Arbno pattern that is matched. The Nat field of a
3997 -- PC_Arbno pattern contains the maximum stack entries needed
3998 -- for the Arbno with one instance and the successor pattern
4000 if Stack_Ptr + Node.Nat >= Stack'Last then
4001 raise Pattern_Stack_Overflow;
4002 end if;
4004 goto Succeed;
4005 end;
4007 -- Assign. If this node is executed, it means the assign-on-match
4008 -- or write-on-match operation will not happen after all, so we
4009 -- is propagate the failure, removing the PC_Assign node.
4011 when PC_Assign =>
4012 goto Fail;
4014 -- Assign immediate. This node performs the actual assignment.
4016 when PC_Assign_Imm =>
4017 Set_String
4018 (Node.VP.all,
4019 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
4020 Pop_Region;
4021 goto Succeed;
4023 -- Assign on match. This node sets up for the eventual assignment
4025 when PC_Assign_OnM =>
4026 Stack (Stack_Base - 1).Node := Node;
4027 Push (CP_Assign'Access);
4028 Pop_Region;
4029 Assign_OnM := True;
4030 goto Succeed;
4032 -- Bal
4034 when PC_Bal =>
4035 if Cursor >= Length or else Subject (Cursor + 1) = ')' then
4036 goto Fail;
4038 elsif Subject (Cursor + 1) = '(' then
4039 declare
4040 Paren_Count : Natural := 1;
4042 begin
4043 loop
4044 Cursor := Cursor + 1;
4046 if Cursor >= Length then
4047 goto Fail;
4049 elsif Subject (Cursor + 1) = '(' then
4050 Paren_Count := Paren_Count + 1;
4052 elsif Subject (Cursor + 1) = ')' then
4053 Paren_Count := Paren_Count - 1;
4054 exit when Paren_Count = 0;
4055 end if;
4056 end loop;
4057 end;
4058 end if;
4060 Cursor := Cursor + 1;
4061 Push (Node);
4062 goto Succeed;
4064 -- Break (one character case)
4066 when PC_Break_CH =>
4067 while Cursor < Length loop
4068 if Subject (Cursor + 1) = Node.Char then
4069 goto Succeed;
4070 else
4071 Cursor := Cursor + 1;
4072 end if;
4073 end loop;
4075 goto Fail;
4077 -- Break (character set case)
4079 when PC_Break_CS =>
4080 while Cursor < Length loop
4081 if Is_In (Subject (Cursor + 1), Node.CS) then
4082 goto Succeed;
4083 else
4084 Cursor := Cursor + 1;
4085 end if;
4086 end loop;
4088 goto Fail;
4090 -- Break (string function case)
4092 when PC_Break_VF => declare
4093 U : constant VString := Node.VF.all;
4094 Str : constant String_Access := Get_String (U);
4096 begin
4097 while Cursor < Length loop
4098 if Is_In (Subject (Cursor + 1), Str.all) then
4099 goto Succeed;
4100 else
4101 Cursor := Cursor + 1;
4102 end if;
4103 end loop;
4105 goto Fail;
4106 end;
4108 -- Break (string pointer case)
4110 when PC_Break_VP => declare
4111 Str : String_Access := Get_String (Node.VP.all);
4113 begin
4114 while Cursor < Length loop
4115 if Is_In (Subject (Cursor + 1), Str.all) then
4116 goto Succeed;
4117 else
4118 Cursor := Cursor + 1;
4119 end if;
4120 end loop;
4122 goto Fail;
4123 end;
4125 -- BreakX (one character case)
4127 when PC_BreakX_CH =>
4128 while Cursor < Length loop
4129 if Subject (Cursor + 1) = Node.Char then
4130 goto Succeed;
4131 else
4132 Cursor := Cursor + 1;
4133 end if;
4134 end loop;
4136 goto Fail;
4138 -- BreakX (character set case)
4140 when PC_BreakX_CS =>
4141 while Cursor < Length loop
4142 if Is_In (Subject (Cursor + 1), Node.CS) then
4143 goto Succeed;
4144 else
4145 Cursor := Cursor + 1;
4146 end if;
4147 end loop;
4149 goto Fail;
4151 -- BreakX (string function case)
4153 when PC_BreakX_VF => declare
4154 U : constant VString := Node.VF.all;
4155 Str : constant String_Access := Get_String (U);
4157 begin
4158 while Cursor < Length loop
4159 if Is_In (Subject (Cursor + 1), Str.all) then
4160 goto Succeed;
4161 else
4162 Cursor := Cursor + 1;
4163 end if;
4164 end loop;
4166 goto Fail;
4167 end;
4169 -- BreakX (string pointer case)
4171 when PC_BreakX_VP => declare
4172 Str : String_Access := Get_String (Node.VP.all);
4174 begin
4175 while Cursor < Length loop
4176 if Is_In (Subject (Cursor + 1), Str.all) then
4177 goto Succeed;
4178 else
4179 Cursor := Cursor + 1;
4180 end if;
4181 end loop;
4183 goto Fail;
4184 end;
4186 -- BreakX_X (BreakX extension). See section on "Compound Pattern
4187 -- Structures". This node is the alternative that is stacked to
4188 -- skip past the break character and extend the break.
4190 when PC_BreakX_X =>
4191 Cursor := Cursor + 1;
4192 goto Succeed;
4194 -- Character (one character string)
4196 when PC_Char =>
4197 if Cursor < Length
4198 and then Subject (Cursor + 1) = Node.Char
4199 then
4200 Cursor := Cursor + 1;
4201 goto Succeed;
4202 else
4203 goto Fail;
4204 end if;
4206 -- End of Pattern
4208 when PC_EOP =>
4209 if Stack_Base = Stack_Init then
4210 goto Match_Succeed;
4212 -- End of recursive inner match. See separate section on
4213 -- handing of recursive pattern matches for details.
4215 else
4216 Node := Stack (Stack_Base - 1).Node;
4217 Pop_Region;
4218 goto Match;
4219 end if;
4221 -- Fail
4223 when PC_Fail =>
4224 goto Fail;
4226 -- Fence (built in pattern)
4228 when PC_Fence =>
4229 Push (CP_Cancel'Access);
4230 goto Succeed;
4232 -- Fence function node X. This is the node that gets control
4233 -- after a successful match of the fenced pattern.
4235 when PC_Fence_X =>
4236 Stack_Ptr := Stack_Ptr + 1;
4237 Stack (Stack_Ptr).Cursor := Stack_Base;
4238 Stack (Stack_Ptr).Node := CP_Fence_Y'Access;
4239 Stack_Base := Stack (Stack_Base).Cursor;
4240 goto Succeed;
4242 -- Fence function node Y. This is the node that gets control on
4243 -- a failure that occurs after the fenced pattern has matched.
4245 -- Note: the Cursor at this stage is actually the inner stack
4246 -- base value. We don't reset this, but we do use it to strip
4247 -- off all the entries made by the fenced pattern.
4249 when PC_Fence_Y =>
4250 Stack_Ptr := Cursor - 2;
4251 goto Fail;
4253 -- Len (integer case)
4255 when PC_Len_Nat =>
4256 if Cursor + Node.Nat > Length then
4257 goto Fail;
4258 else
4259 Cursor := Cursor + Node.Nat;
4260 goto Succeed;
4261 end if;
4263 -- Len (Integer function case)
4265 when PC_Len_NF => declare
4266 N : constant Natural := Node.NF.all;
4268 begin
4269 if Cursor + N > Length then
4270 goto Fail;
4271 else
4272 Cursor := Cursor + N;
4273 goto Succeed;
4274 end if;
4275 end;
4277 -- Len (integer pointer case)
4279 when PC_Len_NP =>
4280 if Cursor + Node.NP.all > Length then
4281 goto Fail;
4282 else
4283 Cursor := Cursor + Node.NP.all;
4284 goto Succeed;
4285 end if;
4287 -- NotAny (one character case)
4289 when PC_NotAny_CH =>
4290 if Cursor < Length
4291 and then Subject (Cursor + 1) /= Node.Char
4292 then
4293 Cursor := Cursor + 1;
4294 goto Succeed;
4295 else
4296 goto Fail;
4297 end if;
4299 -- NotAny (character set case)
4301 when PC_NotAny_CS =>
4302 if Cursor < Length
4303 and then not Is_In (Subject (Cursor + 1), Node.CS)
4304 then
4305 Cursor := Cursor + 1;
4306 goto Succeed;
4307 else
4308 goto Fail;
4309 end if;
4311 -- NotAny (string function case)
4313 when PC_NotAny_VF => declare
4314 U : constant VString := Node.VF.all;
4315 Str : constant String_Access := Get_String (U);
4317 begin
4318 if Cursor < Length
4319 and then
4320 not Is_In (Subject (Cursor + 1), Str.all)
4321 then
4322 Cursor := Cursor + 1;
4323 goto Succeed;
4324 else
4325 goto Fail;
4326 end if;
4327 end;
4329 -- NotAny (string pointer case)
4331 when PC_NotAny_VP => declare
4332 Str : String_Access := Get_String (Node.VP.all);
4334 begin
4335 if Cursor < Length
4336 and then
4337 not Is_In (Subject (Cursor + 1), Str.all)
4338 then
4339 Cursor := Cursor + 1;
4340 goto Succeed;
4341 else
4342 goto Fail;
4343 end if;
4344 end;
4346 -- NSpan (one character case)
4348 when PC_NSpan_CH =>
4349 while Cursor < Length
4350 and then Subject (Cursor + 1) = Node.Char
4351 loop
4352 Cursor := Cursor + 1;
4353 end loop;
4355 goto Succeed;
4357 -- NSpan (character set case)
4359 when PC_NSpan_CS =>
4360 while Cursor < Length
4361 and then Is_In (Subject (Cursor + 1), Node.CS)
4362 loop
4363 Cursor := Cursor + 1;
4364 end loop;
4366 goto Succeed;
4368 -- NSpan (string function case)
4370 when PC_NSpan_VF => declare
4371 U : constant VString := Node.VF.all;
4372 Str : constant String_Access := Get_String (U);
4374 begin
4375 while Cursor < Length
4376 and then Is_In (Subject (Cursor + 1), Str.all)
4377 loop
4378 Cursor := Cursor + 1;
4379 end loop;
4381 goto Succeed;
4382 end;
4384 -- NSpan (string pointer case)
4386 when PC_NSpan_VP => declare
4387 Str : String_Access := Get_String (Node.VP.all);
4389 begin
4390 while Cursor < Length
4391 and then Is_In (Subject (Cursor + 1), Str.all)
4392 loop
4393 Cursor := Cursor + 1;
4394 end loop;
4396 goto Succeed;
4397 end;
4399 -- Null string
4401 when PC_Null =>
4402 goto Succeed;
4404 -- Pos (integer case)
4406 when PC_Pos_Nat =>
4407 if Cursor = Node.Nat then
4408 goto Succeed;
4409 else
4410 goto Fail;
4411 end if;
4413 -- Pos (Integer function case)
4415 when PC_Pos_NF => declare
4416 N : constant Natural := Node.NF.all;
4418 begin
4419 if Cursor = N then
4420 goto Succeed;
4421 else
4422 goto Fail;
4423 end if;
4424 end;
4426 -- Pos (integer pointer case)
4428 when PC_Pos_NP =>
4429 if Cursor = Node.NP.all then
4430 goto Succeed;
4431 else
4432 goto Fail;
4433 end if;
4435 -- Predicate function
4437 when PC_Pred_Func =>
4438 if Node.BF.all then
4439 goto Succeed;
4440 else
4441 goto Fail;
4442 end if;
4444 -- Region Enter. Initiate new pattern history stack region
4446 when PC_R_Enter =>
4447 Stack (Stack_Ptr + 1).Cursor := Cursor;
4448 Push_Region;
4449 goto Succeed;
4451 -- Region Remove node. This is the node stacked by an R_Enter.
4452 -- It removes the special format stack entry right underneath, and
4453 -- then restores the outer level stack base and signals failure.
4455 -- Note: the cursor value at this stage is actually the (negative)
4456 -- stack base value for the outer level.
4458 when PC_R_Remove =>
4459 Stack_Base := Cursor;
4460 Stack_Ptr := Stack_Ptr - 1;
4461 goto Fail;
4463 -- Region restore node. This is the node stacked at the end of an
4464 -- inner level match. Its function is to restore the inner level
4465 -- region, so that alternatives in this region can be sought.
4467 -- Note: the Cursor at this stage is actually the negative of the
4468 -- inner stack base value, which we use to restore the inner region.
4470 when PC_R_Restore =>
4471 Stack_Base := Cursor;
4472 goto Fail;
4474 -- Rest
4476 when PC_Rest =>
4477 Cursor := Length;
4478 goto Succeed;
4480 -- Initiate recursive match (pattern pointer case)
4482 when PC_Rpat =>
4483 Stack (Stack_Ptr + 1).Node := Node.Pthen;
4484 Push_Region;
4486 if Stack_Ptr + Node.PP.all.Stk >= Stack_Size then
4487 raise Pattern_Stack_Overflow;
4488 else
4489 Node := Node.PP.all.P;
4490 goto Match;
4491 end if;
4493 -- RPos (integer case)
4495 when PC_RPos_Nat =>
4496 if Cursor = (Length - Node.Nat) then
4497 goto Succeed;
4498 else
4499 goto Fail;
4500 end if;
4502 -- RPos (integer function case)
4504 when PC_RPos_NF => declare
4505 N : constant Natural := Node.NF.all;
4507 begin
4508 if Length - Cursor = N then
4509 goto Succeed;
4510 else
4511 goto Fail;
4512 end if;
4513 end;
4515 -- RPos (integer pointer case)
4517 when PC_RPos_NP =>
4518 if Cursor = (Length - Node.NP.all) then
4519 goto Succeed;
4520 else
4521 goto Fail;
4522 end if;
4524 -- RTab (integer case)
4526 when PC_RTab_Nat =>
4527 if Cursor <= (Length - Node.Nat) then
4528 Cursor := Length - Node.Nat;
4529 goto Succeed;
4530 else
4531 goto Fail;
4532 end if;
4534 -- RTab (integer function case)
4536 when PC_RTab_NF => declare
4537 N : constant Natural := Node.NF.all;
4539 begin
4540 if Length - Cursor >= N then
4541 Cursor := Length - N;
4542 goto Succeed;
4543 else
4544 goto Fail;
4545 end if;
4546 end;
4548 -- RTab (integer pointer case)
4550 when PC_RTab_NP =>
4551 if Cursor <= (Length - Node.NP.all) then
4552 Cursor := Length - Node.NP.all;
4553 goto Succeed;
4554 else
4555 goto Fail;
4556 end if;
4558 -- Cursor assignment
4560 when PC_Setcur =>
4561 Node.Var.all := Cursor;
4562 goto Succeed;
4564 -- Span (one character case)
4566 when PC_Span_CH => declare
4567 P : Natural := Cursor;
4569 begin
4570 while P < Length
4571 and then Subject (P + 1) = Node.Char
4572 loop
4573 P := P + 1;
4574 end loop;
4576 if P /= Cursor then
4577 Cursor := P;
4578 goto Succeed;
4579 else
4580 goto Fail;
4581 end if;
4582 end;
4584 -- Span (character set case)
4586 when PC_Span_CS => declare
4587 P : Natural := Cursor;
4589 begin
4590 while P < Length
4591 and then Is_In (Subject (P + 1), Node.CS)
4592 loop
4593 P := P + 1;
4594 end loop;
4596 if P /= Cursor then
4597 Cursor := P;
4598 goto Succeed;
4599 else
4600 goto Fail;
4601 end if;
4602 end;
4604 -- Span (string function case)
4606 when PC_Span_VF => declare
4607 U : constant VString := Node.VF.all;
4608 Str : constant String_Access := Get_String (U);
4609 P : Natural := Cursor;
4611 begin
4612 while P < Length
4613 and then Is_In (Subject (P + 1), Str.all)
4614 loop
4615 P := P + 1;
4616 end loop;
4618 if P /= Cursor then
4619 Cursor := P;
4620 goto Succeed;
4621 else
4622 goto Fail;
4623 end if;
4624 end;
4626 -- Span (string pointer case)
4628 when PC_Span_VP => declare
4629 Str : String_Access := Get_String (Node.VP.all);
4630 P : Natural := Cursor;
4632 begin
4633 while P < Length
4634 and then Is_In (Subject (P + 1), Str.all)
4635 loop
4636 P := P + 1;
4637 end loop;
4639 if P /= Cursor then
4640 Cursor := P;
4641 goto Succeed;
4642 else
4643 goto Fail;
4644 end if;
4645 end;
4647 -- String (two character case)
4649 when PC_String_2 =>
4650 if (Length - Cursor) >= 2
4651 and then Subject (Cursor + 1 .. Cursor + 2) = Node.Str2
4652 then
4653 Cursor := Cursor + 2;
4654 goto Succeed;
4655 else
4656 goto Fail;
4657 end if;
4659 -- String (three character case)
4661 when PC_String_3 =>
4662 if (Length - Cursor) >= 3
4663 and then Subject (Cursor + 1 .. Cursor + 3) = Node.Str3
4664 then
4665 Cursor := Cursor + 3;
4666 goto Succeed;
4667 else
4668 goto Fail;
4669 end if;
4671 -- String (four character case)
4673 when PC_String_4 =>
4674 if (Length - Cursor) >= 4
4675 and then Subject (Cursor + 1 .. Cursor + 4) = Node.Str4
4676 then
4677 Cursor := Cursor + 4;
4678 goto Succeed;
4679 else
4680 goto Fail;
4681 end if;
4683 -- String (five character case)
4685 when PC_String_5 =>
4686 if (Length - Cursor) >= 5
4687 and then Subject (Cursor + 1 .. Cursor + 5) = Node.Str5
4688 then
4689 Cursor := Cursor + 5;
4690 goto Succeed;
4691 else
4692 goto Fail;
4693 end if;
4695 -- String (six character case)
4697 when PC_String_6 =>
4698 if (Length - Cursor) >= 6
4699 and then Subject (Cursor + 1 .. Cursor + 6) = Node.Str6
4700 then
4701 Cursor := Cursor + 6;
4702 goto Succeed;
4703 else
4704 goto Fail;
4705 end if;
4707 -- String (case of more than six characters)
4709 when PC_String => declare
4710 Len : constant Natural := Node.Str'Length;
4712 begin
4713 if (Length - Cursor) >= Len
4714 and then Node.Str.all = Subject (Cursor + 1 .. Cursor + Len)
4715 then
4716 Cursor := Cursor + Len;
4717 goto Succeed;
4718 else
4719 goto Fail;
4720 end if;
4721 end;
4723 -- String (function case)
4725 when PC_String_VF => declare
4726 U : constant VString := Node.VF.all;
4727 Str : constant String_Access := Get_String (U);
4728 Len : constant Natural := Str'Length;
4730 begin
4731 if (Length - Cursor) >= Len
4732 and then Str.all = Subject (Cursor + 1 .. Cursor + Len)
4733 then
4734 Cursor := Cursor + Len;
4735 goto Succeed;
4736 else
4737 goto Fail;
4738 end if;
4739 end;
4741 -- String (pointer case)
4743 when PC_String_VP => declare
4744 S : String_Access := Get_String (Node.VP.all);
4745 Len : constant Natural := S'Length;
4747 begin
4748 if (Length - Cursor) >= Len
4749 and then S.all = Subject (Cursor + 1 .. Cursor + Len)
4750 then
4751 Cursor := Cursor + Len;
4752 goto Succeed;
4753 else
4754 goto Fail;
4755 end if;
4756 end;
4758 -- Succeed
4760 when PC_Succeed =>
4761 Push (Node);
4762 goto Succeed;
4764 -- Tab (integer case)
4766 when PC_Tab_Nat =>
4767 if Cursor <= Node.Nat then
4768 Cursor := Node.Nat;
4769 goto Succeed;
4770 else
4771 goto Fail;
4772 end if;
4774 -- Tab (integer function case)
4776 when PC_Tab_NF => declare
4777 N : constant Natural := Node.NF.all;
4779 begin
4780 if Cursor <= N then
4781 Cursor := N;
4782 goto Succeed;
4783 else
4784 goto Fail;
4785 end if;
4786 end;
4788 -- Tab (integer pointer case)
4790 when PC_Tab_NP =>
4791 if Cursor <= Node.NP.all then
4792 Cursor := Node.NP.all;
4793 goto Succeed;
4794 else
4795 goto Fail;
4796 end if;
4798 -- Unanchored movement
4800 when PC_Unanchored =>
4802 -- All done if we tried every position
4804 if Cursor > Length then
4805 goto Match_Fail;
4807 -- Otherwise extend the anchor point, and restack ourself
4809 else
4810 Cursor := Cursor + 1;
4811 Push (Node);
4812 goto Succeed;
4813 end if;
4815 -- Write immediate. This node performs the actual write
4817 when PC_Write_Imm =>
4818 Put_Line
4819 (Node.FP.all,
4820 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
4821 Pop_Region;
4822 goto Succeed;
4824 -- Write on match. This node sets up for the eventual write
4826 when PC_Write_OnM =>
4827 Stack (Stack_Base - 1).Node := Node;
4828 Push (CP_Assign'Access);
4829 Pop_Region;
4830 Assign_OnM := True;
4831 goto Succeed;
4833 end case;
4835 -- We are NOT allowed to fall though this case statement, since every
4836 -- match routine must end by executing a goto to the appropriate point
4837 -- in the finite state machine model.
4839 Logic_Error;
4841 end XMatch;
4843 -------------
4844 -- XMatchD --
4845 -------------
4847 -- Maintenance note: There is a LOT of code duplication between XMatch
4848 -- and XMatchD. This is quite intentional, the point is to avoid any
4849 -- unnecessary debugging overhead in the XMatch case, but this does mean
4850 -- that any changes to XMatchD must be mirrored in XMatch. In case of
4851 -- any major changes, the proper approach is to delete XMatch, make the
4852 -- changes to XMatchD, and then make a copy of XMatchD, removing all
4853 -- calls to Dout, and all Put and Put_Line operations. This copy becomes
4854 -- the new XMatch.
4856 procedure XMatchD
4857 (Subject : String;
4858 Pat_P : PE_Ptr;
4859 Pat_S : Natural;
4860 Start : out Natural;
4861 Stop : out Natural)
4863 Node : PE_Ptr;
4864 -- Pointer to current pattern node. Initialized from Pat_P, and then
4865 -- updated as the match proceeds through its constituent elements.
4867 Length : constant Natural := Subject'Length;
4868 -- Length of string (= Subject'Last, since Subject'First is always 1)
4870 Cursor : Integer := 0;
4871 -- If the value is non-negative, then this value is the index showing
4872 -- the current position of the match in the subject string. The next
4873 -- character to be matched is at Subject (Cursor + 1). Note that since
4874 -- our view of the subject string in XMatch always has a lower bound
4875 -- of one, regardless of original bounds, that this definition exactly
4876 -- corresponds to the cursor value as referenced by functions like Pos.
4878 -- If the value is negative, then this is a saved stack pointer,
4879 -- typically a base pointer of an inner or outer region. Cursor
4880 -- temporarily holds such a value when it is popped from the stack
4881 -- by Fail. In all cases, Cursor is reset to a proper non-negative
4882 -- cursor value before the match proceeds (e.g. by propagating the
4883 -- failure and popping a "real" cursor value from the stack.
4885 PE_Unanchored : aliased PE := (PC_Unanchored, 0, Pat_P);
4886 -- Dummy pattern element used in the unanchored case.
4888 Region_Level : Natural := 0;
4889 -- Keeps track of recursive region level. This is used only for
4890 -- debugging, it is the number of saved history stack base values.
4892 Stack : Stack_Type;
4893 -- The pattern matching failure stack for this call to Match
4895 Stack_Ptr : Stack_Range;
4896 -- Current stack pointer. This points to the top element of the stack
4897 -- that is currently in use. At the outer level this is the special
4898 -- entry placed on the stack according to the anchor mode.
4900 Stack_Init : constant Stack_Range := Stack'First + 1;
4901 -- This is the initial value of the Stack_Ptr and Stack_Base. The
4902 -- initial (Stack'First) element of the stack is not used so that
4903 -- when we pop the last element off, Stack_Ptr is still in range.
4905 Stack_Base : Stack_Range;
4906 -- This value is the stack base value, i.e. the stack pointer for the
4907 -- first history stack entry in the current stack region. See separate
4908 -- section on handling of recursive pattern matches.
4910 Assign_OnM : Boolean := False;
4911 -- Set True if assign-on-match or write-on-match operations may be
4912 -- present in the history stack, which must then be scanned on a
4913 -- successful match.
4915 procedure Dout (Str : String);
4916 -- Output string to standard error with bars indicating region level.
4918 procedure Dout (Str : String; A : Character);
4919 -- Calls Dout with the string S ('A')
4921 procedure Dout (Str : String; A : Character_Set);
4922 -- Calls Dout with the string S ("A")
4924 procedure Dout (Str : String; A : Natural);
4925 -- Calls Dout with the string S (A)
4927 procedure Dout (Str : String; A : String);
4928 -- Calls Dout with the string S ("A")
4930 function Img (P : PE_Ptr) return String;
4931 -- Returns a string of the form #nnn where nnn is P.Index
4933 procedure Pop_Region;
4934 pragma Inline (Pop_Region);
4935 -- Used at the end of processing of an inner region. if the inner
4936 -- region left no stack entries, then all trace of it is removed.
4937 -- Otherwise a PC_Restore_Region entry is pushed to ensure proper
4938 -- handling of alternatives in the inner region.
4940 procedure Push (Node : PE_Ptr);
4941 pragma Inline (Push);
4942 -- Make entry in pattern matching stack with current cursor valeu
4944 procedure Push_Region;
4945 pragma Inline (Push_Region);
4946 -- This procedure makes a new region on the history stack. The
4947 -- caller first establishes the special entry on the stack, but
4948 -- does not push the stack pointer. Then this call stacks a
4949 -- PC_Remove_Region node, on top of this entry, using the cursor
4950 -- field of the PC_Remove_Region entry to save the outer level
4951 -- stack base value, and resets the stack base to point to this
4952 -- PC_Remove_Region node.
4954 ----------
4955 -- Dout --
4956 ----------
4958 procedure Dout (Str : String) is
4959 begin
4960 for J in 1 .. Region_Level loop
4961 Put ("| ");
4962 end loop;
4964 Put_Line (Str);
4965 end Dout;
4967 procedure Dout (Str : String; A : Character) is
4968 begin
4969 Dout (Str & " ('" & A & "')");
4970 end Dout;
4972 procedure Dout (Str : String; A : Character_Set) is
4973 begin
4974 Dout (Str & " (" & Image (To_Sequence (A)) & ')');
4975 end Dout;
4977 procedure Dout (Str : String; A : Natural) is
4978 begin
4979 Dout (Str & " (" & A & ')');
4980 end Dout;
4982 procedure Dout (Str : String; A : String) is
4983 begin
4984 Dout (Str & " (" & Image (A) & ')');
4985 end Dout;
4987 ---------
4988 -- Img --
4989 ---------
4991 function Img (P : PE_Ptr) return String is
4992 begin
4993 return "#" & Integer (P.Index) & " ";
4994 end Img;
4996 ----------------
4997 -- Pop_Region --
4998 ----------------
5000 procedure Pop_Region is
5001 begin
5002 Region_Level := Region_Level - 1;
5004 -- If nothing was pushed in the inner region, we can just get
5005 -- rid of it entirely, leaving no traces that it was ever there
5007 if Stack_Ptr = Stack_Base then
5008 Stack_Ptr := Stack_Base - 2;
5009 Stack_Base := Stack (Stack_Ptr + 2).Cursor;
5011 -- If stuff was pushed in the inner region, then we have to
5012 -- push a PC_R_Restore node so that we properly handle possible
5013 -- rematches within the region.
5015 else
5016 Stack_Ptr := Stack_Ptr + 1;
5017 Stack (Stack_Ptr).Cursor := Stack_Base;
5018 Stack (Stack_Ptr).Node := CP_R_Restore'Access;
5019 Stack_Base := Stack (Stack_Base).Cursor;
5020 end if;
5021 end Pop_Region;
5023 ----------
5024 -- Push --
5025 ----------
5027 procedure Push (Node : PE_Ptr) is
5028 begin
5029 Stack_Ptr := Stack_Ptr + 1;
5030 Stack (Stack_Ptr).Cursor := Cursor;
5031 Stack (Stack_Ptr).Node := Node;
5032 end Push;
5034 -----------------
5035 -- Push_Region --
5036 -----------------
5038 procedure Push_Region is
5039 begin
5040 Region_Level := Region_Level + 1;
5041 Stack_Ptr := Stack_Ptr + 2;
5042 Stack (Stack_Ptr).Cursor := Stack_Base;
5043 Stack (Stack_Ptr).Node := CP_R_Remove'Access;
5044 Stack_Base := Stack_Ptr;
5045 end Push_Region;
5047 -- Start of processing for XMatchD
5049 begin
5050 New_Line;
5051 Put_Line ("Initiating pattern match, subject = " & Image (Subject));
5052 Put ("--------------------------------------");
5054 for J in 1 .. Length loop
5055 Put ('-');
5056 end loop;
5058 New_Line;
5059 Put_Line ("subject length = " & Length);
5061 if Pat_P = null then
5062 Uninitialized_Pattern;
5063 end if;
5065 -- Check we have enough stack for this pattern. This check deals with
5066 -- every possibility except a match of a recursive pattern, where we
5067 -- make a check at each recursion level.
5069 if Pat_S >= Stack_Size - 1 then
5070 raise Pattern_Stack_Overflow;
5071 end if;
5073 -- In anchored mode, the bottom entry on the stack is an abort entry
5075 if Anchored_Mode then
5076 Stack (Stack_Init).Node := CP_Cancel'Access;
5077 Stack (Stack_Init).Cursor := 0;
5079 -- In unanchored more, the bottom entry on the stack references
5080 -- the special pattern element PE_Unanchored, whose Pthen field
5081 -- points to the initial pattern element. The cursor value in this
5082 -- entry is the number of anchor moves so far.
5084 else
5085 Stack (Stack_Init).Node := PE_Unanchored'Unchecked_Access;
5086 Stack (Stack_Init).Cursor := 0;
5087 end if;
5089 Stack_Ptr := Stack_Init;
5090 Stack_Base := Stack_Ptr;
5091 Cursor := 0;
5092 Node := Pat_P;
5093 goto Match;
5095 -----------------------------------------
5096 -- Main Pattern Matching State Control --
5097 -----------------------------------------
5099 -- This is a state machine which uses gotos to change state. The
5100 -- initial state is Match, to initiate the matching of the first
5101 -- element, so the goto Match above starts the match. In the
5102 -- following descriptions, we indicate the global values that
5103 -- are relevant for the state transition.
5105 -- Come here if entire match fails
5107 <<Match_Fail>>
5108 Dout ("match fails");
5109 New_Line;
5110 Start := 0;
5111 Stop := 0;
5112 return;
5114 -- Come here if entire match succeeds
5116 -- Cursor current position in subject string
5118 <<Match_Succeed>>
5119 Dout ("match succeeds");
5120 Start := Stack (Stack_Init).Cursor + 1;
5121 Stop := Cursor;
5122 Dout ("first matched character index = " & Start);
5123 Dout ("last matched character index = " & Stop);
5124 Dout ("matched substring = " & Image (Subject (Start .. Stop)));
5126 -- Scan history stack for deferred assignments or writes
5128 if Assign_OnM then
5129 for S in Stack'First .. Stack_Ptr loop
5130 if Stack (S).Node = CP_Assign'Access then
5131 declare
5132 Inner_Base : constant Stack_Range :=
5133 Stack (S + 1).Cursor;
5134 Special_Entry : constant Stack_Range :=
5135 Inner_Base - 1;
5136 Node_OnM : constant PE_Ptr :=
5137 Stack (Special_Entry).Node;
5138 Start : constant Natural :=
5139 Stack (Special_Entry).Cursor + 1;
5140 Stop : constant Natural := Stack (S).Cursor;
5142 begin
5143 if Node_OnM.Pcode = PC_Assign_OnM then
5144 Set_String (Node_OnM.VP.all, Subject (Start .. Stop));
5145 Dout
5146 (Img (Stack (S).Node) &
5147 "deferred assignment of " &
5148 Image (Subject (Start .. Stop)));
5150 elsif Node_OnM.Pcode = PC_Write_OnM then
5151 Put_Line (Node_OnM.FP.all, Subject (Start .. Stop));
5152 Dout
5153 (Img (Stack (S).Node) &
5154 "deferred write of " &
5155 Image (Subject (Start .. Stop)));
5157 else
5158 Logic_Error;
5159 end if;
5160 end;
5161 end if;
5162 end loop;
5163 end if;
5165 New_Line;
5166 return;
5168 -- Come here if attempt to match current element fails
5170 -- Stack_Base current stack base
5171 -- Stack_Ptr current stack pointer
5173 <<Fail>>
5174 Cursor := Stack (Stack_Ptr).Cursor;
5175 Node := Stack (Stack_Ptr).Node;
5176 Stack_Ptr := Stack_Ptr - 1;
5178 if Cursor >= 0 then
5179 Dout ("failure, cursor reset to " & Cursor);
5180 end if;
5182 goto Match;
5184 -- Come here if attempt to match current element succeeds
5186 -- Cursor current position in subject string
5187 -- Node pointer to node successfully matched
5188 -- Stack_Base current stack base
5189 -- Stack_Ptr current stack pointer
5191 <<Succeed>>
5192 Dout ("success, cursor = " & Cursor);
5193 Node := Node.Pthen;
5195 -- Come here to match the next pattern element
5197 -- Cursor current position in subject string
5198 -- Node pointer to node to be matched
5199 -- Stack_Base current stack base
5200 -- Stack_Ptr current stack pointer
5202 <<Match>>
5204 --------------------------------------------------
5205 -- Main Pattern Match Element Matching Routines --
5206 --------------------------------------------------
5208 -- Here is the case statement that processes the current node. The
5209 -- processing for each element does one of five things:
5211 -- goto Succeed to move to the successor
5212 -- goto Match_Succeed if the entire match succeeds
5213 -- goto Match_Fail if the entire match fails
5214 -- goto Fail to signal failure of current match
5216 -- Processing is NOT allowed to fall through
5218 case Node.Pcode is
5220 -- Cancel
5222 when PC_Cancel =>
5223 Dout (Img (Node) & "matching Cancel");
5224 goto Match_Fail;
5226 -- Alternation
5228 when PC_Alt =>
5229 Dout
5230 (Img (Node) & "setting up alternative " & Img (Node.Alt));
5231 Push (Node.Alt);
5232 Node := Node.Pthen;
5233 goto Match;
5235 -- Any (one character case)
5237 when PC_Any_CH =>
5238 Dout (Img (Node) & "matching Any", Node.Char);
5240 if Cursor < Length
5241 and then Subject (Cursor + 1) = Node.Char
5242 then
5243 Cursor := Cursor + 1;
5244 goto Succeed;
5245 else
5246 goto Fail;
5247 end if;
5249 -- Any (character set case)
5251 when PC_Any_CS =>
5252 Dout (Img (Node) & "matching Any", Node.CS);
5254 if Cursor < Length
5255 and then Is_In (Subject (Cursor + 1), Node.CS)
5256 then
5257 Cursor := Cursor + 1;
5258 goto Succeed;
5259 else
5260 goto Fail;
5261 end if;
5263 -- Any (string function case)
5265 when PC_Any_VF => declare
5266 U : constant VString := Node.VF.all;
5267 Str : constant String_Access := Get_String (U);
5269 begin
5270 Dout (Img (Node) & "matching Any", Str.all);
5272 if Cursor < Length
5273 and then Is_In (Subject (Cursor + 1), Str.all)
5274 then
5275 Cursor := Cursor + 1;
5276 goto Succeed;
5277 else
5278 goto Fail;
5279 end if;
5280 end;
5282 -- Any (string pointer case)
5284 when PC_Any_VP => declare
5285 Str : String_Access := Get_String (Node.VP.all);
5287 begin
5288 Dout (Img (Node) & "matching Any", Str.all);
5290 if Cursor < Length
5291 and then Is_In (Subject (Cursor + 1), Str.all)
5292 then
5293 Cursor := Cursor + 1;
5294 goto Succeed;
5295 else
5296 goto Fail;
5297 end if;
5298 end;
5300 -- Arb (initial match)
5302 when PC_Arb_X =>
5303 Dout (Img (Node) & "matching Arb");
5304 Push (Node.Alt);
5305 Node := Node.Pthen;
5306 goto Match;
5308 -- Arb (extension)
5310 when PC_Arb_Y =>
5311 Dout (Img (Node) & "extending Arb");
5313 if Cursor < Length then
5314 Cursor := Cursor + 1;
5315 Push (Node);
5316 goto Succeed;
5317 else
5318 goto Fail;
5319 end if;
5321 -- Arbno_S (simple Arbno initialize). This is the node that
5322 -- initiates the match of a simple Arbno structure.
5324 when PC_Arbno_S =>
5325 Dout (Img (Node) &
5326 "setting up Arbno alternative " & Img (Node.Alt));
5327 Push (Node.Alt);
5328 Node := Node.Pthen;
5329 goto Match;
5331 -- Arbno_X (Arbno initialize). This is the node that initiates
5332 -- the match of a complex Arbno structure.
5334 when PC_Arbno_X =>
5335 Dout (Img (Node) &
5336 "setting up Arbno alternative " & Img (Node.Alt));
5337 Push (Node.Alt);
5338 Node := Node.Pthen;
5339 goto Match;
5341 -- Arbno_Y (Arbno rematch). This is the node that is executed
5342 -- following successful matching of one instance of a complex
5343 -- Arbno pattern.
5345 when PC_Arbno_Y => declare
5346 Null_Match : Boolean := (Cursor = Stack (Stack_Base - 1).Cursor);
5348 begin
5349 Dout (Img (Node) & "extending Arbno");
5350 Pop_Region;
5352 -- If arbno extension matched null, then immediately fail
5354 if Null_Match then
5355 Dout ("Arbno extension matched null, so fails");
5356 goto Fail;
5357 end if;
5359 -- Here we must do a stack check to make sure enough stack
5360 -- is left. This check will happen once for each instance of
5361 -- the Arbno pattern that is matched. The Nat field of a
5362 -- PC_Arbno pattern contains the maximum stack entries needed
5363 -- for the Arbno with one instance and the successor pattern
5365 if Stack_Ptr + Node.Nat >= Stack'Last then
5366 raise Pattern_Stack_Overflow;
5367 end if;
5369 goto Succeed;
5370 end;
5372 -- Assign. If this node is executed, it means the assign-on-match
5373 -- or write-on-match operation will not happen after all, so we
5374 -- is propagate the failure, removing the PC_Assign node.
5376 when PC_Assign =>
5377 Dout (Img (Node) & "deferred assign/write cancelled");
5378 goto Fail;
5380 -- Assign immediate. This node performs the actual assignment.
5382 when PC_Assign_Imm =>
5383 Dout
5384 (Img (Node) & "executing immediate assignment of " &
5385 Image (Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor)));
5386 Set_String
5387 (Node.VP.all,
5388 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
5389 Pop_Region;
5390 goto Succeed;
5392 -- Assign on match. This node sets up for the eventual assignment
5394 when PC_Assign_OnM =>
5395 Dout (Img (Node) & "registering deferred assignment");
5396 Stack (Stack_Base - 1).Node := Node;
5397 Push (CP_Assign'Access);
5398 Pop_Region;
5399 Assign_OnM := True;
5400 goto Succeed;
5402 -- Bal
5404 when PC_Bal =>
5405 Dout (Img (Node) & "matching or extending Bal");
5406 if Cursor >= Length or else Subject (Cursor + 1) = ')' then
5407 goto Fail;
5409 elsif Subject (Cursor + 1) = '(' then
5410 declare
5411 Paren_Count : Natural := 1;
5413 begin
5414 loop
5415 Cursor := Cursor + 1;
5417 if Cursor >= Length then
5418 goto Fail;
5420 elsif Subject (Cursor + 1) = '(' then
5421 Paren_Count := Paren_Count + 1;
5423 elsif Subject (Cursor + 1) = ')' then
5424 Paren_Count := Paren_Count - 1;
5425 exit when Paren_Count = 0;
5426 end if;
5427 end loop;
5428 end;
5429 end if;
5431 Cursor := Cursor + 1;
5432 Push (Node);
5433 goto Succeed;
5435 -- Break (one character case)
5437 when PC_Break_CH =>
5438 Dout (Img (Node) & "matching Break", Node.Char);
5440 while Cursor < Length loop
5441 if Subject (Cursor + 1) = Node.Char then
5442 goto Succeed;
5443 else
5444 Cursor := Cursor + 1;
5445 end if;
5446 end loop;
5448 goto Fail;
5450 -- Break (character set case)
5452 when PC_Break_CS =>
5453 Dout (Img (Node) & "matching Break", Node.CS);
5455 while Cursor < Length loop
5456 if Is_In (Subject (Cursor + 1), Node.CS) then
5457 goto Succeed;
5458 else
5459 Cursor := Cursor + 1;
5460 end if;
5461 end loop;
5463 goto Fail;
5465 -- Break (string function case)
5467 when PC_Break_VF => declare
5468 U : constant VString := Node.VF.all;
5469 Str : constant String_Access := Get_String (U);
5471 begin
5472 Dout (Img (Node) & "matching Break", Str.all);
5474 while Cursor < Length loop
5475 if Is_In (Subject (Cursor + 1), Str.all) then
5476 goto Succeed;
5477 else
5478 Cursor := Cursor + 1;
5479 end if;
5480 end loop;
5482 goto Fail;
5483 end;
5485 -- Break (string pointer case)
5487 when PC_Break_VP => declare
5488 Str : String_Access := Get_String (Node.VP.all);
5490 begin
5491 Dout (Img (Node) & "matching Break", Str.all);
5493 while Cursor < Length loop
5494 if Is_In (Subject (Cursor + 1), Str.all) then
5495 goto Succeed;
5496 else
5497 Cursor := Cursor + 1;
5498 end if;
5499 end loop;
5501 goto Fail;
5502 end;
5504 -- BreakX (one character case)
5506 when PC_BreakX_CH =>
5507 Dout (Img (Node) & "matching BreakX", Node.Char);
5509 while Cursor < Length loop
5510 if Subject (Cursor + 1) = Node.Char then
5511 goto Succeed;
5512 else
5513 Cursor := Cursor + 1;
5514 end if;
5515 end loop;
5517 goto Fail;
5519 -- BreakX (character set case)
5521 when PC_BreakX_CS =>
5522 Dout (Img (Node) & "matching BreakX", Node.CS);
5524 while Cursor < Length loop
5525 if Is_In (Subject (Cursor + 1), Node.CS) then
5526 goto Succeed;
5527 else
5528 Cursor := Cursor + 1;
5529 end if;
5530 end loop;
5532 goto Fail;
5534 -- BreakX (string function case)
5536 when PC_BreakX_VF => declare
5537 U : constant VString := Node.VF.all;
5538 Str : constant String_Access := Get_String (U);
5540 begin
5541 Dout (Img (Node) & "matching BreakX", Str.all);
5543 while Cursor < Length loop
5544 if Is_In (Subject (Cursor + 1), Str.all) then
5545 goto Succeed;
5546 else
5547 Cursor := Cursor + 1;
5548 end if;
5549 end loop;
5551 goto Fail;
5552 end;
5554 -- BreakX (string pointer case)
5556 when PC_BreakX_VP => declare
5557 Str : String_Access := Get_String (Node.VP.all);
5559 begin
5560 Dout (Img (Node) & "matching BreakX", Str.all);
5562 while Cursor < Length loop
5563 if Is_In (Subject (Cursor + 1), Str.all) then
5564 goto Succeed;
5565 else
5566 Cursor := Cursor + 1;
5567 end if;
5568 end loop;
5570 goto Fail;
5571 end;
5573 -- BreakX_X (BreakX extension). See section on "Compound Pattern
5574 -- Structures". This node is the alternative that is stacked
5575 -- to skip past the break character and extend the break.
5577 when PC_BreakX_X =>
5578 Dout (Img (Node) & "extending BreakX");
5580 Cursor := Cursor + 1;
5581 goto Succeed;
5583 -- Character (one character string)
5585 when PC_Char =>
5586 Dout (Img (Node) & "matching '" & Node.Char & ''');
5588 if Cursor < Length
5589 and then Subject (Cursor + 1) = Node.Char
5590 then
5591 Cursor := Cursor + 1;
5592 goto Succeed;
5593 else
5594 goto Fail;
5595 end if;
5597 -- End of Pattern
5599 when PC_EOP =>
5600 if Stack_Base = Stack_Init then
5601 Dout ("end of pattern");
5602 goto Match_Succeed;
5604 -- End of recursive inner match. See separate section on
5605 -- handing of recursive pattern matches for details.
5607 else
5608 Dout ("terminating recursive match");
5609 Node := Stack (Stack_Base - 1).Node;
5610 Pop_Region;
5611 goto Match;
5612 end if;
5614 -- Fail
5616 when PC_Fail =>
5617 Dout (Img (Node) & "matching Fail");
5618 goto Fail;
5620 -- Fence (built in pattern)
5622 when PC_Fence =>
5623 Dout (Img (Node) & "matching Fence");
5624 Push (CP_Cancel'Access);
5625 goto Succeed;
5627 -- Fence function node X. This is the node that gets control
5628 -- after a successful match of the fenced pattern.
5630 when PC_Fence_X =>
5631 Dout (Img (Node) & "matching Fence function");
5632 Stack_Ptr := Stack_Ptr + 1;
5633 Stack (Stack_Ptr).Cursor := Stack_Base;
5634 Stack (Stack_Ptr).Node := CP_Fence_Y'Access;
5635 Stack_Base := Stack (Stack_Base).Cursor;
5636 Region_Level := Region_Level - 1;
5637 goto Succeed;
5639 -- Fence function node Y. This is the node that gets control on
5640 -- a failure that occurs after the fenced pattern has matched.
5642 -- Note: the Cursor at this stage is actually the inner stack
5643 -- base value. We don't reset this, but we do use it to strip
5644 -- off all the entries made by the fenced pattern.
5646 when PC_Fence_Y =>
5647 Dout (Img (Node) & "pattern matched by Fence caused failure");
5648 Stack_Ptr := Cursor - 2;
5649 goto Fail;
5651 -- Len (integer case)
5653 when PC_Len_Nat =>
5654 Dout (Img (Node) & "matching Len", Node.Nat);
5656 if Cursor + Node.Nat > Length then
5657 goto Fail;
5658 else
5659 Cursor := Cursor + Node.Nat;
5660 goto Succeed;
5661 end if;
5663 -- Len (Integer function case)
5665 when PC_Len_NF => declare
5666 N : constant Natural := Node.NF.all;
5668 begin
5669 Dout (Img (Node) & "matching Len", N);
5671 if Cursor + N > Length then
5672 goto Fail;
5673 else
5674 Cursor := Cursor + N;
5675 goto Succeed;
5676 end if;
5677 end;
5679 -- Len (integer pointer case)
5681 when PC_Len_NP =>
5682 Dout (Img (Node) & "matching Len", Node.NP.all);
5684 if Cursor + Node.NP.all > Length then
5685 goto Fail;
5686 else
5687 Cursor := Cursor + Node.NP.all;
5688 goto Succeed;
5689 end if;
5691 -- NotAny (one character case)
5693 when PC_NotAny_CH =>
5694 Dout (Img (Node) & "matching NotAny", Node.Char);
5696 if Cursor < Length
5697 and then Subject (Cursor + 1) /= Node.Char
5698 then
5699 Cursor := Cursor + 1;
5700 goto Succeed;
5701 else
5702 goto Fail;
5703 end if;
5705 -- NotAny (character set case)
5707 when PC_NotAny_CS =>
5708 Dout (Img (Node) & "matching NotAny", Node.CS);
5710 if Cursor < Length
5711 and then not Is_In (Subject (Cursor + 1), Node.CS)
5712 then
5713 Cursor := Cursor + 1;
5714 goto Succeed;
5715 else
5716 goto Fail;
5717 end if;
5719 -- NotAny (string function case)
5721 when PC_NotAny_VF => declare
5722 U : constant VString := Node.VF.all;
5723 Str : constant String_Access := Get_String (U);
5725 begin
5726 Dout (Img (Node) & "matching NotAny", Str.all);
5728 if Cursor < Length
5729 and then
5730 not Is_In (Subject (Cursor + 1), Str.all)
5731 then
5732 Cursor := Cursor + 1;
5733 goto Succeed;
5734 else
5735 goto Fail;
5736 end if;
5737 end;
5739 -- NotAny (string pointer case)
5741 when PC_NotAny_VP => declare
5742 Str : String_Access := Get_String (Node.VP.all);
5744 begin
5745 Dout (Img (Node) & "matching NotAny", Str.all);
5747 if Cursor < Length
5748 and then
5749 not Is_In (Subject (Cursor + 1), Str.all)
5750 then
5751 Cursor := Cursor + 1;
5752 goto Succeed;
5753 else
5754 goto Fail;
5755 end if;
5756 end;
5758 -- NSpan (one character case)
5760 when PC_NSpan_CH =>
5761 Dout (Img (Node) & "matching NSpan", Node.Char);
5763 while Cursor < Length
5764 and then Subject (Cursor + 1) = Node.Char
5765 loop
5766 Cursor := Cursor + 1;
5767 end loop;
5769 goto Succeed;
5771 -- NSpan (character set case)
5773 when PC_NSpan_CS =>
5774 Dout (Img (Node) & "matching NSpan", Node.CS);
5776 while Cursor < Length
5777 and then Is_In (Subject (Cursor + 1), Node.CS)
5778 loop
5779 Cursor := Cursor + 1;
5780 end loop;
5782 goto Succeed;
5784 -- NSpan (string function case)
5786 when PC_NSpan_VF => declare
5787 U : constant VString := Node.VF.all;
5788 Str : constant String_Access := Get_String (U);
5790 begin
5791 Dout (Img (Node) & "matching NSpan", Str.all);
5793 while Cursor < Length
5794 and then Is_In (Subject (Cursor + 1), Str.all)
5795 loop
5796 Cursor := Cursor + 1;
5797 end loop;
5799 goto Succeed;
5800 end;
5802 -- NSpan (string pointer case)
5804 when PC_NSpan_VP => declare
5805 Str : String_Access := Get_String (Node.VP.all);
5807 begin
5808 Dout (Img (Node) & "matching NSpan", Str.all);
5810 while Cursor < Length
5811 and then Is_In (Subject (Cursor + 1), Str.all)
5812 loop
5813 Cursor := Cursor + 1;
5814 end loop;
5816 goto Succeed;
5817 end;
5819 when PC_Null =>
5820 Dout (Img (Node) & "matching null");
5821 goto Succeed;
5823 -- Pos (integer case)
5825 when PC_Pos_Nat =>
5826 Dout (Img (Node) & "matching Pos", Node.Nat);
5828 if Cursor = Node.Nat then
5829 goto Succeed;
5830 else
5831 goto Fail;
5832 end if;
5834 -- Pos (Integer function case)
5836 when PC_Pos_NF => declare
5837 N : constant Natural := Node.NF.all;
5839 begin
5840 Dout (Img (Node) & "matching Pos", N);
5842 if Cursor = N then
5843 goto Succeed;
5844 else
5845 goto Fail;
5846 end if;
5847 end;
5849 -- Pos (integer pointer case)
5851 when PC_Pos_NP =>
5852 Dout (Img (Node) & "matching Pos", Node.NP.all);
5854 if Cursor = Node.NP.all then
5855 goto Succeed;
5856 else
5857 goto Fail;
5858 end if;
5860 -- Predicate function
5862 when PC_Pred_Func =>
5863 Dout (Img (Node) & "matching predicate function");
5865 if Node.BF.all then
5866 goto Succeed;
5867 else
5868 goto Fail;
5869 end if;
5871 -- Region Enter. Initiate new pattern history stack region
5873 when PC_R_Enter =>
5874 Dout (Img (Node) & "starting match of nested pattern");
5875 Stack (Stack_Ptr + 1).Cursor := Cursor;
5876 Push_Region;
5877 goto Succeed;
5879 -- Region Remove node. This is the node stacked by an R_Enter.
5880 -- It removes the special format stack entry right underneath, and
5881 -- then restores the outer level stack base and signals failure.
5883 -- Note: the cursor value at this stage is actually the (negative)
5884 -- stack base value for the outer level.
5886 when PC_R_Remove =>
5887 Dout ("failure, match of nested pattern terminated");
5888 Stack_Base := Cursor;
5889 Region_Level := Region_Level - 1;
5890 Stack_Ptr := Stack_Ptr - 1;
5891 goto Fail;
5893 -- Region restore node. This is the node stacked at the end of an
5894 -- inner level match. Its function is to restore the inner level
5895 -- region, so that alternatives in this region can be sought.
5897 -- Note: the Cursor at this stage is actually the negative of the
5898 -- inner stack base value, which we use to restore the inner region.
5900 when PC_R_Restore =>
5901 Dout ("failure, search for alternatives in nested pattern");
5902 Region_Level := Region_Level + 1;
5903 Stack_Base := Cursor;
5904 goto Fail;
5906 -- Rest
5908 when PC_Rest =>
5909 Dout (Img (Node) & "matching Rest");
5910 Cursor := Length;
5911 goto Succeed;
5913 -- Initiate recursive match (pattern pointer case)
5915 when PC_Rpat =>
5916 Stack (Stack_Ptr + 1).Node := Node.Pthen;
5917 Push_Region;
5918 Dout (Img (Node) & "initiating recursive match");
5920 if Stack_Ptr + Node.PP.all.Stk >= Stack_Size then
5921 raise Pattern_Stack_Overflow;
5922 else
5923 Node := Node.PP.all.P;
5924 goto Match;
5925 end if;
5927 -- RPos (integer case)
5929 when PC_RPos_Nat =>
5930 Dout (Img (Node) & "matching RPos", Node.Nat);
5932 if Cursor = (Length - Node.Nat) then
5933 goto Succeed;
5934 else
5935 goto Fail;
5936 end if;
5938 -- RPos (integer function case)
5940 when PC_RPos_NF => declare
5941 N : constant Natural := Node.NF.all;
5943 begin
5944 Dout (Img (Node) & "matching RPos", N);
5946 if Length - Cursor = N then
5947 goto Succeed;
5948 else
5949 goto Fail;
5950 end if;
5951 end;
5953 -- RPos (integer pointer case)
5955 when PC_RPos_NP =>
5956 Dout (Img (Node) & "matching RPos", Node.NP.all);
5958 if Cursor = (Length - Node.NP.all) then
5959 goto Succeed;
5960 else
5961 goto Fail;
5962 end if;
5964 -- RTab (integer case)
5966 when PC_RTab_Nat =>
5967 Dout (Img (Node) & "matching RTab", Node.Nat);
5969 if Cursor <= (Length - Node.Nat) then
5970 Cursor := Length - Node.Nat;
5971 goto Succeed;
5972 else
5973 goto Fail;
5974 end if;
5976 -- RTab (integer function case)
5978 when PC_RTab_NF => declare
5979 N : constant Natural := Node.NF.all;
5981 begin
5982 Dout (Img (Node) & "matching RPos", N);
5984 if Length - Cursor >= N then
5985 Cursor := Length - N;
5986 goto Succeed;
5987 else
5988 goto Fail;
5989 end if;
5990 end;
5992 -- RTab (integer pointer case)
5994 when PC_RTab_NP =>
5995 Dout (Img (Node) & "matching RPos", Node.NP.all);
5997 if Cursor <= (Length - Node.NP.all) then
5998 Cursor := Length - Node.NP.all;
5999 goto Succeed;
6000 else
6001 goto Fail;
6002 end if;
6004 -- Cursor assignment
6006 when PC_Setcur =>
6007 Dout (Img (Node) & "matching Setcur");
6008 Node.Var.all := Cursor;
6009 goto Succeed;
6011 -- Span (one character case)
6013 when PC_Span_CH => declare
6014 P : Natural := Cursor;
6016 begin
6017 Dout (Img (Node) & "matching Span", Node.Char);
6019 while P < Length
6020 and then Subject (P + 1) = Node.Char
6021 loop
6022 P := P + 1;
6023 end loop;
6025 if P /= Cursor then
6026 Cursor := P;
6027 goto Succeed;
6028 else
6029 goto Fail;
6030 end if;
6031 end;
6033 -- Span (character set case)
6035 when PC_Span_CS => declare
6036 P : Natural := Cursor;
6038 begin
6039 Dout (Img (Node) & "matching Span", Node.CS);
6041 while P < Length
6042 and then Is_In (Subject (P + 1), Node.CS)
6043 loop
6044 P := P + 1;
6045 end loop;
6047 if P /= Cursor then
6048 Cursor := P;
6049 goto Succeed;
6050 else
6051 goto Fail;
6052 end if;
6053 end;
6055 -- Span (string function case)
6057 when PC_Span_VF => declare
6058 U : constant VString := Node.VF.all;
6059 Str : constant String_Access := Get_String (U);
6060 P : Natural := Cursor;
6062 begin
6063 Dout (Img (Node) & "matching Span", Str.all);
6065 while P < Length
6066 and then Is_In (Subject (P + 1), Str.all)
6067 loop
6068 P := P + 1;
6069 end loop;
6071 if P /= Cursor then
6072 Cursor := P;
6073 goto Succeed;
6074 else
6075 goto Fail;
6076 end if;
6077 end;
6079 -- Span (string pointer case)
6081 when PC_Span_VP => declare
6082 Str : String_Access := Get_String (Node.VP.all);
6083 P : Natural := Cursor;
6085 begin
6086 Dout (Img (Node) & "matching Span", Str.all);
6088 while P < Length
6089 and then Is_In (Subject (P + 1), Str.all)
6090 loop
6091 P := P + 1;
6092 end loop;
6094 if P /= Cursor then
6095 Cursor := P;
6096 goto Succeed;
6097 else
6098 goto Fail;
6099 end if;
6100 end;
6102 -- String (two character case)
6104 when PC_String_2 =>
6105 Dout (Img (Node) & "matching " & Image (Node.Str2));
6107 if (Length - Cursor) >= 2
6108 and then Subject (Cursor + 1 .. Cursor + 2) = Node.Str2
6109 then
6110 Cursor := Cursor + 2;
6111 goto Succeed;
6112 else
6113 goto Fail;
6114 end if;
6116 -- String (three character case)
6118 when PC_String_3 =>
6119 Dout (Img (Node) & "matching " & Image (Node.Str3));
6121 if (Length - Cursor) >= 3
6122 and then Subject (Cursor + 1 .. Cursor + 3) = Node.Str3
6123 then
6124 Cursor := Cursor + 3;
6125 goto Succeed;
6126 else
6127 goto Fail;
6128 end if;
6130 -- String (four character case)
6132 when PC_String_4 =>
6133 Dout (Img (Node) & "matching " & Image (Node.Str4));
6135 if (Length - Cursor) >= 4
6136 and then Subject (Cursor + 1 .. Cursor + 4) = Node.Str4
6137 then
6138 Cursor := Cursor + 4;
6139 goto Succeed;
6140 else
6141 goto Fail;
6142 end if;
6144 -- String (five character case)
6146 when PC_String_5 =>
6147 Dout (Img (Node) & "matching " & Image (Node.Str5));
6149 if (Length - Cursor) >= 5
6150 and then Subject (Cursor + 1 .. Cursor + 5) = Node.Str5
6151 then
6152 Cursor := Cursor + 5;
6153 goto Succeed;
6154 else
6155 goto Fail;
6156 end if;
6158 -- String (six character case)
6160 when PC_String_6 =>
6161 Dout (Img (Node) & "matching " & Image (Node.Str6));
6163 if (Length - Cursor) >= 6
6164 and then Subject (Cursor + 1 .. Cursor + 6) = Node.Str6
6165 then
6166 Cursor := Cursor + 6;
6167 goto Succeed;
6168 else
6169 goto Fail;
6170 end if;
6172 -- String (case of more than six characters)
6174 when PC_String => declare
6175 Len : constant Natural := Node.Str'Length;
6177 begin
6178 Dout (Img (Node) & "matching " & Image (Node.Str.all));
6180 if (Length - Cursor) >= Len
6181 and then Node.Str.all = Subject (Cursor + 1 .. Cursor + Len)
6182 then
6183 Cursor := Cursor + Len;
6184 goto Succeed;
6185 else
6186 goto Fail;
6187 end if;
6188 end;
6190 -- String (function case)
6192 when PC_String_VF => declare
6193 U : constant VString := Node.VF.all;
6194 Str : constant String_Access := Get_String (U);
6195 Len : constant Natural := Str'Length;
6197 begin
6198 Dout (Img (Node) & "matching " & Image (Str.all));
6200 if (Length - Cursor) >= Len
6201 and then Str.all = Subject (Cursor + 1 .. Cursor + Len)
6202 then
6203 Cursor := Cursor + Len;
6204 goto Succeed;
6205 else
6206 goto Fail;
6207 end if;
6208 end;
6210 -- String (vstring pointer case)
6212 when PC_String_VP => declare
6213 S : String_Access := Get_String (Node.VP.all);
6214 Len : constant Natural :=
6215 Ada.Strings.Unbounded.Length (Node.VP.all);
6217 begin
6218 Dout
6219 (Img (Node) & "matching " & Image (S.all));
6221 if (Length - Cursor) >= Len
6222 and then S.all = Subject (Cursor + 1 .. Cursor + Len)
6223 then
6224 Cursor := Cursor + Len;
6225 goto Succeed;
6226 else
6227 goto Fail;
6228 end if;
6229 end;
6231 -- Succeed
6233 when PC_Succeed =>
6234 Dout (Img (Node) & "matching Succeed");
6235 Push (Node);
6236 goto Succeed;
6238 -- Tab (integer case)
6240 when PC_Tab_Nat =>
6241 Dout (Img (Node) & "matching Tab", Node.Nat);
6243 if Cursor <= Node.Nat then
6244 Cursor := Node.Nat;
6245 goto Succeed;
6246 else
6247 goto Fail;
6248 end if;
6250 -- Tab (integer function case)
6252 when PC_Tab_NF => declare
6253 N : constant Natural := Node.NF.all;
6255 begin
6256 Dout (Img (Node) & "matching Tab ", N);
6258 if Cursor <= N then
6259 Cursor := N;
6260 goto Succeed;
6261 else
6262 goto Fail;
6263 end if;
6264 end;
6266 -- Tab (integer pointer case)
6268 when PC_Tab_NP =>
6269 Dout (Img (Node) & "matching Tab ", Node.NP.all);
6271 if Cursor <= Node.NP.all then
6272 Cursor := Node.NP.all;
6273 goto Succeed;
6274 else
6275 goto Fail;
6276 end if;
6278 -- Unanchored movement
6280 when PC_Unanchored =>
6281 Dout ("attempting to move anchor point");
6283 -- All done if we tried every position
6285 if Cursor > Length then
6286 goto Match_Fail;
6288 -- Otherwise extend the anchor point, and restack ourself
6290 else
6291 Cursor := Cursor + 1;
6292 Push (Node);
6293 goto Succeed;
6294 end if;
6296 -- Write immediate. This node performs the actual write
6298 when PC_Write_Imm =>
6299 Dout (Img (Node) & "executing immediate write of " &
6300 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
6302 Put_Line
6303 (Node.FP.all,
6304 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor));
6305 Pop_Region;
6306 goto Succeed;
6308 -- Write on match. This node sets up for the eventual write
6310 when PC_Write_OnM =>
6311 Dout (Img (Node) & "registering deferred write");
6312 Stack (Stack_Base - 1).Node := Node;
6313 Push (CP_Assign'Access);
6314 Pop_Region;
6315 Assign_OnM := True;
6316 goto Succeed;
6318 end case;
6320 -- We are NOT allowed to fall though this case statement, since every
6321 -- match routine must end by executing a goto to the appropriate point
6322 -- in the finite state machine model.
6324 Logic_Error;
6326 end XMatchD;
6328 end GNAT.Spitbol.Patterns;