1 (***********************************************************************)
5 (* Xavier Leroy, projet Cristal, *)
6 (* Luc Maranget, projet Moscova, *)
7 (* INRIA Rocquencourt *)
9 (* Copyright 1996 Institut National de Recherche en Informatique et *)
10 (* en Automatique. All rights reserved. This file is distributed *)
11 (* under the terms of the Q Public License version 1.0. *)
13 (***********************************************************************)
17 (* Compiling a lexer definition *)
22 exception Memory_overflow
24 (* Deep abstract syntax for regular expressions *)
26 type ident
= string * Syntax.location
28 type tag_info
= {id
: string ; start
: bool ; action
: int}
35 | Seq
of regexp
* regexp
36 | Alt
of regexp
* regexp
39 type tag_base
= Start
| End
| Mem
of int
40 type tag_addr
= Sum
of (tag_base
* int)
42 | Ident_string
of bool * tag_addr
* tag_addr
43 | Ident_char
of bool * tag_addr
44 type t_env
= (ident
* ident_info
) list
46 type ('args
,'action
) lexer_entry
=
50 lex_actions
: (int * t_env
* 'action
) list
}
54 Perform
of int * tag_action list
55 | Shift
of automata_trans
* (automata_move
* memory_action list
) array
59 | Remember
of int * tag_action list
69 and tag_action
= SetTag
of int * int | EraseTag
of int
71 (* Representation of entry points *)
73 type ('args
,'action
) automata_entry
=
77 auto_initial_state
: int * memory_action list
;
78 auto_actions
: (int * t_env
* 'action
) list
}
81 (* A lot of sets and map structures *)
83 module Ints
= Set.Make
(struct type t
= int let compare = compare end)
85 let id_compare (id1
,_
) (id2
,_
) = String.compare id1 id2
87 let tag_compare t1 t2
= Pervasives.compare t1 t2
89 module Tags
= Set.Make
(struct type t
= tag_info
let compare = tag_compare end)
92 Map.Make
(struct type t
= tag_info
let compare = tag_compare end)
95 Set.Make
(struct type t
= ident
let compare = id_compare end)
98 Map.Make
(struct type t
= ident
let compare = id_compare end)
100 (*********************)
101 (* Variable cleaning *)
102 (*********************)
104 (* Silently eliminate nested variables *)
106 let rec do_remove_nested to_remove
= function
108 if IdSet.mem x to_remove
then
109 do_remove_nested to_remove e
111 Bind
(do_remove_nested (IdSet.add x to_remove
) e
, x
)
112 | Epsilon
|Eof
|Characters _
as e
-> e
113 | Sequence
(e1
, e2
) ->
115 (do_remove_nested to_remove e1
, do_remove_nested to_remove e2
)
116 | Alternative
(e1
, e2
) ->
118 (do_remove_nested to_remove e1
, do_remove_nested to_remove e2
)
120 Repetition
(do_remove_nested to_remove e
)
122 let remove_nested_as e
= do_remove_nested IdSet.empty e
124 (*********************)
125 (* Variable analysis *)
126 (*********************)
130 A variable is optional when matching of regexp does not
133 ("" | 'a' as x) -> optional
134 ("" as x | 'a' as x) -> non-optional
137 let stringset_delta s1 s2
=
142 let rec find_all_vars = function
143 | Characters _
|Epsilon
|Eof
->
146 IdSet.add x
(find_all_vars e
)
147 | Sequence
(e1
,e2
)|Alternative
(e1
,e2
) ->
148 IdSet.union
(find_all_vars e1
) (find_all_vars e2
)
149 | Repetition e
-> find_all_vars e
152 let rec do_find_opt = function
153 | Characters _
|Epsilon
|Eof
-> IdSet.empty
, IdSet.empty
155 let opt,all
= do_find_opt e
in
157 | Sequence
(e1
,e2
) ->
158 let opt1,all1
= do_find_opt e1
159 and opt2
,all2
= do_find_opt e2
in
160 IdSet.union
opt1 opt2
, IdSet.union all1 all2
161 | Alternative
(e1
,e2
) ->
162 let opt1,all1
= do_find_opt e1
163 and opt2
,all2
= do_find_opt e2
in
165 (IdSet.union
opt1 opt2
)
166 (stringset_delta all1 all2
),
167 IdSet.union all1 all2
169 let r = find_all_vars e
in
172 let find_optional e
=
173 let r,_
= do_find_opt e
in r
177 A variable is double when it can be bound more than once
184 let rec do_find_double = function
185 | Characters _
|Epsilon
|Eof
-> IdSet.empty
, IdSet.empty
187 let dbl,all
= do_find_double e
in
188 (if IdSet.mem x all
then
193 | Sequence
(e1
,e2
) ->
194 let dbl1, all1
= do_find_double e1
195 and dbl2
, all2
= do_find_double e2
in
197 (IdSet.inter all1 all2
)
198 (IdSet.union
dbl1 dbl2
),
199 IdSet.union all1 all2
200 | Alternative
(e1
,e2
) ->
201 let dbl1, all1
= do_find_double e1
202 and dbl2
, all2
= do_find_double e2
in
203 IdSet.union
dbl1 dbl2
,
204 IdSet.union all1 all2
206 let r = find_all_vars e
in
209 let find_double e
= do_find_double e
213 A variable is bound to a char when all its occurences
214 bind a pattern of length 1.
219 let add_some x
= function
220 | Some i
-> Some
(x
+i
)
223 let add_some_some x y
= match x
,y
with
224 | Some i
, Some j
-> Some
(i
+j
)
227 let rec do_find_chars sz
= function
228 | Epsilon
|Eof
-> IdSet.empty
, IdSet.empty
, sz
229 | Characters _
-> IdSet.empty
, IdSet.empty
, add_some 1 sz
231 let c,s
,e_sz
= do_find_chars (Some
0) e
in
232 begin match e_sz
with
234 IdSet.add x
c,s
,add_some 1 sz
236 c, IdSet.add x s
, add_some_some sz e_sz
238 | Sequence
(e1
,e2
) ->
239 let c1,s1
,sz1
= do_find_chars sz e1
in
240 let c2,s2
,sz2
= do_find_chars sz1 e2
in
244 | Alternative
(e1
,e2
) ->
245 let c1,s1
,sz1
= do_find_chars sz e1
246 and c2,s2
,sz2
= do_find_chars sz e2
in
249 (if sz1
= sz2
then sz1
else None
)
250 | Repetition e
-> do_find_chars None e
255 let c,s
,_
= do_find_chars (Some
0) e
in
258 (*******************************)
259 (* From shallow to deep syntax *)
260 (*******************************)
262 let chars = ref ([] : Cset.t list
)
263 let chars_count = ref 0
266 let rec encode_regexp char_vars act
= function
269 let n = !chars_count in
270 chars := cl
:: !chars;
274 let n = !chars_count in
275 chars := Cset.eof
:: !chars;
279 let r1 = encode_regexp char_vars act
r1 in
280 let r2 = encode_regexp char_vars act
r2 in
282 | Alternative
(r1,r2) ->
283 let r1 = encode_regexp char_vars act
r1 in
284 let r2 = encode_regexp char_vars act
r2 in
287 let r = encode_regexp char_vars act
r in
289 | Bind
(r,((name
,_
) as x
)) ->
290 let r = encode_regexp char_vars act
r in
291 if IdSet.mem x char_vars
then
292 Seq
(Tag
{id
=name
; start
=true ; action
=act
},r)
294 Seq
(Tag
{id
=name
; start
=true ; action
=act
},
295 Seq
(r, Tag
{id
=name
; start
=false ; action
=act
}))
299 Static optimization :
300 Replace tags by offsets relative to the beginning
301 or end of matched string.
302 Dynamic optimization:
303 Replace some non-optional, non-double tags by offsets w.r.t
304 a previous similar tag.
307 let incr_pos = function
309 | Some i
-> Some
(i
+1)
311 let decr_pos = function
313 | Some i
-> Some
(i
-1)
318 let mk_seq r1 r2 = match r1,r2 with
323 let add_pos p i
= match p
with
324 | Some
(Sum
(a
,n)) -> Some
(Sum
(a
,n+i
))
327 let mem_name name id_set
=
328 IdSet.exists
(fun (id_name
,_
) -> name
= id_name
) id_set
330 let opt_regexp all_vars char_vars optional_vars double_vars
r =
332 (* From removed tags to their addresses *)
333 let env = Hashtbl.create
17 in
335 (* First static optimizations, from start position *)
336 let rec size_forward pos
= function
337 | Empty
|Chars
(_
,true)|Tag _
-> Some pos
338 | Chars
(_
,false) -> Some
(pos
+1)
340 begin match size_forward pos
r1 with
342 | Some pos
-> size_forward pos
r2
345 let pos1 = size_forward pos
r1
346 and pos2
= size_forward pos
r2 in
347 if pos1=pos2
then pos1 else None
349 | Action _
-> assert false in
351 let rec simple_forward pos
r = match r with
353 if mem_name n.id double_vars
then
356 Hashtbl.add
env (n.id
,n.start
) (Sum
(Start
, pos
)) ;
359 | Empty
-> r, Some pos
360 | Chars
(_
,is_eof
) ->
361 r,Some
(if is_eof
then pos
else pos
+1)
363 let r1,pos
= simple_forward pos
r1 in
365 | None
-> mk_seq r1 r2,None
367 let r2,pos
= simple_forward pos
r2 in
371 let pos1 = size_forward pos
r1
372 and pos2
= size_forward pos
r2 in
373 r,(if pos1=pos2
then pos1 else None
)
375 | Action _
-> assert false in
377 (* Then static optimizations, from end position *)
378 let rec size_backward pos
= function
379 | Empty
|Chars
(_
,true)|Tag _
-> Some pos
380 | Chars
(_
,false) -> Some
(pos
-1)
382 begin match size_backward pos
r2 with
384 | Some pos
-> size_backward pos
r1
387 let pos1 = size_backward pos
r1
388 and pos2
= size_backward pos
r2 in
389 if pos1=pos2
then pos1 else None
391 | Action _
-> assert false in
394 let rec simple_backward pos
r = match r with
396 if mem_name n.id double_vars
then
399 Hashtbl.add
env (n.id
,n.start
) (Sum
(End
, pos
)) ;
402 | Empty
-> r,Some pos
403 | Chars
(_
,is_eof
) ->
404 r,Some
(if is_eof
then pos
else pos
-1)
406 let r2,pos
= simple_backward pos
r2 in
408 | None
-> mk_seq r1 r2,None
410 let r1,pos
= simple_backward pos
r1 in
414 let pos1 = size_backward pos
r1
415 and pos2
= size_backward pos
r2 in
416 r,(if pos1=pos2
then pos1 else None
)
418 | Action _
-> assert false in
422 let r,_
= simple_forward 0 r in
423 let r,_
= simple_backward 0 r in
428 let loc_count = ref 0 in
434 let n = !loc_count in
436 Hashtbl.add
env t
(Sum
(Mem
n,0)) ;
439 let rec alloc_exp pos
r = match r with
441 if mem_name n.id double_vars
then
443 else begin match pos
with
445 Hashtbl.add
env (n.id
,n.start
) a
;
448 let a = get_tag_addr (n.id
,n.start
) in
453 | Chars
(_
,is_eof
) -> r,(if is_eof
then pos
else add_pos pos
1)
455 let r1,pos
= alloc_exp pos
r1 in
456 let r2,pos
= alloc_exp pos
r2 in
459 let off = size_forward 0 r in
461 | Some i
-> r,add_pos pos i
465 | Action _
-> assert false in
467 let r,_
= alloc_exp None
r in
470 (fun ((name
,_
) as x
) r ->
473 if IdSet.mem x char_vars
then
475 (IdSet.mem x optional_vars
, get_tag_addr (name
,true))
478 (IdSet.mem x optional_vars
,
479 get_tag_addr (name
,true),
480 get_tag_addr (name
,false)) in
487 let encode_casedef casedef
=
490 (fun (reg
,actions
,count
,ntags
) (expr
, act
) ->
491 let expr = remove_nested_as expr in
492 let char_vars = find_chars expr in
493 let r = encode_regexp char_vars count
expr
494 and opt_vars
= find_optional expr
495 and double_vars
,all_vars
= find_double expr in
497 opt_regexp all_vars
char_vars opt_vars double_vars
r in
498 Alt
(reg
, Seq
(r, Action count
)),
499 (count
, m ,act
) :: actions
,
506 let encode_lexdef def
=
511 (fun {name
=entry_name
; args
=args
; shortest
=shortest
; clauses
= casedef
} ->
512 let (re
,actions
,_
,ntags
) = encode_casedef casedef
in
513 { lex_name
= entry_name
;
515 lex_mem_tags
= ntags
;
516 lex_actions
= List.rev actions
},args
,shortest
)
518 let chr = Array.of_list
(List.rev
!chars) in
522 (* To generate directly a NFA from a regular expression.
523 Confer Aho-Sethi-Ullman, dragon book, chap. 3
524 Extension to tagged automata.
527 ``NFAs with Tagged Transitions, their Conversion to Deterministic
528 Automata and Application to Regular Expressions''.
529 Symposium on String Processing and Information Retrieval (SPIRE 2000),
530 http://kouli.iki.fi/~vlaurika/spire2000-tnfa.ps
532 http://kouli.iki.fi/~vlaurika/regex-submatch.ps.gz
539 type transition
= t_transition
* Tags.t
541 let trans_compare (t1
,tags1
) (t2
,tags2
) =
542 match Pervasives.compare t1 t2
with
543 | 0 -> Tags.compare tags1 tags2
548 Set.Make
(struct type t
= transition
let compare = trans_compare end)
550 let rec nullable = function
551 | Empty
|Tag _
-> true
552 | Chars
(_
,_
)|Action _
-> false
553 | Seq
(r1,r2) -> nullable r1 && nullable r2
554 | Alt
(r1,r2) -> nullable r1 || nullable r2
557 let rec emptymatch = function
558 | Empty
| Chars
(_
,_
) | Action _
-> Tags.empty
559 | Tag t
-> Tags.add t
Tags.empty
560 | Seq
(r1,r2) -> Tags.union
(emptymatch r1) (emptymatch r2)
572 let addtags transs tags
=
574 (fun (t
,tags_t
) r -> TransSet.add
(t
, Tags.union tags tags_t
) r)
575 transs
TransSet.empty
578 let rec firstpos = function
579 Empty
|Tag _
-> TransSet.empty
580 | Chars
(pos
,_
) -> TransSet.add
(OnChars pos
,Tags.empty
) TransSet.empty
581 | Action act
-> TransSet.add
(ToAction act
,Tags.empty
) TransSet.empty
584 TransSet.union
(firstpos r1) (addtags (firstpos r2) (emptymatch r1))
587 | Alt
(r1,r2) -> TransSet.union
(firstpos r1) (firstpos r2)
588 | Star
r -> firstpos r
591 (* Berry-sethi followpos *)
592 let followpos size
entry_list =
593 let v = Array.create size
TransSet.empty
in
594 let rec fill s
= function
595 | Empty
|Action _
|Tag _
-> ()
596 | Chars
(n,_
) -> v.(n) <- s
598 fill s
r1 ; fill s
r2
602 TransSet.union
(firstpos r2) (addtags s
(emptymatch r2))
608 fill (TransSet.union
(firstpos r) s
) r in
609 List.iter
(fun (entry
,_
,_
) -> fill TransSet.empty entry
.lex_regexp
) entry_list ;
612 (************************)
613 (* The algorithm itself *)
614 (************************)
616 let no_action = max_int
619 Set.Make
(struct type t
= t_transition
let compare = Pervasives.compare end)
623 Map.Make
(struct type t
= int let compare = Pervasives.compare end)
626 {final
: int * ('
a * int TagMap.t
) ;
627 others
: ('
a * int TagMap.t
) MemMap.t
}
631 fprintf oc "%s<%s>" t.id (if t.start then "s" else "e")
633 let dmem_map dp ds m =
636 eprintf "%d -> " k ; dp x ; ds ())
639 and dtag_map dp ds m =
642 dtag stderr t ; eprintf " -> " ; dp x ; ds ())
645 let dstate {final=(act,(_,m)) ; others=o} =
646 if act <> no_action then begin
647 eprintf "final=%d " act ;
648 dtag_map (fun x -> eprintf "%d" x) (fun () -> prerr_string " ,") m ;
653 dtag_map (fun x -> eprintf "%d" x) (fun () -> prerr_string " ,") m)
654 (fun () -> prerr_endline "")
658 let dfa_state_empty =
659 {final
=(no_action, (max_int
,TagMap.empty
)) ;
662 and dfa_state_is_empty
{final
=(act
,_
) ; others
=o
} =
667 (* A key is an abstraction on a dfa state,
668 two states with the same key can be made the same by
669 copying some memory cells into others *)
673 Set.Make
(struct type t
= StateSet.t
let compare = StateSet.compare end)
675 type t_equiv
= {tag
:tag_info
; equiv
:StateSetSet.t
}
682 let compare e1 e2
= match Pervasives.compare e1
.tag e2
.tag
with
683 | 0 -> StateSetSet.compare e1
.equiv e2
.equiv
687 type dfa_key
= {kstate
: StateSet.t
; kmem
: MemKey.t
}
689 (* Map a state to its key *)
695 let ss = TagMap.find tag
r in
696 let r = TagMap.remove tag
r in
697 TagMap.add tag
(StateSetSet.add s
ss) r
700 TagMap.add tag
(StateSetSet.add s
StateSetSet.empty
) r)
703 (fun tag
ss r -> MemKey.add
{tag
=tag
; equiv
=ss} r)
707 (* trans is nfa_state, m is associated memory map *)
708 let inverse_mem_map trans
m r =
712 let otag,s
= MemMap.find addr
r in
713 assert (tag
= otag) ;
714 let r = MemMap.remove addr
r in
715 MemMap.add addr
(tag
,StateSet.add trans s
) r
718 MemMap.add addr
(tag
,StateSet.add trans
StateSet.empty
) r)
721 let inverse_mem_map_other n (_
,m) r = inverse_mem_map (OnChars
n) m r
723 let get_key {final
=(act
,(_
,m_act
)) ; others
=o
} =
725 MemMap.fold
inverse_mem_map_other
727 (if act
= no_action then MemMap.empty
728 else inverse_mem_map (ToAction act
) m_act
MemMap.empty
) in
730 MemMap.fold
(fun n _
r -> StateSet.add
(OnChars
n) r) o
731 (if act
=no_action then StateSet.empty
732 else StateSet.add
(ToAction act
) StateSet.empty
) in
733 let mem_key = env_to_class env in
734 {kstate
= state_key ; kmem
= mem_key}
737 let key_compare k1 k2
= match StateSet.compare k1
.kstate k2
.kstate
with
738 | 0 -> MemKey.compare k1
.kmem k2
.kmem
741 (* Association dfa_state -> state_num *)
744 Map.Make
(struct type t
= dfa_key
let compare = key_compare end)
746 let state_map = ref (StateMap.empty
: int StateMap.t
)
747 let todo = Stack.create
()
748 let next_state_num = ref 0
749 let next_mem_cell = ref 0
750 let temp_pending = ref false
751 let tag_cells = Hashtbl.create
17
752 let state_table = Table.create
dfa_state_empty
755 let reset_state_mem () =
756 state_map := StateMap.empty
;
758 next_state_num := 0 ;
759 let _ = Table.trim
state_table in
762 (* Allocation of memory cells *)
763 let reset_cell_mem ntags
=
764 next_mem_cell := ntags
;
765 Hashtbl.clear
tag_cells ;
766 temp_pending := false
768 let do_alloc_temp () =
769 temp_pending := true ;
770 let n = !next_mem_cell in
773 let do_alloc_cell used t
=
775 try Hashtbl.find
tag_cells t
with Not_found
-> Ints.empty
in
777 Ints.choose
(Ints.diff
available used
)
780 temp_pending := false ;
781 let n = !next_mem_cell in
782 if n >= 255 then raise Memory_overflow
;
783 Hashtbl.replace
tag_cells t
(Ints.add
n available) ;
787 let is_old_addr a = a >= 0
788 and is_new_addr
a = a < 0
793 if is_old_addr addr
then
799 let alloc_map used
m mvs
=
801 (fun tag
a (r,mvs
) ->
803 if is_new_addr
a then
804 let a = do_alloc_cell used tag
in
807 TagMap.add tag
a r,mvs
)
810 let create_new_state {final
=(act
,(_,m_act
)) ; others
=o
} =
812 MemMap.fold
(fun _ (_,m) r -> old_in_map m r)
813 o
(old_in_map m_act
Ints.empty
) in
815 let new_m_act,mvs
= alloc_map used m_act
Ints.empty
in
817 MemMap.fold
(fun k
(x
,m) (r,mvs
) ->
818 let m,mvs
= alloc_map used m mvs
in
819 MemMap.add k
(x
,m) r,mvs
)
820 o
(MemMap.empty
,mvs
) in
821 {final
=(act
,(0,new_m_act)) ; others
=new_o},
822 Ints.fold
(fun x
r -> Set x
::r) mvs
[]
824 type new_addr_gen
= {mutable count
: int ; mutable env : int TagMap.t
}
826 let create_new_addr_gen () = {count
= -1 ; env = TagMap.empty
}
828 let alloc_new_addr tag
r =
830 TagMap.find tag
r.env
835 r.env <- TagMap.add tag
a r.env ;
839 let create_mem_map tags gen
=
841 (fun tag
r -> TagMap.add tag
(alloc_new_addr tag gen
) r)
844 let create_init_state pos
=
845 let gen = create_new_addr_gen () in
851 let on,otags
= st.final
in
853 {st with final
= (n, (0,create_mem_map tags
gen))}
858 let _ = MemMap.find
n st.others
in assert false
862 MemMap.add
n (0,create_mem_map tags
gen) st.others
})
863 pos
dfa_state_empty in
867 let get_map t
st = match t
with
868 | ToAction
_ -> let _,(_,m) = st.final
in m
870 let (_,m) = MemMap.find
n st.others
in
873 let dest = function | Copy
(d
,_) | Set d
-> d
874 and orig
= function | Copy
(_,o
) -> o
| Set
_ -> -1
876 let pmv oc mv
= fprintf oc
"%d <- %d" (dest mv
) (orig mv
)
878 List.iter
(fun mv
-> fprintf oc
"%a " pmv mv
) mvs
;
879 output_char oc '
\n'
; flush oc
882 (* Topological sort << a la louche >> *)
884 let rec do_rec r mvs
= match mvs
with
889 (fun r mv
-> Ints.add
(dest mv
) r)
893 (fun mv
-> Ints.mem
(orig mv
) dests)
899 let d'
= do_alloc_temp () in
911 | _ -> do_rec (here
@r) rem in
914 let move_to mem_key src tgt
=
917 (fun {tag
=tag
; equiv
=m} r ->
921 let t = StateSet.choose s
in
922 let src = TagMap.find tag
(get_map t src)
923 and tgt
= TagMap.find tag
(get_map t tgt
) in
924 if src <> tgt
then begin
925 if is_new_addr
src then
932 | Not_found
-> assert false)
935 (* Moves are topologically sorted *)
940 let key = get_key st in
942 let num = StateMap.find
key !state_map in
943 num,move_to key.kmem
st (Table.get
state_table num)
945 let num = !next_state_num in
947 let st,mvs = create_new_state st in
948 Table.emit
state_table st ;
949 state_map := StateMap.add
key num !state_map;
950 Stack.push
(st, num) todo;
953 let map_on_all_states f old_res
=
954 let res = ref old_res
in
957 let (st, i
) = Stack.pop
todo in
959 res := (r, i
) :: !res
961 with Stack.Empty
-> ()
967 dfa_state_is_empty
st
971 let n,moves
= get_state st in
974 (****************************)
975 (* compute reachable states *)
976 (****************************)
978 let add_tags_to_map gen tags
m =
981 let m = TagMap.remove tag
m in
982 TagMap.add tag
(alloc_new_addr tag
gen) m)
985 let apply_transition gen r pri
m = function
987 let on,(opri
,_) = r.final
in
988 if n < on || (on=n && pri
< opri
) then
989 let m = add_tags_to_map gen tags
m in
990 {r with final
=n,(pri
,m)}
994 let (opri
,_) = MemMap.find
n r.others
in
996 let m = add_tags_to_map gen tags
m in
997 {r with others
=MemMap.add
n (pri
,m) (MemMap.remove
n r.others
)}
1002 let m = add_tags_to_map gen tags
m in
1003 {r with others
=MemMap.add
n (pri
,m) r.others
}
1005 (* add transitions ts to new state r
1006 transitions in ts start from state pri and memory map m
1008 let apply_transitions gen r pri
m ts
=
1010 (fun t r -> apply_transition gen r pri
m t)
1014 (* For a given nfa_state pos, refine char partition *)
1015 let rec split_env gen follow pos
m s
= function
1016 | [] -> (* Can occur ! because of non-matching regexp ([^'\000'-'\255']) *)
1018 | (s1
,st1
) as p
::rem ->
1019 let here = Cset.inter s s1
in
1020 if Cset.is_empty
here then
1021 p
::split_env gen follow pos
m s
rem
1023 let rest = Cset.diff s
here in
1025 if Cset.is_empty
rest then
1028 split_env gen follow pos
m rest rem
1029 and new_st
= apply_transitions gen st1 pos
m follow
in
1030 let stay = Cset.diff s1
here in
1031 if Cset.is_empty
stay then
1034 (stay, st1
)::(here, new_st
)::rem
1037 (* For all nfa_state pos in a dfa state st *)
1038 let comp_shift gen chars follow
st =
1040 (fun pos
(_,m) env -> split_env gen follow
.(pos
) pos
m chars.(pos
) env)
1041 st [Cset.all_chars_eof
,dfa_state_empty]
1044 let reachs chars follow
st =
1045 let gen = create_new_addr_gen () in
1046 (* build a association list (char set -> new state) *)
1047 let env = comp_shift gen chars follow
st in
1048 (* change it into (char set -> new state_num) *)
1051 (fun (s
,dfa_state
) -> s
,goto_state dfa_state
) env in
1052 (* finally build the char indexed array -> new state num *)
1053 let shift = Cset.env_to_array
env in
1057 let get_tag_mem n env t =
1059 TagMap.find
t env.(n)
1061 | Not_found
-> assert false
1063 let do_tag_actions n env m =
1066 TagMap.fold
(fun t m (used,r) ->
1067 let a = get_tag_mem n env t in
1068 Ints.add
a used,SetTag
(a,m)::r) m (Ints.empty
,[]) in
1071 (fun tag
m (used,r) ->
1072 if not
(Ints.mem
m used) && tag
.start
then
1073 Ints.add
m used, EraseTag
m::r
1080 let translate_state shortest_match tags
chars follow
st =
1081 let (n,(_,m)) = st.final
in
1082 if MemMap.empty
= st.others
then
1083 Perform
(n,do_tag_actions n tags
m)
1084 else if shortest_match
then begin
1086 Shift
(No_remember
,reachs chars follow
st.others
)
1088 Perform
(n, do_tag_actions n tags
m)
1091 (if n = no_action then
1094 Remember
(n,do_tag_actions n tags
m)),
1095 reachs chars follow
st.others
)
1099 let dtags chan tags =
1101 (fun t -> fprintf chan " %a" dtag t)
1106 (fun trans -> match trans with
1108 eprintf " (-> %d,%a)" i dtags tags
1109 | ToAction i,tags ->
1110 eprintf " ([%d],%a)" i dtags tags)
1114 eprintf "follow=[" ;
1115 for i = 0 to Array.length t-1 do
1122 let make_tag_entry id start act
a r = match a with
1124 TagMap.add
{id
=id
; start
=start
; action
=act
} m r
1127 let extract_tags l
=
1128 let envs = Array.create
(List.length l
) TagMap.empty
in
1133 (fun ((name
,_),v) r -> match v with
1134 | Ident_char
(_,t) -> make_tag_entry name
true act
t r
1135 | Ident_string
(_,t1
,t2
) ->
1136 make_tag_entry name
true act t1
1137 (make_tag_entry name
false act t2
r))
1143 let make_dfa lexdef
=
1144 let (chars, entry_list) = encode_lexdef lexdef
in
1145 let follow = followpos (Array.length
chars) entry_list in
1149 reset_state_mem () ;
1150 let r_states = ref [] in
1151 let initial_states =
1153 (fun (le
,args
,shortest
) ->
1154 let tags = extract_tags le
.lex_actions
in
1155 reset_cell_mem le
.lex_mem_tags
;
1156 let pos_set = firstpos le
.lex_regexp
in
1158 prerr_string "trans={" ; dtransset pos_set ; prerr_endline "}" ;
1160 let init_state = create_init_state pos_set in
1161 let init_num = get_state init_state in
1164 (translate_state shortest
tags chars follow) !r_states ;
1165 { auto_name
= le
.lex_name
;
1168 (if !temp_pending then !next_mem_cell+1 else !next_mem_cell) ;
1169 auto_initial_state
= init_num ;
1170 auto_actions
= le
.lex_actions
})
1172 let states = !r_states in
1174 prerr_endline "** states **" ;
1175 for i = 0 to !next_state_num-1 do
1176 eprintf "+++ %d +++\n" i ;
1177 dstate (Table.get state_table i) ;
1180 eprintf "%d states\n" !next_state_num ;
1182 let actions = Array.create
!next_state_num (Perform
(0,[])) in
1183 List.iter
(fun (act
, i
) -> actions.(i
) <- act
) states;
1184 reset_state_mem () ;
1186 (initial_states, actions)