1 /***********************************************************************/
5 /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */
7 /* Copyright 1996 Institut National de Recherche en Informatique et */
8 /* en Automatique. All rights reserved. This file is distributed */
9 /* under the terms of the Q Public License version 1.0. */
11 /***********************************************************************/
15 /* The grammar for lexer definitions */
20 (* Auxiliaries for the parser. *)
23 (Hashtbl.create 13 : (string, regular_expression) Hashtbl.t)
25 let regexp_for_string s =
27 if n >= String.length s then Epsilon
28 else if succ n = String.length s then
29 Characters (Cset.singleton (Char.code s.[n]))
32 (Characters(Cset.singleton (Char.code s.[n])),
(* [remove_as re] rebuilds [re] without any [Bind] node: each [Bind (e, _)]
   is replaced by the (recursively cleaned) expression [e] it wraps, so the
   second component of the binding is discarded.  The leaves [Epsilon],
   [Eof] and [Characters _] are returned unchanged; [Sequence],
   [Alternative] and [Repetition] are rebuilt around their cleaned
   sub-expressions. *)
let rec remove_as re =
  match re with
  | Epsilon | Eof | Characters _ -> re
  | Bind (inner, _) -> remove_as inner
  | Repetition body -> Repetition (remove_as body)
  | Sequence (left, right) -> Sequence (remove_as left, remove_as right)
  | Alternative (left, right) ->
      Alternative (remove_as left, remove_as right)
43 let as_cset = function
49 %token <string> Tident
51 %token <string> Tstring
52 %token <Syntax.location> Taction
53 %token Trule Tparse Tparse_shortest Tand Tequal Tend Tor Tunderscore Teof Tlbracket Trbracket
54 %token Tstar Tmaybe Tplus Tlparen Trparen Tcaret Tdash Tlet Tas Tsharp
60 %nonassoc Tmaybe Tstar Tplus
61 Tident Tchar Tstring Tunderscore Teof Tlbracket Tlparen
63 %start lexer_definition
64 %type <Syntax.lexer_definition> lexer_definition
69 header named_regexps Trule definition other_definitions header Tend
71 entrypoints = $4 :: List.rev $5;
78 { { start_pos = 0; end_pos = 0; start_line = 1; start_col = 0 } }
81 named_regexps Tlet Tident Tequal regexp
82 { Hashtbl.add named_regexps $3 $5 }
87 other_definitions Tand definition
93 Tident arguments Tequal Tparse entry
94 { {name=$1 ; shortest=false ; args=$2 ; clauses=$5} }
95 | Tident arguments Tequal Tparse_shortest entry
96 { {name=$1 ; shortest=true ; args=$2 ; clauses=$5} }
100 Tident arguments { $1::$2 }
108 | Tor case rest_of_entry
113 rest_of_entry Tor case
124 { Characters Cset.all_chars }
128 { Characters (Cset.singleton $1) }
130 { regexp_for_string $1 }
131 | Tlbracket char_class Trbracket
136 { Alternative(Epsilon, $1) }
138 { Sequence(Repetition (remove_as $1), $1) }
139 | regexp Tsharp regexp
142 and s2 = as_cset $3 in
143 Characters (Cset.diff s1 s2)
146 { Alternative($1,$3) }
147 | regexp regexp %prec CONCAT
149 | Tlparen regexp Trparen
153 Hashtbl.find named_regexps $1
155 let p = Parsing.symbol_start_pos () in
156 Printf.eprintf "File \"%s\", line %d, character %d:\n\
157 Reference to unbound regexp name `%s'.\n"
158 p.Lexing.pos_fname p.Lexing.pos_lnum
159 (p.Lexing.pos_cnum - p.Lexing.pos_bol)
163 {let p1 = Parsing.rhs_start_pos 3
164 and p2 = Parsing.rhs_end_pos 3 in
166 start_pos = p1.Lexing.pos_cnum ;
167 end_pos = p2.Lexing.pos_cnum ;
168 start_line = p1.Lexing.pos_lnum ;
169 start_col = p1.Lexing.pos_cnum - p1.Lexing.pos_bol ; } in
179 { Cset.complement $2 }
185 { Cset.interval $1 $3 }
187 { Cset.singleton $1 }
188 | char_class1 char_class1 %prec CONCAT