lex/parser.mly

   1 /***********************************************************************/
   2 /*                                                                     */
   3 /*                           Objective Caml                            */
   4 /*                                                                     */
   5 /*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         */
   6 /*                                                                     */
   7 /*  Copyright 1996 Institut National de Recherche en Informatique et   */
   8 /*  en Automatique.  All rights reserved.  This file is distributed    */
   9 /*  under the terms of the Q Public License version 1.0.               */
  10 /*                                                                     */
  11 /***********************************************************************/
  12
  13 /* $Id$ */
  14
  15 /* The grammar for lexer definitions */
  16
  17 %{
  18 open Syntax
  19
  20 (* Auxiliaries for the parser. *)
  21
  22 let named_regexps =
  23   (Hashtbl.create 13 : (string, regular_expression) Hashtbl.t)
  24
  25 let regexp_for_string s =
  26   let rec re_string n =
  27     if n >= String.length s then Epsilon
  28     else if succ n = String.length s then
  29       Characters (Cset.singleton (Char.code s.[n]))
  30     else
  31       Sequence
  32         (Characters(Cset.singleton (Char.code s.[n])),
  33          re_string (succ n))
  34   in re_string 0
  35
  36 let rec remove_as = function
  37   | Bind (e,_) -> remove_as e
  38   | Epsilon|Eof|Characters _ as e -> e
  39   | Sequence (e1, e2) -> Sequence (remove_as e1, remove_as e2)
  40   | Alternative (e1, e2) -> Alternative (remove_as e1, remove_as e2)
  41   | Repetition e -> Repetition (remove_as e)
  42
  43 let as_cset = function
  44   | Characters s -> s
  45   | _ -> raise Cset.Bad
  46
  47 %}
  48
  49 %token <string> Tident
  50 %token <int> Tchar
  51 %token <string> Tstring
  52 %token <Syntax.location> Taction
  53 %token Trule Tparse Tparse_shortest Tand Tequal Tend Tor Tunderscore Teof Tlbracket Trbracket
  54 %token Tstar Tmaybe Tplus Tlparen Trparen Tcaret Tdash Tlet Tas Tsharp
  55
  56 %right Tas
  57 %left Tsharp
  58 %left Tor
  59 %nonassoc CONCAT
  60 %nonassoc Tmaybe Tstar Tplus
  61           Tident Tchar Tstring Tunderscore Teof Tlbracket Tlparen
  62
  63 %start lexer_definition
  64 %type <Syntax.lexer_definition> lexer_definition
  65
  66 %%
  67
  68 lexer_definition:
  69     header named_regexps Trule definition other_definitions header Tend
  70         { {header = $1;
  71            entrypoints = $4 :: List.rev $5;
  72            trailer = $6} }
  73 ;
  74 header:
  75     Taction
  76         { $1 }
  77   | /*epsilon*/
  78         { { start_pos = 0; end_pos = 0; start_line = 1; start_col = 0 } }
  79 ;
  80 named_regexps:
  81     named_regexps Tlet Tident Tequal regexp
  82         { Hashtbl.add named_regexps $3 $5 }
  83   | /*epsilon*/
  84         { () }
  85 ;
  86 other_definitions:
  87     other_definitions Tand definition
  88         { $3::$1 }
  89   | /*epsilon*/
  90         { [] }
  91 ;
  92 definition:
  93     Tident arguments Tequal Tparse entry
  94         { {name=$1 ; shortest=false ; args=$2 ; clauses=$5} }
  95   |  Tident arguments Tequal Tparse_shortest entry
  96         { {name=$1 ; shortest=true ; args=$2 ; clauses=$5} }
  97 ;
  98
  99 arguments:
 100     Tident arguments        { $1::$2 }
 101 |     /*epsilon*/           { [] }
 102 ;
 103
 104
 105 entry:
 106     case rest_of_entry
 107         { $1::List.rev $2 }
 108 |   Tor case rest_of_entry
 109         { $2::List.rev $3 }
 110 ;
 111
 112 rest_of_entry:
 113     rest_of_entry Tor case
 114         { $3::$1 }
 115   |
 116         { [] }
 117 ;
 118 case:
 119     regexp Taction
 120         { ($1,$2) }
 121 ;
 122 regexp:
 123     Tunderscore
 124         { Characters Cset.all_chars }
 125   | Teof
 126         { Eof }
 127   | Tchar
 128         { Characters (Cset.singleton $1) }
 129   | Tstring
 130         { regexp_for_string $1 }
 131   | Tlbracket char_class Trbracket
 132         { Characters $2 }
 133   | regexp Tstar
 134         { Repetition $1 }
 135   | regexp Tmaybe
 136         { Alternative(Epsilon, $1) }
 137   | regexp Tplus
 138         { Sequence(Repetition (remove_as $1), $1) }
 139   | regexp Tsharp regexp
 140         {
 141           let s1 = as_cset $1
 142           and s2 = as_cset $3 in
 143           Characters (Cset.diff s1 s2)
 144         }
 145   | regexp Tor regexp
 146         { Alternative($1,$3) }
 147   | regexp regexp %prec CONCAT
 148         { Sequence($1,$2) }
 149   | Tlparen regexp Trparen
 150         { $2 }
 151   | Tident
 152         { try
 153             Hashtbl.find named_regexps $1
 154           with Not_found ->
 155             let p = Parsing.symbol_start_pos () in
 156             Printf.eprintf "File \"%s\", line %d, character %d:\n\
 157                              Reference to unbound regexp name `%s'.\n"
 158                            p.Lexing.pos_fname p.Lexing.pos_lnum
 159                            (p.Lexing.pos_cnum - p.Lexing.pos_bol)
 160                            $1;
 161             exit 2 }
 162   | regexp Tas ident
 163         {let p1 = Parsing.rhs_start_pos 3
 164          and p2 = Parsing.rhs_end_pos 3 in
 165          let p = {
 166            start_pos = p1.Lexing.pos_cnum ;
 167            end_pos = p2.Lexing.pos_cnum ;
 168            start_line = p1.Lexing.pos_lnum ;
 169            start_col = p1.Lexing.pos_cnum - p1.Lexing.pos_bol ; } in
 170          Bind ($1, ($3, p))}
 171 ;
 172
 173 ident:
 174   Tident {$1}
 175 ;
 176
 177 char_class:
 178     Tcaret char_class1
 179         { Cset.complement $2 }
 180   | char_class1
 181         { $1 }
 182 ;
 183 char_class1:
 184     Tchar Tdash Tchar
 185         { Cset.interval $1 $3 }
 186   | Tchar
 187         { Cset.singleton $1 }
 188   | char_class1 char_class1 %prec CONCAT
 189         { Cset.union $1 $2 }
 190 ;
 191
 192 %%