1 (***********************************************************************)
5 (* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
7 (* Copyright 1996 Institut National de Recherche en Informatique et *)
8 (* en Automatique. All rights reserved. This file is distributed *)
9 (* under the terms of the Q Public License version 1.0. *)
11 (***********************************************************************)
15 (* The lexer definition *)
23 | Illegal_character of char
24 | Illegal_escape of string
25 | Unterminated_comment
27 | Unterminated_string_in_comment
28 | Keyword_as_label of string
29 | Literal_overflow of string
32 exception Error of error * Location.t;;
34 (* The table of keywords *)
37 create_hashtable 149 [
43 "constraint", CONSTRAINT;
49 "exception", EXCEPTION;
60 "initializer", INITIALIZER;
72 (* "parser", PARSER; *)
88 "mod", INFIXOP3("mod");
89 "land", INFIXOP3("land");
90 "lor", INFIXOP3("lor");
91 "lxor", INFIXOP3("lxor");
92 "lsl", INFIXOP4("lsl");
93 "lsr", INFIXOP4("lsr");
94 "asr", INFIXOP4("asr")
97 (* To buffer string literals *)
99 let initial_string_buffer = String.create 256
100 let string_buff = ref initial_string_buffer
101 let string_index = ref 0
103 let reset_string_buffer () =
104 string_buff := initial_string_buffer;
107 let store_string_char c =
108 if !string_index >= String.length (!string_buff) then begin
109 let new_buff = String.create (String.length (!string_buff) * 2) in
110 String.blit (!string_buff) 0 new_buff 0 (String.length (!string_buff));
111 string_buff := new_buff
113 String.unsafe_set (!string_buff) (!string_index) c;
116 let get_stored_string () =
117 let s = String.sub (!string_buff) 0 (!string_index) in
118 string_buff := initial_string_buffer;
121 (* To store the position of the beginning of a string and comment *)
122 let string_start_loc = ref Location.none;;
123 let comment_start_loc = ref [];;
124 let in_comment () = !comment_start_loc <> [];;
126 (* To translate escape sequences *)
128 let char_for_backslash = function
135 let char_for_decimal_code lexbuf i =
136 let c = 100 * (Char.code(Lexing.lexeme_char lexbuf i) - 48) +
137 10 * (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) +
138 (Char.code(Lexing.lexeme_char lexbuf (i+2)) - 48) in
139 if (c < 0 || c > 255) && not (in_comment ())
140 then raise (Error(Illegal_escape (Lexing.lexeme lexbuf),
141 Location.curr lexbuf))
144 let char_for_hexadecimal_code lexbuf i =
145 let d1 = Char.code (Lexing.lexeme_char lexbuf i) in
146 let val1 = if d1 >= 97 then d1 - 87
147 else if d1 >= 65 then d1 - 55
150 let d2 = Char.code (Lexing.lexeme_char lexbuf (i+1)) in
151 let val2 = if d2 >= 97 then d2 - 87
152 else if d2 >= 65 then d2 - 55
155 Char.chr (val1 * 16 + val2)
157 (* Remove underscores from float literals *)
159 let remove_underscores s =
160 let l = String.length s in
161 let rec remove src dst =
163 if dst >= l then s else String.sub s 0 dst
166 '_' -> remove (src + 1) dst
167 | c -> s.[dst] <- c; remove (src + 1) (dst + 1)
170 (* Update the current location with file name and line number. *)
172 let update_loc lexbuf file line absolute chars =
173 let pos = lexbuf.lex_curr_p in
174 let new_file = match file with
175 | None -> pos.pos_fname
178 lexbuf.lex_curr_p <- { pos with
179 pos_fname = new_file;
180 pos_lnum = if absolute then line else pos.pos_lnum + line;
181 pos_bol = pos.pos_cnum - chars;
189 let report_error ppf = function
190 | Illegal_character c ->
191 fprintf ppf "Illegal character (%s)" (Char.escaped c)
192 | Illegal_escape s ->
193 fprintf ppf "Illegal backslash escape in string or character (%s)" s
194 | Unterminated_comment ->
195 fprintf ppf "Comment not terminated"
196 | Unterminated_string ->
197 fprintf ppf "String literal not terminated"
198 | Unterminated_string_in_comment ->
199 fprintf ppf "This comment contains an unterminated string literal"
200 | Keyword_as_label kwd ->
201 fprintf ppf "`%s' is a keyword, it cannot be used as label name" kwd
202 | Literal_overflow ty ->
203 fprintf ppf "Integer literal exceeds the range of representable integers of type %s" ty
208 let newline = ('\010' | '\013' | "\013\010")
209 let blank = [' ' '\009' '\012']
210 let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_']
211 let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222']
213 ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '\'' '0'-'9']
215 ['!' '$' '%' '&' '*' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '|' '~']
216 let decimal_literal =
217 ['0'-'9'] ['0'-'9' '_']*
219 '0' ['x' 'X'] ['0'-'9' 'A'-'F' 'a'-'f']['0'-'9' 'A'-'F' 'a'-'f' '_']*
221 '0' ['o' 'O'] ['0'-'7'] ['0'-'7' '_']*
223 '0' ['b' 'B'] ['0'-'1'] ['0'-'1' '_']*
225 decimal_literal | hex_literal | oct_literal | bin_literal
227 ['0'-'9'] ['0'-'9' '_']*
228 ('.' ['0'-'9' '_']* )?
229 (['e' 'E'] ['+' '-']? ['0'-'9'] ['0'-'9' '_']*)?
233 { update_loc lexbuf None 1 false 0;
241 | "~" lowercase identchar * ':'
242 { let s = Lexing.lexeme lexbuf in
243 let name = String.sub s 1 (String.length s - 2) in
244 if Hashtbl.mem keyword_table name then
245 raise (Error(Keyword_as_label name, Location.curr lexbuf));
248 | "??" { QUESTIONQUESTION }
249 | "?" lowercase identchar * ':'
250 { let s = Lexing.lexeme lexbuf in
251 let name = String.sub s 1 (String.length s - 2) in
252 if Hashtbl.mem keyword_table name then
253 raise (Error(Keyword_as_label name, Location.curr lexbuf));
255 | lowercase identchar *
256 { let s = Lexing.lexeme lexbuf in
258 Hashtbl.find keyword_table s
261 | uppercase identchar *
262 { UIDENT(Lexing.lexeme lexbuf) } (* No capitalized keywords *)
265 INT (int_of_string(Lexing.lexeme lexbuf))
267 raise (Error(Literal_overflow "int", Location.curr lexbuf))
270 { FLOAT (remove_underscores(Lexing.lexeme lexbuf)) }
272 { let s = Lexing.lexeme lexbuf in
274 INT32 (Int32.of_string(String.sub s 0 (String.length s - 1)))
276 raise (Error(Literal_overflow "int32", Location.curr lexbuf)) }
278 { let s = Lexing.lexeme lexbuf in
280 INT64 (Int64.of_string(String.sub s 0 (String.length s - 1)))
282 raise (Error(Literal_overflow "int64", Location.curr lexbuf)) }
284 { let s = Lexing.lexeme lexbuf in
287 (Nativeint.of_string(String.sub s 0 (String.length s - 1)))
289 raise (Error(Literal_overflow "nativeint", Location.curr lexbuf)) }
291 { reset_string_buffer();
292 let string_start = lexbuf.lex_start_p in
293 string_start_loc := Location.curr lexbuf;
295 lexbuf.lex_start_p <- string_start;
296 STRING (get_stored_string()) }
298 { update_loc lexbuf None 1 false 1;
299 CHAR (Lexing.lexeme_char lexbuf 1) }
300 | "'" [^ '\\' '\'' '\010' '\013'] "'"
301 { CHAR(Lexing.lexeme_char lexbuf 1) }
302 | "'\\" ['\\' '\'' '"' 'n' 't' 'b' 'r' ' '] "'"
303 { CHAR(char_for_backslash (Lexing.lexeme_char lexbuf 2)) }
304 | "'\\" ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
305 { CHAR(char_for_decimal_code lexbuf 2) }
306 | "'\\" 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] "'"
307 { CHAR(char_for_hexadecimal_code lexbuf 3) }
309 { let l = Lexing.lexeme lexbuf in
310 let esc = String.sub l 1 (String.length l - 1) in
311 raise (Error(Illegal_escape esc, Location.curr lexbuf))
314 { comment_start_loc := [Location.curr lexbuf];
318 { let loc = Location.curr lexbuf in
319 Location.prerr_warning loc Warnings.Comment_start;
320 comment_start_loc := [Location.curr lexbuf];
325 { let loc = Location.curr lexbuf in
326 Location.prerr_warning loc Warnings.Comment_not_end;
327 lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - 1;
328 let curpos = lexbuf.lex_curr_p in
329 lexbuf.lex_curr_p <- { curpos with pos_cnum = curpos.pos_cnum - 1 };
332 | "#" [' ' '\t']* (['0'-'9']+ as num) [' ' '\t']*
333 ("\"" ([^ '\010' '\013' '"' ] * as name) "\"")?
334 [^ '\010' '\013'] * newline
335 { update_loc lexbuf name (int_of_string num) true 0;
340 | "&&" { AMPERAMPER }
347 | "->" { MINUSGREATER }
351 | "::" { COLONCOLON }
352 | ":=" { COLONEQUAL }
353 | ":>" { COLONGREATER }
360 | "[|" { LBRACKETBAR }
361 | "[<" { LBRACKETLESS }
362 | "[>" { LBRACKETGREATER }
365 | "{<" { LBRACELESS }
368 | "|]" { BARRBRACKET }
370 | ">]" { GREATERRBRACKET }
372 | ">}" { GREATERRBRACE }
374 | "!=" { INFIXOP0 "!=" }
380 { PREFIXOP(Lexing.lexeme lexbuf) }
381 | ['~' '?'] symbolchar +
382 { PREFIXOP(Lexing.lexeme lexbuf) }
383 | ['=' '<' '>' '|' '&' '$'] symbolchar *
384 { INFIXOP0(Lexing.lexeme lexbuf) }
385 | ['@' '^'] symbolchar *
386 { INFIXOP1(Lexing.lexeme lexbuf) }
387 | ['+' '-'] symbolchar *
388 { INFIXOP2(Lexing.lexeme lexbuf) }
390 { INFIXOP4(Lexing.lexeme lexbuf) }
391 | ['*' '/' '%'] symbolchar *
392 { INFIXOP3(Lexing.lexeme lexbuf) }
395 { raise (Error(Illegal_character (Lexing.lexeme_char lexbuf 0),
396 Location.curr lexbuf))
401 { comment_start_loc := (Location.curr lexbuf) :: !comment_start_loc;
405 { match !comment_start_loc with
407 | [x] -> comment_start_loc := [];
408 | _ :: l -> comment_start_loc := l;
412 { reset_string_buffer();
413 string_start_loc := Location.curr lexbuf;
414 begin try string lexbuf
415 with Error (Unterminated_string, _) ->
416 match !comment_start_loc with
418 | loc :: _ -> comment_start_loc := [];
419 raise (Error (Unterminated_string_in_comment, loc))
421 reset_string_buffer ();
426 { update_loc lexbuf None 1 false 1;
429 | "'" [^ '\\' '\'' '\010' '\013' ] "'"
431 | "'\\" ['\\' '"' '\'' 'n' 't' 'b' 'r' ' '] "'"
433 | "'\\" ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
435 | "'\\" 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] "'"
438 { match !comment_start_loc with
440 | loc :: _ -> comment_start_loc := [];
441 raise (Error (Unterminated_comment, loc))
444 { update_loc lexbuf None 1 false 0;
453 | '\\' newline ([' ' '\t'] * as space)
454 { update_loc lexbuf None 1 false (String.length space);
457 | '\\' ['\\' '\'' '"' 'n' 't' 'b' 'r' ' ']
458 { store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1));
460 | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9']
461 { store_string_char(char_for_decimal_code lexbuf 1);
463 | '\\' 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F']
464 { store_string_char(char_for_hexadecimal_code lexbuf 2);
470 (* Should be an error, but we are very lax.
471 raise (Error (Illegal_escape (Lexing.lexeme lexbuf),
472 Location.curr lexbuf))
474 let loc = Location.curr lexbuf in
475 Location.prerr_warning loc Warnings.Illegal_backslash;
476 store_string_char (Lexing.lexeme_char lexbuf 0);
477 store_string_char (Lexing.lexeme_char lexbuf 1);
482 { update_loc lexbuf None 1 false 0;
483 let s = Lexing.lexeme lexbuf in
484 for i = 0 to String.length s - 1 do
485 store_string_char s.[i];
490 { raise (Error (Unterminated_string, !string_start_loc)) }
492 { store_string_char(Lexing.lexeme_char lexbuf 0);
495 and skip_sharp_bang = parse
496 | "#!" [^ '\n']* '\n' [^ '\n']* "\n!#\n"
497 { update_loc lexbuf None 3 false 0 }
498 | "#!" [^ '\n']* '\n'
499 { update_loc lexbuf None 1 false 0 }