src/utils/cdk/genlex2.mli
(* Copyright 2001, 2002 b8_bavard, b8_fee_carabine, INRIA *)
(*
    This file is part of mldonkey.

    mldonkey is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    mldonkey is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with mldonkey; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)

(** A generic lexical analyzer.

    This module implements a simple ``standard'' lexical analyzer, presented
    as a function from character streams to token streams. It implements
    roughly the lexical conventions of Caml, but is parameterized by the
    set of keywords of your language.

    Example: a lexer suitable for a desk calculator is obtained by
{[     let lexer = make_lexer ["+";"-";"*";"/";"let";"="; "("; ")"]  ]}

    The associated parser would be a function from [token stream]
    to, for instance, [int], and would have rules such as:

{[
     let parse_expr = parser
                  [< 'Int n >] -> n
                | [< 'Kwd "("; n = parse_expr; 'Kwd ")" >] -> n
                | [< n1 = parse_expr; n2 = parse_remainder n1 >] -> n2
     and parse_remainder n1 = parser
                  [< 'Kwd "+"; n2 = parse_expr >] -> n1+n2
                | ...
]}
*)

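(* The example above relies on the camlp4 [parser] syntax. As an illustration
   only (not part of the original interface), a sketch of the same calculator
   written against the plain [Stream] functions could look like the following;
   note that [Int] carries an [int64] in this variant, so arithmetic goes
   through the [Int64] module:

   let rec parse_expr s =
     (* parse one atom, then any trailing "+ expr" *)
     let n1 = parse_atom s in
     parse_remainder n1 s

   and parse_atom s =
     match Stream.next s with
     | Int n -> n
     | Kwd "(" ->
         let n = parse_expr s in
         (match Stream.next s with
          | Kwd ")" -> n
          | _ -> failwith "')' expected")
     | _ -> failwith "integer or '(' expected"

   and parse_remainder n1 s =
     match Stream.peek s with
     | Some (Kwd "+") -> Stream.junk s; Int64.add n1 (parse_expr s)
     | _ -> n1
*)
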
(** The type of tokens. The lexical classes are: [Int] and [Float]
    for integer and floating-point numbers; [String] for
    string literals, enclosed in double quotes; [Char] for
    character literals, enclosed in single quotes; [Ident] for
    identifiers (either sequences of letters, digits, underscores
    and quotes, or sequences of ``operator characters'' such as
    [+], [*], etc); and [Kwd] for keywords (either identifiers or
    single ``special characters'' such as [(], [}], etc). *)
type token =
    Kwd of string
  | Ident of string
  | Int of int64
  | Float of float
  | String of string
  | Char of char
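
(* Illustration (not in the original file): assuming "let" and "=" appear in
   the keyword list passed to [make_lexer] below, lexing the input
   "let x = 3.5" yields the tokens [Kwd "let"; Ident "x"; Kwd "="; Float 3.5],
   following the lexical classes described above. *)
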
(** Construct the lexer function. The first argument is the list of
    keywords. An identifier [s] is returned as [Kwd s] if [s]
    belongs to this list, and as [Ident s] otherwise.
    A special character [s] is returned as [Kwd s] if [s]
    belongs to this list, and causes a lexical error (exception
    [Parse_error]) otherwise. Blanks and newlines are skipped.
    Comments delimited by [(*] and [*)] are skipped as well,
    and can be nested. *)
val make_lexer: string list -> (char Stream.t -> token Stream.t)
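
(* A minimal usage sketch (not part of the original interface), assuming an
   implementation matching this signature together with the standard [Stream]
   and [Printf] modules:

   let lexer = make_lexer ["+"; "-"; "*"; "/"; "let"; "="; "("; ")"]

   let print_token = function
     | Kwd s    -> Printf.printf "Kwd %s\n" s
     | Ident s  -> Printf.printf "Ident %s\n" s
     | Int n    -> Printf.printf "Int %Ld\n" n
     | Float f  -> Printf.printf "Float %g\n" f
     | String s -> Printf.printf "String %S\n" s
     | Char c   -> Printf.printf "Char %C\n" c

   let () =
     Stream.iter print_token (lexer (Stream.of_string "let x = 1 + 2"))

   Given the conventions documented above, this should print the keywords
   "let", "=" and "+", the identifier "x", and the integers 1 and 2, in
   input order. *)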