7 let store str = curStr := str
9 let error buf callerID =
10 Error.report "Lexer error : %s" callerID;
12 raise Parsing.Parse_error
14 let advance_line_pos pos =
15 let module L = Lexing in
16 {L.pos_fname = pos.L.pos_fname;
17 pos_lnum = pos.L.pos_lnum + 1;
18 pos_bol = pos.L.pos_cnum;
19 pos_cnum = pos.L.pos_cnum;}
21 let advance_line lexbuf =
22 lexbuf.Lexing.lex_curr_p <- advance_line_pos lexbuf.Lexing.lex_curr_p
24 (* use Map or Hashtbl ? *)
44 "precision",PRECISION;
52 "character",CHARACTER;
80 "intersect",INTERSECT;
82 "temporary",TEMPORARY;
86 let all token l = k := !k @ List.map (fun x -> x,token) l in
87 all (FUNCTION (Some T.Int)) ["max"; "min"; "length"; "random";"count";"sum"];
88 all (FUNCTION (Some T.Text)) ["concat"];
89 all CONFLICT_ALGO ["ignore"; "replace"; "abort"; "fail"; "rollback"];
90 all JOIN_TYPE1 ["left";"right";"full"];
91 all JOIN_TYPE2 ["inner";"outer"];
92 all LIKE_OP ["like";"glob";"regexp";"match"];
93 all AUTOINCREMENT ["autoincrement";"auto_increment"];
94 (* standard built-in types
95 CHARACTER, CHARACTER VARYING, CHARACTER LARGE OBJECT,
96 BINARY, BINARY VARYING, BINARY LARGE OBJECT,
97 NUMERIC, DECIMAL, INTEGER, SMALLINT, BIGINT,
98 FLOAT, REAL, DOUBLE PRECISION,
100 DATE, TIME, TIMESTAMP, INTERVAL
102 all T_INTEGER ["integer";"int";"smallint";"bigint";"tinyint";"mediumint";"middleint"];
103 all T_INTEGER ["numeric";"decimal";"dec";"fixed"];
104 all T_BOOLEAN ["bool";"boolean"];
105 all T_FLOAT ["float";"real";"double";"float4";"float8";"int1";"int2";"int3";"int4";"int8"];
106 all T_BLOB ["blob";"varbinary";"tinyblob";"mediumblob";"longblob"];
107 all T_TEXT ["text";"char";"varchar";"tinytext";"mediumtext";"longtext"];
108 all T_DATETIME ["datetime";"date";"time";"timestamp";"year";];
112 Q: Why not convert all input to lowercase before lexing?
113 A: Sometimes SQL is case-sensitive, also string contents should be preserved
116 let keywords = List.map (fun (k,v) -> (String.lowercase k, v)) keywords
119 let str = String.lowercase str in
120 try List.assoc str keywords with Not_found -> IDENT str
123 let digit = ['0'-'9']
124 let alpha = ['a'-'z' 'A'-'Z']
125 let ident = (alpha) (alpha | digit | '_' )*
126 let wsp = [' ' '\r' '\t']
128 rule ruleStatement props = parse
129 | ['\n' ' ' '\r' '\t']+ { ruleStatement props lexbuf }
131 | "--" wsp* "[sqlgg]" wsp+ (ident+ as n) wsp* "=" wsp* ([^'\n']* as v) '\n'
133 ruleStatement (Props.set props n v) lexbuf
135 | "--" wsp* "@" (ident+ as name) [^'\n']* '\n'
137 ruleStatement (Props.set props "name" name) lexbuf
139 | "--" { store ""; ignore (ruleComment lexbuf); ruleStatement props lexbuf }
140 | alpha [^ ';']+ as stmt ';' { Some (stmt,props) }
144 | wsp { ruleMain lexbuf }
145 (* update line number *)
146 | '\n' { advance_line lexbuf; ruleMain lexbuf}
153 | "--" | "//" { store ""; ignore (ruleComment lexbuf); ruleMain lexbuf }
163 | "/" | "%" | ">" | ">=" | "<=" | "<" | "&" | "|" { NUM_BINARY_OP }
165 | "?" { PARAM Stmt.Next }
166 | "?" (digit+ as str) { PARAM (Stmt.Numbered (int_of_string str)) }
167 | [':' '@'] (ident as str) { PARAM (Stmt.Named str) }
169 | '"' { IDENT (ruleInQuotes "" lexbuf) }
170 | "'" { TEXT (ruleInSingleQuotes "" lexbuf) }
171 | ['x' 'X'] "'" { BLOB (ruleInSingleQuotes "" lexbuf) }
173 | ident as str { get_ident str }
174 | digit+ as str { INTEGER (int_of_string str) }
176 | _ { error lexbuf "ruleMain" }
178 ruleInQuotes acc = parse
180 | eof { error lexbuf "no terminating quote" }
181 | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating quote" }
182 | "\"\"" { ruleInQuotes (acc ^ "\"") lexbuf }
183 | [^'"' '\n']+ { ruleInQuotes (acc ^ Lexing.lexeme lexbuf) lexbuf }
184 | _ { error lexbuf "ruleInQuotes" }
186 ruleInSingleQuotes acc = parse
188 | eof { error lexbuf "no terminating single quote" }
189 | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating single quote" }
190 | "''" { ruleInSingleQuotes (acc ^ "'") lexbuf }
191 | [^'\'' '\n']+ { ruleInSingleQuotes (acc ^ Lexing.lexeme lexbuf) lexbuf }
192 | _ { error lexbuf "ruleInSingleQuotes" }
195 | '\n' { advance_line lexbuf; !curStr }
197 | [^'\n']+ { store (Lexing.lexeme lexbuf); ruleComment lexbuf; }
198 | _ { error lexbuf "ruleComment"; }
202 let parse_rule lexbuf = ruleMain lexbuf