10 prerr_endline (sprintf "Lexer error : %s" callerID);
12 raise Parsing.Parse_error
(* [pos lexbuf] returns the (start, end) byte offsets of the current lexeme,
   used to attach source positions to parameter tokens *)
14 let pos lexbuf = (lexeme_start lexbuf, lexeme_end lexbuf)
(* Bump a Lexing.position onto the next line: increment the line counter and
   record the current char offset as the new beginning-of-line offset *)
16 let advance_line_pos pos =
17 { pos with pos_lnum = pos.pos_lnum + 1; pos_bol = pos.pos_cnum; }
(* Register a newline in [lexbuf]'s current position so subsequent error
   reports carry correct line/column information *)
19 let advance_line lexbuf =
20 lexbuf.lex_curr_p <- advance_line_pos lexbuf.lex_curr_p
(* Fragment of the keyword association list: each entry maps a lowercase SQL
   keyword to its parser token (list header/footer elided in this chunk) *)
41 "precision",PRECISION;
49 "character",CHARACTER;
80 "intersect",INTERSECT;
82 "temporary",TEMPORARY;
89 "references",REFERENCES;
93 "timestamp",TIMESTAMP;
99 "constraint",CONSTRAINT;
(* [all token l] appends every name in [l], paired with [token], to the
   accumulator ref [k] (declared on an elided line above) *)
117 let all token l = k := !k @ List.map (fun x -> x,token) l in
(* NOTE(review): the bool in FUNCTION (_,b) presumably marks aggregate
   functions (true for max/min/count/sum/avg) — confirm against the parser *)
118 all (FUNCTION (T.Int,true)) ["max"; "min"; "count";"sum";"avg"];
119 all (FUNCTION (T.Int,false)) ["length"; "random";"unix_timestamp"];
120 all (FUNCTION (T.Int,false)) ["least"; "greatest"];
121 all (FUNCTION (T.Text,false)) ["concat";"lower";"upper"];
122 all (FUNCTION (T.Any,false)) ["coalesce"];
(* date/time niladic functions *)
123 all DATETIME_FUNC ["current_date";"current_timestamp";"current_time";"localtime";"localtimestamp";"now";];
124 all DATETIME_FUNC ["getdate"]; (* mssql? *)
125 all CONFLICT_ALGO ["ignore"; "abort"; "fail"; "rollback"];
(* join keywords split in two classes, combined by the grammar *)
126 all JOIN_TYPE1 ["left";"right";"full"];
127 all JOIN_TYPE2 ["inner";"outer"];
128 all LIKE_OP ["glob";"regexp";"match"];
(* both sqlite and mysql spellings *)
129 all AUTOINCREMENT ["autoincrement";"auto_increment"];
130 (* standard built-in types
131 CHARACTER, CHARACTER VARYING, CHARACTER LARGE OBJECT,
132 BINARY, BINARY VARYING, BINARY LARGE OBJECT,
133 NUMERIC, DECIMAL, INTEGER, SMALLINT, BIGINT,
134 FLOAT, REAL, DOUBLE PRECISION,
136 DATE, TIME, TIMESTAMP, INTERVAL
(* Map SQL type names onto the coarse type-token classes used by the parser;
   dialect-specific names are folded into the nearest standard class *)
138 all T_INTEGER ["integer";"int";"smallint";"bigint";"tinyint";"mediumint";"middleint";"serial";"identity"];
(* NOTE(review): exact numerics (numeric/decimal) are mapped to T_INTEGER,
   i.e. scale is deliberately ignored — confirm this is intended *)
139 all T_INTEGER ["numeric";"decimal";"dec";"fixed"];
140 all T_INTEGER ["number"]; (* oracle *)
141 all T_BOOLEAN ["bool";"boolean"];
142 all T_FLOAT ["float";"real";"double";"float4";"float8";"int1";"int2";"int3";"int4";"int8"];
143 all T_BLOB ["blob";"varbinary";"tinyblob";"mediumblob";"longblob"];
144 all T_TEXT ["text";"char";"varchar";"tinytext";"mediumtext";"longtext"];
145 all T_TEXT ["varchar2"]; (* oracle *)
146 all T_DATETIME ["datetime";"year";];
150 Q: Why not convert all input to lowercase before lexing?
151 A: SQL is sometimes case-sensitive, and string literal contents must be preserved as written
(* Keyword lookup table, keyed by lowercase keyword string *)
154 module Keywords = Map.Make(String)
(* Body of the fold that builds the keyword map (binding header elided):
   normalize to lowercase and fail fast on duplicate keyword definitions *)
158 let k = String.lowercase k in
159 if Keywords.mem k map then
160 failwith (sprintf "Lexeme %s is already associated with keyword." k)
(* build the map from the association list assembled above *)
164 List.fold_left add Keywords.empty keywords
166 (* FIXME case sensitivity??! *)
(* Keyword lookup for a lexeme (function header elided): lowercase it, and
   fall back to a plain IDENT token when it is not a registered keyword *)
169 let str = String.lowercase str in
170 try Keywords.find str keywords with Not_found -> IDENT str
(* Wrap a lexeme as an identifier token, normalized to lowercase *)
172 let ident str = IDENT (String.lowercase str)
(* [as_literal ch s] re-quotes [s] with quote character [ch], doubling any
   embedded occurrence of [ch] (SQL-style escaping), e.g. a'b -> 'a''b'.
   NOTE: String.replace_chars is extlib's ExtString, not the stdlib *)
174 let as_literal ch s =
175 let s = String.replace_chars (fun x -> String.make (if x = ch then 2 else 1) x) s in
176 sprintf "%c%s%c" ch s ch
(* Named character classes and patterns shared by the rules below *)
179 let digit = ['0'-'9']
180 let alpha = ['a'-'z' 'A'-'Z']
181 let ident = (alpha) (alpha | digit | '_' )*
(* intra-line whitespace; '\n' is handled separately to track line numbers *)
182 let wsp = [' ' '\r' '\t']
(* single-line comment openers for the supported dialects *)
183 let cmnt = "--" | "//" | "#"
185 (* extract separate statements *)
(* Splits raw input into statement pieces (some cases elided in this chunk);
   emits polymorphic-variant fragments the driver reassembles *)
186 rule ruleStatement = parse
187 | ['\n' ' ' '\r' '\t']+ as tok { `Space tok }
(* "-- [sqlgg] key = value" directive comment *)
188 | cmnt wsp* "[sqlgg]" wsp+ (ident+ as n) wsp* "=" wsp* ([^'\n']* as v) '\n' { `Prop (n,v) }
(* "-- @name" shorthand for the name property *)
189 | cmnt wsp* "@" (ident+ as name) [^'\n']* '\n' { `Prop ("name",name) }
(* consume quoted literals whole and re-quote them, so a ';' inside quotes
   is never treated as a statement separator *)
190 | '"' { let s = ruleInQuotes "" lexbuf in `Token (as_literal '"' s) }
191 | "'" { let s = ruleInSingleQuotes "" lexbuf in `Token (as_literal '\'' s) }
192 | cmnt as s { `Comment (s ^ ruleComment "" lexbuf) }
193 | "/*" { `Comment ("/*" ^ ruleCommentMulti "" lexbuf ^ "*/") }
(* any other character except the statement separator ';' *)
195 | [^ ';'] as c { `Char c }
198 (* extract tail of the input *)
(* Accumulates the remainder of the input verbatim (rule header elided) *)
201 | _* as str { ruleTail (acc ^ str) lexbuf }
(* Fragment of the main tokenizer rule (header and several cases elided):
   skips whitespace/comments, tracks newlines, and produces parser tokens *)
204 | wsp { ruleMain lexbuf }
205 (* update line number *)
206 | '\n' { advance_line lexbuf; ruleMain lexbuf}
(* comments are consumed and discarded at this level *)
213 | cmnt { ignore (ruleComment "" lexbuf); ruleMain lexbuf }
214 | "/*" { ignore (ruleCommentMulti "" lexbuf); ruleMain lexbuf }
(* operator classes; note "==" is treated as equality, "<>"/"!=" as inequality *)
224 | "/" | "%" { NUM_DIV_OP }
225 | "<<" | ">>" | "|" | "&" { NUM_BIT_OP }
226 | ">" | ">=" | "<=" | "<" { NUM_CMP_OP }
227 | "<>" | "!=" | "==" { NUM_EQ_OP }
(* bind parameters: anonymous "?" and named ":x" / "@x", with positions *)
229 | "?" { PARAM (None,pos lexbuf) }
230 | [':' '@'] (ident as str) { PARAM (Some str,pos lexbuf) }
(* quoted identifiers ("", ``, []) vs. string literals ('') *)
232 | '"' { ident (ruleInQuotes "" lexbuf) }
233 | "'" { TEXT (ruleInSingleQuotes "" lexbuf) }
234 | "`" { ident (ruleInBackQuotes "" lexbuf) }
235 | "[" { ident (ruleInBrackets "" lexbuf) }
(* hex blob literal: x'...' *)
236 | ['x' 'X'] "'" { BLOB (ruleInSingleQuotes "" lexbuf) }
(* bare word: keyword if registered, identifier otherwise *)
238 | ident as str { get_ident str }
239 | digit+ as str { INTEGER (int_of_string str) }
240 | digit+ '.' digit+ as str { FLOAT (float_of_string str) }
242 | _ { error lexbuf "ruleMain" }
244 (* FIXME factor out all that ruleIn* rules *)
(* Body of a double-quoted ("...") section; "" is an escaped quote and
   newlines are not allowed inside (terminating case elided in this chunk) *)
245 ruleInQuotes acc = parse
247 | eof { error lexbuf "no terminating quote" }
248 | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating quote" }
249 | "\"\"" { ruleInQuotes (acc^"\"") lexbuf }
250 | [^'"' '\n']+ as s { ruleInQuotes (acc^s) lexbuf }
251 | _ { error lexbuf "ruleInQuotes" }
(* Body of a bracketed [identifier] section (mssql style); no escape for ']'
   and no embedded newlines (terminating case elided in this chunk) *)
253 ruleInBrackets acc = parse
255 | eof { error lexbuf "no terminating bracket" }
256 | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating bracket" }
257 (* | "\"\"" { ruleInQuotes (acc ^ "\"") lexbuf } *)
258 | [^']' '\n']+ { ruleInBrackets (acc ^ lexeme lexbuf) lexbuf }
259 | _ { error lexbuf "ruleInBrackets" }
(* Body of a single-quoted '...' string; '' is an escaped quote and newlines
   are not allowed inside (terminating case elided in this chunk) *)
261 ruleInSingleQuotes acc = parse
263 | eof { error lexbuf "no terminating single quote" }
264 | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating single quote" }
265 | "''" { ruleInSingleQuotes (acc ^ "'") lexbuf }
266 | [^'\'' '\n']+ { ruleInSingleQuotes (acc ^ lexeme lexbuf) lexbuf }
267 | _ { error lexbuf "ruleInSingleQuotes" }
(* Body of a backquoted `identifier` section (mysql style); `` is an escaped
   backquote, no embedded newlines (terminating case elided in this chunk) *)
269 ruleInBackQuotes acc = parse
271 | eof { error lexbuf "no terminating back quote" }
272 | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating back quote" }
273 | "``" { ruleInBackQuotes (acc ^ "`") lexbuf }
274 | [^'`' '\n']+ { ruleInBackQuotes (acc ^ lexeme lexbuf) lexbuf }
275 | _ { error lexbuf "ruleInBackQuotes" }
(* Body of a single-line comment: collect text up to (not including) the
   newline, updating the line counter (eof case elided in this chunk) *)
277 ruleComment acc = parse
278 | '\n' { advance_line lexbuf; acc }
280 | [^'\n']+ { let s = lexeme lexbuf in ruleComment (acc ^ s) lexbuf; }
281 | _ { error lexbuf "ruleComment"; }
(* Body of a /* ... */ comment: collect text, tracking newlines (closing
   "*/" case elided in this chunk) *)
283 ruleCommentMulti acc = parse
284 | '\n' { advance_line lexbuf; ruleCommentMulti (acc ^ "\n") lexbuf }
287 | [^'\n' '*']+ { let s = lexeme lexbuf in ruleCommentMulti (acc ^ s) lexbuf }
288 | _ { error lexbuf "ruleCommentMulti" }
(* Top-level token supplier for the parser. Middle lines are elided in this
   chunk; presumably they consult Parser_state (module P) to decide whether
   the lexer is in normal or ignore mode — confirm against full source.
   In the visible ignore path everything except EOF collapses to IGNORED. *)
292 let parse_rule lexbuf =
293 let module P = Parser_state in
294 let token = ruleMain lexbuf in
298 (* eprintf "ignored: %s\n" (lexeme lexbuf); *)
299 if (token = EOF) then token else IGNORED