(* Tail of the lexer error handler (the enclosing [error] binding starts
   above this excerpt).  Reports which lexer rule failed on stderr, then
   aborts by raising the parser's generic error.
   NOTE(review): assuming [callerID] is the name of the failing rule —
   confirm against the binding's header. *)
10 prerr_endline (sprintf "Lexer error : %s" callerID);
12 raise Parsing.Parse_error
(* [pos lexbuf] returns the (start, end) byte offsets of the current
   lexeme — used to attach source positions to PARAM tokens below. *)
14 let pos lexbuf = (lexeme_start lexbuf, lexeme_end lexbuf)
(* Advance a [Lexing.position] past a newline: bump the line counter and
   record the current char offset as the new beginning-of-line offset,
   so column numbers stay correct on the next line. *)
16 let advance_line_pos pos =
17 { pos with pos_lnum = pos.pos_lnum + 1; pos_bol = pos.pos_cnum; }
(* Update the lexbuf's current position after consuming a '\n'.
   ocamllex does not track line numbers automatically; every rule that
   matches a newline must call this for accurate error locations. *)
19 let advance_line lexbuf =
20 lexbuf.lex_curr_p <- advance_line_pos lexbuf.lex_curr_p
(* Fragment of the keyword -> parser-token association list (the list's
   opening and many entries lie outside this excerpt).  Keys are
   lowercase; lookup is made case-insensitive later by lowercasing the
   input lexeme before consulting the table. *)
39 "precision",PRECISION;
47 "character",CHARACTER;
78 "intersect",INTERSECT;
80 "temporary",TEMPORARY;
87 "references",REFERENCES;
91 "timestamp",TIMESTAMP;
97 "constraint",CONSTRAINT;
(* INTERVAL units (MySQL date/time arithmetic), including the compound
   unit keywords such as DAY_HOUR. *)
124 "interval", INTERVAL;
125 "microsecond", MICROSECOND;
134 "second_microsecond", SECOND_MICROSECOND;
135 "minute_microsecond", MINUTE_MICROSECOND;
136 "minute_second", MINUTE_SECOND;
137 "hour_microsecond", HOUR_MICROSECOND;
138 "hour_second", HOUR_SECOND;
139 "hour_minute", HOUR_MINUTE;
140 "day_microsecond", DAY_MICROSECOND;
141 "day_second", DAY_SECOND;
142 "day_minute", DAY_MINUTE;
143 "day_hour", DAY_HOUR;
144 "year_month", YEAR_MONTH;
147 "duplicate", DUPLICATE;
(* Register groups of identifiers that all map to the same token.
   [all token l] appends (name, token) pairs to the accumulator [k]
   (a ref declared above this excerpt); [func x l] specializes it for
   SQL functions carrying a type signature from module T. *)
149 let all token l = k := !k @ List.map (fun x -> x,token) l in
150 let func x l = all (FUNCTION x) l in
(* Aggregates and grouping functions: count is int and usable without
   GROUP BY over the whole set; avg yields float. *)
151 func T.Agg ["max";"min";"sum"];
152 func T.(Group (Int,true)) ["count"];
153 func T.(Group (Float,false)) ["avg"];
(* Fixed-arity text functions. *)
154 func T.(fixed Text [Text;Text]) ["strftime"];
155 func T.(fixed Text [Text]) ["lower";"upper"];
(* Variadic functions characterized only by return type. *)
156 func T.(Ret Text) ["concat"];
157 func T.(Ret Any) ["coalesce"];
158 func T.(Ret Int) ["length"; "random";"unix_timestamp";"least";"greatest"];
159 all DATETIME_FUNC ["current_date";"current_timestamp";"current_time";"localtime";"localtimestamp";"now";];
160 all DATETIME_FUNC ["getdate"]; (* mssql? *)
(* SQLite ON CONFLICT algorithms and join/operator keyword groups. *)
161 all CONFLICT_ALGO ["ignore"; "abort"; "fail"; "rollback"];
162 all JOIN_TYPE1 ["left";"right";"full"];
163 all JOIN_TYPE2 ["inner";"outer"];
164 all LIKE_OP ["glob";"regexp";"match"];
165 all AUTOINCREMENT ["autoincrement";"auto_increment"];
166 (* standard built-in types
167 CHARACTER, CHARACTER VARYING, CHARACTER LARGE OBJECT,
168 BINARY, BINARY VARYING, BINARY LARGE OBJECT,
169 NUMERIC, DECIMAL, INTEGER, SMALLINT, BIGINT,
170 FLOAT, REAL, DOUBLE PRECISION,
172 DATE, TIME, TIMESTAMP, INTERVAL
(* Map the many dialect-specific type names onto sqlgg's coarse type
   tokens.  NOTE(review): decimal/numeric are registered as T_INTEGER —
   presumably a deliberate approximation; confirm before relying on it. *)
174 all T_INTEGER ["integer";"int";"smallint";"bigint";"tinyint";"mediumint";"middleint";"serial";"identity"];
175 all T_INTEGER ["numeric";"decimal";"dec";"fixed"];
176 all T_INTEGER ["number"]; (* oracle *)
177 all T_BOOLEAN ["bool";"boolean"];
178 all T_FLOAT ["float";"real";"double";"float4";"float8";"int1";"int2";"int3";"int4";"int8"];
179 all T_BLOB ["blob";"varbinary";"tinyblob";"mediumblob";"longblob"];
180 all T_TEXT ["text";"char";"varchar";"tinytext";"mediumtext";"longtext"];
181 all T_TEXT ["varchar2"]; (* oracle *)
182 all T_DATETIME ["datetime"];
186 Q: Why not convert all input to lowercase before lexing?
187 A: Sometimes SQL is case-sensitive, also string contents should be preserved
(* Build the keyword lookup table from the association list above.
   Keys are lowercased on insertion; registering the same lexeme twice
   is a programming error and fails fast with [failwith].
   (The [add] binding's header and surrounding lines are outside this
   excerpt.) *)
190 module Keywords = Map.Make(String)
194 let k = String.lowercase k in
195 if Keywords.mem k map then
196 failwith (sprintf "Lexeme %s is already associated with keyword." k)
200 List.fold_left add Keywords.empty keywords
202 (* FIXME case sensitivity??! *)
(* Look up a lexeme in the keyword table; anything not found becomes an
   IDENT.  The lexeme is lowercased first, making keyword matching
   case-insensitive — NOTE(review): identifiers are lowercased too,
   which is what the FIXME above refers to.  (The enclosing binding's
   header is outside this excerpt.) *)
205 let str = String.lowercase str in
206 try Keywords.find str keywords with Not_found -> IDENT str
(* Force an identifier token, bypassing keyword lookup — used for
   quoted/bracketed identifiers in ruleMain below. *)
208 let ident str = IDENT (String.lowercase str)
(* Re-quote string [s] with quote character [ch], doubling any embedded
   occurrence of [ch] (SQL-style escaping), then wrapping the result in
   [ch]...[ch].  E.g. as_literal '\'' "a'b" yields "'a''b'". *)
210 let as_literal ch s =
211 let s = String.replace_chars (fun x -> String.make (if x = ch then 2 else 1) x) s in
212 sprintf "%c%s%c" ch s ch
(* Named character classes shared by the rules below. *)
215 let digit = ['0'-'9']
216 let alpha = ['a'-'z' 'A'-'Z']
217 let ident = (alpha) (alpha | digit | '_' )*
(* wsp deliberately excludes '\n' so newlines can be matched separately
   and update the line counter via advance_line. *)
218 let wsp = [' ' '\r' '\t']
(* Single-line comment openers: SQL (--), C++ (//) and shell (#) styles. *)
219 let cmnt = "--" | "//" | "#"
221 (* extract separate statements *)
(* First-pass rule: split raw input into statement-level events —
   whitespace runs, "[sqlgg] key=value" property annotations, quoted
   literals (re-emitted verbatim via as_literal so the statement text
   is preserved), comments, and plain characters.  Some alternatives
   (e.g. the statement terminator, eof) lie outside this excerpt. *)
222 rule ruleStatement = parse
223 | ['\n' ' ' '\r' '\t']+ as tok { `Space tok }
(* "-- [sqlgg] name=value" directive line. *)
224 | cmnt wsp* "[sqlgg]" wsp+ (ident+ as n) wsp* "=" wsp* ([^'\n']* as v) '\n' { `Prop (n,v) }
(* "-- @name" shorthand for naming the following statement. *)
225 | cmnt wsp* "@" (ident+ as name) [^'\n']* '\n' { `Prop ("name",name) }
226 | '"' { let s = ruleInQuotes "" lexbuf in `Token (as_literal '"' s) }
227 | "'" { let s = ruleInSingleQuotes "" lexbuf in `Token (as_literal '\'' s) }
228 | cmnt as s { `Comment (s ^ ruleComment "" lexbuf) }
229 | "/*" { `Comment ("/*" ^ ruleCommentMulti "" lexbuf ^ "*/") }
231 | [^ ';'] as c { `Char c }
234 (* extract tail of the input *)
(* NOTE(review): the rule header (ruleTail acc = parse) lies outside
   this excerpt; this alternative greedily accumulates the rest of the
   input into [acc]. *)
237 | _* as str { ruleTail (acc ^ str) lexbuf }
(* Main tokenizer rule (its header lies outside this excerpt).  Skips
   whitespace and comments, tracks newlines for line numbering, and
   produces operator, parameter, literal, identifier and number tokens. *)
240 | wsp { ruleMain lexbuf }
241 (* update line number *)
242 | '\n' { advance_line lexbuf; ruleMain lexbuf}
(* Comments are consumed and discarded at this stage. *)
249 | cmnt { ignore (ruleComment "" lexbuf); ruleMain lexbuf }
250 | "/*" { ignore (ruleCommentMulti "" lexbuf); ruleMain lexbuf }
(* Operator families collapsed into grouped tokens. *)
260 | "/" | "%" { NUM_DIV_OP }
261 | "<<" | ">>" | "|" | "&" { NUM_BIT_OP }
262 | ">" | ">=" | "<=" | "<" { NUM_CMP_OP }
263 | "<>" | "!=" | "==" { NUM_EQ_OP }
(* Query parameters: anonymous '?' or named ':name' / '@name'; lexeme
   positions recorded for later substitution/diagnostics. *)
265 | "?" { PARAM (None,pos lexbuf) }
266 | [':' '@'] (ident as str) { PARAM (Some str,pos lexbuf) }
(* Quoted forms: "..." and [...] are quoted identifiers (ANSI / MSSQL),
   '...' is a text literal, `...` a MySQL identifier, x'...' a blob. *)
268 | '"' { ident (ruleInQuotes "" lexbuf) }
269 | "'" { TEXT (ruleInSingleQuotes "" lexbuf) }
270 | "`" { ident (ruleInBackQuotes "" lexbuf) }
271 | "[" { ident (ruleInBrackets "" lexbuf) }
272 | ['x' 'X'] "'" { BLOB (ruleInSingleQuotes "" lexbuf) }
(* Bare words go through keyword lookup; numbers are parsed eagerly. *)
274 | ident as str { get_ident str }
275 | digit+ as str { INTEGER (int_of_string str) }
276 | digit+ '.' digit+ as str { FLOAT (float_of_string str) }
278 | _ { error lexbuf "ruleMain" }
280 (* FIXME factor out all that ruleIn* rules *)
(* Body of a double-quoted string/identifier: a doubled "" unescapes to
   one '"'; newline or eof before the closing quote is an error.  The
   closing-quote alternative lies outside this excerpt. *)
281 ruleInQuotes acc = parse
283 | eof { error lexbuf "no terminating quote" }
284 | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating quote" }
285 | "\"\"" { ruleInQuotes (acc^"\"") lexbuf }
286 | [^'"' '\n']+ as s { ruleInQuotes (acc^s) lexbuf }
287 | _ { error lexbuf "ruleInQuotes" }
(* Body of a [bracketed] identifier (MSSQL style).  No escape sequence
   is supported (see the commented-out alternative); the closing-bracket
   alternative lies outside this excerpt. *)
289 ruleInBrackets acc = parse
291 | eof { error lexbuf "no terminating bracket" }
292 | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating bracket" }
293 (* | "\"\"" { ruleInQuotes (acc ^ "\"") lexbuf } *)
294 | [^']' '\n']+ { ruleInBrackets (acc ^ lexeme lexbuf) lexbuf }
295 | _ { error lexbuf "ruleInBrackets" }
(* Body of a single-quoted text literal: a doubled '' unescapes to one
   '\''; newline or eof before the closing quote is an error.  The
   closing-quote alternative lies outside this excerpt. *)
297 ruleInSingleQuotes acc = parse
299 | eof { error lexbuf "no terminating single quote" }
300 | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating single quote" }
301 | "''" { ruleInSingleQuotes (acc ^ "'") lexbuf }
302 | [^'\'' '\n']+ { ruleInSingleQuotes (acc ^ lexeme lexbuf) lexbuf }
303 | _ { error lexbuf "ruleInSingleQuotes" }
(* Body of a `backquoted` identifier (MySQL style): a doubled ``
   unescapes to one backquote; newline or eof before the closing quote
   is an error.  The closing-quote alternative lies outside this
   excerpt. *)
305 ruleInBackQuotes acc = parse
307 | eof { error lexbuf "no terminating back quote" }
308 | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating back quote" }
309 | "``" { ruleInBackQuotes (acc ^ "`") lexbuf }
310 | [^'`' '\n']+ { ruleInBackQuotes (acc ^ lexeme lexbuf) lexbuf }
311 | _ { error lexbuf "ruleInBackQuotes" }
(* Body of a single-line comment: accumulate everything up to the
   newline (which also bumps the line counter) and return the text.
   An eof alternative appears to lie outside this excerpt. *)
313 ruleComment acc = parse
314 | '\n' { advance_line lexbuf; acc }
316 | [^'\n']+ { let s = lexeme lexbuf in ruleComment (acc ^ s) lexbuf; }
317 | _ { error lexbuf "ruleComment"; }
(* Body of a C-style block comment: accumulate text while tracking
   newlines.  The terminating "*/" alternative (and handling of a lone
   '*') lies outside this excerpt. *)
319 ruleCommentMulti acc = parse
320 | '\n' { advance_line lexbuf; ruleCommentMulti (acc ^ "\n") lexbuf }
323 | [^'\n' '*']+ { let s = lexeme lexbuf in ruleCommentMulti (acc ^ s) lexbuf }
324 | _ { error lexbuf "ruleCommentMulti" }
(* Entry point used by the parser: fetch the next token from ruleMain,
   then filter it through Parser_state (the state inspection between
   these lines lies outside this excerpt).  In the ignore mode,
   everything except EOF collapses to IGNORED so the parser can skip
   uninteresting regions. *)
328 let parse_rule lexbuf =
329 let module P = Parser_state in
330 let token = ruleMain lexbuf in
334 (* eprintf "ignored: %s\n" (lexeme lexbuf); *)
335 if (token = EOF) then token else IGNORED