7 let error buf callerID =
8 Error.report "Lexer error : %s" callerID;
10 raise Parsing.Parse_error
12 let pos lexbuf = (lexeme_start lexbuf, lexeme_end lexbuf)
14 let advance_line_pos pos =
15 { pos with pos_lnum = pos.pos_lnum + 1; pos_bol = pos.pos_cnum; }
17 let advance_line lexbuf =
18 lexbuf.lex_curr_p <- advance_line_pos lexbuf.lex_curr_p
20 (* use Map or Hashtbl ? *)
40 "precision",PRECISION;
48 "character",CHARACTER;
79 "intersect",INTERSECT;
81 "temporary",TEMPORARY;
88 "references",REFERENCES;
92 "timestamp",TIMESTAMP;
98 "constraint",CONSTRAINT;
108 let all token l = k := !k @ List.map (fun x -> x,token) l in
109 all (FUNCTION T.Int) ["max"; "min"; "length"; "random";"count";"sum";"avg"];
110 all (FUNCTION T.Text) ["concat";"lower";"upper"];
111 all (FUNCTION T.Any) ["coalesce"];
112 all DATETIME_FUNC ["current_date";"current_timestamp";"current_time";"localtime";"localtimestamp";"now";"unix_timestamp"];
113 all CONFLICT_ALGO ["ignore"; "replace"; "abort"; "fail"; "rollback"];
114 all JOIN_TYPE1 ["left";"right";"full"];
115 all JOIN_TYPE2 ["inner";"outer"];
116 all LIKE_OP ["glob";"regexp";"match"];
117 all AUTOINCREMENT ["autoincrement";"auto_increment"];
118 (* standard built-in types
119 CHARACTER, CHARACTER VARYING, CHARACTER LARGE OBJECT,
120 BINARY, BINARY VARYING, BINARY LARGE OBJECT,
121 NUMERIC, DECIMAL, INTEGER, SMALLINT, BIGINT,
122 FLOAT, REAL, DOUBLE PRECISION,
124 DATE, TIME, TIMESTAMP, INTERVAL
126 all T_INTEGER ["integer";"int";"smallint";"bigint";"tinyint";"mediumint";"middleint";"serial";"identity"];
127 all T_INTEGER ["numeric";"decimal";"dec";"fixed"];
128 all T_INTEGER ["number"]; (* oracle *)
129 all T_BOOLEAN ["bool";"boolean"];
130 all T_FLOAT ["float";"real";"double";"float4";"float8";"int1";"int2";"int3";"int4";"int8"];
131 all T_BLOB ["blob";"varbinary";"tinyblob";"mediumblob";"longblob"];
132 all T_TEXT ["text";"char";"varchar";"tinytext";"mediumtext";"longtext"];
133 all T_TEXT ["varchar2"]; (* oracle *)
134 all T_DATETIME ["datetime";"year";];
138 Q: Why not convert all input to lowercase before lexing?
139 A: Sometimes SQL is case-sensitive, also string contents should be preserved
142 let keywords = List.map (fun (k,v) -> (String.lowercase k, v)) keywords
145 let str = String.lowercase str in
146 try List.assoc str keywords with Not_found -> IDENT str
149 let digit = ['0'-'9']
150 let alpha = ['a'-'z' 'A'-'Z']
151 let ident = (alpha) (alpha | digit | '_' )*
152 let wsp = [' ' '\r' '\t']
153 let cmnt = "--" | "//" | "#"
155 rule ruleStatement props = parse
156 | ['\n' ' ' '\r' '\t']+ { ruleStatement props lexbuf }
158 | cmnt wsp* "[sqlgg]" wsp+ (ident+ as n) wsp* "=" wsp* ([^'\n']* as v) '\n'
160 ruleStatement (Props.set props n v) lexbuf
162 | cmnt wsp* "@" (ident+ as name) [^'\n']* '\n'
164 ruleStatement (Props.set props "name" name) lexbuf
166 | cmnt { ignore (ruleComment "" lexbuf); ruleStatement props lexbuf }
167 | "/*" { ignore (ruleCommentMulti "" lexbuf); ruleStatement props lexbuf }
168 | alpha [^ ';']+ as stmt ';' { Some (stmt,props) }
172 | wsp { ruleMain lexbuf }
173 (* update line number *)
174 | '\n' { advance_line lexbuf; ruleMain lexbuf}
181 | cmnt { ignore (ruleComment "" lexbuf); ruleMain lexbuf }
182 | "/*" { ignore (ruleCommentMulti "" lexbuf); ruleMain lexbuf }
192 | "/" | "%" | "|" | "&" { NUM_BINARY_OP }
193 | ">" | ">=" | "<=" | "<" | "<>" { COMPARISON_OP }
195 | "?" { PARAM (None,pos lexbuf) }
196 | [':' '@'] (ident as str) { PARAM (Some str,pos lexbuf) }
198 | '"' { IDENT (ruleInQuotes "" lexbuf) }
199 | "'" { TEXT (ruleInSingleQuotes "" lexbuf) }
200 | "`" { IDENT (ruleInBackQuotes "" lexbuf) }
201 | "[" { IDENT (ruleInBrackets "" lexbuf) }
202 | ['x' 'X'] "'" { BLOB (ruleInSingleQuotes "" lexbuf) }
204 | ident as str { get_ident str }
205 | digit+ as str { INTEGER (int_of_string str) }
206 | digit+ '.' digit+ as str { FLOAT (float_of_string str) }
208 | _ { error lexbuf "ruleMain" }
210 (* FIXME factor out all that ruleIn* rules *)
211 ruleInQuotes acc = parse
213 | eof { error lexbuf "no terminating quote" }
214 | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating quote" }
215 | "\"\"" { ruleInQuotes (acc ^ "\"") lexbuf }
216 | [^'"' '\n']+ { ruleInQuotes (acc ^ lexeme lexbuf) lexbuf }
217 | _ { error lexbuf "ruleInQuotes" }
219 ruleInBrackets acc = parse
221 | eof { error lexbuf "no terminating bracket" }
222 | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating bracket" }
223 (* | "\"\"" { ruleInQuotes (acc ^ "\"") lexbuf } *)
224 | [^']' '\n']+ { ruleInBrackets (acc ^ lexeme lexbuf) lexbuf }
225 | _ { error lexbuf "ruleInBrackets" }
227 ruleInSingleQuotes acc = parse
229 | eof { error lexbuf "no terminating single quote" }
230 | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating single quote" }
231 | "''" { ruleInSingleQuotes (acc ^ "'") lexbuf }
232 | [^'\'' '\n']+ { ruleInSingleQuotes (acc ^ lexeme lexbuf) lexbuf }
233 | _ { error lexbuf "ruleInSingleQuotes" }
235 ruleInBackQuotes acc = parse
237 | eof { error lexbuf "no terminating back quote" }
238 | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating back quote" }
239 | "``" { ruleInBackQuotes (acc ^ "`") lexbuf }
240 | [^'`' '\n']+ { ruleInBackQuotes (acc ^ lexeme lexbuf) lexbuf }
241 | _ { error lexbuf "ruleInBackQuotes" }
243 ruleComment acc = parse
244 | '\n' { advance_line lexbuf; acc }
246 | [^'\n']+ { let s = lexeme lexbuf in ruleComment (acc ^ s) lexbuf; }
247 | _ { error lexbuf "ruleComment"; }
249 ruleCommentMulti acc = parse
250 | '\n' { advance_line lexbuf; ruleCommentMulti (acc ^ "\n") lexbuf }
252 | [^'\n']+ { let s = lexeme lexbuf in ruleCommentMulti (acc ^ s) lexbuf }
253 | _ { error lexbuf "ruleCommentMulti" }
257 let parse_rule lexbuf =
258 let module P = Parser_state in
259 let token = ruleMain lexbuf in
263 (* Printf.eprintf "ignored: %s\n" (lexeme lexbuf); *)
264 if (token = EOF) then token else IGNORED