9 Error.log "Lexer error : %s" callerID;
11 raise Parsing.Parse_error
(* [pos lexbuf] returns the (start, end) byte offsets of the current lexeme,
   used to attach a source span to tokens such as PARAM. *)
let pos lexbuf =
  let first = lexeme_start lexbuf in
  let last = lexeme_end lexbuf in
  (first, last)
(* Return a copy of [pos] advanced to the start of the next line:
   the line counter is bumped and the current offset becomes the
   new beginning-of-line marker. *)
let advance_line_pos pos =
  let new_bol = pos.pos_cnum in
  { pos with pos_lnum = succ pos.pos_lnum; pos_bol = new_bol }
(* Record that a newline was consumed: updates the lexbuf's current
   position in place via [advance_line_pos]. *)
let advance_line lexbuf =
  let cur = lexbuf.lex_curr_p in
  lexbuf.lex_curr_p <- advance_line_pos cur
(* Fragment of the SQL keyword table: each entry associates a lowercase
   lexeme with its parser token (surrounding entries not visible here). *)
40 "precision",PRECISION;
48 "character",CHARACTER;
79 "intersect",INTERSECT;
81 "temporary",TEMPORARY;
88 "references",REFERENCES;
92 "timestamp",TIMESTAMP;
98 "constraint",CONSTRAINT;
(* [all token l] registers every lexeme in [l] with the same [token],
   appending the pairs to the accumulator list [k]. *)
116 let all token l = k := !k @ List.map (fun x -> x,token) l in
(* FUNCTION carries a result type and a bool flag; the [true] group below is
   the aggregate functions — presumably the flag marks aggregates (confirm). *)
117 all (FUNCTION (T.Int,true)) ["max"; "min"; "count";"sum";"avg"];
118 all (FUNCTION (T.Int,false)) ["length"; "random";"unix_timestamp"];
(* NOTE(review): least/greatest are typed as Int here although SQL allows any
   comparable type — confirm this restriction is intended. *)
119 all (FUNCTION (T.Int,false)) ["least"; "greatest"];
120 all (FUNCTION (T.Text,false)) ["concat";"lower";"upper"];
121 all (FUNCTION (T.Any,false)) ["coalesce"];
122 all DATETIME_FUNC ["current_date";"current_timestamp";"current_time";"localtime";"localtimestamp";"now";];
123 all DATETIME_FUNC ["getdate"]; (* mssql? *)
124 all CONFLICT_ALGO ["ignore"; "abort"; "fail"; "rollback"];
125 all JOIN_TYPE1 ["left";"right";"full"];
126 all JOIN_TYPE2 ["inner";"outer"];
127 all LIKE_OP ["glob";"regexp";"match"];
128 all AUTOINCREMENT ["autoincrement";"auto_increment"];
129 (* standard built-in types
130 CHARACTER, CHARACTER VARYING, CHARACTER LARGE OBJECT,
131 BINARY, BINARY VARYING, BINARY LARGE OBJECT,
132 NUMERIC, DECIMAL, INTEGER, SMALLINT, BIGINT,
133 FLOAT, REAL, DOUBLE PRECISION,
135 DATE, TIME, TIMESTAMP, INTERVAL
(* Map concrete SQL type names (incl. vendor dialects) onto the lexer's
   coarse type tokens. *)
137 all T_INTEGER ["integer";"int";"smallint";"bigint";"tinyint";"mediumint";"middleint";"serial";"identity"];
(* NOTE(review): numeric/decimal are lumped into T_INTEGER — lossy; confirm. *)
138 all T_INTEGER ["numeric";"decimal";"dec";"fixed"];
139 all T_INTEGER ["number"]; (* oracle *)
140 all T_BOOLEAN ["bool";"boolean"];
(* NOTE(review): int1..int8 are MySQL *integer* aliases but are registered
   under T_FLOAT here — looks wrong, confirm intended behavior. *)
141 all T_FLOAT ["float";"real";"double";"float4";"float8";"int1";"int2";"int3";"int4";"int8"];
142 all T_BLOB ["blob";"varbinary";"tinyblob";"mediumblob";"longblob"];
143 all T_TEXT ["text";"char";"varchar";"tinytext";"mediumtext";"longtext"];
144 all T_TEXT ["varchar2"]; (* oracle *)
145 all T_DATETIME ["datetime";"year";];
149 Q: Why not convert all input to lowercase before lexing?
150 A: SQL is sometimes case-sensitive, and string contents must be preserved
153 module Keywords = Map.Make(String)
(* Build the lexeme -> token map; registering the same lowercase lexeme
   twice is treated as a programming error and aborts immediately. *)
157 let k = String.lowercase k in
158 if Keywords.mem k map then
159 failwith (Printf.sprintf "Lexeme %s is already associated with keyword." k)
163 List.fold_left add Keywords.empty keywords
165 (* FIXME case sensitivity??! *)
(* Look up a (lowercased) lexeme in the keyword map; anything that is not a
   registered keyword becomes a plain IDENT token. *)
168 let str = String.lowercase str in
169 try Keywords.find str keywords with Not_found -> IDENT str
(* Wrap a raw identifier lexeme into the parser's IDENT token,
   normalizing it to lowercase first. *)
let ident str =
  let normalized = String.lowercase str in
  IDENT normalized
(* [as_literal ch s] renders [s] as a quoted literal delimited by [ch],
   escaping embedded delimiter characters by doubling them (SQL-style),
   e.g. [as_literal '\'' "it's"] is ["'it''s'"]. *)
let as_literal ch s =
  let buf = Buffer.create (String.length s + 2) in
  Buffer.add_char buf ch;
  String.iter
    (fun c ->
       Buffer.add_char buf c;
       (* a delimiter inside the payload is written twice *)
       if c = ch then Buffer.add_char buf c)
    s;
  Buffer.add_char buf ch;
  Buffer.contents buf
(* Named character classes shared by the lexer rules below. *)
178 let digit = ['0'-'9']
179 let alpha = ['a'-'z' 'A'-'Z']
(* Identifiers start with a letter; letters, digits and '_' may follow. *)
180 let ident = (alpha) (alpha | digit | '_' )*
(* In-line whitespace only: '\n' is matched separately to track lines. *)
181 let wsp = [' ' '\r' '\t']
(* Single-line comment openers: SQL (--), C++ (//) and shell (#) styles. *)
182 let cmnt = "--" | "//" | "#"
184 (* extract separate statements *)
185 rule ruleStatement = parse
186 | ['\n' ' ' '\r' '\t']+ as tok { `Space tok }
(* "[sqlgg] key = value" annotation comments carry generator properties. *)
187 | cmnt wsp* "[sqlgg]" wsp+ (ident+ as n) wsp* "=" wsp* ([^'\n']* as v) '\n' { `Prop (n,v) }
(* "@foo" comment shorthand sets the statement's "name" property. *)
188 | cmnt wsp* "@" (ident+ as name) [^'\n']* '\n' { `Prop ("name",name) }
(* Quoted regions are re-quoted verbatim so statement text stays valid SQL. *)
189 | '"' { let s = ruleInQuotes "" lexbuf in `Token (as_literal '"' s) }
190 | "'" { let s = ruleInSingleQuotes "" lexbuf in `Token (as_literal '\'' s) }
(* Comments are preserved (not dropped) at the statement-splitting level. *)
191 | cmnt as s { `Comment (s ^ ruleComment "" lexbuf) }
192 | "/*" { `Comment ("/*" ^ ruleCommentMulti "" lexbuf ^ "*/") }
(* Any other non-';' character passes through (';' case not in this fragment). *)
194 | [^ ';'] as c { `Char c }
197 (* extract tail of the input *)
(* Accumulate the remaining input into [acc] (rule header not visible here). *)
200 | _* as str { ruleTail (acc ^ str) lexbuf }
(* Main tokenizer body (rule header not visible in this fragment). *)
(* Skip in-line whitespace. *)
203 | wsp { ruleMain lexbuf }
204 (* update line number *)
205 | '\n' { advance_line lexbuf; ruleMain lexbuf}
(* Comments are consumed and discarded at this level. *)
212 | cmnt { ignore (ruleComment "" lexbuf); ruleMain lexbuf }
213 | "/*" { ignore (ruleCommentMulti "" lexbuf); ruleMain lexbuf }
(* Operator families collapse onto shared tokens; the parser does not
   distinguish members within a family. *)
223 | "/" | "%" { NUM_DIV_OP }
224 | "<<" | ">>" | "|" | "&" { NUM_BIT_OP }
225 | ">" | ">=" | "<=" | "<" { NUM_CMP_OP }
226 | "<>" | "!=" | "==" { NUM_EQ_OP }
(* Bind parameters: positional '?' or named ':x' / '@x', each tagged with
   its source span from [pos]. *)
228 | "?" { PARAM (None,pos lexbuf) }
229 | [':' '@'] (ident as str) { PARAM (Some str,pos lexbuf) }
(* Quoted identifiers (ANSI "", MySQL ``, MSSQL []) and literals ('', x''). *)
231 | '"' { ident (ruleInQuotes "" lexbuf) }
232 | "'" { TEXT (ruleInSingleQuotes "" lexbuf) }
233 | "`" { ident (ruleInBackQuotes "" lexbuf) }
234 | "[" { ident (ruleInBrackets "" lexbuf) }
235 | ['x' 'X'] "'" { BLOB (ruleInSingleQuotes "" lexbuf) }
(* Bare words go through the keyword table, falling back to IDENT. *)
237 | ident as str { get_ident str }
238 | digit+ as str { INTEGER (int_of_string str) }
239 | digit+ '.' digit+ as str { FLOAT (float_of_string str) }
(* Anything unrecognized is a lexer error. *)
241 | _ { error lexbuf "ruleMain" }
243 (* FIXME factor out all that ruleIn* rules *)
(* Body of a double-quoted region; "" escapes an embedded quote, and the
   region must end before EOF/EOL (closing-quote case not in this fragment). *)
244 ruleInQuotes acc = parse
246 | eof { error lexbuf "no terminating quote" }
247 | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating quote" }
248 | "\"\"" { ruleInQuotes (acc^"\"") lexbuf }
249 | [^'"' '\n']+ as s { ruleInQuotes (acc^s) lexbuf }
250 | _ { error lexbuf "ruleInQuotes" }
(* Body of a [bracketed] identifier (MSSQL style); no escape sequence is
   handled — note the doubling case is commented out below. *)
252 ruleInBrackets acc = parse
254 | eof { error lexbuf "no terminating bracket" }
255 | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating bracket" }
256 (* | "\"\"" { ruleInQuotes (acc ^ "\"") lexbuf } *)
257 | [^']' '\n']+ { ruleInBrackets (acc ^ lexeme lexbuf) lexbuf }
258 | _ { error lexbuf "ruleInBrackets" }
(* Body of a single-quoted string literal; '' escapes an embedded quote
   (closing-quote case not visible in this fragment). *)
260 ruleInSingleQuotes acc = parse
262 | eof { error lexbuf "no terminating single quote" }
263 | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating single quote" }
264 | "''" { ruleInSingleQuotes (acc ^ "'") lexbuf }
265 | [^'\'' '\n']+ { ruleInSingleQuotes (acc ^ lexeme lexbuf) lexbuf }
266 | _ { error lexbuf "ruleInSingleQuotes" }
(* Body of a `backquoted` identifier (MySQL style); `` escapes an embedded
   backquote. *)
268 ruleInBackQuotes acc = parse
270 | eof { error lexbuf "no terminating back quote" }
271 | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating back quote" }
272 | "``" { ruleInBackQuotes (acc ^ "`") lexbuf }
273 | [^'`' '\n']+ { ruleInBackQuotes (acc ^ lexeme lexbuf) lexbuf }
274 | _ { error lexbuf "ruleInBackQuotes" }
(* Consume a single-line comment up to (and excluding) the newline,
   returning the accumulated text. *)
276 ruleComment acc = parse
277 | '\n' { advance_line lexbuf; acc }
279 | [^'\n']+ { let s = lexeme lexbuf in ruleComment (acc ^ s) lexbuf; }
280 | _ { error lexbuf "ruleComment"; }
(* Consume a multi-line comment body, tracking line numbers; the
   terminator handling is not visible in this fragment. *)
282 ruleCommentMulti acc = parse
283 | '\n' { advance_line lexbuf; ruleCommentMulti (acc ^ "\n") lexbuf }
286 | [^'\n' '*']+ { let s = lexeme lexbuf in ruleCommentMulti (acc ^ s) lexbuf }
287 | _ { error lexbuf "ruleCommentMulti" }
(* Entry point used by the parser driver: fetch the next token and, in the
   (not fully visible) "ignore" path, collapse everything but EOF to IGNORED. *)
291 let parse_rule lexbuf =
292 let module P = Parser_state in
293 let token = ruleMain lexbuf in
297 (* Printf.eprintf "ignored: %s\n" (lexeme lexbuf); *)
298 if (token = EOF) then token else IGNORED