(* Tail of the [error] helper (its header is above this chunk): log the
   failing rule's name, then abort parsing with the standard parser
   exception so the driver's error handling takes over. *)
Error.log "Lexer error : %s" callerID;
raise Parsing.Parse_error
(* Byte offsets of the current lexeme as a (start, past-the-end) pair. *)
let pos lexbuf = Lexing.(lexeme_start lexbuf, lexeme_end lexbuf)
(* Return a copy of [pos] moved to the start of the next source line:
   the line counter is bumped and the current offset becomes the new
   beginning-of-line, so later column computations stay correct. *)
let advance_line_pos pos =
  let bol = pos.pos_cnum in
  { pos with pos_lnum = succ pos.pos_lnum; pos_bol = bol }
(* Record in [lexbuf] that a newline was just consumed: advance the
   current position to the next line (line counter +1, beginning-of-line
   set to the current offset). *)
let advance_line lexbuf =
  let p = lexbuf.lex_curr_p in
  lexbuf.lex_curr_p <- { p with pos_lnum = p.pos_lnum + 1; pos_bol = p.pos_cnum }
(* Fragment of the keyword table: each entry maps a lowercase SQL lexeme
   to its parser token (the list continues outside this chunk). *)
"precision",PRECISION;
"character",CHARACTER;
"intersect",INTERSECT;
"temporary",TEMPORARY;
"references",REFERENCES;
"timestamp",TIMESTAMP;
"constraint",CONSTRAINT;
(* [all token l] registers every lexeme in [l] as producing [token].
   NOTE(review): [k := !k @ ...] re-traverses the accumulated list on
   each call (quadratic overall), but the table is built once at
   startup, so it is harmless. *)
let all token l = k := !k @ List.map (fun x -> x,token) l in
(* built-in functions, grouped by result type; the bool flag on FUNCTION
   presumably distinguishes aggregate functions — TODO confirm *)
all (FUNCTION (T.Int,true)) ["max"; "min"; "count";"sum";"avg"];
all (FUNCTION (T.Int,false)) ["length"; "random";"unix_timestamp"];
all (FUNCTION (T.Int,false)) ["least"; "greatest"];
all (FUNCTION (T.Text,false)) ["concat";"lower";"upper"];
all (FUNCTION (T.Any,false)) ["coalesce"];
(* date/time builtins that need no argument list *)
all DATETIME_FUNC ["current_date";"current_timestamp";"current_time";"localtime";"localtimestamp";"now";];
all DATETIME_FUNC ["getdate"]; (* mssql? *)
all CONFLICT_ALGO ["ignore"; "abort"; "fail"; "rollback"];
all JOIN_TYPE1 ["left";"right";"full"];
all JOIN_TYPE2 ["inner";"outer"];
all LIKE_OP ["glob";"regexp";"match"];
(* both sqlite and mysql spellings *)
all AUTOINCREMENT ["autoincrement";"auto_increment"];
(* standard built-in types
CHARACTER, CHARACTER VARYING, CHARACTER LARGE OBJECT,
BINARY, BINARY VARYING, BINARY LARGE OBJECT,
NUMERIC, DECIMAL, INTEGER, SMALLINT, BIGINT,
FLOAT, REAL, DOUBLE PRECISION,
DATE, TIME, TIMESTAMP, INTERVAL
all T_INTEGER ["integer";"int";"smallint";"bigint";"tinyint";"mediumint";"middleint";"serial";"identity"];
all T_INTEGER ["numeric";"decimal";"dec";"fixed"];
all T_INTEGER ["number"]; (* oracle *)
all T_BOOLEAN ["bool";"boolean"];
all T_FLOAT ["float";"real";"double";"float4";"float8";"int1";"int2";"int3";"int4";"int8"];
all T_BLOB ["blob";"varbinary";"tinyblob";"mediumblob";"longblob"];
all T_TEXT ["text";"char";"varchar";"tinytext";"mediumtext";"longtext"];
all T_TEXT ["varchar2"]; (* oracle *)
all T_DATETIME ["datetime";"year";];
148 Q: Why not convert all input to lowercase before lexing?
A: Sometimes SQL is case-sensitive; also, string literal contents must be preserved verbatim
(* Lookup table from (lowercased) keyword lexeme to parser token. *)
module Keywords = Map.Make(String)
(* Interior of the keyword-map builder: lexemes are lowercased before
   insertion, and a duplicate registration fails fast at startup rather
   than silently shadowing an earlier token. *)
let k = String.lowercase k in
if Keywords.mem k map then
failwith (Printf.sprintf "Lexeme %s is already associated with keyword." k)
List.fold_left add Keywords.empty keywords
(* FIXME case sensitivity??! *)
(* Keyword lookup (function header above this chunk): normalize the
   lexeme to lowercase, return its keyword token if registered,
   otherwise fall back to a plain identifier token. *)
let str = String.lowercase str in
try Keywords.find str keywords with Not_found -> IDENT str
(* Wrap a raw lexeme as an identifier token, normalized to lowercase. *)
let ident s = IDENT (String.lowercase s)
(* [as_literal ch s] renders [s] as a quoted literal delimited by [ch],
   doubling every embedded occurrence of [ch] (standard SQL escaping),
   e.g. [as_literal '\'' "it's" = "'it''s'"].
   Rewritten with a stdlib [Buffer] instead of ExtLib's
   [String.replace_chars]: no extra library dependency and no per-char
   intermediate strings. *)
let as_literal ch s =
  let b = Buffer.create (String.length s + 2) in
  Buffer.add_char b ch;
  String.iter
    (fun c ->
      Buffer.add_char b c;
      (* escape the delimiter by doubling it *)
      if c = ch then Buffer.add_char b c)
    s;
  Buffer.add_char b ch;
  Buffer.contents b
(* Named character classes and fragments shared by the rules below. *)
let digit = ['0'-'9']
let alpha = ['a'-'z' 'A'-'Z']
(* identifier: a letter followed by letters, digits or underscores *)
let ident = (alpha) (alpha | digit | '_' )*
(* intra-line whitespace; '\n' is handled separately to count lines *)
let wsp = [' ' '\r' '\t']
(* single-line comment openers: SQL (--), C++/Java (//), shell (#) *)
let cmnt = "--" | "//" | "#"
(* extract separate statements *)
(* Splits raw input into tagged pieces: whitespace runs, sqlgg
   directives, quoted literals (re-quoted verbatim), comments, and
   single characters. The ';' case is outside this chunk. *)
rule ruleStatement = parse
| ['\n' ' ' '\r' '\t']+ as tok { `Space tok }
(* directive comment: -- [sqlgg] name = value *)
| cmnt wsp* "[sqlgg]" wsp+ (ident+ as n) wsp* "=" wsp* ([^'\n']* as v) '\n' { `Prop (n,v) }
(* shorthand directive: -- @name  (sets the "name" property) *)
| cmnt wsp* "@" (ident+ as name) [^'\n']* '\n' { `Prop ("name",name) }
(* quoted literals are read to their closing quote and re-emitted with
   escaping intact so the statement text is preserved verbatim *)
| '"' { let s = ruleInQuotes "" lexbuf in `Token (as_literal '"' s) }
| "'" { let s = ruleInSingleQuotes "" lexbuf in `Token (as_literal '\'' s) }
| cmnt as s { `Comment (s ^ ruleComment "" lexbuf) }
| "/*" { `Comment ("/*" ^ ruleCommentMulti "" lexbuf ^ "*/") }
| [^ ';'] as c { `Char c }
(* extract tail of the input *)
(* NOTE(review): the rule header is outside this chunk; [_*] is greedy,
   so this clause consumes the remainder of the buffer in one step. *)
| _* as str { ruleTail (acc ^ str) lexbuf }
(* Main tokenizer (several clauses elided from this chunk): skips
   whitespace and comments, then classifies operators, parameters,
   quoted forms, identifiers and numbers. *)
| wsp { ruleMain lexbuf }
(* update line number *)
| '\n' { advance_line lexbuf; ruleMain lexbuf}
(* comments are consumed and discarded at this level *)
| cmnt { ignore (ruleComment "" lexbuf); ruleMain lexbuf }
| "/*" { ignore (ruleCommentMulti "" lexbuf); ruleMain lexbuf }
(* arithmetic / bitwise / comparison / equality operator families *)
| "/" | "%" { NUM_DIV_OP }
| "<<" | ">>" | "|" | "&" { NUM_BIT_OP }
| ">" | ">=" | "<=" | "<" { NUM_CMP_OP }
| "<>" | "!=" | "==" { NUM_EQ_OP }
(* bind parameters: anonymous [?] or named [:name] / [@name]; both carry
   the lexeme's byte offsets for later error reporting *)
| "?" { PARAM (None,pos lexbuf) }
| [':' '@'] (ident as str) { PARAM (Some str,pos lexbuf) }
(* quoted forms: "..."  [...]  `...` are quoted identifiers,
   '...' is a TEXT literal, x'...' a BLOB literal *)
| '"' { ident (ruleInQuotes "" lexbuf) }
| "'" { TEXT (ruleInSingleQuotes "" lexbuf) }
| "`" { ident (ruleInBackQuotes "" lexbuf) }
| "[" { ident (ruleInBrackets "" lexbuf) }
| ['x' 'X'] "'" { BLOB (ruleInSingleQuotes "" lexbuf) }
(* bare word: keyword-table lookup, falling back to IDENT *)
| ident as str { get_ident str }
(* ocamllex longest-match means the FLOAT clause still wins for "1.5"
   even though it is listed after INTEGER *)
| digit+ as str { INTEGER (int_of_string str) }
| digit+ '.' digit+ as str { FLOAT (float_of_string str) }
| _ { error lexbuf "ruleMain" }
(* FIXME factor out all that ruleIn* rules *)
(* Body of a double-quoted region: accumulate into [acc]; a doubled
   quote ("") escapes a literal quote; unterminated or multi-line
   quotes are errors. NOTE(review): the terminating-quote clause is
   outside this chunk. *)
ruleInQuotes acc = parse
| eof { error lexbuf "no terminating quote" }
| '\n' { advance_line lexbuf; error lexbuf "EOL before terminating quote" }
| "\"\"" { ruleInQuotes (acc^"\"") lexbuf }
| [^'"' '\n']+ as s { ruleInQuotes (acc^s) lexbuf }
| _ { error lexbuf "ruleInQuotes" }
(* Body of a [bracketed] identifier (MSSQL style): accumulate until the
   closing bracket; no escape sequence is recognized here.
   NOTE(review): the terminating ']' clause is outside this chunk. *)
ruleInBrackets acc = parse
| eof { error lexbuf "no terminating bracket" }
| '\n' { advance_line lexbuf; error lexbuf "EOL before terminating bracket" }
(* | "\"\"" { ruleInQuotes (acc ^ "\"") lexbuf } *)
| [^']' '\n']+ { ruleInBrackets (acc ^ lexeme lexbuf) lexbuf }
| _ { error lexbuf "ruleInBrackets" }
(* Body of a single-quoted string literal: accumulate into [acc]; a
   doubled quote ('') escapes a literal quote; EOF or EOL before the
   closing quote is an error. NOTE(review): the terminating-quote
   clause is outside this chunk. *)
ruleInSingleQuotes acc = parse
| eof { error lexbuf "no terminating single quote" }
| '\n' { advance_line lexbuf; error lexbuf "EOL before terminating single quote" }
| "''" { ruleInSingleQuotes (acc ^ "'") lexbuf }
| [^'\'' '\n']+ { ruleInSingleQuotes (acc ^ lexeme lexbuf) lexbuf }
| _ { error lexbuf "ruleInSingleQuotes" }
(* Body of a backquoted identifier (MySQL style): accumulate into
   [acc]; a doubled backquote escapes a literal one. NOTE(review): the
   terminating-backquote clause is outside this chunk. *)
ruleInBackQuotes acc = parse
| eof { error lexbuf "no terminating back quote" }
| '\n' { advance_line lexbuf; error lexbuf "EOL before terminating back quote" }
| "``" { ruleInBackQuotes (acc ^ "`") lexbuf }
| [^'`' '\n']+ { ruleInBackQuotes (acc ^ lexeme lexbuf) lexbuf }
| _ { error lexbuf "ruleInBackQuotes" }
(* Single-line comment body: accumulate text until end of line, then
   return it; the newline also bumps the line counter. NOTE(review):
   the eof clause is outside this chunk. *)
ruleComment acc = parse
| '\n' { advance_line lexbuf; acc }
| [^'\n']+ { let s = lexeme lexbuf in ruleComment (acc ^ s) lexbuf; }
| _ { error lexbuf "ruleComment"; }
(* C-style block comment body: accumulate text and keep line counts
   correct across embedded newlines. NOTE(review): the clauses for the
   closing "*/" and for lone '*' are outside this chunk. *)
ruleCommentMulti acc = parse
| '\n' { advance_line lexbuf; ruleCommentMulti (acc ^ "\n") lexbuf }
| [^'\n' '*']+ { let s = lexeme lexbuf in ruleCommentMulti (acc ^ s) lexbuf }
| _ { error lexbuf "ruleCommentMulti" }
(* Lexer entry point used by the parser driver: fetch the next token
   from [ruleMain], then filter it through Parser_state.
   NOTE(review): the lines between the token fetch and the final
   expression (the mode check) are outside this chunk; the arm shown
   collapses every token except EOF to IGNORED. *)
let parse_rule lexbuf =
let module P = Parser_state in
let token = ruleMain lexbuf in
(* Printf.eprintf "ignored: %s\n" (lexeme lexbuf); *)
if (token = EOF) then token else IGNORED