9 Error.report "Lexer error : %s" callerID;
11 raise Parsing.Parse_error
13 let pos lexbuf = (lexeme_start lexbuf, lexeme_end lexbuf)
15 let advance_line_pos pos =
16 { pos with pos_lnum = pos.pos_lnum + 1; pos_bol = pos.pos_cnum; }
18 let advance_line lexbuf =
19 lexbuf.lex_curr_p <- advance_line_pos lexbuf.lex_curr_p
40 "precision",PRECISION;
48 "character",CHARACTER;
78 "intersect",INTERSECT;
80 "temporary",TEMPORARY;
87 "references",REFERENCES;
91 "timestamp",TIMESTAMP;
97 "constraint",CONSTRAINT;
112 let all token l = k := !k @ List.map (fun x -> x,token) l in
113 all (FUNCTION (T.Int,true)) ["max"; "min"; "count";"sum";"avg"];
114 all (FUNCTION (T.Int,false)) ["length"; "random";"unix_timestamp"];
115 all (FUNCTION (T.Int,false)) ["least"; "greatest"];
116 all (FUNCTION (T.Text,false)) ["concat";"lower";"upper"];
117 all (FUNCTION (T.Any,false)) ["coalesce"];
118 all DATETIME_FUNC ["current_date";"current_timestamp";"current_time";"localtime";"localtimestamp";"now"];
119 all CONFLICT_ALGO ["ignore"; "abort"; "fail"; "rollback"];
120 all JOIN_TYPE1 ["left";"right";"full"];
121 all JOIN_TYPE2 ["inner";"outer"];
122 all LIKE_OP ["glob";"regexp";"match"];
123 all AUTOINCREMENT ["autoincrement";"auto_increment"];
124 (* standard built-in types
125 CHARACTER, CHARACTER VARYING, CHARACTER LARGE OBJECT,
126 BINARY, BINARY VARYING, BINARY LARGE OBJECT,
127 NUMERIC, DECIMAL, INTEGER, SMALLINT, BIGINT,
128 FLOAT, REAL, DOUBLE PRECISION,
130 DATE, TIME, TIMESTAMP, INTERVAL
132 all T_INTEGER ["integer";"int";"smallint";"bigint";"tinyint";"mediumint";"middleint";"serial";"identity"];
133 all T_INTEGER ["numeric";"decimal";"dec";"fixed"];
134 all T_INTEGER ["number"]; (* oracle *)
135 all T_BOOLEAN ["bool";"boolean"];
136 all T_FLOAT ["float";"real";"double";"float4";"float8";"int1";"int2";"int3";"int4";"int8"];
137 all T_BLOB ["blob";"varbinary";"tinyblob";"mediumblob";"longblob"];
138 all T_TEXT ["text";"char";"varchar";"tinytext";"mediumtext";"longtext"];
139 all T_TEXT ["varchar2"]; (* oracle *)
140 all T_DATETIME ["datetime";"year";];
144 Q: Why not convert all input to lowercase before lexing?
145 A: Sometimes SQL is case-sensitive, also string contents should be preserved
148 module Keywords = Map.Make(String)
152 let k = String.lowercase k in
153 if Keywords.mem k map then
154 failwith (Printf.sprintf "Lexeme %s is already associated with keyword." k)
158 List.fold_left add Keywords.empty keywords
161 let str = String.lowercase str in
162 try Keywords.find str keywords with Not_found -> IDENT str
164 let as_literal ch s =
165 let s = String.replace_chars (fun x -> String.make (if x = ch then 2 else 1) x) s in
166 Printf.sprintf "%c%s%c" ch s ch
169 let digit = ['0'-'9']
170 let alpha = ['a'-'z' 'A'-'Z']
171 let ident = (alpha) (alpha | digit | '_' )*
172 let wsp = [' ' '\r' '\t']
173 let cmnt = "--" | "//" | "#"
175 (* extract separate statements *)
176 rule ruleStatement = parse
177 | ['\n' ' ' '\r' '\t']+ as tok { `Space tok }
178 | cmnt wsp* "[sqlgg]" wsp+ (ident+ as n) wsp* "=" wsp* ([^'\n']* as v) '\n' { `Prop (n,v) }
179 | cmnt wsp* "@" (ident+ as name) [^'\n']* '\n' { `Prop ("name",name) }
180 | '"' { let s = ruleInQuotes "" lexbuf in `Token (as_literal '"' s) }
181 | "'" { let s = ruleInSingleQuotes "" lexbuf in `Token (as_literal '\'' s) }
182 | cmnt as s { `Comment (s ^ ruleComment "" lexbuf) }
183 | "/*" { `Comment ("/*" ^ ruleCommentMulti "" lexbuf ^ "*/") }
185 | [^ ';'] as c { `Char c }
188 (* extract tail of the input *)
191 | _* as str { ruleTail (acc ^ str) lexbuf }
194 | wsp { ruleMain lexbuf }
195 (* update line number *)
196 | '\n' { advance_line lexbuf; ruleMain lexbuf}
203 | cmnt { ignore (ruleComment "" lexbuf); ruleMain lexbuf }
204 | "/*" { ignore (ruleCommentMulti "" lexbuf); ruleMain lexbuf }
214 | "/" | "%" { NUM_DIV_OP }
215 | "<<" | ">>" | "|" | "&" { NUM_BIT_OP }
216 | ">" | ">=" | "<=" | "<" { NUM_CMP_OP }
217 | "<>" | "!=" | "==" { NUM_EQ_OP }
219 | "?" { PARAM (None,pos lexbuf) }
220 | [':' '@'] (ident as str) { PARAM (Some str,pos lexbuf) }
222 | '"' { IDENT (ruleInQuotes "" lexbuf) }
223 | "'" { TEXT (ruleInSingleQuotes "" lexbuf) }
224 | "`" { IDENT (ruleInBackQuotes "" lexbuf) }
225 | "[" { IDENT (ruleInBrackets "" lexbuf) }
226 | ['x' 'X'] "'" { BLOB (ruleInSingleQuotes "" lexbuf) }
228 | ident as str { get_ident str }
229 | digit+ as str { INTEGER (int_of_string str) }
230 | digit+ '.' digit+ as str { FLOAT (float_of_string str) }
232 | _ { error lexbuf "ruleMain" }
234 (* FIXME factor out all that ruleIn* rules *)
235 ruleInQuotes acc = parse
237 | eof { error lexbuf "no terminating quote" }
238 | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating quote" }
239 | "\"\"" { ruleInQuotes (acc^"\"") lexbuf }
240 | [^'"' '\n']+ as s { ruleInQuotes (acc^s) lexbuf }
241 | _ { error lexbuf "ruleInQuotes" }
243 ruleInBrackets acc = parse
245 | eof { error lexbuf "no terminating bracket" }
246 | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating bracket" }
247 (* | "\"\"" { ruleInQuotes (acc ^ "\"") lexbuf } *)
248 | [^']' '\n']+ { ruleInBrackets (acc ^ lexeme lexbuf) lexbuf }
249 | _ { error lexbuf "ruleInBrackets" }
251 ruleInSingleQuotes acc = parse
253 | eof { error lexbuf "no terminating single quote" }
254 | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating single quote" }
255 | "''" { ruleInSingleQuotes (acc ^ "'") lexbuf }
256 | [^'\'' '\n']+ { ruleInSingleQuotes (acc ^ lexeme lexbuf) lexbuf }
257 | _ { error lexbuf "ruleInSingleQuotes" }
259 ruleInBackQuotes acc = parse
261 | eof { error lexbuf "no terminating back quote" }
262 | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating back quote" }
263 | "``" { ruleInBackQuotes (acc ^ "`") lexbuf }
264 | [^'`' '\n']+ { ruleInBackQuotes (acc ^ lexeme lexbuf) lexbuf }
265 | _ { error lexbuf "ruleInBackQuotes" }
267 ruleComment acc = parse
268 | '\n' { advance_line lexbuf; acc }
270 | [^'\n']+ { let s = lexeme lexbuf in ruleComment (acc ^ s) lexbuf; }
271 | _ { error lexbuf "ruleComment"; }
273 ruleCommentMulti acc = parse
274 | '\n' { advance_line lexbuf; ruleCommentMulti (acc ^ "\n") lexbuf }
277 | [^'\n' '*']+ { let s = lexeme lexbuf in ruleCommentMulti (acc ^ s) lexbuf }
278 | _ { error lexbuf "ruleCommentMulti" }
282 let parse_rule lexbuf =
283 let module P = Parser_state in
284 let token = ruleMain lexbuf in
288 (* Printf.eprintf "ignored: %s\n" (lexeme lexbuf); *)
289 if (token = EOF) then token else IGNORED