(* Report a lexer failure tagged with the name of the rule that detected it,
   then abort parsing via Parsing.Parse_error.
   The lexbuf argument is currently unused (hence the underscore prefix,
   which silences the unused-variable warning); callers still pass it so
   position information could be added to the report later. *)
let error _buf callerID =
  Error.report "Lexer error : %s" callerID;
  raise Parsing.Parse_error
(* (start offset, end offset) of the lexeme just matched in [lexbuf]. *)
let pos lexbuf =
  let first = lexeme_start lexbuf in
  let last = lexeme_end lexbuf in
  (first, last)
(* A copy of [pos] moved to the start of the next line: the line counter is
   bumped and the current offset becomes the new beginning-of-line. *)
let advance_line_pos pos =
  let next_line = pos.pos_lnum + 1 in
  { pos with pos_lnum = next_line; pos_bol = pos.pos_cnum }
(* Record that a newline was consumed: rewrite the lexbuf's current
   position in place using [advance_line_pos]. *)
let advance_line lexbuf =
  let cur = lexbuf.lex_curr_p in
  lexbuf.lex_curr_p <- advance_line_pos cur
(* use Map or Hashtbl ? *)
(* Keyword -> parser-token association list (this excerpt shows only a slice
   of the entries). Matching is case-insensitive: the whole table is
   lowercased before use — see the List.map over [keywords] further down. *)
"precision",PRECISION;
"character",CHARACTER;
"intersect",INTERSECT;
"temporary",TEMPORARY;
"references",REFERENCES;
"timestamp",TIMESTAMP;
"constraint",CONSTRAINT;
(* [all token l] maps every spelling in [l] to the same [token] and appends
   the pairs to the accumulator ref [k] (declared on a line outside this
   excerpt). NOTE(review): [!k @ ...] re-copies the accumulated list on each
   call — O(n^2) overall; negligible at this table size. *)
let all token l = k := !k @ List.map (fun x -> x,token) l in
(* built-in functions, grouped by their (optional) result type *)
all (FUNCTION (Some T.Int)) ["max"; "min"; "length"; "random";"count";"sum";"avg"];
all (FUNCTION (Some T.Text)) ["concat";"lower";"upper"];
all DATETIME_FUNC ["current_date";"current_timestamp";"current_time";"localtime";"localtimestamp";"now";"unix_timestamp"];
all CONFLICT_ALGO ["ignore"; "replace"; "abort"; "fail"; "rollback"];
all JOIN_TYPE1 ["left";"right";"full"];
all JOIN_TYPE2 ["inner";"outer"];
all LIKE_OP ["like";"glob";"regexp";"match"];
all AUTOINCREMENT ["autoincrement";"auto_increment"];
(* standard built-in types
CHARACTER, CHARACTER VARYING, CHARACTER LARGE OBJECT,
BINARY, BINARY VARYING, BINARY LARGE OBJECT,
NUMERIC, DECIMAL, INTEGER, SMALLINT, BIGINT,
FLOAT, REAL, DOUBLE PRECISION,
DATE, TIME, TIMESTAMP, INTERVAL
all T_INTEGER ["integer";"int";"smallint";"bigint";"tinyint";"mediumint";"middleint";"serial";"identity"];
all T_INTEGER ["numeric";"decimal";"dec";"fixed"];
all T_BOOLEAN ["bool";"boolean"];
all T_FLOAT ["float";"real";"double";"float4";"float8";"int1";"int2";"int3";"int4";"int8"];
all T_BLOB ["blob";"varbinary";"tinyblob";"mediumblob";"longblob"];
all T_TEXT ["text";"char";"varchar";"tinytext";"mediumtext";"longtext"];
all T_DATETIME ["datetime";"year";];
134 Q: Why not convert all input to lowercase before lexing?
135 A: Sometimes SQL is case-sensitive, also string contents should be preserved
(* Normalise the keyword table once so lookups can be done against
   lowercased input.
   NOTE(review): String.lowercase is the pre-4.03 Latin-1 variant (removed
   in OCaml 5); the keys here are pure ASCII, so String.lowercase_ascii
   would be a drop-in replacement when upgrading the compiler. *)
let keywords =
  let normalize (k, v) = (String.lowercase k, v) in
  List.map normalize keywords
(* Identifier lookup (the enclosing function's header is outside this
   excerpt): lowercase first so keyword matching is case-insensitive.
   Note the shadowed [str] — non-keyword identifiers are ALSO returned
   lowercased inside IDENT, so IDENT "FOO" and IDENT "foo" compare equal
   downstream. *)
let str = String.lowercase str in
try List.assoc str keywords with Not_found -> IDENT str
(* ocamllex named regular expressions *)
let digit = ['0'-'9']
let alpha = ['a'-'z' 'A'-'Z']
(* identifier: a letter followed by letters, digits or underscores *)
let ident = (alpha) (alpha | digit | '_' )*
(* intra-line whitespace; '\n' is deliberately excluded so the rules can
   call advance_line and keep pos_lnum accurate *)
let wsp = [' ' '\r' '\t']
(* single-line comment openers: SQL "--", C-style "//", MySQL "#" *)
let cmnt = "--" | "//" | "#"
(* Statement-level entry point: skips blanks and comments, harvests
   "[sqlgg] key=value" and "@name" annotation comments into [props], and
   yields [Some (statement_text, props)] for each ';'-terminated statement.
   NOTE(review): some action braces sit on lines elided from this excerpt;
   the code below is kept byte-for-byte. *)
rule ruleStatement props = parse
(* NOTE(review): '\n' is consumed here without calling advance_line,
   unlike in ruleMain — leading blank lines may not bump pos_lnum;
   confirm against the full file. *)
| ['\n' ' ' '\r' '\t']+ { ruleStatement props lexbuf }
(* annotation comment: -- [sqlgg] key = value  (stored in props) *)
| cmnt wsp* "[sqlgg]" wsp+ (ident+ as n) wsp* "=" wsp* ([^'\n']* as v) '\n'
ruleStatement (Props.set props n v) lexbuf
(* shorthand annotation: -- @name  (stores the statement's name) *)
| cmnt wsp* "@" (ident+ as name) [^'\n']* '\n'
ruleStatement (Props.set props "name" name) lexbuf
| cmnt { ignore (ruleComment "" lexbuf); ruleStatement props lexbuf }
| "/*" { ignore (ruleCommentMulti "" lexbuf); ruleStatement props lexbuf }
(* the statement proper: everything from a leading letter up to ';' *)
| alpha [^ ';']+ as stmt ';' { Some (stmt,props) }
(* Token scanner (its rule header is outside this excerpt): whitespace and
   comments recurse, everything else produces one parser token. *)
| wsp { ruleMain lexbuf }
(* update line number *)
| '\n' { advance_line lexbuf; ruleMain lexbuf}
| cmnt { ignore (ruleComment "" lexbuf); ruleMain lexbuf }
| "/*" { ignore (ruleCommentMulti "" lexbuf); ruleMain lexbuf }
(* arithmetic/bitwise operators collapse into one token class; so do the
   comparison operators — the parser does not distinguish within a class *)
| "/" | "%" | "|" | "&" { NUM_BINARY_OP }
| ">" | ">=" | "<=" | "<" | "<>" { COMPARISON_OP }
(* bind parameters: positional '?' and named ':x' / '@x', each carrying
   its source span from [pos] *)
| "?" { PARAM (None,pos lexbuf) }
| [':' '@'] (ident as str) { PARAM (Some str,pos lexbuf) }
(* quoted forms: "..." and `...` are identifiers, '...' is a text literal,
   x'...' produces a BLOB token *)
| '"' { IDENT (ruleInQuotes "" lexbuf) }
| "'" { TEXT (ruleInSingleQuotes "" lexbuf) }
| "`" { IDENT (ruleInBackQuotes "" lexbuf) }
| ['x' 'X'] "'" { BLOB (ruleInSingleQuotes "" lexbuf) }
(* keyword vs. plain identifier is decided inside get_ident *)
| ident as str { get_ident str }
(* longest-match makes "1.5" lex as FLOAT rather than INTEGER then '.' *)
| digit+ as str { INTEGER (int_of_string str) }
| digit+ '.' digit+ as str { FLOAT (float_of_string str) }
| _ { error lexbuf "ruleMain" }
(* Contents of a double-quoted identifier; a doubled "" unescapes to one
   literal '"'. The closing-quote case is on a line elided from this
   excerpt. Accumulates via ^, which is quadratic but fine for short
   tokens. *)
ruleInQuotes acc = parse
| eof { error lexbuf "no terminating quote" }
| '\n' { advance_line lexbuf; error lexbuf "EOL before terminating quote" }
| "\"\"" { ruleInQuotes (acc ^ "\"") lexbuf }
| [^'"' '\n']+ { ruleInQuotes (acc ^ lexeme lexbuf) lexbuf }
| _ { error lexbuf "ruleInQuotes" }
(* Contents of a single-quoted string literal; a doubled '' unescapes to
   one literal "'". The terminating-quote case is on a line elided from
   this excerpt. *)
ruleInSingleQuotes acc = parse
| eof { error lexbuf "no terminating single quote" }
| '\n' { advance_line lexbuf; error lexbuf "EOL before terminating single quote" }
| "''" { ruleInSingleQuotes (acc ^ "'") lexbuf }
| [^'\'' '\n']+ { ruleInSingleQuotes (acc ^ lexeme lexbuf) lexbuf }
| _ { error lexbuf "ruleInSingleQuotes" }
(* Contents of a backquoted (MySQL-style) identifier; a doubled ``
   unescapes to one literal '`'. The terminating-quote case is on a line
   elided from this excerpt. *)
ruleInBackQuotes acc = parse
| eof { error lexbuf "no terminating back quote" }
| '\n' { advance_line lexbuf; error lexbuf "EOL before terminating back quote" }
| "``" { ruleInBackQuotes (acc ^ "`") lexbuf }
| [^'`' '\n']+ { ruleInBackQuotes (acc ^ lexeme lexbuf) lexbuf }
| _ { error lexbuf "ruleInBackQuotes" }
(* Consume a single-line comment and return its accumulated text;
   terminates at '\n' (updating the line counter). Any eof case is on a
   line elided from this excerpt. *)
ruleComment acc = parse
| '\n' { advance_line lexbuf; acc }
| [^'\n']+ { let s = lexeme lexbuf in ruleComment (acc ^ s) lexbuf; }
| _ { error lexbuf "ruleComment"; }
(* Consume a multi-line comment, keeping pos_lnum accurate across embedded
   newlines; the "*/" terminator case is on a line elided from this
   excerpt. NOTE(review): no eof case is visible here — confirm against
   the full file that an unterminated comment is reported cleanly. *)
ruleCommentMulti acc = parse
| '\n' { advance_line lexbuf; ruleCommentMulti (acc ^ "\n") lexbuf }
| [^'\n']+ { let s = lexeme lexbuf in ruleCommentMulti (acc ^ s) lexbuf }
| _ { error lexbuf "ruleCommentMulti" }
(* Glue between the lexer and the parser: fetch the next token from
   ruleMain. Lines elided from this excerpt consult Parser_state
   (aliased P) to pick a mode; in the visible fall-through branch every
   token except EOF is collapsed to IGNORED so whole statements can be
   skipped — presumably while recovering or ignoring a statement; confirm
   against the full file. *)
let parse_rule lexbuf =
let module P = Parser_state in
let token = ruleMain lexbuf in
(* Printf.eprintf "ignored: %s\n" (lexeme lexbuf); *)
if (token = EOF) then token else IGNORED