let error buf callerID =
  Error.report "Lexer error: %s" callerID;
  raise Parsing.Parse_error
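(* advance the Lexing position to the next line: bump [pos_lnum] and move
   [pos_bol] to the current offset so column numbers restart after the newline *)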
let advance_line_pos pos =
  let module L = Lexing in
  { L.pos_fname = pos.L.pos_fname;
    pos_lnum = pos.L.pos_lnum + 1;
    pos_bol = pos.L.pos_cnum;
    pos_cnum = pos.L.pos_cnum; }
let advance_line lexbuf =
  lexbuf.Lexing.lex_curr_p <- advance_line_pos lexbuf.Lexing.lex_curr_p
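(* ocamllex does not count lines by itself, so every rule below that consumes
   a '\n' calls [advance_line] to keep error positions accurate *)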
(* use Map or Hashtbl ? (see the lookup sketch after get_ident below) *)
  "precision",PRECISION;
  "character",CHARACTER;
  "intersect",INTERSECT;
  "temporary",TEMPORARY;
  "references",REFERENCES;
  "timestamp",TIMESTAMP;
  "constraint",CONSTRAINT;
  let all token l = k := !k @ List.map (fun x -> x,token) l in
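  (* each [all] call below registers a whole family of names under a single
     token by appending (name, token) pairs to the table being built in [k] *)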
  all (FUNCTION (Some T.Int)) ["max"; "min"; "length"; "random"; "count"; "sum"; "avg"];
  all (FUNCTION (Some T.Text)) ["concat"; "lower"; "upper"];
  all DATETIME_FUNC ["current_date"; "current_timestamp"; "current_time"; "localtime"; "localtimestamp"];
  all CONFLICT_ALGO ["ignore"; "replace"; "abort"; "fail"; "rollback"];
  all JOIN_TYPE1 ["left"; "right"; "full"];
  all JOIN_TYPE2 ["inner"; "outer"];
  all LIKE_OP ["like"; "glob"; "regexp"; "match"];
  all AUTOINCREMENT ["autoincrement"; "auto_increment"];
  (* standard built-in types
       CHARACTER, CHARACTER VARYING, CHARACTER LARGE OBJECT,
       BINARY, BINARY VARYING, BINARY LARGE OBJECT,
       NUMERIC, DECIMAL, INTEGER, SMALLINT, BIGINT,
       FLOAT, REAL, DOUBLE PRECISION,
       DATE, TIME, TIMESTAMP, INTERVAL
  *)
  all T_INTEGER ["integer"; "int"; "smallint"; "bigint"; "tinyint"; "mediumint"; "middleint"; "serial"; "identity"];
  all T_INTEGER ["numeric"; "decimal"; "dec"; "fixed"];
  all T_BOOLEAN ["bool"; "boolean"];
  all T_FLOAT ["float"; "real"; "double"; "float4"; "float8"; "int1"; "int2"; "int3"; "int4"; "int8"];
  all T_BLOB ["blob"; "varbinary"; "tinyblob"; "mediumblob"; "longblob"];
  all T_TEXT ["text"; "char"; "varchar"; "tinytext"; "mediumtext"; "longtext"];
  all T_DATETIME ["datetime"; "year"];
(*
  Q: Why not convert all input to lowercase before lexing?
  A: Sometimes SQL is case-sensitive; also, string contents should be preserved.
*)
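(* e.g. in [SELECT Name FROM t WHERE s = 'MiXeD'] only the identifier [Name]
   is matched against keywords case-insensitively (via get_ident below); the
   literal is emitted as a TEXT token with its case preserved *)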
let keywords = List.map (fun (k,v) -> (String.lowercase k, v)) keywords
let get_ident str =
  let str = String.lowercase str in
  try List.assoc str keywords with Not_found -> IDENT str
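(* Re the "use Map or Hashtbl ?" note above: a minimal sketch of a hash-based
   lookup. [keywords_tbl] and [get_ident_tbl] are hypothetical names, not part
   of the original lexer, which keeps using [List.assoc]. *)
let keywords_tbl =
  let h = Hashtbl.create 128 in
  List.iter (fun (k, v) -> Hashtbl.replace h k v) keywords;
  h

let get_ident_tbl str =
  let str = String.lowercase str in
  try Hashtbl.find keywords_tbl str with Not_found -> IDENT str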
let digit = ['0'-'9']
let alpha = ['a'-'z' 'A'-'Z']
let ident = (alpha) (alpha | digit | '_')*
let wsp = [' ' '\r' '\t']
let cmnt = "--" | "//" | "#"
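(* a line comment may start with SQL's "--", C++-style "//" or "#"; plain
   comments are then consumed up to end of line by ruleComment below *)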
rule ruleStatement props = parse
  | ['\n' ' ' '\r' '\t']+ { ruleStatement props lexbuf }
  | cmnt wsp* "[sqlgg]" wsp+ (ident+ as n) wsp* "=" wsp* ([^'\n']* as v) '\n'
      { ruleStatement (Props.set props n v) lexbuf }
  | cmnt wsp* "@" (ident+ as name) [^'\n']* '\n'
      { ruleStatement (Props.set props "name" name) lexbuf }
  | cmnt { ignore (ruleComment "" lexbuf); ruleStatement props lexbuf }
  | alpha [^ ';']+ as stmt ';' { Some (stmt,props) }
and ruleMain = parse
  | wsp { ruleMain lexbuf }
  (* update line number *)
  | '\n' { advance_line lexbuf; ruleMain lexbuf }
  | cmnt { ignore (ruleComment "" lexbuf); ruleMain lexbuf }
  | "/" | "%" | "|" | "&" { NUM_BINARY_OP }
  | ">" | ">=" | "<=" | "<" | "<>" { COMPARISON_OP }
  | "?" { PARAM Stmt.Next }
  | "?" (digit+ as str) { PARAM (Stmt.Numbered (int_of_string str)) }
  | [':' '@'] (ident as str) { PARAM (Stmt.Named str) }
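  (* ocamllex takes the longest match, so "?1" is lexed as a numbered parameter
     rather than a bare "?" followed by an integer; ":name" and "@name" give
     named parameters *)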
  | '"' { IDENT (ruleInQuotes "" lexbuf) }
  | "'" { TEXT (ruleInSingleQuotes "" lexbuf) }
  | "`" { IDENT (ruleInBackQuotes "" lexbuf) }
  | ['x' 'X'] "'" { BLOB (ruleInSingleQuotes "" lexbuf) }
  | ident as str { get_ident str }
  | digit+ as str { INTEGER (int_of_string str) }
  | digit+ '.' digit+ as str { FLOAT (float_of_string str) }
  | _ { error lexbuf "ruleMain" }
and ruleInQuotes acc = parse
  | '"' { acc }
  | eof { error lexbuf "no terminating quote" }
  | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating quote" }
  | "\"\"" { ruleInQuotes (acc ^ "\"") lexbuf }
  | [^'"' '\n']+ { ruleInQuotes (acc ^ Lexing.lexeme lexbuf) lexbuf }
  | _ { error lexbuf "ruleInQuotes" }
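(* in all three quoted forms a doubled delimiter (two double quotes, two single
   quotes or two backquotes) stands for one literal delimiter character *)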
and ruleInSingleQuotes acc = parse
  | '\'' { acc }
  | eof { error lexbuf "no terminating single quote" }
  | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating single quote" }
  | "''" { ruleInSingleQuotes (acc ^ "'") lexbuf }
  | [^'\'' '\n']+ { ruleInSingleQuotes (acc ^ Lexing.lexeme lexbuf) lexbuf }
  | _ { error lexbuf "ruleInSingleQuotes" }
and ruleInBackQuotes acc = parse
  | '`' { acc }
  | eof { error lexbuf "no terminating back quote" }
  | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating back quote" }
  | "``" { ruleInBackQuotes (acc ^ "`") lexbuf }
  | [^'`' '\n']+ { ruleInBackQuotes (acc ^ Lexing.lexeme lexbuf) lexbuf }
  | _ { error lexbuf "ruleInBackQuotes" }
and ruleComment acc = parse
  | '\n' { advance_line lexbuf; acc }
  | eof { acc }
  | [^'\n']+ { ruleComment (acc ^ Lexing.lexeme lexbuf) lexbuf }
  | _ { error lexbuf "ruleComment" }
let parse_rule lexbuf =
  let module P = Parser_state in
  let token = ruleMain lexbuf in
  (* Printf.eprintf "ignored: %s\n" (Lexing.lexeme lexbuf); *)
  if token = EOF then token else IGNORED
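(* A minimal driver sketch (not part of the lexer): ruleStatement yields
   [Some (stmt, props)] for each ';'-terminated statement; [Props.empty] and
   returning [None] at eof are assumptions about code outside this excerpt.

   let statements_of_string s =
     let lexbuf = Lexing.from_string s in
     let rec loop acc =
       match ruleStatement Props.empty lexbuf with
       | Some x -> loop (x :: acc)
       | None -> List.rev acc
     in
     loop []
*)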