6 let error buf callerID =
7 Error.report "Lexer error : %s" callerID;
9 raise Parsing.Parse_error
11 let advance_line_pos pos =
12 let module L = Lexing in
13 {L.pos_fname = pos.L.pos_fname;
14 pos_lnum = pos.L.pos_lnum + 1;
15 pos_bol = pos.L.pos_cnum;
16 pos_cnum = pos.L.pos_cnum;}
18 let advance_line lexbuf =
19 lexbuf.Lexing.lex_curr_p <- advance_line_pos lexbuf.Lexing.lex_curr_p
21 (* use Map or Hashtbl ? *)
41 "precision",PRECISION;
49 "character",CHARACTER;
80 "intersect",INTERSECT;
82 "temporary",TEMPORARY;
86 let all token l = k := !k @ List.map (fun x -> x,token) l in
87 all (FUNCTION (Some T.Int)) ["max"; "min"; "length"; "random";"count";"sum";"avg"];
88 all (FUNCTION (Some T.Text)) ["concat";"lower";"upper"];
89 all (FUNCTION (Some T.Datetime)) ["current_date";"current_timestamp";"current_time"];
90 all CONFLICT_ALGO ["ignore"; "replace"; "abort"; "fail"; "rollback"];
91 all JOIN_TYPE1 ["left";"right";"full"];
92 all JOIN_TYPE2 ["inner";"outer"];
93 all LIKE_OP ["like";"glob";"regexp";"match"];
94 all AUTOINCREMENT ["autoincrement";"auto_increment"];
95 (* standard built-in types
96 CHARACTER, CHARACTER VARYING, CHARACTER LARGE OBJECT,
97 BINARY, BINARY VARYING, BINARY LARGE OBJECT,
98 NUMERIC, DECIMAL, INTEGER, SMALLINT, BIGINT,
99 FLOAT, REAL, DOUBLE PRECISION,
101 DATE, TIME, TIMESTAMP, INTERVAL
103 all T_INTEGER ["integer";"int";"smallint";"bigint";"tinyint";"mediumint";"middleint";"serial"];
104 all T_INTEGER ["numeric";"decimal";"dec";"fixed"];
105 all T_BOOLEAN ["bool";"boolean"];
106 all T_FLOAT ["float";"real";"double";"float4";"float8";"int1";"int2";"int3";"int4";"int8"];
107 all T_BLOB ["blob";"varbinary";"tinyblob";"mediumblob";"longblob"];
108 all T_TEXT ["text";"char";"varchar";"tinytext";"mediumtext";"longtext"];
109 all T_DATETIME ["datetime";"date";"time";"timestamp";"year";];
113 Q: Why not convert all input to lowercase before lexing?
114 A: Sometimes SQL is case-sensitive, also string contents should be preserved
117 let keywords = List.map (fun (k,v) -> (String.lowercase k, v)) keywords
120 let str = String.lowercase str in
121 try List.assoc str keywords with Not_found -> IDENT str
124 let digit = ['0'-'9']
125 let alpha = ['a'-'z' 'A'-'Z']
126 let ident = (alpha) (alpha | digit | '_' )*
127 let wsp = [' ' '\r' '\t']
128 let cmnt = "--" | "//" | "#"
130 rule ruleStatement props = parse
131 | ['\n' ' ' '\r' '\t']+ { ruleStatement props lexbuf }
133 | cmnt wsp* "[sqlgg]" wsp+ (ident+ as n) wsp* "=" wsp* ([^'\n']* as v) '\n'
135 ruleStatement (Props.set props n v) lexbuf
137 | cmnt wsp* "@" (ident+ as name) [^'\n']* '\n'
139 ruleStatement (Props.set props "name" name) lexbuf
141 | cmnt { ignore (ruleComment "" lexbuf); ruleStatement props lexbuf }
142 | alpha [^ ';']+ as stmt ';' { Some (stmt,props) }
146 | wsp { ruleMain lexbuf }
147 (* update line number *)
148 | '\n' { advance_line lexbuf; ruleMain lexbuf}
155 | cmnt { ignore (ruleComment "" lexbuf); ruleMain lexbuf }
165 | "/" | "%" | "|" | "&" { NUM_BINARY_OP }
166 | ">" | ">=" | "<=" | "<" | "<>" { COMPARISON_OP }
168 | "?" { PARAM Stmt.Next }
169 | "?" (digit+ as str) { PARAM (Stmt.Numbered (int_of_string str)) }
170 | [':' '@'] (ident as str) { PARAM (Stmt.Named str) }
172 | '"' { IDENT (ruleInQuotes "" lexbuf) }
173 | "'" { TEXT (ruleInSingleQuotes "" lexbuf) }
174 | ['x' 'X'] "'" { BLOB (ruleInSingleQuotes "" lexbuf) }
176 | ident as str { get_ident str }
177 | digit+ as str { INTEGER (int_of_string str) }
178 | digit+ '.' digit+ as str { FLOAT (float_of_string str) }
180 | _ { error lexbuf "ruleMain" }
182 ruleInQuotes acc = parse
184 | eof { error lexbuf "no terminating quote" }
185 | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating quote" }
186 | "\"\"" { ruleInQuotes (acc ^ "\"") lexbuf }
187 | [^'"' '\n']+ { ruleInQuotes (acc ^ Lexing.lexeme lexbuf) lexbuf }
188 | _ { error lexbuf "ruleInQuotes" }
190 ruleInSingleQuotes acc = parse
192 | eof { error lexbuf "no terminating single quote" }
193 | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating single quote" }
194 | "''" { ruleInSingleQuotes (acc ^ "'") lexbuf }
195 | [^'\'' '\n']+ { ruleInSingleQuotes (acc ^ Lexing.lexeme lexbuf) lexbuf }
196 | _ { error lexbuf "ruleInSingleQuotes" }
198 ruleComment acc = parse
199 | '\n' { advance_line lexbuf; acc }
201 | [^'\n']+ { let s = Lexing.lexeme lexbuf in ruleComment (acc ^ s) lexbuf; }
202 | _ { error lexbuf "ruleComment"; }
206 let parse_rule lexbuf = ruleMain lexbuf