6 let error buf callerID =
7 Error.report "Lexer error : %s" callerID;
9 raise Parsing.Parse_error
11 let advance_line_pos pos =
12 let module L = Lexing in
13 {L.pos_fname = pos.L.pos_fname;
14 pos_lnum = pos.L.pos_lnum + 1;
15 pos_bol = pos.L.pos_cnum;
16 pos_cnum = pos.L.pos_cnum;}
18 let advance_line lexbuf =
19 lexbuf.Lexing.lex_curr_p <- advance_line_pos lexbuf.Lexing.lex_curr_p
21 (* use Map or Hashtbl ? *)
41 "precision",PRECISION;
49 "character",CHARACTER;
77 "intersect",INTERSECT;
79 "temporary",TEMPORARY;
83 let all token l = k := !k @ List.map (fun x -> x,token) l in
84 all (FUNCTION (Some T.Int)) ["max"; "min"; "length"; "random";"count";"sum"];
85 all (FUNCTION (Some T.Text)) ["concat"];
86 all CONFLICT_ALGO ["ignore"; "replace"; "abort"; "fail"; "rollback"];
87 all JOIN_TYPE1 ["left";"right";"full"];
88 all JOIN_TYPE2 ["inner";"outer"];
89 all LIKE_OP ["like";"glob";"regexp";"match"];
90 all AUTOINCREMENT ["autoincrement";"auto_increment"];
91 (* standard built-in types
92 CHARACTER, CHARACTER VARYING, CHARACTER LARGE OBJECT,
93 BINARY, BINARY VARYING, BINARY LARGE OBJECT,
94 NUMERIC, DECIMAL, INTEGER, SMALLINT, BIGINT,
95 FLOAT, REAL, DOUBLE PRECISION,
97 DATE, TIME, TIMESTAMP, INTERVAL
99 all T_INTEGER ["integer";"int";"smallint";"bigint";"tinyint";"mediumint";"middleint";"serial"];
100 all T_INTEGER ["numeric";"decimal";"dec";"fixed"];
101 all T_BOOLEAN ["bool";"boolean"];
102 all T_FLOAT ["float";"real";"double";"float4";"float8";"int1";"int2";"int3";"int4";"int8"];
103 all T_BLOB ["blob";"varbinary";"tinyblob";"mediumblob";"longblob"];
104 all T_TEXT ["text";"char";"varchar";"tinytext";"mediumtext";"longtext"];
105 all T_DATETIME ["datetime";"date";"time";"timestamp";"year";];
109 Q: Why not convert all input to lowercase before lexing?
110 A: Sometimes SQL is case-sensitive, also string contents should be preserved
113 let keywords = List.map (fun (k,v) -> (String.lowercase k, v)) keywords
116 let str = String.lowercase str in
117 try List.assoc str keywords with Not_found -> IDENT str
120 let digit = ['0'-'9']
121 let alpha = ['a'-'z' 'A'-'Z']
122 let ident = (alpha) (alpha | digit | '_' )*
123 let wsp = [' ' '\r' '\t']
124 let cmnt = "--" | "//" | "#"
126 rule ruleStatement props = parse
127 | ['\n' ' ' '\r' '\t']+ { ruleStatement props lexbuf }
129 | cmnt wsp* "[sqlgg]" wsp+ (ident+ as n) wsp* "=" wsp* ([^'\n']* as v) '\n'
131 ruleStatement (Props.set props n v) lexbuf
133 | cmnt wsp* "@" (ident+ as name) [^'\n']* '\n'
135 ruleStatement (Props.set props "name" name) lexbuf
137 | cmnt { ignore (ruleComment "" lexbuf); ruleStatement props lexbuf }
138 | alpha [^ ';']+ as stmt ';' { Some (stmt,props) }
142 | wsp { ruleMain lexbuf }
143 (* update line number *)
144 | '\n' { advance_line lexbuf; ruleMain lexbuf}
151 | cmnt { ignore (ruleComment "" lexbuf); ruleMain lexbuf }
161 | "/" | "%" | ">" | ">=" | "<=" | "<" | "&" | "|" { NUM_BINARY_OP }
163 | "?" { PARAM Stmt.Next }
164 | "?" (digit+ as str) { PARAM (Stmt.Numbered (int_of_string str)) }
165 | [':' '@'] (ident as str) { PARAM (Stmt.Named str) }
167 | '"' { IDENT (ruleInQuotes "" lexbuf) }
168 | "'" { TEXT (ruleInSingleQuotes "" lexbuf) }
169 | ['x' 'X'] "'" { BLOB (ruleInSingleQuotes "" lexbuf) }
171 | ident as str { get_ident str }
172 | digit+ as str { INTEGER (int_of_string str) }
174 | _ { error lexbuf "ruleMain" }
176 ruleInQuotes acc = parse
178 | eof { error lexbuf "no terminating quote" }
179 | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating quote" }
180 | "\"\"" { ruleInQuotes (acc ^ "\"") lexbuf }
181 | [^'"' '\n']+ { ruleInQuotes (acc ^ Lexing.lexeme lexbuf) lexbuf }
182 | _ { error lexbuf "ruleInQuotes" }
184 ruleInSingleQuotes acc = parse
186 | eof { error lexbuf "no terminating single quote" }
187 | '\n' { advance_line lexbuf; error lexbuf "EOL before terminating single quote" }
188 | "''" { ruleInSingleQuotes (acc ^ "'") lexbuf }
189 | [^'\'' '\n']+ { ruleInSingleQuotes (acc ^ Lexing.lexeme lexbuf) lexbuf }
190 | _ { error lexbuf "ruleInSingleQuotes" }
192 ruleComment acc = parse
193 | '\n' { advance_line lexbuf; acc }
195 | [^'\n']+ { let s = Lexing.lexeme lexbuf in ruleComment (acc ^ s) lexbuf; }
196 | _ { error lexbuf "ruleComment"; }
200 let parse_rule lexbuf = ruleMain lexbuf