dbfwork: 'to_number(x)' is treated as simply 'x'
[ocaml-dbf.git] / dbfwork.ml
blob5085cb32cff148e0bb0955c4e78143a89917a29a
1 open Mlxbase
4 open Simplesqllex
7 open Simplesqlcommon
11 value default_codepage = "866"
16 value rec dump () =
17 let t = sqltoken lb in
18 let () =
19 Printf.printf "%s\n%!" (string_of_sqltoken t)
21 if t = Eof
22 then ()
23 else dump ()
26 dump ();
32 value xbasetype_of_oratype ot =
33 match (String.lowercase ot) with
34 [ "varchar2" -> CT_Character
35 | "number" -> CT_Number
36 | "date" -> CT_Date
37 | x -> failwith (Printf.sprintf "unsupported oracle type %S" x)
41 value xbasecolspec_of_oracolspec ocs =
42 let (nam, ty, optsz) = ocs in
43 let (fsz, fdc) =
44 match optsz with
45 [ None -> (0, 0)
46 | Some szs -> szs
49 let xty = xbasetype_of_oratype ty
51 { col_name = nam
52 ; col_type = xty
53 ; field_size = if xty = CT_Date then 8 else fsz
54 ; field_decim = fdc
59 value writers = Hashtbl.create 5
63 value exec_createtable nam lcs =
64 let () = assert (not (Hashtbl.mem writers nam)) in
65 let xbspecs = (List.map xbasecolspec_of_oracolspec lcs) in
66 let pcs =
67 List.fold_left
68 (fun acc cs ->
69 if (List.mem_assoc cs.col_name acc)
70 then
71 failwith (Printf.sprintf
72 "Column names must be unique (%S)" cs.col_name)
73 else
74 [ (cs.col_name, (cs.col_type, (cs.field_size, cs.field_decim)))
75 :: acc ]
78 xbspecs
80 let colord = Hashtbl.create (List.length pcs) in
81 let () = do
82 { inner 0 lcs
83 where rec inner i lcs =
84 match lcs with
85 [ [] -> ()
86 | [(nam, _, _) :: tl] ->
87 let () = Hashtbl.add colord nam i in
88 let () = Printf.printf "%s is %i's column\n" nam i in
89 inner (i+1) tl
93 let () = Printf.printf "pcs len = %i\n" (List.length pcs) in
94 let w : dbf_writer =
95 open_writer
96 ~encode:Cpcvt.default_converter.Cpcvt.exporter
97 (nam ^ ".dbf")
98 xbspecs
99 default_codepage
101 Hashtbl.add writers nam (w, pcs, colord)
105 value ident_null = "NULL"
108 value ident_is_null i =
109 (compare_ident ident_null i) = 0
113 value gente tyexp =
114 fun t ->
115 failwith (Printf.sprintf "type error: %s expected, %s got" tyexp t)
119 value stringval_of_sqlexpr se =
120 let te = gente "string"
122 match se with
123 [ SEStr s -> s
124 | SEIdent i when ident_is_null i -> ""
125 | SENum _ -> te "number"
126 | SECall _ _ -> te "function call"
127 | SEIdent _ -> te "identifier"
132 value num_parse str =
133 let () =
134 String.iter
135 (fun c ->
136 if c >= '0' && c <= '9'
137 then ()
138 else failwith (Printf.sprintf "number %S contains non-digits" str)
142 Some str
146 type tmpdate =
147 { dyear : option string
148 ; dmon : option string
149 ; dday : option string
154 value charlist_of_string s =
155 inner [] ((String.length s) - 1)
156 where rec inner acc i =
157 if i < 0
158 then acc
159 else inner [s.[i] :: acc] (i - 1)
163 value string_of_charlist cl =
164 let r = String.make (List.length cl) ' ' in
165 let (_:int) = List.fold_left
166 (fun i c -> do { r.[i] := c; i+1 } )
174 value exec_todate args =
175 match args with
176 [ [se_datetext; se_dateformat] ->
177 match (se_datetext, se_dateformat) with
178 [ (SEStr datetext, SEStr dateformat) ->
179 let optset ov cl =
180 let () = assert (ov = None) in
181 Some (string_of_charlist cl)
182 and nomatch () =
183 failwith (Printf.sprintf
184 "string %S doesn't match date format %S"
185 datetext dateformat)
187 date_of_fmt
188 { dyear = None; dmon = None; dday = None }
189 (charlist_of_string datetext)
190 (charlist_of_string dateformat)
191 where rec date_of_fmt acc dt df =
192 match (df, dt) with
193 [ (['D'; 'D' :: dftl], [d1; d2 :: dttl]) ->
194 date_of_fmt
195 {(acc) with dday = optset acc.dday [d1; d2]}
196 dttl dftl
197 | (['M'; 'M' :: dftl], [m1; m2 :: dttl]) ->
198 date_of_fmt
199 {(acc) with dmon = optset acc.dmon [m1; m2]}
200 dttl dftl
201 | (['R'; 'R'; 'R'; 'R' :: dftl], [y1;y2;y3;y4::dttl])
202 | (['Y'; 'Y'; 'Y'; 'Y' :: dftl], [y1;y2;y3;y4::dttl]) ->
203 date_of_fmt
204 {(acc) with dyear = optset acc.dyear [y1;y2;y3;y4]}
205 dttl dftl
206 | (['M'; 'I' :: dftl], [_;_ :: dttl])
207 | (['H'; 'H'; '2'; '4' :: dftl], [_;_ :: dttl])
208 | (['H'; 'H'; '1'; '2' :: dftl], [_;_ :: dttl])
209 | (['H'; 'H' :: dftl], [_;_ :: dttl])
210 | (['S'; 'S' :: dftl], [_;_ :: dttl]) ->
211 date_of_fmt acc dttl dftl
213 | ([fc :: dftl], [tc :: dttl]) ->
214 if fc = tc
215 then
216 date_of_fmt acc dttl dftl
217 else
218 nomatch ()
219 | ([], []) ->
220 match (acc.dyear, acc.dmon, acc.dday) with
221 [ (Some y, Some m, Some d) ->
222 let () = assert
223 ( ((String.length y) = 4)
224 && ((String.length m) = 2)
225 && ((String.length d) = 2)
228 Some (y ^ m ^ d)
229 | _ -> failwith "year, month and day must be defined"
232 | ([], _) | (_, []) -> nomatch ()
234 | (SENum _, SENum _) | (SENum _, SEStr _) | (SENum _, SECall _ _)
235 | (SENum _, SEIdent _) | (SEStr _, SENum _) | (SEStr _, SECall _ _)
236 | (SEStr _, SEIdent _) | (SECall _ _, SECall _ _)
237 | (SECall _ _, SENum _) | (SECall _ _, SEStr _)
238 | (SECall _ _, SEIdent _) | (SEIdent _, SENum _) | (SEIdent _, SEStr _)
239 | (SEIdent _, SECall _ _) | (SEIdent _, SEIdent _)
240 -> failwith "invalid argument types for TO_DATE"
242 | [ SEIdent i ] when ident_is_null i -> None
243 | _ -> failwith "invalid argument(s) for TO_DATE"
248 value dateval_of_sqlexpr se =
249 let te = gente "date" in
250 match se with
251 [ SEStr _ -> te "string"
252 | SEIdent i ->
253 if ident_is_null i
254 then None
255 else te "identifier"
256 | SENum _ -> te "number"
257 | SECall funname funargs ->
258 if (compare_ident funname "to_date") = 0
259 then
260 exec_todate funargs
261 else
262 te "unknown function call"
268 value lrpad dir padch sz s =
269 let slen = String.length s in
270 let () = assert (sz >= slen) in
271 if slen = sz
272 then s
273 else
274 let padding = (String.make (sz - slen) padch) in
275 if dir
276 then padding ^ s
277 else s ^ padding
281 value lpad = lrpad True
282 and rpad = lrpad False
286 value rec optnumval_of_sqlexpr se =
287 let te = gente "number"
289 match se with
290 [ SEStr str ->
291 if str = ""
292 then None
293 else Some (split_number str)
295 failwith
296 (Printf.sprintf
297 "optnumval_of_sqlexpr: ᤥ« âì â ª¨ ¯ àá¥à : string -> option (string * string) ¤«ï %S."
302 if .... ᤥ« âì â ª¨ ¯ àá¥à : string -> option (string * string)
303 then Some str
304 else failwith (Printf.sprintf
305 "type error: number expected, string %S got" str
308 | SEIdent i when ident_is_null i -> None
309 | SECall funname [funarg]
310 when (compare_ident funname "to_number") = 0
312 optnumval_of_sqlexpr funarg
313 | SECall _ _ ->
314 te "unknown function call"
315 | SEIdent _ -> te "identifier"
316 | SENum a b -> Some (a, b)
321 value xbase_format_value ty (field_size, field_decim) v =
322 match ty with
323 [ CT_Character ->
324 let sv = stringval_of_sqlexpr v in
325 let sv_len = String.length sv in
326 if sv_len > field_size
327 then failwith "string value does not fit column"
328 else
329 rpad ' ' field_size sv
330 | CT_Number ->
331 let osv = optnumval_of_sqlexpr v in
332 match osv with
333 [ None -> lpad ' ' field_size ""
334 | Some (n_int, n_frac) ->
335 if String.length n_frac > field_decim
336 then failwith (Printf.sprintf
337 "number %s has more than %i fractional digits"
338 (string_of_sqlexpr v) field_decim)
339 else
340 if field_decim = 0
341 then
342 lpad ' ' field_size n_int
343 else
344 (lpad ' ' (field_size - field_decim - 1) n_int)
345 ^ "."
346 ^ (rpad '0' field_decim n_frac)
348 | CT_Date ->
349 match (dateval_of_sqlexpr v) with
350 [ None -> lpad ' ' 8 ""
351 | Some v -> v
353 | CT_Logical | CT_Memo | CT_Float | CT_Binary | CT_General
354 | CT_Picture | CT_Currency | CT_Datetime | CT_Integer
355 | CT_Varifield | CT_VariantX | CT_Timestamp | CT_Double
356 | CT_Autoincrement | CT_NullFlags ->
357 failwith "field type not supported"
362 value exec_insert nam cols vals =
363 let (w, pcs, ord) =
364 try Hashtbl.find writers nam
365 with
366 [ Not_found -> failwith "table must be created in this 'session'"
369 let pcs_count = List.length pcs in
370 let () = assert ((List.length cols) = (List.length vals)) in
371 let () = Printf.printf "cols_count = %i\n" pcs_count in
372 let xvals = Array.make pcs_count None in
373 let setval colname v =
374 let i = Hashtbl.find ord colname in
375 let () = Printf.printf "setval ord: %i / %i\n" i (Array.length xvals) in
376 let () = assert (xvals.(i) = None) in
377 xvals.(i) := Some (v)
379 let () =
380 inner cols vals
381 where rec inner cols vals =
382 match (cols, vals) with
383 [ ([], []) -> ()
384 | ([], _) | (_, []) -> assert False
385 | ([colname::colstl], [v::valstl]) ->
386 let (ty, wid) = List.assoc colname pcs in
387 let fv =
388 try xbase_format_value ty wid v
389 with
390 [ e -> failwith (Printf.sprintf
391 "error formatting value for column %s: %s"
392 colname (Printexc.to_string e))
395 let () = setval colname fv in
396 inner colstl valstl
399 let xbase_vals =
400 Array.fold_right
401 (fun optv acc ->
402 match optv with
403 [ None ->
404 let () = print_string "insert: value for column not specified\n"
405 in [("") :: acc]
406 | Some v -> [v :: acc]
409 xvals
412 write_record w xbase_vals
416 value exec_cmd cmd =
417 let () = Printf.printf "%s\n===============\n%!" (string_of_sqlcmd cmd) in
418 match cmd with
419 [ Create_table nam lcs -> exec_createtable nam lcs
420 | Insert nam cols vals -> exec_insert nam cols vals
421 | Ignore_command -> ()
426 value cmdlist_of_file exec_cmd fn =
427 let lb = Lexing.from_channel (open_in_bin fn) in
428 Simplesqlparse.main Simplesqllex.sqltoken lb
430 value exec_cmdlist = List.iter
431 (fun cmd -> do
432 ; exec_cmd cmd
438 value () =
439 Simplesqlcommon.execute_sql_command_val.val := exec_cmd
443 value exec_file fn =
445 exec_cmdlist (cmdlist_of_file fn)
447 let lb = Lexing.from_channel
448 (if fn = "-"
449 then do { set_binary_mode_in stdin True; stdin }
450 else open_in_bin fn
451 ) in
452 while (not (Simplesqlparse.sqlcmdsep Simplesqllex.sqltoken lb))
453 do { () }
457 value close_writers () =
458 Hashtbl.iter (fun _ (w, _, _) -> close_writer w) writers
462 value import files = do
463 { List.iter exec_file files
464 ; close_writers ()
469 value escape_string s =
470 let b = Buffer.create (10 + String.length s)
471 in do
472 { Buffer.add_char b '\''
473 ; String.iter
474 ( fun c ->
475 match c with
476 [ '\'' -> Buffer.add_string b "\'\'"
477 | x -> Buffer.add_char b x
481 ; Buffer.add_char b '\''
482 ; Buffer.contents b
487 value numsizes cs = do
488 { assert (cs.col_type=CT_Number)
489 ; if cs.field_decim = 0
490 then
491 (cs.field_size, 0)
492 else
493 (cs.field_size - cs.field_decim - 1, cs.field_decim)
498 value escape_number cs =
499 let (bef, aft) = numsizes cs in
500 let escafter =
501 ", "
502 ^ escape_string
504 (String.make bef '9')
505 ^ (if aft = 0
506 then
508 else
509 "." ^ (String.make aft '9')
512 ^ ")"
514 fun n ->
515 if n = ""
516 then "to_number(NULL)"
517 else "to_number(" ^ (escape_string n) ^ escafter
521 value guess_century y = if y < "70" then "20" else "19";
523 value string_forall pred s =
524 let len = String.length s in
525 inner 0
526 where rec inner i =
527 (i = len) || ((pred s.[i]) && (inner (i+1)))
531 value alldigits = string_forall (fun c -> c>='0' && c<='9')
535 value escape_date d =
536 if d = ""
537 then
538 "to_date(NULL)"
539 else
540 let bdf reason =
541 failwith (Printf.sprintf "Bad data format (%s)" reason)
543 let canondate =
544 match (String.length d) with
545 [ 6 (* YYMMDD *) -> (guess_century (String.sub d 0 2)) ^ d
546 | 8 (* YYYYMMDD *) -> d
547 | x -> bdf (Printf.sprintf "length=%i" x)
550 if not (alldigits canondate)
551 then
552 bdf "date must consist only of digits"
553 else
554 "to_date(" ^ (escape_string canondate) ^ ", 'YYYYMMDD')"
558 value rtrim_hof f = fun x -> f (rtrim x);
560 value ltrim_hof f = fun x -> f (ltrim x);
562 value export_int32 str =
563 ( assert (String.length str = 4)
564 ; let byte i = Int32.shift_left (Int32.of_int (Char.code str.[i])) (i*8) in
565 let r =
566 Int32.add (byte 0) (
567 Int32.add (byte 1) (
568 Int32.add (byte 2) (
569 (byte 3) ))) in
570 Int32.to_string r
574 value mk_escape_func cs =
575 match cs.col_type with
576 [ CT_Character -> rtrim_hof escape_string
577 | CT_Number -> rtrim_hof (ltrim_hof (escape_number cs))
578 | CT_Date -> rtrim_hof (ltrim_hof escape_date)
579 | CT_Integer -> export_int32
581 | CT_Logical | CT_Memo | CT_Float
582 | CT_Binary | CT_General | CT_Picture | CT_Currency | CT_Datetime
583 | CT_Varifield | CT_VariantX | CT_Timestamp | CT_Double
584 | CT_Autoincrement | CT_NullFlags
585 -> failwith (Printf.sprintf
586 "only character, number and date field types are supported (field '%c')"
587 (char_of_coltype cs.col_type)
593 value is_alpha c =
594 (c >= 'A' && c <= 'Z')
595 || (c >= 'a' && c <= 'z')
599 value name_need_escape s =
600 (not (is_alpha s.[0]))
601 || (string_exists
602 (fun c ->
604 ( (is_alpha c)
605 || (c >= '0' && c <= '9')
606 || c = '.' || c = '_' || c = '$' || c = '#'
614 value escape_name s =
615 if name_need_escape s
616 then "\"" ^ s ^ "\""
617 else s
621 type skip_info = [= `Use | `Skip of string ]
624 value skip_mask colspecs : list skip_info =
625 (List.map
626 (fun cs ->
627 if cs.col_type = CT_Memo
628 then `Skip "MEMO"
629 else if cs.col_type = CT_NullFlags
630 then `Skip "_NullFlags"
631 else `Use
633 colspecs
638 value rec list_map3 f a b c =
639 match (a,b,c) with
640 [ ([],_,_) | (_,[],_) | (_,_,[]) -> []
641 | ([ah::at], [bh::bt], [ch::ct]) ->
642 [(f ah bh ch) :: (list_map3 f at bt ct)]
647 value export_iter_dbf_to_sql tblname colspecs =
648 let skip_mask = skip_mask colspecs in
649 let ins_pre_values =
650 Printf.sprintf "insert into %s (%s) values \n ("
651 (escape_name tblname)
652 (String.concat
653 ", "
654 (List.flatten
655 (List.map2
656 (fun cs sk ->
657 match sk with
658 [ `Skip _reason -> []
659 | `Use -> [ escape_name cs.col_name ]
662 colspecs
663 skip_mask
667 and ins_post_values = ");\n"
668 and escape_funcs =
669 List.map2
670 (fun cs sk ->
671 match sk with
672 [ `Skip _reason -> fun _ -> assert False
673 | `Use -> mk_escape_func cs
676 colspecs
677 skip_mask
679 fun fieldlist ->
681 { print_string ins_pre_values
682 ; print_string
683 (String.concat ", "
684 (List.flatten
685 (list_map3
686 (fun escf v sk ->
687 match sk with
688 [ `Skip _reason -> []
689 | `Use -> [ escf v ]
692 escape_funcs
693 fieldlist
694 skip_mask
698 ; print_string ins_post_values
703 value export_dbf_to_sql filename tblname =
705 { Printf.printf "/* export dbf %s into table %s */\n" filename tblname
706 ; let d = open_reader filename in
707 let d =
708 match get_reader_codepage d with
709 [ "866" | "865" ->
710 reader_with_decoder Cpcvt.default_converter.Cpcvt.importer d
711 | _ -> d
714 let cs = colspecs_of_reader d in
715 let iterfunc = export_iter_dbf_to_sql tblname cs in
716 try do
717 { Printf.printf "/* dbf codepage: %s */\n" (codepage_of_reader d)
718 ; while True do
719 { let r = read_record d
721 iterfunc r
724 with
725 [ End_of_file -> do
726 { close_reader d
727 ; print_string "commit;\n"
729 | x -> do
730 { close_reader d
731 ; raise x
738 value ddl_of_colspec cs =
739 (escape_name cs.col_name) ^ " " ^ type_ddl
740 where type_ddl =
741 match cs.col_type with
742 [ CT_Character -> Printf.sprintf "varchar2(%d)" cs.field_size
743 | CT_Number ->
744 let (bef, aft) = numsizes cs
746 Printf.sprintf "number(%d,%d)" bef aft
747 | CT_Date -> "date"
748 | CT_Integer -> "number(10)" (* 4 bytes *)
750 | CT_Logical | CT_Memo | CT_Float
751 | CT_Binary | CT_General | CT_Picture | CT_Currency | CT_Datetime
752 | CT_Varifield | CT_VariantX | CT_Timestamp | CT_Double
753 | CT_Autoincrement | CT_NullFlags
754 -> failwith "only character, number and date field types are supported"
759 value skip_fields colspecs =
760 List.flatten
761 (List.map2
762 (fun cs sk ->
763 match sk with
764 [ `Use -> [ cs ]
765 | `Skip reason ->
766 ( Printf.eprintf
767 "warning: skipping %s-column %S\n%!"
768 reason cs.col_name
769 ; []
773 colspecs
774 (skip_mask colspecs)
779 value extract_ddl filename tblname =
781 { Printf.printf "/* ddl for dbf %s (table name %s) */\n" filename tblname
782 ; let d = open_reader
783 ~decode:Cpcvt.default_converter.Cpcvt.importer
784 filename
785 in do
786 { Printf.printf "create table %s\n ( %s\n )\n/\ncomment on table %s is %s\n/\n"
787 tblname
788 (String.concat "\n , "
789 (List.map
790 ddl_of_colspec
791 (skip_fields (colspecs_of_reader d))
794 tblname
795 (escape_string (Printf.sprintf "Created by dbfwork from %s" filename))
801 value dbf_fix dbfname =
802 Mlxbase.dbf_fix dbfname
806 value main () =
807 match List.tl (Array.to_list Sys.argv) with
808 [ ["export"; dbfname; tblname] ->
809 export_dbf_to_sql dbfname tblname
810 | ["ddl"; dbfname; tblname] ->
811 extract_ddl dbfname tblname
812 | ["import" :: files] ->
813 import files
814 | ["fix"; dbfname] ->
815 dbf_fix dbfname
816 | _ ->
817 Printf.eprintf
818 "usage: %s export <dbffile.dbf> table_name > insert-script.sql\n\
819 \ or %s ddl <dbffile.dbf> table_name > ddl-script.sql\n\
820 \ or %s import <file.sql> [<file.sql> ...]\n\
821 \ or %s fix <dbffile.dbf>\n"
822 Sys.argv.(0) Sys.argv.(0) Sys.argv.(0) Sys.argv.(0)
826 main ();