dbfwork: 'to_number(x)' is treated as simply 'x'
[ocaml-dbf.git] / mlxbase.ml
blob0df3cd82002109fb08b8d759382b2c181f874a8c
1 (*
2 See:
3 http://www.clicketyclick.dk/databases/xbase/format/
4 http://www.dbase.com/KnowledgeBase/int/db7_file_fmt.htm
5 *)
7 external identity_string : string -> string = "%identity"
11 type col_type =
12 [ CT_Character | CT_Number | CT_Logical | CT_Date | CT_Memo | CT_Float
13 | CT_Binary | CT_General | CT_Picture | CT_Currency | CT_Datetime
14 | CT_Integer | CT_Varifield | CT_VariantX | CT_Timestamp | CT_Double
15 | CT_Autoincrement | CT_NullFlags
20 type col_spec =
21 { col_name : string
22 ; col_type : col_type
23 ; field_size : int
24 ; field_decim : int
29 type dbf_common =
30 { c_record_len : int
31 ; c_header_len : int
32 ; c_colspecs : list col_spec
33 ; c_codepage : string
37 type xbase_hint = [ IgnoreDeletedFlag ]
40 type dbf_reader =
41 { r_com : dbf_common
42 ; r_version : int
43 ; r_records : int
44 ; r_ch : in_channel
45 ; r_cpcvt : (string -> string)
46 ; r_ignore_deleted_flag : bool
51 value get_reader_codepage r = r.r_com.c_codepage
55 value reader_with_decoder dec r =
56 { (r) with r_cpcvt = dec }
60 exception Bad_format of string;
62 value badformat fmt = Printf.ksprintf (fun s -> raise (Bad_format s)) fmt
66 value coltypemaps =
67 [ ('C', CT_Character) ; ('N', CT_Number) ; ('L', CT_Logical)
68 ; ('D', CT_Date) ; ('M', CT_Memo) ; ('F', CT_Float) ; ('B', CT_Binary)
69 ; ('G', CT_General) ; ('P', CT_Picture) ; ('Y', CT_Currency)
70 ; ('T', CT_Datetime) ; ('I', CT_Integer) ; ('V', CT_Varifield)
71 ; ('X', CT_VariantX) ; ('@', CT_Timestamp) ; ('O', CT_Double)
72 ; ('+', CT_Autoincrement); ('0', CT_NullFlags)
78 value coltype_of_char c =
79 try
80 List.assoc c coltypemaps
81 with
82 [ Not_found -> badformat "column type '\\x%02x' not supported" (Char.code c)
87 value rec list_snd_assoc el lst =
88 match lst with
89 [ [] -> raise Not_found
90 | [(h1, h2) :: t] ->
91 if h2 = el
92 then
94 else
95 list_snd_assoc el t
100 value char_of_coltype ct =
101 list_snd_assoc ct coltypemaps
102 (* no exc handling. *)
106 value colspecs_of_reader d = d.r_com.c_colspecs
109 value codepage_of_reader d = d.r_com.c_codepage
113 value get_uint16 str ofs =
114 (Char.code str.[ofs]) + ((Char.code str.[ofs+1]) lsl 8)
118 value get_uint8 str ofs =
119 (Char.code str.[ofs])
123 value get_uint32 str ofs =
124 let lo = get_uint16 str ofs
125 and hi = get_uint16 str (ofs+2) in
126 if hi >= 0x4000
127 then failwith "get_uint32: current limitation, more than max_int"
128 else lo + (hi lsl 16)
133 value string_of_asciiz a =
135 let z = String.index a '\x00'
137 String.sub a 0 z
138 with
139 [ Not_found -> a ]
143 value cpcodemaps =
144 [ (0x01, "437")
145 ; (0x02, "850")
146 ; (0x03, "1252")
147 ; (0x64, "852")
148 ; (0x65, "865")
149 ; (0x66, "866")
150 ; (0x6A, "437G")
151 ; (0xC8, "1250")
152 ; (0xC9, "1251")
157 value cp_of_code co =
158 try (List.assoc co cpcodemaps)
159 with
160 [ Not_found -> Printf.sprintf "language_driver_0x%02x" co
165 value code_of_cp cp =
166 list_snd_assoc cp cpcodemaps
170 value read_hdr_of_ch decode hints ch =
171 let hdr = String.make 0x20 '\x00' in
172 let () = assert (0x20 = input ch hdr 0 0x20) in
173 let version = get_uint8 hdr 0x00
174 and headerlen = get_uint16 hdr 0x08
175 and recordlen = get_uint16 hdr 0x0A
176 and records = get_uint32 hdr 0x04
177 and cpage = cp_of_code (get_uint8 hdr 0x1D)
179 let rec read_col_spec ch =
180 let col = String.make 0x20 '\x00' in
181 let readbytes = input ch col 0 0x20
183 if readbytes = 0
184 then
185 raise (Bad_format "colspec read=0")
186 else
187 if col.[0] = '\x0D'
188 then
190 else
191 if readbytes <> 0x20
192 then
193 badformat "expected colspec of 0x20 bytes, got %u bytes" readbytes
194 else
195 let colname = string_of_asciiz (String.sub col 0 0x0B)
196 and coltype = coltype_of_char col.[0x0B]
197 and fieldsize = get_uint8 col 0x10
198 and decimals = get_uint8 col 0x11 in
199 let colrec =
200 { col_name = colname
201 ; col_type = coltype
202 ; field_size = fieldsize
203 ; field_decim = decimals
206 [colrec :: read_col_spec ch]
208 let cspecs = read_col_spec ch
211 { seek_in ch headerlen
213 { r_records = records
214 ; r_com =
215 { c_record_len = recordlen
216 ; c_header_len = headerlen
217 ; c_colspecs = cspecs
218 ; c_codepage = cpage
220 ; r_version = version
221 ; r_ch = ch
222 ; r_cpcvt = decode
223 ; r_ignore_deleted_flag = List.mem IgnoreDeletedFlag hints
229 value open_reader ?(decode=identity_string) ?(hints=[]) dbffname =
230 let ch = open_in_bin dbffname
233 read_hdr_of_ch decode hints ch
234 with
235 [ x -> do {close_in ch; raise x} ]
239 value close_reader d =
240 close_in d.r_ch
244 value read_record d =
245 let r = String.make d.r_com.c_record_len '\x00'
246 and parse_record =
247 let rec parse_record_inner r cs i =
248 (* îñòàâøèåñÿ colspecs -- cs, â ñòðîêå r, ñî ñìåùåíèÿ i. *)
249 match cs with
250 [ [] -> []
251 | [c :: cstl] ->
252 let sz = c.field_size
254 [ (String.sub r i sz)
255 :: parse_record_inner r cstl (i+sz)
259 fun r ->
260 parse_record_inner r d.r_com.c_colspecs 1
262 let rec read_record_inner () =
263 let read () =
264 let rec loop ofs =
265 let toread = d.r_com.c_record_len - ofs in
266 match input d.r_ch r ofs toread with
267 [ 0 -> ofs
268 | n -> if n = toread then ofs + n else loop (ofs + n)
271 loop 0
273 match (read (), d.r_version) with
274 [ (0, 2) -> badformat "No EOF mark"
275 | (0, _) -> raise End_of_file
276 | (n, _) when n = d.r_com.c_record_len ->
277 if d.r_version = 2 && r.[0] = '\x1A' then raise End_of_file else
278 match (d.r_ignore_deleted_flag, r.[0]) with
279 [ (True,_)
280 | (False,' ') -> parse_record (d.r_cpcvt r)
281 | (False,'*') -> read_record_inner ()
282 | _ -> badformat "bad deleted_flag '%c'" r.[0]
284 | (n, _) -> if r.[0] = '\x1A' then raise End_of_file else badformat "Expected EOF mark, got '%c'" r.[0]
287 read_record_inner ()
292 value iterate_dbf filename fu =
293 let d = open_reader dbf_testfname
295 let cs = d.r_com.c_colspecs
297 try do
299 while True do
301 let r = read_record d
303 fu cs r
306 with
307 [ End_of_file -> do { close_dbf d; () }
308 | x -> do { close_dbf d; raise x } ]
314 (**************** writing ***************)
318 type dbf_writer =
319 { w_com : dbf_common
320 ; w_records : mutable int
321 ; w_ch : out_channel
322 ; w_cpcvt : (string -> string)
327 value set_uint8 str ofs v =
328 let () = assert (v >= 0 && v <= 255) in
329 str.[ofs] := Char.chr v
332 value set_uint16 str ofs v = do
333 { assert (v >= 0 && v <= 0xFFFF)
334 ; set_uint8 str ofs (v land 0xFF)
335 ; set_uint8 str (ofs+1) (v lsr 8)
339 value set_uint32 str ofs v = do
340 { assert (v >= 0)
341 ; set_uint16 str ofs (v land 0xFFFF)
342 ; set_uint16 str (ofs+2) (v lsr 16)
348 value string_of_header dw =
349 let cp =
350 let cptxt = dw.w_com.c_codepage in
351 try code_of_cp cptxt
352 with
353 [ Not_found ->
354 invalid_arg (Printf.sprintf "unknown codepage %S" cptxt)
357 let h = String.make 0x20 '\x00' in
358 let set_uint8 = set_uint8 h
359 and set_uint16 = set_uint16 h
360 and set_uint32 = set_uint32 h
361 and t = Unix.gmtime (Unix.time ()) in
363 { set_uint8 0 3
364 ; set_uint8 1 (t.Unix.tm_year mod 100)
365 ; set_uint8 2 (1 + t.Unix.tm_mon)
366 ; set_uint8 3 t.Unix.tm_mday
367 ; set_uint32 4 dw.w_records
368 ; set_uint16 8 dw.w_com.c_header_len
369 ; set_uint16 10 dw.w_com.c_record_len
370 ; set_uint8 29 cp
376 value make_asciiz wid str =
377 let len = String.length str in
378 let () = assert (len <= wid - 1) in
379 let r = String.make wid '\x00' in
380 let () = String.blit str 0 r 0 len in
385 value string_of_char = String.make 1
388 value str0 len = String.make len '\x00'
391 value string_of_uint8 i =
392 let () = assert (i >= 0 && i <= 255) in
393 string_of_char (Char.chr i)
397 value output_field_descr ch fd =
398 let () = assert (fd.field_size <= 255)
399 and () =
400 if fd.col_type = CT_Number
401 then assert (fd.field_decim >= 0 && fd.field_decim <= 15
402 && (fd.field_decim = 0 || fd.field_decim < fd.field_size-1))
403 else assert (fd.field_decim = 0)
405 let d =
406 (let n = fd.col_name in
407 if String.length n < 11
408 then make_asciiz 11 n
409 else failwith (Printf.sprintf
410 "Field name must have length from 1 to 10 (%S)" n)
412 ^ (string_of_char (char_of_coltype fd.col_type))
413 ^ (str0 4)
414 ^ (string_of_uint8 fd.field_size)
415 ^ (string_of_uint8 fd.field_decim)
416 ^ (str0 2)
417 ^ (string_of_uint8 0x01) (* "work area id" *)
418 ^ (str0 11)
420 let () = assert (0x20 = String.length d) in
421 output_string ch d
425 value open_writer ?(encode=identity_string) filename colspecs codepage =
426 let com =
427 { c_record_len =
428 List.fold_left
429 ( + ) 1
430 (List.map (fun cs -> cs.field_size) colspecs)
431 ; c_header_len = 0x20 + (0x20 * (List.length colspecs)) + 1
432 ; c_colspecs = colspecs
433 ; c_codepage = codepage
434 } in
435 let ch = open_out_bin filename in
436 let w =
437 { w_com = com
438 ; w_records = 0
439 ; w_ch = ch
440 ; w_cpcvt = encode
442 in do
443 { output_string ch (String.make 0x20 '\xFF')
444 ; List.iter (output_field_descr ch) colspecs
445 ; output_char ch '\x0D'
446 ; flush ch
452 value rewrite_header dw =
453 let ch = dw.w_ch in do
454 { seek_out ch 0
455 ; output_string ch (string_of_header dw)
456 ; flush ch
461 value close_writer dw =
462 let ch = dw.w_ch in do
463 { ()
464 ; output_char ch '\x1A'
465 ; rewrite_header dw
466 ; flush ch
467 ; close_out ch
472 value write_record dw vals =
473 let b = String.make dw.w_com.c_record_len '\x00' in
474 let rec make_record ofs colspecs vals =
475 match (colspecs, vals) with
476 [ ([], []) -> ()
477 | ([], _) | (_, []) -> assert False
478 | ([cs :: cstail], [v :: valstail]) ->
479 let fsz = cs.field_size in do
480 { if fsz <> String.length v
481 then do
482 { print_string "field length must be equal to value length"
484 else ()
485 ; let v' = v ^ (String.make (fsz - String.length v) ' ')
487 ; Printf.printf "field value = %S\n" v'
489 ; String.blit v' 0 b ofs fsz
490 ; make_record (ofs + fsz) cstail valstail
493 in do
494 { b.[0] := '\x20'
495 ; make_record 1 dw.w_com.c_colspecs vals
496 ; assert
497 ( try do { let (_:int) = String.index b '\x00' in False } with [ Not_found -> True ]
500 ; output_string dw.w_ch (dw.w_cpcvt b)
501 ; dw.w_records := dw.w_records + 1
505 (***************** fix ******************)
507 open Unix
509 open Printf
513 value dbf_fix dbfname =
514 let filelen = (stat dbfname).st_size in
515 let r = open_reader
516 ~decode:(fun _ -> failwith "achtung! dbf_fix reads records!")
517 dbfname in
518 let h = r.r_com in
519 let real_records = (filelen - h.c_header_len) / h.c_record_len in
520 let last_ofs = h.c_header_len + real_records * h.c_record_len in
521 let last_size = last_ofs+1 in
523 let do_fix () = do
524 { close_reader r
525 ; let w = open_out_gen [ Open_wronly; Open_binary ] 0 dbfname
526 ; let rec_count_bin = String.make 4 '\x00' in do
527 { set_uint32 rec_count_bin 0 real_records
528 ; seek_out w 4
529 ; output_string w rec_count_bin
531 ; seek_out w last_ofs
532 ; output_char w '\x1A'
533 ; close_out w
534 ; try
535 truncate dbfname last_size
536 with
537 [ Invalid_argument (*arg when arg =*) "Unix.truncate not implemented"
539 printf "%s: truncate not implemented, skipping.\n" dbfname
544 if real_records <> r.r_records
545 then do
546 { printf "%s: records in header: %i, records in file: %i\n"
547 dbfname r.r_records real_records
548 ; do_fix ()
550 else
551 let () = printf "%s: records' count ok.\n" dbfname in
552 if last_size <> filelen
553 then do
554 { printf "%s: file length is %i, must be %i\n"
555 dbfname filelen last_size
556 ; do_fix ()
558 else do
559 { printf "%s: file length ok.\n" dbfname
560 ; seek_in r.r_ch last_ofs
561 ; let c = input_char r.r_ch
562 ; if c <> '\x1A'
563 then do
564 { printf "%s: file terminator is %C, must be %C\n" dbfname c '\x1A'
565 ; do_fix ()
567 else
568 let () = close_reader r