continue fix [#1], discovered by tests
[deriving.git] / lib / pickle.ml
blob147641f34c3bd38d60dec5ae88297a407423a842
1 (*pp deriving *)
3 (* Copyright Jeremy Yallop 2007.
4 This file is free software, distributed under the MIT license.
5 See the file COPYING for details.
6 *)
8 (*
9 Idea:
10 1. every object receives a serializable id.
11 2. an object is serialized using the ids of its subobjects
13 module Pickle =
14 struct
15 exception UnknownTag of int * string
16 exception UnpicklingError of string
18 module Id :
19 sig
20 type t deriving (Show, Dump, Eq)
21 val initial : t
22 val compare : t -> t -> int
23 val next : t -> t
24 end =
25 struct
26 type t = int deriving (Show, Dump, Eq)
27 let initial = 0
28 let compare = compare
29 let next = succ
30 end
31 module IdMap = Map.Make (Id)
32 type id = Id.t deriving (Show, Dump)
34 module Repr : sig
35 (* Break abstraction for the sake of efficiency for now *)
36 type t = Bytes of string | CApp of (int option * Id.t list) deriving (Dump, Show)
37 val of_string : string -> t
38 val to_string : t -> string
39 val make : ?constructor:int -> id list -> t
40 val unpack_ctor : t -> int option * id list
41 end =
42 struct
43 type t = Bytes of string | CApp of (int option * Id.t list) deriving (Dump, Show)
44 let of_string s = Bytes s
45 let to_string = function
46 | Bytes s -> s
47 | _ -> invalid_arg "string_of_repr"
48 let make ?constructor ids =
49 match constructor with
50 | Some n -> CApp (Some n, ids)
51 | None -> CApp (None, ids)
52 let unpack_ctor = function
53 | CApp arg -> arg
54 | _ -> raise (UnpicklingError "Error unpickling constructor")
55 end
56 type repr = Repr.t
58 module Write : sig
59 type s = {
60 nextid : Id.t;
61 obj2id : Id.t Dynmap.DynMap.t;
62 id2rep : repr IdMap.t;
64 val initial_output_state : s
65 include Monad.Monad_state_type with type state = s
67 module Utils (T : Typeable.Typeable) (E : Eq.Eq with type a = T.a) : sig
68 val allocate : T.a -> (id -> unit m) -> id m
69 val store_repr : id -> Repr.t -> unit m
70 end
71 end =
72 struct
73 type s = {
74 nextid : Id.t; (* the next id to be allocated *)
75 obj2id : Id.t Dynmap.DynMap.t; (* map from typerep to id cache for the corresponding type *)
76 id2rep : repr IdMap.t;
78 let initial_output_state = {
79 nextid = Id.initial;
80 obj2id = Dynmap.DynMap.empty;
81 id2rep = IdMap.empty;
83 include Monad.Monad_state (struct type state = s end)
84 module Utils (T : Typeable.Typeable) (E : Eq.Eq with type a = T.a) =
85 struct
86 module C = Dynmap.Comp(T)(E)
87 let comparator = C.eq
89 let allocate o f =
90 let obj = T.make_dynamic o in
91 get >>= fun ({nextid=nextid;obj2id=obj2id} as t) ->
92 match Dynmap.DynMap.find obj obj2id with
93 | Some id -> return id
94 | None ->
95 let id, nextid = nextid, Id.next nextid in
96 put {t with
97 obj2id=Dynmap.DynMap.add obj id comparator obj2id;
98 nextid=nextid} >>
99 f id >> return id
101 let store_repr id repr =
102 get >>= fun state ->
103 put {state with id2rep = IdMap.add id repr state.id2rep}
107 module Read : sig
108 type s = (repr * (Typeable.dynamic option)) IdMap.t
109 include Monad.Monad_state_type with type state = s
110 val find_by_id : id -> (Repr.t * Typeable.dynamic option) m
111 module Utils (T : Typeable.Typeable) : sig
112 val sum : (int * id list -> T.a m) -> id -> T.a m
113 val tuple : (id list -> T.a m) -> id -> T.a m
114 val record : (T.a -> id list -> T.a m) -> int -> id -> T.a m
115 val update_map : id -> (T.a -> unit m)
117 end =
118 struct
119 type s = (repr * (Typeable.dynamic option)) IdMap.t
120 include Monad.Monad_state (struct type state = s end)
122 let find_by_id id =
123 get >>= fun state ->
124 return (IdMap.find id state)
126 module Utils (T : Typeable.Typeable) = struct
127 let decode_repr_ctor c = match Repr.unpack_ctor c with
128 | (Some c, ids) -> (c, ids)
129 | _ -> invalid_arg "decode_repr_ctor"
131 let decode_repr_noctor c = match Repr.unpack_ctor c with
132 | (None, ids) -> ids
133 | _ -> invalid_arg "decode_repr_ctor"
135 let update_map id obj =
136 let dynamic = T.make_dynamic obj in
137 get >>= fun state ->
138 match IdMap.find id state with
139 | (repr, None) ->
140 put (IdMap.add id (repr, Some dynamic) state)
141 | (_, Some _) ->
142 return ()
143 (* Checking for id already present causes unpickling to fail
144 when there is circularity involving immutable values (even
145 if the recursion wholly depends on mutability).
147 For example, consider the code
149 type t = A | B of t ref deriving (Typeable, Eq, Pickle)
150 let s = ref A in
151 let r = B s in
152 s := r;
153 let pickled = Pickle_t.pickleS r in
154 Pickle_t.unpickleS r
156 which results in the value
157 B {contents = B {contents = B { ... }}}
159 During deserialization the following steps occur:
160 1. lookup "B {...}" in the dictionary (not there)
161 2. unpickle the contents of B:
162 3. lookup the contents in the dictionary (not there)
163 4. create a blank reference, insert it into the dictionary
164 5. unpickle the contents of the reference:
165 6. lookup ("B {...}") in the dictionary (not there)
166 7. unpickle the contents of B:
167 8. lookup the contents in the dictionary (there)
168 9. insert "B{...}" into the dictionary.
169 10. insert "B{...}" into the dictionary.
173 let whizzy f id decode =
174 find_by_id id >>= fun (repr, dynopt) ->
175 match dynopt with
176 | None ->
177 f (decode repr) >>= fun obj ->
178 update_map id obj >>
179 return obj
180 | Some obj -> return (T.throwing_cast obj)
182 let sum f id = whizzy f id decode_repr_ctor
183 let tuple f id = whizzy f id decode_repr_noctor
184 let record_tag = 0
185 let record f size id =
186 find_by_id id >>= fun (repr, obj) ->
187 match obj with
188 | None ->
189 let this = Obj.magic (Obj.new_block record_tag size) in
190 update_map id this >>
191 f this (decode_repr_noctor repr) >>
192 return this
193 | Some obj -> return (T.throwing_cast obj)
200 module type Pickle =
202 type a
203 module T : Typeable.Typeable with type a = a
204 module E : Eq.Eq with type a = a
205 val pickle : a -> id Write.m
206 val unpickle : id -> a Read.m
207 val to_buffer : Buffer.t -> a -> unit
208 val to_string : a -> string
209 val to_channel : out_channel -> a -> unit
210 val from_stream : char Stream.t -> a
211 val from_string : string -> a
212 val from_channel : in_channel -> a
215 module Defaults
216 (S : sig
217 type a
218 module T : Typeable.Typeable with type a = a
219 module E : Eq.Eq with type a = a
220 val pickle : a -> id Write.m
221 val unpickle : id -> a Read.m
222 end) : Pickle with type a = S.a =
223 struct
224 include S
226 type ids = (Id.t * Repr.t) list
227 deriving (Dump, Show)
229 type dumpable = id * ids
230 deriving (Show, Dump)
232 type ('a,'b) pair = 'a * 'b deriving (Dump)
233 type capp = int option * Id.t list deriving (Dump)
235 (* We don't serialize ids of each object at all: we just use the
236 ordering in the output file to implicitly record the ids of
237 objects.
239 Also, we don't serialize the repr constructors. All values with
240 a particular constructor are grouped in a single list.
242 This can (and should) all be written much more efficiently.
244 type discriminated =
245 (Id.t * string) list
246 * (Id.t * (int * Id.t list)) list
247 * (Id.t * (Id.t list)) list
248 deriving (Dump, Show)
250 type discriminated_ordered =
251 string list
252 * (int * Id.t list) list
253 * (Id.t list) list
254 deriving (Dump, Show)
256 let reorder : Id.t * discriminated -> Id.t * discriminated_ordered =
257 fun (root,(a,b,c)) ->
258 let collect_ids items (map,counter) =
259 List.fold_left
260 (fun (map,counter) (id,_) ->
261 IdMap.add id counter map, Id.next counter)
262 (map,counter) items in
264 let map, _ =
265 collect_ids c
266 (collect_ids b
267 (collect_ids a
268 (IdMap.empty, Id.initial))) in
269 let lookup id = IdMap.find id map in
270 (lookup root,
271 (List.map snd a,
272 List.map (fun (_,(c,l)) -> c, List.map lookup l) b,
273 List.map (fun (_,l) -> List.map lookup l) c))
275 let unorder : Id.t * discriminated_ordered -> Id.t * discriminated
276 = fun (root,(a,b,c)) ->
277 let number_sequentially id items =
278 List.fold_left
279 (fun (id,items) item ->
280 (Id.next id, (id,item)::items))
281 (id,[]) items in
282 let id = Id.initial in
283 let id, a = number_sequentially id a in
284 let id, b = number_sequentially id b in
285 let _, c = number_sequentially id c in
286 (root, (a,b,c))
288 type ('a,'b) either = Left of 'a | Right of 'b
289 let either_partition (f : 'a -> ('b, 'c) either) (l : 'a list)
290 : 'b list * 'c list =
291 let rec aux (lefts, rights) = function
292 | [] -> (List.rev lefts, List.rev rights)
293 | x::xs ->
294 match f x with
295 | Left l -> aux (l :: lefts, rights) xs
296 | Right r -> aux (lefts, r :: rights) xs
297 in aux ([], []) l
299 type discriminated_dumpable = Id.t * discriminated deriving (Dump)
301 let discriminate : (Id.t * Repr.t) list -> discriminated
302 = fun input ->
303 let bytes, others =
304 either_partition
305 (function
306 | id, (Repr.Bytes s) -> Left (id,s)
307 | id, (Repr.CApp c) -> Right (id,c))
308 input in
309 let ctors, no_ctors =
310 either_partition
311 (function
312 | id, (Some c, ps) -> Left (id, (c,ps))
313 | id, (None, ps) -> Right (id,ps))
314 others in
315 (bytes, ctors, no_ctors)
317 let undiscriminate : discriminated -> (Id.t * Repr.t) list
318 = fun (a,b,c) ->
319 List.map (fun (id,s) -> (id,Repr.Bytes s)) a
320 @ List.map (fun (id,(c,ps)) -> (id,Repr.CApp (Some c,ps))) b
321 @ List.map (fun (id,(ps)) -> (id,Repr.CApp (None,ps))) c
323 type do_pair = Id.t * discriminated_ordered
324 deriving (Show, Dump)
326 let write_discriminated f
327 = fun (root,map) ->
328 let dmap = discriminate map in
329 let rmap = reorder (root,dmap) in
330 f rmap
332 let read_discriminated (f : 'b -> 'a) : 'b -> Id.t * (Id.t * Repr.t) list
333 = fun s ->
334 let rmap = f s in
335 let (root,dmap) = unorder rmap in
336 (root, undiscriminate dmap)
338 open Write
340 let decode_pickled_string (f : 'a -> Id.t * discriminated_ordered) : 'b -> Id.t * Read.s =
341 fun s ->
342 let (id, state : dumpable) =
343 read_discriminated f s
345 id, (List.fold_right
346 (fun (id,repr) map -> IdMap.add id (repr,None) map)
347 state
348 IdMap.empty)
350 let encode_pickled_string f =
351 fun (id,state) ->
352 let input_state =
353 id, IdMap.fold (fun id repr output -> (id,repr)::output)
354 state.id2rep [] in
355 write_discriminated f input_state
357 let doPickle f v : 'a =
358 let id, state = runState (S.pickle v) initial_output_state in
359 encode_pickled_string f (id, state)
361 let doUnpickle f input =
362 let id, initial_input_state = decode_pickled_string f input in
363 let value, _ = Read.runState (S.unpickle id) initial_input_state in
364 value
366 let from_channel = doUnpickle Dump.from_channel<do_pair>
367 let from_string = doUnpickle Dump.from_string<do_pair>
368 let from_stream = doUnpickle Dump.from_stream<do_pair>
369 let to_channel channel = doPickle (Dump.to_channel<do_pair> channel)
370 let to_buffer buffer = doPickle (Dump.to_buffer<do_pair> buffer)
371 let to_string = doPickle Dump.to_string<do_pair>
374 module Pickle_from_dump
375 (P : Dump.Dump)
376 (E : Eq.Eq with type a = P.a)
377 (T : Typeable.Typeable with type a = P.a)
378 : Pickle with type a = P.a
379 and type a = T.a = Defaults
380 (struct
381 type a = T.a
382 module T = T
383 module E = E
384 module Comp = Dynmap.Comp(T)(E)
385 open Write
386 module W = Utils(T)(E)
387 let pickle obj =
388 W.allocate obj
389 (fun id -> W.store_repr id (Repr.of_string (P.to_string obj)))
390 open Read
391 module U = Utils(T)
392 let unpickle id =
393 find_by_id id >>= fun (repr, dynopt) ->
394 match dynopt with
395 | None ->
396 let obj : a = P.from_string (Repr.to_string repr) in
397 U.update_map id obj >>
398 return obj
399 | Some obj -> return (T.throwing_cast obj)
400 end)
402 module Pickle_unit : Pickle with type a = unit = Pickle_from_dump(Dump.Dump_unit)(Eq.Eq_unit)(Typeable.Typeable_unit)
403 module Pickle_bool = Pickle_from_dump(Dump.Dump_bool)(Eq.Eq_bool)(Typeable.Typeable_bool)
404 module Pickle_int = Pickle_from_dump(Dump.Dump_int)(Eq.Eq_int)(Typeable.Typeable_int)
405 module Pickle_char = Pickle_from_dump(Dump.Dump_char)(Eq.Eq_char)(Typeable.Typeable_char)
406 module Pickle_float = Pickle_from_dump(Dump.Dump_float)(Eq.Eq_float)(Typeable.Typeable_float)
407 module Pickle_num = Pickle_from_dump(Dump.Dump_num)(Eq.Eq_num)(Typeable.Typeable_num)
408 module Pickle_string = Pickle_from_dump(Dump.Dump_string)(Eq.Eq_string)(Typeable.Typeable_string)
410 module Pickle_option (V0 : Pickle) : Pickle with type a = V0.a option = Defaults(
411 struct
412 module T = Typeable.Typeable_option (V0.T)
413 module E = Eq.Eq_option (V0.E)
414 module Comp = Dynmap.Comp (T) (E)
415 open Write
416 type a = V0.a option
417 let rec pickle =
418 let module W = Utils(T)(E) in
419 function
420 None as obj ->
421 W.allocate obj
422 (fun id -> W.store_repr id (Repr.make ~constructor:0 []))
423 | Some v0 as obj ->
424 W.allocate obj
425 (fun thisid ->
426 V0.pickle v0 >>= fun id0 ->
427 W.store_repr thisid (Repr.make ~constructor:1 [id0]))
428 open Read
429 let unpickle =
430 let module W = Utils(T) in
431 let f = function
432 | 0, [] -> return None
433 | 1, [id] -> V0.unpickle id >>= fun obj -> return (Some obj)
434 | n, _ -> raise (UnpicklingError
435 ("Unexpected tag encountered unpickling "
436 ^"option : " ^ string_of_int n)) in
437 W.sum f
438 end)
441 module Pickle_list (V0 : Pickle)
442 : Pickle with type a = V0.a list = Defaults (
443 struct
444 module T = Typeable.Typeable_list (V0.T)
445 module E = Eq.Eq_list (V0.E)
446 module Comp = Dynmap.Comp (T) (E)
447 type a = V0.a list
448 open Write
449 module U = Utils(T)(E)
450 let rec pickle = function
451 [] as obj ->
452 U.allocate obj
453 (fun this -> U.store_repr this (Repr.make ~constructor:0 []))
454 | (v0::v1) as obj ->
455 U.allocate obj
456 (fun this -> V0.pickle v0 >>= fun id0 ->
457 pickle v1 >>= fun id1 ->
458 U.store_repr this (Repr.make ~constructor:1 [id0; id1]))
459 open Read
460 module W = Utils (T)
461 let rec unpickle id =
462 let f = function
463 | 0, [] -> return []
464 | 1, [car;cdr] ->
465 V0.unpickle car >>= fun car ->
466 unpickle cdr >>= fun cdr ->
467 return (car :: cdr)
468 | n, _ -> raise (UnpicklingError
469 ("Unexpected tag encountered unpickling "
470 ^"option : " ^ string_of_int n)) in
471 W.sum f id
472 end)
474 include Pickle
476 type 'a ref = 'a Pervasives.ref = { mutable contents : 'a }
477 deriving (Pickle)
479 (* Idea: keep pointers to values that we've serialized in a global
480 weak hash table so that we can share structure with them if we
481 deserialize any equal values in the same process *)
483 (* Idea: serialize small objects (bools, chars) in place rather than
484 using the extra level of indirection (and space) introduced by ids
487 (* Idea: bitwise output instead of bytewise. Probably a bit much to
488 implement now, but should have a significant impact (e.g. one using
489 bit instead of one byte for two-constructor sums) *)
491 (* Should we use a different representation for lists? i.e. write out
492 the length followed by the elements? we could no longer claim
493 sharing maximization, but it would actually be more efficient in
494 most cases.