3 (* Copyright Jeremy Yallop 2007.
4 This file is free software, distributed under the MIT license.
5 See the file COPYING for details.
10 1. every object receives a serializable id.
11 2. an object is serialized using the ids of its subobjects
15 exception UnknownTag
of int * string
16 exception UnpicklingError
of string
20 type t deriving
(Show
, Dump
, Eq
)
22 val compare
: t
-> t
-> int
26 type t
= int deriving
(Show
, Dump
, Eq
)
31 module IdMap
= Map.Make
(Id
)
32 type id
= Id.t deriving
(Show
, Dump
)
35 (* Break abstraction for the sake of efficiency for now *)
36 type t
= Bytes
of string | CApp
of (int option * Id.t list
) deriving
(Dump
, Show
)
37 val of_string
: string -> t
38 val to_string
: t
-> string
39 val make
: ?constructor
:int -> id list
-> t
40 val unpack_ctor
: t
-> int option * id list
43 type t
= Bytes
of string | CApp
of (int option * Id.t list
) deriving
(Dump
, Show
)
44 let of_string s
= Bytes s
45 let to_string = function
47 | _
-> invalid_arg
"string_of_repr"
48 let make ?constructor ids
=
49 match constructor
with
50 | Some n
-> CApp
(Some n
, ids
)
51 | None
-> CApp
(None
, ids
)
52 let unpack_ctor = function
54 | _
-> raise
(UnpicklingError
"Error unpickling constructor")
61 obj2id
: Id.t
Dynmap.DynMap.t
;
62 id2rep
: repr
IdMap.t
;
64 val initial_output_state
: s
65 include Monad.Monad_state_type
with type state
= s
67 module Utils
(T
: Typeable.Typeable
) (E
: Eq.Eq
with type a
= T.a
) : sig
68 val allocate
: T.a
-> (id
-> unit m
) -> id m
69 val store_repr
: id
-> Repr.t
-> unit m
74 nextid
: Id.t
; (* the next id to be allocated *)
75 obj2id
: Id.t
Dynmap.DynMap.t
; (* map from typerep to id cache for the corresponding type *)
76 id2rep
: repr
IdMap.t
;
78 let initial_output_state = {
80 obj2id
= Dynmap.DynMap.empty
;
83 include Monad.Monad_state
(struct type state
= s
end)
84 module Utils
(T
: Typeable.Typeable
) (E
: Eq.Eq
with type a
= T.a
) =
86 module C
= Dynmap.Comp
(T
)(E
)
90 let obj = T.make_dynamic o
in
91 get
>>= fun ({nextid
=nextid
;obj2id
=obj2id
} as t
) ->
92 match Dynmap.DynMap.find
obj obj2id
with
93 | Some id
-> return id
95 let id, nextid
= nextid
, Id.next nextid
in
97 obj2id
=Dynmap.DynMap.add
obj id comparator obj2id
;
101 let store_repr id repr
=
103 put
{state
with id2rep
= IdMap.add
id repr state
.id2rep
}
108 type s
= (repr
* (Typeable.dynamic
option)) IdMap.t
109 include Monad.Monad_state_type
with type state
= s
110 val find_by_id
: id -> (Repr.t
* Typeable.dynamic
option) m
111 module Utils
(T
: Typeable.Typeable
) : sig
112 val sum
: (int * id list
-> T.a m
) -> id -> T.a m
113 val tuple
: (id list
-> T.a m
) -> id -> T.a m
114 val record
: (T.a
-> id list
-> T.a m
) -> int -> id -> T.a m
115 val update_map
: id -> (T.a
-> unit m
)
119 type s
= (repr
* (Typeable.dynamic
option)) IdMap.t
120 include Monad.Monad_state
(struct type state
= s
end)
124 return
(IdMap.find
id state
)
126 module Utils
(T
: Typeable.Typeable
) = struct
127 let decode_repr_ctor c
= match Repr.unpack_ctor c
with
128 | (Some c
, ids
) -> (c
, ids
)
129 | _
-> invalid_arg
"decode_repr_ctor"
131 let decode_repr_noctor c
= match Repr.unpack_ctor c
with
133 | _
-> invalid_arg
"decode_repr_ctor"
135 let update_map id obj =
136 let dynamic = T.make_dynamic
obj in
138 match IdMap.find
id state
with
140 put
(IdMap.add
id (repr
, Some
dynamic) state
)
143 (* Checking for id already present causes unpickling to fail
144 when there is circularity involving immutable values (even
145 if the recursion wholly depends on mutability).
147 For example, consider the code
149 type t = A | B of t ref deriving (Typeable, Eq, Pickle)
153 let pickled = Pickle_t.pickleS r in
156 which results in the value
157 B {contents = B {contents = B { ... }}}
159 During deserialization the following steps occur:
160 1. lookup "B {...}" in the dictionary (not there)
161 2. unpickle the contents of B:
162 3. lookup the contents in the dictionary (not there)
163 4. create a blank reference, insert it into the dictionary
164 5. unpickle the contents of the reference:
165 6. lookup ("B {...}") in the dictionary (not there)
166 7. unpickle the contents of B:
167 8. lookup the contents in the dictionary (there)
168 9. insert "B{...}" into the dictionary.
169 10. insert "B{...}" into the dictionary.
173 let whizzy f
id decode
=
174 find_by_id id >>= fun (repr
, dynopt
) ->
177 f
(decode repr
) >>= fun obj ->
180 | Some
obj -> return
(T.throwing_cast
obj)
182 let sum f
id = whizzy f
id decode_repr_ctor
183 let tuple f
id = whizzy f
id decode_repr_noctor
185 let record f size
id =
186 find_by_id id >>= fun (repr
, obj) ->
189 let this = Obj.magic
(Obj.new_block
record_tag size
) in
190 update_map id this >>
191 f
this (decode_repr_noctor repr
) >>
193 | Some
obj -> return
(T.throwing_cast
obj)
203 module T
: Typeable.Typeable
with type a
= a
204 module E
: Eq.Eq
with type a
= a
205 val pickle
: a
-> id Write.m
206 val unpickle
: id -> a
Read.m
207 val to_buffer
: Buffer.t
-> a
-> unit
208 val to_string : a
-> string
209 val to_channel
: out_channel
-> a
-> unit
210 val from_stream
: char
Stream.t
-> a
211 val from_string
: string -> a
212 val from_channel
: in_channel
-> a
218 module T
: Typeable.Typeable
with type a
= a
219 module E
: Eq.Eq
with type a
= a
220 val pickle
: a
-> id Write.m
221 val unpickle
: id -> a
Read.m
222 end) : Pickle
with type a
= S.a
=
226 type ids
= (Id.t
* Repr.t
) list
227 deriving
(Dump
, Show
)
229 type dumpable
= id * ids
230 deriving
(Show
, Dump
)
232 type ('a
,'b
) pair
= 'a
* 'b deriving
(Dump
)
233 type capp
= int option * Id.t list deriving
(Dump
)
235 (* We don't serialize ids of each object at all: we just use the
236 ordering in the output file to implicitly record the ids of
239 Also, we don't serialize the repr constructors. All values with
240 a particular constructor are grouped in a single list.
242 This can (and should) all be written much more efficiently.
246 * (Id.t
* (int * Id.t list
)) list
247 * (Id.t
* (Id.t list
)) list
248 deriving
(Dump
, Show
)
250 type discriminated_ordered
=
252 * (int * Id.t list
) list
254 deriving
(Dump
, Show
)
256 let reorder : Id.t
* discriminated
-> Id.t
* discriminated_ordered
=
257 fun (root
,(a
,b
,c
)) ->
258 let collect_ids items
(map
,counter
) =
260 (fun (map
,counter
) (id,_
) ->
261 IdMap.add
id counter map
, Id.next counter
)
262 (map
,counter
) items
in
268 (IdMap.empty
, Id.initial))) in
269 let lookup id = IdMap.find
id map in
272 List.map (fun (_
,(c
,l
)) -> c
, List.map lookup l
) b
,
273 List.map (fun (_
,l
) -> List.map lookup l
) c
))
275 let unorder : Id.t
* discriminated_ordered
-> Id.t
* discriminated
276 = fun (root
,(a
,b
,c
)) ->
277 let number_sequentially id items
=
279 (fun (id,items
) item
->
280 (Id.next id, (id,item
)::items
))
282 let id = Id.initial in
283 let id, a
= number_sequentially id a
in
284 let id, b
= number_sequentially id b
in
285 let _, c
= number_sequentially id c
in
288 type ('a
,'b
) either
= Left
of 'a
| Right
of 'b
289 let either_partition (f
: 'a
-> ('b
, 'c
) either
) (l
: 'a list
)
290 : 'b list
* 'c list
=
291 let rec aux (lefts
, rights
) = function
292 | [] -> (List.rev lefts
, List.rev rights
)
295 | Left l
-> aux (l
:: lefts
, rights
) xs
296 | Right
r -> aux (lefts
, r :: rights
) xs
299 type discriminated_dumpable
= Id.t
* discriminated deriving
(Dump
)
301 let discriminate : (Id.t
* Repr.t
) list
-> discriminated
306 | id, (Repr.Bytes
s) -> Left
(id,s)
307 | id, (Repr.CApp c
) -> Right
(id,c
))
309 let ctors, no_ctors
=
312 | id, (Some c
, ps
) -> Left
(id, (c
,ps
))
313 | id, (None
, ps
) -> Right
(id,ps
))
315 (bytes, ctors, no_ctors
)
317 let undiscriminate : discriminated
-> (Id.t
* Repr.t
) list
319 List.map (fun (id,s) -> (id,Repr.Bytes
s)) a
320 @ List.map (fun (id,(c
,ps
)) -> (id,Repr.CApp
(Some c
,ps
))) b
321 @ List.map (fun (id,(ps
)) -> (id,Repr.CApp
(None
,ps
))) c
323 type do_pair
= Id.t
* discriminated_ordered
324 deriving
(Show
, Dump
)
326 let write_discriminated f
328 let dmap = discriminate map in
329 let rmap = reorder (root
,dmap) in
332 let read_discriminated (f
: 'b
-> 'a
) : 'b
-> Id.t
* (Id.t
* Repr.t
) list
335 let (root
,dmap) = unorder rmap in
336 (root
, undiscriminate dmap)
340 let decode_pickled_string (f
: 'a
-> Id.t
* discriminated_ordered
) : 'b
-> Id.t
* Read.s =
342 let (id, state
: dumpable
) =
343 read_discriminated f
s
346 (fun (id,repr
) map -> IdMap.add
id (repr
,None
) map)
350 let encode_pickled_string f
=
353 id, IdMap.fold
(fun id repr output
-> (id,repr
)::output
)
355 write_discriminated f
input_state
357 let doPickle f v
: 'a
=
358 let id, state
= runState
(S.pickle v
) initial_output_state in
359 encode_pickled_string f
(id, state
)
361 let doUnpickle f input
=
362 let id, initial_input_state
= decode_pickled_string f input
in
363 let value, _ = Read.runState
(S.unpickle
id) initial_input_state
in
366 let from_channel = doUnpickle Dump.from_channel<do_pair
>
367 let from_string = doUnpickle Dump.from_string<do_pair
>
368 let from_stream = doUnpickle Dump.from_stream<do_pair
>
369 let to_channel channel
= doPickle (Dump.to_channel<do_pair
> channel
)
370 let to_buffer buffer
= doPickle (Dump.to_buffer<do_pair
> buffer
)
371 let to_string = doPickle Dump.to_string<do_pair
>
374 module Pickle_from_dump
376 (E
: Eq.Eq
with type a
= P.a
)
377 (T
: Typeable.Typeable
with type a
= P.a
)
378 : Pickle
with type a
= P.a
379 and type a
= T.a
= Defaults
384 module Comp
= Dynmap.Comp
(T
)(E
)
386 module W
= Utils
(T
)(E
)
389 (fun id -> W.store_repr id (Repr.of_string (P.to_string obj)))
393 find_by_id id >>= fun (repr
, dynopt
) ->
396 let obj : a
= P.from_string (Repr.to_string repr
) in
397 U.update_map id obj >>
399 | Some
obj -> return
(T.throwing_cast
obj)
402 module Pickle_unit
: Pickle
with type a
= unit = Pickle_from_dump
(Dump.Dump_unit
)(Eq.Eq_unit
)(Typeable.Typeable_unit
)
403 module Pickle_bool
= Pickle_from_dump
(Dump.Dump_bool
)(Eq.Eq_bool
)(Typeable.Typeable_bool
)
404 module Pickle_int
= Pickle_from_dump
(Dump.Dump_int
)(Eq.Eq_int
)(Typeable.Typeable_int
)
405 module Pickle_char
= Pickle_from_dump
(Dump.Dump_char
)(Eq.Eq_char
)(Typeable.Typeable_char
)
406 module Pickle_float
= Pickle_from_dump
(Dump.Dump_float
)(Eq.Eq_float
)(Typeable.Typeable_float
)
407 module Pickle_num
= Pickle_from_dump
(Dump.Dump_num
)(Eq.Eq_num
)(Typeable.Typeable_num
)
408 module Pickle_string
= Pickle_from_dump
(Dump.Dump_string
)(Eq.Eq_string
)(Typeable.Typeable_string
)
410 module Pickle_option
(V0
: Pickle
) : Pickle
with type a
= V0.a
option = Defaults
(
412 module T
= Typeable.Typeable_option
(V0.T
)
413 module E
= Eq.Eq_option
(V0.E
)
414 module Comp
= Dynmap.Comp
(T
) (E
)
418 let module W
= Utils
(T
)(E
) in
422 (fun id -> W.store_repr id (Repr.make ~constructor
:0 []))
426 V0.pickle v0
>>= fun id0
->
427 W.store_repr thisid
(Repr.make ~constructor
:1 [id0
]))
430 let module W
= Utils
(T
) in
432 | 0, [] -> return None
433 | 1, [id] -> V0.unpickle id >>= fun obj -> return
(Some
obj)
434 | n
, _ -> raise
(UnpicklingError
435 ("Unexpected tag encountered unpickling "
436 ^
"option : " ^ string_of_int n
)) in
441 module Pickle_list
(V0
: Pickle
)
442 : Pickle
with type a
= V0.a list
= Defaults
(
444 module T
= Typeable.Typeable_list
(V0.T
)
445 module E
= Eq.Eq_list
(V0.E
)
446 module Comp
= Dynmap.Comp
(T
) (E
)
449 module U
= Utils
(T
)(E
)
450 let rec pickle = function
453 (fun this -> U.store_repr this (Repr.make ~constructor
:0 []))
456 (fun this -> V0.pickle v0
>>= fun id0
->
457 pickle v1
>>= fun id1
->
458 U.store_repr this (Repr.make ~constructor
:1 [id0
; id1
]))
461 let rec unpickle id =
465 V0.unpickle car
>>= fun car
->
466 unpickle cdr
>>= fun cdr
->
468 | n
, _ -> raise
(UnpicklingError
469 ("Unexpected tag encountered unpickling "
470 ^
"option : " ^ string_of_int n
)) in
476 type 'a
ref = 'a
Pervasives.ref = { mutable contents
: 'a
}
479 (* Idea: keep pointers to values that we've serialized in a global
480 weak hash table so that we can share structure with them if we
481 deserialize any equal values in the same process *)
483 (* Idea: serialize small objects (bools, chars) in place rather than
484 using the extra level of indirection (and space) introduced by ids
487 (* Idea: bitwise output instead of bytewise. Probably a bit much to
488 implement now, but should have a significant impact (e.g. one using
489 bit instead of one byte for two-constructor sums) *)
491 (* Should we use a different representation for lists? i.e. write out
492 the length followed by the elements? we could no longer claim
493 sharing maximization, but it would actually be more efficient in