From 438b54f98e0f85f0ed59e9921dd4f7ccb83fcab9 Mon Sep 17 00:00:00 2001 From: Andrew Kennedy Date: Tue, 12 Jul 2016 01:39:36 -0700 Subject: [PATCH] Avoid hh_get producing temporary strings on local heaps Summary: Maps in the shared heap (implemented in hh_shared.c and sharedMem.ml) currently work as follows. * To add a value to a map, the value is first serialized to a string (in Serial.make), using Caml's Marshal.to_string. Space is then allocated in the shared heap, and the contents of the string are copied across (in hh_store_ocaml). * To get a value from a map, space is allocated for a string, and the bytes copied across from the shared heap (in hh_get). The string is the deserialized to a value (in Serial.get), using Caml's Marshal.from_string. For "get", we don't need to create a string and then deserialize (the string is immediately garbage). Instead, we can use the native caml_input_value_from_block to deserialize directly from the shared heap. We save on garbage (although it would never leave the minor generation), and on allocation and copying costs. [Not in this diff: for "add", it's not so clear what can be done, as we don't know ahead of time how much space to allocate. The Caml code for marshalling creates a sequence of fixed-size blocks using malloc, then creates the string off these, but we don't have control over this, and we're not using malloc, instead rolling our own allocator in hh_shared. But it might be worth looking at in the future.] Reviewed By: gabelevi, dlreeves Differential Revision: D3456769 fbshipit-source-id: 1e5399a91db405dce5d230de373802ef88cc9d5a --- hphp/hack/src/heap/hh_shared.c | 23 ++++++++++++++++++---- hphp/hack/src/heap/sharedMem.ml | 42 ++++++++++++++++++++++++----------------- 2 files changed, 44 insertions(+), 21 deletions(-) diff --git a/hphp/hack/src/heap/hh_shared.c b/hphp/hack/src/heap/hh_shared.c index 46e0dbebd2b..1bed66ce7c7 100644 --- a/hphp/hack/src/heap/hh_shared.c +++ b/hphp/hack/src/heap/hh_shared.c @@ -85,6 +85,7 @@ #include #include #include +#include #include @@ -1517,22 +1518,36 @@ value hh_mem(value key) { } /*****************************************************************************/ -/* Returns the value associated to a given key. The key MUST be present. */ +/* Returns the value associated to a given key, and deserialize it. */ +/* The key MUST be present. */ /*****************************************************************************/ -CAMLprim value hh_get(value key) { +CAMLprim value hh_get_and_deserialize(value key) { CAMLparam1(key); CAMLlocal1(result); unsigned int slot = find_slot(key); assert(hashtbl[slot].hash == get_hash(key)); size_t size = *(size_t*)(hashtbl[slot].addr - sizeof(size_t)); - result = caml_alloc_string(size); - memcpy(String_val(result), hashtbl[slot].addr, size); + result = caml_input_value_from_block(hashtbl[slot].addr, size); CAMLreturn(result); } /*****************************************************************************/ +/* Returns the size of the value associated to a given key. */ +/* The key MUST be present. */ +/*****************************************************************************/ +CAMLprim value hh_get_size(value key) { + CAMLparam1(key); + + unsigned int slot = find_slot(key); + assert(hashtbl[slot].hash == get_hash(key)); + size_t size = *(size_t*)(hashtbl[slot].addr - sizeof(size_t)); + + CAMLreturn(Long_val(size)); +} + +/*****************************************************************************/ /* Moves the data associated to key1 to key2. * key1 must be present. * key2 must be free. diff --git a/hphp/hack/src/heap/sharedMem.ml b/hphp/hack/src/heap/sharedMem.ml index bbe697ad445..5891ca602ac 100644 --- a/hphp/hack/src/heap/sharedMem.ml +++ b/hphp/hack/src/heap/sharedMem.ml @@ -340,8 +340,11 @@ end module Serial: functor(Value:Value.Type) -> sig type t + (* Serialize a value and log stats *) val make : Value.t -> t - val get : t -> Value.t + + (* Log stats for the number of bytes given *) + val log_deserialize : int -> Obj.t -> unit end = functor(Value:Value.Type) -> struct type t = string @@ -358,33 +361,30 @@ end = functor(Value:Value.Type) -> struct end; s - let get x = - let r = Marshal.from_string x 0 in - if hh_log_level() > 0 - then begin - let sharedheap = float (String.length x) in - let localheap = float (value_size (Obj.repr r)) in + let log_deserialize l r = + let sharedheap = float l in + let localheap = float (value_size r) in + begin Measure.sample (Value.description ^ " (bytes deserialized from shared heap)") sharedheap; Measure.sample ("ALL bytes deserialized from shared heap") sharedheap; Measure.sample (Value.description ^ " (bytes allocated for deserialized value)") localheap; Measure.sample ("ALL bytes allocated for deserialized value") localheap - end; - r + end end - (*****************************************************************************) (* Raw interface to shared memory (cf hh_shared.c for the underlying * representation). *) (*****************************************************************************) -module Raw (Key: Key) (Value: sig type t end) = struct +module Raw (Key: Key) (SerializedValue: sig type t end) (Value:Value.Type) = struct - external hh_add : Key.md5 -> Value.t -> unit = "hh_add" + external hh_add : Key.md5 -> SerializedValue.t -> unit = "hh_add" external hh_mem : Key.md5 -> bool = "hh_mem" - external hh_get : Key.md5 -> Value.t = "hh_get" + external hh_get_size : Key.md5 -> int = "hh_get_size" + external hh_get_and_deserialize: Key.md5 -> Value.t = "hh_get_and_deserialize" external hh_remove : Key.md5 -> unit = "hh_remove" external hh_move : Key.md5 -> Key.md5 -> unit = "hh_move" end @@ -423,7 +423,7 @@ module New : functor (Key : Key) -> functor(Value: Value.Type) -> sig end = functor (Key : Key) -> functor (Value : Value.Type) -> struct module Data = Serial(Value) - module Raw = Raw (Key) (Data) + module Raw = Raw (Key) (Data) (Value) let add key value = Raw.hh_add (Key.md5 key) (Data.make value) let mem key = Raw.hh_mem (Key.md5 key) @@ -431,7 +431,11 @@ end = functor (Key : Key) -> functor (Value : Value.Type) -> struct let get key = let key = Key.md5 key in if Raw.hh_mem key - then Some (Data.get (Raw.hh_get key)) + then + let v = Raw.hh_get_and_deserialize key in + if hh_log_level() > 0 + then (Data.log_deserialize (Raw.hh_get_size key) (Obj.repr v)); + Some v else None let find_unsafe key = @@ -471,12 +475,16 @@ module Old : functor (Key : Key) -> functor (Value : Value.Type) -> sig end = functor (Key : Key) -> functor (Value: Value.Type) -> struct module Data = Serial(Value) - module Raw = Raw (Key) (Data) + module Raw = Raw (Key) (Data) (Value) let get key = let key = Key.md5_old key in if Raw.hh_mem key - then Some (Data.get (Raw.hh_get key)) + then + let v = Raw.hh_get_and_deserialize key in + if hh_log_level() > 0 + then Data.log_deserialize (Raw.hh_get_size key) (Obj.repr v); + Some v else None let mem key = Raw.hh_mem (Key.md5_old key) -- 2.11.4.GIT