2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #ifndef incl_HPHP_CONCURRENT_SHARED_STORE_H_
18 #define incl_HPHP_CONCURRENT_SHARED_STORE_H_
26 #include <folly/SharedMutex.h>
28 #include <tbb/concurrent_hash_map.h>
29 #include <tbb/concurrent_priority_queue.h>
31 #include "hphp/util/either.h"
32 #include "hphp/util/smalllocks.h"
34 #include "hphp/runtime/base/apc-handle.h"
35 #include "hphp/runtime/base/apc-stats.h"
36 #include "hphp/runtime/base/builtin-functions.h"
37 #include "hphp/runtime/base/runtime-option.h"
38 #include "hphp/runtime/ext/apc/snapshot-loader.h"
39 #include "hphp/runtime/server/server-stats.h"
43 //////////////////////////////////////////////////////////////////////
46 * This is the in-APC representation of a value, in ConcurrentTableSharedStore.
50 * Index into cache layer. Valid indices are >= 0 and never invalidated.
52 using HotCacheIdx
= int32_t;
53 using HandleOrSerial
= Either
<APCHandle
*,char*,either_policy::high_bit
>;
55 StoreValue() = default;
56 StoreValue(const StoreValue
& o
)
57 : tagged_data
{o
.data()}
59 , dataSize
{o
.dataSize
}
61 , readOnly(o
.readOnly
)
64 // Copy everything except the lock
66 hotIndex
.store(o
.hotIndex
.load(std::memory_order_relaxed
),
67 std::memory_order_relaxed
);
70 HandleOrSerial
data() const {
71 return tagged_data
.load(std::memory_order_acquire
);
74 tagged_data
.store(nullptr, std::memory_order_release
);
76 void setHandle(APCHandle
* v
) {
78 tagged_data
.store(v
, std::memory_order_release
);
80 APCKind
getKind() const {
81 assert(data().left());
82 assert(data().left()->kind() == kind
);
85 Variant
toLocal() const {
86 return data().left()->toLocal();
88 void set(APCHandle
* v
, int64_t ttl
);
91 int32_t getSerializedSize() const {
92 assert(data().right() != nullptr);
96 bool isSerializedObj() const {
97 assert(data().right() != nullptr);
101 /* Special invalid indices; used to classify cache lookup misses. */
102 static constexpr HotCacheIdx kHotCacheUnknown
= -1;
103 static constexpr HotCacheIdx kHotCacheKnownIneligible
= -2;
105 // Mutable fields here are so that we can deserialize the object from disk
106 // while holding a const pointer to the StoreValue, or remember a cache entry.
109 * Each entry in APC is either
110 * (a) an APCHandle* or,
111 * (b) a char* to serialized prime data; unserializes to (a) on first access.
112 * All primed values have an expiration time of zero, but make use of a
113 * lock during their initial (b) -> (a) conversion, so these two fields
114 * are unioned. Readers must check for (a)/(b) using data.left/right and
115 * acquire our lock for case (b). Writers must ensure any left -> right tag
116 * update happens after all other modifictions to our StoreValue.
118 * Note also that 'expire' may not be safe to read even if data.left() is
119 * valid, due to non-atomicity of updates; use 'expired()'.
121 * Note: expiration, creation, and modification times are stored unsigned
122 * in 32-bits as seconds since the Epoch to save cache-line space.
123 * HHVM might get confused after 2106 :)
125 mutable std::atomic
<HandleOrSerial
> tagged_data
{HandleOrSerial()};
126 union { uint32_t expire
; mutable SmallLock lock
; };
127 int32_t dataSize
{0}; // For file storage, negative means serialized object
128 // Reference to any HotCache entry to be cleared if the value is treadmilled.
129 mutable std::atomic
<HotCacheIdx
> hotIndex
{kHotCacheUnknown
};
130 APCKind kind
; // Only valid if data is an APCHandle*.
131 bool readOnly
{false}; // Set for primed entries that will never change.
132 char padding
[10]; // Make APCMap nodes cache-line sized (it static_asserts).
133 uint32_t c_time
{0}; // Creation time; 0 for primed values
134 uint32_t mtime
{0}; // Modification time
137 //////////////////////////////////////////////////////////////////////
140 * Hold info about an entry in APC. Used as a temporary holder to expose
141 * information about APC entries.
165 EntryInfo(const char* apckey
,
181 static Type
getAPCType(const APCHandle
* handle
);
192 //////////////////////////////////////////////////////////////////////
195 * This is the backing store for APC. Maintains a key to value mapping, where
196 * each value optionally has a time-to-live.
198 * After a value reaches its TTL, it's considered "expired", and most
199 * operations on the table will act like it's not present (exceptions to this
200 * should be documented below). TTL function arguments to this module are
201 * specified in seconds.
203 struct ConcurrentTableSharedStore
{
204 struct KeyValuePair
{
205 KeyValuePair() : value(nullptr), sAddr(nullptr), readOnly(false) {}
211 bool inMem() const { return value
!= nullptr; }
214 ConcurrentTableSharedStore() = default;
215 ConcurrentTableSharedStore(const ConcurrentTableSharedStore
&) = delete;
216 ConcurrentTableSharedStore
&
217 operator=(const ConcurrentTableSharedStore
&) = delete;
220 * Retrieve a value from the store. Returns false if the value wasn't
221 * contained in the table (or was expired).
223 bool get(const String
& key
, Variant
& value
);
226 * Add a value to the store if no (unexpired) value with this key is already
229 * The requested ttl is limited by the ApcTTLLimit.
231 * Returns: true if the value was added, including if we've replaced an
234 bool add(const String
& key
, const Variant
& val
, int64_t ttl
);
237 * Set the value for `key' to `val'. If there was an existing value, it is
240 * The requested ttl is limited by the ApcTTLLimit, unless we're overwriting
243 void set(const String
& key
, const Variant
& val
, int64_t ttl
);
246 * Set the value for `key' to `val', without any TTL, even if it wasn't
249 void setWithoutTTL(const String
& key
, const Variant
& val
);
252 * Increment the value for the key `key' by step, iff it is present,
253 * non-expired, and a numeric (KindOfInt64 or KindOfDouble) value. Sets
254 * `found' to true if the increment is performed, false otherwise.
256 * Returns: the new value for the key, or zero if the key was not found.
258 int64_t inc(const String
& key
, int64_t step
, bool& found
);
261 * Attempt to atomically compare and swap the value for the key `key' from
262 * `oldVal' to `newVal'. If the key is present, non-expired, and has the
263 * same value as `oldVal' (after conversions), set it to `newVal' and return
264 * true. Otherwise returns false.
266 bool cas(const String
& key
, int64_t oldVal
, int64_t newVal
);
269 * Returns: true if this key exists in the store, and is not expired.
271 bool exists(const String
& key
);
274 * Remove the specified key, if it exists in the table.
276 * Returns: false if the key was not in the table, true if the key was in the
277 * table **even if it was expired**.
279 bool eraseKey(const String
& key
);
282 * Clear the entire APC table.
287 * The API for priming APC. Poorly documented.
289 void prime(std::vector
<KeyValuePair
>&& vars
);
290 bool constructPrime(const String
& v
, KeyValuePair
& item
, bool serialized
);
291 bool constructPrime(const Variant
& v
, KeyValuePair
& item
);
293 // Returns false on failure (in particular, for the old file format).
294 bool primeFromSnapshot(const char* filename
);
295 // Evict any file-backed APC values from OS page cache.
299 * Debugging. Dump information about the table to an output stream.
301 * This is extremely expensive and not recommended for use outside of
302 * development scenarios.
304 enum class DumpMode
{
309 void dump(std::ostream
& out
, DumpMode dumpMode
);
312 * Dump random key and entry size to output stream
314 void dumpRandomKeys(std::ostream
& out
, uint32_t count
);
317 * Return 'count' randomly chosen entries, possibly with duplicates. If the
318 * store is empty or this operation is not supported, returns an empty vector.
320 std::vector
<EntryInfo
> sampleEntriesInfo(uint32_t count
);
323 * Debugging. Access information about all the entries in this table.
325 * This is extremely expensive and not recommended for use outside of
326 * development scenarios.
328 std::vector
<EntryInfo
> getEntriesInfo();
331 // Fake a StringData as a char* with the high bit set. charHashCompare below
332 // will properly handle the value and reuse the hash value of the StringData.
334 static char* tagStringData(StringData
* s
) {
335 return reinterpret_cast<char*>(-reinterpret_cast<intptr_t>(s
));
338 static StringData
* getStringData(const char* s
) {
339 assert(reinterpret_cast<intptr_t>(s
) < 0);
340 return reinterpret_cast<StringData
*>(-reinterpret_cast<intptr_t>(s
));
343 inline static bool isTaggedStringData(const char* s
) {
344 return reinterpret_cast<intptr_t>(s
) < 0;
348 struct CharHashCompare
{
349 bool equal(const char* s1
, const char* s2
) const {
351 // tbb implementation call equal with the second pointer being the
352 // value in the table and thus not a StringData*. We are asserting
353 // to make sure that is the case
354 assert(!isTaggedStringData(s2
));
355 if (isTaggedStringData(s1
)) {
356 s1
= getStringData(s1
)->data();
358 return strcmp(s1
, s2
) == 0;
360 size_t hash(const char* s
) const {
362 return isTaggedStringData(s
) ? getStringData(s
)->hash() :
363 StringData::hash(s
, strlen(s
));
368 template<typename Key
, typename T
, typename HashCompare
>
370 tbb::concurrent_hash_map
<Key
,T
,HashCompare
,HugeAllocator
<char>> {
371 // Append a random entry to 'entries'. The map must be non-empty and not
372 // concurrently accessed. Returns false if this operation is not supported.
373 bool getRandomAPCEntry(std::vector
<EntryInfo
>& entries
);
375 using node
= typename
tbb::concurrent_hash_map
<Key
,T
,HashCompare
,
376 HugeAllocator
<char>>::node
;
377 static_assert(sizeof(node
) == 64, "Node should be cache-line sized");
380 using Map
= APCMap
<const char*,StoreValue
,CharHashCompare
>;
381 using ExpirationPair
= std::pair
<intptr_t,time_t>;
382 using ExpMap
= tbb::concurrent_hash_map
<intptr_t,int>;
384 struct ExpirationCompare
{
385 bool operator()(const ExpirationPair
& p1
, const ExpirationPair
& p2
) const {
386 return p1
.second
> p2
.second
;
391 bool eraseImpl(const char*, bool, int64_t, ExpMap::accessor
* expAcc
);
392 bool storeImpl(const String
&, const Variant
&, int64_t, bool, bool);
394 bool handlePromoteObj(const String
&, APCHandle
*, const Variant
&);
395 APCHandle
* unserialize(const String
&, StoreValue
*);
396 void dumpKeyAndValue(std::ostream
&);
397 static EntryInfo
makeEntryInfo(const char*, StoreValue
*, int64_t curr_time
);
401 folly::SharedMutex m_lock
;
403 * m_expQueue is a queue of keys to be expired. We purge items from
404 * it every n (configurable) apc_stores.
406 * We can't (easily) remove items from m_expQueue, so if we add a
407 * new entry every time an item is updated we could end up with a
408 * lot of copies of the same key in the queue. To avoid that, we use
409 * m_expMap, and only add an entry to the queue if there isn't one
412 * In the current implementation, that means that if an element is
413 * updated before it expires, when its entry in m_expQueue is
414 * processed, it does nothing; and from then on, the item has no
415 * entry in the queue. I think this is intentional, because items
416 * that are updated frequently (or at all) are probably read
417 * frequently; so it will be expired naturally. It also means that
418 * we don't bother updating the queue every time for keys that are
419 * updated frequently.
421 * This implementation uses the apc key's address as the key into
422 * m_expMap, and as the identifier in ExpirationPair. We ensure that
423 * the m_expMap entry is removed before the apc key is freed, and
424 * guarantee that the key is valid as a char* if it exists in
425 * m_expMap. If the entry subsequently pops off m_expQueue, we check
426 * to see if its in m_expMap, and only try to purge it from apc if
429 * Note that its possible that the apc key was freed and
430 * reallocated, and the entry in m_expQueue doesn't correspond to
431 * the new key; but thats fine - if the key really has expired, it
432 * will be purged, and if not, nothing will happen.
434 tbb::concurrent_priority_queue
<ExpirationPair
,
435 ExpirationCompare
> m_expQueue
;
437 std::atomic
<uint64_t> m_purgeCounter
{0};
439 std::unique_ptr
<SnapshotLoader
> m_snapshotLoader
;
442 //////////////////////////////////////////////////////////////////////