Source: official-gcc.git — libitm/method-ml.cc
Blob: b4cabc8805268d2359fe846bf4d8167ffd64f0bb (snapshot taken at the commit titled "Fix memory leak in cilk")
1 /* Copyright (C) 2012-2015 Free Software Foundation, Inc.
2 Contributed by Torvald Riegel <triegel@redhat.com>.
4 This file is part of the GNU Transactional Memory Library (libitm).
6 Libitm is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 more details.
16 Under Section 7 of GPL version 3, you are granted additional
17 permissions described in the GCC Runtime Library Exception, version
18 3.1, as published by the Free Software Foundation.
20 You should have received a copy of the GNU General Public License and
21 a copy of the GCC Runtime Library Exception along with this program;
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 <http://www.gnu.org/licenses/>. */
25 #include "libitm_i.h"
27 using namespace GTM;
29 namespace {
// This group consists of all TM methods that synchronize via multiple locks
// (or ownership records).
struct ml_mg : public method_group
{
  // The lock bit is the most-significant bit of an orec word.
  static const gtm_word LOCK_BIT = (~(gtm_word)0 >> 1) + 1;
  // The lowest INCARNATION_BITS bits of an orec hold the incarnation number.
  static const gtm_word INCARNATION_BITS = 3;
  static const gtm_word INCARNATION_MASK = 7;
  // Maximum time is all bits except the lock bit, the overflow reserve bit,
  // and the incarnation bits.
  static const gtm_word TIME_MAX = (~(gtm_word)0 >> (2 + INCARNATION_BITS));
  // The overflow reserve bit is the MSB of the timestamp part of an orec,
  // so we can have TIME_MAX+1 pending timestamp increases before we overflow.
  static const gtm_word OVERFLOW_RESERVE = TIME_MAX + 1;

  static bool is_locked(gtm_word o) { return o & LOCK_BIT; }
  // Returns the unique "locked by TX" marker for an orec: the transaction's
  // address shifted right by one (to make room for the lock bit) with the
  // lock bit set.  The value is only ever compared against, never
  // dereferenced.
  static gtm_word set_locked(gtm_thread *tx)
  {
    return ((uintptr_t)tx >> 1) | LOCK_BIT;
  }
  // Returns a time that includes the lock bit, which is required by both
  // validate() and is_more_recent_or_locked().
  static gtm_word get_time(gtm_word o) { return o >> INCARNATION_BITS; }
  static gtm_word set_time(gtm_word time) { return time << INCARNATION_BITS; }
  static bool is_more_recent_or_locked(gtm_word o, gtm_word than_time)
  {
    // LOCK_BIT is the MSB; thus, if O is locked, it is larger than TIME_MAX.
    return get_time(o) > than_time;
  }
  // True iff the incarnation number in O can be incremented once more
  // without wrapping into the timestamp bits.
  static bool has_incarnation_left(gtm_word o)
  {
    return (o & INCARNATION_MASK) < INCARNATION_MASK;
  }
  static gtm_word inc_incarnation(gtm_word o) { return o + 1; }

  // The shared time base.
  atomic<gtm_word> time __attribute__((aligned(HW_CACHELINE_SIZE)));

  // The array of ownership records.  Cacheline-aligned and tail-padded to
  // keep the pointer on its own cacheline (separate from the time base).
  atomic<gtm_word>* orecs __attribute__((aligned(HW_CACHELINE_SIZE)));
  char tailpadding[HW_CACHELINE_SIZE - sizeof(atomic<gtm_word>*)];

  // Location-to-orec mapping.  Stripes of 32B mapped to 2^16 orecs using
  // multiplicative hashing.  See Section 5.2.2 of Torvald Riegel's PhD thesis
  // for the background on this choice of hash function and parameters:
  // http://nbn-resolving.de/urn:nbn:de:bsz:14-qucosa-115596
  // We pick the Mult32 hash because it works well with fewer orecs (i.e.,
  // less space overhead and just 32b multiplication).
  // We may want to check and potentially change these settings once we get
  // better or just more benchmarks.
  static const gtm_word L2O_ORECS_BITS = 16;
  static const gtm_word L2O_ORECS = 1 << L2O_ORECS_BITS;
  // An iterator over the orecs covering the region [addr,addr+len).
  struct orec_iterator
  {
    static const gtm_word L2O_SHIFT = 5;
    static const uint32_t L2O_MULT32 = 81007;
    uint32_t mult;      // Running multiplicative-hash state for the stripe.
    size_t orec;        // Index of the current orec.
    size_t orec_end;    // Index of the orec right after the region.
    orec_iterator (const void* addr, size_t len)
    {
      uint32_t a = (uintptr_t) addr >> L2O_SHIFT;
      // AE is the first stripe after the region (end rounded up to the next
      // stripe boundary).
      uint32_t ae = ((uintptr_t) addr + len + (1 << L2O_SHIFT) - 1)
	  >> L2O_SHIFT;
      mult = a * L2O_MULT32;
      orec = mult >> (32 - L2O_ORECS_BITS);
      // We can't really avoid this second multiplication unless we use a
      // branch instead or know more about the alignment of addr.  (We often
      // know len at compile time because of instantiations of functions
      // such as _ITM_RU* for accesses of specific lengths.
      orec_end = (ae * L2O_MULT32) >> (32 - L2O_ORECS_BITS);
    }
    size_t get() { return orec; }
    void advance()
    {
      // We cannot simply increment orec because L2O_MULT32 is larger than
      // 1 << (32 - L2O_ORECS_BITS), and thus an increase of the stripe (i.e.,
      // addr >> L2O_SHIFT) could increase the resulting orec index by more
      // than one; with the current parameters, we would roughly acquire a
      // fourth more orecs than necessary for regions covering more than orec.
      // Keeping mult around as extra state shouldn't matter much.
      mult += L2O_MULT32;
      orec = mult >> (32 - L2O_ORECS_BITS);
    }
    bool reached_end() { return orec == orec_end; }
  };

  // Allocates the orec array and resets the time base.
  virtual void init()
  {
    // We assume that an atomic<gtm_word> is backed by just a gtm_word, so
    // starting with zeroed memory is fine.
    orecs = (atomic<gtm_word>*) xcalloc(
	sizeof(atomic<gtm_word>) * L2O_ORECS, true);
    // This store is only executed while holding the serial lock, so relaxed
    // memory order is sufficient here.
    time.store(0, memory_order_relaxed);
  }

  virtual void fini()
  {
    free(orecs);
  }

  // We only re-initialize when our time base overflows.  Thus, only reset
  // the time base and the orecs but do not re-allocate the orec array.
  virtual void reinit()
  {
    // This store is only executed while holding the serial lock, so relaxed
    // memory order is sufficient here.  Same holds for the memset.
    time.store(0, memory_order_relaxed);
    memset(orecs, 0, sizeof(atomic<gtm_word>) * L2O_ORECS);
  }
};

// The single, shared method-group instance.
static ml_mg o_ml_mg;
// The multiple lock, write-through TM method.
// Maps each memory location to one of the orecs in the orec array, and then
// acquires the associated orec eagerly before writing through.
// Writes require undo-logging because we are dealing with several locks/orecs
// and need to resolve deadlocks if necessary by aborting one of the
// transactions.
// Reads do time-based validation with snapshot time extensions.  Incarnation
// numbers are used to decrease contention on the time base (with those,
// aborted transactions do not need to acquire a new version number for the
// data that has been previously written in the transaction and needs to be
// rolled back).
// gtm_thread::shared_state is used to store a transaction's current
// snapshot time (or commit time).  The serial lock uses ~0 for inactive
// transactions and 0 for active ones.  Thus, we always have a meaningful
// timestamp in shared_state that can be used to implement quiescence-based
// privatization safety.
class ml_wt_dispatch : public abi_dispatch
{
protected:
  // Acquires all orecs covering [addr,addr+len) for TX and undo-logs the
  // region.  Restarts TX if an orec is locked by another transaction or if
  // the snapshot cannot be extended far enough.
  static void pre_write(gtm_thread *tx, const void *addr, size_t len)
  {
    gtm_word snapshot = tx->shared_state.load(memory_order_relaxed);
    gtm_word locked_by_tx = ml_mg::set_locked(tx);

    // Lock all orecs that cover the region.
    ml_mg::orec_iterator oi(addr, len);
    do
      {
	// Load the orec.  Relaxed memory order is sufficient here because
	// either we have acquired the orec or we will try to acquire it with
	// a CAS with stronger memory order.
	gtm_word o = o_ml_mg.orecs[oi.get()].load(memory_order_relaxed);

	// Check whether we have acquired the orec already.
	if (likely (locked_by_tx != o))
	  {
	    // If not, acquire.  Make sure that our snapshot time is larger or
	    // equal than the orec's version to avoid masking invalidations of
	    // our snapshot with our own writes.
	    if (unlikely (ml_mg::is_locked(o)))
	      tx->restart(RESTART_LOCKED_WRITE);

	    if (unlikely (ml_mg::get_time(o) > snapshot))
	      {
		// We only need to extend the snapshot if we have indeed read
		// from this orec before.  Given that we are an update
		// transaction, we will have to extend anyway during commit.
		// ??? Scan the read log instead, aborting if we have read
		// from data covered by this orec before?
		snapshot = extend(tx);
	      }
	    // We need acquire memory order here to synchronize with other
	    // (ownership) releases of the orec.  We do not need acq_rel order
	    // because whenever another thread reads from this CAS'
	    // modification, then it will abort anyway and does not rely on
	    // any further happens-before relation to be established.
	    if (unlikely (!o_ml_mg.orecs[oi.get()].compare_exchange_strong(
		o, locked_by_tx, memory_order_acquire)))
	      tx->restart(RESTART_LOCKED_WRITE);

	    // We use an explicit fence here to avoid having to use release
	    // memory order for all subsequent data stores.  This fence will
	    // synchronize with loads of the data with acquire memory order.
	    // See post_load() for why this is necessary.
	    // Adding release memory order to the prior CAS is not sufficient,
	    // at least according to the Batty et al. formalization of the
	    // memory model.
	    atomic_thread_fence(memory_order_release);

	    // We log the previous value here to be able to use incarnation
	    // numbers when we have to roll back.
	    // ??? Reserve capacity early to avoid capacity checks here?
	    gtm_rwlog_entry *e = tx->writelog.push();
	    e->orec = o_ml_mg.orecs + oi.get();
	    e->value = o;
	  }
	oi.advance();
      }
    while (!oi.reached_end());

    // Do undo logging.  We do not know which region prior writes logged
    // (even if orecs have been acquired), so just log everything.
    tx->undolog.log(addr, len);
  }

  // Convenience overload that looks up the current thread's transaction.
  static void pre_write(const void *addr, size_t len)
  {
    gtm_thread *tx = gtm_thr();
    pre_write(tx, addr, len);
  }

  // Returns true iff all the orecs in our read log still have the same time
  // or have been locked by the transaction itself.
  static bool validate(gtm_thread *tx)
  {
    gtm_word locked_by_tx = ml_mg::set_locked(tx);
    // ??? This might get called from pre_load() via extend().  In that case,
    // we don't really need to check the new entries that pre_load() is
    // adding.  Stop earlier?
    for (gtm_rwlog_entry *i = tx->readlog.begin(), *ie = tx->readlog.end();
	 i != ie; i++)
      {
	// Relaxed memory order is sufficient here because we do not need to
	// establish any new synchronizes-with relationships.  We only need
	// to read a value that is at least as current as enforced by the
	// callers: extend() loads global time with acquire, and trycommit()
	// increments global time with acquire.  Therefore, we will see the
	// most recent orec updates before the global time that we load.
	gtm_word o = i->orec->load(memory_order_relaxed);
	// We compare only the time stamp and the lock bit here.  We know that
	// we have read only committed data before, so we can ignore
	// intermediate yet rolled-back updates presented by the incarnation
	// number bits.
	if (ml_mg::get_time(o) != ml_mg::get_time(i->value)
	    && o != locked_by_tx)
	  return false;
      }
    return true;
  }

  // Tries to extend the snapshot to a more recent time.  Returns the new
  // snapshot time and updates TX->SHARED_STATE.  If the snapshot cannot be
  // extended to the current global time, TX is restarted.
  static gtm_word extend(gtm_thread *tx)
  {
    // We read global time here, even if this isn't strictly necessary
    // because we could just return the maximum of the timestamps that
    // validate sees.  However, the potential cache miss on global time is
    // probably a reasonable price to pay for avoiding unnecessary extensions
    // in the future.
    // We need acquire memory order because we have to synchronize with the
    // increment of global time by update transactions, whose lock
    // acquisitions we have to observe (also see trycommit()).
    gtm_word snapshot = o_ml_mg.time.load(memory_order_acquire);
    if (!validate(tx))
      tx->restart(RESTART_VALIDATE_READ);

    // Update our public snapshot time.  Probably useful to decrease waiting
    // due to quiescence-based privatization safety.
    // Use release memory order to establish synchronizes-with with the
    // privatizers; prior data loads should happen before the privatizers
    // potentially modify anything.
    tx->shared_state.store(snapshot, memory_order_release);
    return snapshot;
  }

  // First pass over orecs.  Load and check all orecs that cover the region.
  // Write to read log, extend snapshot time if necessary.
  static gtm_rwlog_entry* pre_load(gtm_thread *tx, const void* addr,
				   size_t len)
  {
    // Don't obtain an iterator yet because the log might get resized.
    size_t log_start = tx->readlog.size();
    gtm_word snapshot = tx->shared_state.load(memory_order_relaxed);
    gtm_word locked_by_tx = ml_mg::set_locked(tx);

    ml_mg::orec_iterator oi(addr, len);
    do
      {
	// We need acquire memory order here so that this load will
	// synchronize with the store that releases the orec in trycommit().
	// In turn, this makes sure that subsequent data loads will read from
	// a visible sequence of side effects that starts with the most recent
	// store to the data right before the release of the orec.
	gtm_word o = o_ml_mg.orecs[oi.get()].load(memory_order_acquire);

	if (likely (!ml_mg::is_more_recent_or_locked(o, snapshot)))
	  {
	  success:
	    gtm_rwlog_entry *e = tx->readlog.push();
	    e->orec = o_ml_mg.orecs + oi.get();
	    e->value = o;
	  }
	else if (!ml_mg::is_locked(o))
	  {
	    // We cannot read this part of the region because it has been
	    // updated more recently than our snapshot time.  If we can extend
	    // our snapshot, then we can read.
	    snapshot = extend(tx);
	    goto success;
	  }
	else
	  {
	    // If the orec is locked by us, just skip it because we can just
	    // read from it.  Otherwise, restart the transaction.
	    if (o != locked_by_tx)
	      tx->restart(RESTART_LOCKED_READ);
	  }
	oi.advance();
      }
    while (!oi.reached_end());
    return &tx->readlog[log_start];
  }

  // Second pass over orecs, verifying that we had a consistent read.
  // Restart the transaction if any of the orecs is locked by another
  // transaction.
  static void post_load(gtm_thread *tx, gtm_rwlog_entry* log)
  {
    for (gtm_rwlog_entry *end = tx->readlog.end(); log != end; log++)
      {
	// Check that the snapshot is consistent.  We expect the previous data
	// load to have acquire memory order, or be atomic and followed by an
	// acquire fence.
	// As a result, the data load will synchronize with the release fence
	// issued by the transactions whose data updates the data load has read
	// from.  This forces the orec load to read from a visible sequence of
	// side effects that starts with the other updating transaction's
	// store that acquired the orec and set it to locked.
	// We therefore either read a value with the locked bit set (and
	// restart) or read an orec value that was written after the data had
	// been written.  Either will allow us to detect inconsistent reads
	// because it will have a higher/different value.
	// Also note that differently to validate(), we compare the raw value
	// of the orec here, including incarnation numbers.  We must prevent
	// returning uncommitted data from loads (whereas when validating, we
	// already performed a consistent load).
	gtm_word o = log->orec->load(memory_order_relaxed);
	if (log->value != o)
	  tx->restart(RESTART_VALIDATE_READ);
      }
  }

  // Transactional load of a single value of type V, honoring the
  // read/write modifier MOD.
  template <typename V> static V load(const V* addr, ls_modifier mod)
  {
    // Read-for-write should be unlikely, but we need to handle it or will
    // break later WaW optimizations.
    if (unlikely(mod == RfW))
      {
	pre_write(addr, sizeof(V));
	return *addr;
      }
    if (unlikely(mod == RaW))
      return *addr;
    // ??? Optimize for RaR?

    gtm_thread *tx = gtm_thr();
    gtm_rwlog_entry* log = pre_load(tx, addr, sizeof(V));

    // Load the data.
    // This needs to have acquire memory order (see post_load()).
    // Alternatively, we can put an acquire fence after the data load but this
    // is probably less efficient.
    // FIXME We would need an atomic load with acquire memory order here but
    // we can't just forge an atomic load for nonatomic data because this
    // might not work on all implementations of atomics.  However, we need
    // the acquire memory order and we can only establish this if we link
    // it to the matching release using a reads-from relation between atomic
    // loads.  Also, the compiler is allowed to optimize nonatomic accesses
    // differently than atomic accesses (e.g., if the load would be moved to
    // after the fence, we potentially don't synchronize properly anymore).
    // Instead of the following, just use an ordinary load followed by an
    // acquire fence, and hope that this is good enough for now:
    // V v = atomic_load_explicit((atomic<V>*)addr, memory_order_acquire);
    V v = *addr;
    atomic_thread_fence(memory_order_acquire);

    // ??? Retry the whole load if it wasn't consistent?
    post_load(tx, log);

    return v;
  }

  // Transactional store of VALUE to ADDR, honoring the modifier MOD
  // (write-after-write skips orec acquisition and undo logging).
  template <typename V> static void store(V* addr, const V value,
					  ls_modifier mod)
  {
    if (likely(mod != WaW))
      pre_write(addr, sizeof(V));
    // FIXME We would need an atomic store here but we can't just forge an
    // atomic load for nonatomic data because this might not work on all
    // implementations of atomics.  However, we need this store to link the
    // release fence in pre_write() to the acquire operation in load, which
    // is only guaranteed if we have a reads-from relation between atomic
    // accesses.  Also, the compiler is allowed to optimize nonatomic accesses
    // differently than atomic accesses (e.g., if the store would be moved
    // to before the release fence in pre_write(), things could go wrong).
    // atomic_store_explicit((atomic<V>*)addr, value, memory_order_relaxed);
    *addr = value;
  }

public:
  // Transactional memcpy/memmove of SIZE bytes from SRC to DST, with the
  // source/destination access modifiers deciding which sides need orec
  // acquisition, read logging, and validation.
  static void memtransfer_static(void *dst, const void* src, size_t size,
      bool may_overlap, ls_modifier dst_mod, ls_modifier src_mod)
  {
    gtm_rwlog_entry* log = 0;
    gtm_thread *tx = 0;

    if (src_mod == RfW)
      {
	tx = gtm_thr();
	pre_write(tx, src, size);
      }
    else if (src_mod != RaW && src_mod != NONTXNAL)
      {
	tx = gtm_thr();
	log = pre_load(tx, src, size);
      }
    // ??? Optimize for RaR?

    if (dst_mod != NONTXNAL && dst_mod != WaW)
      {
	// TX has not been fetched yet iff the source side took neither of
	// the branches above.
	if (src_mod != RfW && (src_mod == RaW || src_mod == NONTXNAL))
	  tx = gtm_thr();
	pre_write(tx, dst, size);
      }

    // FIXME We should use atomics here (see store()).  Let's just hope that
    // memcpy/memmove are good enough.
    if (!may_overlap)
      ::memcpy(dst, src, size);
    else
      ::memmove(dst, src, size);

    // ??? Retry the whole memtransfer if it wasn't consistent?
    if (src_mod != RfW && src_mod != RaW && src_mod != NONTXNAL)
      {
	// See load() for why we need the acquire fence here.
	atomic_thread_fence(memory_order_acquire);
	post_load(tx, log);
      }
  }

  // Transactional memset of SIZE bytes at DST.
  static void memset_static(void *dst, int c, size_t size, ls_modifier mod)
  {
    if (mod != WaW)
      pre_write(dst, size);
    // FIXME We should use atomics here (see store()).  Let's just hope that
    // memset is good enough.
    ::memset(dst, c, size);
  }

  // Starts or restarts a transaction: takes a snapshot of the global time
  // base and publishes it in shared_state.
  virtual gtm_restart_reason begin_or_restart()
  {
    // We don't need to do anything for nested transactions.
    gtm_thread *tx = gtm_thr();
    if (tx->parent_txns.size() > 0)
      return NO_RESTART;

    // Read the current time, which becomes our snapshot time.
    // Use acquire memory order so that we see the lock acquisitions by update
    // transactions that incremented the global time (see trycommit()).
    gtm_word snapshot = o_ml_mg.time.load(memory_order_acquire);
    // Re-initialize method group on time overflow.
    if (snapshot >= o_ml_mg.TIME_MAX)
      return RESTART_INIT_METHOD_GROUP;

    // We don't need to enforce any ordering for the following store.  There
    // are no earlier data loads in this transaction, so the store cannot
    // become visible before those (which could lead to the violation of
    // privatization safety).  The store can become visible after later loads
    // but this does not matter because the previous value will have been
    // smaller or equal (the serial lock will set shared_state to zero when
    // marking the transaction as active, and restarts enforce immediate
    // visibility of a smaller or equal value with a barrier (see
    // rollback()).
    tx->shared_state.store(snapshot, memory_order_relaxed);
    return NO_RESTART;
  }

  // Tries to commit the current transaction; on success, sets PRIV_TIME to
  // the commit time so the caller can enforce privatization safety.
  virtual bool trycommit(gtm_word& priv_time)
  {
    gtm_thread* tx = gtm_thr();

    // If we haven't updated anything, we can commit.
    if (!tx->writelog.size())
      {
	tx->readlog.clear();
	return true;
      }

    // Get a commit time.
    // Overflow of o_ml_mg.time is prevented in begin_or_restart().
    // We need acq_rel here because (1) the acquire part is required for our
    // own subsequent call to validate(), and the release part is necessary to
    // make other threads' validate() work as explained there and in extend().
    gtm_word ct = o_ml_mg.time.fetch_add(1, memory_order_acq_rel) + 1;

    // Extend our snapshot time to at least our commit time.
    // Note that we do not need to validate if our snapshot time is right
    // before the commit time because we are never sharing the same commit
    // time with other transactions.
    // No need to reset shared_state, which will be modified by the serial
    // lock right after our commit anyway.
    gtm_word snapshot = tx->shared_state.load(memory_order_relaxed);
    if (snapshot < ct - 1 && !validate(tx))
      return false;

    // Release orecs.
    // See pre_load() / post_load() for why we need release memory order.
    // ??? Can we use a release fence and relaxed stores?
    gtm_word v = ml_mg::set_time(ct);
    for (gtm_rwlog_entry *i = tx->writelog.begin(), *ie = tx->writelog.end();
	 i != ie; i++)
      i->orec->store(v, memory_order_release);

    // We're done, clear the logs.
    tx->writelog.clear();
    tx->readlog.clear();

    // Need to ensure privatization safety.  Every other transaction must
    // have a snapshot time that is at least as high as our commit time
    // (i.e., our commit must be visible to them).
    priv_time = ct;
    return true;
  }

  // Rolls back the current transaction, releasing all acquired orecs
  // either by bumping their incarnation number or (on incarnation
  // overflow) by writing a fresh timestamp.
  virtual void rollback(gtm_transaction_cp *cp)
  {
    // We don't do anything for rollbacks of nested transactions.
    // ??? We could release locks here if we snapshot writelog size.  readlog
    // is similar.  This is just a performance optimization though.  Nested
    // aborts should be rather infrequent, so the additional save/restore
    // overhead for the checkpoints could be higher.
    if (cp != 0)
      return;

    gtm_thread *tx = gtm_thr();
    gtm_word overflow_value = 0;

    // Release orecs.
    for (gtm_rwlog_entry *i = tx->writelog.begin(), *ie = tx->writelog.end();
	 i != ie; i++)
      {
	// If possible, just increase the incarnation number.
	// See pre_load() / post_load() for why we need release memory order.
	// ??? Can we use a release fence and relaxed stores?  (Same below.)
	if (ml_mg::has_incarnation_left(i->value))
	  i->orec->store(ml_mg::inc_incarnation(i->value),
	      memory_order_release);
	else
	  {
	    // We have an incarnation overflow.  Acquire a new timestamp, and
	    // use it from now on as value for each orec whose incarnation
	    // number cannot be increased.
	    // Overflow of o_ml_mg.time is prevented in begin_or_restart().
	    // See pre_load() / post_load() for why we need release memory
	    // order.
	    if (!overflow_value)
	      // Release memory order is sufficient but required here.
	      // In contrast to the increment in trycommit(), we need release
	      // for the same reason but do not need the acquire because we
	      // do not validate subsequently.
	      overflow_value = ml_mg::set_time(
		  o_ml_mg.time.fetch_add(1, memory_order_release) + 1);
	    i->orec->store(overflow_value, memory_order_release);
	  }
      }

    // We need this release fence to ensure that privatizers see the
    // rolled-back original state (not any uncommitted values) when they read
    // the new snapshot time that we write in begin_or_restart().
    atomic_thread_fence(memory_order_release);

    // We're done, clear the logs.
    tx->writelog.clear();
    tx->readlog.clear();
  }

  virtual bool supports(unsigned number_of_threads)
  {
    // Each txn can commit and fail and rollback once before checking for
    // overflow, so this bounds the number of threads that we can support.
    // In practice, this won't be a problem but we check it anyway so that
    // we never break in the occasional weird situation.
    return (number_of_threads * 2 <= ml_mg::OVERFLOW_RESERVE);
  }

  CREATE_DISPATCH_METHODS(virtual, )
  CREATE_DISPATCH_METHODS_MEM()

  ml_wt_dispatch() : abi_dispatch(false, true, false, false, 0, &o_ml_mg)
  { }
};
623 } // anon namespace
// The single dispatch object shared by all threads; presumably the dispatch
// methods keep all mutable state in the method group / per-thread logs, so a
// const instance is safe to share -- TODO confirm against abi_dispatch.
static const ml_wt_dispatch o_ml_wt_dispatch;

// Returns the dispatch object for the multiple-lock write-through method.
// The const_cast matches the non-const abi_dispatch* interface; callers must
// not mutate the shared instance.
abi_dispatch *
GTM::dispatch_ml_wt ()
{
  return const_cast<ml_wt_dispatch *>(&o_ml_wt_dispatch);
}