/* reducer_impl.cpp                  -*-C++-*-
 *
 *************************************************************************
 *
 *  Copyright (C) 2009-2016, Intel Corporation
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *
 *    * Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    * Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in
 *      the documentation and/or other materials provided with the
 *      distribution.
 *    * Neither the name of Intel Corporation nor the names of its
 *      contributors may be used to endorse or promote products derived
 *      from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 *  OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 *  AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 *  WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *  POSSIBILITY OF SUCH DAMAGE.
 *
 *  *********************************************************************
 *
 *  PLEASE NOTE: This file is a downstream copy of a file maintained in
 *  a repository at cilkplus.org. Changes made to this file that are not
 *  submitted through the contribution process detailed at
 *  http://www.cilkplus.org/submit-cilk-contribution will be lost the next
 *  time that a new version is released. Changes only submitted to the
 *  GNU compiler collection or posted to the git repository at
 *  https://bitbucket.org/intelcilkruntime/intel-cilk-runtime.git are
 *  not tracked.
 *
 *  We welcome your contributions to this open source project. Thank you
 *  for your assistance in helping us improve Cilk Plus.
 *
 *  Patents Pending, Intel Corporation.
 **************************************************************************/
/**
 * Support for reducers
 */
// ICL: Don't complain about conversion from pointer to same-sized integral
// type in hashfun.  That's why we're using size_t
#ifdef _WIN32
#   pragma warning(disable: 1684)
#endif
62 #include "reducer_impl.h"
63 #include "scheduler.h"
64 #include "bug.h"
65 #include "os.h"
66 #include "global_state.h"
67 #include "frame_malloc.h"
69 #include "cilk/hyperobject_base.h"
70 #include "cilktools/cilkscreen.h"
71 #include "internal/abi.h"
73 #if REDPAR_DEBUG > 0
74 #include <stdio.h>
75 #include <stdlib.h>
76 #endif
#define DBG if(0) // if(1) enables some internal checks
// Check that w is the currently executing worker.  This method is a
// no-op unless the debug level is set high enough.
static inline void verify_current_wkr(__cilkrts_worker *w)
{
#if REDPAR_DEBUG >= 5
    __cilkrts_worker* tmp = __cilkrts_get_tls_worker();
    if (w != tmp) {
        fprintf(stderr, "W=%d, actual=%d... missing a refresh....\n",
                w->self,
                tmp->self);
    }
    CILK_ASSERT(w == tmp);
#endif
}
// Suppress clang warning that the expression result is unused
#if defined(__clang__) && (! defined(__INTEL_COMPILER))
#   pragma clang diagnostic push
#   pragma clang diagnostic ignored "-Wunused-value"
#endif // __clang__
/// Helper class to disable and re-enable Cilkscreen
struct DisableCilkscreen
{
    DisableCilkscreen () { __cilkscreen_disable_checking(); }
    ~DisableCilkscreen () { __cilkscreen_enable_checking(); }
};

/// Helper class to enable and re-disable Cilkscreen
struct EnableCilkscreen
{
    EnableCilkscreen () { __cilkscreen_enable_checking(); }
    ~EnableCilkscreen () { __cilkscreen_disable_checking(); }
};

#if defined(__clang__) && (! defined(__INTEL_COMPILER))
#   pragma clang diagnostic pop
#endif // __clang__
/**
 * @brief Element for a hyperobject
 */
struct elem {
    void *key;                      ///< Shared key for this hyperobject
    __cilkrts_hyperobject_base *hb; ///< Base of the hyperobject.
    void *view;                     ///< Strand-private view of this hyperobject

    /// Destroy and deallocate the view object for this element and set view
    /// to null.
    void destroy();

    /// Returns true if this element contains a leftmost view.
    bool is_leftmost() const;
};
/** Bucket containing at most NMAX elements */
struct bucket {
    /// Size of the array of elements for this bucket
    size_t nmax;

    /**
     * We use the ``struct hack'' to allocate an array of variable
     * dimension at the end of the struct.  However, we allocate a
     * total of NMAX+1 elements instead of NMAX.  The last one always
     * has key == 0, which we use as a termination criterion.
     */
    elem el[1];
};
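// Illustrative arithmetic for the ``struct hack'' above (a sketch, not part
// of the runtime): sizeof(bucket) already includes the one declared element
// el[0], so allocating sizeof(bucket) + nmax * sizeof(elem) bytes yields
// nmax usable slots plus the extra key == 0 terminator.  For example, on a
// hypothetical target where sizeof(elem) == 24 and sizeof(size_t) == 8:
//
//     sizeof(bucket)   == 8 + 24            // nmax + el[1]
//     sizeof_bucket(4) == 8 + 24 + 4 * 24   // 4 usable slots + terminator
//
// sizeof_bucket() below performs exactly this computation.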
/**
 * Class that implements the map for reducers so we can find the
 * view for a strand.
 */
struct cilkred_map {
    /** Handy pointer to the global state */
    global_state_t *g;

    /** Number of elements in table */
    size_t nelem;

    /** Number of buckets */
    size_t nbuckets;

    /** Array of pointers to buckets */
    bucket **buckets;

    /** Set true if merging (for debugging purposes) */
    bool merging;

    /** Set true for leftmost reducer map */
    bool is_leftmost;

    /** @brief Return element mapped to 'key' or null if not found. */
    elem *lookup(void *key);

    /**
     * @brief Insert key/value element into hash map without rehashing.
     * Does not check for duplicate key.
     */
    elem *insert_no_rehash(__cilkrts_worker           *w,
                           void                       *key,
                           __cilkrts_hyperobject_base *hb,
                           void                       *value);

    /**
     * @brief Insert key/value element into hash map, rehashing if necessary.
     * Does not check for duplicate key.
     */
    inline elem *rehash_and_insert(__cilkrts_worker           *w,
                                   void                       *key,
                                   __cilkrts_hyperobject_base *hb,
                                   void                       *value);

    /** @brief Grow bucket by one element, reallocating bucket if necessary */
    static elem *grow(__cilkrts_worker *w, bucket **bp);

    /** @brief Rehash a worker's reducer map */
    void rehash(__cilkrts_worker *);

    /**
     * @brief Returns true if a rehash is needed due to the number of
     * elements that have been inserted.
     */
    inline bool need_rehash_p() const;

    /** @brief Allocate and initialize the buckets */
    void make_buckets(__cilkrts_worker *w, size_t nbuckets);

    /**
     * Specify behavior when the same key is present in both maps passed
     * into merge().
     */
    enum merge_kind
    {
        MERGE_UNORDERED, ///< Assertion fails
        MERGE_INTO_LEFT, ///< Merges the argument from the right into the left
        MERGE_INTO_RIGHT ///< Merges the argument from the left into the right
    };

    /**
     * @brief Merge another reducer map into this one, destroying the other
     * map in the process.
     */
    __cilkrts_worker* merge(__cilkrts_worker *current_wkr,
                            cilkred_map      *other_map,
                            enum merge_kind   kind);

    /** @brief check consistency of a reducer map */
    void check(bool allow_null_view);

    /** @brief Test whether the cilkred_map is empty */
    bool is_empty() { return nelem == 0; }
};
static inline struct cilkred_map* install_new_reducer_map(__cilkrts_worker *w) {
    cilkred_map *h;
    h = __cilkrts_make_reducer_map(w);
    w->reducer_map = h;
    return h;
}
static size_t sizeof_bucket(size_t nmax)
{
    bucket *b = 0;
    return (sizeof(*b) + nmax * sizeof(b->el[0]));
}
static bucket *alloc_bucket(__cilkrts_worker *w, size_t nmax)
{
    bucket *b = (bucket *)
        __cilkrts_frame_malloc(w, sizeof_bucket(nmax));
    b->nmax = nmax;
    return b;
}
static void free_bucket(__cilkrts_worker *w, bucket **bp)
{
    bucket *b = *bp;
    if (b) {
        __cilkrts_frame_free(w, b, sizeof_bucket(b->nmax));
        *bp = 0;
    }
}
/* round up nmax to fill a memory allocator block completely */
static size_t roundup(size_t nmax)
{
    size_t sz = sizeof_bucket(nmax);

    /* round up size to a full malloc block */
    sz = __cilkrts_frame_malloc_roundup(sz);

    /* invert sizeof_bucket() */
    nmax = ((sz - sizeof(bucket)) / sizeof(elem));

    return nmax;
}
static bool is_power_of_2(size_t n)
{
    return (n & (n - 1)) == 0;
}
void cilkred_map::make_buckets(__cilkrts_worker *w,
                               size_t            new_nbuckets)
{
    nbuckets = new_nbuckets;

    CILK_ASSERT(is_power_of_2(nbuckets));
#if defined __GNUC__ && defined __ICC
    /* bug workaround -- suppress calls to _intel_fast_memset */
    bucket *volatile*new_buckets = (bucket *volatile*)
#else
    bucket **new_buckets = (bucket **)
#endif
        __cilkrts_frame_malloc(w, nbuckets * sizeof(*(buckets)));

#if REDPAR_DEBUG >= 1
    fprintf(stderr, "W=%d, desc=make_buckets, new_buckets=%p, new_nbuckets=%zd\n",
            w->self, new_buckets, new_nbuckets);
#endif

    for (size_t i = 0; i < new_nbuckets; ++i)
        new_buckets[i] = 0;
#if defined __GNUC__ && defined __ICC
    buckets = (bucket **)new_buckets;
#else
    buckets = new_buckets;
#endif
    nelem = 0;
}
static void free_buckets(__cilkrts_worker *w,
                         bucket          **buckets,
                         size_t            nbuckets)
{
    size_t i;

#if REDPAR_DEBUG >= 1
    verify_current_wkr(w);
    fprintf(stderr, "W=%d, desc=free_buckets, buckets=%p, size=%zd\n",
            w->self, buckets,
            nbuckets * sizeof(*buckets));
#endif

    for (i = 0; i < nbuckets; ++i)
        free_bucket(w, buckets + i);

    __cilkrts_frame_free(w, buckets, nbuckets * sizeof(*buckets));
}
static size_t minsz(size_t nelem)
{
    return 1U + nelem + nelem / 8U;
}

static size_t nextsz(size_t nelem)
{
    return 2 * nelem;
}

bool cilkred_map::need_rehash_p() const
{
    return minsz(nelem) > nbuckets;
}
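// Worked example of the rehash policy above (explanatory note, not part of
// the runtime): minsz(nelem) == 1 + nelem + nelem/8 keeps the load factor a
// little below one element per bucket.  Starting from the default single
// bucket created by __cilkrts_make_reducer_map():
//
//     1st insert (nbuckets == 1, nelem == 0): minsz(0) == 1, no rehash
//     2nd insert (nelem == 1): minsz(1) == 2 > 1, rehash to nextsz(1) == 2
//     3rd insert (nelem == 2): minsz(2) == 3 > 2, rehash to nextsz(2) == 4
//
// so the table roughly doubles whenever occupancy reaches the bucket count.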
static inline size_t hashfun(const cilkred_map *h, void *key)
{
    size_t k = (size_t) key;

    k ^= k >> 21;
    k ^= k >> 8;
    k ^= k >> 3;

    return k & (h->nbuckets - 1);
}
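// A note on the final mask (explanatory sketch): because make_buckets()
// asserts that nbuckets is a power of two, the mask is an exact substitute
// for the modulo operation.  E.g., with nbuckets == 8:
//
//     k & (8 - 1) == k & 0x7 == k % 8
//
// The xor-shift mixing above folds high-order bits into the low-order bits,
// which matters because keys are view addresses and tend to share alignment
// (and therefore zero bits) at the bottom.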
// Given a __cilkrts_hyperobject_base, return the key to that hyperobject in
// the reducer map.
static inline void* get_hyperobject_key(__cilkrts_hyperobject_base *hb)
{
    // The current implementation uses the address of the leftmost view as
    // the key.
    return reinterpret_cast<char*>(hb) + hb->__view_offset;
}

// Given a hyperobject key, return a pointer to the leftmost object.  In the
// current implementation, the address of the leftmost object IS the key, so
// this function is an effective no-op.
static inline void* get_leftmost_view(void *key)
{
    return key;
}
/* debugging support: check consistency of a reducer map */
void cilkred_map::check(bool allow_null_view)
{
    size_t count = 0;

    CILK_ASSERT(buckets);
    for (size_t i = 0; i < nbuckets; ++i) {
        bucket *b = buckets[i];
        if (b)
            for (elem *el = b->el; el->key; ++el) {
                CILK_ASSERT(allow_null_view || el->view);
                ++count;
            }
    }
    CILK_ASSERT(nelem == count);
    /*global_reducer_map::check();*/
}
/* grow bucket by one element, reallocating bucket if necessary */
elem *cilkred_map::grow(__cilkrts_worker *w,
                        bucket          **bp)
{
    size_t i, nmax, nnmax;
    bucket *b, *nb;

    b = *bp;
    if (b) {
        nmax = b->nmax;
        /* find empty element if any */
        for (i = 0; i < nmax; ++i)
            if (b->el[i].key == 0)
                return &(b->el[i]);
        /* do not use the last one even if empty */
    } else {
        nmax = 0;
    }

    verify_current_wkr(w);
    /* allocate a new bucket */
    nnmax = roundup(2 * nmax);
    nb = alloc_bucket(w, nnmax);

    /* copy old bucket into new */
    for (i = 0; i < nmax; ++i)
        nb->el[i] = b->el[i];

    free_bucket(w, bp); *bp = nb;

    /* zero out extra elements */
    for (; i < nnmax; ++i)
        nb->el[i].key = 0;

    /* zero out the last one */
    nb->el[i].key = 0;

    return &(nb->el[nmax]);
}
elem *cilkred_map::insert_no_rehash(__cilkrts_worker           *w,
                                    void                       *key,
                                    __cilkrts_hyperobject_base *hb,
                                    void                       *view)
{
#if REDPAR_DEBUG >= 2
    fprintf(stderr, "[W=%d, desc=insert_no_rehash, this_map=%p]\n",
            w->self, this);
    verify_current_wkr(w);
#endif

    CILK_ASSERT((w == 0 && g == 0) || w->g == g);
    CILK_ASSERT(key != 0);
    CILK_ASSERT(view != 0);

    elem *el = grow(w, &(buckets[hashfun(this, key)]));

#if REDPAR_DEBUG >= 3
    fprintf(stderr, "[W=%d, this=%p, inserting key=%p, view=%p, el = %p]\n",
            w->self, this, key, view, el);
#endif

    el->key = key;
    el->hb  = hb;
    el->view = view;
    ++nelem;

    return el;
}
void cilkred_map::rehash(__cilkrts_worker *w)
{
#if REDPAR_DEBUG >= 1
    fprintf(stderr, "[W=%d, desc=rehash, this_map=%p, g=%p, w->g=%p]\n",
            w->self, this, g, w->g);
    verify_current_wkr(w);
#endif
    CILK_ASSERT((w == 0 && g == 0) || w->g == g);

    size_t onbuckets = nbuckets;
    size_t onelem = nelem;
    bucket **obuckets = buckets;
    size_t i;
    bucket *b;

    make_buckets(w, nextsz(nbuckets));

    for (i = 0; i < onbuckets; ++i) {
        b = obuckets[i];
        if (b) {
            elem *oel;
            for (oel = b->el; oel->key; ++oel)
                insert_no_rehash(w, oel->key, oel->hb, oel->view);
        }
    }

    CILK_ASSERT(nelem == onelem);

    free_buckets(w, obuckets, onbuckets);
}
elem *cilkred_map::rehash_and_insert(__cilkrts_worker           *w,
                                     void                       *key,
                                     __cilkrts_hyperobject_base *hb,
                                     void                       *view)
{
#if REDPAR_DEBUG >= 1
    fprintf(stderr, "W=%d, this_map=%p, inserting key=%p, view=%p\n",
            w->self, this, key, view);
    verify_current_wkr(w);
#endif

    if (need_rehash_p())
        rehash(w);

    return insert_no_rehash(w, key, hb, view);
}
elem *cilkred_map::lookup(void *key)
{
    bucket *b = buckets[hashfun(this, key)];

    if (b) {
        elem *el;
        for (el = b->el; el->key; ++el) {
            if (el->key == key) {
                CILK_ASSERT(el->view);
                return el;
            }
        }
    }

    return 0;
}
void elem::destroy()
{
    if (! is_leftmost()) {

        // Call destroy_fn and deallocate_fn on the view, but not if it's the
        // leftmost view.
        cilk_c_monoid *monoid = &(hb->__c_monoid);
        cilk_c_reducer_destroy_fn_t    destroy_fn    = monoid->destroy_fn;
        cilk_c_reducer_deallocate_fn_t deallocate_fn = monoid->deallocate_fn;

        destroy_fn((void*)hb, view);
        deallocate_fn((void*)hb, view);
    }

    view = 0;
}
inline
bool elem::is_leftmost() const
{
    // The implementation uses the address of the leftmost view as the key,
    // so if key == view, then this element refers to the leftmost view.
    return key == view;
}
/* Remove the reducer from the current reducer map.  If the reducer
   exists in maps other than the current one, the behavior is
   undefined. */
extern "C"
CILK_EXPORT void __CILKRTS_STRAND_STALE(
    __cilkrts_hyper_destroy(__cilkrts_hyperobject_base *hb))
{
    // Disable Cilkscreen for the duration of this call.  The destructor for
    // this class will re-enable Cilkscreen when the method returns.  This
    // will prevent Cilkscreen from reporting apparent races in reducers.
    DisableCilkscreen x;

    __cilkrts_worker* w = __cilkrts_get_tls_worker();
    if (! w) {
        // If no worker, then Cilk is not running and there is no reducer
        // map.  Do nothing.  The reducer's destructor will take care of
        // destroying the leftmost view.
        return;
    }

    const char *UNSYNCED_REDUCER_MSG =
        "Destroying a reducer while it is visible to unsynced child tasks, or\n"
        "calling CILK_C_UNREGISTER_REDUCER() on an unregistered reducer.\n"
        "Did you forget a _Cilk_sync or CILK_C_REGISTER_REDUCER()?";

    cilkred_map* h = w->reducer_map;
    if (NULL == h)
        cilkos_error(UNSYNCED_REDUCER_MSG); // Does not return

    if (h->merging) {
        verify_current_wkr(w);
        __cilkrts_bug("User error: hyperobject used by another hyperobject");
    }

    void* key = get_hyperobject_key(hb);
    elem *el = h->lookup(key);

    // Verify that the reducer is being destroyed from the leftmost strand
    // for which the reducer is defined.
    if (! (el && el->is_leftmost()))
        cilkos_error(UNSYNCED_REDUCER_MSG);

#if REDPAR_DEBUG >= 3
    fprintf(stderr, "[W=%d, key=%p, lookup in map %p, found el=%p, about to destroy]\n",
            w->self, key, h, el);
#endif

    // Remove the element from the hash bucket.  Do not bother shrinking
    // the bucket.  Note that the destroy() function does not actually
    // call the destructor for the leftmost view.
    el->destroy();
    do {
        el[0] = el[1];
        ++el;
    } while (el->key);
    --h->nelem;

#if REDPAR_DEBUG >= 2
    fprintf(stderr, "[W=%d, desc=hyper_destroy_finish, key=%p, w->reducer_map=%p]\n",
            w->self, key, w->reducer_map);
#endif
}
619 extern "C"
620 CILK_EXPORT
621 void __cilkrts_hyper_create(__cilkrts_hyperobject_base *hb)
623 // This function registers the specified hyperobject in the current
624 // reducer map and registers the initial value of the hyperobject as the
625 // leftmost view of the reducer.
626 __cilkrts_worker *w = __cilkrts_get_tls_worker();
627 if (! w) {
628 // If there is no worker, then there is nothing to do: The iniitial
629 // value will automatically be used as the left-most view when we
630 // enter Cilk.
631 return;
634 // Disable Cilkscreen for the duration of this call. The destructor for
635 // this class will re-enable Cilkscreen when the method returns. This
636 // will prevent Cilkscreen from reporting apparent races in reducers
637 DisableCilkscreen x;
639 void* key = get_hyperobject_key(hb);
640 void* view = get_leftmost_view(key);
641 cilkred_map *h = w->reducer_map;
643 if (__builtin_expect(!h, 0)) {
644 h = install_new_reducer_map(w);
645 #if REDPAR_DEBUG >= 2
646 fprintf(stderr, "[W=%d, hb=%p, hyper_create, isntalled new map %p, view=%p]\n",
647 w->self, hb, h, view);
648 #endif
651 /* Must not exist. */
652 CILK_ASSERT(h->lookup(key) == NULL);
654 #if REDPAR_DEBUG >= 3
655 verify_current_wkr(w);
656 fprintf(stderr, "[W=%d, hb=%p, lookup in map %p of view %p, should be null]\n",
657 w->self, hb, h, view);
658 fprintf(stderr, "W=%d, h=%p, inserting key %p, view%p\n",
659 w->self,
661 &(hb->__c_monoid),
662 view);
663 #endif
665 if (h->merging)
666 __cilkrts_bug("User error: hyperobject used by another hyperobject");
668 CILK_ASSERT(w->reducer_map == h);
669 // The address of the leftmost value is the same as the key for lookup.
670 (void) h->rehash_and_insert(w, view, hb, view);
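/*
 * Illustrative usage sketch (not part of the runtime; the macro-based C
 * reducer API lives in the <cilk/reducer*.h> headers).  Registering a C
 * reducer is what reaches __cilkrts_hyper_create() above, and
 * unregistering it reaches __cilkrts_hyper_destroy():
 *
 *     CILK_C_REDUCER_OPADD(sum, int, 0);  // declare an int '+' reducer
 *     CILK_C_REGISTER_REDUCER(sum);       // -> __cilkrts_hyper_create
 *     cilk_for (int i = 0; i < n; ++i)
 *         REDUCER_VIEW(sum) += a[i];      // -> __cilkrts_hyper_lookup
 *     CILK_C_UNREGISTER_REDUCER(sum);     // -> __cilkrts_hyper_destroy
 */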
673 extern "C"
674 CILK_EXPORT void* __CILKRTS_STRAND_PURE(
675 __cilkrts_hyper_lookup(__cilkrts_hyperobject_base *hb))
677 __cilkrts_worker* w = __cilkrts_get_tls_worker_fast();
678 void* key = get_hyperobject_key(hb);
679 if (! w)
680 return get_leftmost_view(key);
682 // Disable Cilkscreen for the duration of this call. This will
683 // prevent Cilkscreen from reporting apparent races in reducers
684 DisableCilkscreen dguard;
686 if (__builtin_expect(w->g->force_reduce, 0))
687 __cilkrts_promote_own_deque(w);
688 cilkred_map* h = w->reducer_map;
690 if (__builtin_expect(!h, 0)) {
691 h = install_new_reducer_map(w);
694 if (h->merging)
695 __cilkrts_bug("User error: hyperobject used by another hyperobject");
696 elem* el = h->lookup(key);
697 if (! el) {
698 /* lookup failed; insert a new default element */
699 void *rep;
702 /* re-enable cilkscreen while calling the constructor */
703 EnableCilkscreen eguard;
704 if (h->is_leftmost)
706 // This special case is called only if the reducer was not
707 // registered using __cilkrts_hyper_create, e.g., if this is a
708 // C reducer in global scope or if there is no bound worker.
709 rep = get_leftmost_view(key);
711 else
713 rep = hb->__c_monoid.allocate_fn((void*)hb,
714 hb->__view_size);
715 // TBD: Handle exception on identity function
716 hb->__c_monoid.identity_fn((void*)hb, rep);
720 #if REDPAR_DEBUG >= 3
721 fprintf(stderr, "W=%d, h=%p, inserting key %p, view%p\n",
722 w->self,
724 &(hb->__c_monoid),
725 rep);
726 CILK_ASSERT(w->reducer_map == h);
727 #endif
728 el = h->rehash_and_insert(w, key, hb, rep);
731 return el->view;
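// Note on the lazy-creation path above (explanatory sketch): a worker that
// first touches a reducer after a steal finds no element for the key, so it
// allocates a fresh view and initializes it with the monoid's identity.
// Conceptually:
//
//     rep = allocate_fn(hb, hb->__view_size);  // uninitialized storage
//     identity_fn(hb, rep);                    // e.g., *rep = 0 for opadd
//
// Later, cilkred_map::merge() below combines such views pairwise with
// reduce_fn, in the left-to-right order the strands would have executed
// serially.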
734 extern "C" CILK_EXPORT
735 void* __cilkrts_hyperobject_alloc(void* ignore, std::size_t bytes)
737 return std::malloc(bytes);
740 extern "C" CILK_EXPORT
741 void __cilkrts_hyperobject_dealloc(void* ignore, void* view)
743 std::free(view);
746 /* No-op destroy function */
747 extern "C" CILK_EXPORT
748 void __cilkrts_hyperobject_noop_destroy(void* ignore, void* ignore2)
cilkred_map *__cilkrts_make_reducer_map(__cilkrts_worker *w)
{
    CILK_ASSERT(w);

    cilkred_map *h;
    size_t nbuckets = 1; /* default value */

    h = (cilkred_map *)__cilkrts_frame_malloc(w, sizeof(*h));
#if REDPAR_DEBUG >= 1
    fprintf(stderr, "[W=%d, desc=make_reducer_frame_malloc_reducer_map, h=%p]\n",
            w->self, h);
#endif

    h->g = w ? w->g : 0;
    h->make_buckets(w, nbuckets);
    h->merging = false;
    h->is_leftmost = false;

    return h;
}
/* Destroy a reducer map.  The map must have been allocated
   from the worker's global context, and it should be destroyed
   by the same worker that allocated it. */
void __cilkrts_destroy_reducer_map(__cilkrts_worker *w, cilkred_map *h)
{
    CILK_ASSERT((w == 0 && h->g == 0) || w->g == h->g);
    verify_current_wkr(w);

    /* The reducer map is allowed to contain el->view == NULL here (and
       only here).  We set el->view == NULL only when we know that the
       map will be destroyed immediately afterwards. */
    DBG h->check(/*allow_null_view=*/true);

    bucket *b;
    size_t i;

    for (i = 0; i < h->nbuckets; ++i) {
        b = h->buckets[i];
        if (b) {
            elem *el;
            for (el = b->el; el->key; ++el) {
                if (el->view)
                    el->destroy();
            }
        }
    }

    free_buckets(w, h->buckets, h->nbuckets);

#if REDPAR_DEBUG >= 1
    fprintf(stderr, "W=%d, destroy_red_map, freeing map h=%p, size=%zd\n",
            w->self, h, sizeof(*h));
#endif

    __cilkrts_frame_free(w, h, sizeof(*h));
}
/* Set the specified reducer map as the leftmost map if is_leftmost is true,
   otherwise, set it to not be the leftmost map. */
void __cilkrts_set_leftmost_reducer_map(cilkred_map *h, int is_leftmost)
{
    h->is_leftmost = is_leftmost;
}
__cilkrts_worker* cilkred_map::merge(__cilkrts_worker *w,
                                     cilkred_map      *other_map,
                                     enum merge_kind   kind)
{
    // Disable Cilkscreen while we merge the maps.  The destructor for
    // the guard class will re-enable Cilkscreen when it goes out of scope.
    // This will prevent Cilkscreen from reporting apparent races in between
    // the reduce function and the reducer operations.  The Cilk runtime
    // guarantees that a pair of reducer maps will only be merged when no
    // other strand will access them.
    DisableCilkscreen guard;

#if REDPAR_DEBUG >= 2
    fprintf(stderr, "[W=%d, desc=merge, this_map=%p, other_map=%p]\n",
            w->self,
            this, other_map);
#endif
    // Remember the current stack frame.
    __cilkrts_stack_frame *current_sf = w->current_stack_frame;
    merging = true;
    other_map->merging = true;

    // Merging to the leftmost view is a special case because every leftmost
    // element must be initialized before the merge.
    CILK_ASSERT(!other_map->is_leftmost /* || kind == MERGE_UNORDERED */);
    bool merge_to_leftmost = (this->is_leftmost
                              /* && !other_map->is_leftmost */);

    DBG check(/*allow_null_view=*/false);
    DBG other_map->check(/*allow_null_view=*/false);

    for (size_t i = 0; i < other_map->nbuckets; ++i) {
        bucket *b = other_map->buckets[i];
        if (b) {
            for (elem *other_el = b->el; other_el->key; ++other_el) {
                /* Steal the value from the other map, which will be
                   destroyed at the end of this operation. */
                void *other_view = other_el->view;
                CILK_ASSERT(other_view);

                void *key = other_el->key;
                __cilkrts_hyperobject_base *hb = other_el->hb;
                elem *this_el = lookup(key);

                if (this_el == 0 && merge_to_leftmost) {
                    /* Initialize leftmost view before merging. */
                    void* leftmost = get_leftmost_view(key);
                    // leftmost == other_view can be true if the initial view
                    // was created in other than the leftmost strand of the
                    // spawn tree, but then made visible to subsequent strands
                    // (e.g., the reducer was allocated on the heap and the
                    // pointer was returned to the caller).  In such cases,
                    // parallel semantics says that syncing with earlier
                    // strands will always result in 'this_el' being null,
                    // thus propagating the initial view up the spawn tree
                    // until it reaches the leftmost strand.  When syncing
                    // with the leftmost strand, leftmost == other_view will
                    // be true and we must avoid reducing the initial view
                    // with itself.
                    if (leftmost != other_view)
                        this_el = rehash_and_insert(w, key, hb, leftmost);
                }

                if (this_el == 0) {
                    /* move object from other map into this one */
                    rehash_and_insert(w, key, hb, other_view);
                    other_el->view = 0;
                    continue; /* No element-level merge necessary */
                }

                /* The same key is present in both maps with values
                   A and B.  Three choices: fail, A OP B, B OP A. */
                switch (kind)
                {
                case MERGE_UNORDERED:
                    __cilkrts_bug("TLS Reducer race");
                    break;
                case MERGE_INTO_RIGHT:
                    /* Swap elements in order to preserve object
                       identity */
                    other_el->view = this_el->view;
                    this_el->view = other_view;
                    /* FALL THROUGH */
                case MERGE_INTO_LEFT: {
                    /* Stealing should be disabled during reduce
                       (even if force-reduce is enabled). */
#if DISABLE_PARALLEL_REDUCERS
                    __cilkrts_stack_frame * volatile *saved_protected_tail;
                    saved_protected_tail = __cilkrts_disallow_stealing(w, NULL);
#endif

                    {
                        CILK_ASSERT(current_sf->worker == w);
                        CILK_ASSERT(w->current_stack_frame == current_sf);

                        /* TBD: if reduce throws an exception we need to stop
                           it here. */
                        hb->__c_monoid.reduce_fn((void*)hb,
                                                 this_el->view,
                                                 other_el->view);
                        w = current_sf->worker;

#if REDPAR_DEBUG >= 2
                        verify_current_wkr(w);
                        CILK_ASSERT(w->current_stack_frame == current_sf);
#endif
                    }

#if DISABLE_PARALLEL_REDUCERS
                    /* Restore stealing */
                    __cilkrts_restore_stealing(w, saved_protected_tail);
#endif
                } break;
                }
            }
        }
    }

    this->is_leftmost = this->is_leftmost || other_map->is_leftmost;
    merging = false;
    other_map->merging = false;
    verify_current_wkr(w);
    __cilkrts_destroy_reducer_map(w, other_map);
    return w;
}
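// Note on the MERGE_INTO_RIGHT swap above (explanatory sketch): reduce_fn
// updates its left view in place, computing left = left OP right.  When the
// right map survives the merge ('this' is the right map), the element views
// are swapped first so that the left map's view object L lands in the
// surviving element:
//
//     swap:    this_el->view = L (left view), other_el->view = R (right view)
//     reduce:  reduce_fn(hb, L, R)  ->  L now holds L OP R
//
// The left view object thus keeps accumulating, preserving object identity
// for any code that cached a pointer to it, while R is destroyed along with
// other_map at the end of merge().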
/**
 * Print routine for debugging the merging of reducer maps.
 * A no-op unless REDPAR_DEBUG is set high enough.
 */
static inline
void debug_map_merge(__cilkrts_worker *w,
                     cilkred_map *left_map,
                     cilkred_map *right_map,
                     __cilkrts_worker **final_wkr)
{
#if REDPAR_DEBUG >= 2
    fprintf(stderr, "[W=%d, desc=finish_merge, left_map=%p, right_map=%p, w->reducer_map=%p, right_ans=%p, final_wkr=%d]\n",
            w->self, left_map, right_map, w->reducer_map, right_map, (*final_wkr)->self);
#endif
}
/**
 * Merge the two maps, choosing the direction that allows the faster merge;
 * return the surviving map and destroy the other one.
 *
 * *w_ptr should be the currently executing worker.
 * *w_ptr may change during execution if the reduction is parallel.
 */
cilkred_map*
merge_reducer_maps(__cilkrts_worker **w_ptr,
                   cilkred_map *left_map,
                   cilkred_map *right_map)
{
    __cilkrts_worker *w = *w_ptr;
    if (!left_map) {
        debug_map_merge(w, left_map, right_map, w_ptr);
        return right_map;
    }

    if (!right_map) {
        debug_map_merge(w, left_map, right_map, w_ptr);
        return left_map;
    }

    /* Special case: if left_map is leftmost, then always merge into it.
       For C reducers this forces lazy creation of the leftmost views. */
    if (left_map->is_leftmost || left_map->nelem > right_map->nelem) {
        *w_ptr = left_map->merge(w, right_map, cilkred_map::MERGE_INTO_LEFT);
        debug_map_merge(*w_ptr, left_map, right_map, w_ptr);
        return left_map;
    } else {
        *w_ptr = right_map->merge(w, left_map, cilkred_map::MERGE_INTO_RIGHT);
        debug_map_merge(*w_ptr, left_map, right_map, w_ptr);
        return right_map;
    }
}
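// Why the direction matters (explanatory note): merge() walks every element
// of the map passed as its argument and re-inserts or reduces it into
// '*this', so its cost scales with the argument map's nelem.  Keeping the
// larger map as the destination therefore touches fewer elements; e.g.,
// folding a 2-element map into a 100-element map performs 2 lookups rather
// than 100.  MERGE_INTO_LEFT vs. MERGE_INTO_RIGHT keeps the serial
// left-OP-right reduction order correct in either direction.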
/**
 * Merges RIGHT into LEFT, and then repeatedly calls
 * merge_reducer_maps() until (*w_ptr)->reducer_map is NULL.
 *
 * *w_ptr may change as reductions execute.
 */
cilkred_map*
repeated_merge_reducer_maps(__cilkrts_worker **w_ptr,
                            cilkred_map *left_map,
                            cilkred_map *right_map)
{
    // Note: if right_map == NULL but w->reducer_map != NULL, then
    // this loop will reduce w->reducer_map into left_map.
    do {
        left_map = merge_reducer_maps(w_ptr, left_map, right_map);
        verify_current_wkr(*w_ptr);

        // Pull any newly created reducer map and loop around again.
        right_map = (*w_ptr)->reducer_map;
        (*w_ptr)->reducer_map = NULL;
    } while (right_map);

    return left_map;
}
/* End reducer_impl.cpp */