4 #include <string.h> /* for strlen */
7 #include <forward_list>
16 //Uncomment for debug print statements
17 #define AMPI_DEBUG(...) //CkPrintf(__VA_ARGS__)
20 * All MPI_* routines must be defined using the AMPI_API_IMPL macro.
21 * All calls inside AMPI to MPI_* routines must use MPI_* as the name.
22 * There are two reasons for this:
24 * 1. AMPI supports the PMPI interface only on Linux.
26 * 2. When AMPI is built on top of MPI, we rename the user's MPI_* calls as AMPI_*.
// Two-level stringification: STRINGIFY(a) lets its argument be macro-expanded
// first, then STRINGIFY_INTERNAL applies the # operator to the expanded text.
28 #define STRINGIFY_INTERNAL(a) #a
29 #define STRINGIFY(a) STRINGIFY_INTERNAL(a)
32 #define AMPI_API_IMPL(ret, name, ...) \
33 _Pragma(STRINGIFY(weak name)) \
34 _Pragma(STRINGIFY(weak P##name = name)) \
37 #else // not Linux (no PMPI support):
38 #define AMPI_API_IMPL(ret, name, ...) \
44 #include "ckliststring.h"
45 static CkListString msgLogRanks
;
46 static int msgLogWrite
;
47 static int msgLogRead
;
48 static char *msgLogFilename
;
53 class zdisk
: public er
{
55 gzFile F
;//Disk file to read from/write to
56 zdisk(unsigned int type
,gzFile f
):er(type
),F(f
) {}
57 zdisk(const zdisk
&p
); //You don't want to copy
58 void operator=(const zdisk
&p
); // You don't want to copy
60 //For seeking (pack/unpack in different orders)
61 virtual void impl_startSeek(seekBlock
&s
); /*Begin a seeking block*/
62 virtual int impl_tell(seekBlock
&s
); /*Give the current offset*/
63 virtual void impl_seek(seekBlock
&s
,int off
); /*Seek to the given offset*/
66 //For packing to a disk file
67 class tozDisk
: public zdisk
{
69 //Generic bottleneck: pack n items of size itemSize from p.
70 virtual void bytes(void *p
,int n
,size_t itemSize
,dataType t
);
72 //Write data to the given file pointer
73 // (must be opened for binary write)
74 // You must close the file yourself when done.
75 tozDisk(gzFile f
):zdisk(IS_PACKING
,f
) {}
78 //For unpacking from a disk file
79 class fromzDisk
: public zdisk
{
81 //Generic bottleneck: unpack n items of size itemSize into p.
82 virtual void bytes(void *p
,int n
,size_t itemSize
,dataType t
);
84 //Read data from the given file pointer
85 // (must be opened for binary read)
86 // You must close the file yourself when done.
87 fromzDisk(gzFile f
):zdisk(IS_UNPACKING
,f
) {}
93 /* AMPI sends messages inline to PE-local destination VPs if: BigSim is not being used and
94 * if tracing is not being used (see bug #1640 for more details on the latter). */
95 #ifndef AMPI_LOCAL_IMPL
96 #define AMPI_LOCAL_IMPL ( !CMK_BIGSIM_CHARM && !CMK_TRACE_ENABLED )
99 /* AMPI uses RDMA sends if BigSim is not being used and the underlying comm
100 * layer supports it (except for GNI, which has experimental RDMA support). */
101 #ifndef AMPI_RDMA_IMPL
102 #define AMPI_RDMA_IMPL ( !CMK_BIGSIM_CHARM && CMK_ONESIDED_IMPL && !CMK_CONVERSE_UGNI )
105 /* contiguous messages larger than or equal to this threshold are sent via RDMA */
106 #ifndef AMPI_RDMA_THRESHOLD_DEFAULT
107 #if CMK_USE_IBVERBS || CMK_OFI || CMK_CONVERSE_UGNI
108 #define AMPI_RDMA_THRESHOLD_DEFAULT 65536
110 #define AMPI_RDMA_THRESHOLD_DEFAULT 32768
114 /* contiguous messages larger than or equal to this threshold that are being sent
115 * within a process are sent via RDMA. */
116 #ifndef AMPI_SMP_RDMA_THRESHOLD_DEFAULT
117 #define AMPI_SMP_RDMA_THRESHOLD_DEFAULT 16384
120 extern int AMPI_RDMA_THRESHOLD
;
121 extern int AMPI_SMP_RDMA_THRESHOLD
;
123 #define AMPI_ALLTOALL_THROTTLE 64
124 #define AMPI_ALLTOALL_SHORT_MSG 256
126 #define AMPI_ALLTOALL_LONG_MSG 4194304
128 #define AMPI_ALLTOALL_LONG_MSG 32768
131 typedef void (*MPI_MigrateFn
)(void);
134 * AMPI Message Matching (Amm) Interface:
135 * messages are matched on 2 ints: [tag, src]
141 // Number of AmmEntry<T>'s in AmmEntryPool for pt2pt msgs:
142 #ifndef AMPI_AMM_PT2PT_POOL_SIZE
143 #define AMPI_AMM_PT2PT_POOL_SIZE 32
146 // Number of AmmEntry<T>'s in AmmEntryPool for coll msgs:
147 #ifndef AMPI_AMM_COLL_POOL_SIZE
148 #define AMPI_AMM_COLL_POOL_SIZE 4
151 class AmpiRequestList
;
153 typedef void (*AmmPupMessageFn
)(PUP::er
& p
, void **msg
);
158 int tags
[AMM_NTAGS
]; // [tag, src]
160 T msg
; // T is either an AmpiRequest* or an AmpiMsg*
161 AmmEntry(T m
) noexcept
{ tags
[AMM_TAG
] = m
->getTag(); tags
[AMM_SRC
] = m
->getSrcRank(); next
= NULL
; msg
= m
; }
162 AmmEntry(int tag
, int src
, T m
) noexcept
{ tags
[AMM_TAG
] = tag
; tags
[AMM_SRC
] = src
; next
= NULL
; msg
= m
; }
163 AmmEntry() = default;
164 ~AmmEntry() = default;
167 template <class T
, size_t N
>
175 std::bitset
<N
> validEntries
;
176 std::array
<AmmEntry
<T
>, N
> entryPool
;
179 Amm() noexcept
: first(NULL
), lasth(&first
), startIdx(0) { validEntries
.reset(); }
181 inline AmmEntry
<T
>* newEntry(int tag
, int src
, T msg
) noexcept
{
182 if (validEntries
.all()) {
183 return new AmmEntry
<T
>(tag
, src
, msg
);
185 for (int i
=startIdx
; i
<validEntries
.size(); i
++) {
186 if (!validEntries
[i
]) {
188 AmmEntry
<T
>* ent
= new (&entryPool
[i
]) AmmEntry
<T
>(tag
, src
, msg
);
193 CkAbort("AMPI> failed to find a free entry in pool!");
197 inline AmmEntry
<T
>* newEntry(T msg
) noexcept
{
198 if (validEntries
.all()) {
199 return new AmmEntry
<T
>(msg
);
201 for (int i
=startIdx
; i
<validEntries
.size(); i
++) {
202 if (!validEntries
[i
]) {
204 AmmEntry
<T
>* ent
= new (&entryPool
[i
]) AmmEntry
<T
>(msg
);
209 CkAbort("AMPI> failed to find a free entry in pool!");
213 inline void deleteEntry(AmmEntry
<T
> *ent
) noexcept
{
214 if (ent
>= &entryPool
.front() && ent
<= &entryPool
.back()) {
215 int idx
= (int)((intptr_t)ent
- (intptr_t)&entryPool
.front()) / sizeof(AmmEntry
<T
>);
216 validEntries
[idx
] = 0;
217 startIdx
= std::min(idx
, startIdx
);
222 void freeAll() noexcept
;
223 void flushMsgs() noexcept
;
224 inline bool match(const int tags1
[AMM_NTAGS
], const int tags2
[AMM_NTAGS
]) const noexcept
;
225 inline void put(T msg
) noexcept
;
226 inline void put(int tag
, int src
, T msg
) noexcept
;
227 inline T
get(int tag
, int src
, int* rtags
=NULL
) noexcept
;
228 inline T
probe(int tag
, int src
, int* rtags
) noexcept
;
229 inline int size() const noexcept
;
230 void pup(PUP::er
& p
, AmmPupMessageFn msgpup
) noexcept
;
233 PUPfunctionpointer(MPI_User_function
*)
236 * OpStruct's are used to look up an MPI_User_function* and check its commutativity.
237 * They are also used to create AmpiOpHeader's, which are transmitted in reductions
238 * that are user-defined or else lack an equivalent Charm++ reducer type.
242 MPI_User_function
* func
;
248 OpStruct() = default;
249 OpStruct(MPI_User_function
* f
) noexcept
: func(f
), isCommutative(true), isValid(true) {}
250 OpStruct(MPI_User_function
* f
, bool c
) noexcept
: func(f
), isCommutative(c
), isValid(true) {}
251 void init(MPI_User_function
* f
, bool c
) noexcept
{
256 bool isFree() const noexcept
{ return !isValid
; }
257 void free() noexcept
{ isValid
= false; }
258 void pup(PUP::er
&p
) {
259 p
|func
; p
|isCommutative
; p
|isValid
;
265 MPI_User_function
* func
;
269 AmpiOpHeader(MPI_User_function
* f
,MPI_Datatype d
,int l
,int szd
) noexcept
:
270 func(f
),dtype(d
),len(l
),szdata(szd
) { }
273 //------------------- added by YAN for one-sided communication -----------
274 /* the index is unique within a communicator */
283 vector
<int> exposureRankList
;
284 vector
<int> accessRankList
;
285 vector
<MPI_Request
> requestList
;
288 WinStruct() noexcept
: comm(MPI_COMM_NULL
), index(-1), areRecvsPosted(false), inEpoch(false) {
289 exposureRankList
.clear(); accessRankList
.clear(); requestList
.clear();
291 WinStruct(MPI_Comm comm_
, int index_
) noexcept
: comm(comm_
), index(index_
), areRecvsPosted(false), inEpoch(false) {
292 exposureRankList
.clear(); accessRankList
.clear(); requestList
.clear();
294 void pup(PUP::er
&p
) noexcept
{
295 p
|comm
; p
|index
; p
|areRecvsPosted
; p
|inEpoch
; p
|exposureRankList
; p
|accessRankList
; p
|requestList
;
297 void clearEpochAccess() noexcept
{
298 accessRankList
.clear(); inEpoch
= false;
300 void clearEpochExposure() noexcept
{
301 exposureRankList
.clear(); areRecvsPosted
= false; requestList
.clear(); inEpoch
=false;
303 vector
<int>& getExposureRankList() noexcept
{return exposureRankList
;}
304 vector
<int>& getAccessRankList() noexcept
{return accessRankList
;}
305 void setExposureRankList(vector
<int> &tmpExposureRankList
) noexcept
{exposureRankList
= tmpExposureRankList
;}
306 void setAccessRankList(vector
<int> &tmpAccessRankList
) noexcept
{accessRankList
= tmpAccessRankList
;}
307 vector
<int>& getRequestList() noexcept
{return requestList
;}
308 bool AreRecvsPosted() const noexcept
{return areRecvsPosted
;}
309 void setAreRecvsPosted(bool setR
) noexcept
{areRecvsPosted
= setR
;}
310 bool isInEpoch() const noexcept
{return inEpoch
;}
311 void setInEpoch(bool arg
) noexcept
{inEpoch
= arg
;}
314 class lockQueueEntry
{
318 lockQueueEntry (int _requestRank
, int _lock_type
) noexcept
319 : requestRank(_requestRank
), lock_type(_lock_type
) {}
320 lockQueueEntry() = default;
323 typedef CkQ
<lockQueueEntry
*> LockQueue
;
334 int owner
; // Rank of owner of the lock, -1 if not locked
335 LockQueue lockQueue
; // queue of waiting processors for the lock
336 // top of queue is the one holding the lock
337 // queue is empty if lock is not applied
341 vector
<int> keyvals
; // list of keyval attributes
343 void setName(const char *src
) noexcept
;
344 void getName(char *src
,int *len
) noexcept
;
347 void pup(PUP::er
&p
) noexcept
;
350 win_obj(const char *name
, void *base
, MPI_Aint size
, int disp_unit
, MPI_Comm comm
) noexcept
;
353 int create(const char *name
, void *base
, MPI_Aint size
, int disp_unit
,
354 MPI_Comm comm
) noexcept
;
357 vector
<int>& getKeyvals() { return keyvals
; }
359 int put(void *orgaddr
, int orgcnt
, int orgunit
,
360 MPI_Aint targdisp
, int targcnt
, int targunit
) noexcept
;
362 int get(void *orgaddr
, int orgcnt
, int orgunit
,
363 MPI_Aint targdisp
, int targcnt
, int targunit
) noexcept
;
364 int accumulate(void *orgaddr
, int count
, MPI_Aint targdisp
, MPI_Datatype targtype
,
365 MPI_Op op
, ampiParent
* pptr
) noexcept
;
367 int iget(int orgcnt
, MPI_Datatype orgtype
,
368 MPI_Aint targdisp
, int targcnt
, MPI_Datatype targtype
) noexcept
;
369 int igetWait(MPI_Request
*req
, MPI_Status
*status
) noexcept
;
370 int igetFree(MPI_Request
*req
, MPI_Status
*status
) noexcept
;
372 int fence() noexcept
;
374 int lock(int requestRank
, int lock_type
) noexcept
;
375 int unlock(int requestRank
) noexcept
;
379 int start() noexcept
;
380 int complete() noexcept
;
382 void lockTopQueue() noexcept
;
383 void enqueue(int requestRank
, int lock_type
) noexcept
;
384 void dequeue() noexcept
;
385 bool emptyQueue() noexcept
;
387 //-----------------------End of code by YAN ----------------------
394 KeyvalPair() = default;
395 KeyvalPair(const char* k
, const char* v
) noexcept
;
396 ~KeyvalPair() = default;
397 void pup(PUP::er
& p
) noexcept
{
401 friend class InfoStruct
;
405 CkPupPtrVec
<KeyvalPair
> nodes
;
408 InfoStruct() noexcept
: valid(true) { }
409 void setvalid(bool valid_
) noexcept
{ valid
= valid_
; }
410 bool getvalid() const noexcept
{ return valid
; }
411 int set(const char* k
, const char* v
) noexcept
;
412 int dup(InfoStruct
& src
) noexcept
;
413 int get(const char* k
, int vl
, char*& v
, int *flag
) const noexcept
;
414 int deletek(const char* k
) noexcept
;
415 int get_valuelen(const char* k
, int* vl
, int *flag
) const noexcept
;
416 int get_nkeys(int *nkeys
) const noexcept
;
417 int get_nthkey(int n
,char* k
) const noexcept
;
418 void myfree() noexcept
;
419 void pup(PUP::er
& p
) noexcept
;
423 class CProxyElement_ampi
;
425 //Virtual class describing a virtual topology: Cart, Graph, DistGraph
428 vector
<int> v
; // dummy variable for const& returns from virtual functions
431 virtual ~ampiTopology() noexcept
{};
432 virtual void pup(PUP::er
&p
) noexcept
=0;
433 virtual int getType() const noexcept
=0;
434 virtual void dup(ampiTopology
* topo
) noexcept
=0;
435 virtual const vector
<int> &getnbors() const noexcept
=0;
436 virtual void setnbors(const vector
<int> &nbors_
) noexcept
=0;
438 virtual const vector
<int> &getdims() const noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class."); return v
;}
439 virtual const vector
<int> &getperiods() const noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class."); return v
;}
440 virtual int getndims() const noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class."); return -1;}
441 virtual void setdims(const vector
<int> &dims_
) noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class.");}
442 virtual void setperiods(const vector
<int> &periods_
) noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class.");}
443 virtual void setndims(int ndims_
) noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class.");}
445 virtual int getnvertices() const noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class."); return -1;}
446 virtual const vector
<int> &getindex() const noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class."); return v
;}
447 virtual const vector
<int> &getedges() const noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class."); return v
;}
448 virtual void setnvertices(int nvertices_
) noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class.");}
449 virtual void setindex(const vector
<int> &index_
) noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class.");}
450 virtual void setedges(const vector
<int> &edges_
) noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class.");}
452 virtual int getInDegree() const noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class."); return -1;}
453 virtual const vector
<int> &getSources() const noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class."); return v
;}
454 virtual const vector
<int> &getSourceWeights() const noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class."); return v
;}
455 virtual int getOutDegree() const noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class."); return -1;}
456 virtual const vector
<int> &getDestinations() const noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class."); return v
;}
457 virtual const vector
<int> &getDestWeights() const noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class."); return v
;}
458 virtual bool areSourcesWeighted() const noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class."); return false;}
459 virtual bool areDestsWeighted() const noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class."); return false;}
460 virtual void setAreSourcesWeighted(bool val
) noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class.");}
461 virtual void setAreDestsWeighted(bool val
) noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class.");}
462 virtual void setInDegree(int degree
) noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class.");}
463 virtual void setSources(const vector
<int> &sources
) noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class.");}
464 virtual void setSourceWeights(const vector
<int> &sourceWeights
) noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class.");}
465 virtual void setOutDegree(int degree
) noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class.");}
466 virtual void setDestinations(const vector
<int> &destinations
) noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class.");}
467 virtual void setDestWeights(const vector
<int> &destWeights
) noexcept
{CkAbort("AMPI: instance of invalid Virtual Topology class.");}
470 class ampiCartTopology final
: public ampiTopology
{
473 vector
<int> dims
, periods
, nbors
;
476 ampiCartTopology() noexcept
: ndims(-1) {}
478 void pup(PUP::er
&p
) noexcept
{
485 inline int getType() const noexcept
{return MPI_CART
;}
486 inline void dup(ampiTopology
* topo
) noexcept
{
487 CkAssert(topo
->getType() == MPI_CART
);
488 setndims(topo
->getndims());
489 setdims(topo
->getdims());
490 setperiods(topo
->getperiods());
491 setnbors(topo
->getnbors());
494 inline const vector
<int> &getdims() const noexcept
{return dims
;}
495 inline const vector
<int> &getperiods() const noexcept
{return periods
;}
496 inline int getndims() const noexcept
{return ndims
;}
497 inline const vector
<int> &getnbors() const noexcept
{return nbors
;}
499 inline void setdims(const vector
<int> &d
) noexcept
{dims
= d
; dims
.shrink_to_fit();}
500 inline void setperiods(const vector
<int> &p
) noexcept
{periods
= p
; periods
.shrink_to_fit();}
501 inline void setndims(int nd
) noexcept
{ndims
= nd
;}
502 inline void setnbors(const vector
<int> &n
) noexcept
{nbors
= n
; nbors
.shrink_to_fit();}
505 class ampiGraphTopology final
: public ampiTopology
{
508 vector
<int> index
, edges
, nbors
;
511 ampiGraphTopology() noexcept
: nvertices(-1) {}
513 void pup(PUP::er
&p
) noexcept
{
520 inline int getType() const noexcept
{return MPI_GRAPH
;}
521 inline void dup(ampiTopology
* topo
) noexcept
{
522 CkAssert(topo
->getType() == MPI_GRAPH
);
523 setnvertices(topo
->getnvertices());
524 setindex(topo
->getindex());
525 setedges(topo
->getedges());
526 setnbors(topo
->getnbors());
529 inline int getnvertices() const noexcept
{return nvertices
;}
530 inline const vector
<int> &getindex() const noexcept
{return index
;}
531 inline const vector
<int> &getedges() const noexcept
{return edges
;}
532 inline const vector
<int> &getnbors() const noexcept
{return nbors
;}
534 inline void setnvertices(int nv
) noexcept
{nvertices
= nv
;}
535 inline void setindex(const vector
<int> &i
) noexcept
{index
= i
; index
.shrink_to_fit();}
536 inline void setedges(const vector
<int> &e
) noexcept
{edges
= e
; edges
.shrink_to_fit();}
537 inline void setnbors(const vector
<int> &n
) noexcept
{nbors
= n
; nbors
.shrink_to_fit();}
540 class ampiDistGraphTopology final
: public ampiTopology
{
542 int inDegree
, outDegree
;
543 bool sourcesWeighted
, destsWeighted
;
544 vector
<int> sources
, sourceWeights
, destinations
, destWeights
, nbors
;
547 ampiDistGraphTopology() noexcept
: inDegree(-1), outDegree(-1), sourcesWeighted(false), destsWeighted(false) {}
549 void pup(PUP::er
&p
) noexcept
{
561 inline int getType() const noexcept
{return MPI_DIST_GRAPH
;}
562 inline void dup(ampiTopology
* topo
) noexcept
{
563 CkAssert(topo
->getType() == MPI_DIST_GRAPH
);
564 setAreSourcesWeighted(topo
->areSourcesWeighted());
565 setAreDestsWeighted(topo
->areDestsWeighted());
566 setInDegree(topo
->getInDegree());
567 setSources(topo
->getSources());
568 setSourceWeights(topo
->getSourceWeights());
569 setOutDegree(topo
->getOutDegree());
570 setDestinations(topo
->getDestinations());
571 setDestWeights(topo
->getDestWeights());
572 setnbors(topo
->getnbors());
575 inline int getInDegree() const noexcept
{return inDegree
;}
576 inline const vector
<int> &getSources() const noexcept
{return sources
;}
577 inline const vector
<int> &getSourceWeights() const noexcept
{return sourceWeights
;}
578 inline int getOutDegree() const noexcept
{return outDegree
;}
579 inline const vector
<int> &getDestinations() const noexcept
{return destinations
;}
580 inline const vector
<int> &getDestWeights() const noexcept
{return destWeights
;}
581 inline bool areSourcesWeighted() const noexcept
{return sourcesWeighted
;}
582 inline bool areDestsWeighted() const noexcept
{return destsWeighted
;}
583 inline const vector
<int> &getnbors() const noexcept
{return nbors
;}
585 inline void setAreSourcesWeighted(bool v
) noexcept
{sourcesWeighted
= v
? 1 : 0;}
586 inline void setAreDestsWeighted(bool v
) noexcept
{destsWeighted
= v
? 1 : 0;}
587 inline void setInDegree(int d
) noexcept
{inDegree
= d
;}
588 inline void setSources(const vector
<int> &s
) noexcept
{sources
= s
; sources
.shrink_to_fit();}
589 inline void setSourceWeights(const vector
<int> &sw
) noexcept
{sourceWeights
= sw
; sourceWeights
.shrink_to_fit();}
590 inline void setOutDegree(int d
) noexcept
{outDegree
= d
;}
591 inline void setDestinations(const vector
<int> &d
) noexcept
{destinations
= d
; destinations
.shrink_to_fit();}
592 inline void setDestWeights(const vector
<int> &dw
) noexcept
{destWeights
= dw
; destWeights
.shrink_to_fit();}
593 inline void setnbors(const vector
<int> &nbors_
) noexcept
{nbors
= nbors_
; nbors
.shrink_to_fit();}
596 /* KeyValue class for attribute caching */
600 MPI_Copy_function
*copy_fn
;
601 MPI_Delete_function
*delete_fn
;
606 KeyvalNode() : val(NULL
), copy_fn(NULL
), delete_fn(NULL
), extra_state(NULL
), refCount(1), isValSet(false) { }
607 KeyvalNode(MPI_Copy_function
*cf
, MPI_Delete_function
*df
, void* es
) :
608 val(NULL
), copy_fn(cf
), delete_fn(df
), extra_state(es
), refCount(1), isValSet(false) { }
609 bool hasVal() const { return isValSet
; }
610 void clearVal() { isValSet
= false; }
611 void setVal(void *v
) { val
= v
; isValSet
= true; }
612 void* getVal() const { return val
; }
613 void incRefCount() { refCount
++; }
614 int decRefCount() { CkAssert(refCount
> 0); refCount
--; return refCount
; }
615 void pup(PUP::er
& p
) {
616 p((char *)val
, sizeof(void *));
617 p((char *)copy_fn
, sizeof(void *));
618 p((char *)delete_fn
, sizeof(void *));
619 p((char *)extra_state
, sizeof(void *));
625 // Only store Group ranks explicitly when they can't be
626 // lazily and transiently created via std::iota()
629 int sz
; // -1 if ranks is valid, otherwise the size to pass to std::iota()
633 bool ranksIsIota() const noexcept
{
634 for (int i
=0; i
<ranks
.size(); i
++)
641 groupStruct() noexcept
: sz(0) {}
642 groupStruct(int s
) noexcept
: sz(s
) {}
643 groupStruct(vector
<int> r
) noexcept
: sz(-1), ranks(std::move(r
)) {
648 ranks
.shrink_to_fit();
650 groupStruct
&operator=(const groupStruct
&obj
) noexcept
{
655 ~groupStruct() = default;
656 void pup(PUP::er
& p
) noexcept
{
660 bool isIota() const noexcept
{return (sz
!= -1);}
661 int operator[](int i
) const noexcept
{return (isIota()) ? i
: ranks
[i
];}
662 int size() const noexcept
{return (isIota()) ? sz
: ranks
.size();}
663 vector
<int> getRanks() const noexcept
{
665 // Lazily create ranks:
666 vector
<int> tmpRanks(sz
);
667 std::iota(tmpRanks
.begin(), tmpRanks
.end(), 0);
668 tmpRanks
.shrink_to_fit();
677 enum AmpiCommType
: uint8_t {
683 //Describes an AMPI communicator
684 class ampiCommStruct
{
686 MPI_Comm comm
; //Communicator
687 CkArrayID ampiID
; //ID of corresponding ampi array
688 int size
; //Number of processes in communicator
689 AmpiCommType commType
; //COMM_WORLD, intracomm, intercomm?
690 groupStruct indices
; //indices[r] gives the array index for rank r
691 groupStruct remoteIndices
; // remote group for inter-communicator
693 ampiTopology
*ampiTopo
; // Virtual topology
694 int topoType
; // Type of virtual topology: MPI_CART, MPI_GRAPH, MPI_DIST_GRAPH, or MPI_UNDEFINED
696 // For communicator attributes (MPI_*_get_attr): indexed by keyval
699 // For communicator names
700 std::string commName
;
703 ampiCommStruct(int ignored
=0) noexcept
704 : size(-1), commType(INTRA
), ampiTopo(NULL
), topoType(MPI_UNDEFINED
)
706 ampiCommStruct(MPI_Comm comm_
,const CkArrayID
&id_
,int size_
) noexcept
707 : comm(comm_
), ampiID(id_
),size(size_
), commType(WORLD
), indices(size_
),
708 ampiTopo(NULL
), topoType(MPI_UNDEFINED
)
710 ampiCommStruct(MPI_Comm comm_
,const CkArrayID
&id_
, const vector
<int> &indices_
) noexcept
711 : comm(comm_
), ampiID(id_
), size(indices_
.size()), commType(INTRA
), indices(indices_
),
712 ampiTopo(NULL
), topoType(MPI_UNDEFINED
)
714 ampiCommStruct(MPI_Comm comm_
, const CkArrayID
&id_
, const vector
<int> &indices_
,
715 const vector
<int> &remoteIndices_
) noexcept
716 : comm(comm_
), ampiID(id_
), size(indices_
.size()), commType(INTER
), indices(indices_
),
717 remoteIndices(remoteIndices_
), ampiTopo(NULL
), topoType(MPI_UNDEFINED
)
720 ~ampiCommStruct() noexcept
{
721 if (ampiTopo
!= NULL
)
725 // Overloaded copy constructor. Used when creating virtual topologies.
726 ampiCommStruct(const ampiCommStruct
&obj
, int topoNumber
=MPI_UNDEFINED
) noexcept
{
727 switch (topoNumber
) {
729 ampiTopo
= new ampiCartTopology();
732 ampiTopo
= new ampiGraphTopology();
735 ampiTopo
= new ampiDistGraphTopology();
741 topoType
= topoNumber
;
745 commType
= obj
.commType
;
746 indices
= obj
.indices
;
747 remoteIndices
= obj
.remoteIndices
;
748 keyvals
= obj
.keyvals
;
749 commName
= obj
.commName
;
752 ampiCommStruct
&operator=(const ampiCommStruct
&obj
) noexcept
{
756 switch (obj
.topoType
) {
758 ampiTopo
= new ampiCartTopology(*(static_cast<ampiCartTopology
*>(obj
.ampiTopo
)));
761 ampiTopo
= new ampiGraphTopology(*(static_cast<ampiGraphTopology
*>(obj
.ampiTopo
)));
764 ampiTopo
= new ampiDistGraphTopology(*(static_cast<ampiDistGraphTopology
*>(obj
.ampiTopo
)));
770 topoType
= obj
.topoType
;
774 commType
= obj
.commType
;
775 indices
= obj
.indices
;
776 remoteIndices
= obj
.remoteIndices
;
777 keyvals
= obj
.keyvals
;
778 commName
= obj
.commName
;
782 const ampiTopology
* getTopologyforNeighbors() const noexcept
{
786 ampiTopology
* getTopology() noexcept
{
790 inline bool isinter() const noexcept
{return commType
==INTER
;}
791 void setArrayID(const CkArrayID
&nID
) noexcept
{ampiID
=nID
;}
793 MPI_Comm
getComm() const noexcept
{return comm
;}
794 inline vector
<int> getIndices() const noexcept
{return indices
.getRanks();}
795 inline vector
<int> getRemoteIndices() const noexcept
{return remoteIndices
.getRanks();}
796 vector
<int> &getKeyvals() noexcept
{return keyvals
;}
798 void setName(const char *src
) noexcept
{
799 CkDDT_SetName(commName
, src
);
802 void getName(char *name
, int *len
) const noexcept
{
803 int length
= *len
= commName
.size();
804 memcpy(name
, commName
.data(), length
);
808 //Get the proxy for the entire array
809 CProxy_ampi
getProxy() const noexcept
;
811 //Get the array index for rank r in this communicator
812 int getIndexForRank(int r
) const noexcept
{
813 #if CMK_ERROR_CHECKING
814 if (r
>=size
) CkAbort("AMPI> You passed in an out-of-bounds process rank!");
818 int getIndexForRemoteRank(int r
) const noexcept
{
819 #if CMK_ERROR_CHECKING
820 if (r
>=remoteIndices
.size()) CkAbort("AMPI> You passed in an out-of-bounds intercomm remote process rank!");
822 return remoteIndices
[r
];
824 //Get the rank for this array index (Warning: linear time)
825 int getRankForIndex(int i
) const noexcept
{
826 if (indices
.isIota()) return i
;
828 const vector
<int>& ind
= indices
.getRanks();
829 for (int r
=0;r
<ind
.size();r
++)
830 if (ind
[r
]==i
) return r
;
831 return -1; /*That index isn't in this communicator*/
835 int getSize() const noexcept
{return size
;}
837 void pup(PUP::er
&p
) noexcept
{
847 if (topoType
!= MPI_UNDEFINED
) {
848 if (p
.isUnpacking()) {
851 ampiTopo
= new ampiCartTopology();
854 ampiTopo
= new ampiGraphTopology();
857 ampiTopo
= new ampiDistGraphTopology();
860 CkAbort("AMPI> Communicator has an invalid topology!");
868 if (p
.isDeleting()) {
869 delete ampiTopo
; ampiTopo
= NULL
;
873 PUPmarshall(ampiCommStruct
)
875 class mpi_comm_worlds
{
876 ampiCommStruct comms
[MPI_MAX_COMM_WORLDS
];
878 ampiCommStruct
&operator[](int i
) noexcept
{return comms
[i
];}
879 void pup(PUP::er
&p
) noexcept
{
880 for (int i
=0;i
<MPI_MAX_COMM_WORLDS
;i
++)
886 inline void outputOp(const vector
<int>& vec
) noexcept
{
887 if (vec
.size() > 50) {
888 CkPrintf("vector too large to output!\n");
891 CkPrintf("output vector: size=%d {",vec
.size());
892 for (int i
=0; i
<vec
.size(); i
++) {
893 CkPrintf(" %d ", vec
[i
]);
898 inline int getPosOp(int idx
, const vector
<int>& vec
) noexcept
{
899 for (int r
=0; r
<vec
.size(); r
++) {
904 return MPI_UNDEFINED
;
907 inline vector
<int> unionOp(const vector
<int>& vec1
, const vector
<int>& vec2
) noexcept
{
908 vector
<int> newvec(vec1
);
909 for (int i
=0; i
<vec2
.size(); i
++) {
910 if (getPosOp(vec2
[i
], vec1
) == MPI_UNDEFINED
) {
911 newvec
.push_back(vec2
[i
]);
917 inline vector
<int> intersectOp(const vector
<int>& vec1
, const vector
<int>& vec2
) noexcept
{
919 for (int i
=0; i
<vec1
.size(); i
++) {
920 if (getPosOp(vec1
[i
], vec2
) != MPI_UNDEFINED
) {
921 newvec
.push_back(vec1
[i
]);
927 inline vector
<int> diffOp(const vector
<int>& vec1
, const vector
<int>& vec2
) noexcept
{
929 for (int i
=0; i
<vec1
.size(); i
++) {
930 if (getPosOp(vec1
[i
], vec2
) == MPI_UNDEFINED
) {
931 newvec
.push_back(vec1
[i
]);
937 inline int* translateRanksOp(int n
, const vector
<int>& vec1
, const int* ranks1
,
938 const vector
<int>& vec2
, int *ret
) noexcept
{
939 for (int i
=0; i
<n
; i
++) {
940 ret
[i
] = (ranks1
[i
] == MPI_PROC_NULL
) ? MPI_PROC_NULL
: getPosOp(vec1
[ranks1
[i
]], vec2
);
945 inline int compareVecOp(const vector
<int>& vec1
, const vector
<int>& vec2
) noexcept
{
946 int pos
, ret
= MPI_IDENT
;
947 if (vec1
.size() != vec2
.size()) {
950 for (int i
=0; i
<vec1
.size(); i
++) {
951 pos
= getPosOp(vec1
[i
], vec2
);
952 if (pos
== MPI_UNDEFINED
) {
962 inline vector
<int> inclOp(int n
, const int* ranks
, const vector
<int>& vec
) noexcept
{
963 vector
<int> retvec(n
);
964 for (int i
=0; i
<n
; i
++) {
965 retvec
[i
] = vec
[ranks
[i
]];
970 inline vector
<int> exclOp(int n
, const int* ranks
, const vector
<int>& vec
) noexcept
{
973 for (int j
=0; j
<vec
.size(); j
++) {
974 for (int i
=0; i
<n
; i
++) {
981 retvec
.push_back(vec
[j
]);
990 inline vector
<int> rangeInclOp(int n
, int ranges
[][3], const vector
<int>& vec
,
991 int *flag
) noexcept
{
993 int first
, last
, stride
;
994 for (int i
=0; i
<n
; i
++) {
995 first
= ranges
[i
][0];
997 stride
= ranges
[i
][2];
999 for (int j
=0; j
<=(last
-first
)/stride
; j
++) {
1000 retvec
.push_back(vec
[first
+stride
*j
]);
1004 *flag
= MPI_ERR_ARG
;
1005 return vector
<int>();
1008 *flag
= MPI_SUCCESS
;
1012 inline vector
<int> rangeExclOp(int n
, int ranges
[][3], const vector
<int>& vec
,
1013 int *flag
) noexcept
{
1015 int first
, last
, stride
;
1016 for (int i
=0; i
<n
; i
++) {
1017 first
= ranges
[i
][0];
1018 last
= ranges
[i
][1];
1019 stride
= ranges
[i
][2];
1021 for (int j
=0; j
<=(last
-first
)/stride
; j
++) {
1022 ranks
.push_back(first
+stride
*j
);
1026 *flag
= MPI_ERR_ARG
;
1027 return vector
<int>();
1030 *flag
= MPI_SUCCESS
;
1031 return exclOp(ranks
.size(), &ranks
[0], vec
);
1035 #include "tcharmc.h"
1037 #include "ampi.decl.h"
1038 #include "charm-api.h"
1039 #include <sys/stat.h> // for mkdir
1041 extern int _mpi_nworlds
;
1043 //MPI_ANY_TAG is defined in ampi.h to MPI_TAG_UB_VALUE+1
// Internal tags reserved above the user-visible tag range.
// Each expansion is parenthesized so the macro behaves as a single value
// inside larger expressions (e.g. `x - MPI_BCAST_TAG`, `2 * MPI_RMA_TAG`).
#define MPI_ATA_SEQ_TAG     (MPI_TAG_UB_VALUE+2)
#define MPI_BCAST_TAG       (MPI_TAG_UB_VALUE+3)
#define MPI_REDN_TAG        (MPI_TAG_UB_VALUE+4)
#define MPI_SCATTER_TAG     (MPI_TAG_UB_VALUE+5)
#define MPI_SCAN_TAG        (MPI_TAG_UB_VALUE+6)
#define MPI_EXSCAN_TAG      (MPI_TAG_UB_VALUE+7)
#define MPI_ATA_TAG         (MPI_TAG_UB_VALUE+8)
#define MPI_NBOR_TAG        (MPI_TAG_UB_VALUE+9)
#define MPI_RMA_TAG         (MPI_TAG_UB_VALUE+10)
#define MPI_EPOCH_START_TAG (MPI_TAG_UB_VALUE+11)
#define MPI_EPOCH_END_TAG   (MPI_TAG_UB_VALUE+12)
1056 #define AMPI_COLL_SOURCE 0
1057 #define AMPI_COLL_COMM MPI_COMM_WORLD
1059 enum AmpiReqType
: uint8_t {
1060 AMPI_INVALID_REQ
= 0,
1066 AMPI_GATHER_REQ
= 6,
1067 AMPI_GATHERV_REQ
= 7,
1074 inline void operator|(PUP::er
&p
, AmpiReqType
&r
) {
1075 pup_bytes(&p
, (void *)&r
, sizeof(AmpiReqType
));
1078 enum AmpiReqSts
: char {
1079 AMPI_REQ_PENDING
= 0,
1080 AMPI_REQ_BLOCKED
= 1,
1081 AMPI_REQ_COMPLETED
= 2
1084 enum AmpiSendType
: bool {
1085 BLOCKING_SEND
= false,
// Rounds an integer x up to the next multiple of 8 (adds 7, then clears the
// low three bits). The argument is parenthesized, so expressions are safe.
1089 #define MyAlign8(x) (((x)+7)&(~7))
1092 Represents an MPI request that has been initiated
1093 using Isend, Irecv, Ialltoall, Send_init, etc.
1097 void *buf
= nullptr;
1099 MPI_Datatype type
= MPI_DATATYPE_NULL
;
1100 int tag
= MPI_ANY_TAG
; // the order must match MPI_Status
1101 int src
= MPI_ANY_SOURCE
;
1102 MPI_Comm comm
= MPI_COMM_NULL
;
1103 MPI_Request reqIdx
= MPI_REQUEST_NULL
;
1104 bool complete
= false;
1105 bool blocked
= false; // this req is currently blocked on
1107 #if CMK_BIGSIM_CHARM
1109 void *event
= nullptr; // the event point that corresponds to this message
1110 int eventPe
= -1; // the PE that the event is located on
1114 AmpiRequest() =default;
1115 virtual ~AmpiRequest() =default;
1117 /// Activate this persistent request.
1118 /// Only meaningful for persistent Ireq, SendReq, and SsendReq requests.
1119 virtual void start(MPI_Request reqIdx
) noexcept
{}
1121 /// Used by AmmEntry's constructor
1122 virtual int getTag() const noexcept
{ return tag
; }
1123 virtual int getSrcRank() const noexcept
{ return src
; }
1125 /// Return true if this request is finished (progress):
1126 virtual bool test(MPI_Status
*sts
=MPI_STATUS_IGNORE
) noexcept
=0;
1128 /// Block until this request is finished,
1129 /// returning a valid MPI error code.
1130 virtual int wait(MPI_Status
*sts
) noexcept
=0;
1132 /// Mark this request for cancellation.
1133 /// Supported only for IReq requests
1134 virtual void cancel() noexcept
{}
1136 /// Mark this request persistent.
1137 /// Supported only for IReq, SendReq, and SsendReq requests
1138 virtual void setPersistent(bool p
) noexcept
{}
1139 virtual bool isPersistent() const noexcept
{ return false; }
1141 /// Receive an AmpiMsg
1142 virtual void receive(ampi
*ptr
, AmpiMsg
*msg
, bool deleteMsg
=true) noexcept
=0;
1144 /// Receive a CkReductionMsg
1145 virtual void receive(ampi
*ptr
, CkReductionMsg
*msg
) noexcept
=0;
1147 /// Receive an Rdma message
1148 virtual void receiveRdma(ampi
*ptr
, char *sbuf
, int slength
, int ssendReq
,
1149 int srcRank
, MPI_Comm scomm
) noexcept
{ }
1151 /// Set the request's index into AmpiRequestList
1152 void setReqIdx(MPI_Request idx
) noexcept
{ reqIdx
= idx
; }
1153 MPI_Request
getReqIdx() const noexcept
{ return reqIdx
; }
1155 /// Free the request's datatype
1156 void free(CkDDT
* ddt
) noexcept
{
1157 if (type
!= MPI_DATATYPE_NULL
) ddt
->freeType(type
);
1160 /// Set whether the request is currently blocked on
1161 void setBlocked(bool b
) noexcept
{ blocked
= b
; }
1162 bool isBlocked() const noexcept
{ return blocked
; }
1164 /// Returns the type of request:
1165 /// AMPI_I_REQ, AMPI_ATA_REQ, AMPI_SEND_REQ, AMPI_SSEND_REQ,
1166 /// AMPI_REDN_REQ, AMPI_GATHER_REQ, AMPI_GATHERV_REQ, AMPI_G_REQ
1167 virtual AmpiReqType
getType() const noexcept
=0;
1169 /// Returns whether this request will need to be matched.
1170 /// It is used to determine whether this request should be inserted into postedReqs.
1171 /// AMPI_SEND_REQ, AMPI_SSEND_REQ, and AMPI_ATA_REQ should not be posted.
1172 virtual bool isUnmatched() const noexcept
=0;
1174 /// Returns whether this type is pooled or not:
1175 /// Only AMPI_I_REQ, AMPI_SEND_REQ, and AMPI_SSEND_REQs are pooled.
1176 virtual bool isPooledType() const noexcept
{ return false; }
1178 /// Return the actual number of bytes that were received.
1179 virtual int getNumReceivedBytes(CkDDT
*ddt
) const noexcept
{
1180 // by default, return number of bytes requested
1181 return count
* ddt
->getSize(type
);
1184 virtual void pup(PUP::er
&p
) noexcept
{
1185 p((char *)&buf
, sizeof(void *)); //supposed to work only with Isomalloc
1194 #if CMK_BIGSIM_CHARM
1195 //needed for bigsim out-of-core emulation
1196 //as the "log" is not moved from memory, this pointer is safe
1198 p((char *)&event
, sizeof(void *));
1203 virtual void print() const noexcept
=0;
1206 // This is used in the constructors of the AmpiRequest types below,
1207 // assuming arguments: (MPI_Datatype type_, CkDDT* ddt_, AmpiReqSts sts_)
1208 #define AMPI_REQUEST_COMMON_INIT \
1210 complete = (sts_ == AMPI_REQ_COMPLETED); \
1211 blocked = (sts_ == AMPI_REQ_BLOCKED); \
1212 if (type_ != MPI_DATATYPE_NULL) { \
1213 ddt_->getType(type_)->incRefCount(); \
1217 class IReq final
: public AmpiRequest
{
1219 bool cancelled
= false; // track if request is cancelled
1220 bool persistent
= false; // Is this a persistent recv request?
1221 int length
= 0; // recv'ed length in bytes
1223 IReq(void *buf_
, int count_
, MPI_Datatype type_
, int src_
, int tag_
,
1224 MPI_Comm comm_
, CkDDT
*ddt_
, AmpiReqSts sts_
=AMPI_REQ_PENDING
) noexcept
1232 AMPI_REQUEST_COMMON_INIT
1236 bool test(MPI_Status
*sts
=MPI_STATUS_IGNORE
) noexcept override
;
1237 int wait(MPI_Status
*sts
) noexcept override
;
1238 void cancel() noexcept override
{ if (!complete
) cancelled
= true; }
1239 AmpiReqType
getType() const noexcept override
{ return AMPI_I_REQ
; }
1240 bool isUnmatched() const noexcept override
{ return !complete
; }
1241 bool isPooledType() const noexcept override
{ return true; }
1242 void setPersistent(bool p
) noexcept override
{ persistent
= p
; }
1243 bool isPersistent() const noexcept override
{ return persistent
; }
1244 void start(MPI_Request reqIdx
) noexcept override
;
1245 void receive(ampi
*ptr
, AmpiMsg
*msg
, bool deleteMsg
=true) noexcept override
;
1246 void receive(ampi
*ptr
, CkReductionMsg
*msg
) noexcept override
{}
1247 void receiveRdma(ampi
*ptr
, char *sbuf
, int slength
, int ssendReq
, int srcRank
, MPI_Comm scomm
) noexcept override
;
1248 int getNumReceivedBytes(CkDDT
*ptr
) const noexcept override
{
1251 void pup(PUP::er
&p
) noexcept override
{
1252 AmpiRequest::pup(p
);
1257 void print() const noexcept override
;
1260 class RednReq final
: public AmpiRequest
{
1262 MPI_Op op
= MPI_OP_NULL
;
1264 RednReq(void *buf_
, int count_
, MPI_Datatype type_
, MPI_Comm comm_
,
1265 MPI_Op op_
, CkDDT
* ddt_
, AmpiReqSts sts_
=AMPI_REQ_PENDING
) noexcept
1270 src
= AMPI_COLL_SOURCE
;
1274 AMPI_REQUEST_COMMON_INIT
1277 ~RednReq() =default;
1278 bool test(MPI_Status
*sts
=MPI_STATUS_IGNORE
) noexcept override
;
1279 int wait(MPI_Status
*sts
) noexcept override
;
1280 void cancel() noexcept override
{}
1281 AmpiReqType
getType() const noexcept override
{ return AMPI_REDN_REQ
; }
1282 bool isUnmatched() const noexcept override
{ return !complete
; }
1283 void receive(ampi
*ptr
, AmpiMsg
*msg
, bool deleteMsg
=true) noexcept override
{}
1284 void receive(ampi
*ptr
, CkReductionMsg
*msg
) noexcept override
;
1285 void pup(PUP::er
&p
) noexcept override
{
1286 AmpiRequest::pup(p
);
1289 void print() const noexcept override
;
1292 class GatherReq final
: public AmpiRequest
{
1294 GatherReq(void *buf_
, int count_
, MPI_Datatype type_
, MPI_Comm comm_
,
1295 CkDDT
*ddt_
, AmpiReqSts sts_
=AMPI_REQ_PENDING
) noexcept
1300 src
= AMPI_COLL_SOURCE
;
1303 AMPI_REQUEST_COMMON_INIT
1305 GatherReq() =default;
1306 ~GatherReq() =default;
1307 bool test(MPI_Status
*sts
=MPI_STATUS_IGNORE
) noexcept override
;
1308 int wait(MPI_Status
*sts
) noexcept override
;
1309 void cancel() noexcept override
{}
1310 AmpiReqType
getType() const noexcept override
{ return AMPI_GATHER_REQ
; }
1311 bool isUnmatched() const noexcept override
{ return !complete
; }
1312 void receive(ampi
*ptr
, AmpiMsg
*msg
, bool deleteMsg
=true) noexcept override
{}
1313 void receive(ampi
*ptr
, CkReductionMsg
*msg
) noexcept override
;
1314 void pup(PUP::er
&p
) noexcept override
{
1315 AmpiRequest::pup(p
);
1317 void print() const noexcept override
;
1320 class GathervReq final
: public AmpiRequest
{
1322 vector
<int> recvCounts
;
1325 GathervReq(void *buf_
, int count_
, MPI_Datatype type_
, MPI_Comm comm_
, const int *rc
,
1326 const int *d
, CkDDT
* ddt_
, AmpiReqSts sts_
=AMPI_REQ_PENDING
) noexcept
1331 src
= AMPI_COLL_SOURCE
;
1334 recvCounts
.assign(rc
, rc
+count
);
1335 displs
.assign(d
, d
+count
);
1336 AMPI_REQUEST_COMMON_INIT
1338 GathervReq() =default;
1339 ~GathervReq() =default;
1340 bool test(MPI_Status
*sts
=MPI_STATUS_IGNORE
) noexcept override
;
1341 int wait(MPI_Status
*sts
) noexcept override
;
1342 AmpiReqType
getType() const noexcept override
{ return AMPI_GATHERV_REQ
; }
1343 bool isUnmatched() const noexcept override
{ return !complete
; }
1344 void receive(ampi
*ptr
, AmpiMsg
*msg
, bool deleteMsg
=true) noexcept override
{}
1345 void receive(ampi
*ptr
, CkReductionMsg
*msg
) noexcept override
;
1346 void pup(PUP::er
&p
) noexcept override
{
1347 AmpiRequest::pup(p
);
1351 void print() const noexcept override
;
1354 class SendReq final
: public AmpiRequest
{
1355 bool persistent
= false; // is this a persistent send request?
1358 SendReq(MPI_Datatype type_
, MPI_Comm comm_
, CkDDT
* ddt_
, AmpiReqSts sts_
=AMPI_REQ_PENDING
) noexcept
1362 AMPI_REQUEST_COMMON_INIT
1364 SendReq(void* buf_
, int count_
, MPI_Datatype type_
, int dest_
, int tag_
,
1365 MPI_Comm comm_
, CkDDT
* ddt_
, AmpiReqSts sts_
=AMPI_REQ_PENDING
) noexcept
1373 AMPI_REQUEST_COMMON_INIT
1375 SendReq() noexcept
{}
1376 ~SendReq() noexcept
{}
1377 bool test(MPI_Status
*sts
=MPI_STATUS_IGNORE
) noexcept override
;
1378 int wait(MPI_Status
*sts
) noexcept override
;
1379 void setPersistent(bool p
) noexcept override
{ persistent
= p
; }
1380 bool isPersistent() const noexcept override
{ return persistent
; }
1381 void start(MPI_Request reqIdx
) noexcept override
;
1382 void receive(ampi
*ptr
, AmpiMsg
*msg
, bool deleteMsg
=true) noexcept override
{}
1383 void receive(ampi
*ptr
, CkReductionMsg
*msg
) noexcept override
{}
1384 AmpiReqType
getType() const noexcept override
{ return AMPI_SEND_REQ
; }
1385 bool isUnmatched() const noexcept override
{ return false; }
1386 bool isPooledType() const noexcept override
{ return true; }
1387 void pup(PUP::er
&p
) noexcept override
{
1388 AmpiRequest::pup(p
);
1391 void print() const noexcept override
;
1394 class SsendReq final
: public AmpiRequest
{
1396 bool persistent
= false; // is this a persistent Ssend request?
1399 SsendReq(MPI_Datatype type_
, MPI_Comm comm_
, CkDDT
* ddt_
, AmpiReqSts sts_
=AMPI_REQ_PENDING
) noexcept
1403 AMPI_REQUEST_COMMON_INIT
1405 SsendReq(void* buf_
, int count_
, MPI_Datatype type_
, int dest_
, int tag_
, MPI_Comm comm_
,
1406 CkDDT
* ddt_
, AmpiReqSts sts_
=AMPI_REQ_PENDING
) noexcept
1414 AMPI_REQUEST_COMMON_INIT
1416 SsendReq(void* buf_
, int count_
, MPI_Datatype type_
, int dest_
, int tag_
, MPI_Comm comm_
,
1417 int src_
, CkDDT
* ddt_
, AmpiReqSts sts_
=AMPI_REQ_PENDING
) noexcept
1425 AMPI_REQUEST_COMMON_INIT
1427 SsendReq() =default;
1428 ~SsendReq() =default;
1429 bool test(MPI_Status
*sts
=MPI_STATUS_IGNORE
) noexcept override
;
1430 int wait(MPI_Status
*sts
) noexcept override
;
1431 void setPersistent(bool p
) noexcept override
{ persistent
= p
; }
1432 bool isPersistent() const noexcept override
{ return persistent
; }
1433 void start(MPI_Request reqIdx
) noexcept override
;
1434 void receive(ampi
*ptr
, AmpiMsg
*msg
, bool deleteMsg
=true) noexcept override
{}
1435 void receive(ampi
*ptr
, CkReductionMsg
*msg
) noexcept override
{}
1436 AmpiReqType
getType() const noexcept override
{ return AMPI_SSEND_REQ
; }
1437 bool isUnmatched() const noexcept override
{ return false; }
1438 bool isPooledType() const noexcept override
{ return true; }
1439 void pup(PUP::er
&p
) noexcept override
{
1440 AmpiRequest::pup(p
);
1443 void print() const noexcept override
;
1447 class GPUReq
: public AmpiRequest
{
1451 bool test(MPI_Status
*sts
=MPI_STATUS_IGNORE
) noexcept override
;
1452 int wait(MPI_Status
*sts
) noexcept override
;
1453 void receive(ampi
*ptr
, AmpiMsg
*msg
, bool deleteMsg
=true) noexcept override
;
1454 void receive(ampi
*ptr
, CkReductionMsg
*msg
) noexcept override
;
1455 AmpiReqType
getType() const noexcept override
{ return AMPI_GPU_REQ
; }
1456 bool isUnmatched() const noexcept override
{ return false; }
1457 void setComplete() noexcept
;
1458 void print() const noexcept override
;
1462 class ATAReq final
: public AmpiRequest
{
1464 vector
<MPI_Request
> reqs
;
1466 ATAReq(int numReqs_
) noexcept
: reqs(numReqs_
) {}
1469 bool test(MPI_Status
*sts
=MPI_STATUS_IGNORE
) noexcept override
;
1470 int wait(MPI_Status
*sts
) noexcept override
;
1471 void receive(ampi
*ptr
, AmpiMsg
*msg
, bool deleteMsg
=true) noexcept override
{}
1472 void receive(ampi
*ptr
, CkReductionMsg
*msg
) noexcept override
{}
1473 int getCount() const noexcept
{ return reqs
.size(); }
1474 AmpiReqType
getType() const noexcept override
{ return AMPI_ATA_REQ
; }
1475 bool isUnmatched() const noexcept override
{ return false; }
1476 void pup(PUP::er
&p
) noexcept override
{
1477 AmpiRequest::pup(p
);
1480 void print() const noexcept override
;
1483 class GReq final
: public AmpiRequest
{
1485 MPI_Grequest_query_function
* queryFn
;
1486 MPI_Grequest_free_function
* freeFn
;
1487 MPI_Grequest_cancel_function
* cancelFn
;
1488 MPIX_Grequest_poll_function
* pollFn
;
1489 MPIX_Grequest_wait_function
* waitFn
;
1493 GReq(MPI_Grequest_query_function
* q
, MPI_Grequest_free_function
* f
, MPI_Grequest_cancel_function
* c
, void* es
) noexcept
1494 : queryFn(q
), freeFn(f
), cancelFn(c
), pollFn(nullptr), waitFn(nullptr), extraState(es
) {}
1495 GReq(MPI_Grequest_query_function
*q
, MPI_Grequest_free_function
* f
, MPI_Grequest_cancel_function
* c
, MPIX_Grequest_poll_function
* p
, void* es
) noexcept
1496 : queryFn(q
), freeFn(f
), cancelFn(c
), pollFn(p
), waitFn(nullptr), extraState(es
) {}
1497 GReq(MPI_Grequest_query_function
*q
, MPI_Grequest_free_function
* f
, MPI_Grequest_cancel_function
* c
, MPIX_Grequest_poll_function
* p
, MPIX_Grequest_wait_function
* w
, void* es
) noexcept
1498 : queryFn(q
), freeFn(f
), cancelFn(c
), pollFn(p
), waitFn(w
), extraState(es
) {}
1500 ~GReq() noexcept
{ (*freeFn
)(extraState
); }
1501 bool test(MPI_Status
*sts
=MPI_STATUS_IGNORE
) noexcept override
;
1502 int wait(MPI_Status
*sts
) noexcept override
;
1503 void receive(ampi
*ptr
, AmpiMsg
*msg
, bool deleteMsg
=true) noexcept override
{}
1504 void receive(ampi
*ptr
, CkReductionMsg
*msg
) noexcept override
{}
1505 void cancel() noexcept override
{ (*cancelFn
)(extraState
, complete
); }
1506 AmpiReqType
getType() const noexcept override
{ return AMPI_G_REQ
; }
1507 bool isUnmatched() const noexcept override
{ return false; }
1508 void pup(PUP::er
&p
) noexcept override
{
1509 AmpiRequest::pup(p
);
1510 p((char *)queryFn
, sizeof(void *));
1511 p((char *)freeFn
, sizeof(void *));
1512 p((char *)cancelFn
, sizeof(void *));
1513 p((char *)pollFn
, sizeof(void *));
1514 p((char *)waitFn
, sizeof(void *));
1515 p((char *)extraState
, sizeof(void *));
1517 void print() const noexcept override
;
1520 class AmpiRequestPool
;
1522 class AmpiRequestList
{
1524 vector
<AmpiRequest
*> reqs
; // indexed by MPI_Request
1525 int startIdx
; // start next search from this index
1526 AmpiRequestPool
* reqPool
;
1528 AmpiRequestList() noexcept
: startIdx(0) {}
1529 AmpiRequestList(int size
, AmpiRequestPool
* reqPoolPtr
) noexcept
1530 : reqs(size
), startIdx(0), reqPool(reqPoolPtr
) {}
1531 ~AmpiRequestList() noexcept
{}
1533 inline AmpiRequest
* operator[](int n
) noexcept
{
1534 #if CMK_ERROR_CHECKING
1540 void free(AmpiRequestPool
& reqPool
, int idx
, CkDDT
*ddt
) noexcept
;
1541 void freeNonPersReq(int &idx
) noexcept
;
1542 inline int insert(AmpiRequest
* req
) noexcept
{
1543 for (int i
=startIdx
; i
<reqs
.size(); i
++) {
1544 if (reqs
[i
] == NULL
) {
1551 reqs
.push_back(req
);
1552 int idx
= reqs
.size()-1;
1553 req
->setReqIdx(idx
);
1558 inline void checkRequest(MPI_Request idx
) const noexcept
{
1559 if (idx
!= MPI_REQUEST_NULL
&& (idx
< 0 || idx
>= reqs
.size()))
1560 CkAbort("Invalid MPI_Request\n");
1563 inline void unblockReqs(MPI_Request
*requests
, int numReqs
) noexcept
{
1564 for (int i
=0; i
<numReqs
; i
++) {
1565 if (requests
[i
] != MPI_REQUEST_NULL
) {
1566 reqs
[requests
[i
]]->setBlocked(false);
1571 void pup(PUP::er
&p
, AmpiRequestPool
* reqPool
) noexcept
;
1573 void print() const noexcept
{
1574 for (int i
=0; i
<reqs
.size(); i
++) {
1575 if (reqs
[i
] == NULL
) continue;
1576 CkPrintf("AmpiRequestList Element %d [%p]: \n", i
+1, reqs
[i
]);
1582 //A simple memory buffer
1587 memBuf(int size
) noexcept
: buf(size
) {}
1588 void setSize(int s
) noexcept
{buf
.resize(s
);}
1589 int getSize() const noexcept
{return buf
.size();}
1590 const void *getData() const noexcept
{return (const void *)&buf
[0];}
1591 void *getData() noexcept
{return (void *)&buf
[0];}
1595 inline void pupIntoBuf(memBuf
&b
,T
&t
) noexcept
{
1597 b
.setSize(ps
.size());
1598 PUP::toMem
pm(b
.getData()); pm
|t
;
1602 inline void pupFromBuf(const void *data
,T
&t
) noexcept
{
1603 PUP::fromMem
p(data
); p
|t
;
1606 #define COLL_SEQ_IDX -1
1610 class AmpiMsg final
: public CMessage_AmpiMsg
{
1612 int ssendReq
; //Index to the sender's request
1614 int srcRank
; //Communicator rank for source
1615 int length
; //Number of bytes in this message
1616 int origLength
; // true size of allocation
1617 MPI_Comm comm
; // Communicator
1619 char *data
; //Payload
1620 #if CMK_BIGSIM_CHARM
1623 int eventPe
; // the PE that the event is located
1627 AmpiMsg() noexcept
{ data
= NULL
; }
1628 AmpiMsg(int sreq
, int t
, int sRank
, int l
) noexcept
:
1629 ssendReq(sreq
), tag(t
), srcRank(sRank
), length(l
), origLength(l
)
1630 { /* only called from AmpiMsg::pup() since the refnum (seq) will get pup'ed by the runtime */ }
1631 AmpiMsg(CMK_REFNUM_TYPE seq
, int sreq
, int t
, int sRank
, int l
) noexcept
:
1632 ssendReq(sreq
), tag(t
), srcRank(sRank
), length(l
), origLength(l
)
1633 { CkSetRefNum(this, seq
); }
1634 inline void setSsendReq(int s
) noexcept
{ CkAssert(s
>= 0); ssendReq
= s
; }
1635 inline void setSeq(CMK_REFNUM_TYPE s
) noexcept
{ CkAssert(s
>= 0); UsrToEnv(this)->setRef(s
); }
1636 inline void setSrcRank(int sr
) noexcept
{ srcRank
= sr
; }
1637 inline void setLength(int l
) noexcept
{ length
= l
; }
1638 inline void setTag(int t
) noexcept
{ tag
= t
; }
1639 inline void setComm(MPI_Comm c
) noexcept
{ comm
= c
; }
1640 inline CMK_REFNUM_TYPE
getSeq() const noexcept
{ return UsrToEnv(this)->getRef(); }
1641 inline int getSsendReq() const noexcept
{ return ssendReq
; }
1642 inline int getSeqIdx() const noexcept
{
1643 // seqIdx is srcRank, unless this message was part of a collective
1644 if (tag
>= MPI_BCAST_TAG
&& tag
<= MPI_ATA_TAG
) {
1645 return COLL_SEQ_IDX
;
1651 inline int getSrcRank() const noexcept
{ return srcRank
; }
1652 inline int getLength() const noexcept
{ return length
; }
1653 inline char* getData() const noexcept
{ return data
; }
1654 inline int getTag() const noexcept
{ return tag
; }
1655 inline MPI_Comm
getComm() const noexcept
{ return comm
; }
1656 static AmpiMsg
* pup(PUP::er
&p
, AmpiMsg
*m
) noexcept
1658 int ref
, ssendReq
, tag
, srcRank
, length
, origLength
;
1660 if(p
.isPacking() || p
.isSizing()) {
1661 ref
= CkGetRefNum(m
);
1662 ssendReq
= m
->ssendReq
;
1664 srcRank
= m
->srcRank
;
1666 origLength
= m
->origLength
;
1669 p(ref
); p(ssendReq
); p(tag
); p(srcRank
); p(length
); p(origLength
); p(comm
);
1670 if(p
.isUnpacking()) {
1671 m
= new (origLength
, 0) AmpiMsg(ref
, ssendReq
, tag
, srcRank
, origLength
);
1672 m
->setLength(length
);
1676 if(p
.isDeleting()) {
1686 #define AMPI_MSG_POOL_SIZE 32 // Max # of AmpiMsg's allowed in the pool
1687 #define AMPI_POOLED_MSG_SIZE 64 // Max # of Bytes in pooled msgs' payload
1691 std::forward_list
<AmpiMsg
*> msgs
; // list of free msgs
1692 int msgLength
; // AmpiMsg::length of messages in the pool
1693 int maxMsgs
; // max # of msgs in the pool
1694 int currMsgs
; // current # of msgs in the pool
1697 AmpiMsgPool(int _numMsgs
= 0, int _msgLength
= 0) noexcept
1698 : msgLength(_msgLength
), maxMsgs(_numMsgs
), currMsgs(0) {}
1699 ~AmpiMsgPool() =default;
1700 inline void clear() noexcept
{
1701 while (!msgs
.empty()) {
1702 delete msgs
.front();
1707 inline AmpiMsg
* newAmpiMsg(CMK_REFNUM_TYPE seq
, int ssendReq
, int tag
, int srcRank
, int len
) noexcept
{
1708 if (msgs
.empty() || msgs
.front()->origLength
< len
) {
1709 int newlen
= std::max(msgLength
, len
);
1710 AmpiMsg
* msg
= new (newlen
, 0) AmpiMsg(seq
, ssendReq
, tag
, srcRank
, newlen
);
1711 msg
->setLength(len
);
1714 AmpiMsg
* msg
= msgs
.front();
1718 msg
->setSsendReq(ssendReq
);
1720 msg
->setSrcRank(srcRank
);
1721 msg
->setLength(len
);
1725 inline void deleteAmpiMsg(AmpiMsg
* msg
) noexcept
{
1726 /* msg->origLength is the true size of the message's data buffer, while
1727 * msg->length is the space taken by the payload within it. */
1728 if (currMsgs
!= maxMsgs
&& msg
->origLength
>= msgLength
&& msg
->origLength
< 2*msgLength
) {
1729 msgs
.push_front(msg
);
1735 void pup(PUP::er
& p
) {
1738 // Don't PUP the msgs in the free list or currMsgs, let the pool fill lazily
1742 // Number of requests in the pool
1743 #ifndef AMPI_REQ_POOL_SIZE
1744 #define AMPI_REQ_POOL_SIZE 64
1747 // Helper macro for pool size and alignment calculations
1748 #define DefinePooledReqX(name, func) \
1749 static const size_t ireq##name = func(IReq); \
1750 static const size_t sreq##name = func(SendReq); \
1751 static const size_t ssreq##name = func(SsendReq); \
1752 static const size_t pooledReq##name = (ireq##name >= sreq##name && ireq##name >= ssreq##name) ? ireq##name : \
1753 (sreq##name >= ireq##name && sreq##name >= ssreq##name) ? sreq##name : \
1756 // This defines 'static const size_t pooledReqSize = ... ;'
1757 DefinePooledReqX(Size
, sizeof)
1759 // This defines 'static const size_t pooledReqAlign = ... ;'
1760 DefinePooledReqX(Align
, alignof
)
1762 // Pool of IReq, SendReq, and SsendReq objects:
1763 // These are different sizes, but we use a single pool for them so
1764 // that iteration over these objects is fast, as in AMPI_Waitall.
1765 // We also try to always allocate new requests from the start to the end
1766 // of the pool, so that forward iteration over requests is fast.
1767 class AmpiRequestPool
{
1769 std::bitset
<AMPI_REQ_POOL_SIZE
> validReqs
; // reqs in the pool are either valid (being used by a real req) or invalid
1770 int startIdx
= 0; // start next search from this index
1771 alignas(pooledReqAlign
) std::array
<char, AMPI_REQ_POOL_SIZE
*pooledReqSize
> reqs
; // pool of memory for requests
1774 AmpiRequestPool() =default;
1775 ~AmpiRequestPool() =default;
1776 template <typename T
, typename
... Args
>
1777 inline T
* newReq(Args
&&... args
) noexcept
{
1778 if (validReqs
.all()) {
1779 return new T(std::forward
<Args
>(args
)...);
1781 for (int i
=startIdx
; i
<validReqs
.size(); i
++) {
1782 if (!validReqs
[i
]) {
1785 T
* req
= new (&reqs
[i
*pooledReqSize
]) T(std::forward
<Args
>(args
)...);
1789 CkAbort("AMPI> failed to find a free request in pool!");
1793 inline void deleteReq(AmpiRequest
* req
) noexcept
{
1794 if (req
->isPooledType() &&
1795 ((char*)req
>= &reqs
.front() && (char*)req
<= &reqs
.back()))
1797 int idx
= (int)((intptr_t)req
- (intptr_t)&reqs
[0]) / pooledReqSize
;
1799 startIdx
= std::min(idx
, startIdx
);
1804 void pup(PUP::er
& p
) noexcept
{
1805 // Nothing to do here, because AmpiRequestList::pup will be the
1806 // one to actually PUP the AmpiRequest objects to/from the pool
1811 Our local representation of another AMPI
1812 array element. Used to keep track of incoming
1813 and outgoing message sequence numbers, and
1814 the out-of-order message list.
1816 class AmpiOtherElement
{
1818 /// Next incoming and outgoing message sequence number
1819 CMK_REFNUM_TYPE seqIncoming
, seqOutgoing
;
1821 /// Number of messages in out-of-order queue (normally 0)
1822 uint16_t numOutOfOrder
;
1825 /// seqIncoming starts from 1, b/c 0 means unsequenced
1826 /// seqOutgoing starts from 0, b/c this will be incremented for the first real seq #
1827 AmpiOtherElement() noexcept
: seqIncoming(1), seqOutgoing(0), numOutOfOrder(0) {}
1829 /// Handle wrap around of unsigned type CMK_REFNUM_TYPE
1830 inline void incSeqIncoming() noexcept
{ seqIncoming
++; if (seqIncoming
==0) seqIncoming
=1; }
1831 inline CMK_REFNUM_TYPE
getSeqIncoming() const noexcept
{ return seqIncoming
; }
1833 inline void incSeqOutgoing() noexcept
{ seqOutgoing
++; if (seqOutgoing
==0) seqOutgoing
=1; }
1834 inline CMK_REFNUM_TYPE
getSeqOutgoing() const noexcept
{ return seqOutgoing
; }
1836 inline void incNumOutOfOrder() noexcept
{ numOutOfOrder
++; }
1837 inline void decNumOutOfOrder() noexcept
{ numOutOfOrder
--; }
1838 inline uint16_t getNumOutOfOrder() const noexcept
{ return numOutOfOrder
; }
1840 PUPbytes(AmpiOtherElement
)
1842 class AmpiSeqQ
: private CkNoncopyable
{
1843 CkMsgQ
<AmpiMsg
> out
; // all out of order messages
1844 std::unordered_map
<int, AmpiOtherElement
> elements
; // element info: indexed by seqIdx (comm rank)
1847 AmpiSeqQ() =default;
1848 AmpiSeqQ(int commSize
) noexcept
{
1849 elements
.reserve(std::min(commSize
, 64));
1851 ~AmpiSeqQ() =default;
1852 void pup(PUP::er
&p
) noexcept
;
1854 /// Insert this message in the table. Returns the number
1855 /// of messages now available for the element.
1856 /// If 0, the message was out-of-order and is buffered.
1857 /// If 1, this message can be immediately processed.
1858 /// If >1, this message can be immediately processed,
1859 /// and you should call "getOutOfOrder" repeatedly.
1860 inline int put(int seqIdx
, AmpiMsg
*msg
) noexcept
{
1861 AmpiOtherElement
&el
= elements
[seqIdx
];
1862 if (msg
->getSeq() == el
.getSeqIncoming()) { // In order:
1863 el
.incSeqIncoming();
1864 return 1+el
.getNumOutOfOrder();
1866 else { // Out of order: stash message
1867 putOutOfOrder(seqIdx
, msg
);
1872 /// Is this message in order (return >0) or not (return 0)?
1873 /// Same as put() except we don't call putOutOfOrder() here,
1874 /// so the caller should do that separately
1875 inline int isInOrder(int srcRank
, CMK_REFNUM_TYPE seq
) noexcept
{
1876 AmpiOtherElement
&el
= elements
[srcRank
];
1877 if (seq
== el
.getSeqIncoming()) { // In order:
1878 el
.incSeqIncoming();
1879 return 1+el
.getNumOutOfOrder();
1881 else { // Out of order: caller should stash message
1886 /// Get an out-of-order message from the table.
1887 /// (in-order messages never go into the table)
1888 AmpiMsg
*getOutOfOrder(int seqIdx
) noexcept
;
1890 /// Stash an out-of-order message
1891 void putOutOfOrder(int seqIdx
, AmpiMsg
*msg
) noexcept
;
1893 /// Increment the outgoing sequence number.
1894 inline void incCollSeqOutgoing() noexcept
{
1895 elements
[COLL_SEQ_IDX
].incSeqOutgoing();
1898 /// Return the next outgoing sequence number, and increment it.
1899 inline CMK_REFNUM_TYPE
nextOutgoing(int destRank
) noexcept
{
1900 AmpiOtherElement
&el
= elements
[destRank
];
1901 el
.incSeqOutgoing();
1902 return el
.getSeqOutgoing();
1905 PUPmarshall(AmpiSeqQ
)
1908 inline CProxy_ampi
ampiCommStruct::getProxy() const noexcept
{return ampiID
;}
1909 const ampiCommStruct
&universeComm2CommStruct(MPI_Comm universeNo
) noexcept
;
1911 // Max value of a predefined MPI_Op (values defined in ampi.h)
1912 #define AMPI_MAX_PREDEFINED_OP 13
1915 An ampiParent holds all the communicators and the TCharm thread
1916 for its children, which are bound to it.
1918 class ampiParent final
: public CBase_ampiParent
{
1921 CProxy_TCharm threads
;
1923 public: // Communication state:
1924 int numBlockedReqs
; // number of requests currently blocked on
1925 bool resumeOnRecv
, resumeOnColl
;
1926 AmpiRequestList ampiReqs
;
1927 AmpiRequestPool reqPool
;
1928 AmpiRequest
*blockingReq
;
1932 MPI_Comm worldNo
; //My MPI_COMM_WORLD
1933 ampi
*worldPtr
; //AMPI element corresponding to MPI_COMM_WORLD
1935 CkPupPtrVec
<ampiCommStruct
> splitComm
; //Communicators from MPI_Comm_split
1936 CkPupPtrVec
<ampiCommStruct
> groupComm
; //Communicators from MPI_Comm_group
1937 CkPupPtrVec
<ampiCommStruct
> cartComm
; //Communicators from MPI_Cart_create
1938 CkPupPtrVec
<ampiCommStruct
> graphComm
; //Communicators from MPI_Graph_create
1939 CkPupPtrVec
<ampiCommStruct
> distGraphComm
; //Communicators from MPI_Dist_graph_create
1940 CkPupPtrVec
<ampiCommStruct
> interComm
; //Communicators from MPI_Intercomm_create
1941 CkPupPtrVec
<ampiCommStruct
> intraComm
; //Communicators from MPI_Intercomm_merge
1943 CkPupPtrVec
<groupStruct
> groups
; // "Wild" groups that don't have a communicator
1944 CkPupPtrVec
<WinStruct
> winStructList
; //List of windows for one-sided communication
1945 CkPupPtrVec
<InfoStruct
> infos
; // list of all MPI_Infos
1946 const std::array
<MPI_User_function
*, AMPI_MAX_PREDEFINED_OP
+1>& predefinedOps
; // owned by ampiNodeMgr
1947 vector
<OpStruct
> userOps
; // list of any user-defined MPI_Ops
1948 vector
<AmpiMsg
*> matchedMsgs
; // for use with MPI_Mprobe and MPI_Mrecv
1950 /* MPI_*_get_attr C binding returns a *pointer* to an integer,
1951 * so there needs to be some storage somewhere to point to.
1952 * All builtin keyvals are ints, except for MPI_WIN_BASE, which
1953 * is a pointer, and MPI_WIN_SIZE, which is an MPI_Aint. */
1954 int* kv_builtin_storage
;
1955 MPI_Aint
* win_size_storage
;
1956 void** win_base_storage
;
1957 CkPupPtrVec
<KeyvalNode
> kvlist
;
1958 void* bsendBuffer
; // NOTE: we don't actually use this for buffering of MPI_Bsend's,
1959 int bsendBufferSize
; // we only keep track of it to return it from MPI_Buffer_detach
1961 // Intercommunicator creation:
1962 bool isTmpRProxySet
;
1963 CProxy_ampi tmpRProxy
;
1965 MPI_MigrateFn userAboutToMigrateFn
, userJustMigratedFn
;
1968 bool ampiInitCallDone
;
1971 bool kv_set_builtin(int keyval
, void* attribute_val
) noexcept
;
1972 bool kv_get_builtin(int keyval
) noexcept
;
1975 void prepareCtv() noexcept
;
1977 MPI_Message
putMatchedMsg(AmpiMsg
* msg
) noexcept
{
1978 // Search thru matchedMsgs for any NULL ones first:
1979 for (int i
=0; i
<matchedMsgs
.size(); i
++) {
1980 if (matchedMsgs
[i
] == NULL
) {
1981 matchedMsgs
[i
] = msg
;
1985 // No NULL entries, so create a new one:
1986 matchedMsgs
.push_back(msg
);
1987 return matchedMsgs
.size() - 1;
1989 AmpiMsg
* getMatchedMsg(MPI_Message message
) noexcept
{
1990 if (message
== MPI_MESSAGE_NO_PROC
|| message
== MPI_MESSAGE_NULL
) {
1993 CkAssert(message
>= 0 && message
< matchedMsgs
.size());
1994 AmpiMsg
* msg
= matchedMsgs
[message
];
1995 // Mark this matchedMsg index NULL and free from back of vector:
1996 matchedMsgs
[message
] = NULL
;
1997 while (matchedMsgs
.back() == NULL
) {
1998 matchedMsgs
.pop_back();
2003 inline void attachBuffer(void *buffer
, int size
) noexcept
{
2004 bsendBuffer
= buffer
;
2005 bsendBufferSize
= size
;
2007 inline void detachBuffer(void *buffer
, int *size
) noexcept
{
2008 *(void **)buffer
= bsendBuffer
;
2009 *size
= bsendBufferSize
;
2011 inline bool isSplit(MPI_Comm comm
) const noexcept
{
2012 return (comm
>=MPI_COMM_FIRST_SPLIT
&& comm
<MPI_COMM_FIRST_GROUP
);
2014 const ampiCommStruct
&getSplit(MPI_Comm comm
) const noexcept
{
2015 int idx
=comm
-MPI_COMM_FIRST_SPLIT
;
2016 if (idx
>=splitComm
.size()) CkAbort("Bad split communicator used");
2017 return *splitComm
[idx
];
2019 void splitChildRegister(const ampiCommStruct
&s
) noexcept
;
2021 inline bool isGroup(MPI_Comm comm
) const noexcept
{
2022 return (comm
>=MPI_COMM_FIRST_GROUP
&& comm
<MPI_COMM_FIRST_CART
);
2024 const ampiCommStruct
&getGroup(MPI_Comm comm
) const noexcept
{
2025 int idx
=comm
-MPI_COMM_FIRST_GROUP
;
2026 if (idx
>=groupComm
.size()) CkAbort("Bad group communicator used");
2027 return *groupComm
[idx
];
2029 void groupChildRegister(const ampiCommStruct
&s
) noexcept
;
2030 inline bool isInGroups(MPI_Group group
) const noexcept
{
2031 return (group
>=0 && group
<groups
.size());
2034 void cartChildRegister(const ampiCommStruct
&s
) noexcept
;
2035 void graphChildRegister(const ampiCommStruct
&s
) noexcept
;
2036 void distGraphChildRegister(const ampiCommStruct
&s
) noexcept
;
2037 void interChildRegister(const ampiCommStruct
&s
) noexcept
;
2038 void intraChildRegister(const ampiCommStruct
&s
) noexcept
;
2041 ampiParent(MPI_Comm worldNo_
,CProxy_TCharm threads_
,int nRanks_
) noexcept
;
2042 ampiParent(CkMigrateMessage
*msg
) noexcept
;
2043 void ckAboutToMigrate() noexcept
;
2044 void ckJustMigrated() noexcept
;
2045 void ckJustRestored() noexcept
;
2046 void setUserAboutToMigrateFn(MPI_MigrateFn f
) noexcept
;
2047 void setUserJustMigratedFn(MPI_MigrateFn f
) noexcept
;
2048 ~ampiParent() noexcept
;
2050 //Children call this when they are first created, or just migrated
2051 TCharm
*registerAmpi(ampi
*ptr
,ampiCommStruct s
,bool forMigration
) noexcept
;
2053 // exchange proxy info between two ampi proxies
2054 void ExchangeProxy(CProxy_ampi rproxy
) noexcept
{
2055 if(!isTmpRProxySet
){ tmpRProxy
=rproxy
; isTmpRProxySet
=true; }
2056 else{ tmpRProxy
.setRemoteProxy(rproxy
); rproxy
.setRemoteProxy(tmpRProxy
); isTmpRProxySet
=false; }
2059 //Grab the next available split/group communicator
2060 MPI_Comm
getNextSplit() const noexcept
{return MPI_COMM_FIRST_SPLIT
+splitComm
.size();}
2061 MPI_Comm
getNextGroup() const noexcept
{return MPI_COMM_FIRST_GROUP
+groupComm
.size();}
2062 MPI_Comm
getNextCart() const noexcept
{return MPI_COMM_FIRST_CART
+cartComm
.size();}
2063 MPI_Comm
getNextGraph() const noexcept
{return MPI_COMM_FIRST_GRAPH
+graphComm
.size();}
2064 MPI_Comm
getNextDistGraph() const noexcept
{return MPI_COMM_FIRST_DIST_GRAPH
+distGraphComm
.size();}
2065 MPI_Comm
getNextInter() const noexcept
{return MPI_COMM_FIRST_INTER
+interComm
.size();}
2066 MPI_Comm
getNextIntra() const noexcept
{return MPI_COMM_FIRST_INTRA
+intraComm
.size();}
2068 inline bool isCart(MPI_Comm comm
) const noexcept
{
2069 return (comm
>=MPI_COMM_FIRST_CART
&& comm
<MPI_COMM_FIRST_GRAPH
);
2071 ampiCommStruct
&getCart(MPI_Comm comm
) const noexcept
{
2072 int idx
=comm
-MPI_COMM_FIRST_CART
;
2073 if (idx
>=cartComm
.size()) CkAbort("AMPI> Bad cartesian communicator used!\n");
2074 return *cartComm
[idx
];
2076 inline bool isGraph(MPI_Comm comm
) const noexcept
{
2077 return (comm
>=MPI_COMM_FIRST_GRAPH
&& comm
<MPI_COMM_FIRST_DIST_GRAPH
);
2079 ampiCommStruct
&getGraph(MPI_Comm comm
) const noexcept
{
2080 int idx
=comm
-MPI_COMM_FIRST_GRAPH
;
2081 if (idx
>=graphComm
.size()) CkAbort("AMPI> Bad graph communicator used!\n");
2082 return *graphComm
[idx
];
2084 inline bool isDistGraph(MPI_Comm comm
) const noexcept
{
2085 return (comm
>= MPI_COMM_FIRST_DIST_GRAPH
&& comm
< MPI_COMM_FIRST_INTER
);
2087 ampiCommStruct
&getDistGraph(MPI_Comm comm
) const noexcept
{
2088 int idx
= comm
-MPI_COMM_FIRST_DIST_GRAPH
;
2089 if (idx
>=distGraphComm
.size()) CkAbort("Bad distributed graph communicator used");
2090 return *distGraphComm
[idx
];
2092 inline bool isInter(MPI_Comm comm
) const noexcept
{
2093 return (comm
>=MPI_COMM_FIRST_INTER
&& comm
<MPI_COMM_FIRST_INTRA
);
2095 const ampiCommStruct
&getInter(MPI_Comm comm
) const noexcept
{
2096 int idx
=comm
-MPI_COMM_FIRST_INTER
;
2097 if (idx
>=interComm
.size()) CkAbort("AMPI> Bad inter-communicator used!\n");
2098 return *interComm
[idx
];
2100 inline bool isIntra(MPI_Comm comm
) const noexcept
{
2101 return (comm
>=MPI_COMM_FIRST_INTRA
&& comm
<MPI_COMM_FIRST_RESVD
);
2103 const ampiCommStruct
&getIntra(MPI_Comm comm
) const noexcept
{
2104 int idx
=comm
-MPI_COMM_FIRST_INTRA
;
2105 if (idx
>=intraComm
.size()) CkAbort("Bad intra-communicator used");
2106 return *intraComm
[idx
];
2109 void pup(PUP::er
&p
) noexcept
;
2111 void startCheckpoint(const char* dname
) noexcept
;
2112 void Checkpoint(int len
, const char* dname
) noexcept
;
2113 void ResumeThread() noexcept
;
2114 TCharm
* getTCharmThread() const noexcept
{return thread
;}
2115 inline ampiParent
* blockOnRecv() noexcept
;
2116 inline CkDDT
* getDDT() noexcept
{ return &myDDT
; }
2119 void setMigratable(bool mig
) noexcept
{
2120 thread
->setMigratable(mig
);
2124 const ampiCommStruct
&getWorldStruct() const noexcept
;
2126 inline const ampiCommStruct
&comm2CommStruct(MPI_Comm comm
) const noexcept
{
2127 if (comm
==MPI_COMM_WORLD
) return getWorldStruct();
2128 if (comm
==worldNo
) return getWorldStruct();
2129 if (isSplit(comm
)) return getSplit(comm
);
2130 if (isGroup(comm
)) return getGroup(comm
);
2131 if (isCart(comm
)) return getCart(comm
);
2132 if (isGraph(comm
)) return getGraph(comm
);
2133 if (isDistGraph(comm
)) return getDistGraph(comm
);
2134 if (isInter(comm
)) return getInter(comm
);
2135 if (isIntra(comm
)) return getIntra(comm
);
2136 return universeComm2CommStruct(comm
);
2139 inline vector
<int>& getKeyvals(MPI_Comm comm
) noexcept
{
2140 ampiCommStruct
&cs
= *(ampiCommStruct
*)&comm2CommStruct(comm
);
2141 return cs
.getKeyvals();
2144 inline ampi
*comm2ampi(MPI_Comm comm
) const noexcept
{
2145 if (comm
==MPI_COMM_WORLD
) return worldPtr
;
2146 if (comm
==worldNo
) return worldPtr
;
2147 if (isSplit(comm
)) {
2148 const ampiCommStruct
&st
=getSplit(comm
);
2149 return st
.getProxy()[thisIndex
].ckLocal();
2151 if (isGroup(comm
)) {
2152 const ampiCommStruct
&st
=getGroup(comm
);
2153 return st
.getProxy()[thisIndex
].ckLocal();
2156 const ampiCommStruct
&st
= getCart(comm
);
2157 return st
.getProxy()[thisIndex
].ckLocal();
2159 if (isGraph(comm
)) {
2160 const ampiCommStruct
&st
= getGraph(comm
);
2161 return st
.getProxy()[thisIndex
].ckLocal();
2163 if (isDistGraph(comm
)) {
2164 const ampiCommStruct
&st
= getDistGraph(comm
);
2165 return st
.getProxy()[thisIndex
].ckLocal();
2167 if (isInter(comm
)) {
2168 const ampiCommStruct
&st
=getInter(comm
);
2169 return st
.getProxy()[thisIndex
].ckLocal();
2171 if (isIntra(comm
)) {
2172 const ampiCommStruct
&st
=getIntra(comm
);
2173 return st
.getProxy()[thisIndex
].ckLocal();
2175 if (comm
>MPI_COMM_WORLD
) return worldPtr
; //Use MPI_WORLD ampi for cross-world messages:
2176 CkAbort("Invalid communicator used!");
2180 inline bool hasComm(const MPI_Group group
) const noexcept
{
2181 MPI_Comm comm
= (MPI_Comm
)group
;
2182 return ( comm
==MPI_COMM_WORLD
|| comm
==worldNo
|| isSplit(comm
) || isGroup(comm
) ||
2183 isCart(comm
) || isGraph(comm
) || isDistGraph(comm
) || isIntra(comm
) );
2184 //isInter omitted because its comm number != its group number
2186 inline vector
<int> group2vec(MPI_Group group
) const noexcept
{
2187 if (group
== MPI_GROUP_NULL
|| group
== MPI_GROUP_EMPTY
) {
2188 return vector
<int>();
2190 else if (hasComm(group
)) {
2191 return comm2CommStruct((MPI_Comm
)group
).getIndices();
2194 CkAssert(isInGroups(group
));
2195 return groups
[group
]->getRanks();
2198 inline MPI_Group
saveGroupStruct(const vector
<int>& vec
) noexcept
{
2199 if (vec
.empty()) return MPI_GROUP_EMPTY
;
2200 int idx
= groups
.size();
2201 groups
.resize(idx
+1);
2202 groups
[idx
]=new groupStruct(vec
);
2203 return (MPI_Group
)idx
;
2205 inline int getRank(const MPI_Group group
) const noexcept
{
2206 vector
<int> vec
= group2vec(group
);
2207 return getPosOp(thisIndex
,vec
);
2209 inline AmpiRequestList
&getReqs() noexcept
{ return ampiReqs
; }
2210 inline int getMyPe() const noexcept
{
2213 inline bool hasWorld() const noexcept
{
2214 return worldPtr
!=NULL
;
2217 inline void checkComm(MPI_Comm comm
) const noexcept
{
2218 if ((comm
!= MPI_COMM_SELF
&& comm
!= MPI_COMM_WORLD
)
2219 || (isSplit(comm
) && comm
-MPI_COMM_FIRST_SPLIT
>= splitComm
.size())
2220 || (isGroup(comm
) && comm
-MPI_COMM_FIRST_GROUP
>= groupComm
.size())
2221 || (isCart(comm
) && comm
-MPI_COMM_FIRST_CART
>= cartComm
.size())
2222 || (isGraph(comm
) && comm
-MPI_COMM_FIRST_GRAPH
>= graphComm
.size())
2223 || (isDistGraph(comm
) && comm
-MPI_COMM_FIRST_DIST_GRAPH
>= distGraphComm
.size())
2224 || (isInter(comm
) && comm
-MPI_COMM_FIRST_INTER
>= interComm
.size())
2225 || (isIntra(comm
) && comm
-MPI_COMM_FIRST_INTRA
>= intraComm
.size()) )
2226 CkAbort("Invalid MPI_Comm\n");
2229 /// if intra-communicator, return comm, otherwise return null group
2230 inline MPI_Group
comm2group(const MPI_Comm comm
) const noexcept
{
2231 if(isInter(comm
)) return MPI_GROUP_NULL
; // we don't support inter-communicator in such functions
2232 ampiCommStruct s
= comm2CommStruct(comm
);
2233 if(comm
!=MPI_COMM_WORLD
&& comm
!=s
.getComm()) CkAbort("Error in ampiParent::comm2group()");
2234 return (MPI_Group
)(s
.getComm());
2237 inline int getRemoteSize(const MPI_Comm comm
) const noexcept
{
2238 if(isInter(comm
)) return getInter(comm
).getRemoteIndices().size();
2241 inline MPI_Group
getRemoteGroup(const MPI_Comm comm
) noexcept
{
2242 if(isInter(comm
)) return saveGroupStruct(getInter(comm
).getRemoteIndices());
2243 else return MPI_GROUP_NULL
;
2246 int createKeyval(MPI_Copy_function
*copy_fn
, MPI_Delete_function
*delete_fn
,
2247 int *keyval
, void* extra_state
) noexcept
;
2248 bool getBuiltinKeyval(int keyval
, void *attribute_val
) noexcept
;
2249 int setUserKeyval(MPI_Comm comm
, int keyval
, void *attribute_val
) noexcept
;
2250 bool getUserKeyval(MPI_Comm comm
, vector
<int>& keyvals
, int keyval
, void *attribute_val
, int *flag
) noexcept
;
2251 int dupUserKeyvals(MPI_Comm old_comm
, MPI_Comm new_comm
) noexcept
;
2252 int freeUserKeyval(int context
, vector
<int>& keyvals
, int *keyval
) noexcept
;
2253 int freeUserKeyvals(int context
, vector
<int>& keyvals
) noexcept
;
2255 int setAttr(MPI_Comm comm
, vector
<int>& keyvals
, int keyval
, void *attribute_val
) noexcept
;
2256 int getAttr(MPI_Comm comm
, vector
<int>& keyvals
, int keyval
, void *attribute_val
, int *flag
) noexcept
;
2257 int deleteAttr(MPI_Comm comm
, vector
<int>& keyvals
, int keyval
) noexcept
;
2259 int addWinStruct(WinStruct
*win
) noexcept
;
2260 WinStruct
*getWinStruct(MPI_Win win
) const noexcept
;
2261 void removeWinStruct(WinStruct
*win
) noexcept
;
2263 int createInfo(MPI_Info
*newinfo
) noexcept
;
2264 int dupInfo(MPI_Info info
, MPI_Info
*newinfo
) noexcept
;
2265 int setInfo(MPI_Info info
, const char *key
, const char *value
) noexcept
;
2266 int deleteInfo(MPI_Info info
, const char *key
) noexcept
;
2267 int getInfo(MPI_Info info
, const char *key
, int valuelen
, char *value
, int *flag
) const noexcept
;
2268 int getInfoValuelen(MPI_Info info
, const char *key
, int *valuelen
, int *flag
) const noexcept
;
2269 int getInfoNkeys(MPI_Info info
, int *nkeys
) const noexcept
;
2270 int getInfoNthkey(MPI_Info info
, int n
, char *key
) const noexcept
;
2271 int freeInfo(MPI_Info info
) noexcept
;
2272 void defineInfoEnv(int nRanks_
) noexcept
;
2273 void defineInfoMigration() noexcept
;
2275 // An 'MPI_Op' is an integer that indexes into either:
2276 // A) an array of predefined ops owned by ampiNodeMgr, or
2277 // B) a vector of user-defined ops owned by ampiParent
2278 // The MPI_Op is compared to AMPI_MAX_PREDEFINED_OP to disambiguate.
2279 inline int createOp(MPI_User_function
*fn
, bool isCommutative
) noexcept
{
2280 // Search thru non-predefined op's for any invalidated ones:
2281 for (int i
=0; i
<userOps
.size(); i
++) {
2282 if (userOps
[i
].isFree()) {
2283 userOps
[i
].init(fn
, isCommutative
);
2284 return AMPI_MAX_PREDEFINED_OP
+ 1 + i
;
2287 // No invalid entries, so create a new one:
2288 userOps
.emplace_back(fn
, isCommutative
);
2289 return AMPI_MAX_PREDEFINED_OP
+ userOps
.size();
2291 inline void freeOp(MPI_Op op
) noexcept
{
2292 // Don't free predefined op's:
2293 if (!opIsPredefined(op
)) {
2294 // Invalidate op, then free all invalid op's from the back of the userOp's vector
2295 int opIdx
= op
- 1 - AMPI_MAX_PREDEFINED_OP
;
2296 CkAssert(opIdx
< userOps
.size());
2297 userOps
[opIdx
].free();
2298 while (!userOps
.empty() && userOps
.back().isFree()) {
2303 inline bool opIsPredefined(MPI_Op op
) const noexcept
{
2304 return (op
<= AMPI_MAX_PREDEFINED_OP
);
2306 inline bool opIsCommutative(MPI_Op op
) const noexcept
{
2307 if (opIsPredefined(op
)) {
2308 return true; // all predefined ops are commutative
2311 int opIdx
= op
- 1 - AMPI_MAX_PREDEFINED_OP
;
2312 CkAssert(opIdx
< userOps
.size());
2313 return userOps
[opIdx
].isCommutative
;
2316 inline MPI_User_function
* op2User_function(MPI_Op op
) const noexcept
{
2317 if (opIsPredefined(op
)) {
2318 return predefinedOps
[op
];
2321 int opIdx
= op
- 1 - AMPI_MAX_PREDEFINED_OP
;
2322 CkAssert(opIdx
< userOps
.size());
2323 return userOps
[opIdx
].func
;
2326 inline AmpiOpHeader
op2AmpiOpHeader(MPI_Op op
, MPI_Datatype type
, int count
) const noexcept
{
2327 if (opIsPredefined(op
)) {
2328 int size
= myDDT
.getType(type
)->getSize(count
);
2329 return AmpiOpHeader(predefinedOps
[op
], type
, count
, size
);
2332 int opIdx
= op
- 1 - AMPI_MAX_PREDEFINED_OP
;
2333 CkAssert(opIdx
< userOps
.size());
2334 int size
= myDDT
.getType(type
)->getSize(count
);
2335 return AmpiOpHeader(userOps
[opIdx
].func
, type
, count
, size
);
2338 inline void applyOp(MPI_Datatype datatype
, MPI_Op op
, int count
, const void* invec
, void* inoutvec
) const noexcept
{
2339 // inoutvec[i] = invec[i] op inoutvec[i]
2340 MPI_User_function
*func
= op2User_function(op
);
2341 (func
)((void*)invec
, inoutvec
, &count
, &datatype
);
2344 void init() noexcept
;
2345 void finalize() noexcept
;
2346 void block() noexcept
;
2347 void yield() noexcept
;
2349 #if AMPI_PRINT_MSG_SIZES
2350 // Map of AMPI routine names to message sizes and number of messages:
2351 // ["AMPI_Routine"][ [msg_size][num_msgs] ]
2352 std::unordered_map
<std::string
, std::map
<int, int> > msgSizes
;
2353 inline bool isRankRecordingMsgSizes() noexcept
;
2354 inline void recordMsgSize(const char* func
, int msgSize
) noexcept
;
2355 void printMsgSizes() noexcept
;
2359 /* message logging */
2361 #if CMK_USE_ZLIB && 0
2363 PUP::tozDisk
*toPUPer
;
2364 PUP::fromzDisk
*fromPUPer
;
2367 PUP::toDisk
*toPUPer
;
2368 PUP::fromDisk
*fromPUPer
;
2373 // Store a generalized request class created by MPIX_Grequest_class_create
2374 class greq_class_desc
{
2376 MPI_Grequest_query_function
*query_fn
;
2377 MPI_Grequest_free_function
*free_fn
;
2378 MPI_Grequest_cancel_function
*cancel_fn
;
2379 MPIX_Grequest_poll_function
*poll_fn
;
2380 MPIX_Grequest_wait_function
*wait_fn
;
2382 void pup(PUP::er
&p
) noexcept
{
2383 p((char *)query_fn
, sizeof(void *));
2384 p((char *)free_fn
, sizeof(void *));
2385 p((char *)cancel_fn
, sizeof(void *));
2386 p((char *)poll_fn
, sizeof(void *));
2387 p((char *)wait_fn
, sizeof(void *));
2392 An ampi manages the communication of one thread over
2393 one MPI communicator.
2395 class ampi final
: public CBase_ampi
{
2397 friend class IReq
; // for checking resumeOnRecv
2398 friend class SendReq
;
2399 friend class SsendReq
;
2400 friend class RednReq
;
2401 friend class GatherReq
;
2402 friend class GathervReq
;
2405 CProxy_ampiParent parentProxy
;
2412 * AMPI Message Matching (Amm) queues are indexed by the tag and sender.
2413 * Since ampi objects are per-communicator, there are separate Amm's per communicator.
2415 Amm
<AmpiRequest
*, AMPI_AMM_PT2PT_POOL_SIZE
> postedReqs
;
2416 Amm
<AmpiMsg
*, AMPI_AMM_PT2PT_POOL_SIZE
> unexpectedMsgs
;
2418 // Bcast requests / msgs must be kept separate from pt2pt,
2419 // so we don't match them to wildcard recv's
2420 Amm
<AmpiRequest
*, AMPI_AMM_COLL_POOL_SIZE
> postedBcastReqs
;
2421 Amm
<AmpiMsg
*, AMPI_AMM_COLL_POOL_SIZE
> unexpectedBcastMsgs
;
2423 // Store generalized request classes created by MPIX_Grequest_class_create
2424 vector
<greq_class_desc
> greq_classes
;
2427 ampiCommStruct myComm
;
2428 vector
<int> tmpVec
; // stores temp group info
2429 CProxy_ampi remoteProxy
; // valid only for intercommunicator
2430 CkPupPtrVec
<win_obj
> winObjects
;
2433 void inorder(AmpiMsg
*msg
) noexcept
;
2434 void inorderBcast(AmpiMsg
*msg
, bool deleteMsg
) noexcept
;
2435 void inorderRdma(char* buf
, int size
, CMK_REFNUM_TYPE seq
, int tag
, int srcRank
,
2436 MPI_Comm comm
, int ssendReq
) noexcept
;
2438 void init() noexcept
;
2439 void findParent(bool forMigration
) noexcept
;
2441 public: // entry methods
2443 ampi(CkArrayID parent_
,const ampiCommStruct
&s
) noexcept
;
2444 ampi(CkMigrateMessage
*msg
) noexcept
;
2445 void ckJustMigrated() noexcept
;
2446 void ckJustRestored() noexcept
;
2449 void pup(PUP::er
&p
) noexcept
;
2451 void allInitDone() noexcept
;
2452 void setInitDoneFlag() noexcept
;
2454 void unblock() noexcept
;
2455 void injectMsg(int size
, char* buf
) noexcept
;
2456 void generic(AmpiMsg
*) noexcept
;
2457 void genericRdma(char* buf
, int size
, CMK_REFNUM_TYPE seq
, int tag
, int srcRank
,
2458 MPI_Comm destcomm
, int ssendReq
) noexcept
;
2459 void completedRdmaSend(CkDataMsg
*msg
) noexcept
;
2460 void ssend_ack(int sreq
) noexcept
;
2461 void barrierResult() noexcept
;
2462 void ibarrierResult() noexcept
;
2463 void bcastResult(AmpiMsg
*msg
) noexcept
;
2464 void rednResult(CkReductionMsg
*msg
) noexcept
;
2465 void irednResult(CkReductionMsg
*msg
) noexcept
;
2467 void splitPhase1(CkReductionMsg
*msg
) noexcept
;
2468 void splitPhaseInter(CkReductionMsg
*msg
) noexcept
;
2469 void commCreatePhase1(MPI_Comm nextGroupComm
) noexcept
;
2470 void intercommCreatePhase1(MPI_Comm nextInterComm
) noexcept
;
2471 void intercommMergePhase1(MPI_Comm nextIntraComm
) noexcept
;
2473 private: // Used by the above entry methods that create new MPI_Comm objects
2474 CProxy_ampi
createNewChildAmpiSync() noexcept
;
2475 void insertNewChildAmpiElements(MPI_Comm newComm
, CProxy_ampi newAmpi
) noexcept
;
2477 inline void handleBlockedReq(AmpiRequest
* req
) noexcept
{
2478 if (req
->isBlocked() && parent
->numBlockedReqs
!= 0) {
2479 parent
->numBlockedReqs
--;
2482 inline void resumeThreadIfReady() noexcept
{
2483 if (parent
->resumeOnRecv
&& parent
->numBlockedReqs
== 0) {
2488 public: // to be used by MPI_* functions
2489 inline const ampiCommStruct
&comm2CommStruct(MPI_Comm comm
) const noexcept
{
2490 return parent
->comm2CommStruct(comm
);
2492 inline const ampiCommStruct
&getCommStruct() const noexcept
{ return myComm
; }
2494 inline ampi
* blockOnRecv() noexcept
;
2495 inline ampi
* blockOnColl() noexcept
;
2496 inline void setBlockingReq(AmpiRequest
*req
) noexcept
;
2497 MPI_Request
postReq(AmpiRequest
* newreq
) noexcept
;
2499 inline CMK_REFNUM_TYPE
getSeqNo(int destRank
, MPI_Comm destcomm
, int tag
) noexcept
;
2500 AmpiMsg
*makeBcastMsg(const void *buf
,int count
,MPI_Datatype type
,int root
,MPI_Comm destcomm
) noexcept
;
2501 AmpiMsg
*makeAmpiMsg(int destRank
,int t
,int sRank
,const void *buf
,int count
,
2502 MPI_Datatype type
,MPI_Comm destcomm
, int ssendReq
=0) noexcept
;
2504 MPI_Request
send(int t
, int s
, const void* buf
, int count
, MPI_Datatype type
, int rank
,
2505 MPI_Comm destcomm
, int ssendReq
=0, AmpiSendType sendType
=BLOCKING_SEND
) noexcept
;
2506 static void sendraw(int t
, int s
, void* buf
, int len
, CkArrayID aid
, int idx
) noexcept
;
2507 inline MPI_Request
sendLocalMsg(int t
, int sRank
, const void* buf
, int size
, MPI_Datatype type
, int destRank
,
2508 MPI_Comm destcomm
, ampi
* destPtr
, int ssendReq
, AmpiSendType sendType
) noexcept
;
2509 inline MPI_Request
sendRdmaMsg(int t
, int sRank
, const void* buf
, int size
, MPI_Datatype type
, int destIdx
,
2510 int destRank
, MPI_Comm destcomm
, CProxy_ampi arrProxy
, int ssendReq
) noexcept
;
2511 inline bool destLikelyWithinProcess(CProxy_ampi arrProxy
, int destIdx
) const noexcept
{
2512 CkArray
* localBranch
= arrProxy
.ckLocalBranch();
2513 int destPe
= localBranch
->lastKnown(CkArrayIndex1D(destIdx
));
2514 return (CkNodeOf(destPe
) == CkMyNode());
2516 MPI_Request
delesend(int t
, int s
, const void* buf
, int count
, MPI_Datatype type
, int rank
,
2517 MPI_Comm destcomm
, CProxy_ampi arrproxy
, int ssend
, AmpiSendType sendType
) noexcept
;
2518 inline void processAmpiMsg(AmpiMsg
*msg
, void* buf
, MPI_Datatype type
, int count
) noexcept
;
2519 inline void processRdmaMsg(const void *sbuf
, int slength
, int ssendReq
, int srank
, void* rbuf
,
2520 int rcount
, MPI_Datatype rtype
, MPI_Comm comm
) noexcept
;
2521 inline void processRednMsg(CkReductionMsg
*msg
, void* buf
, MPI_Datatype type
, int count
) noexcept
;
2522 inline void processNoncommutativeRednMsg(CkReductionMsg
*msg
, void* buf
, MPI_Datatype type
, int count
,
2523 MPI_User_function
* func
) noexcept
;
2524 inline void processGatherMsg(CkReductionMsg
*msg
, void* buf
, MPI_Datatype type
, int recvCount
) noexcept
;
2525 inline void processGathervMsg(CkReductionMsg
*msg
, void* buf
, MPI_Datatype type
,
2526 int* recvCounts
, int* displs
) noexcept
;
2527 inline AmpiMsg
* getMessage(int t
, int s
, MPI_Comm comm
, int *sts
) const noexcept
;
2528 int recv(int t
,int s
,void* buf
,int count
,MPI_Datatype type
,MPI_Comm comm
,MPI_Status
*sts
=NULL
) noexcept
;
2529 void irecv(void *buf
, int count
, MPI_Datatype type
, int src
,
2530 int tag
, MPI_Comm comm
, MPI_Request
*request
) noexcept
;
2531 void mrecv(int tag
, int src
, void* buf
, int count
, MPI_Datatype datatype
, MPI_Comm comm
,
2532 MPI_Status
* status
, MPI_Message
* message
) noexcept
;
2533 void imrecv(void* buf
, int count
, MPI_Datatype datatype
, int src
, int tag
, MPI_Comm comm
,
2534 MPI_Request
* request
, MPI_Message
* message
) noexcept
;
2535 void irecvBcast(void *buf
, int count
, MPI_Datatype type
, int src
,
2536 MPI_Comm comm
, MPI_Request
*request
) noexcept
;
2537 void sendrecv(const void *sbuf
, int scount
, MPI_Datatype stype
, int dest
, int stag
,
2538 void *rbuf
, int rcount
, MPI_Datatype rtype
, int src
, int rtag
,
2539 MPI_Comm comm
, MPI_Status
*sts
) noexcept
;
2540 void sendrecv_replace(void* buf
, int count
, MPI_Datatype datatype
,
2541 int dest
, int sendtag
, int source
, int recvtag
,
2542 MPI_Comm comm
, MPI_Status
*status
) noexcept
;
2543 void probe(int t
,int s
,MPI_Comm comm
,MPI_Status
*sts
) noexcept
;
2544 void mprobe(int t
, int s
, MPI_Comm comm
, MPI_Status
*sts
, MPI_Message
*message
) noexcept
;
2545 int iprobe(int t
,int s
,MPI_Comm comm
,MPI_Status
*sts
) noexcept
;
2546 int improbe(int t
, int s
, MPI_Comm comm
, MPI_Status
*sts
, MPI_Message
*message
) noexcept
;
2547 void barrier() noexcept
;
2548 void ibarrier(MPI_Request
*request
) noexcept
;
2549 void bcast(int root
, void* buf
, int count
, MPI_Datatype type
, MPI_Comm comm
) noexcept
;
2550 int intercomm_bcast(int root
, void* buf
, int count
, MPI_Datatype type
, MPI_Comm intercomm
) noexcept
;
2551 void ibcast(int root
, void* buf
, int count
, MPI_Datatype type
, MPI_Comm comm
, MPI_Request
* request
) noexcept
;
2552 int intercomm_ibcast(int root
, void* buf
, int count
, MPI_Datatype type
, MPI_Comm intercomm
, MPI_Request
*request
) noexcept
;
2553 static void bcastraw(void* buf
, int len
, CkArrayID aid
) noexcept
;
2554 void split(int color
,int key
,MPI_Comm
*dest
, int type
) noexcept
;
2555 void commCreate(const vector
<int>& vec
,MPI_Comm
*newcomm
) noexcept
;
2556 MPI_Comm
cartCreate0D() noexcept
;
2557 MPI_Comm
cartCreate(vector
<int>& vec
, int ndims
, const int* dims
) noexcept
;
2558 void graphCreate(const vector
<int>& vec
, MPI_Comm
*newcomm
) noexcept
;
2559 void distGraphCreate(const vector
<int>& vec
, MPI_Comm
*newcomm
) noexcept
;
2560 void intercommCreate(const vector
<int>& rvec
, int root
, MPI_Comm tcomm
, MPI_Comm
*ncomm
) noexcept
;
2562 inline bool isInter() const noexcept
{ return myComm
.isinter(); }
2563 void intercommMerge(int first
, MPI_Comm
*ncomm
) noexcept
;
2565 inline int getWorldRank() const noexcept
{return parent
->thisIndex
;}
2566 /// Return our rank in this communicator
2567 inline int getRank() const noexcept
{return myRank
;}
2568 inline int getSize() const noexcept
{return myComm
.getSize();}
2569 inline MPI_Comm
getComm() const noexcept
{return myComm
.getComm();}
2570 inline void setCommName(const char *name
) noexcept
{myComm
.setName(name
);}
2571 inline void getCommName(char *name
, int *len
) const noexcept
{myComm
.getName(name
,len
);}
2572 inline vector
<int> getIndices() const noexcept
{ return myComm
.getIndices(); }
2573 inline vector
<int> getRemoteIndices() const noexcept
{ return myComm
.getRemoteIndices(); }
2574 inline const CProxy_ampi
&getProxy() const noexcept
{return thisProxy
;}
2575 inline const CProxy_ampi
&getRemoteProxy() const noexcept
{return remoteProxy
;}
2576 inline void setRemoteProxy(CProxy_ampi rproxy
) noexcept
{ remoteProxy
= rproxy
; thread
->resume(); }
2577 inline int getIndexForRank(int r
) const noexcept
{return myComm
.getIndexForRank(r
);}
2578 inline int getIndexForRemoteRank(int r
) const noexcept
{return myComm
.getIndexForRemoteRank(r
);}
2579 void findNeighbors(MPI_Comm comm
, int rank
, vector
<int>& neighbors
) const noexcept
;
2580 inline const vector
<int>& getNeighbors() const noexcept
{ return myComm
.getTopologyforNeighbors()->getnbors(); }
2581 inline bool opIsCommutative(MPI_Op op
) const noexcept
{ return parent
->opIsCommutative(op
); }
2582 inline MPI_User_function
* op2User_function(MPI_Op op
) const noexcept
{ return parent
->op2User_function(op
); }
2583 void topoDup(int topoType
, int rank
, MPI_Comm comm
, MPI_Comm
*newcomm
) noexcept
;
2585 inline AmpiRequestList
& getReqs() noexcept
{ return parent
->ampiReqs
; }
2586 CkDDT
*getDDT() noexcept
{return &parent
->myDDT
;}
2587 CthThread
getThread() const noexcept
{ return thread
->getThread(); }
2590 MPI_Win
createWinInstance(void *base
, MPI_Aint size
, int disp_unit
, MPI_Info info
) noexcept
;
2591 int deleteWinInstance(MPI_Win win
) noexcept
;
2592 int winGetGroup(WinStruct
*win
, MPI_Group
*group
) const noexcept
;
2593 int winPut(const void *orgaddr
, int orgcnt
, MPI_Datatype orgtype
, int rank
,
2594 MPI_Aint targdisp
, int targcnt
, MPI_Datatype targtype
, WinStruct
*win
) noexcept
;
2595 int winGet(void *orgaddr
, int orgcnt
, MPI_Datatype orgtype
, int rank
,
2596 MPI_Aint targdisp
, int targcnt
, MPI_Datatype targtype
, WinStruct
*win
) noexcept
;
2597 int winIget(MPI_Aint orgdisp
, int orgcnt
, MPI_Datatype orgtype
, int rank
,
2598 MPI_Aint targdisp
, int targcnt
, MPI_Datatype targtype
, WinStruct
*win
,
2599 MPI_Request
*req
) noexcept
;
2600 int winIgetWait(MPI_Request
*request
, MPI_Status
*status
) noexcept
;
2601 int winIgetFree(MPI_Request
*request
, MPI_Status
*status
) noexcept
;
2602 void winRemotePut(int orgtotalsize
, char* orgaddr
, int orgcnt
, MPI_Datatype orgtype
,
2603 MPI_Aint targdisp
, int targcnt
, MPI_Datatype targtype
, int winIndex
) noexcept
;
2604 char* winLocalGet(int orgcnt
, MPI_Datatype orgtype
, MPI_Aint targdisp
, int targcnt
,
2605 MPI_Datatype targtype
, int winIndex
) noexcept
;
2606 AmpiMsg
* winRemoteGet(int orgcnt
, MPI_Datatype orgtype
, MPI_Aint targdisp
,
2607 int targcnt
, MPI_Datatype targtype
, int winIndex
) noexcept
;
2608 AmpiMsg
* winRemoteIget(MPI_Aint orgdisp
, int orgcnt
, MPI_Datatype orgtype
, MPI_Aint targdisp
,
2609 int targcnt
, MPI_Datatype targtype
, int winIndex
) noexcept
;
2610 int winLock(int lock_type
, int rank
, WinStruct
*win
) noexcept
;
2611 int winUnlock(int rank
, WinStruct
*win
) noexcept
;
2612 void winRemoteLock(int lock_type
, int winIndex
, int requestRank
) noexcept
;
2613 void winRemoteUnlock(int winIndex
, int requestRank
) noexcept
;
2614 int winAccumulate(const void *orgaddr
, int orgcnt
, MPI_Datatype orgtype
, int rank
,
2615 MPI_Aint targdisp
, int targcnt
, MPI_Datatype targtype
,
2616 MPI_Op op
, WinStruct
*win
) noexcept
;
2617 void winRemoteAccumulate(int orgtotalsize
, char* orgaddr
, int orgcnt
, MPI_Datatype orgtype
,
2618 MPI_Aint targdisp
, int targcnt
, MPI_Datatype targtype
,
2619 MPI_Op op
, int winIndex
) noexcept
;
2620 int winGetAccumulate(const void *orgaddr
, int orgcnt
, MPI_Datatype orgtype
, void *resaddr
,
2621 int rescnt
, MPI_Datatype restype
, int rank
, MPI_Aint targdisp
,
2622 int targcnt
, MPI_Datatype targtype
, MPI_Op op
, WinStruct
*win
) noexcept
;
2623 void winLocalGetAccumulate(int orgtotalsize
, char* sorgaddr
, int orgcnt
, MPI_Datatype orgtype
,
2624 MPI_Aint targdisp
, int targcnt
, MPI_Datatype targtype
, MPI_Op op
,
2625 char *resaddr
, int winIndex
) noexcept
;
2626 AmpiMsg
* winRemoteGetAccumulate(int orgtotalsize
, char* sorgaddr
, int orgcnt
, MPI_Datatype orgtype
,
2627 MPI_Aint targdisp
, int targcnt
, MPI_Datatype targtype
, MPI_Op op
,
2628 int winIndex
) noexcept
;
2629 int winCompareAndSwap(const void *orgaddr
, const void *compaddr
, void *resaddr
, MPI_Datatype type
,
2630 int rank
, MPI_Aint targdisp
, WinStruct
*win
) noexcept
;
2631 char* winLocalCompareAndSwap(int size
, char* sorgaddr
, char* compaddr
, MPI_Datatype type
,
2632 MPI_Aint targdisp
, int winIndex
) noexcept
;
2633 AmpiMsg
* winRemoteCompareAndSwap(int size
, char *sorgaddr
, char *compaddr
, MPI_Datatype type
,
2634 MPI_Aint targdisp
, int winIndex
) noexcept
;
2635 void winSetName(WinStruct
*win
, const char *name
) noexcept
;
2636 void winGetName(WinStruct
*win
, char *name
, int *length
) const noexcept
;
2637 win_obj
* getWinObjInstance(WinStruct
*win
) const noexcept
;
2638 int getNewSemaId() noexcept
;
2640 int intercomm_scatter(int root
, const void *sendbuf
, int sendcount
, MPI_Datatype sendtype
,
2641 void *recvbuf
, int recvcount
, MPI_Datatype recvtype
, MPI_Comm intercomm
) noexcept
;
2642 int intercomm_iscatter(int root
, const void *sendbuf
, int sendcount
, MPI_Datatype sendtype
,
2643 void *recvbuf
, int recvcount
, MPI_Datatype recvtype
,
2644 MPI_Comm intercomm
, MPI_Request
*request
) noexcept
;
2645 int intercomm_scatterv(int root
, const void* sendbuf
, const int* sendcounts
, const int* displs
,
2646 MPI_Datatype sendtype
, void* recvbuf
, int recvcount
,
2647 MPI_Datatype recvtype
, MPI_Comm intercomm
) noexcept
;
2648 int intercomm_iscatterv(int root
, const void* sendbuf
, const int* sendcounts
, const int* displs
,
2649 MPI_Datatype sendtype
, void* recvbuf
, int recvcount
,
2650 MPI_Datatype recvtype
, MPI_Comm intercomm
, MPI_Request
* request
) noexcept
;
2653 ampiParent
*getAmpiParent() noexcept
;
2654 bool isAmpiThread() noexcept
;
2655 ampi
*getAmpiInstance(MPI_Comm comm
) noexcept
;
2656 void checkComm(MPI_Comm comm
) noexcept
;
2657 void checkRequest(MPI_Request req
) noexcept
;
2658 void handle_MPI_BOTTOM(void* &buf
, MPI_Datatype type
) noexcept
;
2659 void handle_MPI_BOTTOM(void* &buf1
, MPI_Datatype type1
, void* &buf2
, MPI_Datatype type2
) noexcept
;
2661 #if AMPI_ERROR_CHECKING
2662 int ampiErrhandler(const char* func
, int errcode
) noexcept
;
2664 #define ampiErrhandler(func, errcode) (errcode)
2668 #if CMK_TRACE_ENABLED
2670 // List of AMPI functions to trace:
2671 static const char *funclist
[] = {"AMPI_Abort", "AMPI_Add_error_class", "AMPI_Add_error_code", "AMPI_Add_error_string",
2672 "AMPI_Address", "AMPI_Allgather", "AMPI_Allgatherv", "AMPI_Allreduce", "AMPI_Alltoall",
2673 "AMPI_Alltoallv", "AMPI_Alltoallw", "AMPI_Attr_delete", "AMPI_Attr_get",
2674 "AMPI_Attr_put", "AMPI_Barrier", "AMPI_Bcast", "AMPI_Bsend", "AMPI_Cancel",
2675 "AMPI_Cart_coords", "AMPI_Cart_create", "AMPI_Cart_get", "AMPI_Cart_map",
2676 "AMPI_Cart_rank", "AMPI_Cart_shift", "AMPI_Cart_sub", "AMPI_Cartdim_get",
2677 "AMPI_Comm_call_errhandler", "AMPI_Comm_compare", "AMPI_Comm_create", "AMPI_Comm_create_group",
2678 "AMPI_Comm_create_errhandler", "AMPI_Comm_create_keyval", "AMPI_Comm_delete_attr",
2679 "AMPI_Comm_dup", "AMPI_Comm_dup_with_info", "AMPI_Comm_free",
2680 "AMPI_Comm_free_errhandler", "AMPI_Comm_free_keyval", "AMPI_Comm_get_attr",
2681 "AMPI_Comm_get_errhandler", "AMPI_Comm_get_info", "AMPI_Comm_get_name",
2682 "AMPI_Comm_group", "AMPI_Comm_rank", "AMPI_Comm_remote_group", "AMPI_Comm_remote_size",
2683 "AMPI_Comm_set_attr", "AMPI_Comm_set_errhandler", "AMPI_Comm_set_info", "AMPI_Comm_set_name",
2684 "AMPI_Comm_size", "AMPI_Comm_split", "AMPI_Comm_split_type", "AMPI_Comm_test_inter",
2685 "AMPI_Dims_create", "AMPI_Dist_graph_create", "AMPI_Dist_graph_create_adjacent",
2686 "AMPI_Dist_graph_neighbors", "AMPI_Dist_graph_neighbors_count",
2687 "AMPI_Errhandler_create", "AMPI_Errhandler_free", "AMPI_Errhandler_get",
2688 "AMPI_Errhandler_set", "AMPI_Error_class", "AMPI_Error_string", "AMPI_Exscan", "AMPI_Finalize",
2689 "AMPI_Finalized", "AMPI_Gather", "AMPI_Gatherv", "AMPI_Get_address", "AMPI_Get_count",
2690 "AMPI_Get_elements", "AMPI_Get_library_version", "AMPI_Get_processor_name", "AMPI_Get_version",
2691 "AMPI_Graph_create", "AMPI_Graph_get", "AMPI_Graph_map", "AMPI_Graph_neighbors",
2692 "AMPI_Graph_neighbors_count", "AMPI_Graphdims_get", "AMPI_Group_compare", "AMPI_Group_difference",
2693 "AMPI_Group_excl", "AMPI_Group_free", "AMPI_Group_incl", "AMPI_Group_intersection",
2694 "AMPI_Group_range_excl", "AMPI_Group_range_incl", "AMPI_Group_rank", "AMPI_Group_size",
2695 "AMPI_Group_translate_ranks", "AMPI_Group_union", "AMPI_Iallgather", "AMPI_Iallgatherv",
2696 "AMPI_Iallreduce", "AMPI_Ialltoall", "AMPI_Ialltoallv", "AMPI_Ialltoallw", "AMPI_Ibarrier",
2697 "AMPI_Ibcast", "AMPI_Iexscan", "AMPI_Igather", "AMPI_Igatherv", "AMPI_Ineighbor_allgather",
2698 "AMPI_Ineighbor_allgatherv", "AMPI_Ineighbor_alltoall", "AMPI_Ineighbor_alltoallv",
2699 "AMPI_Ineighbor_alltoallw", "AMPI_Init", "AMPI_Init_thread", "AMPI_Initialized", "AMPI_Intercomm_create",
2700 "AMPI_Intercomm_merge", "AMPI_Iprobe", "AMPI_Irecv", "AMPI_Ireduce", "AMPI_Ireduce_scatter",
2701 "AMPI_Ireduce_scatter_block", "AMPI_Is_thread_main", "AMPI_Iscan", "AMPI_Iscatter", "AMPI_Iscatterv",
2702 "AMPI_Isend", "AMPI_Issend", "AMPI_Keyval_create", "AMPI_Keyval_free", "AMPI_Neighbor_allgather",
2703 "AMPI_Neighbor_allgatherv", "AMPI_Neighbor_alltoall", "AMPI_Neighbor_alltoallv", "AMPI_Neighbor_alltoallw",
2704 "AMPI_Op_commutative", "AMPI_Op_create", "AMPI_Op_free", "AMPI_Pack", "AMPI_Pack_size",
2705 "AMPI_Pcontrol", "AMPI_Probe", "AMPI_Query_thread", "AMPI_Recv", "AMPI_Recv_init", "AMPI_Reduce",
2706 "AMPI_Reduce_local", "AMPI_Reduce_scatter", "AMPI_Reduce_scatter_block", "AMPI_Request_free",
2707 "AMPI_Request_get_status", "AMPI_Rsend", "AMPI_Scan", "AMPI_Scatter", "AMPI_Scatterv", "AMPI_Send",
2708 "AMPI_Send_init", "AMPI_Sendrecv", "AMPI_Sendrecv_replace", "AMPI_Ssend", "AMPI_Ssend_init",
2709 "AMPI_Start", "AMPI_Startall", "AMPI_Status_set_cancelled", "AMPI_Status_set_elements", "AMPI_Test",
2710 "AMPI_Test_cancelled", "AMPI_Testall", "AMPI_Testany", "AMPI_Testsome", "AMPI_Topo_test",
2711 "AMPI_Type_commit", "AMPI_Type_contiguous", "AMPI_Type_create_hindexed",
2712 "AMPI_Type_create_hindexed_block", "AMPI_Type_create_hvector", "AMPI_Type_create_indexed_block",
2713 "AMPI_Type_create_keyval", "AMPI_Type_create_resized", "AMPI_Type_create_struct",
2714 "AMPI_Type_delete_attr", "AMPI_Type_dup", "AMPI_Type_extent", "AMPI_Type_free",
2715 "AMPI_Type_free_keyval", "AMPI_Type_get_attr", "AMPI_Type_get_contents", "AMPI_Type_get_envelope",
2716 "AMPI_Type_get_extent", "AMPI_Type_get_name", "AMPI_Type_get_true_extent", "AMPI_Type_hindexed",
2717 "AMPI_Type_hvector", "AMPI_Type_indexed", "AMPI_Type_lb", "AMPI_Type_set_attr",
2718 "AMPI_Type_set_name", "AMPI_Type_size", "AMPI_Type_struct", "AMPI_Type_ub", "AMPI_Type_vector",
2719 "AMPI_Type_create_darray", "AMPI_Type_create_subarray",
2720 "AMPI_Unpack", "AMPI_Wait", "AMPI_Waitall", "AMPI_Waitany", "AMPI_Waitsome", "AMPI_Wtick", "AMPI_Wtime",
2721 "AMPI_Accumulate", "AMPI_Compare_and_swap", "AMPI_Fetch_and_op", "AMPI_Get", "AMPI_Get_accumulate",
2722 "AMPI_Info_create", "AMPI_Info_delete", "AMPI_Info_dup", "AMPI_Info_free", "AMPI_Info_get",
2723 "AMPI_Info_get_nkeys", "AMPI_Info_get_nthkey", "AMPI_Info_get_valuelen",
2724 "AMPI_Info_set", "AMPI_Put", "AMPI_Raccumulate", "AMPI_Rget", "AMPI_Rget_accumulate",
2725 "AMPI_Rput", "AMPI_Win_complete", "AMPI_Win_create", "AMPI_Win_create_errhandler",
2726 "AMPI_Win_create_keyval", "AMPI_Win_delete_attr", "AMPI_Win_fence", "AMPI_Win_free",
2727 "AMPI_Win_free_keyval", "AMPI_Win_get_attr", "AMPI_Win_get_errhandler",
2728 "AMPI_Win_get_group", "AMPI_Win_get_info", "AMPI_Win_get_name", "AMPI_Win_lock",
2729 "AMPI_Win_post", "AMPI_Win_set_attr", "AMPI_Win_set_errhandler", "AMPI_Win_set_info",
2730 "AMPI_Win_set_name", "AMPI_Win_start", "AMPI_Win_test", "AMPI_Win_unlock",
2731 "AMPI_Win_wait", "AMPI_Exit" /*AMPI extensions:*/, "AMPI_Migrate",
2732 "AMPI_Load_start_measure", "AMPI_Load_stop_measure",
2733 "AMPI_Load_set_value", "AMPI_Migrate_to_pe", "AMPI_Set_migratable",
2734 "AMPI_Register_pup", "AMPI_Get_pup_data", "AMPI_Register_main",
2735 "AMPI_Register_about_to_migrate", "AMPI_Register_just_migrated",
2736 "AMPI_Iget", "AMPI_Iget_wait", "AMPI_Iget_free", "AMPI_Iget_data",
2737 "AMPI_Type_is_contiguous", "AMPI_Yield", "AMPI_Suspend",
2738 "AMPI_Resume", "AMPI_Print", "AMPI_Alltoall_medium",
2739 "AMPI_Alltoall_long", "AMPI_System"};
2741 // not traced: AMPI_Trace_begin, AMPI_Trace_end
2743 #endif // CMK_TRACE_ENABLED
2745 //Use this to mark the start of AMPI interface routines that can only be called on AMPI threads:
// Two definitions of AMPI_API(routineName) follow:
//  - under CMK_ERROR_CHECKING, the expansion first calls CkAbort() if isAmpiThread() returns false,
//    and then expands TCHARM_API_TRACE with routineName and the ampi library tag;
//  - the other definition expands to TCHARM_API_TRACE alone, with no thread check.
// The checked expansion is more than one statement and ends with its own semicolon, while the
// unchecked one does not; avoid using AMPI_API as the sole body of an unbraced if/else.
// NOTE(review): the #else and #endif that should separate the two definitions are not visible in
// this listing (the embedded numbering skips 2750 and 2752) -- confirm against the original header.
2746 #if CMK_ERROR_CHECKING
2747 #define AMPI_API(routineName) \
2748 if (!isAmpiThread()) { CkAbort("AMPI> cannot call MPI routines from non-AMPI threads!"); } \
2749 TCHARM_API_TRACE(routineName, "ampi");
2751 #define AMPI_API(routineName) TCHARM_API_TRACE(routineName, "ampi")
2754 //Use this for MPI_Init and routines that can be called before AMPI threads have been initialized:
// Expands only to TCHARM_API_TRACE with routineName and the ampi library tag; it performs no
// isAmpiThread() check, unlike the CMK_ERROR_CHECKING variant of AMPI_API.
2755 #define AMPI_API_INIT(routineName) TCHARM_API_TRACE(routineName, "ampi")
2757 #endif // _AMPIIMPL_H