/*
 * Wrap all RTS functions exposed to AMPI programs in special macros
 * [charm.git] / src / libs / ck-libs / ampi / ampiimpl.h
 * blob 1cedf8b582983eaf014c4c4b41dffbf22ed78131
 */
1 #ifndef _AMPIIMPL_H
2 #define _AMPIIMPL_H
#include <string.h> /* for strlen */
#include <stdint.h> /* for intptr_t, uint8_t */
#include <algorithm>
#include <array>
#include <bitset>
#include <forward_list>
#include <numeric>
#include <string>
#include <vector>

#include "ampi.h"
#include "ddt.h"
#include "charm++.h"

using std::vector;
// Debug tracing hook. As shipped it expands to nothing, so its arguments are
// never evaluated. Uncomment the CkPrintf call for debug print statements.
#define AMPI_DEBUG(...) //CkPrintf(__VA_ARGS__)
/*
 * All MPI_* routines must be defined using the AMPI_API_IMPL macro.
 * All calls inside AMPI to MPI_* routines must use MPI_* as the name.
 * There are two reasons for this:
 *
 * 1. AMPI supports the PMPI interface only on Linux.
 *
 * 2. When AMPI is built on top of MPI, we rename the user's MPI_* calls as AMPI_*.
 */
// Two-level stringification so that macro arguments are expanded before
// being turned into a string literal.
#define STRINGIFY_INTERNAL(a) #a
#define STRINGIFY(a) STRINGIFY_INTERNAL(a)

// AMPI_API_IMPL(ret, name, args...) opens the definition of a C-linkage
// routine `ret name(args...)`. When PMPI is available, the routine is also
// declared weak and a weak alias P<name> is emitted for it.
#if AMPI_HAVE_PMPI
#define AMPI_API_IMPL(ret, name, ...) \
  _Pragma(STRINGIFY(weak name)) \
  _Pragma(STRINGIFY(weak P##name = name)) \
  CLINKAGE \
  ret name(__VA_ARGS__)
#else // not Linux (no PMPI support):
#define AMPI_API_IMPL(ret, name, ...) \
  CLINKAGE \
  ret name(__VA_ARGS__)
#endif
#if AMPIMSGLOG
#include "ckliststring.h"
static CkListString msgLogRanks;
static int msgLogWrite;
static int msgLogRead;
static char *msgLogFilename;

// NOTE: the "&& 0" keeps the zlib-backed PUP::er classes below compiled out.
#if CMK_USE_ZLIB && 0
#include <zlib.h>
namespace PUP{
// Common base for PUP::er's that read from / write to a gzFile.
class zdisk : public er {
 protected:
  gzFile F;//Disk file to read from/write to
  zdisk(unsigned int type,gzFile f):er(type),F(f) {}
  zdisk(const zdisk &p); //You don't want to copy
  void operator=(const zdisk &p); // You don't want to copy

  //For seeking (pack/unpack in different orders)
  virtual void impl_startSeek(seekBlock &s); /*Begin a seeking block*/
  virtual int impl_tell(seekBlock &s); /*Give the current offset*/
  virtual void impl_seek(seekBlock &s,int off); /*Seek to the given offset*/
};

//For packing to a disk file
class tozDisk : public zdisk {
 protected:
  //Generic bottleneck: pack n items of size itemSize from p.
  virtual void bytes(void *p,int n,size_t itemSize,dataType t);
 public:
  //Write data to the given file pointer
  // (must be opened for binary write)
  // You must close the file yourself when done.
  tozDisk(gzFile f):zdisk(IS_PACKING,f) {}
};

//For unpacking from a disk file
class fromzDisk : public zdisk {
 protected:
  //Generic bottleneck: unpack n items of size itemSize from p.
  virtual void bytes(void *p,int n,size_t itemSize,dataType t);
 public:
  //Read data from the given file pointer
  // (must be opened for binary read)
  // You must close the file yourself when done.
  fromzDisk(gzFile f):zdisk(IS_UNPACKING,f) {}
};
} // namespace PUP
#endif
#endif // AMPIMSGLOG
/* AMPI sends messages inline to PE-local destination VPs if: BigSim is not being used and
 * if tracing is not being used (see bug #1640 for more details on the latter). */
#ifndef AMPI_LOCAL_IMPL
#define AMPI_LOCAL_IMPL ( !CMK_BIGSIM_CHARM && !CMK_TRACE_ENABLED )
#endif

/* AMPI uses RDMA sends if BigSim is not being used and the underlying comm
 * layer supports it (except for GNI, which has experimental RDMA support). */
#ifndef AMPI_RDMA_IMPL
#define AMPI_RDMA_IMPL ( !CMK_BIGSIM_CHARM && CMK_ONESIDED_IMPL && !CMK_CONVERSE_UGNI )
#endif

/* contiguous messages larger than or equal to this threshold are sent via RDMA */
#ifndef AMPI_RDMA_THRESHOLD_DEFAULT
#if CMK_USE_IBVERBS || CMK_OFI || CMK_CONVERSE_UGNI
#define AMPI_RDMA_THRESHOLD_DEFAULT 65536
#else
#define AMPI_RDMA_THRESHOLD_DEFAULT 32768
#endif
#endif

/* contiguous messages larger than or equal to this threshold that are being sent
 * within a process are sent via RDMA. */
#ifndef AMPI_SMP_RDMA_THRESHOLD_DEFAULT
#define AMPI_SMP_RDMA_THRESHOLD_DEFAULT 16384
#endif

/* Thresholds in effect at runtime; defined outside this header. */
extern int AMPI_RDMA_THRESHOLD;
extern int AMPI_SMP_RDMA_THRESHOLD;

/* Tuning constants for alltoall (the long-message cutoff is larger under BigSim). */
#define AMPI_ALLTOALL_THROTTLE   64
#define AMPI_ALLTOALL_SHORT_MSG  256
#if CMK_BIGSIM_CHARM
#define AMPI_ALLTOALL_LONG_MSG   4194304
#else
#define AMPI_ALLTOALL_LONG_MSG   32768
#endif
typedef void (*MPI_MigrateFn)(void);

/*
 * AMPI Message Matching (Amm) Interface:
 * messages are matched on 2 ints: [tag, src]
 */
#define AMM_TAG   0
#define AMM_SRC   1
#define AMM_NTAGS 2

// Number of AmmEntry<T>'s in AmmEntryPool for pt2pt msgs:
#ifndef AMPI_AMM_PT2PT_POOL_SIZE
#define AMPI_AMM_PT2PT_POOL_SIZE 32
#endif

// Number of AmmEntry<T>'s in AmmEntryPool for coll msgs:
#ifndef AMPI_AMM_COLL_POOL_SIZE
#define AMPI_AMM_COLL_POOL_SIZE 4
#endif
151 class AmpiRequestList;
153 typedef void (*AmmPupMessageFn)(PUP::er& p, void **msg);
155 template <class T>
156 class AmmEntry {
157 public:
158 int tags[AMM_NTAGS]; // [tag, src]
159 AmmEntry<T>* next;
160 T msg; // T is either an AmpiRequest* or an AmpiMsg*
161 AmmEntry(T m) noexcept { tags[AMM_TAG] = m->getTag(); tags[AMM_SRC] = m->getSrcRank(); next = NULL; msg = m; }
162 AmmEntry(int tag, int src, T m) noexcept { tags[AMM_TAG] = tag; tags[AMM_SRC] = src; next = NULL; msg = m; }
163 AmmEntry() = default;
164 ~AmmEntry() = default;
167 template <class T, size_t N>
168 class Amm {
169 public:
170 AmmEntry<T>* first;
171 AmmEntry<T>** lasth;
173 private:
174 int startIdx;
175 std::bitset<N> validEntries;
176 std::array<AmmEntry<T>, N> entryPool;
178 public:
179 Amm() noexcept : first(NULL), lasth(&first), startIdx(0) { validEntries.reset(); }
180 ~Amm() = default;
181 inline AmmEntry<T>* newEntry(int tag, int src, T msg) noexcept {
182 if (validEntries.all()) {
183 return new AmmEntry<T>(tag, src, msg);
184 } else {
185 for (int i=startIdx; i<validEntries.size(); i++) {
186 if (!validEntries[i]) {
187 validEntries[i] = 1;
188 AmmEntry<T>* ent = new (&entryPool[i]) AmmEntry<T>(tag, src, msg);
189 startIdx = i+1;
190 return ent;
193 CkAbort("AMPI> failed to find a free entry in pool!");
194 return NULL;
197 inline AmmEntry<T>* newEntry(T msg) noexcept {
198 if (validEntries.all()) {
199 return new AmmEntry<T>(msg);
200 } else {
201 for (int i=startIdx; i<validEntries.size(); i++) {
202 if (!validEntries[i]) {
203 validEntries[i] = 1;
204 AmmEntry<T>* ent = new (&entryPool[i]) AmmEntry<T>(msg);
205 startIdx = i+1;
206 return ent;
209 CkAbort("AMPI> failed to find a free entry in pool!");
210 return NULL;
213 inline void deleteEntry(AmmEntry<T> *ent) noexcept {
214 if (ent >= &entryPool.front() && ent <= &entryPool.back()) {
215 int idx = (int)((intptr_t)ent - (intptr_t)&entryPool.front()) / sizeof(AmmEntry<T>);
216 validEntries[idx] = 0;
217 startIdx = std::min(idx, startIdx);
218 } else {
219 delete ent;
222 void freeAll() noexcept;
223 void flushMsgs() noexcept;
224 inline bool match(const int tags1[AMM_NTAGS], const int tags2[AMM_NTAGS]) const noexcept;
225 inline void put(T msg) noexcept;
226 inline void put(int tag, int src, T msg) noexcept;
227 inline T get(int tag, int src, int* rtags=NULL) noexcept;
228 inline T probe(int tag, int src, int* rtags) noexcept;
229 inline int size() const noexcept;
230 void pup(PUP::er& p, AmmPupMessageFn msgpup) noexcept;
233 PUPfunctionpointer(MPI_User_function*)
236 * OpStruct's are used to lookup an MPI_User_function* and check its commutativity.
237 * They are also used to create AmpiOpHeader's, which are transmitted in reductions
238 * that are user-defined or else lack an equivalent Charm++ reducer type.
240 class OpStruct {
241 public:
242 MPI_User_function* func;
243 bool isCommutative;
244 private:
245 bool isValid;
247 public:
248 OpStruct() = default;
249 OpStruct(MPI_User_function* f) noexcept : func(f), isCommutative(true), isValid(true) {}
250 OpStruct(MPI_User_function* f, bool c) noexcept : func(f), isCommutative(c), isValid(true) {}
251 void init(MPI_User_function* f, bool c) noexcept {
252 func = f;
253 isCommutative = c;
254 isValid = true;
256 bool isFree() const noexcept { return !isValid; }
257 void free() noexcept { isValid = false; }
258 void pup(PUP::er &p) {
259 p|func; p|isCommutative; p|isValid;
263 class AmpiOpHeader {
264 public:
265 MPI_User_function* func;
266 MPI_Datatype dtype;
267 int len;
268 int szdata;
269 AmpiOpHeader(MPI_User_function* f,MPI_Datatype d,int l,int szd) noexcept :
270 func(f),dtype(d),len(l),szdata(szd) { }
273 //------------------- added by YAN for one-sided communication -----------
274 /* the index is unique within a communicator */
275 class WinStruct{
276 public:
277 MPI_Comm comm;
278 int index;
280 private:
281 bool areRecvsPosted;
282 bool inEpoch;
283 vector<int> exposureRankList;
284 vector<int> accessRankList;
285 vector<MPI_Request> requestList;
287 public:
288 WinStruct() noexcept : comm(MPI_COMM_NULL), index(-1), areRecvsPosted(false), inEpoch(false) {
289 exposureRankList.clear(); accessRankList.clear(); requestList.clear();
291 WinStruct(MPI_Comm comm_, int index_) noexcept : comm(comm_), index(index_), areRecvsPosted(false), inEpoch(false) {
292 exposureRankList.clear(); accessRankList.clear(); requestList.clear();
294 void pup(PUP::er &p) noexcept {
295 p|comm; p|index; p|areRecvsPosted; p|inEpoch; p|exposureRankList; p|accessRankList; p|requestList;
297 void clearEpochAccess() noexcept {
298 accessRankList.clear(); inEpoch = false;
300 void clearEpochExposure() noexcept {
301 exposureRankList.clear(); areRecvsPosted = false; requestList.clear(); inEpoch=false;
303 vector<int>& getExposureRankList() noexcept {return exposureRankList;}
304 vector<int>& getAccessRankList() noexcept {return accessRankList;}
305 void setExposureRankList(vector<int> &tmpExposureRankList) noexcept {exposureRankList = tmpExposureRankList;}
306 void setAccessRankList(vector<int> &tmpAccessRankList) noexcept {accessRankList = tmpAccessRankList;}
307 vector<int>& getRequestList() noexcept {return requestList;}
308 bool AreRecvsPosted() const noexcept {return areRecvsPosted;}
309 void setAreRecvsPosted(bool setR) noexcept {areRecvsPosted = setR;}
310 bool isInEpoch() const noexcept {return inEpoch;}
311 void setInEpoch(bool arg) noexcept {inEpoch = arg;}
314 class lockQueueEntry {
315 public:
316 int requestRank;
317 int lock_type;
318 lockQueueEntry (int _requestRank, int _lock_type) noexcept
319 : requestRank(_requestRank), lock_type(_lock_type) {}
320 lockQueueEntry() = default;
323 typedef CkQ<lockQueueEntry *> LockQueue;
325 class ampiParent;
327 class win_obj {
328 public:
329 void *baseAddr;
330 MPI_Aint winSize;
331 int disp_unit;
332 MPI_Comm comm;
334 int owner; // Rank of owner of the lock, -1 if not locked
335 LockQueue lockQueue; // queue of waiting processors for the lock
336 // top of queue is the one holding the lock
337 // queue is empty if lock is not applied
338 std::string winName;
339 bool initflag;
341 vector<int> keyvals; // list of keyval attributes
343 void setName(const char *src) noexcept;
344 void getName(char *src,int *len) noexcept;
346 public:
347 void pup(PUP::er &p) noexcept;
349 win_obj() noexcept;
350 win_obj(const char *name, void *base, MPI_Aint size, int disp_unit, MPI_Comm comm) noexcept;
351 ~win_obj() noexcept;
353 int create(const char *name, void *base, MPI_Aint size, int disp_unit,
354 MPI_Comm comm) noexcept;
355 int free() noexcept;
357 vector<int>& getKeyvals() { return keyvals; }
359 int put(void *orgaddr, int orgcnt, int orgunit,
360 MPI_Aint targdisp, int targcnt, int targunit) noexcept;
362 int get(void *orgaddr, int orgcnt, int orgunit,
363 MPI_Aint targdisp, int targcnt, int targunit) noexcept;
364 int accumulate(void *orgaddr, int count, MPI_Aint targdisp, MPI_Datatype targtype,
365 MPI_Op op, ampiParent* pptr) noexcept;
367 int iget(int orgcnt, MPI_Datatype orgtype,
368 MPI_Aint targdisp, int targcnt, MPI_Datatype targtype) noexcept;
369 int igetWait(MPI_Request *req, MPI_Status *status) noexcept;
370 int igetFree(MPI_Request *req, MPI_Status *status) noexcept;
372 int fence() noexcept;
374 int lock(int requestRank, int lock_type) noexcept;
375 int unlock(int requestRank) noexcept;
377 int wait() noexcept;
378 int post() noexcept;
379 int start() noexcept;
380 int complete() noexcept;
382 void lockTopQueue() noexcept;
383 void enqueue(int requestRank, int lock_type) noexcept;
384 void dequeue() noexcept;
385 bool emptyQueue() noexcept;
387 //-----------------------End of code by YAN ----------------------
389 class KeyvalPair{
390 protected:
391 std::string key;
392 std::string val;
393 public:
394 KeyvalPair() = default;
395 KeyvalPair(const char* k, const char* v) noexcept;
396 ~KeyvalPair() = default;
397 void pup(PUP::er& p) noexcept {
398 p|key;
399 p|val;
401 friend class InfoStruct;
404 class InfoStruct{
405 CkPupPtrVec<KeyvalPair> nodes;
406 bool valid;
407 public:
408 InfoStruct() noexcept : valid(true) { }
409 void setvalid(bool valid_) noexcept { valid = valid_; }
410 bool getvalid() const noexcept { return valid; }
411 int set(const char* k, const char* v) noexcept;
412 int dup(InfoStruct& src) noexcept;
413 int get(const char* k, int vl, char*& v, int *flag) const noexcept;
414 int deletek(const char* k) noexcept;
415 int get_valuelen(const char* k, int* vl, int *flag) const noexcept;
416 int get_nkeys(int *nkeys) const noexcept;
417 int get_nthkey(int n,char* k) const noexcept;
418 void myfree() noexcept;
419 void pup(PUP::er& p) noexcept;
422 class CProxy_ampi;
423 class CProxyElement_ampi;
425 //Virtual class describing a virtual topology: Cart, Graph, DistGraph
426 class ampiTopology {
427 private:
428 vector<int> v; // dummy variable for const& returns from virtual functions
430 public:
431 virtual ~ampiTopology() noexcept {};
432 virtual void pup(PUP::er &p) noexcept =0;
433 virtual int getType() const noexcept =0;
434 virtual void dup(ampiTopology* topo) noexcept =0;
435 virtual const vector<int> &getnbors() const noexcept =0;
436 virtual void setnbors(const vector<int> &nbors_) noexcept =0;
438 virtual const vector<int> &getdims() const noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class."); return v;}
439 virtual const vector<int> &getperiods() const noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class."); return v;}
440 virtual int getndims() const noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class."); return -1;}
441 virtual void setdims(const vector<int> &dims_) noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class.");}
442 virtual void setperiods(const vector<int> &periods_) noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class.");}
443 virtual void setndims(int ndims_) noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class.");}
445 virtual int getnvertices() const noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class."); return -1;}
446 virtual const vector<int> &getindex() const noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class."); return v;}
447 virtual const vector<int> &getedges() const noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class."); return v;}
448 virtual void setnvertices(int nvertices_) noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class.");}
449 virtual void setindex(const vector<int> &index_) noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class.");}
450 virtual void setedges(const vector<int> &edges_) noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class.");}
452 virtual int getInDegree() const noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class."); return -1;}
453 virtual const vector<int> &getSources() const noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class."); return v;}
454 virtual const vector<int> &getSourceWeights() const noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class."); return v;}
455 virtual int getOutDegree() const noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class."); return -1;}
456 virtual const vector<int> &getDestinations() const noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class."); return v;}
457 virtual const vector<int> &getDestWeights() const noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class."); return v;}
458 virtual bool areSourcesWeighted() const noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class."); return false;}
459 virtual bool areDestsWeighted() const noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class."); return false;}
460 virtual void setAreSourcesWeighted(bool val) noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class.");}
461 virtual void setAreDestsWeighted(bool val) noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class.");}
462 virtual void setInDegree(int degree) noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class.");}
463 virtual void setSources(const vector<int> &sources) noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class.");}
464 virtual void setSourceWeights(const vector<int> &sourceWeights) noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class.");}
465 virtual void setOutDegree(int degree) noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class.");}
466 virtual void setDestinations(const vector<int> &destinations) noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class.");}
467 virtual void setDestWeights(const vector<int> &destWeights) noexcept {CkAbort("AMPI: instance of invalid Virtual Topology class.");}
470 class ampiCartTopology final : public ampiTopology {
471 private:
472 int ndims;
473 vector<int> dims, periods, nbors;
475 public:
476 ampiCartTopology() noexcept : ndims(-1) {}
478 void pup(PUP::er &p) noexcept {
479 p|ndims;
480 p|dims;
481 p|periods;
482 p|nbors;
485 inline int getType() const noexcept {return MPI_CART;}
486 inline void dup(ampiTopology* topo) noexcept {
487 CkAssert(topo->getType() == MPI_CART);
488 setndims(topo->getndims());
489 setdims(topo->getdims());
490 setperiods(topo->getperiods());
491 setnbors(topo->getnbors());
494 inline const vector<int> &getdims() const noexcept {return dims;}
495 inline const vector<int> &getperiods() const noexcept {return periods;}
496 inline int getndims() const noexcept {return ndims;}
497 inline const vector<int> &getnbors() const noexcept {return nbors;}
499 inline void setdims(const vector<int> &d) noexcept {dims = d; dims.shrink_to_fit();}
500 inline void setperiods(const vector<int> &p) noexcept {periods = p; periods.shrink_to_fit();}
501 inline void setndims(int nd) noexcept {ndims = nd;}
502 inline void setnbors(const vector<int> &n) noexcept {nbors = n; nbors.shrink_to_fit();}
505 class ampiGraphTopology final : public ampiTopology {
506 private:
507 int nvertices;
508 vector<int> index, edges, nbors;
510 public:
511 ampiGraphTopology() noexcept : nvertices(-1) {}
513 void pup(PUP::er &p) noexcept {
514 p|nvertices;
515 p|index;
516 p|edges;
517 p|nbors;
520 inline int getType() const noexcept {return MPI_GRAPH;}
521 inline void dup(ampiTopology* topo) noexcept {
522 CkAssert(topo->getType() == MPI_GRAPH);
523 setnvertices(topo->getnvertices());
524 setindex(topo->getindex());
525 setedges(topo->getedges());
526 setnbors(topo->getnbors());
529 inline int getnvertices() const noexcept {return nvertices;}
530 inline const vector<int> &getindex() const noexcept {return index;}
531 inline const vector<int> &getedges() const noexcept {return edges;}
532 inline const vector<int> &getnbors() const noexcept {return nbors;}
534 inline void setnvertices(int nv) noexcept {nvertices = nv;}
535 inline void setindex(const vector<int> &i) noexcept {index = i; index.shrink_to_fit();}
536 inline void setedges(const vector<int> &e) noexcept {edges = e; edges.shrink_to_fit();}
537 inline void setnbors(const vector<int> &n) noexcept {nbors = n; nbors.shrink_to_fit();}
540 class ampiDistGraphTopology final : public ampiTopology {
541 private:
542 int inDegree, outDegree;
543 bool sourcesWeighted, destsWeighted;
544 vector<int> sources, sourceWeights, destinations, destWeights, nbors;
546 public:
547 ampiDistGraphTopology() noexcept : inDegree(-1), outDegree(-1), sourcesWeighted(false), destsWeighted(false) {}
549 void pup(PUP::er &p) noexcept {
550 p|inDegree;
551 p|outDegree;
552 p|sourcesWeighted;
553 p|destsWeighted;
554 p|sources;
555 p|sourceWeights;
556 p|destinations;
557 p|destWeights;
558 p|nbors;
561 inline int getType() const noexcept {return MPI_DIST_GRAPH;}
562 inline void dup(ampiTopology* topo) noexcept {
563 CkAssert(topo->getType() == MPI_DIST_GRAPH);
564 setAreSourcesWeighted(topo->areSourcesWeighted());
565 setAreDestsWeighted(topo->areDestsWeighted());
566 setInDegree(topo->getInDegree());
567 setSources(topo->getSources());
568 setSourceWeights(topo->getSourceWeights());
569 setOutDegree(topo->getOutDegree());
570 setDestinations(topo->getDestinations());
571 setDestWeights(topo->getDestWeights());
572 setnbors(topo->getnbors());
575 inline int getInDegree() const noexcept {return inDegree;}
576 inline const vector<int> &getSources() const noexcept {return sources;}
577 inline const vector<int> &getSourceWeights() const noexcept {return sourceWeights;}
578 inline int getOutDegree() const noexcept {return outDegree;}
579 inline const vector<int> &getDestinations() const noexcept {return destinations;}
580 inline const vector<int> &getDestWeights() const noexcept {return destWeights;}
581 inline bool areSourcesWeighted() const noexcept {return sourcesWeighted;}
582 inline bool areDestsWeighted() const noexcept {return destsWeighted;}
583 inline const vector<int> &getnbors() const noexcept {return nbors;}
585 inline void setAreSourcesWeighted(bool v) noexcept {sourcesWeighted = v ? 1 : 0;}
586 inline void setAreDestsWeighted(bool v) noexcept {destsWeighted = v ? 1 : 0;}
587 inline void setInDegree(int d) noexcept {inDegree = d;}
588 inline void setSources(const vector<int> &s) noexcept {sources = s; sources.shrink_to_fit();}
589 inline void setSourceWeights(const vector<int> &sw) noexcept {sourceWeights = sw; sourceWeights.shrink_to_fit();}
590 inline void setOutDegree(int d) noexcept {outDegree = d;}
591 inline void setDestinations(const vector<int> &d) noexcept {destinations = d; destinations.shrink_to_fit();}
592 inline void setDestWeights(const vector<int> &dw) noexcept {destWeights = dw; destWeights.shrink_to_fit();}
593 inline void setnbors(const vector<int> &nbors_) noexcept {nbors = nbors_; nbors.shrink_to_fit();}
596 /* KeyValue class for attribute caching */
597 class KeyvalNode {
598 public:
599 void *val;
600 MPI_Copy_function *copy_fn;
601 MPI_Delete_function *delete_fn;
602 void *extra_state;
603 int refCount;
604 bool isValSet;
606 KeyvalNode() : val(NULL), copy_fn(NULL), delete_fn(NULL), extra_state(NULL), refCount(1), isValSet(false) { }
607 KeyvalNode(MPI_Copy_function *cf, MPI_Delete_function *df, void* es) :
608 val(NULL), copy_fn(cf), delete_fn(df), extra_state(es), refCount(1), isValSet(false) { }
609 bool hasVal() const { return isValSet; }
610 void clearVal() { isValSet = false; }
611 void setVal(void *v) { val = v; isValSet = true; }
612 void* getVal() const { return val; }
613 void incRefCount() { refCount++; }
614 int decRefCount() { CkAssert(refCount > 0); refCount--; return refCount; }
615 void pup(PUP::er& p) {
616 p((char *)val, sizeof(void *));
617 p((char *)copy_fn, sizeof(void *));
618 p((char *)delete_fn, sizeof(void *));
619 p((char *)extra_state, sizeof(void *));
620 p|refCount;
621 p|isValSet;
625 // Only store Group ranks explicitly when they can't be
626 // lazily and transiently created via std::iota()
627 class groupStruct {
628 private:
629 int sz; // -1 if ranks is valid, otherwise the size to pass to std::iota()
630 vector<int> ranks;
632 private:
633 bool ranksIsIota() const noexcept {
634 for (int i=0; i<ranks.size(); i++)
635 if (ranks[i] != i)
636 return false;
637 return true;
640 public:
641 groupStruct() noexcept : sz(0) {}
642 groupStruct(int s) noexcept : sz(s) {}
643 groupStruct(vector<int> r) noexcept : sz(-1), ranks(std::move(r)) {
644 if (ranksIsIota()) {
645 sz = ranks.size();
646 ranks.clear();
648 ranks.shrink_to_fit();
650 groupStruct &operator=(const groupStruct &obj) noexcept {
651 sz = obj.sz;
652 ranks = obj.ranks;
653 return *this;
655 ~groupStruct() = default;
656 void pup(PUP::er& p) noexcept {
657 p|sz;
658 p|ranks;
660 bool isIota() const noexcept {return (sz != -1);}
661 int operator[](int i) const noexcept {return (isIota()) ? i : ranks[i];}
662 int size() const noexcept {return (isIota()) ? sz : ranks.size();}
663 vector<int> getRanks() const noexcept {
664 if (isIota()) {
665 // Lazily create ranks:
666 vector<int> tmpRanks(sz);
667 std::iota(tmpRanks.begin(), tmpRanks.end(), 0);
668 tmpRanks.shrink_to_fit();
669 return tmpRanks;
671 else {
672 return ranks;
// Kind of communicator, stored in a single byte.
enum AmpiCommType : uint8_t {
   WORLD = 0
  ,INTRA = 1
  ,INTER = 2
};
683 //Describes an AMPI communicator
684 class ampiCommStruct {
685 private:
686 MPI_Comm comm; //Communicator
687 CkArrayID ampiID; //ID of corresponding ampi array
688 int size; //Number of processes in communicator
689 AmpiCommType commType; //COMM_WORLD, intracomm, intercomm?
690 groupStruct indices; //indices[r] gives the array index for rank r
691 groupStruct remoteIndices; // remote group for inter-communicator
693 ampiTopology *ampiTopo; // Virtual topology
694 int topoType; // Type of virtual topology: MPI_CART, MPI_GRAPH, MPI_DIST_GRAPH, or MPI_UNDEFINED
696 // For communicator attributes (MPI_*_get_attr): indexed by keyval
697 vector<int> keyvals;
699 // For communicator names
700 std::string commName;
702 public:
703 ampiCommStruct(int ignored=0) noexcept
704 : size(-1), commType(INTRA), ampiTopo(NULL), topoType(MPI_UNDEFINED)
706 ampiCommStruct(MPI_Comm comm_,const CkArrayID &id_,int size_) noexcept
707 : comm(comm_), ampiID(id_),size(size_), commType(WORLD), indices(size_),
708 ampiTopo(NULL), topoType(MPI_UNDEFINED)
710 ampiCommStruct(MPI_Comm comm_,const CkArrayID &id_, const vector<int> &indices_) noexcept
711 : comm(comm_), ampiID(id_), size(indices_.size()), commType(INTRA), indices(indices_),
712 ampiTopo(NULL), topoType(MPI_UNDEFINED)
714 ampiCommStruct(MPI_Comm comm_, const CkArrayID &id_, const vector<int> &indices_,
715 const vector<int> &remoteIndices_) noexcept
716 : comm(comm_), ampiID(id_), size(indices_.size()), commType(INTER), indices(indices_),
717 remoteIndices(remoteIndices_), ampiTopo(NULL), topoType(MPI_UNDEFINED)
720 ~ampiCommStruct() noexcept {
721 if (ampiTopo != NULL)
722 delete ampiTopo;
725 // Overloaded copy constructor. Used when creating virtual topologies.
726 ampiCommStruct(const ampiCommStruct &obj, int topoNumber=MPI_UNDEFINED) noexcept {
727 switch (topoNumber) {
728 case MPI_CART:
729 ampiTopo = new ampiCartTopology();
730 break;
731 case MPI_GRAPH:
732 ampiTopo = new ampiGraphTopology();
733 break;
734 case MPI_DIST_GRAPH:
735 ampiTopo = new ampiDistGraphTopology();
736 break;
737 default:
738 ampiTopo = NULL;
739 break;
741 topoType = topoNumber;
742 comm = obj.comm;
743 ampiID = obj.ampiID;
744 size = obj.size;
745 commType = obj.commType;
746 indices = obj.indices;
747 remoteIndices = obj.remoteIndices;
748 keyvals = obj.keyvals;
749 commName = obj.commName;
752 ampiCommStruct &operator=(const ampiCommStruct &obj) noexcept {
753 if (this == &obj) {
754 return *this;
756 switch (obj.topoType) {
757 case MPI_CART:
758 ampiTopo = new ampiCartTopology(*(static_cast<ampiCartTopology*>(obj.ampiTopo)));
759 break;
760 case MPI_GRAPH:
761 ampiTopo = new ampiGraphTopology(*(static_cast<ampiGraphTopology*>(obj.ampiTopo)));
762 break;
763 case MPI_DIST_GRAPH:
764 ampiTopo = new ampiDistGraphTopology(*(static_cast<ampiDistGraphTopology*>(obj.ampiTopo)));
765 break;
766 default:
767 ampiTopo = NULL;
768 break;
770 topoType = obj.topoType;
771 comm = obj.comm;
772 ampiID = obj.ampiID;
773 size = obj.size;
774 commType = obj.commType;
775 indices = obj.indices;
776 remoteIndices = obj.remoteIndices;
777 keyvals = obj.keyvals;
778 commName = obj.commName;
779 return *this;
782 const ampiTopology* getTopologyforNeighbors() const noexcept {
783 return ampiTopo;
786 ampiTopology* getTopology() noexcept {
787 return ampiTopo;
790 inline bool isinter() const noexcept {return commType==INTER;}
791 void setArrayID(const CkArrayID &nID) noexcept {ampiID=nID;}
793 MPI_Comm getComm() const noexcept {return comm;}
794 inline vector<int> getIndices() const noexcept {return indices.getRanks();}
795 inline vector<int> getRemoteIndices() const noexcept {return remoteIndices.getRanks();}
796 vector<int> &getKeyvals() noexcept {return keyvals;}
798 void setName(const char *src) noexcept {
799 CkDDT_SetName(commName, src);
802 void getName(char *name, int *len) const noexcept {
803 int length = *len = commName.size();
804 memcpy(name, commName.data(), length);
805 name[length] = '\0';
808 //Get the proxy for the entire array
809 CProxy_ampi getProxy() const noexcept;
811 //Get the array index for rank r in this communicator
812 int getIndexForRank(int r) const noexcept {
813 #if CMK_ERROR_CHECKING
814 if (r>=size) CkAbort("AMPI> You passed in an out-of-bounds process rank!");
815 #endif
816 return indices[r];
818 int getIndexForRemoteRank(int r) const noexcept {
819 #if CMK_ERROR_CHECKING
820 if (r>=remoteIndices.size()) CkAbort("AMPI> You passed in an out-of-bounds intercomm remote process rank!");
821 #endif
822 return remoteIndices[r];
824 //Get the rank for this array index (Warning: linear time)
825 int getRankForIndex(int i) const noexcept {
826 if (indices.isIota()) return i;
827 else {
828 const vector<int>& ind = indices.getRanks();
829 for (int r=0;r<ind.size();r++)
830 if (ind[r]==i) return r;
831 return -1; /*That index isn't in this communicator*/
835 int getSize() const noexcept {return size;}
837 void pup(PUP::er &p) noexcept {
838 p|comm;
839 p|ampiID;
840 p|size;
841 p|commType;
842 p|indices;
843 p|remoteIndices;
844 p|keyvals;
845 p|commName;
846 p|topoType;
847 if (topoType != MPI_UNDEFINED) {
848 if (p.isUnpacking()) {
849 switch (topoType) {
850 case MPI_CART:
851 ampiTopo = new ampiCartTopology();
852 break;
853 case MPI_GRAPH:
854 ampiTopo = new ampiGraphTopology();
855 break;
856 case MPI_DIST_GRAPH:
857 ampiTopo = new ampiDistGraphTopology();
858 break;
859 default:
860 CkAbort("AMPI> Communicator has an invalid topology!");
861 break;
864 ampiTopo->pup(p);
865 } else {
866 ampiTopo = NULL;
868 if (p.isDeleting()) {
869 delete ampiTopo; ampiTopo = NULL;
873 PUPmarshall(ampiCommStruct)
875 class mpi_comm_worlds{
876 ampiCommStruct comms[MPI_MAX_COMM_WORLDS];
877 public:
878 ampiCommStruct &operator[](int i) noexcept {return comms[i];}
879 void pup(PUP::er &p) noexcept {
880 for (int i=0;i<MPI_MAX_COMM_WORLDS;i++)
881 comms[i].pup(p);
885 // group operations
886 inline void outputOp(const vector<int>& vec) noexcept {
887 if (vec.size() > 50) {
888 CkPrintf("vector too large to output!\n");
889 return;
891 CkPrintf("output vector: size=%d {",vec.size());
892 for (int i=0; i<vec.size(); i++) {
893 CkPrintf(" %d ", vec[i]);
895 CkPrintf("}\n");
898 inline int getPosOp(int idx, const vector<int>& vec) noexcept {
899 for (int r=0; r<vec.size(); r++) {
900 if (vec[r] == idx) {
901 return r;
904 return MPI_UNDEFINED;
907 inline vector<int> unionOp(const vector<int>& vec1, const vector<int>& vec2) noexcept {
908 vector<int> newvec(vec1);
909 for (int i=0; i<vec2.size(); i++) {
910 if (getPosOp(vec2[i], vec1) == MPI_UNDEFINED) {
911 newvec.push_back(vec2[i]);
914 return newvec;
917 inline vector<int> intersectOp(const vector<int>& vec1, const vector<int>& vec2) noexcept {
918 vector<int> newvec;
919 for (int i=0; i<vec1.size(); i++) {
920 if (getPosOp(vec1[i], vec2) != MPI_UNDEFINED) {
921 newvec.push_back(vec1[i]);
924 return newvec;
927 inline vector<int> diffOp(const vector<int>& vec1, const vector<int>& vec2) noexcept {
928 vector<int> newvec;
929 for (int i=0; i<vec1.size(); i++) {
930 if (getPosOp(vec1[i], vec2) == MPI_UNDEFINED) {
931 newvec.push_back(vec1[i]);
934 return newvec;
937 inline int* translateRanksOp(int n, const vector<int>& vec1, const int* ranks1,
938 const vector<int>& vec2, int *ret) noexcept {
939 for (int i=0; i<n; i++) {
940 ret[i] = (ranks1[i] == MPI_PROC_NULL) ? MPI_PROC_NULL : getPosOp(vec1[ranks1[i]], vec2);
942 return ret;
945 inline int compareVecOp(const vector<int>& vec1, const vector<int>& vec2) noexcept {
946 int pos, ret = MPI_IDENT;
947 if (vec1.size() != vec2.size()) {
948 return MPI_UNEQUAL;
950 for (int i=0; i<vec1.size(); i++) {
951 pos = getPosOp(vec1[i], vec2);
952 if (pos == MPI_UNDEFINED) {
953 return MPI_UNEQUAL;
955 else if (pos != i) {
956 ret = MPI_SIMILAR;
959 return ret;
/// Return the n-element vector { vec[ranks[0]], ..., vec[ranks[n-1]] }.
/// Each ranks[i] is used as an unchecked index into vec.
inline std::vector<int> inclOp(int n, const int* ranks, const std::vector<int>& vec) noexcept {
  std::vector<int> retvec(n);
  for (int i=0; i<n; i++) {
    retvec[i] = vec[ranks[i]];
  }
  return retvec;
}
/// Return the elements of vec whose index does not appear among the first n
/// entries of ranks, preserving vec order.
inline std::vector<int> exclOp(int n, const int* ranks, const std::vector<int>& vec) noexcept {
  std::vector<int> retvec;
  const int size = static_cast<int>(vec.size());
  for (int j=0; j<size; j++) {
    // With n <= 0 nothing is excluded and ranks is never dereferenced.
    const bool excluded = (n > 0) && (std::find(ranks, ranks+n, j) != ranks+n);
    if (!excluded) {
      retvec.push_back(vec[j]);
    }
  }
  return retvec;
}
990 inline vector<int> rangeInclOp(int n, int ranges[][3], const vector<int>& vec,
991 int *flag) noexcept {
992 vector<int> retvec;
993 int first, last, stride;
994 for (int i=0; i<n; i++) {
995 first = ranges[i][0];
996 last = ranges[i][1];
997 stride = ranges[i][2];
998 if (stride != 0) {
999 for (int j=0; j<=(last-first)/stride; j++) {
1000 retvec.push_back(vec[first+stride*j]);
1003 else {
1004 *flag = MPI_ERR_ARG;
1005 return vector<int>();
1008 *flag = MPI_SUCCESS;
1009 return retvec;
1012 inline vector<int> rangeExclOp(int n, int ranges[][3], const vector<int>& vec,
1013 int *flag) noexcept {
1014 vector<int> ranks;
1015 int first, last, stride;
1016 for (int i=0; i<n; i++) {
1017 first = ranges[i][0];
1018 last = ranges[i][1];
1019 stride = ranges[i][2];
1020 if (stride != 0) {
1021 for (int j=0; j<=(last-first)/stride; j++) {
1022 ranks.push_back(first+stride*j);
1025 else {
1026 *flag = MPI_ERR_ARG;
1027 return vector<int>();
1030 *flag = MPI_SUCCESS;
1031 return exclOp(ranks.size(), &ranks[0], vec);
1034 #include "tcharm.h"
1035 #include "tcharmc.h"
1037 #include "ampi.decl.h"
1038 #include "charm-api.h"
1039 #include <sys/stat.h> // for mkdir
1041 extern int _mpi_nworlds;
//MPI_ANY_TAG is defined in ampi.h to MPI_TAG_UB_VALUE+1
// Tag values above MPI_TAG_UB_VALUE. Each expansion is parenthesized so the
// macro behaves as a single operand inside larger expressions.
#define MPI_ATA_SEQ_TAG     (MPI_TAG_UB_VALUE+2)
#define MPI_BCAST_TAG       (MPI_TAG_UB_VALUE+3)
#define MPI_REDN_TAG        (MPI_TAG_UB_VALUE+4)
#define MPI_SCATTER_TAG     (MPI_TAG_UB_VALUE+5)
#define MPI_SCAN_TAG        (MPI_TAG_UB_VALUE+6)
#define MPI_EXSCAN_TAG      (MPI_TAG_UB_VALUE+7)
#define MPI_ATA_TAG         (MPI_TAG_UB_VALUE+8)
#define MPI_NBOR_TAG        (MPI_TAG_UB_VALUE+9)
#define MPI_RMA_TAG         (MPI_TAG_UB_VALUE+10)
#define MPI_EPOCH_START_TAG (MPI_TAG_UB_VALUE+11)
#define MPI_EPOCH_END_TAG   (MPI_TAG_UB_VALUE+12)

// Source rank and communicator values used by the collective request types.
#define AMPI_COLL_SOURCE 0
#define AMPI_COLL_COMM   MPI_COMM_WORLD
/// Discriminator returned by AmpiRequest::getType(); stored in one byte.
enum AmpiReqType : uint8_t {
  AMPI_INVALID_REQ = 0,
  AMPI_I_REQ       = 1,
  AMPI_ATA_REQ     = 2,
  AMPI_SEND_REQ    = 3,
  AMPI_SSEND_REQ   = 4,
  AMPI_REDN_REQ    = 5,
  AMPI_GATHER_REQ  = 6,
  AMPI_GATHERV_REQ = 7,
  AMPI_G_REQ       = 8,
#if CMK_CUDA
  AMPI_GPU_REQ     = 9
#endif
};
1074 inline void operator|(PUP::er &p, AmpiReqType &r) {
1075 pup_bytes(&p, (void *)&r, sizeof(AmpiReqType));
/// Initial status handed to the request constructors: COMPLETED sets the
/// request's `complete` flag, BLOCKED sets its `blocked` flag.
enum AmpiReqSts : char {
  AMPI_REQ_PENDING   = 0,
  AMPI_REQ_BLOCKED   = 1,
  AMPI_REQ_COMPLETED = 2
};
/// Distinguishes blocking sends from immediate (I_SEND) sends.
enum AmpiSendType : bool {
  BLOCKING_SEND = false,
  I_SEND        = true
};
// Round x up to the next multiple of 8.
#define MyAlign8(x) (((x)+7)&(~7))
1092 Represents an MPI request that has been initiated
1093 using Isend, Irecv, Ialltoall, Send_init, etc.
1095 class AmpiRequest {
1096 public:
1097 void *buf = nullptr;
1098 int count = 0;
1099 MPI_Datatype type = MPI_DATATYPE_NULL;
1100 int tag = MPI_ANY_TAG; // the order must match MPI_Status
1101 int src = MPI_ANY_SOURCE;
1102 MPI_Comm comm = MPI_COMM_NULL;
1103 MPI_Request reqIdx = MPI_REQUEST_NULL;
1104 bool complete = false;
1105 bool blocked = false; // this req is currently blocked on
1107 #if CMK_BIGSIM_CHARM
1108 public:
1109 void *event = nullptr; // the event point that corresponds to this message
1110 int eventPe = -1; // the PE that the event is located on
1111 #endif
1113 public:
1114 AmpiRequest() =default;
1115 virtual ~AmpiRequest() =default;
1117 /// Activate this persistent request.
1118 /// Only meaningful for persistent Ireq, SendReq, and SsendReq requests.
1119 virtual void start(MPI_Request reqIdx) noexcept {}
1121 /// Used by AmmEntry's constructor
1122 virtual int getTag() const noexcept { return tag; }
1123 virtual int getSrcRank() const noexcept { return src; }
1125 /// Return true if this request is finished (progress):
1126 virtual bool test(MPI_Status *sts=MPI_STATUS_IGNORE) noexcept =0;
1128 /// Block until this request is finished,
1129 /// returning a valid MPI error code.
1130 virtual int wait(MPI_Status *sts) noexcept =0;
1132 /// Mark this request for cancellation.
1133 /// Supported only for IReq requests
1134 virtual void cancel() noexcept {}
1136 /// Mark this request persistent.
1137 /// Supported only for IReq, SendReq, and SsendReq requests
1138 virtual void setPersistent(bool p) noexcept {}
1139 virtual bool isPersistent() const noexcept { return false; }
1141 /// Receive an AmpiMsg
1142 virtual void receive(ampi *ptr, AmpiMsg *msg, bool deleteMsg=true) noexcept =0;
1144 /// Receive a CkReductionMsg
1145 virtual void receive(ampi *ptr, CkReductionMsg *msg) noexcept =0;
1147 /// Receive an Rdma message
1148 virtual void receiveRdma(ampi *ptr, char *sbuf, int slength, int ssendReq,
1149 int srcRank, MPI_Comm scomm) noexcept { }
1151 /// Set the request's index into AmpiRequestList
1152 void setReqIdx(MPI_Request idx) noexcept { reqIdx = idx; }
1153 MPI_Request getReqIdx() const noexcept { return reqIdx; }
1155 /// Free the request's datatype
1156 void free(CkDDT* ddt) noexcept {
1157 if (type != MPI_DATATYPE_NULL) ddt->freeType(type);
1160 /// Set whether the request is currently blocked on
1161 void setBlocked(bool b) noexcept { blocked = b; }
1162 bool isBlocked() const noexcept { return blocked; }
1164 /// Returns the type of request:
1165 /// AMPI_I_REQ, AMPI_ATA_REQ, AMPI_SEND_REQ, AMPI_SSEND_REQ,
1166 /// AMPI_REDN_REQ, AMPI_GATHER_REQ, AMPI_GATHERV_REQ, AMPI_G_REQ
1167 virtual AmpiReqType getType() const noexcept =0;
1169 /// Returns whether this request will need to be matched.
1170 /// It is used to determine whether this request should be inserted into postedReqs.
1171 /// AMPI_SEND_REQ, AMPI_SSEND_REQ, and AMPI_ATA_REQ should not be posted.
1172 virtual bool isUnmatched() const noexcept =0;
1174 /// Returns whether this type is pooled or not:
1175 /// Only AMPI_I_REQ, AMPI_SEND_REQ, and AMPI_SSEND_REQs are pooled.
1176 virtual bool isPooledType() const noexcept { return false; }
1178 /// Return the actual number of bytes that were received.
1179 virtual int getNumReceivedBytes(CkDDT *ddt) const noexcept {
1180 // by default, return number of bytes requested
1181 return count * ddt->getSize(type);
1184 virtual void pup(PUP::er &p) noexcept {
1185 p((char *)&buf, sizeof(void *)); //supposed to work only with Isomalloc
1186 p(count);
1187 p(type);
1188 p(tag);
1189 p(src);
1190 p(comm);
1191 p(reqIdx);
1192 p(complete);
1193 p(blocked);
1194 #if CMK_BIGSIM_CHARM
1195 //needed for bigsim out-of-core emulation
1196 //as the "log" is not moved from memory, this pointer is safe
1197 //to be reused
1198 p((char *)&event, sizeof(void *));
1199 p(eventPe);
1200 #endif
1203 virtual void print() const noexcept =0;
// This is used in the constructors of the AmpiRequest types below,
// assuming arguments: (MPI_Datatype type_, CkDDT* ddt_, AmpiReqSts sts_)
// It derives the complete/blocked flags from sts_ and takes a reference on
// the datatype. It expands to a braced block, so call sites use it without
// a trailing semicolon.
#define AMPI_REQUEST_COMMON_INIT           \
{                                          \
  complete = (sts_ == AMPI_REQ_COMPLETED); \
  blocked  = (sts_ == AMPI_REQ_BLOCKED);   \
  if (type_ != MPI_DATATYPE_NULL) {        \
    ddt_->getType(type_)->incRefCount();   \
  }                                        \
}
1217 class IReq final : public AmpiRequest {
1218 public:
1219 bool cancelled = false; // track if request is cancelled
1220 bool persistent = false; // Is this a persistent recv request?
1221 int length = 0; // recv'ed length in bytes
1223 IReq(void *buf_, int count_, MPI_Datatype type_, int src_, int tag_,
1224 MPI_Comm comm_, CkDDT *ddt_, AmpiReqSts sts_=AMPI_REQ_PENDING) noexcept
1226 buf = buf_;
1227 count = count_;
1228 type = type_;
1229 src = src_;
1230 tag = tag_;
1231 comm = comm_;
1232 AMPI_REQUEST_COMMON_INIT
1234 IReq() =default;
1235 ~IReq() =default;
1236 bool test(MPI_Status *sts=MPI_STATUS_IGNORE) noexcept override;
1237 int wait(MPI_Status *sts) noexcept override;
1238 void cancel() noexcept override { if (!complete) cancelled = true; }
1239 AmpiReqType getType() const noexcept override { return AMPI_I_REQ; }
1240 bool isUnmatched() const noexcept override { return !complete; }
1241 bool isPooledType() const noexcept override { return true; }
1242 void setPersistent(bool p) noexcept override { persistent = p; }
1243 bool isPersistent() const noexcept override { return persistent; }
1244 void start(MPI_Request reqIdx) noexcept override;
1245 void receive(ampi *ptr, AmpiMsg *msg, bool deleteMsg=true) noexcept override;
1246 void receive(ampi *ptr, CkReductionMsg *msg) noexcept override {}
1247 void receiveRdma(ampi *ptr, char *sbuf, int slength, int ssendReq, int srcRank, MPI_Comm scomm) noexcept override;
1248 int getNumReceivedBytes(CkDDT *ptr) const noexcept override {
1249 return length;
1251 void pup(PUP::er &p) noexcept override {
1252 AmpiRequest::pup(p);
1253 p|cancelled;
1254 p|persistent;
1255 p|length;
1257 void print() const noexcept override;
1260 class RednReq final : public AmpiRequest {
1261 public:
1262 MPI_Op op = MPI_OP_NULL;
1264 RednReq(void *buf_, int count_, MPI_Datatype type_, MPI_Comm comm_,
1265 MPI_Op op_, CkDDT* ddt_, AmpiReqSts sts_=AMPI_REQ_PENDING) noexcept
1267 buf = buf_;
1268 count = count_;
1269 type = type_;
1270 src = AMPI_COLL_SOURCE;
1271 tag = MPI_REDN_TAG;
1272 comm = comm_;
1273 op = op_;
1274 AMPI_REQUEST_COMMON_INIT
1276 RednReq() =default;
1277 ~RednReq() =default;
1278 bool test(MPI_Status *sts=MPI_STATUS_IGNORE) noexcept override;
1279 int wait(MPI_Status *sts) noexcept override;
1280 void cancel() noexcept override {}
1281 AmpiReqType getType() const noexcept override { return AMPI_REDN_REQ; }
1282 bool isUnmatched() const noexcept override { return !complete; }
1283 void receive(ampi *ptr, AmpiMsg *msg, bool deleteMsg=true) noexcept override {}
1284 void receive(ampi *ptr, CkReductionMsg *msg) noexcept override;
1285 void pup(PUP::er &p) noexcept override {
1286 AmpiRequest::pup(p);
1287 p|op;
1289 void print() const noexcept override;
1292 class GatherReq final : public AmpiRequest {
1293 public:
1294 GatherReq(void *buf_, int count_, MPI_Datatype type_, MPI_Comm comm_,
1295 CkDDT *ddt_, AmpiReqSts sts_=AMPI_REQ_PENDING) noexcept
1297 buf = buf_;
1298 count = count_;
1299 type = type_;
1300 src = AMPI_COLL_SOURCE;
1301 tag = MPI_REDN_TAG;
1302 comm = comm_;
1303 AMPI_REQUEST_COMMON_INIT
1305 GatherReq() =default;
1306 ~GatherReq() =default;
1307 bool test(MPI_Status *sts=MPI_STATUS_IGNORE) noexcept override;
1308 int wait(MPI_Status *sts) noexcept override;
1309 void cancel() noexcept override {}
1310 AmpiReqType getType() const noexcept override { return AMPI_GATHER_REQ; }
1311 bool isUnmatched() const noexcept override { return !complete; }
1312 void receive(ampi *ptr, AmpiMsg *msg, bool deleteMsg=true) noexcept override {}
1313 void receive(ampi *ptr, CkReductionMsg *msg) noexcept override;
1314 void pup(PUP::er &p) noexcept override {
1315 AmpiRequest::pup(p);
1317 void print() const noexcept override;
1320 class GathervReq final : public AmpiRequest {
1321 public:
1322 vector<int> recvCounts;
1323 vector<int> displs;
1325 GathervReq(void *buf_, int count_, MPI_Datatype type_, MPI_Comm comm_, const int *rc,
1326 const int *d, CkDDT* ddt_, AmpiReqSts sts_=AMPI_REQ_PENDING) noexcept
1328 buf = buf_;
1329 count = count_;
1330 type = type_;
1331 src = AMPI_COLL_SOURCE;
1332 tag = MPI_REDN_TAG;
1333 comm = comm_;
1334 recvCounts.assign(rc, rc+count);
1335 displs.assign(d, d+count);
1336 AMPI_REQUEST_COMMON_INIT
1338 GathervReq() =default;
1339 ~GathervReq() =default;
1340 bool test(MPI_Status *sts=MPI_STATUS_IGNORE) noexcept override;
1341 int wait(MPI_Status *sts) noexcept override;
1342 AmpiReqType getType() const noexcept override { return AMPI_GATHERV_REQ; }
1343 bool isUnmatched() const noexcept override { return !complete; }
1344 void receive(ampi *ptr, AmpiMsg *msg, bool deleteMsg=true) noexcept override {}
1345 void receive(ampi *ptr, CkReductionMsg *msg) noexcept override;
1346 void pup(PUP::er &p) noexcept override {
1347 AmpiRequest::pup(p);
1348 p|recvCounts;
1349 p|displs;
1351 void print() const noexcept override;
1354 class SendReq final : public AmpiRequest {
1355 bool persistent = false; // is this a persistent send request?
1357 public:
1358 SendReq(MPI_Datatype type_, MPI_Comm comm_, CkDDT* ddt_, AmpiReqSts sts_=AMPI_REQ_PENDING) noexcept
1360 type = type_;
1361 comm = comm_;
1362 AMPI_REQUEST_COMMON_INIT
1364 SendReq(void* buf_, int count_, MPI_Datatype type_, int dest_, int tag_,
1365 MPI_Comm comm_, CkDDT* ddt_, AmpiReqSts sts_=AMPI_REQ_PENDING) noexcept
1367 buf = buf_;
1368 count = count_;
1369 type = type_;
1370 src = dest_;
1371 tag = tag_;
1372 comm = comm_;
1373 AMPI_REQUEST_COMMON_INIT
1375 SendReq() noexcept {}
1376 ~SendReq() noexcept {}
1377 bool test(MPI_Status *sts=MPI_STATUS_IGNORE) noexcept override;
1378 int wait(MPI_Status *sts) noexcept override;
1379 void setPersistent(bool p) noexcept override { persistent = p; }
1380 bool isPersistent() const noexcept override { return persistent; }
1381 void start(MPI_Request reqIdx) noexcept override;
1382 void receive(ampi *ptr, AmpiMsg *msg, bool deleteMsg=true) noexcept override {}
1383 void receive(ampi *ptr, CkReductionMsg *msg) noexcept override {}
1384 AmpiReqType getType() const noexcept override { return AMPI_SEND_REQ; }
1385 bool isUnmatched() const noexcept override { return false; }
1386 bool isPooledType() const noexcept override { return true; }
1387 void pup(PUP::er &p) noexcept override {
1388 AmpiRequest::pup(p);
1389 p|persistent;
1391 void print() const noexcept override;
1394 class SsendReq final : public AmpiRequest {
1395 private:
1396 bool persistent = false; // is this a persistent Ssend request?
1398 public:
1399 SsendReq(MPI_Datatype type_, MPI_Comm comm_, CkDDT* ddt_, AmpiReqSts sts_=AMPI_REQ_PENDING) noexcept
1401 type = type_;
1402 comm = comm_;
1403 AMPI_REQUEST_COMMON_INIT
1405 SsendReq(void* buf_, int count_, MPI_Datatype type_, int dest_, int tag_, MPI_Comm comm_,
1406 CkDDT* ddt_, AmpiReqSts sts_=AMPI_REQ_PENDING) noexcept
1408 buf = buf_;
1409 count = count_;
1410 type = type_;
1411 src = dest_;
1412 tag = tag_;
1413 comm = comm_;
1414 AMPI_REQUEST_COMMON_INIT
1416 SsendReq(void* buf_, int count_, MPI_Datatype type_, int dest_, int tag_, MPI_Comm comm_,
1417 int src_, CkDDT* ddt_, AmpiReqSts sts_=AMPI_REQ_PENDING) noexcept
1419 buf = buf_;
1420 count = count_;
1421 type = type_;
1422 src = dest_;
1423 tag = tag_;
1424 comm = comm_;
1425 AMPI_REQUEST_COMMON_INIT
1427 SsendReq() =default;
1428 ~SsendReq() =default;
1429 bool test(MPI_Status *sts=MPI_STATUS_IGNORE) noexcept override;
1430 int wait(MPI_Status *sts) noexcept override;
1431 void setPersistent(bool p) noexcept override { persistent = p; }
1432 bool isPersistent() const noexcept override { return persistent; }
1433 void start(MPI_Request reqIdx) noexcept override;
1434 void receive(ampi *ptr, AmpiMsg *msg, bool deleteMsg=true) noexcept override {}
1435 void receive(ampi *ptr, CkReductionMsg *msg) noexcept override {}
1436 AmpiReqType getType() const noexcept override { return AMPI_SSEND_REQ; }
1437 bool isUnmatched() const noexcept override { return false; }
1438 bool isPooledType() const noexcept override { return true; }
1439 void pup(PUP::er &p) noexcept override {
1440 AmpiRequest::pup(p);
1441 p|persistent;
1443 void print() const noexcept override;
#if CMK_CUDA
/// GPU request (AMPI_GPU_REQ); only compiled when CMK_CUDA is set.
/// It is never posted for matching and inherits the base-class pup().
class GPUReq : public AmpiRequest {
 public:
  GPUReq() noexcept;
  ~GPUReq() =default;
  bool test(MPI_Status *sts=MPI_STATUS_IGNORE) noexcept override;
  int wait(MPI_Status *sts) noexcept override;
  void receive(ampi *ptr, AmpiMsg *msg, bool deleteMsg=true) noexcept override;
  void receive(ampi *ptr, CkReductionMsg *msg) noexcept override;
  AmpiReqType getType() const noexcept override { return AMPI_GPU_REQ; }
  bool isUnmatched() const noexcept override { return false; }
  void setComplete() noexcept;
  void print() const noexcept override;
};
#endif
1462 class ATAReq final : public AmpiRequest {
1463 public:
1464 vector<MPI_Request> reqs;
1466 ATAReq(int numReqs_) noexcept : reqs(numReqs_) {}
1467 ATAReq() =default;
1468 ~ATAReq() =default;
1469 bool test(MPI_Status *sts=MPI_STATUS_IGNORE) noexcept override;
1470 int wait(MPI_Status *sts) noexcept override;
1471 void receive(ampi *ptr, AmpiMsg *msg, bool deleteMsg=true) noexcept override {}
1472 void receive(ampi *ptr, CkReductionMsg *msg) noexcept override {}
1473 int getCount() const noexcept { return reqs.size(); }
1474 AmpiReqType getType() const noexcept override { return AMPI_ATA_REQ; }
1475 bool isUnmatched() const noexcept override { return false; }
1476 void pup(PUP::er &p) noexcept override {
1477 AmpiRequest::pup(p);
1478 p|reqs;
1480 void print() const noexcept override;
1483 class GReq final : public AmpiRequest {
1484 private:
1485 MPI_Grequest_query_function* queryFn;
1486 MPI_Grequest_free_function* freeFn;
1487 MPI_Grequest_cancel_function* cancelFn;
1488 MPIX_Grequest_poll_function* pollFn;
1489 MPIX_Grequest_wait_function* waitFn;
1490 void* extraState;
1492 public:
1493 GReq(MPI_Grequest_query_function* q, MPI_Grequest_free_function* f, MPI_Grequest_cancel_function* c, void* es) noexcept
1494 : queryFn(q), freeFn(f), cancelFn(c), pollFn(nullptr), waitFn(nullptr), extraState(es) {}
1495 GReq(MPI_Grequest_query_function *q, MPI_Grequest_free_function* f, MPI_Grequest_cancel_function* c, MPIX_Grequest_poll_function* p, void* es) noexcept
1496 : queryFn(q), freeFn(f), cancelFn(c), pollFn(p), waitFn(nullptr), extraState(es) {}
1497 GReq(MPI_Grequest_query_function *q, MPI_Grequest_free_function* f, MPI_Grequest_cancel_function* c, MPIX_Grequest_poll_function* p, MPIX_Grequest_wait_function* w, void* es) noexcept
1498 : queryFn(q), freeFn(f), cancelFn(c), pollFn(p), waitFn(w), extraState(es) {}
1499 GReq() =default;
1500 ~GReq() noexcept { (*freeFn)(extraState); }
1501 bool test(MPI_Status *sts=MPI_STATUS_IGNORE) noexcept override;
1502 int wait(MPI_Status *sts) noexcept override;
1503 void receive(ampi *ptr, AmpiMsg *msg, bool deleteMsg=true) noexcept override {}
1504 void receive(ampi *ptr, CkReductionMsg *msg) noexcept override {}
1505 void cancel() noexcept override { (*cancelFn)(extraState, complete); }
1506 AmpiReqType getType() const noexcept override { return AMPI_G_REQ; }
1507 bool isUnmatched() const noexcept override { return false; }
1508 void pup(PUP::er &p) noexcept override {
1509 AmpiRequest::pup(p);
1510 p((char *)queryFn, sizeof(void *));
1511 p((char *)freeFn, sizeof(void *));
1512 p((char *)cancelFn, sizeof(void *));
1513 p((char *)pollFn, sizeof(void *));
1514 p((char *)waitFn, sizeof(void *));
1515 p((char *)extraState, sizeof(void *));
1517 void print() const noexcept override;
1520 class AmpiRequestPool;
1522 class AmpiRequestList {
1523 private:
1524 vector<AmpiRequest*> reqs; // indexed by MPI_Request
1525 int startIdx; // start next search from this index
1526 AmpiRequestPool* reqPool;
1527 public:
1528 AmpiRequestList() noexcept : startIdx(0) {}
1529 AmpiRequestList(int size, AmpiRequestPool* reqPoolPtr) noexcept
1530 : reqs(size), startIdx(0), reqPool(reqPoolPtr) {}
1531 ~AmpiRequestList() noexcept {}
1533 inline AmpiRequest* operator[](int n) noexcept {
1534 #if CMK_ERROR_CHECKING
1535 return reqs.at(n);
1536 #else
1537 return reqs[n];
1538 #endif
1540 void free(AmpiRequestPool& reqPool, int idx, CkDDT *ddt) noexcept;
1541 void freeNonPersReq(int &idx) noexcept;
1542 inline int insert(AmpiRequest* req) noexcept {
1543 for (int i=startIdx; i<reqs.size(); i++) {
1544 if (reqs[i] == NULL) {
1545 req->setReqIdx(i);
1546 reqs[i] = req;
1547 startIdx = i+1;
1548 return i;
1551 reqs.push_back(req);
1552 int idx = reqs.size()-1;
1553 req->setReqIdx(idx);
1554 startIdx = idx+1;
1555 return idx;
1558 inline void checkRequest(MPI_Request idx) const noexcept {
1559 if (idx != MPI_REQUEST_NULL && (idx < 0 || idx >= reqs.size()))
1560 CkAbort("Invalid MPI_Request\n");
1563 inline void unblockReqs(MPI_Request *requests, int numReqs) noexcept {
1564 for (int i=0; i<numReqs; i++) {
1565 if (requests[i] != MPI_REQUEST_NULL) {
1566 reqs[requests[i]]->setBlocked(false);
1571 void pup(PUP::er &p, AmpiRequestPool* reqPool) noexcept;
1573 void print() const noexcept {
1574 for (int i=0; i<reqs.size(); i++) {
1575 if (reqs[i] == NULL) continue;
1576 CkPrintf("AmpiRequestList Element %d [%p]: \n", i+1, reqs[i]);
1577 reqs[i]->print();
1582 //A simple memory buffer
1583 class memBuf {
1584 CkVec<char> buf;
1585 public:
1586 memBuf() =default;
1587 memBuf(int size) noexcept : buf(size) {}
1588 void setSize(int s) noexcept {buf.resize(s);}
1589 int getSize() const noexcept {return buf.size();}
1590 const void *getData() const noexcept {return (const void *)&buf[0];}
1591 void *getData() noexcept {return (void *)&buf[0];}
1594 template <class T>
1595 inline void pupIntoBuf(memBuf &b,T &t) noexcept {
1596 PUP::sizer ps;ps|t;
1597 b.setSize(ps.size());
1598 PUP::toMem pm(b.getData()); pm|t;
1601 template <class T>
1602 inline void pupFromBuf(const void *data,T &t) noexcept {
1603 PUP::fromMem p(data); p|t;
// Sequence index returned by AmpiMsg::getSeqIdx() for collective messages.
// Parenthesized so the negative literal stays a single operand.
#define COLL_SEQ_IDX (-1)
1608 class AmpiMsgPool;
1610 class AmpiMsg final : public CMessage_AmpiMsg {
1611 private:
1612 int ssendReq; //Index to the sender's request
1613 int tag; //MPI tag
1614 int srcRank; //Communicator rank for source
1615 int length; //Number of bytes in this message
1616 int origLength; // true size of allocation
1617 MPI_Comm comm; // Communicator
1618 public:
1619 char *data; //Payload
1620 #if CMK_BIGSIM_CHARM
1621 public:
1622 void *event;
1623 int eventPe; // the PE that the event is located
1624 #endif
1626 public:
1627 AmpiMsg() noexcept { data = NULL; }
1628 AmpiMsg(int sreq, int t, int sRank, int l) noexcept :
1629 ssendReq(sreq), tag(t), srcRank(sRank), length(l), origLength(l)
1630 { /* only called from AmpiMsg::pup() since the refnum (seq) will get pup'ed by the runtime */ }
1631 AmpiMsg(CMK_REFNUM_TYPE seq, int sreq, int t, int sRank, int l) noexcept :
1632 ssendReq(sreq), tag(t), srcRank(sRank), length(l), origLength(l)
1633 { CkSetRefNum(this, seq); }
1634 inline void setSsendReq(int s) noexcept { CkAssert(s >= 0); ssendReq = s; }
1635 inline void setSeq(CMK_REFNUM_TYPE s) noexcept { CkAssert(s >= 0); UsrToEnv(this)->setRef(s); }
1636 inline void setSrcRank(int sr) noexcept { srcRank = sr; }
1637 inline void setLength(int l) noexcept { length = l; }
1638 inline void setTag(int t) noexcept { tag = t; }
1639 inline void setComm(MPI_Comm c) noexcept { comm = c; }
1640 inline CMK_REFNUM_TYPE getSeq() const noexcept { return UsrToEnv(this)->getRef(); }
1641 inline int getSsendReq() const noexcept { return ssendReq; }
1642 inline int getSeqIdx() const noexcept {
1643 // seqIdx is srcRank, unless this message was part of a collective
1644 if (tag >= MPI_BCAST_TAG && tag <= MPI_ATA_TAG) {
1645 return COLL_SEQ_IDX;
1647 else {
1648 return srcRank;
1651 inline int getSrcRank() const noexcept { return srcRank; }
1652 inline int getLength() const noexcept { return length; }
1653 inline char* getData() const noexcept { return data; }
1654 inline int getTag() const noexcept { return tag; }
1655 inline MPI_Comm getComm() const noexcept { return comm; }
1656 static AmpiMsg* pup(PUP::er &p, AmpiMsg *m) noexcept
1658 int ref, ssendReq, tag, srcRank, length, origLength;
1659 MPI_Comm comm;
1660 if(p.isPacking() || p.isSizing()) {
1661 ref = CkGetRefNum(m);
1662 ssendReq = m->ssendReq;
1663 tag = m->tag;
1664 srcRank = m->srcRank;
1665 length = m->length;
1666 origLength = m->origLength;
1667 comm = m->comm;
1669 p(ref); p(ssendReq); p(tag); p(srcRank); p(length); p(origLength); p(comm);
1670 if(p.isUnpacking()) {
1671 m = new (origLength, 0) AmpiMsg(ref, ssendReq, tag, srcRank, origLength);
1672 m->setLength(length);
1673 m->setComm(comm);
1675 p(m->data, length);
1676 if(p.isDeleting()) {
1677 delete m;
1678 m = 0;
1680 return m;
1683 friend AmpiMsgPool;
1686 #define AMPI_MSG_POOL_SIZE 32 // Max # of AmpiMsg's allowed in the pool
1687 #define AMPI_POOLED_MSG_SIZE 64 // Max # of Bytes in pooled msgs' payload
1689 class AmpiMsgPool {
1690 private:
1691 std::forward_list<AmpiMsg *> msgs; // list of free msgs
1692 int msgLength; // AmpiMsg::length of messages in the pool
1693 int maxMsgs; // max # of msgs in the pool
1694 int currMsgs; // current # of msgs in the pool
1696 public:
1697 AmpiMsgPool(int _numMsgs = 0, int _msgLength = 0) noexcept
1698 : msgLength(_msgLength), maxMsgs(_numMsgs), currMsgs(0) {}
1699 ~AmpiMsgPool() =default;
1700 inline void clear() noexcept {
1701 while (!msgs.empty()) {
1702 delete msgs.front();
1703 msgs.pop_front();
1705 currMsgs = 0;
1707 inline AmpiMsg* newAmpiMsg(CMK_REFNUM_TYPE seq, int ssendReq, int tag, int srcRank, int len) noexcept {
1708 if (msgs.empty() || msgs.front()->origLength < len) {
1709 int newlen = std::max(msgLength, len);
1710 AmpiMsg* msg = new (newlen, 0) AmpiMsg(seq, ssendReq, tag, srcRank, newlen);
1711 msg->setLength(len);
1712 return msg;
1713 } else {
1714 AmpiMsg* msg = msgs.front();
1715 msgs.pop_front();
1716 currMsgs--;
1717 msg->setSeq(seq);
1718 msg->setSsendReq(ssendReq);
1719 msg->setTag(tag);
1720 msg->setSrcRank(srcRank);
1721 msg->setLength(len);
1722 return msg;
1725 inline void deleteAmpiMsg(AmpiMsg* msg) noexcept {
1726 /* msg->origLength is the true size of the message's data buffer, while
1727 * msg->length is the space taken by the payload within it. */
1728 if (currMsgs != maxMsgs && msg->origLength >= msgLength && msg->origLength < 2*msgLength) {
1729 msgs.push_front(msg);
1730 currMsgs++;
1731 } else {
1732 delete msg;
1735 void pup(PUP::er& p) {
1736 p|msgLength;
1737 p|maxMsgs;
1738 // Don't PUP the msgs in the free list or currMsgs, let the pool fill lazily
1742 // Number of requests in the pool
1743 #ifndef AMPI_REQ_POOL_SIZE
1744 #define AMPI_REQ_POOL_SIZE 64
1745 #endif
1747 // Helper macro for pool size and alignment calculations
1748 #define DefinePooledReqX(name, func) \
1749 static const size_t ireq##name = func(IReq); \
1750 static const size_t sreq##name = func(SendReq); \
1751 static const size_t ssreq##name = func(SsendReq); \
1752 static const size_t pooledReq##name = (ireq##name >= sreq##name && ireq##name >= ssreq##name) ? ireq##name : \
1753 (sreq##name >= ireq##name && sreq##name >= ssreq##name) ? sreq##name : \
1754 (ssreq##name);
1756 // This defines 'static const size_t pooledReqSize = ... ;'
1757 DefinePooledReqX(Size, sizeof)
1759 // This defines 'static const size_t pooledReqAlign = ... ;'
1760 DefinePooledReqX(Align, alignof)
1762 // Pool of IReq, SendReq, and SsendReq objects:
1763 // These are different sizes, but we use a single pool for them so
1764 // that iteration over these objects is fast, as in AMPI_Waitall.
1765 // We also try to always allocate new requests from the start to the end
1766 // of the pool, so that forward iteration over requests is fast.
1767 class AmpiRequestPool {
1768 private:
1769 std::bitset<AMPI_REQ_POOL_SIZE> validReqs; // reqs in the pool are either valid (being used by a real req) or invalid
1770 int startIdx = 0; // start next search from this index
1771 alignas(pooledReqAlign) std::array<char, AMPI_REQ_POOL_SIZE*pooledReqSize> reqs; // pool of memory for requests
1773 public:
1774 AmpiRequestPool() =default;
1775 ~AmpiRequestPool() =default;
1776 template <typename T, typename... Args>
1777 inline T* newReq(Args&&... args) noexcept {
1778 if (validReqs.all()) {
1779 return new T(std::forward<Args>(args)...);
1780 } else {
1781 for (int i=startIdx; i<validReqs.size(); i++) {
1782 if (!validReqs[i]) {
1783 validReqs[i] = 1;
1784 startIdx = i+1;
1785 T* req = new (&reqs[i*pooledReqSize]) T(std::forward<Args>(args)...);
1786 return req;
1789 CkAbort("AMPI> failed to find a free request in pool!");
1790 return NULL;
1793 inline void deleteReq(AmpiRequest* req) noexcept {
1794 if (req->isPooledType() &&
1795 ((char*)req >= &reqs.front() && (char*)req <= &reqs.back()))
1797 int idx = (int)((intptr_t)req - (intptr_t)&reqs[0]) / pooledReqSize;
1798 validReqs[idx] = 0;
1799 startIdx = std::min(idx, startIdx);
1800 } else {
1801 delete req;
1804 void pup(PUP::er& p) noexcept {
1805 // Nothing to do here, because AmpiRequestList::pup will be the
1806 // one to actually PUP the AmpiRequest objects to/from the pool
1811 Our local representation of another AMPI
1812 array element. Used to keep track of incoming
1813 and outgoing message sequence numbers, and
1814 the out-of-order message list.
1816 class AmpiOtherElement {
1817 private:
1818 /// Next incoming and outgoing message sequence number
1819 CMK_REFNUM_TYPE seqIncoming, seqOutgoing;
1821 /// Number of messages in out-of-order queue (normally 0)
1822 uint16_t numOutOfOrder;
1824 public:
1825 /// seqIncoming starts from 1, b/c 0 means unsequenced
1826 /// seqOutgoing starts from 0, b/c this will be incremented for the first real seq #
1827 AmpiOtherElement() noexcept : seqIncoming(1), seqOutgoing(0), numOutOfOrder(0) {}
1829 /// Handle wrap around of unsigned type CMK_REFNUM_TYPE
1830 inline void incSeqIncoming() noexcept { seqIncoming++; if (seqIncoming==0) seqIncoming=1; }
1831 inline CMK_REFNUM_TYPE getSeqIncoming() const noexcept { return seqIncoming; }
1833 inline void incSeqOutgoing() noexcept { seqOutgoing++; if (seqOutgoing==0) seqOutgoing=1; }
1834 inline CMK_REFNUM_TYPE getSeqOutgoing() const noexcept { return seqOutgoing; }
1836 inline void incNumOutOfOrder() noexcept { numOutOfOrder++; }
1837 inline void decNumOutOfOrder() noexcept { numOutOfOrder--; }
1838 inline uint16_t getNumOutOfOrder() const noexcept { return numOutOfOrder; }
1840 PUPbytes(AmpiOtherElement)
1842 class AmpiSeqQ : private CkNoncopyable {
1843 CkMsgQ<AmpiMsg> out; // all out of order messages
1844 std::unordered_map<int, AmpiOtherElement> elements; // element info: indexed by seqIdx (comm rank)
1846 public:
1847 AmpiSeqQ() =default;
1848 AmpiSeqQ(int commSize) noexcept {
1849 elements.reserve(std::min(commSize, 64));
1851 ~AmpiSeqQ() =default;
1852 void pup(PUP::er &p) noexcept;
1854 /// Insert this message in the table. Returns the number
1855 /// of messages now available for the element.
1856 /// If 0, the message was out-of-order and is buffered.
1857 /// If 1, this message can be immediately processed.
1858 /// If >1, this message can be immediately processed,
1859 /// and you should call "getOutOfOrder" repeatedly.
1860 inline int put(int seqIdx, AmpiMsg *msg) noexcept {
1861 AmpiOtherElement &el = elements[seqIdx];
1862 if (msg->getSeq() == el.getSeqIncoming()) { // In order:
1863 el.incSeqIncoming();
1864 return 1+el.getNumOutOfOrder();
1866 else { // Out of order: stash message
1867 putOutOfOrder(seqIdx, msg);
1868 return 0;
1872 /// Is this message in order (return >0) or not (return 0)?
1873 /// Same as put() except we don't call putOutOfOrder() here,
1874 /// so the caller should do that separately
1875 inline int isInOrder(int srcRank, CMK_REFNUM_TYPE seq) noexcept {
1876 AmpiOtherElement &el = elements[srcRank];
1877 if (seq == el.getSeqIncoming()) { // In order:
1878 el.incSeqIncoming();
1879 return 1+el.getNumOutOfOrder();
1881 else { // Out of order: caller should stash message
1882 return 0;
1886 /// Get an out-of-order message from the table.
1887 /// (in-order messages never go into the table)
1888 AmpiMsg *getOutOfOrder(int seqIdx) noexcept;
1890 /// Stash an out-of-order message
1891 void putOutOfOrder(int seqIdx, AmpiMsg *msg) noexcept;
1893 /// Increment the outgoing sequence number.
1894 inline void incCollSeqOutgoing() noexcept {
1895 elements[COLL_SEQ_IDX].incSeqOutgoing();
1898 /// Return the next outgoing sequence number, and increment it.
1899 inline CMK_REFNUM_TYPE nextOutgoing(int destRank) noexcept {
1900 AmpiOtherElement &el = elements[destRank];
1901 el.incSeqOutgoing();
1902 return el.getSeqOutgoing();
1905 PUPmarshall(AmpiSeqQ)
1908 inline CProxy_ampi ampiCommStruct::getProxy() const noexcept {return ampiID;}
1909 const ampiCommStruct &universeComm2CommStruct(MPI_Comm universeNo) noexcept;
1911 // Max value of a predefined MPI_Op (values defined in ampi.h)
1912 #define AMPI_MAX_PREDEFINED_OP 13
/*
An ampiParent holds all the communicators and the TCharm thread
for its children, which are bound to it.
*/
1918 class ampiParent final : public CBase_ampiParent {
1919 private:
1920 TCharm *thread;
1921 CProxy_TCharm threads;
1923 public: // Communication state:
1924 int numBlockedReqs; // number of requests currently blocked on
1925 bool resumeOnRecv, resumeOnColl;
1926 AmpiRequestList ampiReqs;
1927 AmpiRequestPool reqPool;
1928 AmpiRequest *blockingReq;
1929 CkDDT myDDT;
1931 private:
1932 MPI_Comm worldNo; //My MPI_COMM_WORLD
1933 ampi *worldPtr; //AMPI element corresponding to MPI_COMM_WORLD
1935 CkPupPtrVec<ampiCommStruct> splitComm; //Communicators from MPI_Comm_split
1936 CkPupPtrVec<ampiCommStruct> groupComm; //Communicators from MPI_Comm_group
1937 CkPupPtrVec<ampiCommStruct> cartComm; //Communicators from MPI_Cart_create
1938 CkPupPtrVec<ampiCommStruct> graphComm; //Communicators from MPI_Graph_create
1939 CkPupPtrVec<ampiCommStruct> distGraphComm; //Communicators from MPI_Dist_graph_create
1940 CkPupPtrVec<ampiCommStruct> interComm; //Communicators from MPI_Intercomm_create
1941 CkPupPtrVec<ampiCommStruct> intraComm; //Communicators from MPI_Intercomm_merge
1943 CkPupPtrVec<groupStruct> groups; // "Wild" groups that don't have a communicator
1944 CkPupPtrVec<WinStruct> winStructList; //List of windows for one-sided communication
1945 CkPupPtrVec<InfoStruct> infos; // list of all MPI_Infos
1946 const std::array<MPI_User_function*, AMPI_MAX_PREDEFINED_OP+1>& predefinedOps; // owned by ampiNodeMgr
1947 vector<OpStruct> userOps; // list of any user-defined MPI_Ops
1948 vector<AmpiMsg *> matchedMsgs; // for use with MPI_Mprobe and MPI_Mrecv
1950 /* MPI_*_get_attr C binding returns a *pointer* to an integer,
1951 * so there needs to be some storage somewhere to point to.
1952 * All builtin keyvals are ints, except for MPI_WIN_BASE, which
1953 * is a pointer, and MPI_WIN_SIZE, which is an MPI_Aint. */
1954 int* kv_builtin_storage;
1955 MPI_Aint* win_size_storage;
1956 void** win_base_storage;
1957 CkPupPtrVec<KeyvalNode> kvlist;
1958 void* bsendBuffer; // NOTE: we don't actually use this for buffering of MPI_Bsend's,
1959 int bsendBufferSize; // we only keep track of it to return it from MPI_Buffer_detach
1961 // Intercommunicator creation:
1962 bool isTmpRProxySet;
1963 CProxy_ampi tmpRProxy;
1965 MPI_MigrateFn userAboutToMigrateFn, userJustMigratedFn;
1967 public:
1968 bool ampiInitCallDone;
1970 private:
1971 bool kv_set_builtin(int keyval, void* attribute_val) noexcept;
1972 bool kv_get_builtin(int keyval) noexcept;
1974 public:
1975 void prepareCtv() noexcept;
1977 MPI_Message putMatchedMsg(AmpiMsg* msg) noexcept {
1978 // Search thru matchedMsgs for any NULL ones first:
1979 for (int i=0; i<matchedMsgs.size(); i++) {
1980 if (matchedMsgs[i] == NULL) {
1981 matchedMsgs[i] = msg;
1982 return i;
1985 // No NULL entries, so create a new one:
1986 matchedMsgs.push_back(msg);
1987 return matchedMsgs.size() - 1;
1989 AmpiMsg* getMatchedMsg(MPI_Message message) noexcept {
1990 if (message == MPI_MESSAGE_NO_PROC || message == MPI_MESSAGE_NULL) {
1991 return NULL;
1993 CkAssert(message >= 0 && message < matchedMsgs.size());
1994 AmpiMsg* msg = matchedMsgs[message];
1995 // Mark this matchedMsg index NULL and free from back of vector:
1996 matchedMsgs[message] = NULL;
1997 while (matchedMsgs.back() == NULL) {
1998 matchedMsgs.pop_back();
2000 return msg;
2003 inline void attachBuffer(void *buffer, int size) noexcept {
2004 bsendBuffer = buffer;
2005 bsendBufferSize = size;
2007 inline void detachBuffer(void *buffer, int *size) noexcept {
2008 *(void **)buffer = bsendBuffer;
2009 *size = bsendBufferSize;
2011 inline bool isSplit(MPI_Comm comm) const noexcept {
2012 return (comm>=MPI_COMM_FIRST_SPLIT && comm<MPI_COMM_FIRST_GROUP);
2014 const ampiCommStruct &getSplit(MPI_Comm comm) const noexcept {
2015 int idx=comm-MPI_COMM_FIRST_SPLIT;
2016 if (idx>=splitComm.size()) CkAbort("Bad split communicator used");
2017 return *splitComm[idx];
2019 void splitChildRegister(const ampiCommStruct &s) noexcept;
2021 inline bool isGroup(MPI_Comm comm) const noexcept {
2022 return (comm>=MPI_COMM_FIRST_GROUP && comm<MPI_COMM_FIRST_CART);
2024 const ampiCommStruct &getGroup(MPI_Comm comm) const noexcept {
2025 int idx=comm-MPI_COMM_FIRST_GROUP;
2026 if (idx>=groupComm.size()) CkAbort("Bad group communicator used");
2027 return *groupComm[idx];
2029 void groupChildRegister(const ampiCommStruct &s) noexcept;
2030 inline bool isInGroups(MPI_Group group) const noexcept {
2031 return (group>=0 && group<groups.size());
2034 void cartChildRegister(const ampiCommStruct &s) noexcept;
2035 void graphChildRegister(const ampiCommStruct &s) noexcept;
2036 void distGraphChildRegister(const ampiCommStruct &s) noexcept;
2037 void interChildRegister(const ampiCommStruct &s) noexcept;
2038 void intraChildRegister(const ampiCommStruct &s) noexcept;
2040 public:
2041 ampiParent(MPI_Comm worldNo_,CProxy_TCharm threads_,int nRanks_) noexcept;
2042 ampiParent(CkMigrateMessage *msg) noexcept;
2043 void ckAboutToMigrate() noexcept;
2044 void ckJustMigrated() noexcept;
2045 void ckJustRestored() noexcept;
2046 void setUserAboutToMigrateFn(MPI_MigrateFn f) noexcept;
2047 void setUserJustMigratedFn(MPI_MigrateFn f) noexcept;
2048 ~ampiParent() noexcept;
2050 //Children call this when they are first created, or just migrated
2051 TCharm *registerAmpi(ampi *ptr,ampiCommStruct s,bool forMigration) noexcept;
2053 // exchange proxy info between two ampi proxies
2054 void ExchangeProxy(CProxy_ampi rproxy) noexcept {
2055 if(!isTmpRProxySet){ tmpRProxy=rproxy; isTmpRProxySet=true; }
2056 else{ tmpRProxy.setRemoteProxy(rproxy); rproxy.setRemoteProxy(tmpRProxy); isTmpRProxySet=false; }
2059 //Grab the next available split/group communicator
2060 MPI_Comm getNextSplit() const noexcept {return MPI_COMM_FIRST_SPLIT+splitComm.size();}
2061 MPI_Comm getNextGroup() const noexcept {return MPI_COMM_FIRST_GROUP+groupComm.size();}
2062 MPI_Comm getNextCart() const noexcept {return MPI_COMM_FIRST_CART+cartComm.size();}
2063 MPI_Comm getNextGraph() const noexcept {return MPI_COMM_FIRST_GRAPH+graphComm.size();}
2064 MPI_Comm getNextDistGraph() const noexcept {return MPI_COMM_FIRST_DIST_GRAPH+distGraphComm.size();}
2065 MPI_Comm getNextInter() const noexcept {return MPI_COMM_FIRST_INTER+interComm.size();}
2066 MPI_Comm getNextIntra() const noexcept {return MPI_COMM_FIRST_INTRA+intraComm.size();}
2068 inline bool isCart(MPI_Comm comm) const noexcept {
2069 return (comm>=MPI_COMM_FIRST_CART && comm<MPI_COMM_FIRST_GRAPH);
2071 ampiCommStruct &getCart(MPI_Comm comm) const noexcept {
2072 int idx=comm-MPI_COMM_FIRST_CART;
2073 if (idx>=cartComm.size()) CkAbort("AMPI> Bad cartesian communicator used!\n");
2074 return *cartComm[idx];
2076 inline bool isGraph(MPI_Comm comm) const noexcept {
2077 return (comm>=MPI_COMM_FIRST_GRAPH && comm<MPI_COMM_FIRST_DIST_GRAPH);
2079 ampiCommStruct &getGraph(MPI_Comm comm) const noexcept {
2080 int idx=comm-MPI_COMM_FIRST_GRAPH;
2081 if (idx>=graphComm.size()) CkAbort("AMPI> Bad graph communicator used!\n");
2082 return *graphComm[idx];
2084 inline bool isDistGraph(MPI_Comm comm) const noexcept {
2085 return (comm >= MPI_COMM_FIRST_DIST_GRAPH && comm < MPI_COMM_FIRST_INTER);
2087 ampiCommStruct &getDistGraph(MPI_Comm comm) const noexcept {
2088 int idx = comm-MPI_COMM_FIRST_DIST_GRAPH;
2089 if (idx>=distGraphComm.size()) CkAbort("Bad distributed graph communicator used");
2090 return *distGraphComm[idx];
2092 inline bool isInter(MPI_Comm comm) const noexcept {
2093 return (comm>=MPI_COMM_FIRST_INTER && comm<MPI_COMM_FIRST_INTRA);
2095 const ampiCommStruct &getInter(MPI_Comm comm) const noexcept {
2096 int idx=comm-MPI_COMM_FIRST_INTER;
2097 if (idx>=interComm.size()) CkAbort("AMPI> Bad inter-communicator used!\n");
2098 return *interComm[idx];
2100 inline bool isIntra(MPI_Comm comm) const noexcept {
2101 return (comm>=MPI_COMM_FIRST_INTRA && comm<MPI_COMM_FIRST_RESVD);
2103 const ampiCommStruct &getIntra(MPI_Comm comm) const noexcept {
2104 int idx=comm-MPI_COMM_FIRST_INTRA;
2105 if (idx>=intraComm.size()) CkAbort("Bad intra-communicator used");
2106 return *intraComm[idx];
2109 void pup(PUP::er &p) noexcept;
2111 void startCheckpoint(const char* dname) noexcept;
2112 void Checkpoint(int len, const char* dname) noexcept;
2113 void ResumeThread() noexcept;
2114 TCharm* getTCharmThread() const noexcept {return thread;}
2115 inline ampiParent* blockOnRecv() noexcept;
2116 inline CkDDT* getDDT() noexcept { return &myDDT; }
2118 #if CMK_LBDB_ON
2119 void setMigratable(bool mig) noexcept {
2120 thread->setMigratable(mig);
2122 #endif
2124 const ampiCommStruct &getWorldStruct() const noexcept;
2126 inline const ampiCommStruct &comm2CommStruct(MPI_Comm comm) const noexcept {
2127 if (comm==MPI_COMM_WORLD) return getWorldStruct();
2128 if (comm==worldNo) return getWorldStruct();
2129 if (isSplit(comm)) return getSplit(comm);
2130 if (isGroup(comm)) return getGroup(comm);
2131 if (isCart(comm)) return getCart(comm);
2132 if (isGraph(comm)) return getGraph(comm);
2133 if (isDistGraph(comm)) return getDistGraph(comm);
2134 if (isInter(comm)) return getInter(comm);
2135 if (isIntra(comm)) return getIntra(comm);
2136 return universeComm2CommStruct(comm);
2139 inline vector<int>& getKeyvals(MPI_Comm comm) noexcept {
2140 ampiCommStruct &cs = *(ampiCommStruct *)&comm2CommStruct(comm);
2141 return cs.getKeyvals();
2144 inline ampi *comm2ampi(MPI_Comm comm) const noexcept {
2145 if (comm==MPI_COMM_WORLD) return worldPtr;
2146 if (comm==worldNo) return worldPtr;
2147 if (isSplit(comm)) {
2148 const ampiCommStruct &st=getSplit(comm);
2149 return st.getProxy()[thisIndex].ckLocal();
2151 if (isGroup(comm)) {
2152 const ampiCommStruct &st=getGroup(comm);
2153 return st.getProxy()[thisIndex].ckLocal();
2155 if (isCart(comm)) {
2156 const ampiCommStruct &st = getCart(comm);
2157 return st.getProxy()[thisIndex].ckLocal();
2159 if (isGraph(comm)) {
2160 const ampiCommStruct &st = getGraph(comm);
2161 return st.getProxy()[thisIndex].ckLocal();
2163 if (isDistGraph(comm)) {
2164 const ampiCommStruct &st = getDistGraph(comm);
2165 return st.getProxy()[thisIndex].ckLocal();
2167 if (isInter(comm)) {
2168 const ampiCommStruct &st=getInter(comm);
2169 return st.getProxy()[thisIndex].ckLocal();
2171 if (isIntra(comm)) {
2172 const ampiCommStruct &st=getIntra(comm);
2173 return st.getProxy()[thisIndex].ckLocal();
2175 if (comm>MPI_COMM_WORLD) return worldPtr; //Use MPI_WORLD ampi for cross-world messages:
2176 CkAbort("Invalid communicator used!");
2177 return NULL;
2180 inline bool hasComm(const MPI_Group group) const noexcept {
2181 MPI_Comm comm = (MPI_Comm)group;
2182 return ( comm==MPI_COMM_WORLD || comm==worldNo || isSplit(comm) || isGroup(comm) ||
2183 isCart(comm) || isGraph(comm) || isDistGraph(comm) || isIntra(comm) );
2184 //isInter omitted because its comm number != its group number
2186 inline vector<int> group2vec(MPI_Group group) const noexcept {
2187 if (group == MPI_GROUP_NULL || group == MPI_GROUP_EMPTY) {
2188 return vector<int>();
2190 else if (hasComm(group)) {
2191 return comm2CommStruct((MPI_Comm)group).getIndices();
2193 else {
2194 CkAssert(isInGroups(group));
2195 return groups[group]->getRanks();
2198 inline MPI_Group saveGroupStruct(const vector<int>& vec) noexcept {
2199 if (vec.empty()) return MPI_GROUP_EMPTY;
2200 int idx = groups.size();
2201 groups.resize(idx+1);
2202 groups[idx]=new groupStruct(vec);
2203 return (MPI_Group)idx;
2205 inline int getRank(const MPI_Group group) const noexcept {
2206 vector<int> vec = group2vec(group);
2207 return getPosOp(thisIndex,vec);
2209 inline AmpiRequestList &getReqs() noexcept { return ampiReqs; }
2210 inline int getMyPe() const noexcept {
2211 return CkMyPe();
2213 inline bool hasWorld() const noexcept {
2214 return worldPtr!=NULL;
2217 inline void checkComm(MPI_Comm comm) const noexcept {
2218 if ((comm != MPI_COMM_SELF && comm != MPI_COMM_WORLD)
2219 || (isSplit(comm) && comm-MPI_COMM_FIRST_SPLIT >= splitComm.size())
2220 || (isGroup(comm) && comm-MPI_COMM_FIRST_GROUP >= groupComm.size())
2221 || (isCart(comm) && comm-MPI_COMM_FIRST_CART >= cartComm.size())
2222 || (isGraph(comm) && comm-MPI_COMM_FIRST_GRAPH >= graphComm.size())
2223 || (isDistGraph(comm) && comm-MPI_COMM_FIRST_DIST_GRAPH >= distGraphComm.size())
2224 || (isInter(comm) && comm-MPI_COMM_FIRST_INTER >= interComm.size())
2225 || (isIntra(comm) && comm-MPI_COMM_FIRST_INTRA >= intraComm.size()) )
2226 CkAbort("Invalid MPI_Comm\n");
2229 /// if intra-communicator, return comm, otherwise return null group
2230 inline MPI_Group comm2group(const MPI_Comm comm) const noexcept {
2231 if(isInter(comm)) return MPI_GROUP_NULL; // we don't support inter-communicator in such functions
2232 ampiCommStruct s = comm2CommStruct(comm);
2233 if(comm!=MPI_COMM_WORLD && comm!=s.getComm()) CkAbort("Error in ampiParent::comm2group()");
2234 return (MPI_Group)(s.getComm());
2237 inline int getRemoteSize(const MPI_Comm comm) const noexcept {
2238 if(isInter(comm)) return getInter(comm).getRemoteIndices().size();
2239 else return -1;
2241 inline MPI_Group getRemoteGroup(const MPI_Comm comm) noexcept {
2242 if(isInter(comm)) return saveGroupStruct(getInter(comm).getRemoteIndices());
2243 else return MPI_GROUP_NULL;
2246 int createKeyval(MPI_Copy_function *copy_fn, MPI_Delete_function *delete_fn,
2247 int *keyval, void* extra_state) noexcept;
2248 bool getBuiltinKeyval(int keyval, void *attribute_val) noexcept;
2249 int setUserKeyval(MPI_Comm comm, int keyval, void *attribute_val) noexcept;
2250 bool getUserKeyval(MPI_Comm comm, vector<int>& keyvals, int keyval, void *attribute_val, int *flag) noexcept;
2251 int dupUserKeyvals(MPI_Comm old_comm, MPI_Comm new_comm) noexcept;
2252 int freeUserKeyval(int context, vector<int>& keyvals, int *keyval) noexcept;
2253 int freeUserKeyvals(int context, vector<int>& keyvals) noexcept;
2255 int setAttr(MPI_Comm comm, vector<int>& keyvals, int keyval, void *attribute_val) noexcept;
2256 int getAttr(MPI_Comm comm, vector<int>& keyvals, int keyval, void *attribute_val, int *flag) noexcept;
2257 int deleteAttr(MPI_Comm comm, vector<int>& keyvals, int keyval) noexcept;
2259 int addWinStruct(WinStruct *win) noexcept;
2260 WinStruct *getWinStruct(MPI_Win win) const noexcept;
2261 void removeWinStruct(WinStruct *win) noexcept;
2263 int createInfo(MPI_Info *newinfo) noexcept;
2264 int dupInfo(MPI_Info info, MPI_Info *newinfo) noexcept;
2265 int setInfo(MPI_Info info, const char *key, const char *value) noexcept;
2266 int deleteInfo(MPI_Info info, const char *key) noexcept;
2267 int getInfo(MPI_Info info, const char *key, int valuelen, char *value, int *flag) const noexcept;
2268 int getInfoValuelen(MPI_Info info, const char *key, int *valuelen, int *flag) const noexcept;
2269 int getInfoNkeys(MPI_Info info, int *nkeys) const noexcept;
2270 int getInfoNthkey(MPI_Info info, int n, char *key) const noexcept;
2271 int freeInfo(MPI_Info info) noexcept;
2272 void defineInfoEnv(int nRanks_) noexcept;
2273 void defineInfoMigration() noexcept;
2275 // An 'MPI_Op' is an integer that indexes into either:
2276 // A) an array of predefined ops owned by ampiNodeMgr, or
2277 // B) a vector of user-defined ops owned by ampiParent
2278 // The MPI_Op is compared to AMPI_MAX_PREDEFINED_OP to disambiguate.
2279 inline int createOp(MPI_User_function *fn, bool isCommutative) noexcept {
2280 // Search thru non-predefined op's for any invalidated ones:
2281 for (int i=0; i<userOps.size(); i++) {
2282 if (userOps[i].isFree()) {
2283 userOps[i].init(fn, isCommutative);
2284 return AMPI_MAX_PREDEFINED_OP + 1 + i;
2287 // No invalid entries, so create a new one:
2288 userOps.emplace_back(fn, isCommutative);
2289 return AMPI_MAX_PREDEFINED_OP + userOps.size();
2291 inline void freeOp(MPI_Op op) noexcept {
2292 // Don't free predefined op's:
2293 if (!opIsPredefined(op)) {
2294 // Invalidate op, then free all invalid op's from the back of the userOp's vector
2295 int opIdx = op - 1 - AMPI_MAX_PREDEFINED_OP;
2296 CkAssert(opIdx < userOps.size());
2297 userOps[opIdx].free();
2298 while (!userOps.empty() && userOps.back().isFree()) {
2299 userOps.pop_back();
2303 inline bool opIsPredefined(MPI_Op op) const noexcept {
2304 return (op <= AMPI_MAX_PREDEFINED_OP);
2306 inline bool opIsCommutative(MPI_Op op) const noexcept {
2307 if (opIsPredefined(op)) {
2308 return true; // all predefined ops are commutative
2310 else {
2311 int opIdx = op - 1 - AMPI_MAX_PREDEFINED_OP;
2312 CkAssert(opIdx < userOps.size());
2313 return userOps[opIdx].isCommutative;
2316 inline MPI_User_function* op2User_function(MPI_Op op) const noexcept {
2317 if (opIsPredefined(op)) {
2318 return predefinedOps[op];
2320 else {
2321 int opIdx = op - 1 - AMPI_MAX_PREDEFINED_OP;
2322 CkAssert(opIdx < userOps.size());
2323 return userOps[opIdx].func;
2326 inline AmpiOpHeader op2AmpiOpHeader(MPI_Op op, MPI_Datatype type, int count) const noexcept {
2327 if (opIsPredefined(op)) {
2328 int size = myDDT.getType(type)->getSize(count);
2329 return AmpiOpHeader(predefinedOps[op], type, count, size);
2331 else {
2332 int opIdx = op - 1 - AMPI_MAX_PREDEFINED_OP;
2333 CkAssert(opIdx < userOps.size());
2334 int size = myDDT.getType(type)->getSize(count);
2335 return AmpiOpHeader(userOps[opIdx].func, type, count, size);
2338 inline void applyOp(MPI_Datatype datatype, MPI_Op op, int count, const void* invec, void* inoutvec) const noexcept {
2339 // inoutvec[i] = invec[i] op inoutvec[i]
2340 MPI_User_function *func = op2User_function(op);
2341 (func)((void*)invec, inoutvec, &count, &datatype);
2344 void init() noexcept;
2345 void finalize() noexcept;
2346 void block() noexcept;
2347 void yield() noexcept;
2349 #if AMPI_PRINT_MSG_SIZES
2350 // Map of AMPI routine names to message sizes and number of messages:
2351 // ["AMPI_Routine"][ [msg_size][num_msgs] ]
2352 std::unordered_map<std::string, std::map<int, int> > msgSizes;
2353 inline bool isRankRecordingMsgSizes() noexcept;
2354 inline void recordMsgSize(const char* func, int msgSize) noexcept;
2355 void printMsgSizes() noexcept;
2356 #endif
2358 #if AMPIMSGLOG
2359 /* message logging */
2360 int pupBytes;
2361 #if CMK_USE_ZLIB && 0
2362 gzFile fMsgLog;
2363 PUP::tozDisk *toPUPer;
2364 PUP::fromzDisk *fromPUPer;
2365 #else
2366 FILE* fMsgLog;
2367 PUP::toDisk *toPUPer;
2368 PUP::fromDisk *fromPUPer;
2369 #endif
2370 #endif
2373 // Store a generalized request class created by MPIX_Grequest_class_create
2374 class greq_class_desc {
2375 public:
2376 MPI_Grequest_query_function *query_fn;
2377 MPI_Grequest_free_function *free_fn;
2378 MPI_Grequest_cancel_function *cancel_fn;
2379 MPIX_Grequest_poll_function *poll_fn;
2380 MPIX_Grequest_wait_function *wait_fn;
2382 void pup(PUP::er &p) noexcept {
2383 p((char *)query_fn, sizeof(void *));
2384 p((char *)free_fn, sizeof(void *));
2385 p((char *)cancel_fn, sizeof(void *));
2386 p((char *)poll_fn, sizeof(void *));
2387 p((char *)wait_fn, sizeof(void *));
/*
An ampi manages the communication of one thread over
one MPI communicator.
*/
2395 class ampi final : public CBase_ampi {
2396 private:
2397 friend class IReq; // for checking resumeOnRecv
2398 friend class SendReq;
2399 friend class SsendReq;
2400 friend class RednReq;
2401 friend class GatherReq;
2402 friend class GathervReq;
2404 ampiParent *parent;
2405 CProxy_ampiParent parentProxy;
2406 TCharm *thread;
2407 int myRank;
2408 AmpiSeqQ oorder;
2410 public:
/*
 * AMPI Message Matching (Amm) queues are indexed by the tag and sender.
 * Since ampi objects are per-communicator, there are separate Amm's per communicator.
 */
2415 Amm<AmpiRequest *, AMPI_AMM_PT2PT_POOL_SIZE> postedReqs;
2416 Amm<AmpiMsg *, AMPI_AMM_PT2PT_POOL_SIZE> unexpectedMsgs;
2418 // Bcast requests / msgs must be kept separate from pt2pt,
2419 // so we don't match them to wildcard recv's
2420 Amm<AmpiRequest *, AMPI_AMM_COLL_POOL_SIZE> postedBcastReqs;
2421 Amm<AmpiMsg *, AMPI_AMM_COLL_POOL_SIZE> unexpectedBcastMsgs;
2423 // Store generalized request classes created by MPIX_Grequest_class_create
2424 vector<greq_class_desc> greq_classes;
2426 private:
2427 ampiCommStruct myComm;
2428 vector<int> tmpVec; // stores temp group info
2429 CProxy_ampi remoteProxy; // valid only for intercommunicator
2430 CkPupPtrVec<win_obj> winObjects;
2432 private:
2433 void inorder(AmpiMsg *msg) noexcept;
2434 void inorderBcast(AmpiMsg *msg, bool deleteMsg) noexcept;
2435 void inorderRdma(char* buf, int size, CMK_REFNUM_TYPE seq, int tag, int srcRank,
2436 MPI_Comm comm, int ssendReq) noexcept;
2438 void init() noexcept;
2439 void findParent(bool forMigration) noexcept;
2441 public: // entry methods
2442 ampi() noexcept;
2443 ampi(CkArrayID parent_,const ampiCommStruct &s) noexcept;
2444 ampi(CkMigrateMessage *msg) noexcept;
2445 void ckJustMigrated() noexcept;
2446 void ckJustRestored() noexcept;
2447 ~ampi() noexcept;
2449 void pup(PUP::er &p) noexcept;
2451 void allInitDone() noexcept;
2452 void setInitDoneFlag() noexcept;
2454 void unblock() noexcept;
2455 void injectMsg(int size, char* buf) noexcept;
2456 void generic(AmpiMsg *) noexcept;
2457 void genericRdma(char* buf, int size, CMK_REFNUM_TYPE seq, int tag, int srcRank,
2458 MPI_Comm destcomm, int ssendReq) noexcept;
2459 void completedRdmaSend(CkDataMsg *msg) noexcept;
2460 void ssend_ack(int sreq) noexcept;
2461 void barrierResult() noexcept;
2462 void ibarrierResult() noexcept;
2463 void bcastResult(AmpiMsg *msg) noexcept;
2464 void rednResult(CkReductionMsg *msg) noexcept;
2465 void irednResult(CkReductionMsg *msg) noexcept;
2467 void splitPhase1(CkReductionMsg *msg) noexcept;
2468 void splitPhaseInter(CkReductionMsg *msg) noexcept;
2469 void commCreatePhase1(MPI_Comm nextGroupComm) noexcept;
2470 void intercommCreatePhase1(MPI_Comm nextInterComm) noexcept;
2471 void intercommMergePhase1(MPI_Comm nextIntraComm) noexcept;
2473 private: // Used by the above entry methods that create new MPI_Comm objects
2474 CProxy_ampi createNewChildAmpiSync() noexcept;
2475 void insertNewChildAmpiElements(MPI_Comm newComm, CProxy_ampi newAmpi) noexcept;
2477 inline void handleBlockedReq(AmpiRequest* req) noexcept {
2478 if (req->isBlocked() && parent->numBlockedReqs != 0) {
2479 parent->numBlockedReqs--;
2482 inline void resumeThreadIfReady() noexcept {
2483 if (parent->resumeOnRecv && parent->numBlockedReqs == 0) {
2484 thread->resume();
2488 public: // to be used by MPI_* functions
2489 inline const ampiCommStruct &comm2CommStruct(MPI_Comm comm) const noexcept {
2490 return parent->comm2CommStruct(comm);
2492 inline const ampiCommStruct &getCommStruct() const noexcept { return myComm; }
2494 inline ampi* blockOnRecv() noexcept;
2495 inline ampi* blockOnColl() noexcept;
2496 inline void setBlockingReq(AmpiRequest *req) noexcept;
2497 MPI_Request postReq(AmpiRequest* newreq) noexcept;
2499 inline CMK_REFNUM_TYPE getSeqNo(int destRank, MPI_Comm destcomm, int tag) noexcept;
2500 AmpiMsg *makeBcastMsg(const void *buf,int count,MPI_Datatype type,int root,MPI_Comm destcomm) noexcept;
2501 AmpiMsg *makeAmpiMsg(int destRank,int t,int sRank,const void *buf,int count,
2502 MPI_Datatype type,MPI_Comm destcomm, int ssendReq=0) noexcept;
2504 MPI_Request send(int t, int s, const void* buf, int count, MPI_Datatype type, int rank,
2505 MPI_Comm destcomm, int ssendReq=0, AmpiSendType sendType=BLOCKING_SEND) noexcept;
2506 static void sendraw(int t, int s, void* buf, int len, CkArrayID aid, int idx) noexcept;
2507 inline MPI_Request sendLocalMsg(int t, int sRank, const void* buf, int size, MPI_Datatype type, int destRank,
2508 MPI_Comm destcomm, ampi* destPtr, int ssendReq, AmpiSendType sendType) noexcept;
2509 inline MPI_Request sendRdmaMsg(int t, int sRank, const void* buf, int size, MPI_Datatype type, int destIdx,
2510 int destRank, MPI_Comm destcomm, CProxy_ampi arrProxy, int ssendReq) noexcept;
2511 inline bool destLikelyWithinProcess(CProxy_ampi arrProxy, int destIdx) const noexcept {
2512 CkArray* localBranch = arrProxy.ckLocalBranch();
2513 int destPe = localBranch->lastKnown(CkArrayIndex1D(destIdx));
2514 return (CkNodeOf(destPe) == CkMyNode());
2516 MPI_Request delesend(int t, int s, const void* buf, int count, MPI_Datatype type, int rank,
2517 MPI_Comm destcomm, CProxy_ampi arrproxy, int ssend, AmpiSendType sendType) noexcept;
2518 inline void processAmpiMsg(AmpiMsg *msg, void* buf, MPI_Datatype type, int count) noexcept;
2519 inline void processRdmaMsg(const void *sbuf, int slength, int ssendReq, int srank, void* rbuf,
2520 int rcount, MPI_Datatype rtype, MPI_Comm comm) noexcept;
2521 inline void processRednMsg(CkReductionMsg *msg, void* buf, MPI_Datatype type, int count) noexcept;
2522 inline void processNoncommutativeRednMsg(CkReductionMsg *msg, void* buf, MPI_Datatype type, int count,
2523 MPI_User_function* func) noexcept;
2524 inline void processGatherMsg(CkReductionMsg *msg, void* buf, MPI_Datatype type, int recvCount) noexcept;
2525 inline void processGathervMsg(CkReductionMsg *msg, void* buf, MPI_Datatype type,
2526 int* recvCounts, int* displs) noexcept;
2527 inline AmpiMsg * getMessage(int t, int s, MPI_Comm comm, int *sts) const noexcept;
2528 int recv(int t,int s,void* buf,int count,MPI_Datatype type,MPI_Comm comm,MPI_Status *sts=NULL) noexcept;
2529 void irecv(void *buf, int count, MPI_Datatype type, int src,
2530 int tag, MPI_Comm comm, MPI_Request *request) noexcept;
2531 void mrecv(int tag, int src, void* buf, int count, MPI_Datatype datatype, MPI_Comm comm,
2532 MPI_Status* status, MPI_Message* message) noexcept;
2533 void imrecv(void* buf, int count, MPI_Datatype datatype, int src, int tag, MPI_Comm comm,
2534 MPI_Request* request, MPI_Message* message) noexcept;
2535 void irecvBcast(void *buf, int count, MPI_Datatype type, int src,
2536 MPI_Comm comm, MPI_Request *request) noexcept;
2537 void sendrecv(const void *sbuf, int scount, MPI_Datatype stype, int dest, int stag,
2538 void *rbuf, int rcount, MPI_Datatype rtype, int src, int rtag,
2539 MPI_Comm comm, MPI_Status *sts) noexcept;
2540 void sendrecv_replace(void* buf, int count, MPI_Datatype datatype,
2541 int dest, int sendtag, int source, int recvtag,
2542 MPI_Comm comm, MPI_Status *status) noexcept;
2543 void probe(int t,int s,MPI_Comm comm,MPI_Status *sts) noexcept;
2544 void mprobe(int t, int s, MPI_Comm comm, MPI_Status *sts, MPI_Message *message) noexcept;
2545 int iprobe(int t,int s,MPI_Comm comm,MPI_Status *sts) noexcept;
2546 int improbe(int t, int s, MPI_Comm comm, MPI_Status *sts, MPI_Message *message) noexcept;
2547 void barrier() noexcept;
2548 void ibarrier(MPI_Request *request) noexcept;
2549 void bcast(int root, void* buf, int count, MPI_Datatype type, MPI_Comm comm) noexcept;
2550 int intercomm_bcast(int root, void* buf, int count, MPI_Datatype type, MPI_Comm intercomm) noexcept;
2551 void ibcast(int root, void* buf, int count, MPI_Datatype type, MPI_Comm comm, MPI_Request* request) noexcept;
2552 int intercomm_ibcast(int root, void* buf, int count, MPI_Datatype type, MPI_Comm intercomm, MPI_Request *request) noexcept;
2553 static void bcastraw(void* buf, int len, CkArrayID aid) noexcept;
2554 void split(int color,int key,MPI_Comm *dest, int type) noexcept;
2555 void commCreate(const vector<int>& vec,MPI_Comm *newcomm) noexcept;
2556 MPI_Comm cartCreate0D() noexcept;
2557 MPI_Comm cartCreate(vector<int>& vec, int ndims, const int* dims) noexcept;
2558 void graphCreate(const vector<int>& vec, MPI_Comm *newcomm) noexcept;
2559 void distGraphCreate(const vector<int>& vec, MPI_Comm *newcomm) noexcept;
2560 void intercommCreate(const vector<int>& rvec, int root, MPI_Comm tcomm, MPI_Comm *ncomm) noexcept;
2562 inline bool isInter() const noexcept { return myComm.isinter(); }
2563 void intercommMerge(int first, MPI_Comm *ncomm) noexcept;
2565 inline int getWorldRank() const noexcept {return parent->thisIndex;}
2566 /// Return our rank in this communicator
2567 inline int getRank() const noexcept {return myRank;}
2568 inline int getSize() const noexcept {return myComm.getSize();}
2569 inline MPI_Comm getComm() const noexcept {return myComm.getComm();}
2570 inline void setCommName(const char *name) noexcept {myComm.setName(name);}
2571 inline void getCommName(char *name, int *len) const noexcept {myComm.getName(name,len);}
2572 inline vector<int> getIndices() const noexcept { return myComm.getIndices(); }
2573 inline vector<int> getRemoteIndices() const noexcept { return myComm.getRemoteIndices(); }
2574 inline const CProxy_ampi &getProxy() const noexcept {return thisProxy;}
2575 inline const CProxy_ampi &getRemoteProxy() const noexcept {return remoteProxy;}
2576 inline void setRemoteProxy(CProxy_ampi rproxy) noexcept { remoteProxy = rproxy; thread->resume(); }
2577 inline int getIndexForRank(int r) const noexcept {return myComm.getIndexForRank(r);}
2578 inline int getIndexForRemoteRank(int r) const noexcept {return myComm.getIndexForRemoteRank(r);}
2579 void findNeighbors(MPI_Comm comm, int rank, vector<int>& neighbors) const noexcept;
2580 inline const vector<int>& getNeighbors() const noexcept { return myComm.getTopologyforNeighbors()->getnbors(); }
2581 inline bool opIsCommutative(MPI_Op op) const noexcept { return parent->opIsCommutative(op); }
2582 inline MPI_User_function* op2User_function(MPI_Op op) const noexcept { return parent->op2User_function(op); }
2583 void topoDup(int topoType, int rank, MPI_Comm comm, MPI_Comm *newcomm) noexcept;
2585 inline AmpiRequestList& getReqs() noexcept { return parent->ampiReqs; }
2586 CkDDT *getDDT() noexcept {return &parent->myDDT;}
2587 CthThread getThread() const noexcept { return thread->getThread(); }
2589 public:
2590 MPI_Win createWinInstance(void *base, MPI_Aint size, int disp_unit, MPI_Info info) noexcept;
2591 int deleteWinInstance(MPI_Win win) noexcept;
2592 int winGetGroup(WinStruct *win, MPI_Group *group) const noexcept;
2593 int winPut(const void *orgaddr, int orgcnt, MPI_Datatype orgtype, int rank,
2594 MPI_Aint targdisp, int targcnt, MPI_Datatype targtype, WinStruct *win) noexcept;
2595 int winGet(void *orgaddr, int orgcnt, MPI_Datatype orgtype, int rank,
2596 MPI_Aint targdisp, int targcnt, MPI_Datatype targtype, WinStruct *win) noexcept;
2597 int winIget(MPI_Aint orgdisp, int orgcnt, MPI_Datatype orgtype, int rank,
2598 MPI_Aint targdisp, int targcnt, MPI_Datatype targtype, WinStruct *win,
2599 MPI_Request *req) noexcept;
2600 int winIgetWait(MPI_Request *request, MPI_Status *status) noexcept;
2601 int winIgetFree(MPI_Request *request, MPI_Status *status) noexcept;
// Put/get variants that identify the window by integer `winIndex` instead of a WinStruct*.
// NOTE(review): the Remote*/Local* names suggest these are the target-side counterparts of
// winPut/winGet/winIget — confirm. Ownership of the returned char* / AmpiMsg* is not
// visible from these declarations.
2602 void winRemotePut(int orgtotalsize, char* orgaddr, int orgcnt, MPI_Datatype orgtype,
2603 MPI_Aint targdisp, int targcnt, MPI_Datatype targtype, int winIndex) noexcept;
2604 char* winLocalGet(int orgcnt, MPI_Datatype orgtype, MPI_Aint targdisp, int targcnt,
2605 MPI_Datatype targtype, int winIndex) noexcept;
2606 AmpiMsg* winRemoteGet(int orgcnt, MPI_Datatype orgtype, MPI_Aint targdisp,
2607 int targcnt, MPI_Datatype targtype, int winIndex) noexcept;
2608 AmpiMsg* winRemoteIget(MPI_Aint orgdisp, int orgcnt, MPI_Datatype orgtype, MPI_Aint targdisp,
2609 int targcnt, MPI_Datatype targtype, int winIndex) noexcept;
// Window locking. winLock/winUnlock take the target `rank` and a WinStruct*;
// winRemoteLock/winRemoteUnlock take a `winIndex` and the `requestRank` instead.
// NOTE(review): `lock_type` is presumably one of the MPI_LOCK_* constants — confirm.
2610 int winLock(int lock_type, int rank, WinStruct *win) noexcept;
2611 int winUnlock(int rank, WinStruct *win) noexcept;
2612 void winRemoteLock(int lock_type, int winIndex, int requestRank) noexcept;
2613 void winRemoteUnlock(int winIndex, int requestRank) noexcept;
// Accumulate family: each call carries an MPI_Op `op` in addition to the origin/target
// count and datatype arguments.
// winAccumulate / winGetAccumulate take the target `rank` and a WinStruct*; the
// Remote*/Local* variants take a `winIndex` and a char* origin buffer with its total size.
// NOTE(review): int returns are presumably MPI error codes — confirm.
2614 int winAccumulate(const void *orgaddr, int orgcnt, MPI_Datatype orgtype, int rank,
2615 MPI_Aint targdisp, int targcnt, MPI_Datatype targtype,
2616 MPI_Op op, WinStruct *win) noexcept;
2617 void winRemoteAccumulate(int orgtotalsize, char* orgaddr, int orgcnt, MPI_Datatype orgtype,
2618 MPI_Aint targdisp, int targcnt, MPI_Datatype targtype,
2619 MPI_Op op, int winIndex) noexcept;
// Get-accumulate additionally takes a result buffer (resaddr, rescnt, restype).
2620 int winGetAccumulate(const void *orgaddr, int orgcnt, MPI_Datatype orgtype, void *resaddr,
2621 int rescnt, MPI_Datatype restype, int rank, MPI_Aint targdisp,
2622 int targcnt, MPI_Datatype targtype, MPI_Op op, WinStruct *win) noexcept;
// The local variant receives the result buffer as `resaddr`; the remote variant returns
// an AmpiMsg* instead.
2623 void winLocalGetAccumulate(int orgtotalsize, char* sorgaddr, int orgcnt, MPI_Datatype orgtype,
2624 MPI_Aint targdisp, int targcnt, MPI_Datatype targtype, MPI_Op op,
2625 char *resaddr, int winIndex) noexcept;
2626 AmpiMsg* winRemoteGetAccumulate(int orgtotalsize, char* sorgaddr, int orgcnt, MPI_Datatype orgtype,
2627 MPI_Aint targdisp, int targcnt, MPI_Datatype targtype, MPI_Op op,
2628 int winIndex) noexcept;
// Compare-and-swap family. winCompareAndSwap takes origin, compare and result addresses
// of a single `type` element plus the target `rank`, displacement and WinStruct*.
// The Local/Remote variants take an explicit byte `size`, char* buffers and a `winIndex`,
// and return a char* / AmpiMsg* respectively.
// NOTE(review): the returned pointer/message presumably carries the previous target
// value — confirm against the definitions.
2629 int winCompareAndSwap(const void *orgaddr, const void *compaddr, void *resaddr, MPI_Datatype type,
2630 int rank, MPI_Aint targdisp, WinStruct *win) noexcept;
2631 char* winLocalCompareAndSwap(int size, char* sorgaddr, char* compaddr, MPI_Datatype type,
2632 MPI_Aint targdisp, int winIndex) noexcept;
2633 AmpiMsg* winRemoteCompareAndSwap(int size, char *sorgaddr, char *compaddr, MPI_Datatype type,
2634 MPI_Aint targdisp, int winIndex) noexcept;
// Window naming and lookup helpers (declared without bodies here).
// winSetName takes the name as a const char*; winGetName takes a caller-supplied char*
// buffer and an int* `length`.
// NOTE(review): required buffer capacity for winGetName is not visible here — confirm.
2635 void winSetName(WinStruct *win, const char *name) noexcept;
2636 void winGetName(WinStruct *win, char *name, int *length) const noexcept;
// Maps a WinStruct* to a win_obj*.
2637 win_obj* getWinObjInstance(WinStruct *win) const noexcept;
// NOTE(review): presumably hands out a fresh semaphore id — confirm against the definition.
2638 int getNewSemaId() noexcept;
// Scatter operations on an intercommunicator (`intercomm`). `root` is the first
// parameter; the remaining arguments are send/receive buffers, counts and datatypes.
// The "i" variants additionally take an MPI_Request*; the "v" variants take per-rank
// `sendcounts` and `displs` arrays instead of a single send count.
// NOTE(review): int returns are presumably MPI error codes — confirm.
2640 int intercomm_scatter(int root, const void *sendbuf, int sendcount, MPI_Datatype sendtype,
2641 void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm intercomm) noexcept;
2642 int intercomm_iscatter(int root, const void *sendbuf, int sendcount, MPI_Datatype sendtype,
2643 void *recvbuf, int recvcount, MPI_Datatype recvtype,
2644 MPI_Comm intercomm, MPI_Request *request) noexcept;
2645 int intercomm_scatterv(int root, const void* sendbuf, const int* sendcounts, const int* displs,
2646 MPI_Datatype sendtype, void* recvbuf, int recvcount,
2647 MPI_Datatype recvtype, MPI_Comm intercomm) noexcept;
2648 int intercomm_iscatterv(int root, const void* sendbuf, const int* sendcounts, const int* displs,
2649 MPI_Datatype sendtype, void* recvbuf, int recvcount,
2650 MPI_Datatype recvtype, MPI_Comm intercomm, MPI_Request* request) noexcept;
// Helper declarations (no bodies here). isAmpiThread() is the predicate the AMPI_API
// macro below checks when CMK_ERROR_CHECKING is enabled.
2653 ampiParent *getAmpiParent() noexcept;
2654 bool isAmpiThread() noexcept;
// Looks up the ampi object for communicator `comm`.
2655 ampi *getAmpiInstance(MPI_Comm comm) noexcept;
// NOTE(review): the check* helpers return void, so any failure is presumably reported by
// aborting rather than via a return code — confirm against the definitions.
2656 void checkComm(MPI_Comm comm) noexcept;
2657 void checkRequest(MPI_Request req) noexcept;
// The buffer pointers are taken by reference (void*&), so these can replace the caller's
// pointer. NOTE(review): presumably rewrites a buffer equal to MPI_BOTTOM — confirm.
2658 void handle_MPI_BOTTOM(void* &buf, MPI_Datatype type) noexcept;
2659 void handle_MPI_BOTTOM(void* &buf1, MPI_Datatype type1, void* &buf2, MPI_Datatype type2) noexcept;
// ampiErrhandler(func, errcode):
//  - with AMPI_ERROR_CHECKING it is a real function taking the calling routine's name
//    and the error code;
//  - otherwise it is a macro that expands to just `(errcode)`, so `func` is discarded
//    and the code passes through unchanged.
2661 #if AMPI_ERROR_CHECKING
2662 int ampiErrhandler(const char* func, int errcode) noexcept;
2663 #else
2664 #define ampiErrhandler(func, errcode) (errcode)
2665 #endif
// Table of routine names used for tracing; compiled only when CMK_TRACE_ENABLED is set.
// Every entry carries the AMPI_ prefix. Because the array is `static` in a header, each
// translation unit that includes this file gets its own copy.
// When adding a name, keep the comma after every literal: adjacent string literals
// without a comma are silently concatenated into one entry.
2668 #if CMK_TRACE_ENABLED
2670 // List of AMPI functions to trace:
2671 static const char *funclist[] = {"AMPI_Abort", "AMPI_Add_error_class", "AMPI_Add_error_code", "AMPI_Add_error_string",
2672 "AMPI_Address", "AMPI_Allgather", "AMPI_Allgatherv", "AMPI_Allreduce", "AMPI_Alltoall",
2673 "AMPI_Alltoallv", "AMPI_Alltoallw", "AMPI_Attr_delete", "AMPI_Attr_get",
2674 "AMPI_Attr_put", "AMPI_Barrier", "AMPI_Bcast", "AMPI_Bsend", "AMPI_Cancel",
2675 "AMPI_Cart_coords", "AMPI_Cart_create", "AMPI_Cart_get", "AMPI_Cart_map",
2676 "AMPI_Cart_rank", "AMPI_Cart_shift", "AMPI_Cart_sub", "AMPI_Cartdim_get",
2677 "AMPI_Comm_call_errhandler", "AMPI_Comm_compare", "AMPI_Comm_create", "AMPI_Comm_create_group",
2678 "AMPI_Comm_create_errhandler", "AMPI_Comm_create_keyval", "AMPI_Comm_delete_attr",
2679 "AMPI_Comm_dup", "AMPI_Comm_dup_with_info", "AMPI_Comm_free",
2680 "AMPI_Comm_free_errhandler", "AMPI_Comm_free_keyval", "AMPI_Comm_get_attr",
2681 "AMPI_Comm_get_errhandler", "AMPI_Comm_get_info", "AMPI_Comm_get_name",
2682 "AMPI_Comm_group", "AMPI_Comm_rank", "AMPI_Comm_remote_group", "AMPI_Comm_remote_size",
2683 "AMPI_Comm_set_attr", "AMPI_Comm_set_errhandler", "AMPI_Comm_set_info", "AMPI_Comm_set_name",
2684 "AMPI_Comm_size", "AMPI_Comm_split", "AMPI_Comm_split_type", "AMPI_Comm_test_inter",
2685 "AMPI_Dims_create", "AMPI_Dist_graph_create", "AMPI_Dist_graph_create_adjacent",
2686 "AMPI_Dist_graph_neighbors", "AMPI_Dist_graph_neighbors_count",
2687 "AMPI_Errhandler_create", "AMPI_Errhandler_free", "AMPI_Errhandler_get",
2688 "AMPI_Errhandler_set", "AMPI_Error_class", "AMPI_Error_string", "AMPI_Exscan", "AMPI_Finalize",
2689 "AMPI_Finalized", "AMPI_Gather", "AMPI_Gatherv", "AMPI_Get_address", "AMPI_Get_count",
2690 "AMPI_Get_elements", "AMPI_Get_library_version", "AMPI_Get_processor_name", "AMPI_Get_version",
2691 "AMPI_Graph_create", "AMPI_Graph_get", "AMPI_Graph_map", "AMPI_Graph_neighbors",
2692 "AMPI_Graph_neighbors_count", "AMPI_Graphdims_get", "AMPI_Group_compare", "AMPI_Group_difference",
2693 "AMPI_Group_excl", "AMPI_Group_free", "AMPI_Group_incl", "AMPI_Group_intersection",
2694 "AMPI_Group_range_excl", "AMPI_Group_range_incl", "AMPI_Group_rank", "AMPI_Group_size",
2695 "AMPI_Group_translate_ranks", "AMPI_Group_union", "AMPI_Iallgather", "AMPI_Iallgatherv",
2696 "AMPI_Iallreduce", "AMPI_Ialltoall", "AMPI_Ialltoallv", "AMPI_Ialltoallw", "AMPI_Ibarrier",
2697 "AMPI_Ibcast", "AMPI_Iexscan", "AMPI_Igather", "AMPI_Igatherv", "AMPI_Ineighbor_allgather",
2698 "AMPI_Ineighbor_allgatherv", "AMPI_Ineighbor_alltoall", "AMPI_Ineighbor_alltoallv",
2699 "AMPI_Ineighbor_alltoallw", "AMPI_Init", "AMPI_Init_thread", "AMPI_Initialized", "AMPI_Intercomm_create",
2700 "AMPI_Intercomm_merge", "AMPI_Iprobe", "AMPI_Irecv", "AMPI_Ireduce", "AMPI_Ireduce_scatter",
2701 "AMPI_Ireduce_scatter_block", "AMPI_Is_thread_main", "AMPI_Iscan", "AMPI_Iscatter", "AMPI_Iscatterv",
2702 "AMPI_Isend", "AMPI_Issend", "AMPI_Keyval_create", "AMPI_Keyval_free", "AMPI_Neighbor_allgather",
2703 "AMPI_Neighbor_allgatherv", "AMPI_Neighbor_alltoall", "AMPI_Neighbor_alltoallv", "AMPI_Neighbor_alltoallw",
2704 "AMPI_Op_commutative", "AMPI_Op_create", "AMPI_Op_free", "AMPI_Pack", "AMPI_Pack_size",
2705 "AMPI_Pcontrol", "AMPI_Probe", "AMPI_Query_thread", "AMPI_Recv", "AMPI_Recv_init", "AMPI_Reduce",
2706 "AMPI_Reduce_local", "AMPI_Reduce_scatter", "AMPI_Reduce_scatter_block", "AMPI_Request_free",
2707 "AMPI_Request_get_status", "AMPI_Rsend", "AMPI_Scan", "AMPI_Scatter", "AMPI_Scatterv", "AMPI_Send",
2708 "AMPI_Send_init", "AMPI_Sendrecv", "AMPI_Sendrecv_replace", "AMPI_Ssend", "AMPI_Ssend_init",
2709 "AMPI_Start", "AMPI_Startall", "AMPI_Status_set_cancelled", "AMPI_Status_set_elements", "AMPI_Test",
2710 "AMPI_Test_cancelled", "AMPI_Testall", "AMPI_Testany", "AMPI_Testsome", "AMPI_Topo_test",
2711 "AMPI_Type_commit", "AMPI_Type_contiguous", "AMPI_Type_create_hindexed",
2712 "AMPI_Type_create_hindexed_block", "AMPI_Type_create_hvector", "AMPI_Type_create_indexed_block",
2713 "AMPI_Type_create_keyval", "AMPI_Type_create_resized", "AMPI_Type_create_struct",
2714 "AMPI_Type_delete_attr", "AMPI_Type_dup", "AMPI_Type_extent", "AMPI_Type_free",
2715 "AMPI_Type_free_keyval", "AMPI_Type_get_attr", "AMPI_Type_get_contents", "AMPI_Type_get_envelope",
2716 "AMPI_Type_get_extent", "AMPI_Type_get_name", "AMPI_Type_get_true_extent", "AMPI_Type_hindexed",
2717 "AMPI_Type_hvector", "AMPI_Type_indexed", "AMPI_Type_lb", "AMPI_Type_set_attr",
2718 "AMPI_Type_set_name", "AMPI_Type_size", "AMPI_Type_struct", "AMPI_Type_ub", "AMPI_Type_vector",
2719 "AMPI_Type_create_darray", "AMPI_Type_create_subarray",
2720 "AMPI_Unpack", "AMPI_Wait", "AMPI_Waitall", "AMPI_Waitany", "AMPI_Waitsome", "AMPI_Wtick", "AMPI_Wtime",
2721 "AMPI_Accumulate", "AMPI_Compare_and_swap", "AMPI_Fetch_and_op", "AMPI_Get", "AMPI_Get_accumulate",
2722 "AMPI_Info_create", "AMPI_Info_delete", "AMPI_Info_dup", "AMPI_Info_free", "AMPI_Info_get",
2723 "AMPI_Info_get_nkeys", "AMPI_Info_get_nthkey", "AMPI_Info_get_valuelen",
2724 "AMPI_Info_set", "AMPI_Put", "AMPI_Raccumulate", "AMPI_Rget", "AMPI_Rget_accumulate",
2725 "AMPI_Rput", "AMPI_Win_complete", "AMPI_Win_create", "AMPI_Win_create_errhandler",
2726 "AMPI_Win_create_keyval", "AMPI_Win_delete_attr", "AMPI_Win_fence", "AMPI_Win_free",
2727 "AMPI_Win_free_keyval", "AMPI_Win_get_attr", "AMPI_Win_get_errhandler",
2728 "AMPI_Win_get_group", "AMPI_Win_get_info", "AMPI_Win_get_name", "AMPI_Win_lock",
2729 "AMPI_Win_post", "AMPI_Win_set_attr", "AMPI_Win_set_errhandler", "AMPI_Win_set_info",
2730 "AMPI_Win_set_name", "AMPI_Win_start", "AMPI_Win_test", "AMPI_Win_unlock",
2731 "AMPI_Win_wait", "AMPI_Exit" /*AMPI extensions:*/, "AMPI_Migrate",
2732 "AMPI_Load_start_measure", "AMPI_Load_stop_measure",
2733 "AMPI_Load_set_value", "AMPI_Migrate_to_pe", "AMPI_Set_migratable",
2734 "AMPI_Register_pup", "AMPI_Get_pup_data", "AMPI_Register_main",
2735 "AMPI_Register_about_to_migrate", "AMPI_Register_just_migrated",
2736 "AMPI_Iget", "AMPI_Iget_wait", "AMPI_Iget_free", "AMPI_Iget_data",
2737 "AMPI_Type_is_contiguous", "AMPI_Yield", "AMPI_Suspend",
2738 "AMPI_Resume", "AMPI_Print", "AMPI_Alltoall_medium",
2739 "AMPI_Alltoall_long", "AMPI_System"};
2741 // not traced: AMPI_Trace_begin, AMPI_Trace_end
2743 #endif // CMK_TRACE_ENABLED
2745 //Use this to mark the start of AMPI interface routines that can only be called on AMPI threads:
// With CMK_ERROR_CHECKING the macro first calls isAmpiThread() and, if it returns false,
// aborts through CkAbort; it then expands TCHARM_API_TRACE(routineName, "ampi").
// Without error checking only the TCHARM_API_TRACE expansion remains.
// The checking variant expands to more than one statement and already ends in ';',
// while the non-checking variant leaves the ';' to the call site, so use it only as a
// standalone statement at the top of a routine body (never as the body of an unbraced if/else).
2746 #if CMK_ERROR_CHECKING
2747 #define AMPI_API(routineName) \
2748 if (!isAmpiThread()) { CkAbort("AMPI> cannot call MPI routines from non-AMPI threads!"); } \
2749 TCHARM_API_TRACE(routineName, "ampi");
2750 #else
2751 #define AMPI_API(routineName) TCHARM_API_TRACE(routineName, "ampi")
2752 #endif
2754 //Use this for MPI_Init and routines that can be called before AMPI threads have been initialized:
// Expands to TCHARM_API_TRACE only; unlike AMPI_API it performs no isAmpiThread() check.
2755 #define AMPI_API_INIT(routineName) TCHARM_API_TRACE(routineName, "ampi")
2757 #endif // _AMPIIMPL_H