2 * Charm Onesided API Utility Functions
10 /*********************************** Zerocopy Direct API **********************************/
12 #define CK_BUFFER_REG CMK_BUFFER_REG
13 #define CK_BUFFER_UNREG CMK_BUFFER_UNREG
14 #define CK_BUFFER_PREREG CMK_BUFFER_PREREG
15 #define CK_BUFFER_NOREG CMK_BUFFER_NOREG
17 #define CK_BUFFER_DEREG CMK_BUFFER_DEREG
18 #define CK_BUFFER_NODEREG CMK_BUFFER_NODEREG
20 #ifndef CMK_NOCOPY_DIRECT_BYTES
23 #define CMK_NOCOPY_DIRECT_BYTES 1
24 /* It is required to define CMK_NOCOPY_DIRECT_BYTES as 1 instead of 0
25 * as this avoids the C2229 error (illegal zero-sized array)
26 * for char layerInfo[CMK_NOCOPY_DIRECT_BYTES] which is seen for
27 * a 0 sized array on VC++
30 #define CMK_NOCOPY_DIRECT_BYTES 0
31 #endif // end of if defined(_WIN32)
33 #endif // end of ifndef CMK_NOCOPY_DIRECT_BYTES
35 #ifndef CMK_COMMON_NOCOPY_DIRECT_BYTES
36 #define CMK_COMMON_NOCOPY_DIRECT_BYTES 0
39 #define CkRdmaAlloc CmiRdmaAlloc
40 #define CkRdmaFree CmiRdmaFree
42 // Represents the mode of the zerocopy transfer
43 // CkNcpyMode::MEMCPY indicates that the PEs are on the same logical node and memcpy can be used
44 // CkNcpyMode::CMA indicates that the PEs are on the same physical node and CMA can be used
45 // CkNcpyMode::RDMA indicates that neither MEMCPY nor CMA can be used and Remote Direct Memory Access needs to be used
// Transfer mechanism selected for a zerocopy operation.
// Stored in a single char; enumerator values are 0, 1, 2 in declaration order.
enum class CkNcpyMode : char {
  MEMCPY = 0,  // PEs share a logical node: a plain memcpy is used
  CMA    = 1,  // PEs share a physical node: CMA is used
  RDMA   = 2   // neither of the above applies: remote direct memory access
};
48 // Represents the completion status of the zerocopy transfer (used as a return value for CkNcpyBuffer::get & CkNcpyBuffer::put)
49 // CMA and MEMCPY transfers complete instantly and return CkNcpyStatus::complete
50 // RDMA transfers use a remote asynchronous call and hence return CkNcpyStatus::incomplete
51 enum class CkNcpyStatus
: char { incomplete
, complete
};
53 // P2P_SEND mode is used for EM P2P Send API
54 // BCAST_SEND mode is used for EM BCAST Send API
55 // P2P_RECV mode is used for EM P2P Recv API
56 // BCAST_RECV mode is used for EM BCAST Recv API
// Which flavour of the Zerocopy Entry Method (EM) API an operation belongs to.
// Stored in a single char; enumerator values are 0..3 in declaration order.
enum class ncpyEmApiMode : char {
  P2P_SEND   = 0,  // EM point-to-point Send API
  BCAST_SEND = 1,  // EM broadcast Send API
  P2P_RECV   = 2,  // EM point-to-point Recv API
  BCAST_RECV = 3   // EM broadcast Recv API
};
59 // Struct passed in a ZC Post Entry Method to allow receiver side to post
60 struct CkNcpyBufferPost
{
62 unsigned short int regMode
;
65 unsigned short int deregMode
;
68 // Class to represent a Zerocopy buffer
69 // CkSendBuffer(....) passed by the user internally translates to a CkNcpyBuffer
74 // bool to indicate registration for current values of ptr and cnt on pe
77 // machine specific information about the buffer
79 #pragma GCC diagnostic push
80 #pragma GCC diagnostic ignored "-Wpedantic"
82 char layerInfo
[CMK_COMMON_NOCOPY_DIRECT_BYTES
+ CMK_NOCOPY_DIRECT_BYTES
];
84 #pragma GCC diagnostic pop
88 // pointer to the buffer
94 // callback to be invoked on the sender/receiver
101 unsigned short int regMode
;
104 unsigned short int deregMode
;
109 // bcast ack handling pointer
110 const void *bcastAckInfo
;
112 CkNcpyBuffer() : isRegistered(false), ptr(NULL
), cnt(0), pe(-1), regMode(CK_BUFFER_REG
), deregMode(CK_BUFFER_DEREG
), ref(NULL
), bcastAckInfo(NULL
) {}
114 explicit CkNcpyBuffer(const void *ptr_
, size_t cnt_
, unsigned short int regMode_
=CK_BUFFER_REG
, unsigned short int deregMode_
=CK_BUFFER_DEREG
) {
115 cb
= CkCallback(CkCallback::ignore
);
116 init(ptr_
, cnt_
, regMode_
, deregMode_
);
119 explicit CkNcpyBuffer(const void *ptr_
, size_t cnt_
, CkCallback
&cb_
, unsigned short int regMode_
=CK_BUFFER_REG
, unsigned short int deregMode_
=CK_BUFFER_DEREG
) {
120 init(ptr_
, cnt_
, cb_
, regMode_
, deregMode_
);
124 CkPrintf("[%d][%d][%d] CkNcpyBuffer print: ptr:%p, size:%d, pe:%d, regMode=%d, deregMode=%d, ref:%p, bcastAckInfo:%p\n", CmiMyPe(), CmiMyNode(), CmiMyRank(), ptr
, cnt
, pe
, regMode
, deregMode
, ref
, bcastAckInfo
);
127 void init(const void *ptr_
, size_t cnt_
, CkCallback
&cb_
, unsigned short int regMode_
=CK_BUFFER_REG
, unsigned short int deregMode_
=CK_BUFFER_DEREG
) {
129 init(ptr_
, cnt_
, regMode_
, deregMode_
);
132 void init(const void *ptr_
, size_t cnt_
, unsigned short int regMode_
=CK_BUFFER_REG
, unsigned short int deregMode_
=CK_BUFFER_DEREG
) {
137 deregMode
= deregMode_
;
139 isRegistered
= false;
141 // Register memory every time new values are initialized
146 void setRef(const void *ref_
) {
150 const void *getRef() {
154 // Register(Pin) the memory for the buffer
157 // Check that this object is local when registerMem is called
158 CkAssert(CkNodeOf(pe
) == CkMyNode());
160 // Set machine layer information when regMode is not CK_BUFFER_NOREG
161 if(regMode
!= CK_BUFFER_NOREG
) {
163 CmiSetRdmaCommonInfo(&layerInfo
[0], ptr
, cnt
);
165 /* Set the pointer layerInfo unconditionally for layers that don't require pinning (MPI, PAMI)
166 * or if regMode is REG, PREREG on layers that require pinning (GNI, Verbs, OFI) */
168 if(regMode
== CK_BUFFER_REG
|| regMode
== CK_BUFFER_PREREG
)
171 CmiSetRdmaBufferInfo(layerInfo
+ CmiGetRdmaCommonInfoSize(), ptr
, cnt
, regMode
);
177 void setMode(unsigned short int regMode_
) { regMode
= regMode_
; }
179 void memcpyGet(CkNcpyBuffer
&source
);
180 void memcpyPut(CkNcpyBuffer
&destination
);
183 void cmaGet(CkNcpyBuffer
&source
);
184 void cmaPut(CkNcpyBuffer
&destination
);
187 void rdmaGet(CkNcpyBuffer
&source
);
188 void rdmaPut(CkNcpyBuffer
&destination
);
190 CkNcpyStatus
get(CkNcpyBuffer
&source
);
191 CkNcpyStatus
put(CkNcpyBuffer
&destination
);
193 // Deregister(Unpin) the memory that is registered for the buffer
194 void deregisterMem() {
195 // Check that this object is local when deregisterMem is called
196 CkAssert(CkNodeOf(pe
) == CkMyNode());
198 if(isRegistered
== false)
202 if(regMode
!= CK_BUFFER_NOREG
) {
203 CmiDeregisterMem(ptr
, layerInfo
+ CmiGetRdmaCommonInfoSize(), pe
, regMode
);
204 isRegistered
= false;
209 void pup(PUP::er
&p
) {
210 p((char *)&ptr
, sizeof(ptr
));
211 p((char *)&ref
, sizeof(ref
));
212 p((char *)&bcastAckInfo
, sizeof(bcastAckInfo
));
219 PUParray(p
, layerInfo
, CMK_COMMON_NOCOPY_DIRECT_BYTES
+ CMK_NOCOPY_DIRECT_BYTES
);
222 friend void CkRdmaDirectAckHandler(void *ack
);
224 friend void CkRdmaEMBcastAckHandler(void *ack
);
226 friend void constructSourceBufferObject(NcpyOperationInfo
*info
, CkNcpyBuffer
&src
);
227 friend void constructDestinationBufferObject(NcpyOperationInfo
*info
, CkNcpyBuffer
&dest
);
229 friend envelope
* CkRdmaIssueRgets(envelope
*env
, ncpyEmApiMode emMode
, void *forwardMsg
);
230 friend void CkRdmaIssueRgets(envelope
*env
, ncpyEmApiMode emMode
, void *forwardMsg
, int numops
, void **arrPtrs
, CkNcpyBufferPost
*postStructs
);
232 friend void readonlyGet(CkNcpyBuffer
&src
, CkNcpyBuffer
&dest
, void *refPtr
);
233 friend void readonlyCreateOnSource(CkNcpyBuffer
&src
);
236 friend void performEmApiNcpyTransfer(CkNcpyBuffer
&source
, CkNcpyBuffer
&dest
, int opIndex
, int child_count
, char *ref
, int extraSize
, CkNcpyMode ncpyMode
, ncpyEmApiMode emMode
);
238 friend void performEmApiRget(CkNcpyBuffer
&source
, CkNcpyBuffer
&dest
, int opIndex
, char *ref
, int extraSize
, ncpyEmApiMode emMode
);
240 friend void performEmApiCmaTransfer(CkNcpyBuffer
&source
, CkNcpyBuffer
&dest
, int child_count
, ncpyEmApiMode emMode
);
242 friend void deregisterMemFromMsg(envelope
*env
, bool isRecv
);
245 // Ack handler for the Zerocopy Direct API
246 // Invoked on the completion of any RDMA operation called using the Direct API
247 void CkRdmaDirectAckHandler(void *ack
);
249 // Method to invoke a callback on a particular pe with a CkNcpyBuffer being passed
250 // as a part of a CkDataMsg. This method is used to invoke callbacks on specific pes
251 // after the completion of the Zerocopy Direct API operation
252 void invokeCallback(void *cb
, int pe
, CkNcpyBuffer
&buff
);
254 // Returns CkNcpyMode::MEMCPY if both the PEs are the same and memcpy can be used
255 // Returns CkNcpyMode::CMA if both the PEs are in the same physical node and CMA can be used
256 // Returns CkNcpyMode::RDMA if RDMA needs to be used
257 CkNcpyMode
findTransferMode(int srcPe
, int destPe
);
259 void invokeSourceCallback(NcpyOperationInfo
*info
);
261 void invokeDestinationCallback(NcpyOperationInfo
*info
);
263 // Method to enqueue a message after the completion of a payload transfer
264 void enqueueNcpyMessage(int destPe
, void *msg
);
266 /*********************************** Zerocopy Entry Method API ****************************/
// Wrap a user buffer for the Zerocopy Entry Method API, with a callback.
//   ptr_       - start of the buffer to send
//   cb_        - callback stored in the resulting CkNcpyBuffer
//   regMode_   - registration mode, defaults to CK_BUFFER_REG
//   deregMode_ - deregistration mode, defaults to CK_BUFFER_DEREG
// Returns a CkNcpyBuffer built from these arguments.
267 static inline CkNcpyBuffer
CkSendBuffer(const void *ptr_
, CkCallback
&cb_
, unsigned short int regMode_
=CK_BUFFER_REG
, unsigned short int deregMode_
=CK_BUFFER_DEREG
) {
// The element count is passed as 0 here.
// NOTE(review): presumably the real size is filled in later by the caller - confirm.
268 return CkNcpyBuffer(ptr_
, 0, cb_
, regMode_
, deregMode_
);
// Wrap a user buffer for the Zerocopy Entry Method API, without a callback.
//   ptr_       - start of the buffer to send
//   regMode_   - registration mode, defaults to CK_BUFFER_REG
//   deregMode_ - deregistration mode, defaults to CK_BUFFER_DEREG
// Returns a CkNcpyBuffer built with the callback-less constructor, which
// sets its callback to CkCallback::ignore.
271 static inline CkNcpyBuffer
CkSendBuffer(const void *ptr_
, unsigned short int regMode_
=CK_BUFFER_REG
, unsigned short int deregMode_
=CK_BUFFER_DEREG
) {
// The element count is passed as 0 here.
// NOTE(review): presumably the real size is filled in later by the caller - confirm.
272 return CkNcpyBuffer(ptr_
, 0, regMode_
, deregMode_
);
275 #if CMK_ONESIDED_IMPL
277 // NOTE: Inside CkRdmaIssueRgets, a large message allocation is made consisting of space
278 // for the destination or receiver buffers and some additional information required for processing
279 // and acknowledgment handling. The space for additional information is typically equal to
280 // sizeof(NcpyEmInfo) + numops * sizeof(NcpyEmBufferInfo)
282 // This structure is used to store zerocopy information associated with an entry method
283 // invocation which uses the RDMA mode of transfer in Zerocopy Entry Method API.
284 // A variable of the structure stores the information in order to access it after the
285 // completion of the Rget operation (which is an asynchronous call) in order to invoke
288 int numOps
; // number of zerocopy operations i.e number of buffers sent using CkSendBuffer
289 int counter
; // used for tracking the number of completed RDMA operations
291 ncpyEmApiMode mode
; // used to distinguish between p2p and bcast
292 void *msg
; // pointer to the Charm++ message which will be enqueued after completion of all Rgets
293 void *forwardMsg
; // used for the ncpy broadcast api
297 // This structure is used to store the buffer information specific to each buffer being sent
298 // using the Zerocopy Entry Method API. A variable of the structure stores the information associated
300 struct NcpyEmBufferInfo
{
301 int index
; // Represents the index of the buffer information (from 0,1... numops - 1)
302 NcpyOperationInfo ncpyOpInfo
; // Stores all the information required for the zerocopy operation
307 * Extract ncpy buffer information from the metadata message,
308 * allocate buffers and issue ncpy calls (either memcpy or cma read or rdma get)
310 envelope
* CkRdmaIssueRgets(envelope
*env
, ncpyEmApiMode emMode
, void *forwardMsg
= NULL
);
312 void CkRdmaIssueRgets(envelope
*env
, ncpyEmApiMode emMode
, void *forwardMsg
, int numops
, void **arrPtrs
, CkNcpyBufferPost
*postStructs
);
314 void handleEntryMethodApiCompletion(NcpyOperationInfo
*info
);
316 void handleReverseEntryMethodApiCompletion(NcpyOperationInfo
*info
);
318 // Method called to pack rdma pointers
319 void CkPackRdmaPtrs(char *msgBuf
);
321 // Method called to unpack rdma pointers
322 void CkUnpackRdmaPtrs(char *msgBuf
);
324 // Determine the number of ncpy ops and the sum of the ncpy buffer sizes
325 // from the metadata message
326 void getRdmaNumopsAndBufsize(envelope
*env
, int &numops
, int &bufsize
);
328 // Ack handler function for the nocopy EM API
329 void CkRdmaEMAckHandler(int destPe
, void *ack
);
331 void CkRdmaEMBcastPostAckHandler(void *msg
);
333 struct NcpyBcastRecvPeerAckInfo
{
335 std::atomic
<int> numPeers
;
343 int getNumPeers() const {
344 return numPeers
.load(std::memory_order_acquire
);
346 void setNumPeers(int r
) {
347 return numPeers
.store(r
, std::memory_order_release
);
350 return numPeers
.fetch_add(1, std::memory_order_release
);
353 return numPeers
.fetch_sub(1, std::memory_order_release
);
// Accessors for numPeers written with plain reads/writes instead of the
// explicit load/store/fetch_add/fetch_sub calls used by the variants above.
// NOTE(review): presumably the alternative branch of a preprocessor
// conditional that is not visible here - confirm.

// Returns the current value of numPeers.
356 int getNumPeers() const { return numPeers
; }
// Overwrites numPeers with r.
357 void setNumPeers(int r
) { numPeers
= r
; }
// Adds one to numPeers and returns the value it held BEFORE the increment
// (post-increment), matching what fetch_add returns.
358 int incNumPeers() { return numPeers
++; }
// Subtracts one from numPeers and returns the value it held BEFORE the
// decrement (post-decrement), matching what fetch_sub returns.
359 int decNumPeers() { return numPeers
--; }
366 /***************************** Zerocopy Bcast Entry Method API ****************************/
367 struct NcpyBcastAckInfo
{
375 struct NcpyBcastRootAckInfo
: public NcpyBcastAckInfo
{
379 struct NcpyBcastInterimAckInfo
: public NcpyBcastAckInfo
{
385 void *parentBcastAckInfo
;
390 // Method called on the bcast source to store some information for ack handling
391 void CkRdmaPrepareBcastMsg(envelope
*env
);
393 void CkReplaceSourcePtrsInBcastMsg(envelope
*env
, NcpyBcastInterimAckInfo
*bcastAckInfo
, int origPe
);
395 // Method called to extract the parent bcastAckInfo from the received message for ack handling
396 const void *getParentBcastAckInfo(void *msg
, int &srcPe
);
398 // Allocate a NcpyBcastInterimAckInfo and return the pointer
399 NcpyBcastInterimAckInfo
*allocateInterimNodeAckObj(envelope
*myEnv
, envelope
*myChildEnv
, int pe
);
401 void forwardMessageToChildNodes(envelope
*myChildrenMsg
, UChar msgType
);
403 void forwardMessageToPeerNodes(envelope
*myMsg
, UChar msgType
);
405 void handleBcastEntryMethodApiCompletion(NcpyOperationInfo
*info
);
407 void handleBcastReverseEntryMethodApiCompletion(NcpyOperationInfo
*info
);
409 void deregisterMemFromMsg(envelope
*env
, bool isRecv
);
411 void handleMsgUsingCMAPostCompletionForSendBcast(envelope
*copyenv
, envelope
*env
, CkNcpyBuffer
&source
);
413 void processBcastSendEmApiCompletion(NcpyEmInfo
*ncpyEmInfo
, int destPe
);
415 // Method called on intermediate nodes after RGET to switch old source pointers with my pointers
416 void CkReplaceSourcePtrsInBcastMsg(envelope
*prevEnv
, envelope
*env
, void *bcastAckInfo
, int origPe
);
418 void processBcastRecvEmApiCompletion(NcpyEmInfo
*ncpyEmInfo
, int destPe
);
420 // Method called on the root node and other intermediate parent nodes on completion of RGET through ZC Bcast
421 void CkRdmaEMBcastAckHandler(void *ack
);
423 void handleMsgOnChildPostCompletionForRecvBcast(envelope
*env
);
425 void handleMsgOnInterimPostCompletionForRecvBcast(envelope
*env
, NcpyBcastInterimAckInfo
*bcastAckInfo
, int pe
);
429 /***************************** Zerocopy Readonly Bcast Support ****************************/
431 /* Support for Zerocopy Broadcast of large readonly variables */
432 CkpvExtern(int, _numPendingRORdmaTransfers
);
434 struct NcpyROBcastBuffAckInfo
{
441 // machine specific information about the buffer
443 #pragma GCC diagnostic push
444 #pragma GCC diagnostic ignored "-Wpedantic"
446 char layerInfo
[CMK_COMMON_NOCOPY_DIRECT_BYTES
+ CMK_NOCOPY_DIRECT_BYTES
];
448 #pragma GCC diagnostic pop
452 struct NcpyROBcastAckInfo
{
457 NcpyROBcastBuffAckInfo buffAckInfo
[0];
460 void readonlyUpdateNumops();
462 void readonlyAllocateOnSource();
464 void readonlyCreateOnSource(CkNcpyBuffer
&src
);
466 void readonlyGet(CkNcpyBuffer
&src
, CkNcpyBuffer
&dest
, void *refPtr
);
468 void readonlyGetCompleted(NcpyOperationInfo
*ncpyOpInfo
);
471 void updatePeerCounterAndPush(envelope
*env
);
474 CkArray
* getArrayMgrFromMsg(envelope
*env
);
476 void sendAckMsgToParent(envelope
*env
);
478 void sendRecvDoneMsgToPeers(envelope
*env
, CkArray
*mgr
);
480 #endif /* End of CMK_ONESIDED_IMPL */