BG XLC: Use tr1/unordered_map
[charm.git] / src / arch / util / machine-broadcast.c
bloba72d549e26c1cef8437ed0093bfb2ebaf93eaa2f
1 /**
2 * functions for broadcast
3 **/
5 CmiCommHandle CmiSendNetworkFunc(int destPE, int size, char *msg, int mode);
/**
 * Dispatch one incoming broadcast message.
 *
 * The sign of CMI_BROADCAST_ROOT(msg) selects the path: a positive root is
 * handled as a processor-level broadcast, a negative root as a node-level
 * broadcast (only when CMK_NODE_QUEUE_AVAILABLE is set). A root of 0 is
 * rejected by the assertion.
 *
 * With CMK_OFFLOAD_BCAST_PROCESS the message is only queued (procBcastQ /
 * nodeBcastQ); otherwise it is processed immediately.
 *
 * @param size  size of the message in bytes
 * @param msg   the broadcast message
 */
static void handleOneBcastMsg(int size, char *msg) {
    CmiAssert(0 != CMI_BROADCAST_ROOT(msg));

    if (CMI_BROADCAST_ROOT(msg) > 0) {
        /* processor-level broadcast */
#if CMK_OFFLOAD_BCAST_PROCESS
        CMIQueuePush(CsvAccess(procBcastQ), msg);
#else
        processProcBcastMsg(size, msg);
#endif
        return;
    }

    /* node-level broadcast; nothing is done with the message here when
     * node queues are not available */
#if CMK_NODE_QUEUE_AVAILABLE
#if CMK_OFFLOAD_BCAST_PROCESS
    CMIQueuePush(CsvAccess(nodeBcastQ), msg);
#else
    processNodeBcastMsg(size, msg);
#endif
#endif
}
/**
 * Drain the deferred broadcast queues.
 *
 * Only does work when CMK_OFFLOAD_BCAST_PROCESS is set: every message
 * popped from procBcastQ is handed to processProcBcastMsg(), then (when
 * CMK_NODE_QUEUE_AVAILABLE is set) every message popped from nodeBcastQ is
 * handed to processNodeBcastMsg(). Each loop stops at the first empty pop.
 */
static void processBcastQs(void) {
#if CMK_OFFLOAD_BCAST_PROCESS
    char *msg;

    /* processor-level broadcasts first */
    while ((msg = CMIQueuePop(CsvAccess(procBcastQ))) != 0) {
        MACHSTATE2(4, "[%d]: process a proc-level bcast msg %p begin{", CmiMyNode(), msg);
        processProcBcastMsg(CMI_MSG_SIZE(msg), msg);
        MACHSTATE2(4, "[%d]: process a proc-level bcast msg %p end}", CmiMyNode(), msg);
    }

#if CMK_NODE_QUEUE_AVAILABLE
    /* then node-level broadcasts */
    while ((msg = CMIQueuePop(CsvAccess(nodeBcastQ))) != 0) {
        MACHSTATE2(4, "[%d]: process a node-level bcast msg %p begin{", CmiMyNode(), msg);
        processNodeBcastMsg(CMI_MSG_SIZE(msg), msg);
        MACHSTATE2(4, "[%d]: process a node-level bcast msg %p end}", CmiMyNode(), msg);
    }
#endif
#endif
}
50 static INLINE_KEYWORD void processProcBcastMsg(int size, char *msg) {
51 /* Since this function is only called on intermediate nodes,
52 * the rank of this msg should be 0.
54 CmiAssert(CMI_DEST_RANK(msg)==0);
55 /*CmiPushPE(CMI_DEST_RANK(msg), msg);*/
57 #if CMK_BROADCAST_SPANNING_TREE
58 SendSpanningChildrenProc(size, msg);
59 #elif CMK_BROADCAST_HYPERCUBE
60 SendHyperCubeProc(size, msg);
61 #endif
62 #if CMK_BROADCAST_SPANNING_TREE && CMK_BROADCAST_USE_CMIREFERENCE
63 /* same message may be sent out, make a copy of it */
64 if (CmiNumNodes()>1 && CmiGetReference(msg)>1) {
65 void *newmsg;
66 newmsg = CopyMsg(msg, size);
67 CmiFree(msg);
68 msg = newmsg;
70 #endif
71 CmiPushPE(0, msg);
76 #if CMK_NODE_QUEUE_AVAILABLE
77 static INLINE_KEYWORD void processNodeBcastMsg(int size, char *msg) {
78 #if CMK_BROADCAST_SPANNING_TREE
79 SendSpanningChildrenNode(size, msg);
80 #elif CMK_BROADCAST_HYPERCUBE
81 SendHyperCubeNode(size, msg);
82 #endif
84 /* In SMP mode, this push operation needs to be executed
85 * after forwarding broadcast messages. If it is executed
86 * earlier, then during the bcast msg forwarding period,
87 * the msg could be already freed on the worker thread.
88 * As a result, the forwarded message could be wrong!
91 CmiPushNode(msg);
93 #endif
/**
 * Send a broadcast message to this node's children in a spanning tree
 * with branching factor BROADCAST_SPANNING_FACTOR, rooted at startNode.
 *
 * Nodes are renumbered relative to the root (root = 0); the children of
 * relative rank r are FACTOR*r + 1 .. FACTOR*r + FACTOR, limited to
 * existing nodes. Each child receives the message at its first PE
 * (CmiNodeFirst). The body is empty unless CMK_BROADCAST_SPANNING_TREE
 * is set.
 *
 * @param size          size of the message in bytes
 * @param msg           message to forward; its destination-rank field is
 *                      temporarily overwritten and restored before return
 * @param rankToAssign  destination rank stamped on the forwarded copies
 * @param startNode     node at the root of the tree, in [0, CmiNumNodes())
 */
static void SendSpanningChildren(int size, char *msg, int rankToAssign, int startNode) {
#if CMK_BROADCAST_SPANNING_TREE
    const int numNodes = CmiNumNodes();
    int i, oldRank, relRank;

    /* Stamp the rank once here instead of on every outgoing copy; the
     * original value is restored at the end. */
    oldRank = CMI_DEST_RANK(msg);
    CMI_DEST_RANK(msg) = rankToAssign;

    CmiAssert(startNode >= 0 && startNode < numNodes);

    /* This node's rank relative to the root; it does not change inside
     * the loop, so compute it once. */
    relRank = CmiMyNode() - startNode;
    if (relRank < 0) relRank += numNodes;

    for (i = 1; i <= BROADCAST_SPANNING_FACTOR; i++) {
        /* relative rank of the i-th child */
        int nd = BROADCAST_SPANNING_FACTOR * relRank + i;
        if (nd > numNodes - 1) break;   /* no further children exist */

        /* translate back to an absolute node id */
        nd = (nd + startNode) % numNodes;
        CmiAssert(nd >= 0 && nd != CmiMyNode());
#if CMK_BROADCAST_USE_CMIREFERENCE
        /* share the buffer with the network layer via a reference */
        CmiReference(msg);
        CmiSendNetworkFunc(CmiNodeFirst(nd), size, msg, BCAST_SYNC);
#else
        {
            /* each child gets its own copy */
            char *newmsg = CopyMsg(msg, size);
            CmiSendNetworkFunc(CmiNodeFirst(nd), size, newmsg, BCAST_SYNC);
        }
#endif
    }

    CMI_DEST_RANK(msg) = oldRank;
#endif
}
/**
 * Forward a broadcast message to this node's children in a hypercube-style
 * broadcast rooted at startNode. Each child receives the message at its
 * first PE (CmiNodeFirst). The body is empty unless
 * CMK_BROADCAST_HYPERCUBE is set.
 *
 * @param size          size of the message in bytes
 * @param msg           message to forward; its destination-rank field is
 *                      temporarily overwritten and restored before return
 * @param rankToAssign  destination rank stamped on the forwarded copies
 * @param startNode     node that originated the broadcast
 */
static void SendHyperCube(int size, char *msg, int rankToAssign, int startNode) {
#if CMK_BROADCAST_HYPERCUBE
    const int dims = CmiNodesDim;
    int cnt, relDist, oldRank;

    /* Stamp the rank once here instead of on every outgoing copy; the
     * original value is restored at the end. */
    oldRank = CMI_DEST_RANK(msg);
    CMI_DEST_RANK(msg) = rankToAssign;

    /* This node's distance from the root, wrapped into [0, CmiNumNodes()). */
    relDist = CmiMyNode() - startNode;
    if (relDist < 0) relDist += CmiNumNodes();

    /* Sending scheme example: say we have 9 nodes and the msg is sent
     * from node 0. The overall sending steps are:
     *   0-->8, 0-->4, 0-->2, 0-->1
     *   4-->6, 4-->5
     *   2-->3
     *   6-->7
     * Let B be the position (counting from 0 at the right) of the lowest
     * set bit of the relative node id N. Node N forwards the message, in
     * this order, to N+2^(B-1), N+2^(B-2), ..., N+1, skipping targets
     * that do not exist. On the root (N == 0), B is CmiNodesDim.
     */

    /* Calculate 2^B: the lowest set bit of relDist, or 2^dims on the root. */
    if (relDist == 0) cnt = 1 << dims;
    else cnt = relDist & ((~relDist) + 1);

    /* Begin to send msgs, halving the stride each step. */
    for (cnt >>= 1; cnt > 0; cnt >>= 1) {
        int nd = relDist + cnt;
        if (nd >= CmiNumNodes()) continue;   /* target does not exist */

        /* translate back to an absolute node id */
        nd = (nd + startNode) % CmiNumNodes();
        CmiAssert(nd >= 0 && nd != CmiMyNode());
#if CMK_BROADCAST_USE_CMIREFERENCE
        /* share the buffer with the network layer via a reference */
        CmiReference(msg);
        CmiSendNetworkFunc(CmiNodeFirst(nd), size, msg, BCAST_SYNC);
#else
        {
            /* each target gets its own copy */
            char *newmsg = CopyMsg(msg, size);
            CmiSendNetworkFunc(CmiNodeFirst(nd), size, newmsg, BCAST_SYNC);
        }
#endif
    }

    CMI_DEST_RANK(msg) = oldRank;
#endif
}
/**
 * Processor-level spanning-tree forwarding: send the message to the child
 * nodes (destination rank 0) and, in SMP builds, to the other PEs of this
 * node.
 *
 * @param size  size of the message in bytes
 * @param msg   the broadcast message
 */
static void SendSpanningChildrenProc(int size, char *msg) {
    /* the root field holds (root node + 1) for this kind of broadcast */
    SendSpanningChildren(size, msg, 0, CMI_BROADCAST_ROOT(msg) - 1);
#if CMK_SMP
    /* second, hand the message to my peers on this node */
    SendToPeers(size, msg);
#endif
}
/**
 * Send msg along the hypercube in a processor-level broadcast. (Sameer)
 *
 * The starting node is derived from the broadcast root stored in the
 * message; the message is then forwarded with destination rank 0 and, in
 * SMP builds, also handed to the other PEs of this node.
 *
 * NOTE(review): this function interprets (root - 1) as a PE id, while
 * CmiSyncBroadcastFn1 in this file stores CmiMyNode()+1 as the root for
 * the hypercube case -- confirm which encoding is intended.
 *
 * @param size  size of the message in bytes
 * @param msg   the broadcast message
 */
static void SendHyperCubeProc(int size, char *msg) {
    int startpe = CMI_BROADCAST_ROOT(msg)-1;
    int startnode = CmiNodeOf(startpe);
#if CMK_SMP
    /* Ids at or above CmiNumPes() are mapped to node (id - CmiNumPes()).
     * The comparison must be inclusive: id == CmiNumPes() is the one that
     * maps to node 0, and the comm-thread test in CmiSyncBroadcastFn1
     * (mype >= _Cmi_numpes) uses the same inclusive bound. */
    if (startpe >= CmiNumPes()) startnode = startpe - CmiNumPes();
#endif
    SendHyperCube(size, msg, 0, startnode);
#if CMK_SMP
    /* second send msgs to my peers on this node */
    SendToPeers(size, msg);
#endif
}
197 #if CMK_NODE_QUEUE_AVAILABLE
198 static void SendSpanningChildrenNode(int size, char *msg) {
199 int startnode = -CMI_BROADCAST_ROOT(msg)-1;
200 SendSpanningChildren(size, msg, DGRAM_NODEMESSAGE, startnode);
202 static void SendHyperCubeNode(int size, char *msg) {
203 int startnode = -CMI_BROADCAST_ROOT(msg)-1;
204 SendHyperCube(size, msg, DGRAM_NODEMESSAGE, startnode);
206 #endif
208 #if USE_COMMON_SYNC_BCAST
209 /* Functions regarding broadcat op that sends to every one else except me */
210 void CmiSyncBroadcastFn1(int size, char *msg) {
211 int i, mype;
213 CQdCreate(CpvAccess(cQdState), CmiNumPes()-1);
214 /*record the rank to avoid re-sending the msg in spanning tree or hypercube*/
215 CMI_DEST_RANK(msg) = CmiMyRank();
217 #if CMK_BROADCAST_SPANNING_TREE
218 CMI_SET_BROADCAST_ROOT(msg, CmiMyNode()+1);
219 SendSpanningChildrenProc(size, msg);
220 #elif CMK_BROADCAST_HYPERCUBE
221 CMI_SET_BROADCAST_ROOT(msg, CmiMyNode()+1);
222 SendHyperCubeProc(size, msg);
223 #else
224 mype = CmiMyPe();
225 #if CMK_SMP
226 /* In SMP, this function may be called from comm thread with a larger pe */
227 if(mype >= _Cmi_numpes){
228 for(i=0; i<_Cmi_numpes; i++)
229 CmiSyncSendFn(i, size, msg);
230 return;
232 #endif
234 for ( i=mype+1; i<_Cmi_numpes; i++ )
235 CmiSyncSendFn(i, size, msg) ;
237 for ( i=0; i<mype; i++ )
238 CmiSyncSendFn(i, size, msg) ;
239 #endif
241 /*CmiPrintf("In SyncBroadcast broadcast\n");*/
/**
 * Broadcast a message to every PE except the caller. The caller keeps
 * ownership of msg.
 *
 * @param size  size of the message in bytes
 * @param msg   the message to broadcast
 */
void CmiSyncBroadcastFn(int size, char *msg) {
    char *outgoing = msg;

#if CMK_BROADCAST_SPANNING_TREE && CMK_BROADCAST_USE_CMIREFERENCE
    /* The caller's buffer may live on the stack, so work on a copy.
     * This is only needed when the message actually goes out over the
     * network, i.e. when there is more than one node. */
    if (CmiNumNodes() > 1)
        outgoing = CopyMsg(msg, size);
#endif

    CmiSyncBroadcastFn1(size, outgoing);

#if CMK_BROADCAST_SPANNING_TREE && CMK_BROADCAST_USE_CMIREFERENCE
    /* release our hold on the temporary copy, if one was made */
    if (outgoing != msg)
        CmiFree(outgoing);
#endif
}
/**
 * Broadcast a message to every PE except the caller, then free it.
 *
 * @param size  size of the message in bytes
 * @param msg   the message; released with CmiFree() before returning
 */
void CmiFreeBroadcastFn(int size, char *msg) {
    CmiSyncBroadcastFn1(size, msg);

    /* the caller gave up the message, so release it here */
    CmiFree(msg);
}
261 #else
262 #define CmiSyncBroadcastFn1(s,m) CmiSyncBroadcastFn(s,m)
263 #endif
265 #if USE_COMMON_ASYNC_BCAST
266 /* FIXME: should use spanning or hypercube, but luckily async is never used */
267 CmiCommHandle CmiAsyncBroadcastFn(int size, char *msg) {
268 /*CmiPrintf("In AsyncBroadcast broadcast\n");*/
269 CmiAbort("CmiAsyncBroadcastFn should never be called");
270 return 0;
272 #endif
/* Functions regarding the broadcast op that sends to everyone */

/**
 * Broadcast a message to every PE, including the caller. The caller keeps
 * ownership of msg.
 *
 * @param size  size of the message in bytes
 * @param msg   the message to broadcast
 */
void CmiSyncBroadcastAllFn(int size, char *msg) {
    char *outgoing = msg;

#if CMK_BROADCAST_SPANNING_TREE && CMK_BROADCAST_USE_CMIREFERENCE
    /* The caller's buffer may live on the stack, so work on a copy.
     * This is only needed when the message actually goes out over the
     * network, i.e. when there is more than one node. */
    if (CmiNumNodes() > 1)
        outgoing = CopyMsg(msg, size);
#endif

    /* deliver to myself first, then to everybody else */
    CmiSyncSendFn(CmiMyPe(), size, outgoing);
    CmiSyncBroadcastFn1(size, outgoing);

#if CMK_BROADCAST_SPANNING_TREE && CMK_BROADCAST_USE_CMIREFERENCE
    /* release our hold on the temporary copy, if one was made */
    if (outgoing != msg)
        CmiFree(outgoing);
#endif
}
/**
 * Broadcast a message to every PE, including the caller, and give up
 * ownership of it: after the remote sends, the message (or a private copy
 * of it) is delivered to the calling PE via CmiSendSelf().
 *
 * @param size  size of the message in bytes
 * @param msg   the message; must not be used by the caller afterwards
 */
void CmiFreeBroadcastAllFn(int size, char *msg) {
    /* everybody else first */
    CmiSyncBroadcastFn1(size, msg);

#if CMK_BROADCAST_SPANNING_TREE && CMK_BROADCAST_USE_CMIREFERENCE
    /* If the buffer is still referenced after the sends (count > 1),
     * deliver a private copy locally and drop our reference to the
     * shared buffer. Only relevant when the message left this node. */
    if (CmiNumNodes() > 1 && CmiGetReference(msg) > 1) {
        char *shared = msg;
        msg = CopyMsg(shared, size);
        CmiFree(shared);
    }
#endif

    CmiSendSelf(msg);
}
303 CmiCommHandle CmiAsyncBroadcastAllFn(int size, char *msg) {
304 CmiSendSelf(CopyMsg(msg, size));
305 return CmiAsyncBroadcastFn(size, msg);
308 #if CMK_NODE_QUEUE_AVAILABLE
309 #if USE_COMMON_SYNC_BCAST
310 void CmiSyncNodeBroadcastFn(int size, char *msg) {
311 int mynode = CmiMyNode();
312 int i;
313 CQdCreate(CpvAccess(cQdState), CmiNumNodes()-1);
314 #if CMK_BROADCAST_SPANNING_TREE
315 CMI_SET_BROADCAST_ROOT(msg, -CmiMyNode()-1);
316 SendSpanningChildrenNode(size, msg);
317 #elif CMK_BROADCAST_HYPERCUBE
318 CMI_SET_BROADCAST_ROOT(msg, -CmiMyNode()-1);
319 SendHyperCubeNode(size, msg);
320 #else
321 for (i=mynode+1; i<CmiNumNodes(); i++)
322 CmiSyncNodeSendFn(i, size, msg);
323 for (i=0; i<mynode; i++)
324 CmiSyncNodeSendFn(i, size, msg);
325 #endif
/**
 * Node-level broadcast to every node except the caller's, then free the
 * message.
 *
 * @param size  size of the message in bytes
 * @param msg   the message; released with CmiFree() before returning
 */
void CmiFreeNodeBroadcastFn(int size, char *msg) {
    CmiSyncNodeBroadcastFn(size, msg);

    /* the caller gave up the message, so release it here */
    CmiFree(msg);
}
332 #endif
334 #if USE_COMMON_ASYNC_BCAST
335 CmiCommHandle CmiAsyncNodeBroadcastFn(int size, char *msg) {
336 CmiSyncNodeBroadcastFn(size, msg);
337 return 0;
339 #endif
/**
 * Node-level broadcast to every node, including the caller's. The caller
 * keeps ownership of msg.
 *
 * @param size  size of the message in bytes
 * @param msg   the message to broadcast
 */
void CmiSyncNodeBroadcastAllFn(int size, char *msg) {
    /* my own node first ... */
    CmiSyncNodeSendFn(CmiMyNode(), size, msg);
    /* ... then all the other nodes */
    CmiSyncNodeBroadcastFn(size, msg);
}
346 CmiCommHandle CmiAsyncNodeBroadcastAllFn(int size, char *msg) {
347 CmiSendNodeSelf(CopyMsg(msg, size));
348 return CmiAsyncNodeBroadcastFn(size, msg);
/**
 * Node-level broadcast to every node, including the caller's, giving up
 * ownership of the message: after the remote sends the message itself is
 * handed to the local node via CmiSendNodeSelf().
 *
 * @param size  size of the message in bytes
 * @param msg   the message; must not be used by the caller afterwards
 */
void CmiFreeNodeBroadcastAllFn(int size, char *msg) {
    /* remote nodes first */
    CmiSyncNodeBroadcastFn(size, msg);

    /* Local delivery has to come last. A node-level message can be picked
     * up by any PE of this node, so pushing it onto the node-level queue
     * may be followed immediately by another core popping it -- possibly
     * before it has been sent to the other nodes. That is why
     * CmiSendNodeSelf must be called after CmiSyncNodeBroadcastFn. */
    CmiSendNodeSelf(msg);
}
362 #endif
363 /* ##### End of Functions Related with Message Sending OPs ##### */
365 #if ! CMK_MULTICAST_LIST_USE_COMMON_CODE
/**
 * Send a message to a list of PEs by delegating to the machine layer's
 * LrtsSyncListSendFn().
 *
 * @param npes  number of entries in pes
 * @param pes   destination PE list
 * @param len   size of the message in bytes
 * @param msg   the message to send
 */
void CmiSyncListSendFn(int npes, int *pes, int len, char *msg) {
    LrtsSyncListSendFn(npes, pes, len, msg);
}
372 CmiCommHandle CmiAsyncListSendFn(int npes, int *pes, int len, char *msg)
374 return LrtsAsyncListSendFn(npes, pes, len, msg);
/**
 * "Free" variant of the list send; delegates to the machine layer's
 * LrtsFreeListSendFn().
 *
 * @param npes  number of entries in pes
 * @param pes   destination PE list
 * @param len   size of the message in bytes
 * @param msg   the message to send
 */
void CmiFreeListSendFn(int npes, int *pes, int len, char *msg) {
    LrtsFreeListSendFn(npes, pes, len, msg);
}
382 #endif