Add const to thread-MPI functions
[gromacs.git] / src / external / thread_mpi / src / scatter.c
/*
   This source code file is part of thread_mpi.
   Written by Sander Pronk, Erik Lindahl, and possibly others.

   Copyright (c) 2009,2016, Sander Pronk, Erik Lindahl.
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
   1) Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
   2) Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
   3) Neither the name of the copyright holders nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY US ''AS IS'' AND ANY
   EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   DISCLAIMED. IN NO EVENT SHALL WE BE LIABLE FOR ANY
   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

   If you want to redistribute modifications, please consider that
   scientific software is very special. Version control is crucial -
   bugs must be traceable. We will be happy to consider code for
   inclusion in the official distribution, but derived work should not
   be called official thread_mpi. Details are found in the README & COPYING
   files.
 */
#ifdef HAVE_TMPI_CONFIG_H
#include "tmpi_config.h"
#endif

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>

#include "impl.h"
#include "collective.h"
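
/* Both scatter variants below follow the same scheme (an outline of what
   this file does, not an additional API guarantee): the root thread posts
   per-rank buffer pointers and sizes in its collective environment
   (coll_env), signals each receiver's event, and, when the data is small
   enough, also copies it into a copy buffer so the root can return before
   all readers have finished; every non-root thread waits for the root's
   post and then copies its own slice out with tMPI_Mult_recv(). */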
int tMPI_Scatter(const void* sendbuf, int sendcount, tMPI_Datatype sendtype,
                 void* recvbuf, int recvcount, tMPI_Datatype recvtype,
                 int root, tMPI_Comm comm)
{
    int                 synct;
    struct coll_env    *cev;
    int                 myrank;
    int                 ret = TMPI_SUCCESS;
    struct tmpi_thread *cur = tMPI_Get_current();

#ifdef TMPI_PROFILE
    tMPI_Profile_count_start(cur);
#endif
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Scatter(%p, %d, %p, %p, %d, %p, %d, %p)",
                     sendbuf, sendcount, sendtype,
                     recvbuf, recvcount, recvtype, root, comm);
#endif
    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }
    myrank = tMPI_Comm_seek_rank(comm, cur);

    /* we increase our counter, and determine which coll_env we get */
    cev = tMPI_Get_cev(comm, myrank, &synct);

    if (myrank == root)
    {
        int       i;
        size_t    sendsize        = sendtype->size*sendcount;
        size_t    total_send_size = 0;
#ifdef USE_COLLECTIVE_COPY_BUFFER
        tmpi_bool using_cb;
#endif

        if (!sendbuf) /* don't do pointer arithmetic on a NULL ptr */
        {
            return tMPI_Error(comm, TMPI_ERR_BUF);
        }

        /* we set up multiple posts, so no Post_multi */
        cev->met[myrank].tag      = TMPI_SCATTER_TAG;
        cev->met[myrank].datatype = sendtype;
        tMPI_Atomic_memory_barrier_rel();
        tMPI_Atomic_set( &(cev->met[myrank].n_remaining), cev->N-1 );
        for (i = 0; i < comm->grp.N; i++)
        {
            total_send_size            += sendtype->size*sendcount;
            cev->met[myrank].bufsize[i] = sendsize;
            cev->met[myrank].buf[i]     = (char*)sendbuf+sendsize*i;
        }
#ifdef USE_COLLECTIVE_COPY_BUFFER
        /* we must copy our own data too, unfortunately (otherwise there's
           a hole) */
        using_cb = (total_send_size < (size_t)((cev->N)*COPY_BUFFER_SIZE));
        cev->met[myrank].using_cb = using_cb;
        if (using_cb)
        {
            /* we set cpbuf stuff to NULL initially */
            for (i = 0; i < cev->N; i++)
            {
                tMPI_Atomic_ptr_set(&(cev->met[myrank].cpbuf[i]), NULL);
            }
            tMPI_Atomic_set(&(cev->met[myrank].buf_readcount), 0);
        }
#endif

        /* post availability */
        for (i = 0; i < cev->N; i++)
        {
            if (i != myrank)
            {
                tMPI_Event_signal( &(cev->met[i].recv_ev) );
            }
        }

#ifdef USE_COLLECTIVE_COPY_BUFFER
        /* do the copy_buffer thing */
        if (using_cb)
        {
            /* copy the buffer locally. First allocate */
            cev->met[myrank].cb = tMPI_Copy_buffer_list_get(
                        &(tMPI_Get_thread(comm, myrank)->cbl_multi));
            if (cev->met[myrank].cb->size < total_send_size)
            {
                return tMPI_Error(comm, TMPI_ERR_COPY_BUFFER_SIZE);
            }
            /* copy to the new buf */
            memcpy(cev->met[myrank].cb->buf, sendbuf, total_send_size);

            /* post the new buf */
            for (i = 0; i < cev->N; i++)
            {
                tMPI_Atomic_memory_barrier_rel();
                tMPI_Atomic_ptr_set(&(cev->met[myrank].cpbuf[i]),
                                    (char*)cev->met[myrank].cb->buf+sendsize*i);
            }
        }
#endif

        /* do root transfer */
        if (recvbuf != TMPI_IN_PLACE)
        {
            tMPI_Coll_root_xfer(comm, sendtype, recvtype,
                                sendsize, recvtype->size*recvcount,
                                (char*)sendbuf+sendsize*myrank,
                                recvbuf, &ret);
        }

        /* and wait until everybody is done copying */
        tMPI_Wait_for_others(cev, myrank);
    }
    else
    {
        /* wait until the root becomes available, then receive our slice
           from the root's posted buffers */
        size_t bufsize = recvcount*recvtype->size;

        tMPI_Wait_for_data(cur, cev, myrank);
        tMPI_Mult_recv(comm, cev, root, myrank, TMPI_SCATTER_TAG, recvtype,
                       bufsize, recvbuf, &ret);
    }
#ifdef TMPI_PROFILE
    tMPI_Profile_count_stop(cur, TMPIFN_Scatter);
#endif
    return ret;
}
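
/* Usage sketch (illustrative only; the function and variable names below
   are hypothetical, not part of thread_mpi, and the header path may vary
   by installation). Every thread of the communicator calls tMPI_Scatter();
   thread i ends up with elements [4*i, 4*i+4) of the root's array:

       #include <stdlib.h>
       #include "thread_mpi/tmpi.h"

       static void scatter_example(void)
       {
           int  myrank, nthreads, i;
           int *all = NULL;
           int  part[4];

           tMPI_Comm_size(TMPI_COMM_WORLD, &nthreads);
           tMPI_Comm_rank(TMPI_COMM_WORLD, &myrank);
           if (myrank == 0)
           {
               all = (int*)malloc(sizeof(int)*4*nthreads);
               for (i = 0; i < 4*nthreads; i++)
               {
                   all[i] = i;
               }
           }
           tMPI_Scatter(all, 4, TMPI_INT, part, 4, TMPI_INT, 0,
                        TMPI_COMM_WORLD);
           free(all);
       }
 */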
int tMPI_Scatterv(const void* sendbuf, int *sendcounts, int *displs,
                  tMPI_Datatype sendtype, void* recvbuf, int recvcount,
                  tMPI_Datatype recvtype, int root, tMPI_Comm comm)
{
    int                 synct;
    struct coll_env    *cev;
    int                 myrank;
    int                 ret = TMPI_SUCCESS;
    struct tmpi_thread *cur = tMPI_Get_current();

#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Scatterv(%p, %p, %p, %p, %p, %d, %p, %d, %p)",
                     sendbuf, sendcounts, displs, sendtype, recvbuf,
                     recvcount, recvtype, root, comm);
#endif
#ifdef TMPI_PROFILE
    tMPI_Profile_count_start(cur);
#endif

    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }
    myrank = tMPI_Comm_seek_rank(comm, cur);

    /* we increase our counter, and determine which coll_env we get */
    cev = tMPI_Get_cev(comm, myrank, &synct);

    if (myrank == root)
    {
        int       i;
        size_t    total_send_size = 0;
#ifdef USE_COLLECTIVE_COPY_BUFFER
        tmpi_bool using_cb;
#endif

        if (!sendbuf) /* don't do pointer arithmetic on a NULL ptr */
        {
            return tMPI_Error(comm, TMPI_ERR_BUF);
        }

        /* we set up multiple posts, so no Post_multi */
        cev->met[myrank].tag      = TMPI_SCATTERV_TAG;
        cev->met[myrank].datatype = sendtype;
        tMPI_Atomic_memory_barrier_rel();
        tMPI_Atomic_set( &(cev->met[myrank].n_remaining), cev->N-1 );
        for (i = 0; i < cev->N; i++)
        {
            total_send_size            += sendtype->size*sendcounts[i];
            cev->met[myrank].bufsize[i] = sendtype->size*sendcounts[i];
            cev->met[myrank].buf[i]     = (char*)sendbuf +
                sendtype->size*displs[i];
        }
#ifdef USE_COLLECTIVE_COPY_BUFFER
        /* we must copy our own data too, unfortunately (otherwise there's
           a hole) */
        using_cb = (total_send_size < (size_t)((cev->N)*COPY_BUFFER_SIZE));
        cev->met[myrank].using_cb = using_cb;
        if (using_cb)
        {
            /* we set cpbuf stuff to NULL initially */
            for (i = 0; i < cev->N; i++)
            {
                tMPI_Atomic_ptr_set(&(cev->met[myrank].cpbuf[i]), NULL);
            }
            tMPI_Atomic_set(&(cev->met[myrank].buf_readcount), 0);
        }
#endif

        /* post availability */
        for (i = 0; i < cev->N; i++)
        {
            if (i != myrank)
            {
                tMPI_Event_signal( &(cev->met[i].recv_ev) );
            }
        }

#ifdef USE_COLLECTIVE_COPY_BUFFER
        /* do the copy_buffer thing */
        if (using_cb)
        {
            /* copy the buffer locally. First allocate */
            cev->met[myrank].cb = tMPI_Copy_buffer_list_get(
                        &(tMPI_Get_thread(comm, myrank)->cbl_multi));
            if (cev->met[myrank].cb->size < total_send_size)
            {
                return tMPI_Error(comm, TMPI_ERR_COPY_BUFFER_SIZE);
            }
            /* copy to the new buf */
            memcpy(cev->met[myrank].cb->buf, sendbuf, total_send_size);

            /* post the new buf */
            for (i = 0; i < cev->N; i++)
            {
                tMPI_Atomic_memory_barrier_rel();
                tMPI_Atomic_ptr_set(&(cev->met[myrank].cpbuf[i]),
                                    (char*)cev->met[myrank].cb->buf +
                                    sendtype->size*displs[i]);
            }
        }
#endif

        /* do root transfer */
        if (recvbuf != TMPI_IN_PLACE)
        {
            tMPI_Coll_root_xfer(comm, sendtype, recvtype,
                                sendtype->size*sendcounts[myrank],
                                recvtype->size*recvcount,
                                (char*)sendbuf+sendtype->size*displs[myrank],
                                recvbuf,
                                &ret);
        }

        /* and wait until everybody is done copying */
        tMPI_Wait_for_others(cev, myrank);
    }
    else
    {
        /* wait until the root becomes available, then receive our slice
           from the root's posted buffers */
        size_t bufsize = recvcount*recvtype->size;

        tMPI_Wait_for_data(cur, cev, myrank);
        tMPI_Mult_recv(comm, cev, root, myrank, TMPI_SCATTERV_TAG,
                       recvtype, bufsize, recvbuf, &ret);
    }
#ifdef TMPI_PROFILE
    tMPI_Profile_count_stop(cur, TMPIFN_Scatterv);
#endif
    return ret;
}
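
/* Usage sketch for the variable-count case (illustrative; the values are
   hypothetical). displs[] is measured in elements of sendtype, and src[]
   is only read on the root, so with three threads rank 0 receives {10},
   rank 1 receives {20, 30} and rank 2 receives {40, 50, 60}:

       int src[6]        = { 10, 20, 30, 40, 50, 60 };
       int sendcounts[3] = { 1, 2, 3 };
       int displs[3]     = { 0, 1, 3 };
       int dst[3];
       int myrank;

       tMPI_Comm_rank(TMPI_COMM_WORLD, &myrank);
       tMPI_Scatterv(src, sendcounts, displs, TMPI_INT,
                     dst, sendcounts[myrank], TMPI_INT, 0, TMPI_COMM_WORLD);
 */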