/*
   This source code file is part of thread_mpi.
   Written by Sander Pronk, Erik Lindahl, and possibly others.

   Copyright (c) 2009,2016, Sander Pronk, Erik Lindahl.
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
   1) Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
   2) Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
   3) Neither the name of the copyright holders nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY US ''AS IS'' AND ANY
   EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   DISCLAIMED. IN NO EVENT SHALL WE BE LIABLE FOR ANY
   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

   If you want to redistribute modifications, please consider that
   scientific software is very special. Version control is crucial -
   bugs must be traceable. We will be happy to consider code for
   inclusion in the official distribution, but derived work should not
   be called official thread_mpi. Details are found in the README & COPYING
   files.
 */
#ifdef HAVE_TMPI_CONFIG_H
#include "tmpi_config.h"
#endif

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>

#include "impl.h"
#include "collective.h"


int tMPI_Gather(const void* sendbuf, int sendcount, tMPI_Datatype sendtype,
                void* recvbuf, int recvcount, tMPI_Datatype recvtype,
                int root, tMPI_Comm comm)
{
    int                 synct;
    struct coll_env    *cev;
    int                 myrank;
    int                 ret = TMPI_SUCCESS;
    struct tmpi_thread *cur = tMPI_Get_current();

#ifdef TMPI_PROFILE
    tMPI_Profile_count_start(cur);
#endif
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Gather(%p, %d, %p, %p, %d, %p, %d, %p)",
                     sendbuf, sendcount, sendtype,
                     recvbuf, recvcount, recvtype, root, comm);
#endif

    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }
    myrank = tMPI_Comm_seek_rank(comm, cur);

    /* we increase our counter, and determine which coll_env we get */
    cev = tMPI_Get_cev(comm, myrank, &synct);
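
    /* From here the root thread and the other threads take different paths:
       the root collects every thread's contribution into recvbuf, while the
       non-root threads merely post their send buffer and wait until the root
       has copied it. The read_data[] flags track which ranks have already
       been collected, and the recv_ev event signals newly posted data. */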
    if (myrank == root)
    {
        int i;
        int n_remaining = comm->grp.N-1;
        /* do root transfer */
        if (sendbuf != TMPI_IN_PLACE)
        {
            tMPI_Coll_root_xfer(comm, sendtype, recvtype,
                                sendtype->size*sendcount,
                                recvtype->size*recvcount,
                                (void*)sendbuf,
                                (char*)recvbuf+myrank*recvcount*recvtype->size,
                                &ret);
        }
        for (i = 0; i < comm->grp.N; i++)
        {
            cev->met[myrank].read_data[i] = FALSE;
        }
        cev->met[myrank].read_data[myrank] = TRUE;

        /* wait for data availability as long as there are xfers to be done */
        while (n_remaining > 0)
        {
#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
            tMPI_Profile_wait_start(cur);
#endif
            tMPI_Event_wait( &(cev->met[myrank]).recv_ev );
#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
            tMPI_Profile_wait_stop(cur, TMPIWAIT_Coll_recv);
#endif
            /* now check all of them */
            for (i = 0; i < comm->grp.N; i++)
            {
                if (!cev->met[myrank].read_data[i] &&
                    (tMPI_Atomic_get(&(cev->met[i].current_sync)) == synct))
                {
                    tMPI_Mult_recv(comm, cev, i, 0, TMPI_GATHER_TAG, recvtype,
                                   recvcount*recvtype->size,
                                   (char*)recvbuf+i*recvcount*recvtype->size,
                                   &ret);
                    tMPI_Event_process( &(cev->met[myrank]).recv_ev, 1);
                    if (ret != TMPI_SUCCESS)
                    {
                        return ret;
                    }
                    cev->met[myrank].read_data[i] = TRUE;
                    n_remaining--;
                }
            }
        }
    }
    else
    {
        if (!sendbuf && sendcount > 0) /* don't do pointer arithmetic on a NULL ptr */
        {
            return tMPI_Error(comm, TMPI_ERR_BUF);
        }

        /* first set up the data just to root. */
        ret = tMPI_Post_multi(cev, myrank, 0, TMPI_GATHER_TAG, sendtype,
                              sendcount*sendtype->size, (void*)sendbuf, 1, synct, root);
        if (ret != TMPI_SUCCESS)
        {
            return ret;
        }
        /* and wait until root is done copying */
        tMPI_Wait_for_others(cev, myrank);
    }
#ifdef TMPI_PROFILE
    tMPI_Profile_count_stop(cur, TMPIFN_Gather);
#endif
    return ret;
}
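
/* Usage sketch (not part of the library): a minimal example of how a
   per-thread function might call tMPI_Gather so that rank 0 collects one
   int from every thread. The function name gather_example and the header
   path are illustrative assumptions, not code from this file. */
#if 0   /* illustrative only; not compiled into the library */
#include <stdlib.h>
#include "thread_mpi/tmpi.h"

static void gather_example(const void* arg)
{
    int  myrank, nthreads;
    int  sendval;
    int *recvbuf = NULL;

    (void)arg;
    tMPI_Comm_rank(TMPI_COMM_WORLD, &myrank);
    tMPI_Comm_size(TMPI_COMM_WORLD, &nthreads);

    sendval = 100 + myrank;
    if (myrank == 0)
    {
        recvbuf = (int*)malloc(sizeof(int)*nthreads);
    }

    /* The root may instead pass TMPI_IN_PLACE as sendbuf if its own
       contribution is already stored at its slot in recvbuf. */
    tMPI_Gather(&sendval, 1, TMPI_INT,
                recvbuf, 1, TMPI_INT,
                0, TMPI_COMM_WORLD);

    if (myrank == 0)
    {
        /* recvbuf[i] now holds rank i's sendval */
        free(recvbuf);
    }
}
#endif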


int tMPI_Gatherv(const void* sendbuf, int sendcount, tMPI_Datatype sendtype,
                 void* recvbuf, int *recvcounts, int *displs,
                 tMPI_Datatype recvtype, int root, tMPI_Comm comm)
{
    int                 synct;
    struct coll_env    *cev;
    int                 myrank;
    int                 ret = TMPI_SUCCESS;
    struct tmpi_thread *cur = tMPI_Get_current();

#ifdef TMPI_PROFILE
    tMPI_Profile_count_start(cur);
#endif
#ifdef TMPI_TRACE
    tMPI_Trace_print("tMPI_Gatherv(%p, %d, %p, %p, %p, %p, %p, %d, %p)",
                     sendbuf, sendcount, sendtype, recvbuf,
                     recvcounts, displs, recvtype, root, comm);
#endif

    if (!comm)
    {
        return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
    }
    myrank = tMPI_Comm_seek_rank(comm, cur);

    /* we increase our counter, and determine which coll_env we get */
    cev = tMPI_Get_cev(comm, myrank, &synct);
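
    /* Same collection scheme as tMPI_Gather, except that each rank i may
       contribute a different count (recvcounts[i]) and the root places that
       contribution at an element offset of displs[i] into recvbuf. */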
    if (myrank == root)
    {
        int i;
        int n_remaining = comm->grp.N-1;
        /* do root transfer */
        if (sendbuf != TMPI_IN_PLACE)
        {
            tMPI_Coll_root_xfer(comm, sendtype, recvtype,
                                sendtype->size*sendcount,
                                recvtype->size*recvcounts[myrank],
                                (void*)sendbuf,
                                (char*)recvbuf+displs[myrank]*recvtype->size,
                                &ret);
        }
        for (i = 0; i < comm->grp.N; i++)
        {
            cev->met[myrank].read_data[i] = FALSE;
        }
        cev->met[myrank].read_data[myrank] = TRUE;

        /* wait for data availability as long as there are xfers to be done */
        while (n_remaining > 0)
        {
#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
            tMPI_Profile_wait_start(cur);
#endif
            tMPI_Event_wait( &(cev->met[myrank]).recv_ev );
#if defined(TMPI_PROFILE) && defined(TMPI_CYCLE_COUNT)
            tMPI_Profile_wait_stop(cur, TMPIWAIT_Coll_recv);
#endif
            for (i = 0; i < comm->grp.N; i++)
            {
                if (!cev->met[myrank].read_data[i] &&
                    (tMPI_Atomic_get(&(cev->met[i].current_sync)) == synct) )
                {
                    tMPI_Event_process( &(cev->met[myrank]).recv_ev, 1);
                    tMPI_Mult_recv(comm, cev, i, 0, TMPI_GATHERV_TAG, recvtype,
                                   recvcounts[i]*recvtype->size,
                                   (char*)recvbuf+displs[i]*recvtype->size,
                                   &ret);
                    if (ret != TMPI_SUCCESS)
                    {
                        return ret;
                    }
                    cev->met[myrank].read_data[i] = TRUE;
                    n_remaining--;
                }
            }
        }
    }
    else
    {
        if (!sendbuf && sendcount > 0) /* don't do pointer arithmetic on a NULL ptr */
        {
            return tMPI_Error(comm, TMPI_ERR_BUF);
        }

        /* first set up the data just to root. */
        ret = tMPI_Post_multi(cev, myrank, 0, TMPI_GATHERV_TAG, sendtype,
                              sendcount*sendtype->size, (void*)sendbuf, 1, synct, root);
        if (ret != TMPI_SUCCESS)
        {
            return ret;
        }
        /* and wait until root is done copying */
        tMPI_Wait_for_others(cev, myrank);
    }
#ifdef TMPI_PROFILE
    tMPI_Profile_count_stop(cur, TMPIFN_Gatherv);
#endif
    return ret;
}
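
/* Usage sketch (not part of the library): a minimal, hypothetical example of
   calling tMPI_Gatherv with per-rank counts. The function name
   gatherv_example and the header path are illustrative assumptions; the
   tMPI_* calls mirror the standard MPI interface provided by thread_mpi. */
#if 0   /* illustrative only; not compiled into the library */
#include <stdlib.h>
#include "thread_mpi/tmpi.h"

static void gatherv_example(const void* arg)
{
    int  myrank, nthreads, i;
    int  sendcount;
    int *sendbuf, *recvbuf = NULL, *recvcounts = NULL, *displs = NULL;

    (void)arg;
    tMPI_Comm_rank(TMPI_COMM_WORLD, &myrank);
    tMPI_Comm_size(TMPI_COMM_WORLD, &nthreads);

    /* rank r contributes r+1 integers */
    sendcount = myrank + 1;
    sendbuf   = (int*)malloc(sizeof(int)*sendcount);
    for (i = 0; i < sendcount; i++)
    {
        sendbuf[i] = myrank;
    }

    if (myrank == 0)
    {
        int total = nthreads*(nthreads+1)/2;
        recvbuf    = (int*)malloc(sizeof(int)*total);
        recvcounts = (int*)malloc(sizeof(int)*nthreads);
        displs     = (int*)malloc(sizeof(int)*nthreads);
        for (i = 0; i < nthreads; i++)
        {
            recvcounts[i] = i + 1;
            displs[i]     = i*(i+1)/2;   /* prefix sum of the counts */
        }
    }

    /* only the root needs valid recvbuf/recvcounts/displs */
    tMPI_Gatherv(sendbuf, sendcount, TMPI_INT,
                 recvbuf, recvcounts, displs, TMPI_INT,
                 0, TMPI_COMM_WORLD);

    free(sendbuf);
    if (myrank == 0)
    {
        free(recvbuf);
        free(recvcounts);
        free(displs);
    }
}
#endif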