/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 *
 * $Id:
 *
 * This file is part of Gromacs        Copyright (c) 1991-2009
 * David van der Spoel, Erik Lindahl, University of Groningen.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org
 *
 * And Hey:
 * Gnomes, ROck Monsters And Chili Sauce
 */

#ifndef _GMX_THREAD_MPI_H_
#define _GMX_THREAD_MPI_H_
/** \file thread_mpi.h
 *
 * \brief Partial implementation of MPI using only threads.
 *
 * See the MPI specification at
 * http://www.mpi-forum.org/docs/docs.html
 * for an explanation of what these functions do.
 *
 * Because this is a thread-based library, be very careful with global
 * variables and static variables in functions: they are shared across
 * all threads and lead to conflicts if not properly protected by mutexes
 * or barriers (see the illustrative sketch below this comment block).
 *
 * This library supports all of MPI that is used in Gromacs, but could
 * still be improved in two respects:
 * - the gmx_mutexes should be replaced by busy-waits on atomic operations
 *   for performance reasons (the aim of a pthreads mutex, scheduling out
 *   waiting threads, is antithetical to the requirements of Gromacs:
 *   low latency and high throughput).
 * - Some of the global communication functions (bcast, scatter, alltoall)
 *   could perhaps use a binary tree-like distribution method rather than
 *   simply letting each receiver thread read from one distributor.
 *
 * Right now, this library can only be enabled using cmake (although some
 * work has been done on autoconf). The relevant option is GMX_THREADED.
 */
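
/* Illustrative sketch of the caveat above (a hypothetical helper, not part
   of this library; assumes <stdio.h>): because every MPI "rank" is a thread
   in the same process, function-level static data is shared by all ranks. */
#if 0
#include <stdio.h>

/* BROKEN with thread_mpi: all ranks/threads share this one buffer, so
   concurrent callers overwrite each other's results. */
static char *rank_label(int rank)
{
    static char buf[32];               /* shared across all threads! */
    sprintf(buf, "rank %d", rank);
    return buf;
}

/* Safer: let the caller own the storage, so each thread uses its own. */
static void rank_label_r(int rank, char *buf, size_t len)
{
    snprintf(buf, len, "rank %d", rank);
}
#endif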
#ifdef __cplusplus
extern "C"
{
#endif
#if 0
} /* Avoids screwing up auto-indentation */
#endif
/* The MPI_Comm structure contains the group of processes to communicate
   with (defines the scope for global operations such as broadcast) */
typedef struct mpi_comm_ *MPI_Comm;
/* The group part of the MPI_Comm structure */
typedef struct mpi_group_ *MPI_Group;
/* Request structure for holding data about non-blocking transfers */
typedef struct mpi_req_ *MPI_Request;
/* status of receives */
typedef struct mpi_status_ MPI_Status;
/* data types */
typedef struct mpi_datatype_ *MPI_Datatype;
/** MPI data types as specified by the MPI standard.
    Note that not all are available. */
extern MPI_Datatype MPI_CHAR;
extern MPI_Datatype MPI_SHORT;
extern MPI_Datatype MPI_INT;
extern MPI_Datatype MPI_LONG;
#ifdef SIZEOF_LONG_LONG_INT
extern MPI_Datatype MPI_LONG_LONG;
extern MPI_Datatype MPI_LONG_LONG_INT;
#endif

extern MPI_Datatype MPI_SIGNED_CHAR;
extern MPI_Datatype MPI_UNSIGNED_CHAR;
extern MPI_Datatype MPI_UNSIGNED_SHORT;
extern MPI_Datatype MPI_UNSIGNED;
extern MPI_Datatype MPI_UNSIGNED_LONG;
#ifdef SIZEOF_LONG_LONG_INT
extern MPI_Datatype MPI_UNSIGNED_LONG_LONG;
#endif

extern MPI_Datatype MPI_FLOAT;
extern MPI_Datatype MPI_DOUBLE;
extern MPI_Datatype MPI_LONG_DOUBLE;

/*extern MPI_Datatype MPI_UNSIGNED_WCHAR, we don't want this right now, anyway */
extern MPI_Datatype MPI_BYTE;
/* error codes */
#define MPI_SUCCESS                 0
#define MPI_ERR_GROUP               1
#define MPI_ERR_COMM                2
#define MPI_ERR_STATUS              3
#define MPI_ERR_DIMS                4
#define MPI_ERR_COORDS              5
#define MPI_ERR_CART_CREATE_NPROCS  6
#define MPI_ERR_XFER_COUNTERPART    7
#define MPI_ERR_XFER_BUFSIZE        8
#define MPI_ERR_SEND_DEST           9
#define MPI_ERR_RECV_SRC            10
#define MPI_ERR_BUF                 11
#define MPI_ERR_OP_FN               12
#define MPI_ERR_UNKNOWN             13
#define MPI_FAILURE                 14
#define N_MPI_ERR                   15

#define MPI_MAX_ERROR_STRING        256

#define MPI_UNDEFINED               -1
#if 1
/* error handling */
typedef void (*MPI_Errhandler_fn)(MPI_Comm*, int*);
typedef struct mpi_errhandler_ *MPI_Errhandler;

extern MPI_Errhandler MPI_ERRORS_ARE_FATAL;
extern MPI_Errhandler MPI_ERRORS_RETURN;
#endif
/* miscellaneous defines */
#define MPI_ANY_SOURCE -1
#define MPI_ANY_TAG -1

/* topology test defines */
#define MPI_CART 1
#define MPI_GRAPH 2
/** All communicators */
extern MPI_Comm MPI_COMM_WORLD;
/* these are 0 instead of NULL so that we can compare against them */
#define MPI_COMM_NULL 0
#define MPI_GROUP_NULL 0
/** empty group */
extern MPI_Group MPI_GROUP_EMPTY;

#define MPI_MAX_PROCESSOR_NAME 128
/* MPI status */
#define MPI_STATUS_IGNORE 0
#define MPI_STATUSES_IGNORE 0

/* the status object is user-maintained. */
struct mpi_status_
{
    int MPI_SOURCE;     /* the message source rank */
    int MPI_TAG;        /* the message source tag */
    int MPI_ERROR;      /* the message error */
    int transferred;    /* the amount of data actually transferred;
                           see MPI_Get_count() */
};

#define MPI_REQUEST_NULL 0
/* collective communication specials: */
#define MPI_IN_PLACE 0
/** MPI_Reduce operators.
    These all work (except obviously bad combinations like bitwise
    and/or/xor on floats, etc.): */
typedef enum
{
    MPI_MAX,    /* maximum */
    MPI_MIN,    /* minimum */
    MPI_SUM,    /* sum */
    MPI_PROD,   /* product */
    MPI_LAND,   /* logical and */
    MPI_BAND,   /* binary and */
    MPI_LOR,    /* logical or */
    MPI_BOR,    /* binary or */
    MPI_LXOR,   /* logical xor */
    MPI_BXOR    /* binary xor */
} MPI_Op;
/* function for MPI_COMM_SELF */
MPI_Comm tMPI_Get_comm_self(void);
/* this must be a function because it's a thread-local property: */
#define MPI_COMM_SELF (tMPI_Get_comm_self())
/** MPI initializer. Seeks the argument '-np n', where n is the number of
    threads that will be created. These new threads then run main() again,
    with the original argc and argv. */
int MPI_Init(int *argc, char ***argv);

/** Alternative thread MPI initializer. Creates N threads (including the main
    thread) that run main() again so they can catch up to MPI_Init themselves. */
int MPI_Init_N(int N);
/** get the number of threads that will be requested (can be called before
    MPI_Init() ) */
int tMPI_Get_N(int *argc, char ***argv);

/** waits for all threads to join() */
int MPI_Finalize(void);
/** just kills all threads. Not really necessary, because exit() will do
    that for us anyway. */
int MPI_Abort(MPI_Comm comm, int errorcode);
/** whether MPI_Init, but not yet MPI_Finalize, has been run */
int MPI_Initialized(int *flag);
/** whether MPI_Finalize has been run */
int MPI_Finalized(int *flag);
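
/* A minimal sketch of the expected start-up sequence (hypothetical caller
   code, not part of this header; it only uses declarations made here). */
#if 0
#include <stdio.h>

int main(int argc, char *argv[])
{
    int rank, size;

    /* spawns the threads requested with '-np n'; each of them re-enters
       main() and returns from MPI_Init() as its own rank */
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    printf("thread %d of %d\n", rank, size);
    MPI_Finalize();    /* the main thread waits for all threads to join */
    return 0;
}
#endif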
/** create an error handler object from a function */
int MPI_Create_errhandler(MPI_Errhandler_fn *function,
                          MPI_Errhandler *errhandler);
/** free the error handler object */
int MPI_Errhandler_free(MPI_Errhandler *errhandler);
/** set the error handler */
int MPI_Comm_set_errhandler(MPI_Comm comm, MPI_Errhandler errhandler);
/** get the error handler */
int MPI_Comm_get_errhandler(MPI_Comm comm, MPI_Errhandler *errhandler);
/** get the error string associated with an error code */
int MPI_Error_string(int errorcode, char *string, int *resultlen);
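
/* Sketch of installing a custom error handler (the handler body is
   hypothetical; assumes <stdio.h>). Note that, as declared above,
   MPI_Create_errhandler() takes a pointer to an MPI_Errhandler_fn. */
#if 0
#include <stdio.h>

static void warn_only(MPI_Comm *comm, int *err)
{
    char msg[MPI_MAX_ERROR_STRING];
    int  len;

    (void)comm;
    MPI_Error_string(*err, msg, &len);
    fprintf(stderr, "MPI error: %s\n", msg);
}

static void install_handler(void)
{
    MPI_Errhandler_fn fn = warn_only;
    MPI_Errhandler    handler;

    MPI_Create_errhandler(&fn, &handler);
    MPI_Comm_set_errhandler(MPI_COMM_WORLD, handler);
}
#endif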
/* system query: */
/** returns a string with the thread number */
int MPI_Get_processor_name(char *name, int *resultlen);
/** get an elapsed time value as a double, in seconds */
double MPI_Wtime(void);
#if 0
/** get the resolution of MPI_Wtime as a double, in seconds */
double MPI_Wtick(void);
#endif
/** get the size of a group */
int MPI_Group_size(MPI_Group group, int *size);
/** get the rank of the calling thread within a group */
int MPI_Group_rank(MPI_Group group, int *rank);
/** create a new group containing the n listed ranks of an existing group */
int MPI_Group_incl(MPI_Group group, int n, int *ranks, MPI_Group *newgroup);
/** get a pointer to the group in the comm */
int MPI_Comm_group(MPI_Comm comm, MPI_Group *group);
/** de-allocate a group */
int MPI_Group_free(MPI_Group *group);

/** get the comm size */
int MPI_Comm_size(MPI_Comm comm, int *size);
/** get the rank in comm of the current process */
int MPI_Comm_rank(MPI_Comm comm, int *rank);
/** de-allocate a comm */
int MPI_Comm_free(MPI_Comm *comm);
/** create a comm based on a group */
int MPI_Comm_create(MPI_Comm comm, MPI_Group group, MPI_Comm *newcomm);
/** split up a comm into same-colored sub-comms, ordered by key */
int MPI_Comm_split(MPI_Comm comm, int color, int key, MPI_Comm *newcomm);
/** make a duplicate of a comm */
int MPI_Comm_dup(MPI_Comm comm, MPI_Comm *newcomm);
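
/* Sketch of splitting MPI_COMM_WORLD into two halves with MPI_Comm_split()
   (hypothetical usage; only the declarations above are assumed). */
#if 0
static void split_in_two(void)
{
    int      rank, size;
    MPI_Comm half;

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    /* 'color' selects the sub-comm, 'key' orders the ranks within it */
    MPI_Comm_split(MPI_COMM_WORLD, rank < size/2 ? 0 : 1, rank, &half);
    /* ... use 'half' for communication within each group ... */
    MPI_Comm_free(&half);
}
#endif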
/* topology functions */
/** check what type of topology the comm has */
int MPI_Topo_test(MPI_Comm comm, int status);
/** check which dimensionality a topology has */
int MPI_Cartdim_get(MPI_Comm comm, int *ndims);
/** check which size and pbc a Cartesian topology has */
int MPI_Cart_get(MPI_Comm comm, int maxdims, int *dims, int *periods,
                 int *coords);
/** check which rank a set of process coordinates has in a Cartesian topology */
int MPI_Cart_rank(MPI_Comm comm, int *coords, int *rank);
/** check which coordinates a process rank has in a Cartesian topology */
int MPI_Cart_coords(MPI_Comm comm, int rank, int maxdims, int *coords);
/** check which rank this process would have in a Cartesian topology */
int MPI_Cart_map(MPI_Comm comm, int ndims, int *dims, int *periods,
                 int *newrank);
/** create a comm with a Cartesian topology */
int MPI_Cart_create(MPI_Comm comm_old, int ndims, int *dims, int *periods,
                    int reorder, MPI_Comm *comm_cart);
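
/* Sketch of building a periodic 2x2 Cartesian grid and querying this
   thread's coordinates (hypothetical usage; assumes a run with 4 threads). */
#if 0
static void make_2d_grid(void)
{
    int      dims[2]    = {2, 2};    /* 2x2 grid, assumes 4 threads */
    int      periods[2] = {1, 1};    /* periodic in both dimensions */
    int      rank, coords[2];
    MPI_Comm cart;

    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 0, &cart);
    MPI_Comm_rank(cart, &rank);
    MPI_Cart_coords(cart, rank, 2, coords);
    /* coords[0], coords[1] now give this thread's position in the grid */
    MPI_Comm_free(&cart);
}
#endif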
/** create a contiguous data type (the only type possible right now) */
int MPI_Type_contiguous(int count, MPI_Datatype oldtype,
                        MPI_Datatype *newtype);
/** make the data type ready for use */
int MPI_Type_commit(MPI_Datatype *datatype);
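
/* Sketch of a derived datatype: a hypothetical "xyz" type of 3 doubles, so
   whole position vectors can be counted as single items in transfers. */
#if 0
static MPI_Datatype make_xyz_type(void)
{
    MPI_Datatype xyz;

    MPI_Type_contiguous(3, MPI_DOUBLE, &xyz);
    MPI_Type_commit(&xyz);
    return xyz;
}
#endif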
/** wait for all processes in comm to arrive here */
int MPI_Barrier(MPI_Comm comm);
/** blocking transfers. The actual transfer (copy) is done on the receiving end
    (so that the receiver's cache already contains the data that it presumably
    will use soon). */
/** send a message; waits until finished. */
int MPI_Send(void* buf, int count, MPI_Datatype datatype, int dest,
             int tag, MPI_Comm comm);
/** receive a message; waits until finished. */
int MPI_Recv(void* buf, int count, MPI_Datatype datatype, int source,
             int tag, MPI_Comm comm, MPI_Status *status);
/** send & receive a message at the same time; waits until finished. */
int MPI_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype,
                 int dest, int sendtag, void *recvbuf, int recvcount,
                 MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm,
                 MPI_Status *status);
/** get the number of actually transferred items from a transfer status */
int MPI_Get_count(MPI_Status *status, MPI_Datatype datatype, int *count);
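
/* Sketch of a blocking point-to-point transfer (hypothetical usage):
   rank 0 sends 10 ints to rank 1; rank 1 posts a larger receive and uses
   MPI_Get_count() to find out how many items actually arrived. */
#if 0
static void ping(int rank)
{
    int        buf[100] = {0};
    MPI_Status status;
    int        n;

    if (rank == 0)
    {
        MPI_Send(buf, 10, MPI_INT, 1, 0, MPI_COMM_WORLD);
    }
    else if (rank == 1)
    {
        MPI_Recv(buf, 100, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
        MPI_Get_count(&status, MPI_INT, &n);    /* n == 10 */
    }
}
#endif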
/** async send/recv. The actual transfer is usually done on the receiving
    end, during MPI_Wait, MPI_Waitall or MPI_Test. For MPI_Waitall,
    the incoming messages are processed in the order they come in.

    In the case of async receives, the sender may initiate the transfer,
    and there is a lock in the envelope to make sure that this does not
    happen on both ends simultaneously. */
/** initiate sending a message */
int MPI_Isend(void* buf, int count, MPI_Datatype datatype, int dest,
              int tag, MPI_Comm comm, MPI_Request *request);
/** initiate receiving a message */
int MPI_Irecv(void* buf, int count, MPI_Datatype datatype, int source,
              int tag, MPI_Comm comm, MPI_Request *request);
/** test whether a transfer has completed */
int MPI_Test(MPI_Request *request, int *flag, MPI_Status *status);
/** wait until a transfer has completed */
int MPI_Wait(MPI_Request *request, MPI_Status *status);
/** wait for several transfer requests to complete */
int MPI_Waitall(int count, MPI_Request *array_of_requests,
                MPI_Status *array_of_statuses);
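
/* Sketch of a deadlock-free neighbour exchange using non-blocking calls
   (hypothetical usage): both transfers are posted before waiting on either. */
#if 0
static void exchange(int left, int right)
{
    int         sendval = 1, recvval = 0;
    MPI_Request req[2];
    MPI_Status  stat[2];

    MPI_Irecv(&recvval, 1, MPI_INT, left,  0, MPI_COMM_WORLD, &req[0]);
    MPI_Isend(&sendval, 1, MPI_INT, right, 0, MPI_COMM_WORLD, &req[1]);
    MPI_Waitall(2, req, stat);
}
#endif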
/** multicast */
/** broadcast over entire comm from root */
int MPI_Bcast(void* buffer, int count, MPI_Datatype datatype, int root,
              MPI_Comm comm);

/** gather data from all processes in comm to root */
int MPI_Gather(void* sendbuf, int sendcount, MPI_Datatype sendtype,
               void* recvbuf, int recvcount, MPI_Datatype recvtype, int root,
               MPI_Comm comm);
/** gather irregularly laid out data from all processes in comm to root */
int MPI_Gatherv(void* sendbuf, int sendcount, MPI_Datatype sendtype,
                void* recvbuf, int *recvcounts, int *displs,
                MPI_Datatype recvtype, int root, MPI_Comm comm);

/** spread parts of sendbuf to all processes in comm from root */
int MPI_Scatter(void* sendbuf, int sendcount, MPI_Datatype sendtype,
                void* recvbuf, int recvcount, MPI_Datatype recvtype, int root,
                MPI_Comm comm);
/** spread irregularly laid out parts of sendbuf to all processes from root */
int MPI_Scatterv(void* sendbuf, int *sendcounts, int *displs,
                 MPI_Datatype sendtype, void* recvbuf, int recvcount,
                 MPI_Datatype recvtype, int root, MPI_Comm comm);
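
/* Sketch of a broadcast followed by a gather (hypothetical usage):
   root distributes a parameter, then collects one result per rank. */
#if 0
static void bcast_and_gather(int rank, double *results)
{
    double param = 0.0;
    double result;

    if (rank == 0)
    {
        param = 1.5;
    }
    MPI_Bcast(&param, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    result = param * rank;
    /* 'results' must hold comm-size doubles on root; it is ignored elsewhere */
    MPI_Gather(&result, 1, MPI_DOUBLE, results, 1, MPI_DOUBLE, 0,
               MPI_COMM_WORLD);
}
#endif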
/** spread out parts of sendbuf to all processes from all processes */
int MPI_Alltoall(void* sendbuf, int sendcount, MPI_Datatype sendtype,
                 void* recvbuf, int recvcount, MPI_Datatype recvtype,
                 MPI_Comm comm);
/** spread out irregularly laid out parts of sendbuf to all processes
    from all processes */
int MPI_Alltoallv(void* sendbuf, int *sendcounts, int *sdispls,
                  MPI_Datatype sendtype, void* recvbuf, int *recvcounts,
                  int *rdispls, MPI_Datatype recvtype, MPI_Comm comm);
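
/* Sketch of an all-to-all exchange (hypothetical usage): every rank sends one
   int to every other rank; both buffers must hold one element per rank. */
#if 0
static void transpose_ints(int *sendbuf, int *recvbuf)
{
    MPI_Alltoall(sendbuf, 1, MPI_INT, recvbuf, 1, MPI_INT, MPI_COMM_WORLD);
}
#endif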
/** Do an operation between all locally held buffers on all items in the
    buffers and send the results to root */
int MPI_Reduce(void* sendbuf, void* recvbuf, int count,
               MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm);
/** Do an operation between all locally held buffers on all items in the
    buffers and broadcast the results */
int MPI_Allreduce(void* sendbuf, void* recvbuf, int count,
                  MPI_Datatype datatype, MPI_Op op, MPI_Comm comm);
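
/* Sketch of a global sum with MPI_Allreduce (hypothetical usage): every rank
   contributes a local partial value and every rank receives the total. */
#if 0
static double global_sum(double local)
{
    double total = 0.0;

    MPI_Allreduce(&local, &total, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    return total;
}
#endif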
#ifdef __cplusplus
} /* closing extern "C" */
#endif

#endif /* _GMX_THREAD_MPI_H_ */