/* src/gmxlib/thread_mpi/profile.h */
/*
This source code file is part of thread_mpi.
Written by Sander Pronk, Erik Lindahl, and possibly others.

Copyright (c) 2009, Sander Pronk, Erik Lindahl.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1) Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
2) Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
3) Neither the name of the copyright holders nor the
   names of its contributors may be used to endorse or promote products
   derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY US ''AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL WE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

If you want to redistribute modifications, please consider that
scientific software is very special. Version control is crucial -
bugs must be traceable. We will be happy to consider code for
inclusion in the official distribution, but derived work should not
be called official thread_mpi. Details are found in the README & COPYING
files.
*/

/* the profiling functions. Many of these are macros, so they're inlined
   forcibly. Profiling is turned on by defining TMPI_PROFILE, but the most
   useful parts depend on the cycle counter, which currently only works for
   x86, x86_64 and ia64. */
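
/* Illustrative note (not from the original header): because profiling is a
   compile-time feature, a build that wants these counters defines TMPI_PROFILE
   when compiling thread_mpi, for example (hypothetical command line; adjust to
   the actual build system):

       cc -DTMPI_PROFILE -c thread_mpi_sources.c

   The cycle-based timings below additionally need TMPI_CYCLE_COUNT, which
   "thread_mpi/atomic/cycles.h" is expected to define on the supported CPU
   families listed above. */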
#ifdef TMPI_PROFILE

#include "thread_mpi/atomic/cycles.h"

struct tmpi_thread;

enum tmpi_functions
{
    TMPIFN_Send=0,   /* first the point-to-point comm functions */
    TMPIFN_Recv,
    TMPIFN_Sendrecv,
    TMPIFN_Isend,
    TMPIFN_Irecv,
    TMPIFN_Wait,
    TMPIFN_Test,
    TMPIFN_Waitall,
    TMPIFN_Testall,
    TMPIFN_Waitany,
    TMPIFN_Testany,
    TMPIFN_Waitsome,
    TMPIFN_Testsome,

    TMPIFN_Barrier,  /* then the barrier */

    TMPIFN_Bcast,    /* and now the collective comm functions */
    TMPIFN_Gather,
    TMPIFN_Gatherv,
    TMPIFN_Scatter,
    TMPIFN_Scatterv,
    TMPIFN_Alltoall,
    TMPIFN_Alltoallv,

    TMPIFN_Reduce,
    TMPIFN_Allreduce,

    TMPIFN_Nfunctions
};


enum tmpi_wait_functions
{
    TMPIWAIT_P2p,        /* p2p send wait */
    TMPIWAIT_P2p_signal, /* p2p signaling wait */
    TMPIWAIT_Coll_send,  /* collective send wait */
    TMPIWAIT_Coll_recv,  /* collective recv wait */
    TMPIWAIT_Barrier,    /* barrier wait */
    TMPIWAIT_Reduce,     /* collective (all)reduce wait */

    TMPIWAIT_N
};


/* thread-specific profiling data structure */
struct tmpi_profile
{
    unsigned long int mpifn_calls[TMPIFN_Nfunctions]; /* array of counters */

    unsigned long int buffered_p2p_xfers;  /* number of buffered p2p transfers */
    unsigned long int total_p2p_xfers;     /* total number of p2p transfers */

    unsigned long int buffered_coll_xfers; /* number of buffered collective
                                              transfers */
    unsigned long int total_coll_xfers;    /* total number of collective
                                              transfers */

#ifdef TMPI_CYCLE_COUNT
    /* cycle counters */
    tmpi_cycles_t mpifn_cycles[TMPIFN_Nfunctions]; /* array of cycle counters */
    tmpi_cycles_t wait_cycles[TMPIWAIT_N];         /* the wait cycles */

    tmpi_cycles_t global_start,global_stop; /* timing start and stop times */
    tmpi_cycles_t mpifn_start; /* individual timing start times for profiling
                                  function call times. This can be here
                                  because tmpi_profile is thread-specific. */
    enum tmpi_functions fn;    /* the function being cycle-counted */

    tmpi_cycles_t wait_start;  /* individual timing start times for profiling
                                  wait times. */

    double totals;             /* totals counter for reporting end results */
#endif
};
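
/* Illustrative sketch (hypothetical helper, kept in an inactive block): the
   counters above are enough to report, for example, what fraction of the
   point-to-point transfers went through an intermediate buffer. */
#if 0
static double tmpi_profile_buffered_p2p_fraction(const struct tmpi_profile *prof)
{
    if (prof->total_p2p_xfers == 0)
    {
        return 0.; /* nothing transferred yet */
    }
    return (double)(prof->buffered_p2p_xfers) / (double)(prof->total_p2p_xfers);
}
#endif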

extern int tMPI_Profile_started;

/* initialize the profile counter */
void tMPI_Profile_init(struct tmpi_profile *prof);

#if 0
/* deallocations */
void tMPI_Profile_destroy(struct tmpi_profile *prof);
#endif

/* stop counting */
void tMPI_Profile_stop(struct tmpi_profile *prof);



/* counter functions */
/* start */
#ifdef TMPI_CYCLE_COUNT
/*void tMPI_Profile_count_start(struct tmpi_thread *th);*/
#define tMPI_Profile_count_start(th) { th->profile.mpifn_start = tmpi_cycles_read(); }
#else
#define tMPI_Profile_count_start(th) {}
#endif

/* end. this is where the counting actually happens */
/*void tMPI_Profile_count_stop(struct tmpi_thread *th, enum tmpi_functions fn);*/
#ifdef TMPI_CYCLE_COUNT
#define tMPI_Profile_count_stop(th, fn) \
{ \
    tmpi_cycles_t stop=tmpi_cycles_read(); \
    th->profile.mpifn_cycles[fn] += (stop - th->profile.mpifn_start); \
    (th->profile.mpifn_calls[fn])++; \
}
#else
#define tMPI_Profile_count_stop(th, fn) \
{ \
    (th->profile.mpifn_calls[fn])++; \
}
#endif
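
/* Illustrative sketch (hypothetical function, kept in an inactive block): a
   profiled entry point brackets its work with tMPI_Profile_count_start() and
   tMPI_Profile_count_stop(), which charge the elapsed cycles (when
   TMPI_CYCLE_COUNT is available) plus one call to the matching TMPIFN_ slot.
   This assumes, as the macros imply, that struct tmpi_thread has a 'profile'
   member of type struct tmpi_profile. */
#if 0
static int example_profiled_send(struct tmpi_thread *th)
{
    int ret = 0;

    tMPI_Profile_count_start(th);             /* remember the start cycle count */
    /* ... the actual send work would go here ... */
    tMPI_Profile_count_stop(th, TMPIFN_Send); /* add elapsed cycles and one call */

    return ret;
}
#endif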


/* wait functions */
#ifdef TMPI_CYCLE_COUNT
/* start waiting cycle count */
/*void tMPI_Profile_wait_start(struct tmpi_thread *th);*/
#define tMPI_Profile_wait_start(th) \
{ \
    th->profile.wait_start=tmpi_cycles_read(); \
}

/* stop waiting cycle count */
/*void tMPI_Profile_wait_stop(struct tmpi_thread *th,
                              enum tmpi_wait_functions fn);*/
#define tMPI_Profile_wait_stop(th, fn)\
{ \
    tmpi_cycles_t wait_stop=tmpi_cycles_read();\
    th->profile.wait_cycles[fn] += (wait_stop - th->profile.wait_start);\
}
#else
#define tMPI_Profile_wait_start(th) {}
#define tMPI_Profile_wait_stop(th, fn) {}
#endif
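
/* Illustrative sketch (hypothetical function, kept in an inactive block):
   cycles spent waiting are attributed to one of the TMPIWAIT_ categories by
   wrapping the wait itself with the two macros above. */
#if 0
static void example_profiled_wait(struct tmpi_thread *th, volatile int *flag)
{
    tMPI_Profile_wait_start(th);              /* remember when waiting began */
    while (!(*flag))
    {
        /* spin until the other side signals completion */
    }
    tMPI_Profile_wait_stop(th, TMPIWAIT_P2p); /* charge the waited cycles */
}
#endif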

/* count the number of transfers at the receiving end. */
/*void tMPI_Profile_count_buffered_p2p_xfer(struct tmpi_thread *th);
void tMPI_Profile_count_p2p_xfer(struct tmpi_thread *th);
void tMPI_Profile_count_buffered_coll_xfer(struct tmpi_thread *th);
void tMPI_Profile_count_coll_xfer(struct tmpi_thread *th);*/
#define tMPI_Profile_count_buffered_p2p_xfer(th) \
{ \
    (th->profile.buffered_p2p_xfers)++; \
}
#define tMPI_Profile_count_p2p_xfer(th) \
{ \
    (th->profile.total_p2p_xfers)++; \
}
#define tMPI_Profile_count_buffered_coll_xfer(th) \
{ \
    (th->profile.buffered_coll_xfers)++; \
}
#define tMPI_Profile_count_coll_xfer(th) \
{ \
    (th->profile.total_coll_xfers)++; \
}
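
/* Illustrative sketch (hypothetical function, kept in an inactive block): at
   the receiving end every point-to-point transfer bumps the total counter, and
   transfers that went through an intermediate buffer additionally bump the
   buffered counter, so a buffered/total ratio can be reported afterwards. */
#if 0
static void example_count_p2p_recv(struct tmpi_thread *th, int was_buffered)
{
    tMPI_Profile_count_p2p_xfer(th);              /* total_p2p_xfers++ */
    if (was_buffered)
    {
        tMPI_Profile_count_buffered_p2p_xfer(th); /* buffered_p2p_xfers++ */
    }
}
#endif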

/* output functions */
void tMPI_Profiles_summarize(int Nthreads, struct tmpi_thread *threads);
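
/* Illustrative sketch (hypothetical function, kept in an inactive block): a
   typical lifecycle initializes each thread's profile before the run and
   summarizes all of them once at the end. As above, this assumes struct
   tmpi_thread has a 'profile' member. */
#if 0
static void example_profile_lifecycle(int Nthreads, struct tmpi_thread *threads)
{
    int i;

    for (i = 0; i < Nthreads; i++)
    {
        tMPI_Profile_init(&(threads[i].profile)); /* zero all counters */
    }

    /* ... run the threaded program; the macros above update the counters ... */

    tMPI_Profiles_summarize(Nthreads, threads);   /* report the collected totals */
}
#endif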

#endif /* TMPI_PROFILE */