Bugfix: restored floatTransfer flag
[foam-extend-3.2.git] / src/foam/db/IOstreams/Pstreams/Pstream.C
blob d41e2ac7768d1eb0026d7b7b4c5c9e1593b6e6f5
/*---------------------------------------------------------------------------*\
  =========                 |
  \\      /  F ield         | foam-extend: Open Source CFD
   \\    /   O peration     | Version:     3.2
    \\  /    A nd           | Web:         http://www.foam-extend.org
     \\/     M anipulation  | For copyright notice see file Copyright
-------------------------------------------------------------------------------
License
    This file is part of foam-extend.

    foam-extend is free software: you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published by the
    Free Software Foundation, either version 3 of the License, or (at your
    option) any later version.

    foam-extend is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with foam-extend.  If not, see <http://www.gnu.org/licenses/>.

\*---------------------------------------------------------------------------*/

#include <cstring>
#include <cstdlib>
#include <csignal>

#include "mpi.h"

#include "Pstream.H"
#include "PstreamReduceOps.H"
#include "debug.H"
#include "dictionary.H"
#include "OSspecific.H"

#if defined(WM_SP)
#   define MPI_SCALAR MPI_FLOAT
#elif defined(WM_DP)
#   define MPI_SCALAR MPI_DOUBLE
#elif defined(WM_LDP)
#   define MPI_SCALAR MPI_LONG_DOUBLE
#endif


// * * * * * * * * * * * * * * Static Data Members * * * * * * * * * * * * * //

defineTypeNameAndDebug(Foam::Pstream, 0);

template<>
const char* Foam::NamedEnum<Foam::Pstream::commsTypes, 3>::names[] =
{
    "blocking",
    "scheduled",
    "nonBlocking"
};

const Foam::NamedEnum<Foam::Pstream::commsTypes, 3>
    Foam::Pstream::commsTypeNames;


// * * * * * * * * * * * * * Private Member Functions  * * * * * * * * * * * //

void Foam::Pstream::setParRun()
{
    parRun_ = true;

    Pout.prefix() = '[' +  name(myProcNo()) + "] ";
    Perr.prefix() = '[' +  name(myProcNo()) + "] ";
}


void Foam::Pstream::calcLinearComm(const label nProcs)
{
    linearCommunication_.setSize(nProcs);

    // Master
    labelList belowIDs(nProcs - 1);
    forAll(belowIDs, i)
    {
        belowIDs[i] = i + 1;
    }

    linearCommunication_[0] = commsStruct
    (
        nProcs,
        0,
        -1,
        belowIDs,
        labelList(0)
    );

    // Slaves. Have no below processors, only communicate up to master
    for (label procID = 1; procID < nProcs; procID++)
    {
        linearCommunication_[procID] = commsStruct
        (
            nProcs,
            procID,
            0,
            labelList(0),
            labelList(0)
        );
    }
}
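
// Example (illustrative): with nProcs = 4 the linear schedule built above is
//   proc 0    : above = -1, below = (1 2 3)
//   procs 1..3: above =  0, below = ()
// i.e. every slave communicates only with the master, so the cost of a
// gather/scatter grows linearly with the number of processors.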


// Append my children (and my children children etc.) to allReceives.
void Foam::Pstream::collectReceives
(
    const label procID,
    const List<dynamicLabelList >& receives,
    dynamicLabelList& allReceives
)
{
    const dynamicLabelList& myChildren = receives[procID];

    forAll(myChildren, childI)
    {
        allReceives.append(myChildren[childI]);
        collectReceives(myChildren[childI], receives, allReceives);
    }
}
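
// Worked example (illustrative), using the 8-processor tree sketched below:
// receives[0] = (1 2 4), receives[2] = (3), receives[4] = (5 6),
// receives[6] = (7).  collectReceives(0, receives, allReceives) then appends,
// depth-first,
//   1, 2, 3, 4, 5, 6, 7
// so allReceives for processor 0 holds every processor in its sub-tree.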


// Tree like schedule. For 8 procs:
// (level 0)
//      0 receives from 1
//      2 receives from 3
//      4 receives from 5
//      6 receives from 7
// (level 1)
//      0 receives from 2
//      4 receives from 6
// (level 2)
//      0 receives from 4
//
// The sends/receives for all levels are collected per processor (one send per
// processor; multiple receives possible) creating a table:
//
// So per processor:
// proc     receives from   sends to
// ----     -------------   --------
//  0       1,2,4           -
//  1       -               0
//  2       3               0
//  3       -               2
//  4       5,6             0
//  5       -               4
//  6       7               4
//  7       -               6
void Foam::Pstream::calcTreeComm(label nProcs)
{
    label nLevels = 1;
    while ((1 << nLevels) < nProcs)
    {
        nLevels++;
    }

    List<dynamicLabelList > receives(nProcs);
    labelList sends(nProcs, -1);

    // Info<< "Using " << nLevels << " communication levels" << endl;

    label offset = 2;
    label childOffset = offset/2;

    for (label level = 0; level < nLevels; level++)
    {
        label receiveID = 0;
        while (receiveID < nProcs)
        {
            // Determine processor that sends and we receive from
            label sendID = receiveID + childOffset;

            if (sendID < nProcs)
            {
                receives[receiveID].append(sendID);
                sends[sendID] = receiveID;
            }

            receiveID += offset;
        }

        offset <<= 1;
        childOffset <<= 1;
    }

    // For all processors find the processors it receives data from
    // (and the processors they receive data from etc.)
    List<dynamicLabelList > allReceives(nProcs);
    for (label procID = 0; procID < nProcs; procID++)
    {
        collectReceives(procID, receives, allReceives[procID]);
    }

    treeCommunication_.setSize(nProcs);

    for (label procID = 0; procID < nProcs; procID++)
    {
        treeCommunication_[procID] = commsStruct
        (
            nProcs,
            procID,
            sends[procID],
            receives[procID].shrink(),
            allReceives[procID].shrink()
        );
    }
}
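
// Sketch of the level loop above for nProcs = 8 (the same example as the
// table before this function): nLevels = 3 and the (offset, childOffset)
// pairs per level are
//   level 0: (2, 1)  ->  pairs 0<-1, 2<-3, 4<-5, 6<-7
//   level 1: (4, 2)  ->  pairs 0<-2, 4<-6
//   level 2: (8, 4)  ->  pair  0<-4
// which yields sends = (-1 0 0 2 0 4 4 6) and the receive lists shown above.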


// Callback from Pstream::init() : initialize linear and tree communication
// schedules now that nProcs is known.
void Foam::Pstream::initCommunicationSchedule()
{
    calcLinearComm(nProcs());
    calcTreeComm(nProcs());
}


// NOTE:
// Valid parallel options vary between implementations, but flag the common
// ones.  If they are not removed by MPI_Init(), the subsequent argument
// processing will notice that they are wrong.
void Foam::Pstream::addValidParOptions(HashTable<string>& validParOptions)
{
    validParOptions.insert("np", "");
    validParOptions.insert("p4pg", "PI file");
    validParOptions.insert("p4wd", "directory");
    validParOptions.insert("p4amslave", "");
    validParOptions.insert("p4yourname", "hostname");
    validParOptions.insert("GAMMANP", "number of instances");
    validParOptions.insert("machinefile", "machine file");
}


bool Foam::Pstream::init(int& argc, char**& argv)
{
    MPI_Init(&argc, &argv);

    int numprocs;
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myProcNo_);

    if (numprocs <= 1)
    {
        FatalErrorIn("Pstream::init(int& argc, char**& argv)")
            << "bool Pstream::init(int& argc, char**& argv) : "
               "attempt to run parallel on 1 processor"
            << Foam::abort(FatalError);
    }

    procIDs_.setSize(numprocs);

    forAll(procIDs_, procNo)
    {
        procIDs_[procNo] = procNo;
    }

    setParRun();

#   ifndef SGIMPI
    string bufferSizeName = getEnv("MPI_BUFFER_SIZE");

    if (bufferSizeName.size())
    {
        int bufferSize = atoi(bufferSizeName.c_str());

        if (bufferSize)
        {
            MPI_Buffer_attach(new char[bufferSize], bufferSize);
        }
    }
    else
    {
        FatalErrorIn("Pstream::init(int& argc, char**& argv)")
            << "Pstream::init(int& argc, char**& argv) : "
            << "environment variable MPI_BUFFER_SIZE not defined"
            << Foam::abort(FatalError);
    }
#   endif
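
    // Note (illustrative): the buffer attached above provides space for
    // buffered (MPI_Bsend-style) sends.  MPI_BUFFER_SIZE is expected in the
    // run environment before launching, e.g. (value is only an assumption):
    //
    //     export MPI_BUFFER_SIZE=20000000
    //     mpirun -np 4 <solver> -parallel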

    int processorNameLen;
    char processorName[MPI_MAX_PROCESSOR_NAME];

    MPI_Get_processor_name(processorName, &processorNameLen);

    //signal(SIGABRT, stop);

    // Now that nprocs is known construct communication tables.
    initCommunicationSchedule();

    return true;
}


void Foam::Pstream::exit(int errnum)
{
#   ifndef SGIMPI
    int size;
    char* buff;
    MPI_Buffer_detach(&buff, &size);
    delete[] buff;
#   endif

    if (errnum == 0)
    {
        MPI_Finalize();
        ::exit(errnum);
    }
    else
    {
        MPI_Abort(MPI_COMM_WORLD, errnum);
    }
}


void Foam::Pstream::abort()
{
    MPI_Abort(MPI_COMM_WORLD, 1);
}


void Foam::reduce(scalar& Value, const sumOp<scalar>& bop)
{
    if (!Pstream::parRun())
    {
        return;
    }

    if (Pstream::nProcs() <= Pstream::nProcsSimpleSum())
    {
        if (Pstream::master())
        {
            for
            (
                int slave=Pstream::firstSlave();
                slave<=Pstream::lastSlave();
                slave++
            )
            {
                scalar value;

                if
                (
                    MPI_Recv
                    (
                        &value,
                        1,
                        MPI_SCALAR,
                        Pstream::procID(slave),
                        Pstream::msgType(),
                        MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE
                    )
                )
                {
                    FatalErrorIn
                    (
                        "reduce(scalar& Value, const sumOp<scalar>& sumOp)"
                    )   << "MPI_Recv failed"
                        << Foam::abort(FatalError);
                }

                Value = bop(Value, value);
            }
        }
        else
        {
            if
            (
                MPI_Send
                (
                    &Value,
                    1,
                    MPI_SCALAR,
                    Pstream::procID(Pstream::masterNo()),
                    Pstream::msgType(),
                    MPI_COMM_WORLD
                )
            )
            {
                FatalErrorIn
                (
                    "reduce(scalar& Value, const sumOp<scalar>& sumOp)"
                )   << "MPI_Send failed"
                    << Foam::abort(FatalError);
            }
        }

        if (Pstream::master())
        {
            for
            (
                int slave=Pstream::firstSlave();
                slave<=Pstream::lastSlave();
                slave++
            )
            {
                if
                (
                    MPI_Send
                    (
                        &Value,
                        1,
                        MPI_SCALAR,
                        Pstream::procID(slave),
                        Pstream::msgType(),
                        MPI_COMM_WORLD
                    )
                )
                {
                    FatalErrorIn
                    (
                        "reduce(scalar& Value, const sumOp<scalar>& sumOp)"
                    )   << "MPI_Send failed"
                        << Foam::abort(FatalError);
                }
            }
        }
        else
        {
            if
            (
                MPI_Recv
                (
                    &Value,
                    1,
                    MPI_SCALAR,
                    Pstream::procID(Pstream::masterNo()),
                    Pstream::msgType(),
                    MPI_COMM_WORLD,
                    MPI_STATUS_IGNORE
                )
            )
            {
                FatalErrorIn
                (
                    "reduce(scalar& Value, const sumOp<scalar>& sumOp)"
                )   << "MPI_Recv failed"
                    << Foam::abort(FatalError);
            }
        }
    }
    else
    {
        scalar sum;
        MPI_Allreduce(&Value, &sum, 1, MPI_SCALAR, MPI_SUM, MPI_COMM_WORLD);
        Value = sum;

        /*
        int myProcNo = Pstream::myProcNo();
        int nProcs = Pstream::nProcs();

        //
        // receive from children
        //
        int level = 1;
        int thisLevelOffset = 2;
        int childLevelOffset = thisLevelOffset/2;
        int childProcId = 0;

        while
        (
            (childLevelOffset < nProcs)
         && (myProcNo % thisLevelOffset) == 0
        )
        {
            childProcId = myProcNo + childLevelOffset;

            scalar value;

            if (childProcId < nProcs)
            {
                if
                (
                    MPI_Recv
                    (
                        &value,
                        1,
                        MPI_SCALAR,
                        Pstream::procID(childProcId),
                        Pstream::msgType(),
                        MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE
                    )
                )
                {
                    FatalErrorIn
                    (
                        "reduce(scalar& Value, const sumOp<scalar>& sumOp)"
                    )   << "MPI_Recv failed"
                        << Foam::abort(FatalError);
                }

                Value = bop(Value, value);
            }

            level++;
            thisLevelOffset <<= 1;
            childLevelOffset = thisLevelOffset/2;
        }

        //
        // send and receive from parent
        //
        if (!Pstream::master())
        {
            int parentId = myProcNo - (myProcNo % thisLevelOffset);

            if
            (
                MPI_Send
                (
                    &Value,
                    1,
                    MPI_SCALAR,
                    Pstream::procID(parentId),
                    Pstream::msgType(),
                    MPI_COMM_WORLD
                )
            )
            {
                FatalErrorIn
                (
                    "reduce(scalar& Value, const sumOp<scalar>& sumOp)"
                )   << "MPI_Send failed"
                    << Foam::abort(FatalError);
            }

            if
            (
                MPI_Recv
                (
                    &Value,
                    1,
                    MPI_SCALAR,
                    Pstream::procID(parentId),
                    Pstream::msgType(),
                    MPI_COMM_WORLD,
                    MPI_STATUS_IGNORE
                )
            )
            {
                FatalErrorIn
                (
                    "reduce(scalar& Value, const sumOp<scalar>& sumOp)"
                )   << "MPI_Recv failed"
                    << Foam::abort(FatalError);
            }
        }

        //
        // distribute to my children
        //
        level--;
        thisLevelOffset >>= 1;
        childLevelOffset = thisLevelOffset/2;

        while (level > 0)
        {
            childProcId = myProcNo + childLevelOffset;

            if (childProcId < nProcs)
            {
                if
                (
                    MPI_Send
                    (
                        &Value,
                        1,
                        MPI_SCALAR,
                        Pstream::procID(childProcId),
                        Pstream::msgType(),
                        MPI_COMM_WORLD
                    )
                )
                {
                    FatalErrorIn
                    (
                        "reduce(scalar& Value, const sumOp<scalar>& sumOp)"
                    )   << "MPI_Send failed"
                        << Foam::abort(FatalError);
                }
            }

            level--;
            thisLevelOffset >>= 1;
            childLevelOffset = thisLevelOffset/2;
        }
        */
    }
}
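
// Usage sketch (illustrative, not part of this file): for a quantity computed
// on each processor, a global sum over all processors is obtained with
//
//     scalar localValue = ...;                   // hypothetical local result
//     Foam::reduce(localValue, sumOp<scalar>());
//
// after which every processor holds the same, globally reduced value, whether
// the linear gather/scatter or the MPI_Allreduce branch above was taken.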


// * * * * * * * * * * * * * * Static Data Members * * * * * * * * * * * * * //

// Initialise my process number to 0 (the master)
int Foam::Pstream::myProcNo_(0);


// By default this is not a parallel run
bool Foam::Pstream::parRun_(false);


// List of process IDs
Foam::List<int> Foam::Pstream::procIDs_(1, 0);


// Standard transfer message type
const int Foam::Pstream::msgType_(1);


// Linear communication schedule
Foam::List<Foam::Pstream::commsStruct> Foam::Pstream::linearCommunication_(0);


// Multi level communication schedule
Foam::List<Foam::Pstream::commsStruct> Foam::Pstream::treeCommunication_(0);


// Should compact transfer be used in which floats replace doubles
// reducing the bandwidth requirement at the expense of some loss
// in accuracy
const Foam::debug::optimisationSwitch
Foam::Pstream::floatTransfer
(
    "floatTransfer",
    0
);


// Number of processors at which the reduce algorithm changes from linear to
// tree
const Foam::debug::optimisationSwitch
Foam::Pstream::nProcsSimpleSum
(
    "nProcsSimpleSum",
    16
);


const Foam::debug::optimisationSwitch
Foam::Pstream::defaultCommsType
(
    "commsType",
//     "nonBlocking",
//     "scheduled",
    "blocking",
    "blocking, nonBlocking, scheduled"
);
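
// The three optimisation switches above are normally overridden at run time
// rather than by recompiling.  A typical entry (assumed layout, not verbatim
// from this repository) in the global etc/controlDict would look like:
//
//     OptimisationSwitches
//     {
//         floatTransfer    0;
//         nProcsSimpleSum  16;
//         commsType        blocking;   // or nonBlocking, scheduled
//     }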


// ************************************************************************* //