ext/OGDF/src/energybased/FMEMultipoleKernel.h

   1 /*
   2  * $Revision: 2559 $
   3  *
   4  * last checkin:
   5  *   $Author: gutwenger $
   6  *   $Date: 2012-07-06 15:04:28 +0200 (Fr, 06. Jul 2012) $
   7  ***************************************************************/
   8
   9 /** \file
  10  * \brief Declaration of class FMEMultipoleKernel.
  11  *
  12  * \author Martin Gronemann
  13  *
  14  * \par License:
  15  * This file is part of the Open Graph Drawing Framework (OGDF).
  16  *
  17  * \par
  18  * Copyright (C)<br>
  19  * See README.txt in the root directory of the OGDF installation for details.
  20  *
  21  * \par
  22  * This program is free software; you can redistribute it and/or
  23  * modify it under the terms of the GNU General Public License
  24  * Version 2 or 3 as published by the Free Software Foundation;
  25  * see the file LICENSE.txt included in the packaging of this file
  26  * for details.
  27  *
  28  * \par
  29  * This program is distributed in the hope that it will be useful,
  30  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  31  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  32  * GNU General Public License for more details.
  33  *
  34  * \par
  35  * You should have received a copy of the GNU General Public
  36  * License along with this program; if not, write to the Free
  37  * Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  38  * Boston, MA 02110-1301, USA.
  39  *
  40  * \see  http://www.gnu.org/copyleft/gpl.html
  41  ***************************************************************/
  42
  43 #ifdef _MSC_VER
  44 #pragma once
  45 #endif
  46
  47 #ifndef OGDF_FME_MULTIPOLE_KERNEL_H
  48 #define OGDF_FME_MULTIPOLE_KERNEL_H
  49
  50 #include "FMEKernel.h"
  51 #include "FMEFunc.h"
  52 #include <algorithm>
  53
  54 namespace ogdf {
  55
  56 struct ArrayPartition
  57 {
  58         __uint32 begin;
  59         __uint32 end;
  60
  61         template<typename Func>
  62         void for_loop(Func& func)
  63         {
  64                 for(__uint32 i=begin; i<=end; i++) func(i);
  65         }
  66 };
  67
  68
  69 class FMEMultipoleKernel : public FMEKernel
  70 {
  71 public:
  72         FMEMultipoleKernel(FMEThread* pThread) : FMEKernel(pThread) { }
  73
  74         //! allocate the global and local contexts used by an instance of this kernel
  75         static FMEGlobalContext* allocateContext(ArrayGraph* pGraph, FMEGlobalOptions* pOptions, __uint32 numThreads);
  76
  77         //! free the global and local context
  78         static void deallocateContext(FMEGlobalContext* globalContext);
  79
  80         //! sub procedure for quadtree construction
  81         void quadtreeConstruction(ArrayPartition& nodePointPartition);
  82
  83         //! the single threaded version without fences
  84         void multipoleApproxSingleThreaded(ArrayPartition& nodePointPartition);
  85
  86         //! the original algorithm which runs the WSPD completely single threaded
  87         void multipoleApproxSingleWSPD(ArrayPartition& nodePointPartition);
  88
  89         //! new but slower method, parallel wspd computation without using the wspd structure
  90         void multipoleApproxNoWSPDStructure(ArrayPartition& nodePointPartition);
  91
  92         //! the final version, the wspd structure is only used for the top of the tree
  93         void multipoleApproxFinal(ArrayPartition& nodePointPartition);
  94
  95         //! main function of the kernel
  96         void operator()(FMEGlobalContext* globalContext);
  97
  98         //! creates an array partition with a default chunksize of 16
  99         inline ArrayPartition arrayPartition(__uint32 n)
 100         {
 101                 return arrayPartition(n, threadNr(), numThreads(), 16);
 102         }
 103
 104         //! returns an array partition for the given threadNr and thread count
 105         inline ArrayPartition arrayPartition(__uint32 n, __uint32 threadNr, __uint32 numThreads, __uint32 chunkSize)
 106         {
 107                 ArrayPartition result;
 108                 if (!n)
 109                 {
 110                         result.begin = 1;
 111                         result.end = 0;
 112                         return result;
 113                 }
 114                 if (n>=numThreads*chunkSize)
 115                 {
 116                         __uint32 s = n/(numThreads*chunkSize);
 117                         __uint32 o = s*chunkSize*threadNr;
 118                         if (threadNr == numThreads-1)
 119                         {
 120                                 result.begin = o;
 121                                 result.end = n-1;
 122                         }
 123                         else
 124                         {
 125                                 result.begin = o;
 126                                 result.end = o+s*chunkSize-1;
 127                         }
 128                 } else
 129                 {
 130                         if (threadNr == 0)
 131                         {
 132                                 result.begin = 0;
 133                                 result.end = n-1;
 134                         } else
 135                         {
 136                                 result.begin = 1;
 137                                 result.end = 0;
 138                         }
 139                 }
 140                 return result;
 141         }
 142
 143         //! for loop on a partition
 144         template<typename F>
 145         inline void for_loop(const ArrayPartition& partition, F func)
 146         {
 147                 if (partition.begin > partition.end)
 148                         return;
 149                 for (__uint32 i=partition.begin; i<= partition.end; i++) func(i);
 150         }
 151
 152         //! for loop on the tree partition
 153         template<typename F>
 154         inline void for_tree_partition(F functor)
 155         {
 156                 for (std::list<__uint32>::const_iterator it = m_pLocalContext->treePartition.nodes.begin();
 157                         it!=m_pLocalContext->treePartition.nodes.end(); it++)
 158                 {
 159                         __uint32 node = *it;
 160                         functor(node);
 161                 }
 162         }
 163
 164         //! sorting single threaded
 165         template<typename T, typename C>
 166         inline void sort_single(T* ptr, __uint32 n, C comparer)
 167         {
 168                 if (isMainThread())
 169                 {
 170                         std::sort(ptr, ptr + n, comparer);
 171                 }
 172         }
 173
 174         //! lazy parallel sorting for num_threads = power of two
 175         template<typename T, typename C>
 176         inline void sort_parallel(T* ptr, __uint32 n, C comparer)
 177         {
 178                 if ((n < numThreads()*1000) || (numThreads() == 1))
 179                         sort_single(ptr, n, comparer);
 180                 else
 181                         sort_parallel(ptr, n, comparer, 0, numThreads());
 182         }
 183
 184         //! lazy parallel sorting for num_threads = power of two
 185         template<typename T, typename C>
 186         inline void sort_parallel(T* ptr, __uint32 n, C comparer, __uint32 threadNrBegin, __uint32 numThreads)
 187         {
 188                 if (n <= 1) return;
 189                 if (numThreads == 1)
 190                 {
 191                         std::sort(ptr, ptr + n, comparer);
 192                 }
 193                 else
 194                 {
 195                         __uint32 half = n >> 1;
 196                         __uint32 halfThreads = numThreads >> 1;
 197                         if (this->threadNr() < threadNrBegin + halfThreads)
 198                                 sort_parallel(ptr, half, comparer, threadNrBegin, halfThreads);
 199                         else
 200                                 sort_parallel(ptr + half, n - half, comparer, threadNrBegin + halfThreads, halfThreads);
 201
 202                         // wait until all threads are ready.
 203                         sync();
 204                         if (this->threadNr() == threadNrBegin)
 205                                 std::inplace_merge(ptr, ptr + half, ptr + n, comparer);
 206                 }
 207         }
 208
 209 private:
 210         FMEGlobalContext* m_pGlobalContext;
 211         FMELocalContext* m_pLocalContext;
 212 };
 213
 214 };
 215
 216 #endif