6 * $Date: 2012-07-06 15:04:28 +0200 (Fr, 06. Jul 2012) $
7 ***************************************************************/
10 * \brief Declaration of class FMEMultipoleKernel.
12 * \author Martin Gronemann
15 * This file is part of the Open Graph Drawing Framework (OGDF).
19 * See README.txt in the root directory of the OGDF installation for details.
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * Version 2 or 3 as published by the Free Software Foundation;
25 * see the file LICENSE.txt included in the packaging of this file
29 * This program is distributed in the hope that it will be useful,
30 * but WITHOUT ANY WARRANTY; without even the implied warranty of
31 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32 * GNU General Public License for more details.
35 * You should have received a copy of the GNU General Public
36 * License along with this program; if not, write to the Free
37 * Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
38 * Boston, MA 02110-1301, USA.
40 * \see http://www.gnu.org/copyleft/gpl.html
41 ***************************************************************/
47 #ifndef OGDF_FME_MULTIPOLE_KERNEL_H
48 #define OGDF_FME_MULTIPOLE_KERNEL_H
50 #include "FMEKernel.h"
61 template<typename Func
>
62 void for_loop(Func
& func
)
64 for(__uint32 i
=begin
; i
<=end
; i
++) func(i
);
69 class FMEMultipoleKernel
: public FMEKernel
72 FMEMultipoleKernel(FMEThread
* pThread
) : FMEKernel(pThread
) { }
74 //! allocate the global and local contexts used by an instance of this kernel
75 static FMEGlobalContext
* allocateContext(ArrayGraph
* pGraph
, FMEGlobalOptions
* pOptions
, __uint32 numThreads
);
77 //! free the global and local context
78 static void deallocateContext(FMEGlobalContext
* globalContext
);
80 //! sub procedure for quadtree construction
81 void quadtreeConstruction(ArrayPartition
& nodePointPartition
);
83 //! the single threaded version without fences
84 void multipoleApproxSingleThreaded(ArrayPartition
& nodePointPartition
);
86 //! the original algorithm which runs the WSPD completely single threaded
87 void multipoleApproxSingleWSPD(ArrayPartition
& nodePointPartition
);
89 //! new but slower method, parallel wspd computation without using the wspd structure
90 void multipoleApproxNoWSPDStructure(ArrayPartition
& nodePointPartition
);
92 //! the final version, the wspd structure is only used for the top of the tree
93 void multipoleApproxFinal(ArrayPartition
& nodePointPartition
);
95 //! main function of the kernel
96 void operator()(FMEGlobalContext
* globalContext
);
98 //! creates an array partition with a default chunksize of 16
99 inline ArrayPartition
arrayPartition(__uint32 n
)
101 return arrayPartition(n
, threadNr(), numThreads(), 16);
104 //! returns an array partition for the given threadNr and thread count
105 inline ArrayPartition
arrayPartition(__uint32 n
, __uint32 threadNr
, __uint32 numThreads
, __uint32 chunkSize
)
107 ArrayPartition result
;
114 if (n
>=numThreads
*chunkSize
)
116 __uint32 s
= n
/(numThreads
*chunkSize
);
117 __uint32 o
= s
*chunkSize
*threadNr
;
118 if (threadNr
== numThreads
-1)
126 result
.end
= o
+s
*chunkSize
-1;
143 //! for loop on a partition
145 inline void for_loop(const ArrayPartition
& partition
, F func
)
147 if (partition
.begin
> partition
.end
)
149 for (__uint32 i
=partition
.begin
; i
<= partition
.end
; i
++) func(i
);
152 //! for loop on the tree partition
154 inline void for_tree_partition(F functor
)
156 for (std::list
<__uint32
>::const_iterator it
= m_pLocalContext
->treePartition
.nodes
.begin();
157 it
!=m_pLocalContext
->treePartition
.nodes
.end(); it
++)
164 //! sorting single threaded
165 template<typename T
, typename C
>
166 inline void sort_single(T
* ptr
, __uint32 n
, C comparer
)
170 std::sort(ptr
, ptr
+ n
, comparer
);
174 //! lazy parallel sorting for num_threads = power of two
175 template<typename T
, typename C
>
176 inline void sort_parallel(T
* ptr
, __uint32 n
, C comparer
)
178 if ((n
< numThreads()*1000) || (numThreads() == 1))
179 sort_single(ptr
, n
, comparer
);
181 sort_parallel(ptr
, n
, comparer
, 0, numThreads());
184 //! lazy parallel sorting for num_threads = power of two
185 template<typename T
, typename C
>
186 inline void sort_parallel(T
* ptr
, __uint32 n
, C comparer
, __uint32 threadNrBegin
, __uint32 numThreads
)
191 std::sort(ptr
, ptr
+ n
, comparer
);
195 __uint32 half
= n
>> 1;
196 __uint32 halfThreads
= numThreads
>> 1;
197 if (this->threadNr() < threadNrBegin
+ halfThreads
)
198 sort_parallel(ptr
, half
, comparer
, threadNrBegin
, halfThreads
);
200 sort_parallel(ptr
+ half
, n
- half
, comparer
, threadNrBegin
+ halfThreads
, halfThreads
);
202 // wait until all threads are ready.
204 if (this->threadNr() == threadNrBegin
)
205 std::inplace_merge(ptr
, ptr
+ half
, ptr
+ n
, comparer
);
210 FMEGlobalContext
* m_pGlobalContext
;
211 FMELocalContext
* m_pLocalContext
;