/*
 * $Revision: 2552 $
 *
 * last checkin:
 *   $Author: gutwenger $
 *   $Date: 2012-07-05 16:45:20 +0200 (Do, 05. Jul 2012) $
 ***************************************************************/

/** \file
 * \brief Implementation of class FMEMultipoleKernel.
 *
 * \author Martin Gronemann
 *
 * \par License:
 * This file is part of the Open Graph Drawing Framework (OGDF).
 *
 * \par
 * Copyright (C)<br>
 * See README.txt in the root directory of the OGDF installation for details.
 *
 * \par
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * Version 2 or 3 as published by the Free Software Foundation;
 * see the file LICENSE.txt included in the packaging of this file
 * for details.
 *
 * \par
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * \par
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 * \see http://www.gnu.org/copyleft/gpl.html
 ***************************************************************/
#include "FMEMultipoleKernel.h"
#include "ArrayGraph.h"
#include "LinearQuadtree.h"
#include "LinearQuadtreeBuilder.h"
#include "LinearQuadtreeExpansion.h"
#include "WSPD.h"
#include <algorithm>

namespace ogdf {
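
//! Builds the linear quadtree for the current iteration.
/**
 * The construction proceeds in parallel phases separated by barriers:
 * each thread computes the bounding box of its point partition, the main
 * thread merges the boxes and initializes the tree, all threads compute
 * Morton numbers for their points, the points are sorted by Morton number,
 * and finally the tree is built and partitioned among the threads.
 */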
void FMEMultipoleKernel::quadtreeConstruction(ArrayPartition& pointPartition)
{
    FMELocalContext*  localContext  = m_pLocalContext;
    FMEGlobalContext* globalContext = m_pGlobalContext;
    LinearQuadtree&   tree          = *globalContext->pQuadtree;

    // precompute the bounding box for the quadtree points from the graph nodes
    for_loop(pointPartition, min_max_x_function(localContext));
    for_loop(pointPartition, min_max_y_function(localContext));

    // wait until every thread's bounding box is computed
    sync();

    // let the main thread compute the bounding box of the bounding boxes
    if (isMainThread())
    {
        globalContext->min_x = globalContext->pLocalContext[0]->min_x;
        globalContext->min_y = globalContext->pLocalContext[0]->min_y;
        globalContext->max_x = globalContext->pLocalContext[0]->max_x;
        globalContext->max_y = globalContext->pLocalContext[0]->max_y;
        for (__uint32 j=1; j < numThreads(); j++)
        {
            globalContext->min_x = min(globalContext->min_x, globalContext->pLocalContext[j]->min_x);
            globalContext->min_y = min(globalContext->min_y, globalContext->pLocalContext[j]->min_y);
            globalContext->max_x = max(globalContext->max_x, globalContext->pLocalContext[j]->max_x);
            globalContext->max_y = max(globalContext->max_y, globalContext->pLocalContext[j]->max_y);
        }
        tree.init(globalContext->min_x, globalContext->min_y, globalContext->max_x, globalContext->max_y);
        globalContext->coolDown *= 0.999f;
        tree.clear();
    }
    // wait because the Morton number computation needs the bounding box
    sync();
    // update the Morton numbers to prepare the points for sorting
    for_loop(pointPartition, LQMortonFunctor(localContext));
    // wait so we can sort the points by Morton number
    sync();
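
    // Note: sorting by Morton number lays the points out along a z-order
    // (space-filling) curve, so every quadtree cell corresponds to a
    // contiguous range of the point array -- the key property of a linear
    // quadtree. A Morton key interleaves the bits of the grid coordinates;
    // e.g. x = 3 (011) and y = 5 (101) interleave to 100111 = 39, assuming
    // y contributes the higher bit of each pair (the exact convention is
    // defined by LQMortonFunctor).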
#ifdef OGDF_FME_PARALLEL_QUADTREE_SORT
    // use a simple parallel sorting algorithm
    LinearQuadtree::LQPoint* points = tree.pointArray();
    sort_parallel(points, tree.numberOfPoints(), LQPointComparer);
#else
    if (isMainThread())
    {
        LinearQuadtree::LQPoint* points = tree.pointArray();
        sort_single(points, tree.numberOfPoints(), LQPointComparer);
    }
#endif
    // wait because the quadtree builder needs the sorted order
    sync();
    // if this is not a parallel run, we can do it the easy way
    if (isSingleThreaded())
    {
        LinearQuadtreeBuilder builder(tree);
        // prepare the tree
        builder.prepareTree();
        // and link it
        builder.build();
        LQPartitioner partitioner(localContext);
        partitioner.partition();
    }
    else // the more difficult part
    {
        // snap the left endpoint of the thread's interval to the first point in the cell
        LinearQuadtree::PointID beginPoint = tree.findFirstPointInCell(pointPartition.begin);
        LinearQuadtree::PointID endPoint_plus_one;
        // if this thread is the last one, no snapping is required for the right endpoint
        if (threadNr() == numThreads()-1)
            endPoint_plus_one = tree.numberOfPoints();
        else // otherwise find the left endpoint of the next thread
            endPoint_plus_one = tree.findFirstPointInCell(pointPartition.end+1);
        // now we can prepare the snapped interval
        LinearQuadtreeBuilder builder(tree);
        // this function prepares the tree from beginPoint to endPoint_plus_one-1 (EXCLUDING endPoint_plus_one)
        builder.prepareTree(beginPoint, endPoint_plus_one);
        // save the start, end and count of the inner-node chain in the context
        localContext->firstInnerNode = builder.firstInner;
        localContext->lastInnerNode  = builder.lastInner;
        localContext->numInnerNodes  = builder.numInnerNodes;
        // save the start, end and count of the leaf-node chain in the context
        localContext->firstLeaf = builder.firstLeaf;
        localContext->lastLeaf  = builder.lastLeaf;
        localContext->numLeaves = builder.numLeaves;
        // wait until all threads are finished
        sync();

        // now the main thread has to link the tree
        if (isMainThread())
        {
            // with its own builder
            LinearQuadtreeBuilder sbuilder(tree);
            // first we need the complete chain data
            sbuilder.firstInner    = globalContext->pLocalContext[0]->firstInnerNode;
            sbuilder.firstLeaf     = globalContext->pLocalContext[0]->firstLeaf;
            sbuilder.numInnerNodes = globalContext->pLocalContext[0]->numInnerNodes;
            sbuilder.numLeaves     = globalContext->pLocalContext[0]->numLeaves;
            for (__uint32 j=1; j < numThreads(); j++)
            {
                sbuilder.numLeaves     += globalContext->pLocalContext[j]->numLeaves;
                sbuilder.numInnerNodes += globalContext->pLocalContext[j]->numInnerNodes;
            }
            sbuilder.lastInner = globalContext->pLocalContext[numThreads()-1]->lastInnerNode;
            sbuilder.lastLeaf  = globalContext->pLocalContext[numThreads()-1]->lastLeaf;
            // link the tree
            sbuilder.build();
            // and run the partitioner
            LQPartitioner partitioner(localContext);
            partitioner.partition();
        }
    }
    // wait for the tree to finish
    sync();
    // now update the copy of the point data
    for_loop(pointPartition, LQPointUpdateFunctor(localContext));
    // compute the nodes' coordinates and sizes
    tree.forall_tree_nodes(LQCoordsFunctor(localContext), localContext->innerNodePartition.begin, localContext->innerNodePartition.numNodes)();
    tree.forall_tree_nodes(LQCoordsFunctor(localContext), localContext->leafPartition.begin, localContext->leafPartition.numNodes)();
}
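
//! Single-threaded FMM approximation: the classic four-pass scheme.
/**
 * P2M/M2M on the way up the tree, M2L plus direct P2P evaluation for the
 * well-separated pairs, L2L on the way down, and finally L2P to turn the
 * accumulated local expansions into forces on the points.
 */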
void FMEMultipoleKernel::multipoleApproxSingleThreaded(ArrayPartition& nodePointPartition)
{
    FMELocalContext*  localContext  = m_pLocalContext;
    FMEGlobalContext* globalContext = m_pGlobalContext;
    LinearQuadtree&   tree          = *globalContext->pQuadtree;
    if (isMainThread())
    {
        tree.bottom_up_traversal(                        // do a bottom-up traversal (P2M/M2M pass)
            if_then_else(tree.is_leaf_condition(),       // if the current node is a leaf
                p2m_function(localContext),              // then calculate the multipole coeffs. due to the points in the leaf
                m2m_function(localContext)               // else shift the coefficients of all children to the center of the inner node
            )
        )(tree.root());

        tree.forall_well_separated_pairs(                // do a WSPD traversal (M2L and direct evaluation)
            pair_vice_versa(m2l_function(localContext)), // M2L for a well-separated pair
            p2p_function(localContext),                  // direct evaluation
            p2p_function(localContext)                   // direct evaluation
        )(tree.root());

        tree.top_down_traversal(                         // top-down traversal (L2L pass)
            if_then_else(tree.is_leaf_condition(),       // if the node is a leaf
                do_nothing(),                            // then do nothing; we deal with this case later
                l2l_function(localContext)               // else shift the node's local coeffs to its children
            )
        )(tree.root());                                  // start at the root

        // evaluate all leaves and store the forces in the thread's array
        for_loop(nodePointPartition,                     // loop over the points
            func_comp(                                   // composition of two statements
                l2p_function(localContext),              // evaluate the forces due to the local expansion in the corresponding leaf
                collect_force_function                   // collect the forces of all threads with the following options:
                <
                    COLLECT_REPULSIVE_FACTOR |           // multiply by the repulsive factor stored in the global options
                    COLLECT_TREE_2_GRAPH_ORDER |         // thread data is stored in quadtree leaf order; transform it into graph order
                    COLLECT_ZERO_THREAD_ARRAY            // reset the thread's array
                >(localContext)
            )
        );
    }
}
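
//
// The three variants below differ in how the well-separated pair
// decomposition (WSPD) is computed and consumed: the first computes it
// single-threaded and stores it, the second computes it in parallel but
// evaluates the pairs on the fly, and the third is a hybrid of both.
//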
//! The original algorithm, which runs the WSPD computation completely single-threaded.
void FMEMultipoleKernel::multipoleApproxSingleWSPD(ArrayPartition& nodePointPartition)
{
    FMELocalContext*  localContext  = m_pLocalContext;
    FMEGlobalContext* globalContext = m_pGlobalContext;
    LinearQuadtree&   tree          = *globalContext->pQuadtree;

    // let the main thread run the WSPD
    if (isMainThread())
    {
        // the well-separated pairs decomposition
        tree.forall_well_separated_pairs(                // do a WSPD traversal
            tree.StoreWSPairFunction(),                  // store the ws pairs in the WSPD
            tree.StoreDirectPairFunction(),              // store the direct pairs
            tree.StoreDirectNodeFunction()               // store the direct nodes
        )(tree.root());                                  // start at the root
    }

    // Note: we do not wait for the WSPD to finish; it is not needed yet for
    // the big multithreaded bottom-up traversal.
    for_tree_partition(                                  // for all roots in the thread's tree partition
        tree.bottom_up_traversal(                        // do a bottom-up traversal
            if_then_else(tree.is_leaf_condition(),       // if the current node is a leaf
                p2m_function(localContext),              // then calculate the multipole coeffs. due to the points in the leaf
                m2m_function(localContext)               // else shift the coefficients of all children to the center of the inner node
            )
        )
    );
    sync();
    // the top of the tree has to be done by the main thread
    if (isMainThread())
    {
        tree.bottom_up_traversal(                        // start a bottom-up traversal
            if_then_else(tree.is_leaf_condition(),       // if the current node is a leaf
                p2m_function(localContext),              // then calculate the multipole coeffs. due to the points in the leaf
                m2m_function(localContext)               // else shift the coefficients of all children to the center of the inner node
            ),
            not_condition(tree.is_fence_condition())     // stop when the fence to the threads is reached
        )(tree.root());                                  // start at the root
    }
    // wait until all nodes in the quadtree have their multipole coeffs
    sync();
    // M2L pass with the WSPD
    tree.forall_tree_nodes(M2LFunctor(localContext), localContext->innerNodePartition.begin, localContext->innerNodePartition.numNodes)();
    tree.forall_tree_nodes(M2LFunctor(localContext), localContext->leafPartition.begin, localContext->leafPartition.numNodes)();
    // D2D pass; store the forces in the thread's force array
    for_loop(arrayPartition(tree.numberOfDirectPairs()), D2DFunctor(localContext));
    // D pass; store the forces in the thread's force array
    for_loop(arrayPartition(tree.numberOfDirectNodes()), NDFunctor(localContext));
    // wait until all local coeffs and all direct forces are computed
    sync();
    // big multithreaded top-down traversal; the top of the tree has to be done by the main thread
    if (isMainThread())
    {
        tree.top_down_traversal(                         // top-down traversal
            if_then_else(tree.is_leaf_condition(),       // if the node is a leaf
                do_nothing(),                            // then do nothing; we deal with the L2P pass later
                l2l_function(localContext)               // else shift the node's local coeffs to its children
            ),
            not_condition(tree.is_fence_condition())     // stop when the fence to the threads is reached
        )(tree.root());                                  // start at the root
    }
    sync();
    // the rest is done by the threads
    for_tree_partition(                                  // for all roots in the thread's tree partition
        tree.top_down_traversal(                         // do a top-down traversal
            if_then_else(tree.is_leaf_condition(),       // if the node is a leaf
                do_nothing(),                            // then do nothing; we deal with the L2P pass later
                l2l_function(localContext)               // else shift the node's local coeffs to its children
            )
        )
    );
    // wait until the traversal is finished and all leaves have their accumulated local coeffs
    sync();
    // evaluate all leaves and store the forces in the thread's array
    // (Note: we could store them in the global array, but then we would need random access.)
    // we can start collecting the forces immediately because we evaluate point by point
    for_loop(nodePointPartition,                         // loop over the thread's points
        func_comp(                                       // composition of two statements
            l2p_function(localContext),                  // evaluate the forces due to the local expansion in the corresponding leaf
            collect_force_function                       // collect the forces of all threads with the following options:
            <
                COLLECT_REPULSIVE_FACTOR |               // multiply by the repulsive factor stored in the global options
                COLLECT_TREE_2_GRAPH_ORDER |             // thread data is stored in quadtree leaf order; transform it into graph order
                COLLECT_ZERO_THREAD_ARRAY                // reset the thread's array to zero
            >(localContext)
        )
    );
}
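
//
// In contrast to multipoleApproxSingleWSPD above, the next variant never
// materializes the WSPD: M2L and P2P are evaluated directly during the
// well-separated pairs traversal itself.
//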
//! Newer but slower method: parallel WSPD computation without using the WSPD structure.
void FMEMultipoleKernel::multipoleApproxNoWSPDStructure(ArrayPartition& nodePointPartition)
{
    FMELocalContext*  localContext  = m_pLocalContext;
    FMEGlobalContext* globalContext = m_pGlobalContext;
    LinearQuadtree&   tree          = *globalContext->pQuadtree;
    // big multithreaded bottom-up traversal
    for_tree_partition(                                  // for all roots in the thread's tree partition
        tree.bottom_up_traversal(                        // do a bottom-up traversal
            if_then_else(tree.is_leaf_condition(),       // if the current node is a leaf
                p2m_function(localContext),              // then calculate the multipole coeffs. due to the points in the leaf
                m2m_function(localContext)               // else shift the coefficients of all children to the center of the inner node
            )
        )
    );
    sync();

    // the top of the tree has to be done by the main thread
    if (isMainThread())
    {
        tree.bottom_up_traversal(                        // start a bottom-up traversal
            if_then_else(tree.is_leaf_condition(),       // if the current node is a leaf
                p2m_function(localContext),              // then calculate the multipole coeffs. due to the points in the leaf
                m2m_function(localContext)               // else shift the coefficients of all children to the center of the inner node
            ),
            not_condition(tree.is_fence_condition())     // stop when the fence to the threads is reached
        )(tree.root());                                  // start at the root

        tree.forall_well_separated_pairs(                // do a WSPD traversal
            pair_vice_versa(m2l_function(localContext)), // M2L for a well-separated pair
            p2p_function(localContext),                  // direct evaluation
            p2p_function(localContext),                  // direct evaluation
            not_condition(tree.is_fence_condition())     // stop when the fence to the threads is reached
        )(tree.root());
    }
    // wait until all local coeffs and all direct forces are computed
    sync();

    // now a WSPD traversal for the roots in the thread's tree partition
    for_tree_partition(
        tree.forall_well_separated_pairs(                // do a WSPD traversal
            pair_vice_versa(m2l_function(localContext)), // M2L for a well-separated pair
            p2p_function(localContext),                  // direct evaluation
            p2p_function(localContext)                   // direct evaluation
        )
    );
    // wait until all local coeffs and all direct forces are computed
    sync();

    // big multithreaded top-down traversal; the top of the tree has to be done by the main thread
    if (isMainThread())
    {
        tree.top_down_traversal(                         // top-down traversal
            if_then_else(tree.is_leaf_condition(),       // if the node is a leaf
                do_nothing(),                            // then do nothing; we deal with this case later
                l2l_function(localContext)               // else shift the node's local coeffs to its children
            ),
            not_condition(tree.is_fence_condition())     // stop when the fence to the threads is reached
        )(tree.root());                                  // start at the root
    }
    // wait until the traversal is finished and all thread roots have their coefficients
    sync();

    for_tree_partition(                                  // for all roots in the thread's tree partition
        tree.top_down_traversal(                         // do a top-down traversal
            if_then_else(tree.is_leaf_condition(),       // if the node is a leaf
                do_nothing(),                            // then do nothing; we deal with this case later
                l2l_function(localContext)               // else shift the node's local coeffs to its children
            )
        )
    );
    // wait until the traversal is finished and all leaves have their accumulated local coeffs
    sync();
    // evaluate all leaves and store the forces in the thread's array
    // (Note: we could store them in the global array, but then we would need random access.)
    // we can start collecting the forces immediately because we evaluate point by point
    for_loop(nodePointPartition,                         // loop over the thread's points
        func_comp(                                       // composition of two statements
            l2p_function(localContext),                  // evaluate the forces due to the local expansion in the corresponding leaf
            collect_force_function                       // collect the forces of all threads with the following options:
            <
                COLLECT_REPULSIVE_FACTOR |               // multiply by the repulsive factor stored in the global options
                COLLECT_TREE_2_GRAPH_ORDER |             // thread data is stored in quadtree leaf order; transform it into graph order
                COLLECT_ZERO_THREAD_ARRAY                // reset the thread's array
            >(localContext)
        )
    );
}
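
//
// The final variant combines the two approaches: the main thread stores the
// WSPD for the top of the tree (above the fence), while each thread
// evaluates the well-separated pairs of its own subtree on the fly.
//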
//! The final approximation algorithm, which runs the WSPD in parallel without storing it for the threads' subtrees.
void FMEMultipoleKernel::multipoleApproxFinal(ArrayPartition& nodePointPartition)
{
    FMELocalContext*  localContext  = m_pLocalContext;
    FMEGlobalContext* globalContext = m_pGlobalContext;
    LinearQuadtree&   tree          = *globalContext->pQuadtree;
    // big multithreaded bottom-up traversal
    for_tree_partition(                                  // for all roots in the thread's tree partition
        tree.bottom_up_traversal(                        // do a bottom-up traversal
            if_then_else(tree.is_leaf_condition(),       // if the current node is a leaf
                p2m_function(localContext),              // then calculate the multipole coeffs. due to the points in the leaf
                m2m_function(localContext)               // else shift the coefficients of all children to the center of the inner node
            )
        )
    );
    sync();
    // the top of the tree has to be done by the main thread
    if (isMainThread())
    {
        tree.bottom_up_traversal(                        // start a bottom-up traversal
            if_then_else(tree.is_leaf_condition(),       // if the current node is a leaf
                p2m_function(localContext),              // then calculate the multipole coeffs. due to the points in the leaf
                m2m_function(localContext)               // else shift the coefficients of all children to the center of the inner node
            ),
            not_condition(tree.is_fence_condition())     // stop when the fence to the threads is reached
        )(tree.root());                                  // start at the root

        tree.forall_well_separated_pairs(                // do a WSPD traversal
            tree.StoreWSPairFunction(),                  // store the ws pairs in the WSPD
            tree.StoreDirectPairFunction(),              // store the direct pairs
            tree.StoreDirectNodeFunction(),              // store the direct nodes
            not_condition(tree.is_fence_condition())     // stop when the fence to the threads is reached
        )(tree.root());
    }
    // wait for the main thread to finish
    sync();

    // M2L pass with the WSPD for the result of the single-threaded pass above
    tree.forall_tree_nodes(M2LFunctor(localContext), localContext->innerNodePartition.begin, localContext->innerNodePartition.numNodes)();
    tree.forall_tree_nodes(M2LFunctor(localContext), localContext->leafPartition.begin, localContext->leafPartition.numNodes)();

    // D2D and ND passes; store the forces in the thread's force array
    for_loop(arrayPartition(tree.numberOfDirectPairs()), D2DFunctor(localContext));
    for_loop(arrayPartition(tree.numberOfDirectNodes()), NDFunctor(localContext));

    // wait until all local coeffs and all direct forces are computed
    sync();

    // the rest of the WSPD can be done on the fly by each thread
    for_tree_partition(
        tree.forall_well_separated_pairs(                // do a WSPD traversal
            pair_vice_versa(m2l_function(localContext)), // M2L for a well-separated pair
            p2p_function(localContext),                  // direct evaluation
            p2p_function(localContext)                   // direct evaluation
        )
    );
    // wait until all local coeffs and all direct forces are computed
    sync();

    // big multithreaded top-down traversal; the top of the tree has to be done by the main thread
    if (isMainThread())
    {
        tree.top_down_traversal(                         // top-down traversal (L2L pass)
            if_then_else(tree.is_leaf_condition(),       // if the node is a leaf
                do_nothing(),                            // then do nothing; we deal with this case later
                l2l_function(localContext)               // else shift the node's local coeffs to its children
            ),
            not_condition(tree.is_fence_condition())     // stop when the fence to the threads is reached
        )(tree.root());                                  // start at the root
    }
    // wait for the top of the tree
    sync();

    for_tree_partition(                                  // for all roots in the thread's tree partition (L2L pass)
        tree.top_down_traversal(                         // do a top-down traversal
            if_then_else(tree.is_leaf_condition(),       // if the node is a leaf
                do_nothing(),                            // then do nothing; we deal with this case later
                l2l_function(localContext)               // else shift the node's local coeffs to its children
            )
        )
    );
    // wait until the traversal is finished and all leaves have their accumulated local coeffs
    sync();
    // evaluate all leaves and store the forces in the thread's array
    // (Note: we could store them in the global array, but then we would need random access.)
    // we can start collecting the forces immediately because we evaluate point by point
    for_loop(nodePointPartition,                         // loop over the thread's points
        func_comp(                                       // composition of two statements
            l2p_function(localContext),                  // evaluate the forces due to the local expansion in the corresponding leaf
            collect_force_function                       // collect the forces of all threads with the following options:
            <
                COLLECT_REPULSIVE_FACTOR |               // multiply by the repulsive factor stored in the global options
                COLLECT_TREE_2_GRAPH_ORDER |             // thread data is stored in quadtree leaf order; transform it into graph order
                COLLECT_ZERO_THREAD_ARRAY                // reset the thread's array
            >(localContext)
        )
    );
}
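
//! Main kernel entry point; executed by every thread of the pool.
/**
 * After zeroing the force arrays, a few preprocessing iterations apply edge
 * forces only; the main loop then rebuilds the quadtree, runs the multipole
 * approximation, adds the edge forces, moves the nodes, and checks the
 * convergence criterion until maxNumIterations or earlyExit is reached.
 */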
void FMEMultipoleKernel::operator()(FMEGlobalContext* globalContext)
{
    __uint32 maxNumIterations        = globalContext->pOptions->maxNumIterations;
    __uint32 minNumIterations        = globalContext->pOptions->minNumIterations;
    ArrayGraph& graph                = *globalContext->pGraph;
    LinearQuadtree& tree             = *globalContext->pQuadtree;
    LinearQuadtreeExpansion& treeExp = *globalContext->pExpansion;
    FMELocalContext* localContext    = globalContext->pLocalContext[threadNr()];
    FMEGlobalOptions* options        = globalContext->pOptions;
    float* threadsForceArrayX        = localContext->forceX;
    float* threadsForceArrayY        = localContext->forceY;
    float* globalForceArrayX         = globalContext->globalForceX;
    float* globalForceArrayY         = globalContext->globalForceY;

    ArrayPartition edgePartition      = arrayPartition(graph.numEdges());
    ArrayPartition nodePointPartition = arrayPartition(graph.numNodes());

    m_pLocalContext  = localContext;
    m_pGlobalContext = globalContext;

    /****************************/
    /*           INIT           */
    /****************************/
    // reset the global force array
    for_loop_array_set(threadNr(), numThreads(), globalForceArrayX, tree.numberOfPoints(), 0.0f);
    for_loop_array_set(threadNr(), numThreads(), globalForceArrayY, tree.numberOfPoints(), 0.0f);

    // reset the thread's force array
    for (__uint32 i = 0; i < tree.numberOfPoints(); i++)
    {
        threadsForceArrayX[i] = 0.0f;
        threadsForceArrayY[i] = 0.0f;
    }

    // preprocessing: a few iterations with edge forces only
    __uint32 maxNumIt = options->preProcMaxNumIterations;
    for (__uint32 currNumIteration = 0; currNumIteration < maxNumIt; currNumIteration++)
    {
        // iterate over all edges and store the resulting forces in the thread's array
        for_loop(edgePartition,
            edge_force_function<EDGE_FORCE_DIV_DEGREE>(localContext)  // divide the forces by the node degree to avoid oscillation
        );
        // wait until all edges are done
        sync();
        // now collect the forces in parallel, put the sums into the global array and move the nodes accordingly
        for_loop(nodePointPartition,
            func_comp(
                collect_force_function<COLLECT_EDGE_FACTOR_PREP | COLLECT_ZERO_THREAD_ARRAY>(localContext),
                node_move_function<TIME_STEP_PREP | ZERO_GLOBAL_ARRAY>(localContext)
            )
        );
    }
    if (isMainThread())
    {
        globalContext->coolDown = 1.0f;
    }
    sync();
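
    // main loop of the force-directed iteration: each pass rebuilds the
    // quadtree, runs the multipole approximation, adds the edge forces,
    // moves the nodes, and then checks the stop criterion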
    for (__uint32 currNumIteration = 0; currNumIteration < maxNumIterations && !globalContext->earlyExit; currNumIteration++)
    {
        // reset the expansion coefficients
        for_loop_array_set(threadNr(), numThreads(), treeExp.m_multiExp, treeExp.m_numExp*(treeExp.m_numCoeff << 1), 0.0);
        for_loop_array_set(threadNr(), numThreads(), treeExp.m_localExp, treeExp.m_numExp*(treeExp.m_numCoeff << 1), 0.0);

        localContext->maxForceSq = 0.0;
        localContext->avgForce   = 0.0;

        // construct the quadtree
        quadtreeConstruction(nodePointPartition);
        // wait for all threads to finish
        sync();

        if (isSingleThreaded())  // if this is a single-threaded run, use the simple approximation
            multipoleApproxSingleThreaded(nodePointPartition);
        else                     // otherwise use the partitioning
            multipoleApproxFinal(nodePointPartition);
        // now wait until all forces are summed up in the global array and mapped to graph node order
        sync();

        // run the edge forces
        for_loop(edgePartition,  // iterate over all edges and sum up the forces in the thread's array
            edge_force_function<EDGE_FORCE_DIV_DEGREE>(localContext)  // divide the forces by the node degree to avoid oscillation
        );
        // wait until the edges are finished
        sync();

        // collect the edge forces and move the nodes without waiting
        for_loop(nodePointPartition,
            func_comp(
                collect_force_function<COLLECT_EDGE_FACTOR | COLLECT_ZERO_THREAD_ARRAY>(localContext),
                node_move_function<TIME_STEP_NORMAL | ZERO_GLOBAL_ARRAY>(localContext)
            )
        );
        // wait so we can decide whether we need another iteration
        sync();
        // check the maximum squared force over all threads
        if (isMainThread())
        {
            double maxForceSq = 0.0;
            for (__uint32 j=0; j < numThreads(); j++)
                maxForceSq = max(globalContext->pLocalContext[j]->maxForceSq, maxForceSq);

            // if we are allowed to quit and the max squared force falls below the threshold, tell all threads we are done
            if ((currNumIteration >= minNumIterations) && (maxForceSq < globalContext->pOptions->stopCritForce))
                globalContext->earlyExit = true;
        }
        // this sync is required so all threads see the earlyExit result
        sync();
    }
}
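
//
// Typical lifecycle (a sketch; the actual driver lives outside this file,
// and the surrounding setup below is hypothetical):
//
//   FMEGlobalContext* ctx = FMEMultipoleKernel::allocateContext(pGraph, pOptions, numThreads);
//   // ... each worker thread then runs the kernel via operator()(ctx) ...
//   FMEMultipoleKernel::deallocateContext(ctx);
//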
//! Allocates the global context and a local context for each thread.
FMEGlobalContext* FMEMultipoleKernel::allocateContext(ArrayGraph* pGraph, FMEGlobalOptions* pOptions, __uint32 numThreads)
{
    FMEGlobalContext* globalContext = new FMEGlobalContext();

    globalContext->numThreads = numThreads;
    globalContext->pOptions   = pOptions;
    globalContext->pGraph     = pGraph;
    globalContext->pQuadtree  = new LinearQuadtree(pGraph->numNodes(), pGraph->nodeXPos(), pGraph->nodeYPos(), pGraph->nodeSize());
    globalContext->pWSPD      = globalContext->pQuadtree->wspd();
    globalContext->pExpansion = new LinearQuadtreeExpansion(globalContext->pOptions->multipolePrecision, (*globalContext->pQuadtree));
    __uint32 numPoints = globalContext->pQuadtree->numberOfPoints();
    typedef FMELocalContext* FMELocalContextPtr;

    globalContext->pLocalContext = new FMELocalContextPtr[numThreads];
    globalContext->globalForceX  = (float*)MALLOC_16(sizeof(float)*numPoints);
    globalContext->globalForceY  = (float*)MALLOC_16(sizeof(float)*numPoints);
    for (__uint32 i=0; i < numThreads; i++)
    {
        globalContext->pLocalContext[i] = new FMELocalContext;
        globalContext->pLocalContext[i]->forceX = (float*)MALLOC_16(sizeof(float)*numPoints);
        globalContext->pLocalContext[i]->forceY = (float*)MALLOC_16(sizeof(float)*numPoints);
        globalContext->pLocalContext[i]->pGlobalContext = globalContext;
    }
    return globalContext;
}
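
//
// Note: the force arrays above are allocated with MALLOC_16 and must be
// released with FREE_16 (presumably a 16-byte-aligned allocation for SIMD
// use); mixing them with plain new/delete or malloc/free would be an error.
//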
//! Frees the memory of all contexts.
void FMEMultipoleKernel::deallocateContext(FMEGlobalContext* globalContext)
{
    __uint32 numThreads = globalContext->numThreads;
    for (__uint32 i=0; i < numThreads; i++)
    {
        FREE_16(globalContext->pLocalContext[i]->forceX);
        FREE_16(globalContext->pLocalContext[i]->forceY);
        delete globalContext->pLocalContext[i];
    }
    FREE_16(globalContext->globalForceX);
    FREE_16(globalContext->globalForceY);
    delete[] globalContext->pLocalContext;
    delete globalContext->pExpansion;
    delete globalContext->pQuadtree;
    delete globalContext;
}

} // end of namespace ogdf