/*
 * $Date: 2012-07-05 16:45:20 +0200 (Do, 05. Jul 2012) $
 ***************************************************************/

/** \file
 * \brief Implementation of class FMEMultipoleKernel.
 *
 * \author Martin Gronemann
 *
 * \par License:
 * This file is part of the Open Graph Drawing Framework (OGDF).
 * See README.txt in the root directory of the OGDF installation for details.
 *
 * \par
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * Version 2 or 3 as published by the Free Software Foundation;
 * see the file LICENSE.txt included in the packaging of this file
 * for details.
 *
 * \par
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * \par
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 * \see http://www.gnu.org/copyleft/gpl.html
 ***************************************************************/
43 #include "FMEMultipoleKernel.h"
44 #include "ArrayGraph.h"
45 #include "LinearQuadtree.h"
46 #include "LinearQuadtreeBuilder.h"
47 #include "LinearQuadtreeExpansion.h"
void FMEMultipoleKernel::quadtreeConstruction(ArrayPartition& pointPartition)
{
    FMELocalContext*  localContext  = m_pLocalContext;
    FMEGlobalContext* globalContext = m_pGlobalContext;
    LinearQuadtree&   tree          = *globalContext->pQuadtree;

    // precompute the bounding box for the quadtree points from the graph nodes
    for_loop(pointPartition, min_max_x_function(localContext));
    for_loop(pointPartition, min_max_y_function(localContext));
    // wait until the thread's bounding box is computed
    sync();

    // let the main thread compute the bounding box of the bounding boxes
    if (isMainThread())
    {
        globalContext->min_x = globalContext->pLocalContext[0]->min_x;
        globalContext->min_y = globalContext->pLocalContext[0]->min_y;
        globalContext->max_x = globalContext->pLocalContext[0]->max_x;
        globalContext->max_y = globalContext->pLocalContext[0]->max_y;
        for (__uint32 j = 1; j < numThreads(); j++)
        {
            globalContext->min_x = min(globalContext->min_x, globalContext->pLocalContext[j]->min_x);
            globalContext->min_y = min(globalContext->min_y, globalContext->pLocalContext[j]->min_y);
            globalContext->max_x = max(globalContext->max_x, globalContext->pLocalContext[j]->max_x);
            globalContext->max_y = max(globalContext->max_y, globalContext->pLocalContext[j]->max_y);
        }
        tree.init(globalContext->min_x, globalContext->min_y, globalContext->max_x, globalContext->max_y);
        globalContext->coolDown *= 0.999f;
    }
    // wait because the Morton number computation needs the bounding box
    sync();
    // update the Morton numbers to prepare the points for sorting
    for_loop(pointPartition, LQMortonFunctor(localContext));
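    /* Background (a sketch, not OGDF code): LQMortonFunctor assigns each point
     * a Morton (z-order) key, so that sorting by key below groups the points
     * by quadtree cell. A minimal sketch of the bit interleaving for two
     * 32-bit grid coordinates, assuming the usual convention of x on the even
     * bits (the real functor additionally maps point coordinates to the grid):
     *
     *   unsigned long long mortonKey2D(unsigned int x, unsigned int y)
     *   {
     *       unsigned long long key = 0;
     *       for (unsigned int b = 0; b < 32; b++)
     *       {
     *           key |= (unsigned long long)((x >> b) & 1) << (2*b);     // even bits from x
     *           key |= (unsigned long long)((y >> b) & 1) << (2*b + 1); // odd bits from y
     *       }
     *       return key;
     *   }
     */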
    // wait so we can sort the points by Morton number
    sync();

#ifdef OGDF_FME_PARALLEL_QUADTREE_SORT
    // use a simple parallel sorting algorithm
    LinearQuadtree::LQPoint* points = tree.pointArray();
    sort_parallel(points, tree.numberOfPoints(), LQPointComparer);
#else
    // let the main thread sort the points
    if (isMainThread())
    {
        LinearQuadtree::LQPoint* points = tree.pointArray();
        sort_single(points, tree.numberOfPoints(), LQPointComparer);
    }
#endif
    // wait because the quadtree builder needs the sorted order
    sync();
    // if this is not a parallel run, we can do it the easy way
    if (isSingleThreaded())
    {
        LinearQuadtreeBuilder builder(tree);
        // prepare and link the tree
        builder.prepareTree();
        builder.build();
        LQPartitioner partitioner(localContext);
        partitioner.partition();
    } else // the more difficult part
    {
        // snap the left point of this thread's interval to the first point in the cell
        LinearQuadtree::PointID beginPoint = tree.findFirstPointInCell(pointPartition.begin);
        LinearQuadtree::PointID endPoint_plus_one;
        // if this thread is the last one, no snapping is required for the right point
        if (threadNr() == numThreads()-1)
            endPoint_plus_one = tree.numberOfPoints();
        else // find the left point of the next thread
            endPoint_plus_one = tree.findFirstPointInCell(pointPartition.end+1);
        // now we can prepare the snapped interval
        LinearQuadtreeBuilder builder(tree);
        // this function prepares the tree from beginPoint to endPoint_plus_one-1 (EXCLUDING endPoint_plus_one)
        builder.prepareTree(beginPoint, endPoint_plus_one);
        // save the start, end and count of the inner node chain in the context
        localContext->firstInnerNode = builder.firstInner;
        localContext->lastInnerNode  = builder.lastInner;
        localContext->numInnerNodes  = builder.numInnerNodes;
        // save the start, end and count of the leaf node chain in the context
        localContext->firstLeaf = builder.firstLeaf;
        localContext->lastLeaf  = builder.lastLeaf;
        localContext->numLeaves = builder.numLeaves;
        // wait until all threads are finished
        sync();

        // now the main thread has to link the tree
        if (isMainThread())
        {
            // with its own builder
            LinearQuadtreeBuilder sbuilder(tree);
            // first we need the complete chain data
            sbuilder.firstInner    = globalContext->pLocalContext[0]->firstInnerNode;
            sbuilder.firstLeaf     = globalContext->pLocalContext[0]->firstLeaf;
            sbuilder.numInnerNodes = globalContext->pLocalContext[0]->numInnerNodes;
            sbuilder.numLeaves     = globalContext->pLocalContext[0]->numLeaves;
            for (__uint32 j = 1; j < numThreads(); j++)
            {
                sbuilder.numLeaves     += globalContext->pLocalContext[j]->numLeaves;
                sbuilder.numInnerNodes += globalContext->pLocalContext[j]->numInnerNodes;
            }
            sbuilder.lastInner = globalContext->pLocalContext[numThreads()-1]->lastInnerNode;
            sbuilder.lastLeaf  = globalContext->pLocalContext[numThreads()-1]->lastLeaf;
            sbuilder.buildHierarchy();
        }
        // and run the partitions
        LQPartitioner partitioner(localContext);
        partitioner.partition();
    }
    // wait for the tree to finish
    sync();
    // now update the copy of the point data
    for_loop(pointPartition, LQPointUpdateFunctor(localContext));
    // compute the nodes' coordinates and sizes
    tree.forall_tree_nodes(LQCoordsFunctor(localContext), localContext->innerNodePartition.begin, localContext->innerNodePartition.numNodes)();
    tree.forall_tree_nodes(LQCoordsFunctor(localContext), localContext->leafPartition.begin, localContext->leafPartition.numNodes)();
}
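/* Overview (summary comment, not OGDF documentation): the multipoleApprox*
 * variants below all implement the same FMM pass pipeline on the quadtree
 * built above; they differ only in how the work is distributed across the
 * threads:
 *
 *   P2M: leaves aggregate their points into multipole coefficients,
 *   M2M: child coefficients are shifted bottom-up to the parents,
 *   M2L: multipole coefficients of well-separated pairs are converted
 *        into local coefficients (the approximation step),
 *   L2L: local coefficients are pushed top-down to the children,
 *   L2P: leaves evaluate their local expansion at each point,
 *   P2P: pairs that are not well separated are evaluated directly.
 */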
//! the simple single threaded multipole approximation
void FMEMultipoleKernel::multipoleApproxSingleThreaded(ArrayPartition& nodePointPartition)
{
    FMELocalContext*  localContext  = m_pLocalContext;
    FMEGlobalContext* globalContext = m_pGlobalContext;
    LinearQuadtree&   tree          = *globalContext->pQuadtree;
    if (isMainThread())
    {
        tree.bottom_up_traversal(                   // do a bottom up traversal, M2M pass
            if_then_else(tree.is_leaf_condition(),  // if the current node is a leaf
                p2m_function(localContext),         // then calculate the multipole coeffs due to the points in the leaf
                m2m_function(localContext)          // else shift the coefficients of all children to the center of the inner node
            )
        )(tree.root()); // start at the root

        tree.forall_well_separated_pairs(                   // do a WSPD traversal, M2L and direct evaluation
            pair_vice_versa(m2l_function(localContext)),    // M2L for a well-separated pair
            p2p_function(localContext),                     // direct evaluation
            p2p_function(localContext)                      // direct evaluation
        )(tree.root()); // start at the root

        tree.top_down_traversal(                    // top down traversal, L2L pass
            if_then_else(tree.is_leaf_condition(),  // if the node is a leaf
                do_nothing(),                       // then do nothing, we will deal with this case later
                l2l_function(localContext)          // else shift the node's local coeffs to the children
            )
        )(tree.root()); // start at the root
        // evaluate all leaves and store the forces in the thread's array
        for_loop(nodePointPartition,                 // loop over points
            func_comp(                               // composition of two statements
                l2p_function(localContext),          // evaluate the forces due to the local expansion in the corresponding leaf
                collect_force_function               // collect the forces of all threads with the following options:
                <
                    COLLECT_REPULSIVE_FACTOR |       // multiply by the repulsive factor stored in the global options
                    COLLECT_TREE_2_GRAPH_ORDER |     // thread data is stored in quadtree leaf order, transform it into graph order
                    COLLECT_ZERO_THREAD_ARRAY        // reset the thread array
                >(localContext)
            )
        );
    }
}
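/* A rough sketch (hypothetical helper names, not the real functor) of what
 * collect_force_function above does for each point i of the partition, with
 * the behavior toggled by the COLLECT_* flags:
 *
 *   float sumX = 0, sumY = 0;
 *   for (__uint32 t = 0; t < numThreads(); t++)  // sum over all thread arrays
 *   {
 *       sumX += forceXOfThread(t)[i];
 *       sumY += forceYOfThread(t)[i];
 *       // COLLECT_ZERO_THREAD_ARRAY: reset the entries after reading
 *   }
 *   // COLLECT_REPULSIVE_FACTOR: scale the sum by the repulsive factor
 *   // COLLECT_TREE_2_GRAPH_ORDER: write to the graph index of point i
 *   globalForceX[graphIndexOf(i)] += factor * sumX;
 *   globalForceY[graphIndexOf(i)] += factor * sumY;
 */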
//! the original algorithm which runs the WSPD completely single threaded
void FMEMultipoleKernel::multipoleApproxSingleWSPD(ArrayPartition& nodePointPartition)
{
    FMELocalContext*  localContext  = m_pLocalContext;
    FMEGlobalContext* globalContext = m_pGlobalContext;
    LinearQuadtree&   tree          = *globalContext->pQuadtree;

    // let the main thread run the WSPD
    if (isMainThread())
    {
        // the well-separated pairs decomposition
        tree.forall_well_separated_pairs(       // do a WSPD traversal
            tree.StoreWSPairFunction(),         // store the ws pairs in the WSPD
            tree.StoreDirectPairFunction(),     // store the direct pairs
            tree.StoreDirectNodeFunction()      // store the direct nodes
        )(tree.root()); // start at the root
    }
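    /* Background (a conceptual sketch, not the actual test in LinearQuadtree):
     * two tree nodes u, v form a well-separated pair when their boxes are far
     * apart relative to their size, conceptually something like
     *
     *   dist(center(u), center(v)) > s * (radius(u) + radius(v))
     *
     * for a separation factor s. Such pairs are approximated with a single
     * M2L conversion; pairs that fail the test are split further or, at the
     * leaves, evaluated directly (P2P).
     */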
    // Note: don't wait for the WSPD to finish. We don't need it yet for
    // the big multithreaded bottom up traversal.
    for_tree_partition(                             // for all roots in the thread's tree partition
        tree.bottom_up_traversal(                   // do a bottom up traversal
            if_then_else(tree.is_leaf_condition(),  // if the current node is a leaf
                p2m_function(localContext),         // then calculate the multipole coeffs due to the points in the leaf
                m2m_function(localContext)          // else shift the coefficients of all children to the center of the inner node
            )
        )
    );
    sync();
    // top of the tree has to be done by the main thread
    if (isMainThread())
    {
        tree.bottom_up_traversal(                       // start a bottom up traversal
            if_then_else(tree.is_leaf_condition(),      // if the current node is a leaf
                p2m_function(localContext),             // then calculate the multipole coeffs due to the points in the leaf
                m2m_function(localContext)              // else shift the coefficients of all children to the center of the inner node
            ),
            not_condition(tree.is_fence_condition())    // stop when the fence to the threads is reached
        )(tree.root()); // start at the root
    }
    // wait until all nodes in the quadtree have their multipole coeffs
    sync();
    // M2L pass with the WSPD
    tree.forall_tree_nodes(M2LFunctor(localContext), localContext->innerNodePartition.begin, localContext->innerNodePartition.numNodes)();
    tree.forall_tree_nodes(M2LFunctor(localContext), localContext->leafPartition.begin, localContext->leafPartition.numNodes)();
    // D2D pass, store the forces in the thread force array
    for_loop(arrayPartition(tree.numberOfDirectPairs()), D2DFunctor(localContext));
    // ND pass, store the forces in the thread force array
    for_loop(arrayPartition(tree.numberOfDirectNodes()), NDFunctor(localContext));
    // wait until all local coeffs and all direct forces are computed
    sync();
    // big multithreaded top down traversal. the top of the tree has to be done by the main thread
    if (isMainThread())
    {
        tree.top_down_traversal(                        // top down traversal
            if_then_else(tree.is_leaf_condition(),      // if the node is a leaf
                do_nothing(),                           // then do nothing, we will deal with the L2P pass later
                l2l_function(localContext)              // else shift the node's local coeffs to the children
            ),
            not_condition(tree.is_fence_condition())    // stop when the fence to the threads is reached
        )(tree.root()); // start at the root
    }
    sync();
    // the rest is done by the threads
    for_tree_partition(                             // for all roots in the thread's tree partition
        tree.top_down_traversal(                    // do a top down traversal
            if_then_else(tree.is_leaf_condition(),  // if the node is a leaf
                do_nothing(),                       // then do nothing, we will deal with the L2P pass later
                l2l_function(localContext)          // else shift the node's local coeffs to the children
            )
        )
    );
    // wait until the traversal is finished and all leaves have their accumulated local coeffs
    sync();
    // evaluate all leaves and store the forces in the thread array
    // (Note: we could store them in the global array, but then we would have to use random access)
    // we can start collecting the forces immediately because we evaluate point by point
    for_loop(nodePointPartition,                 // loop over the thread's points
        func_comp(                               // composition of two statements
            l2p_function(localContext),          // evaluate the forces due to the local expansion in the corresponding leaf
            collect_force_function               // collect the forces of all threads with the following options:
            <
                COLLECT_REPULSIVE_FACTOR |       // multiply by the repulsive factor stored in the global options
                COLLECT_TREE_2_GRAPH_ORDER |     // thread data is stored in quadtree leaf order, transform it into graph order
                COLLECT_ZERO_THREAD_ARRAY        // reset the thread array to zero
            >(localContext)
        )
    );
}
//! a newer but slower method: computes the WSPD in parallel without using the WSPD structure
void FMEMultipoleKernel::multipoleApproxNoWSPDStructure(ArrayPartition& nodePointPartition)
{
    FMELocalContext*  localContext  = m_pLocalContext;
    FMEGlobalContext* globalContext = m_pGlobalContext;
    LinearQuadtree&   tree          = *globalContext->pQuadtree;
    // big multithreaded bottom up traversal
    for_tree_partition(                             // for all roots in the thread's tree partition
        tree.bottom_up_traversal(                   // do a bottom up traversal
            if_then_else(tree.is_leaf_condition(),  // if the current node is a leaf
                p2m_function(localContext),         // then calculate the multipole coeffs due to the points in the leaf
                m2m_function(localContext)          // else shift the coefficients of all children to the center of the inner node
            )
        )
    );
    sync();
    // top of the tree has to be done by the main thread
    if (isMainThread())
    {
        tree.bottom_up_traversal(                       // start a bottom up traversal
            if_then_else(tree.is_leaf_condition(),      // if the current node is a leaf
                p2m_function(localContext),             // then calculate the multipole coeffs due to the points in the leaf
                m2m_function(localContext)              // else shift the coefficients of all children to the center of the inner node
            ),
            not_condition(tree.is_fence_condition())    // stop when the fence to the threads is reached
        )(tree.root()); // start at the root

        tree.forall_well_separated_pairs(                   // do a WSPD traversal
            pair_vice_versa(m2l_function(localContext)),    // M2L for a well-separated pair
            p2p_function(localContext),                     // direct evaluation
            p2p_function(localContext),                     // direct evaluation
            not_condition(tree.is_fence_condition())        // stop when the fence to the threads is reached
        )(tree.root()); // start at the root
    }
    // wait until all local coeffs and all direct forces are computed
    sync();

    // now a WSPD traversal for the roots in the tree partition
    for_tree_partition(
        tree.forall_well_separated_pairs(                   // do a WSPD traversal
            pair_vice_versa(m2l_function(localContext)),    // M2L for a well-separated pair
            p2p_function(localContext),                     // direct evaluation
            p2p_function(localContext)                      // direct evaluation
        )
    );
    // wait until all local coeffs and all direct forces are computed
    sync();

    // big multithreaded top down traversal. the top of the tree has to be done by the main thread
    if (isMainThread())
    {
        tree.top_down_traversal(                        // top down traversal
            if_then_else(tree.is_leaf_condition(),      // if the node is a leaf
                do_nothing(),                           // then do nothing, we will deal with this case later
                l2l_function(localContext)              // else shift the node's local coeffs to the children
            ),
            not_condition(tree.is_fence_condition())    // stop when the fence to the threads is reached
        )(tree.root()); // start at the root
    }
    // wait until the traversal is finished and all roots of the threads have their coefficients
    sync();

    for_tree_partition(                             // for all roots in the thread's tree partition
        tree.top_down_traversal(                    // do a top down traversal
            if_then_else(tree.is_leaf_condition(),  // if the node is a leaf
                do_nothing(),                       // then do nothing, we will deal with this case later
                l2l_function(localContext)          // else shift the node's local coeffs to the children
            )
        )
    );
    // wait until the traversal is finished and all leaves have their accumulated local coeffs
    sync();
    // evaluate all leaves and store the forces in the thread array
    // (Note: we could store them in the global array, but then we would have to use random access)
    // we can start collecting the forces immediately because we evaluate point by point
    for_loop(nodePointPartition,                 // loop over the thread's points
        func_comp(                               // composition of two statements
            l2p_function(localContext),          // evaluate the forces due to the local expansion in the corresponding leaf
            collect_force_function               // collect the forces of all threads with the following options:
            <
                COLLECT_REPULSIVE_FACTOR |       // multiply by the repulsive factor stored in the global options
                COLLECT_TREE_2_GRAPH_ORDER |     // thread data is stored in quadtree leaf order, transform it into graph order
                COLLECT_ZERO_THREAD_ARRAY        // reset the thread array
            >(localContext)
        )
    );
}
//! the final approximation algorithm which runs the WSPD in parallel without storing it in the threads' subtrees
void FMEMultipoleKernel::multipoleApproxFinal(ArrayPartition& nodePointPartition)
{
    FMELocalContext*  localContext  = m_pLocalContext;
    FMEGlobalContext* globalContext = m_pGlobalContext;
    LinearQuadtree&   tree          = *globalContext->pQuadtree;
    // big multithreaded bottom up traversal
    for_tree_partition(                             // for all roots in the thread's tree partition
        tree.bottom_up_traversal(                   // do a bottom up traversal
            if_then_else(tree.is_leaf_condition(),  // if the current node is a leaf
                p2m_function(localContext),         // then calculate the multipole coeffs due to the points in the leaf
                m2m_function(localContext)          // else shift the coefficients of all children to the center of the inner node
            )
        )
    );
    sync();
    // top of the tree has to be done by the main thread
    if (isMainThread())
    {
        tree.bottom_up_traversal(                       // start a bottom up traversal
            if_then_else(tree.is_leaf_condition(),      // if the current node is a leaf
                p2m_function(localContext),             // then calculate the multipole coeffs due to the points in the leaf
                m2m_function(localContext)              // else shift the coefficients of all children to the center of the inner node
            ),
            not_condition(tree.is_fence_condition())    // stop when the fence to the threads is reached
        )(tree.root()); // start at the root

        tree.forall_well_separated_pairs(               // do a WSPD traversal
            tree.StoreWSPairFunction(),                 // store the ws pairs in the WSPD
            tree.StoreDirectPairFunction(),             // store the direct pairs
            tree.StoreDirectNodeFunction(),             // store the direct nodes
            not_condition(tree.is_fence_condition())    // stop when the fence to the threads is reached
        )(tree.root()); // start at the root
    }
    // wait for the main thread to finish
    sync();

    // M2L pass with the WSPD for the result of the single threaded pass above
    tree.forall_tree_nodes(M2LFunctor(localContext), localContext->innerNodePartition.begin, localContext->innerNodePartition.numNodes)();
    tree.forall_tree_nodes(M2LFunctor(localContext), localContext->leafPartition.begin, localContext->leafPartition.numNodes)();

    // D2D and ND passes, store the forces in the thread force array
    for_loop(arrayPartition(tree.numberOfDirectPairs()), D2DFunctor(localContext));
    for_loop(arrayPartition(tree.numberOfDirectNodes()), NDFunctor(localContext));

    // wait until all local coeffs and all direct forces are computed
    sync();

    // the rest of the WSPD can be done on the fly by the threads
    for_tree_partition(
        tree.forall_well_separated_pairs(                   // do a WSPD traversal
            pair_vice_versa(m2l_function(localContext)),    // M2L for a well-separated pair
            p2p_function(localContext),                     // direct evaluation
            p2p_function(localContext)                      // direct evaluation
        )
    );
    // wait until all local coeffs and all direct forces are computed
    sync();
    // big multithreaded top down traversal. the top of the tree has to be done by the main thread
    if (isMainThread())
    {
        tree.top_down_traversal(                        // top down traversal, L2L pass
            if_then_else(tree.is_leaf_condition(),      // if the node is a leaf
                do_nothing(),                           // then do nothing, we will deal with this case later
                l2l_function(localContext)              // else shift the node's local coeffs to the children
            ),
            not_condition(tree.is_fence_condition())    // stop when the fence to the threads is reached
        )(tree.root()); // start at the root
    }
    // wait for the top of the tree
    sync();

    for_tree_partition(                             // for all roots in the thread's tree partition, L2L pass
        tree.top_down_traversal(                    // do a top down traversal
            if_then_else(tree.is_leaf_condition(),  // if the node is a leaf
                do_nothing(),                       // then do nothing, we will deal with this case later
                l2l_function(localContext)          // else shift the node's local coeffs to the children
            )
        )
    );
    // wait until the traversal is finished and all leaves have their accumulated local coeffs
    sync();
    // evaluate all leaves and store the forces in the thread array
    // (Note: we could store them in the global array, but then we would have to use random access)
    // we can start collecting the forces immediately because we evaluate point by point
    for_loop(nodePointPartition,                 // loop over the thread's points
        func_comp(                               // composition of two statements
            l2p_function(localContext),          // evaluate the forces due to the local expansion in the corresponding leaf
            collect_force_function               // collect the forces of all threads with the following options:
            <
                COLLECT_REPULSIVE_FACTOR |       // multiply by the repulsive factor stored in the global options
                COLLECT_TREE_2_GRAPH_ORDER |     // thread data is stored in quadtree leaf order, transform it into graph order
                COLLECT_ZERO_THREAD_ARRAY        // reset the thread array
            >(localContext)
        )
    );
}
void FMEMultipoleKernel::operator()(FMEGlobalContext* globalContext)
{
    __uint32 maxNumIterations = globalContext->pOptions->maxNumIterations;
    __uint32 minNumIterations = globalContext->pOptions->minNumIterations;
    ArrayGraph&              graph   = *globalContext->pGraph;
    LinearQuadtree&          tree    = *globalContext->pQuadtree;
    LinearQuadtreeExpansion& treeExp = *globalContext->pExpansion;
    FMELocalContext*  localContext = globalContext->pLocalContext[threadNr()];
    FMEGlobalOptions* options      = globalContext->pOptions;
    float* threadsForceArrayX = localContext->forceX;
    float* threadsForceArrayY = localContext->forceY;
    float* globalForceArrayX  = globalContext->globalForceX;
    float* globalForceArrayY  = globalContext->globalForceY;

    ArrayPartition edgePartition      = arrayPartition(graph.numEdges());
    ArrayPartition nodePointPartition = arrayPartition(graph.numNodes());

    m_pLocalContext  = localContext;
    m_pGlobalContext = globalContext;

    /****************************/
    /* initialization           */
    /****************************/
    // reset the global force array
    for_loop_array_set(threadNr(), numThreads(), globalForceArrayX, tree.numberOfPoints(), 0.0f);
    for_loop_array_set(threadNr(), numThreads(), globalForceArrayY, tree.numberOfPoints(), 0.0f);

    // reset the thread's force array
    for (__uint32 i = 0; i < tree.numberOfPoints(); i++)
    {
        threadsForceArrayX[i] = 0.0f;
        threadsForceArrayY[i] = 0.0f;
    }
    __uint32 maxNumIt = options->preProcMaxNumIterations;
    for (__uint32 currNumIteration = 0; currNumIteration < maxNumIt; currNumIteration++)
    {
        // iterate over all edges and store the resulting forces in the thread's array
        for_loop(edgePartition,
            edge_force_function< EDGE_FORCE_DIV_DEGREE >(localContext) // divide the forces by the degree of the node to avoid oscillation
        );
        // wait until all edges are done
        sync();
        // now collect the forces in parallel, put the sum into the global array and move the nodes accordingly
        for_loop(nodePointPartition,
            func_comp(
                collect_force_function<COLLECT_EDGE_FACTOR_PREP | COLLECT_ZERO_THREAD_ARRAY>(localContext),
                node_move_function<TIME_STEP_PREP | ZERO_GLOBAL_ARRAY>(localContext)
            )
        );
    }
    if (isMainThread())
        globalContext->coolDown = 1.0f;
    sync();
    for (__uint32 currNumIteration = 0; (currNumIteration < maxNumIterations) && !globalContext->earlyExit; currNumIteration++)
    {
        // reset the coefficients
        for_loop_array_set(threadNr(), numThreads(), treeExp.m_multiExp, treeExp.m_numExp*(treeExp.m_numCoeff << 1), 0.0);
        for_loop_array_set(threadNr(), numThreads(), treeExp.m_localExp, treeExp.m_numExp*(treeExp.m_numCoeff << 1), 0.0);
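        // note (assumed from the layout): each expansion holds m_numCoeff complex
        // coefficients stored as interleaved real/imaginary doubles, hence the
        // factor (m_numCoeff << 1) in the array lengths above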
        localContext->maxForceSq = 0.0;
        localContext->avgForce   = 0.0;

        // construct the quadtree
        quadtreeConstruction(nodePointPartition);
        // wait for all threads to finish
        sync();

        if (isSingleThreaded()) // if this is a single threaded run, use the simple approximation
            multipoleApproxSingleThreaded(nodePointPartition);
        else // otherwise use the partitioning
            multipoleApproxFinal(nodePointPartition);
        // now wait until all forces are summed up in the global array and mapped to graph node order
        sync();

        // run the edge forces
        for_loop(edgePartition, // iterate over all edges and sum up the forces in the thread's array
            edge_force_function< EDGE_FORCE_DIV_DEGREE >(localContext) // divide the forces by the degree of the node to avoid oscillation
        );
        // wait until the edges are finished
        sync();

        // collect the edge forces and move the nodes without waiting
        for_loop(nodePointPartition,
            func_comp(
                collect_force_function<COLLECT_EDGE_FACTOR | COLLECT_ZERO_THREAD_ARRAY>(localContext),
                node_move_function<TIME_STEP_NORMAL | ZERO_GLOBAL_ARRAY>(localContext)
            )
        );
        // wait so we can decide if we need another iteration
        sync();
        // check the max force square for all threads
        if (isMainThread())
        {
            double maxForceSq = 0.0;
            for (__uint32 j = 0; j < numThreads(); j++)
                maxForceSq = max(globalContext->pLocalContext[j]->maxForceSq, maxForceSq);

            // if we are allowed to quit and the max force square falls below the threshold, tell all threads we are done
            if ((currNumIteration >= minNumIterations) && (maxForceSq < globalContext->pOptions->stopCritForce))
            {
                globalContext->earlyExit = true;
            }
        }
        // this barrier is required to wait for the earlyExit result
        sync();
    }
}
//! allocates the global context and a local context for each thread
FMEGlobalContext* FMEMultipoleKernel::allocateContext(ArrayGraph* pGraph, FMEGlobalOptions* pOptions, __uint32 numThreads)
{
    FMEGlobalContext* globalContext = new FMEGlobalContext();

    globalContext->numThreads = numThreads;
    globalContext->pOptions   = pOptions;
    globalContext->pGraph     = pGraph;
    globalContext->pQuadtree  = new LinearQuadtree(pGraph->numNodes(), pGraph->nodeXPos(), pGraph->nodeYPos(), pGraph->nodeSize());
    globalContext->pWSPD      = globalContext->pQuadtree->wspd();
    globalContext->pExpansion = new LinearQuadtreeExpansion(globalContext->pOptions->multipolePrecision, (*globalContext->pQuadtree));
    __uint32 numPoints = globalContext->pQuadtree->numberOfPoints();
    typedef FMELocalContext* FMELocalContextPtr;

    globalContext->pLocalContext = new FMELocalContextPtr[numThreads];
    globalContext->globalForceX  = (float*)MALLOC_16(sizeof(float)*numPoints);
    globalContext->globalForceY  = (float*)MALLOC_16(sizeof(float)*numPoints);
    for (__uint32 i = 0; i < numThreads; i++)
    {
        globalContext->pLocalContext[i] = new FMELocalContext;
        globalContext->pLocalContext[i]->forceX = (float*)MALLOC_16(sizeof(float)*numPoints);
        globalContext->pLocalContext[i]->forceY = (float*)MALLOC_16(sizeof(float)*numPoints);
        globalContext->pLocalContext[i]->pGlobalContext = globalContext;
    }
    return globalContext;
}
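/* A minimal sketch of the intended context life cycle (hypothetical caller;
 * the thread pool that runs one kernel instance per thread is omitted):
 *
 *   FMEGlobalContext* ctx = FMEMultipoleKernel::allocateContext(pGraph, pOptions, numThreads);
 *   // spawn numThreads threads, each invoking a kernel's operator()(ctx)
 *   FMEMultipoleKernel::deallocateContext(ctx); // see below
 */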
//! frees the memory for all contexts
void FMEMultipoleKernel::deallocateContext(FMEGlobalContext* globalContext)
{
    __uint32 numThreads = globalContext->numThreads;
    for (__uint32 i = 0; i < numThreads; i++)
    {
        FREE_16(globalContext->pLocalContext[i]->forceX);
        FREE_16(globalContext->pLocalContext[i]->forceY);
        delete globalContext->pLocalContext[i];
    }
    FREE_16(globalContext->globalForceX);
    FREE_16(globalContext->globalForceY);
    delete[] globalContext->pLocalContext;
    delete globalContext->pExpansion;
    delete globalContext->pQuadtree;
    delete globalContext;
}

} // end of namespace ogdf