7 #include "tm_timings.h"
16 #define MIN(a,b) ((a)<(b)?(a):(b))
17 #define MAX(a,b) ((a)>(b)?(a):(b))
21 void free_list_child(tree_t
*tree
){
24 for(i
=0;i
<tree
->arity
;i
++){
25 free_list_child(tree
->child
[i
]);
36 void free_tab_child(tree_t
*tree
){
38 free_tab_child(tree
->tab_child
);
39 free(tree
->tab_child
);
45 void free_tree(tree_t
*tree
){
47 free_list_child(tree
);
52 long int choose (long n
,long k
){
57 res
*=(double)(n
-i
)/(double)(k
-i
);
62 void set_node(tree_t
*node
,tree_t
** child
, int arity
,tree_t
*parent
,int id
,double val
,tree_t
*tab_child
){
74 void display_node(tree_t
*node
){
75 printf("child : %p\narity : %d\nparent : %p\nid : %d\nval : %f\nuniq : %d\n\n" ,
76 node
->child
,node
->arity
,node
->parent
,node
->id
,node
->val
,node
->uniq
);
78 void clone_tree(tree_t
*new,tree_t
*old
){
80 new->child
=old
->child
;
81 new->arity
=old
->arity
;
82 new->parent
=old
->parent
;
83 //new->deb_tab_child=old->deb_tab_child;
88 for(i
=0;i
<new->arity
;i
++)
89 new->child
[i
]->parent
=new;
94 double *aggregate_obj_weight(tree_t
*new_tab_node
, double *tab
, int M
){
101 res
=(double*)malloc(M
*sizeof(double));
105 for(i1
=0;i1
<new_tab_node
[i
].arity
;i1
++){
106 id1
=new_tab_node
[i
].child
[i1
]->id
;
116 double **aggregate_com_mat(tree_t
*new_tab_node
, double **tab
, int M
){
117 int i
,j
,i1
,j1
,id1
,id2
;
122 res
=(double**)malloc(M
*sizeof(double*));
124 res
[i
]=(double*)malloc(M
*sizeof(double));
132 for(i1
=0;i1
<new_tab_node
[i
].arity
;i1
++){
133 id1
=new_tab_node
[i
].child
[i1
]->id
;
134 for(j1
=0;j1
<new_tab_node
[j
].arity
;j1
++){
135 id2
=new_tab_node
[j
].child
[j1
]->id
;
136 res
[i
][j
]+=tab
[id1
][id2
];
137 //printf("res[%d][%d]+=tab[%d][%d]=%f\n",i,j,id1,id2,tab[id1][id2]);
148 void free_tab_double(double**tab
,int N
){
156 void free_tab_int(int**tab
,int N
){
164 void display_tab(double **tab
,int N
){
166 double line
,total
=0;;
170 printf("%g ",tab
[i
][j
]);
174 //printf(": %g",line);
177 printf("Total: %.2f\n",total
);
181 double eval_grouping(double **tab
,tree_t
**cur_group
,int arity
,int N
){
183 int i
,k
,j
,id
,id1
,id2
;
184 //display_tab(tab,N);
187 for(i
=0;i
<arity
;i
++){
191 //printf("res+=tab[%d][%d]+tab[%d][%d]=%f+%f \n",k,id,id,k,tab[k][id],tab[id][k]);
196 for(i
=0;i
<arity
;i
++){
197 id1
=cur_group
[i
]->id
;
198 for(j
=0;j
<arity
;j
++){
199 id2
=cur_group
[j
]->id
;
200 //printf("res-=tab[%d][%d]=%f\n",id1,id2,tab[id1][id2]);
204 //printf(" = %f\n",res);
210 group_list_t
*new_group_list(tree_t
**tab
,double val
,group_list_t
*next
){
212 res
=(group_list_t
*)malloc(sizeof(group_list_t
));
216 res
->sum_neighbour
=0;
221 void add_to_list(group_list_t
*list
,tree_t
**cur_group
, int arity
, double val
){
227 tab
=(tree_t
**)malloc(sizeof(tree_t
*)*arity
);
229 for(i
=0;i
<arity
;i
++){
231 //printf("%d ",cur_group[i]->id);
235 elem
=new_group_list(tab
,val
,list
->next
);
238 /* cppcheck-suppress memleak */
243 void list_all_possible_groups(double **tab
,tree_t
*tab_node
,int id
,int arity
, int depth
,
244 tree_t
**cur_group
,int N
,group_list_t
*list
){
251 val
=eval_grouping(tab
,cur_group
,arity
,N
);
252 add_to_list(list
,cur_group
,arity
,val
);
254 }else if(N
+depth
>=arity
+id
){
257 if(tab_node
[i
].parent
)
259 cur_group
[depth
]=&tab_node
[i
];
260 //printf("%d<-%d\n",depth,i);
261 list_all_possible_groups(tab
,tab_node
,i
+1,arity
,depth
+1,cur_group
,N
,list
);
267 void update_val(double **tab
,tree_t
*parent
,int N
){
270 parent
->val
=eval_grouping(tab
,parent
->child
,parent
->arity
,N
);
271 //printf("connecting: ");
272 for(i
=0;i
<parent
->arity
;i
++){
273 //printf("%d ",parent->child[i]->id);
274 /* if(parent->child[i]->parent!=parent){
275 parent->child[i]->parent=parent;
277 fprintf(stderr,"redundant operation!\n");
283 //printf(": %f\n",parent->val);
287 int independent_groups(group_list_t
**selection
,int d
,group_list_t
*elem
,int arity
){
293 for(i
=0;i
<arity
;i
++){
295 for(k
=0;k
<arity
;k
++){
296 if(elem
->tab
[i
]->id
==selection
[j
]->tab
[k
]->id
)
307 void display_selection (group_list_t
** selection
,int M
,int arity
,double val
){
311 printf("%d ",selection
[i
]->tab
[j
]->id
);
318 void display_grouping (tree_t
*father
,int M
,int arity
,double val
){
322 printf("%d ",father
[i
].child
[j
]->id
);
330 int recurs_select_independent_groups(group_list_t
**tab
,int i
,int n
,int arity
,int d
,int M
,double val
,double *best_val
,group_list_t
**selection
,group_list_t
**best_selection
){
337 //display_selection(selection,M,arity,val);
341 best_selection
[i
]=selection
[i
];
349 if(independent_groups(selection
,d
,elem
,arity
)){
350 //printf("%d: %d\n",d,i);
353 return recurs_select_independent_groups(tab
,i
+1,n
,arity
,d
+1,M
,val
,best_val
,selection
,best_selection
);
361 int test_independent_groups(group_list_t
**tab
,int i
,int n
,int arity
,int d
,int M
,double val
,double *best_val
,group_list_t
**selection
,group_list_t
**best_selection
){
365 //display_selection(selection,M,arity,val);
371 if(independent_groups(selection
,d
,elem
,arity
)){
372 //printf("%d: %d\n",d,i);
375 return recurs_select_independent_groups(tab
,i
+1,n
,arity
,d
+1,M
,val
,best_val
,selection
,best_selection
);
382 void delete_group_list(group_list_t
*list
){
385 delete_group_list(list
->next
);
392 int group_list_id(const void* x1
,const void* x2
){
394 group_list_t
*e1
,*e2
;
396 e1
=*((group_list_t
**)x1
);
397 e2
=*((group_list_t
**)x2
);
400 return e1
->tab
[0]->id
<e2
->tab
[0]->id
?-1:1;
403 int group_list_asc(const void* x1
,const void* x2
){
405 group_list_t
*e1
,*e2
;
407 e1
=*((group_list_t
**)x1
);
408 e2
=*((group_list_t
**)x2
);
411 return e1
->val
<e2
->val
?-1:1;
415 int group_list_dsc(const void* x1
,const void* x2
){
417 group_list_t
*e1
,*e2
;
419 e1
=*((group_list_t
**)x1
);
420 e2
=*((group_list_t
**)x2
);
423 return e1
->val
>e2
->val
?-1:1;
427 int weighted_degree_asc(const void* x1
,const void* x2
){
429 group_list_t
*e1
,*e2
;
431 e1
=*((group_list_t
**)x1
);
432 e2
=*((group_list_t
**)x2
);
435 return e1
->wg
>e2
->wg
?1:-1;
439 int weighted_degree_dsc(const void* x1
,const void* x2
){
441 group_list_t
*e1
,*e2
;
443 e1
=*((group_list_t
**)x1
);
444 e2
=*((group_list_t
**)x2
);
447 return e1
->wg
>e2
->wg
?-1:1;
450 int select_independent_groups(group_list_t
**tab_group
,int n
,int arity
,int M
,double *best_val
,group_list_t
**best_selection
,int bound
,double max_duration
){
452 group_list_t
**selection
;
457 selection
=(group_list_t
**)malloc(sizeof(group_list_t
*)*M
);
459 for(i
=0;i
<MIN(bound
,n
);i
++){
460 selection
[0]=tab_group
[i
];
461 val
=tab_group
[i
]->val
;
462 recurs_select_independent_groups(tab_group
,i
+1,n
,arity
,1,M
,val
,best_val
,selection
,best_selection
);
465 duration
=CLOCK_DIFF(time1
,time0
);
466 if(duration
>max_duration
){
476 display_selection(best_selection
,M
,arity
,*best_val
);
481 int select_independent_groups_by_largest_index(group_list_t
**tab_group
,int n
,int arity
,int M
,double *best_val
,group_list_t
**best_selection
,int bound
,double max_duration
){
483 group_list_t
**selection
;
488 selection
=(group_list_t
**)malloc(sizeof(group_list_t
*)*M
);
492 for(i
=n
-1;i
>=0;i
-=dec
*dec
){
493 selection
[0]=tab_group
[i
];
494 val
=tab_group
[i
]->val
;
495 nb_groups
+=test_independent_groups(tab_group
,i
+1,n
,arity
,1,M
,val
,best_val
,selection
,best_selection
);
496 //printf("%d:%d\n",i,nb_groups);
497 if(nb_groups
>=bound
){
503 duration
=CLOCK_DIFF(time1
,time0
);
504 if(duration
>max_duration
){
515 void list_to_tab(group_list_t
*list
,group_list_t
**tab
,int n
){
519 fprintf(stderr
,"Error not enough elements. Only %d on %d\n",i
,n
);
526 fprintf(stderr
,"Error too many elements\n");
531 void display_tab_group(group_list_t
**tab
, int n
,int arity
){
535 printf("%d ",tab
[i
]->tab
[j
]->id
);
536 printf(": %.2f %.2f\n",tab
[i
]->val
,tab
[i
]->wg
);
540 int independent_tab(tree_t
**tab1
,tree_t
**tab2
,int n
){
544 if(tab1
[i
]->id
==tab2
[j
]->id
)
546 else if(tab1
[i
]->id
>tab2
[j
]->id
)
556 void compute_weighted_degree(group_list_t
**tab
, int n
,int arity
){
559 tab
[i
]->sum_neighbour
=0;
561 //printf("%d/%d=%f%%\n",i,n,(100.0*i)/n);
563 //if(!independent_groups(&tab[i],1,tab[j],arity)){
564 if(!independent_tab(tab
[i
]->tab
,tab
[j
]->tab
,arity
)){
565 tab
[i
]->sum_neighbour
+=tab
[j
]->val
;
566 tab
[j
]->sum_neighbour
+=tab
[i
]->val
;
570 tab
[i
]->wg
=tab
[i
]->sum_neighbour
/tab
[i
]->val
;
571 if(tab
[i
]->sum_neighbour
==0)
573 //printf("%d:%f/%f=%f\n",i,tab[i]->sum_neighbour,tab[i]->val,tab[i]->wg);
579 Very slow: explore all possibilities
580 tab: comm_matrix at the considered level (used to evaluate a grouping)
581 tab_node: array of the node to group
582 parent: node to which attached the computed group
583 id: current considered node of tab_node
584 arity: number of children of parent (i.e.) size of the group to compute
585 best_val: current value of th grouping
586 cur_group: current grouping
588 void group(double **tab
,tree_t
*tab_node
,tree_t
*parent
,int id
,int arity
, int n
,double *best_val
,tree_t
**cur_group
,int N
){
593 //if we have found enough noide in the group
595 // evaluate this group
596 val
=eval_grouping(tab
,cur_group
,arity
,N
);
597 // If we improve compared to previous grouping: uodate the children of parent accordingly
600 for(i
=0;i
<arity
;i
++){
601 parent
->child
[i
]=cur_group
[i
];
610 // If we need more node in the group
611 // Continue to explore avilable nodes
613 // If this node is allready in a group: skip it
614 if(tab_node
[i
].parent
)
616 //Otherwise, add it to the group at place n
617 cur_group
[n
]=&tab_node
[i
];
618 //printf("%d<-%d\n",n,i);
619 //recursively add the next element to this group
620 group(tab
,tab_node
,parent
,i
,arity
,n
+1,best_val
,cur_group
,N
);
625 tab: comm_matrix at the considered level (use to evaluate a grouping)
626 tab_node: array of the node to group
627 parent: node to which attached the computed group
628 id: current considered node of tab_node
629 arity: number of children of parent (i.e.) size of the group to compute
630 best_val: current value of th grouping
631 cur_group: current grouping
632 N: size of tab and tab_node. i.e. number of nodes at the considered level
634 void fast_group(double **tab
,tree_t
*tab_node
,tree_t
*parent
,int id
,int arity
, int n
,
635 double *best_val
,tree_t
**cur_group
,int N
, int *nb_groups
,int max_groups
){
639 //printf("Max groups=%d\n",max_groups);
641 //if we have found enough node in the group
644 // evaluate this group
645 val
=eval_grouping(tab
,cur_group
,arity
,N
);
646 // If we improve compared to previous grouping: uodate the children of parent accordingly
649 for(i
=0;i
<arity
;i
++){
650 parent
->child
[i
]=cur_group
[i
];
659 // If we need more node in the group
660 // Continue to explore avilable nodes
662 // If this node is allready in a group: skip it
663 if(tab_node
[i
].parent
)
665 //Otherwise, add it to the group at place n
666 cur_group
[n
]=&tab_node
[i
];
667 //printf("%d<-%d %d/%d\n",n,i,*nb_groups,max_groups);
669 //recursively add the next element to this group
670 fast_group(tab
,tab_node
,parent
,i
,arity
,n
+1,best_val
,cur_group
,N
,nb_groups
,max_groups
);
671 if(*nb_groups
>max_groups
)
679 void fast_grouping(double **tab
,tree_t
*tab_node
, tree_t
*new_tab_node
, int arity
,int N
, int M
,long int k
){
682 double best_val
,val
=0;
685 cur_group
=(tree_t
**)malloc(sizeof(tree_t
*)*arity
);
689 //printf("k%d/%d, k=%ld\n",l,M,k);
690 /* select the best greedy grouping among the 10 first one*/
691 //fast_group(tab,tab_node,&new_tab_node[l],-1,arity,0,&best_val,cur_group,N,&nb_groups,MAX(2,(int)(50-log2(k))-M/10));
692 fast_group(tab
,tab_node
,&new_tab_node
[l
],-1,arity
,0,&best_val
,cur_group
,N
,&nb_groups
,MAX(1,(int)(50-CmiLog2(k
))-M
/10));
694 for(i
=0;i
<new_tab_node
[l
].arity
;i
++){
695 new_tab_node
[l
].child
[i
]->parent
=&new_tab_node
[l
];
697 update_val(tab
,&new_tab_node
[l
],N
);
702 printf("val=%f\n",val
);
706 display_grouping(new_tab_node
,M
,arity
,val
);
713 int adjacency_asc(const void* x1
,const void* x2
){
717 e1
=((adjacency_t
*)x1
);
718 e2
=((adjacency_t
*)x2
);
721 return e1
->val
<e2
->val
?-1:1;
725 int adjacency_dsc(const void* x1
,const void* x2
){
729 e1
=((adjacency_t
*)x1
);
730 e2
=((adjacency_t
*)x2
);
733 return e1
->val
>e2
->val
?-1:1;
735 void super_fast_grouping(double **tab
,tree_t
*tab_node
, tree_t
*new_tab_node
, int arity
,int N
, int M
,int k
){
738 int i
,j
,e
,l
,nb_groups
;
744 graph
=(adjacency_t
*)malloc(sizeof(adjacency_t
)*((N
*N
-N
)/2));
750 graph
[e
].val
=tab
[i
][j
];
755 printf("linearization=%fs\n",duration
);
758 assert(e
==(N
*N
-N
)/2);
760 qsort(graph
,e
,sizeof(adjacency_t
),adjacency_dsc
);
763 printf("sorting=%fs\n",duration
);
770 for(i
=0;i
<e
&&l
<M
;i
++){
771 if(try_add_edge(tab
,tab_node
,&new_tab_node
[l
],arity
,graph
[i
].i
,graph
[i
].j
,N
,&nb_groups
)){
777 update_val(tab
,&new_tab_node
[l
],N
);
778 val
+=new_tab_node
[l
].val
;
782 printf("Grouping=%fs\n",duration
);
784 printf("val=%f\n",val
);
789 display_grouping(new_tab_node
,M
,arity
,val
);
795 double **build_cost_matrix(double **comm_matrix
, double* obj_weight
, double comm_speed
, int N
){
802 res
=(double**)malloc(N
*sizeof(double*));
804 res
[i
]=(double*)malloc(N
*sizeof(double));
811 printf("avg=%f\n",avg
);
818 res
[i
][j
]=1e-4*comm_matrix
[i
][j
]/comm_speed
-fabs(avg
-(obj_weight
[i
]+obj_weight
[j
])/2);
827 comm_matrix: comm_matrix at the considered level (use to evaluate a grouping) of size N*N
828 tab_node: array of the node to group
829 new_tab_node: array of nodes at the next level (the parents of the node in tab_node once the grouping will be done).
830 arity: number of children of parent (i.e.) size of the group to compute
831 N: size of tab and tab_node. i.e. number of nodes at the considered level
832 M: size of new_tab_node (i.e) the number of parents
833 Hence we have: M*arity=N
834 */void group_nodes(double **comm_matrix
,tree_t
*tab_node
, tree_t
*new_tab_node
, int depth
, int arity
,int N
, int M
, double* obj_weigth
, double comm_speed
){
838 group_list_t list
,**best_selection
,**tab_group
;
839 double best_val
,last_best
;
841 double **tab
; /*cost matrix taking into account the communiocation cost but also the weight of the object*/
846 /* might return comm_matrix (if obj_weight==NULL): do not free this tab in this case*/
847 tab
=build_cost_matrix(comm_matrix
,obj_weigth
,comm_speed
,N
);
852 printf("Number of groups:%ld\n",k
);
854 if(k
>30000||depth
>5){
856 printf("Fast Grouping...\n");
863 //super_fast_grouping(tab,tab_node,new_tab_node,arity,N,M,k);
864 bucket_grouping(tab
,tab_node
,new_tab_node
,arity
,N
,M
,k
);
866 fast_grouping(tab
,tab_node
,new_tab_node
,arity
,N
,M
,k
);
870 printf("Fast grouping duration=%f\n",duration
);
875 printf("Grouping nodes...\n");
878 list
.val
=0;//number of elements in the list
879 cur_group
=(tree_t
**)malloc(sizeof(tree_t
*)*arity
);
880 best_selection
=(group_list_t
**)malloc(sizeof(group_list_t
*)*M
);
882 list_all_possible_groups(tab
,tab_node
,0,arity
,0,cur_group
,N
,&list
);
885 tab_group
=(group_list_t
**)malloc(sizeof(group_list_t
)*n
);
886 list_to_tab(list
.next
,tab_group
,n
);
888 printf("List to tab done\n");
892 // perform the pack mapping fist
893 timeout
=select_independent_groups(tab_group
,n
,arity
,M
,&best_val
,best_selection
,1,0.1);
896 printf("Packed mapping timeout!\n");
899 // give this mapping an exra credit (in general MPI application are made such that neighbour process communicates more than distant ones)
902 printf("Packing computed\n");
904 // perform a mapping trying to use group that cost less first
905 qsort(tab_group
,n
,sizeof(group_list_t
*),group_list_asc
);
907 timeout
=select_independent_groups(tab_group
,n
,arity
,M
,&best_val
,best_selection
,10,0.1);
910 printf("Cost less first timeout!\n");
911 }else if(last_best
>best_val
){
912 printf("Cost less first Impoved solution\n");
916 // perform a mapping trying to minimize the use of groups that cost a lot
917 qsort(tab_group
,n
,sizeof(group_list_t
*),group_list_dsc
);
919 timeout
=select_independent_groups_by_largest_index(tab_group
,n
,arity
,M
,&best_val
,best_selection
,10,0.1);
922 printf("Cost most last timeout!\n");
923 }else if(last_best
>best_val
){
924 printf("Cost most last impoved solution\n");
928 // perform a mapping in the weighted degree order
930 printf("----WG----\n");
932 compute_weighted_degree(tab_group
,n
,arity
);
934 printf("Weigted degree computed\n");
936 qsort(tab_group
,n
,sizeof(group_list_t
*),weighted_degree_dsc
);
937 //display_tab_group(tab_group,n,arity);
939 timeout
=select_independent_groups(tab_group
,n
,arity
,M
,&best_val
,best_selection
,10,0.1);
942 printf("WG timeout!\n");
943 }else if(last_best
>best_val
){
944 printf("WG impoved solution\n");
949 qsort(best_selection
,M
,sizeof(group_list_t
*),group_list_id
);
952 for(j
=0;j
<arity
;j
++){
953 new_tab_node
[l
].child
[j
]=best_selection
[l
]->tab
[j
];
954 new_tab_node
[l
].child
[j
]->parent
=&new_tab_node
[l
];
956 new_tab_node
[l
].arity
=arity
;
958 //printf("arity=%d\n",new_tab_node[l].arity);
959 update_val(tab
,&new_tab_node
[l
],N
);
962 delete_group_list((&list
)->next
);
963 free(best_selection
);
969 free_tab_double(tab
,N
);
972 printf("Grouping done in %.4fs!\n",duration
);
979 void complete_com_mat(double ***tab
,int N
, int K
){
980 double **old_tab
,**new_tab
;
986 new_tab
=(double**)malloc(M
*sizeof(double*));
988 new_tab
[i
]=(double*)malloc(M
*sizeof(double));
994 new_tab
[i
][j
]=old_tab
[i
][j
];
1002 void complete_obj_weight(double **tab
,int N
, int K
){
1003 double *old_tab
,*new_tab
,avg
;
1019 new_tab
=(double*)malloc(M
*sizeof(double));
1024 new_tab
[i
]=old_tab
[i
];
1033 void create_dumb_tree(tree_t
*node
,int depth
,tm_topology_t
*topology
){
1034 tree_t
**list_child
;
1038 if(depth
==topology
->nb_levels
-1){
1039 set_node(node
,NULL
,0,NULL
,-1,0,NULL
);
1043 arity
=topology
->arity
[depth
];
1045 list_child
=(tree_t
**)calloc(arity
,sizeof(tree_t
*));
1046 for(i
=0;i
<arity
;i
++){
1047 list_child
[i
]=(tree_t
*)malloc(sizeof(tree_t
));
1048 create_dumb_tree(list_child
[i
],depth
+1,topology
);
1049 list_child
[i
]->parent
=node
;
1050 list_child
[i
]->dumb
=1;
1053 set_node(node
,list_child
,arity
,NULL
,-1,0,list_child
[0]);
1057 void complete_tab_node(tree_t
**tab
,int N
, int K
,int depth
,tm_topology_t
*topology
){
1058 tree_t
*old_tab
,*new_tab
;
1067 new_tab
=(tree_t
*)malloc(M
*sizeof(tree_t
));
1072 clone_tree(&new_tab
[i
],&old_tab
[i
]);
1074 create_dumb_tree(&new_tab
[i
],depth
,topology
);
1079 //do not suppress tab if you are at the depth-most level it will be used at the mapping stage
1084 void set_deb_tab_child(tree_t
*tree
, tree_t
*child
,int depth
){
1085 //printf("depth=%d\t%p\t%p\n",depth,child,tree);
1087 set_deb_tab_child(tree
->tab_child
,child
,depth
-1);
1089 tree
->tab_child
=child
;
1095 Build the tree of the matching. It is a bottom up algorithm: it starts from the bottom of the tree on proceed by decreasing the depth
1096 It groups nodes of the matrix tab and link these groups to the nodes of the under level.
1097 Then it calls recursivcely the function to prefrom the grouping at the above level.
1099 tab_node: array of nodes of the under level.
1100 tab: local communication matrix
1101 N: number of nodes. Order of com_mat, size of obj_weight
1102 arity: arity of the nodes of the above level.
1103 depth: current depth of the algorithm
1104 toplogy: description of the hardware topology.
1106 tree_t
*build_level_topology(tree_t
*tab_node
,double **com_mat
,int N
,int arity
,int depth
,tm_topology_t
*topology
, double *obj_weight
, double *comm_speed
){
1107 int M
; /*N/Arity: number the groups*/
1109 tree_t
*new_tab_node
; /*array of node for this level (of size M): there will be linked to the nodes of tab_nodes*/
1110 double **new_com_mat
; /*New communication matrix (after grouyping nodes together)*/
1111 tree_t
*res
; /*resulting tree*/
1113 double speed
; /* communication speed at this level*/
1114 double *new_obj_weight
;
1116 if((N
==1)&&(depth
==0))
1117 return &tab_node
[0];
1119 fprintf(stderr
,"Error: matrix size: %d and depth:%d (should be 1 and -1 respectively)\n",N
,depth
);
1125 /* If the number of nodes does not devide the arity: we add K nodes */
1127 K
=arity
*((N
/arity
)+1)-N
;
1128 //printf("****N=%d arity=%d K=%d\n",N,arity,K);
1129 //display_tab(tab,N);
1130 /* add K rows and columns to comm_matrix*/
1131 complete_com_mat(&com_mat
,N
,K
);
1132 /* add K element to the object weight*/
1133 complete_obj_weight(&obj_weight
,N
,K
);
1134 //display_tab(tab,N+K);
1135 /* add a dumb tree to the K new "virtual nodes"*/
1136 complete_tab_node(&tab_node
,N
,K
,depth
,topology
);
1137 completed
=1; /*flag this addition*/
1138 N
+=K
; /*increase the number of nodes accordingly*/
1139 } //display_tab(tab,N);
1143 printf("Depth=%d\tnb_nodes=%d\tnb_groups=%d\tsize of groups(arity)=%d\n",depth
,N
,M
,arity
);
1145 /*create the new nodes*/
1146 new_tab_node
=(tree_t
*)malloc(sizeof(tree_t
)*M
);
1147 /*intitialize each node*/
1149 tree_t
**list_child
;
1150 list_child
=(tree_t
**)calloc(arity
,sizeof(tree_t
*));
1151 set_node(&new_tab_node
[i
],list_child
,arity
,NULL
,i
,0,tab_node
);
1154 /*Core of the algorithm: perfrom the grouping*/
1156 speed
=comm_speed
[depth
];
1159 group_nodes(com_mat
,tab_node
,new_tab_node
,depth
,arity
,N
,M
,obj_weight
,speed
);
1161 /*based on that grouping aggregate the communication matrix*/
1162 new_com_mat
=aggregate_com_mat(new_tab_node
,com_mat
,M
);
1163 /*based on that grouping aggregate the object weight matrix*/
1164 new_obj_weight
=aggregate_obj_weight(new_tab_node
,obj_weight
,M
);
1166 /* set ID of virtual nodes to -1*/
1171 // display_node(&tab_node[i]);
1173 //display_tab(new_com_mat,M);
1175 /* decrease depth and compute arity of the above level*/
1178 arity
= topology
->arity
[depth
-1];
1181 // assume all objects have the same arity
1182 res
= build_level_topology(new_tab_node
,new_com_mat
,M
,arity
,depth
,topology
,new_obj_weight
,comm_speed
);
1184 set_deb_tab_child(res
,tab_node
,depth
);
1188 free_tab_double(com_mat
,N
);
1191 /* cppcheck-suppress deallocDealloc */
1192 free_tab_double(new_com_mat
,M
);
1193 free(new_obj_weight
);
1200 double speed(int depth
){
1202 //double tab[5]={21,9,4.5,2.5,0.001};
1203 //double tab[5]={1,1,1,1,1};
1204 //double tab[6]={100000,10000,1000,500,100,10};
1205 double tab
[5]={100000,10000,1000,500,10};
1207 return 1.0/tab
[depth
];
1208 //return 10*log(depth+2);
1210 //return (long int)pow(100,depth);
1213 tree_t
* build_tree_from_topology(tm_topology_t
*topology
,double **tab
,int N
,double *obj_weight
, double *comm_speed
){
1215 tree_t
*res
,*tab_node
;
1218 tab_node
=(tree_t
*)malloc(sizeof(tree_t
)*N
);
1220 set_node(&tab_node
[i
],NULL
,0,NULL
,i
,0,NULL
);
1223 depth
= topology
->nb_levels
-1;
1224 printf("nb_levels=%d\n",depth
+1);
1225 // assume all objects have the same arity
1226 res
= build_level_topology(tab_node
,tab
,N
,topology
->arity
[depth
-1],depth
,topology
, obj_weight
, comm_speed
);
1227 printf("Build tree done!\n");