Fix hash table usage for XLC
[charm.git] / src / ck-ldb / tm_tree.c
blobee54f4df17300fff0ae673c4e038cc1c0b6f362f
1 #include <float.h>
2 #include <stdlib.h>
3 #include <stdio.h>
4 #include <math.h>
5 #include <assert.h>
6 #include "tm_tree.h"
7 #include "tm_timings.h"
8 #include "tm_bucket.h"
10 #if __CHARMC__
11 #include "converse.h"
12 #else
13 #define CmiLog2 log2
14 #endif
16 #define MIN(a,b) ((a)<(b)?(a):(b))
17 #define MAX(a,b) ((a)>(b)?(a):(b))
18 #undef DEBUG
21 void free_list_child(tree_t *tree){
22 int i;
23 if(tree){
24 for(i=0;i<tree->arity;i++){
25 free_list_child(tree->child[i]);
27 free(tree->child);
28 if(tree->dumb)
29 free(tree);
36 void free_tab_child(tree_t *tree){
37 if(tree){
38 free_tab_child(tree->tab_child);
39 free(tree->tab_child);
45 void free_tree(tree_t *tree){
47 free_list_child(tree);
48 free_tab_child(tree);
49 free(tree);
52 long int choose (long n,long k){
53 // compute C_n_k
54 double res=1;
55 int i;
56 for(i=0;i<k;i++)
57 res*=(double)(n-i)/(double)(k-i);
59 return (long int)res;
62 void set_node(tree_t *node,tree_t ** child, int arity,tree_t *parent,int id,double val,tree_t *tab_child){
63 static int uniq=0;
64 node->child=child;
65 node->arity=arity;
66 node->tab_child=NULL;
67 node->parent=parent;
68 node->id=id;
69 node->val=val;
70 node->uniq=uniq++;
71 node->dumb=0;
74 void display_node(tree_t *node){
75 printf("child : %p\narity : %d\nparent : %p\nid : %d\nval : %f\nuniq : %d\n\n" ,
76 node->child,node->arity,node->parent,node->id,node->val,node->uniq);
78 void clone_tree(tree_t *new,tree_t *old){
79 int i;
80 new->child=old->child;
81 new->arity=old->arity;
82 new->parent=old->parent;
83 //new->deb_tab_child=old->deb_tab_child;
84 new->id=old->id;
85 new->val=old->val;
86 new->uniq=old->uniq;
87 new->dumb=old->dumb;
88 for(i=0;i<new->arity;i++)
89 new->child[i]->parent=new;
94 double *aggregate_obj_weight(tree_t *new_tab_node, double *tab, int M){
95 int i,i1,id1;
96 double *res;
98 if(!tab)
99 return NULL;
101 res=(double*)malloc(M*sizeof(double));
103 for(i=0;i<M;i++){
104 res[i]=0.0;
105 for(i1=0;i1<new_tab_node[i].arity;i1++){
106 id1=new_tab_node[i].child[i1]->id;
107 res[i]+=tab[id1];
111 return res;
116 double **aggregate_com_mat(tree_t *new_tab_node, double **tab, int M){
117 int i,j,i1,j1,id1,id2;
118 double **res;
122 res=(double**)malloc(M*sizeof(double*));
123 for(i=0;i<M;i++)
124 res[i]=(double*)malloc(M*sizeof(double));
126 for(i=0;i<M;i++){
127 for(j=0;j<M;j++){
128 if(i==j)
129 res[i][j]=0;
130 else{
131 res[i][j]=0;
132 for(i1=0;i1<new_tab_node[i].arity;i1++){
133 id1=new_tab_node[i].child[i1]->id;
134 for(j1=0;j1<new_tab_node[j].arity;j1++){
135 id2=new_tab_node[j].child[j1]->id;
136 res[i][j]+=tab[id1][id2];
137 //printf("res[%d][%d]+=tab[%d][%d]=%f\n",i,j,id1,id2,tab[id1][id2]);
144 return res;
148 void free_tab_double(double**tab,int N){
149 int i;
150 for(i=0;i<N;i++){
151 free(tab[i]);
153 free(tab);
156 void free_tab_int(int**tab,int N){
157 int i;
158 for(i=0;i<N;i++){
159 free(tab[i]);
161 free(tab);
164 void display_tab(double **tab,int N){
165 int i,j;
166 double line,total=0;;
167 for(i=0;i<N;i++){
168 line=0;
169 for(j=0;j<N;j++){
170 printf("%g ",tab[i][j]);
171 line+=tab[i][j];
173 total+=line;
174 //printf(": %g",line);
175 printf("\n");
177 printf("Total: %.2f\n",total);
181 double eval_grouping(double **tab,tree_t **cur_group,int arity,int N){
182 double res=0;
183 int i,k,j,id,id1,id2;
184 //display_tab(tab,N);
187 for(i=0;i<arity;i++){
188 id=cur_group[i]->id;
189 //printf("%d ",id);
190 for(k=0;k<N;k++){
191 //printf("res+=tab[%d][%d]+tab[%d][%d]=%f+%f \n",k,id,id,k,tab[k][id],tab[id][k]);
192 res+=tab[k][id];
196 for(i=0;i<arity;i++){
197 id1=cur_group[i]->id;
198 for(j=0;j<arity;j++){
199 id2=cur_group[j]->id;
200 //printf("res-=tab[%d][%d]=%f\n",id1,id2,tab[id1][id2]);
201 res-=tab[id1][id2];
204 //printf(" = %f\n",res);
205 return res;
210 group_list_t *new_group_list(tree_t **tab,double val,group_list_t *next){
211 group_list_t *res;
212 res=(group_list_t *)malloc(sizeof(group_list_t));
213 res->tab=tab;
214 res->val=val;
215 res->next=next;
216 res->sum_neighbour=0;
217 return res;
221 void add_to_list(group_list_t *list,tree_t **cur_group, int arity, double val){
223 group_list_t *elem;
224 int i;
225 tree_t **tab;
227 tab=(tree_t **)malloc(sizeof(tree_t *)*arity);
229 for(i=0;i<arity;i++){
230 tab[i]=cur_group[i];
231 //printf("%d ",cur_group[i]->id);
234 //printf("\n");
235 elem=new_group_list(tab,val,list->next);
236 list->next=elem;
237 list->val++;
238 /* cppcheck-suppress memleak */
243 void list_all_possible_groups(double **tab,tree_t *tab_node,int id,int arity, int depth,
244 tree_t **cur_group,int N,group_list_t *list){
245 double val;
246 int i;
250 if(depth==arity){
251 val=eval_grouping(tab,cur_group,arity,N);
252 add_to_list(list,cur_group,arity,val);
253 return;
254 }else if(N+depth>=arity+id){
255 //}else if(1){
256 for(i=id;i<N;i++){
257 if(tab_node[i].parent)
258 continue;
259 cur_group[depth]=&tab_node[i];
260 //printf("%d<-%d\n",depth,i);
261 list_all_possible_groups(tab,tab_node,i+1,arity,depth+1,cur_group,N,list);
267 void update_val(double **tab,tree_t *parent,int N){
268 int i;
270 parent->val=eval_grouping(tab,parent->child,parent->arity,N);
271 //printf("connecting: ");
272 for(i=0;i<parent->arity;i++){
273 //printf("%d ",parent->child[i]->id);
274 /* if(parent->child[i]->parent!=parent){
275 parent->child[i]->parent=parent;
276 }else{
277 fprintf(stderr,"redundant operation!\n");
278 exit(-1);
283 //printf(": %f\n",parent->val);
287 int independent_groups(group_list_t **selection,int d,group_list_t *elem,int arity){
288 int i,j,k;
290 if(d==0)
291 return 1;
293 for(i=0;i<arity;i++){
294 for(j=0;j<d;j++){
295 for(k=0;k<arity;k++){
296 if(elem->tab[i]->id==selection[j]->tab[k]->id)
297 return 0;
302 return 1;
307 void display_selection (group_list_t** selection,int M,int arity,double val){
308 int i,j;
309 for(i=0;i<M;i++){
310 for(j=0;j<arity;j++)
311 printf("%d ",selection[i]->tab[j]->id);
312 printf("-- ");
314 printf(":%f\n",val);
318 void display_grouping (tree_t *father,int M,int arity,double val){
319 int i,j;
320 for(i=0;i<M;i++){
321 for(j=0;j<arity;j++)
322 printf("%d ",father[i].child[j]->id);
323 printf("-- ");
325 printf(":%f\n",val);
330 int recurs_select_independent_groups(group_list_t **tab,int i,int n,int arity,int d,int M,double val,double *best_val,group_list_t **selection,group_list_t **best_selection){
331 group_list_t *elem;
333 //if(val>=*best_val)
334 // return 0;
336 if(d==M){
337 //display_selection(selection,M,arity,val);
338 if(val<*best_val){
339 *best_val=val;
340 for(i=0;i<M;i++)
341 best_selection[i]=selection[i];
342 return 1;
344 return 0;
347 while(i<n){
348 elem=tab[i];
349 if(independent_groups(selection,d,elem,arity)){
350 //printf("%d: %d\n",d,i);
351 selection[d]=elem;
352 val+=elem->val;
353 return recurs_select_independent_groups(tab,i+1,n,arity,d+1,M,val,best_val,selection,best_selection);
355 i++;
357 return 0;
361 int test_independent_groups(group_list_t **tab,int i,int n,int arity,int d,int M,double val,double *best_val,group_list_t **selection,group_list_t **best_selection){
362 group_list_t *elem;
364 if(d==M){
365 //display_selection(selection,M,arity,val);
366 return 1;
369 while(i<n){
370 elem=tab[i];
371 if(independent_groups(selection,d,elem,arity)){
372 //printf("%d: %d\n",d,i);
373 selection[d]=elem;
374 val+=elem->val;
375 return recurs_select_independent_groups(tab,i+1,n,arity,d+1,M,val,best_val,selection,best_selection);
377 i++;
379 return 0;
382 void delete_group_list(group_list_t *list){
384 if(list){
385 delete_group_list(list->next);
386 free(list->tab);
387 free(list);
392 int group_list_id(const void* x1,const void* x2){
394 group_list_t *e1,*e2;
396 e1=*((group_list_t**)x1);
397 e2=*((group_list_t**)x2);
400 return e1->tab[0]->id<e2->tab[0]->id?-1:1;
403 int group_list_asc(const void* x1,const void* x2){
405 group_list_t *e1,*e2;
407 e1=*((group_list_t**)x1);
408 e2=*((group_list_t**)x2);
411 return e1->val<e2->val?-1:1;
415 int group_list_dsc(const void* x1,const void* x2){
417 group_list_t *e1,*e2;
419 e1=*((group_list_t**)x1);
420 e2=*((group_list_t**)x2);
423 return e1->val>e2->val?-1:1;
427 int weighted_degree_asc(const void* x1,const void* x2){
429 group_list_t *e1,*e2;
431 e1=*((group_list_t**)x1);
432 e2=*((group_list_t**)x2);
435 return e1->wg>e2->wg?1:-1;
439 int weighted_degree_dsc(const void* x1,const void* x2){
441 group_list_t *e1,*e2;
443 e1=*((group_list_t**)x1);
444 e2=*((group_list_t**)x2);
447 return e1->wg>e2->wg?-1:1;
450 int select_independent_groups(group_list_t **tab_group,int n,int arity,int M,double *best_val,group_list_t **best_selection,int bound,double max_duration){
451 int i;
452 group_list_t **selection;
453 double val,duration;
454 CLOCK_T time1,time0;
457 selection=(group_list_t **)malloc(sizeof(group_list_t*)*M);
458 CLOCK(time0);
459 for(i=0;i<MIN(bound,n);i++){
460 selection[0]=tab_group[i];
461 val=tab_group[i]->val;
462 recurs_select_independent_groups(tab_group,i+1,n,arity,1,M,val,best_val,selection,best_selection);
463 if(i%5){
464 CLOCK(time1);
465 duration=CLOCK_DIFF(time1,time0);
466 if(duration>max_duration){
467 free(selection);
468 return 1;
473 free(selection);
475 #ifdef DEBUG
476 display_selection(best_selection,M,arity,*best_val);
477 #endif
478 return 0;
481 int select_independent_groups_by_largest_index(group_list_t **tab_group,int n,int arity,int M,double *best_val,group_list_t **best_selection,int bound,double max_duration){
482 int i,nb_groups=0;
483 group_list_t **selection;
484 double val,duration;
485 int dec;
486 CLOCK_T time1,time0;
488 selection=(group_list_t **)malloc(sizeof(group_list_t*)*M);
489 CLOCK(time0);
491 dec=MAX(n/10000,1);
492 for(i=n-1;i>=0;i-=dec*dec){
493 selection[0]=tab_group[i];
494 val=tab_group[i]->val;
495 nb_groups+=test_independent_groups(tab_group,i+1,n,arity,1,M,val,best_val,selection,best_selection);
496 //printf("%d:%d\n",i,nb_groups);
497 if(nb_groups>=bound){
498 free(selection);
499 return 0;
501 if(i%5){
502 CLOCK(time1);
503 duration=CLOCK_DIFF(time1,time0);
504 if(duration>max_duration){
505 free(selection);
506 return 1;
511 free(selection);
512 return 0;
515 void list_to_tab(group_list_t *list,group_list_t **tab,int n){
516 int i;
517 for(i=0;i<n;i++){
518 if(!list){
519 fprintf(stderr,"Error not enough elements. Only %d on %d\n",i,n);
520 exit(-1);
522 tab[n-i-1]=list;
523 list=list->next;
525 if(list){
526 fprintf(stderr,"Error too many elements\n");
527 exit(-1);
531 void display_tab_group(group_list_t **tab, int n,int arity){
532 int i,j;
533 for(i=0;i<n;i++){
534 for(j=0;j<arity;j++)
535 printf("%d ",tab[i]->tab[j]->id);
536 printf(": %.2f %.2f\n",tab[i]->val,tab[i]->wg);
540 int independent_tab(tree_t **tab1,tree_t **tab2,int n){
541 int i,j;
542 i=0;j=0;
543 while((i<n)&&(j<n)){
544 if(tab1[i]->id==tab2[j]->id)
545 return 0;
546 else if(tab1[i]->id>tab2[j]->id)
547 j++;
548 else
549 i++;
551 return 1;
556 void compute_weighted_degree(group_list_t **tab, int n,int arity){
557 int i,j;
558 for(i=0;i<n;i++)
559 tab[i]->sum_neighbour=0;
560 for(i=0;i<n;i++){
561 //printf("%d/%d=%f%%\n",i,n,(100.0*i)/n);
562 for(j=i+1;j<n;j++){
563 //if(!independent_groups(&tab[i],1,tab[j],arity)){
564 if(!independent_tab(tab[i]->tab,tab[j]->tab,arity)){
565 tab[i]->sum_neighbour+=tab[j]->val;
566 tab[j]->sum_neighbour+=tab[i]->val;
570 tab[i]->wg=tab[i]->sum_neighbour/tab[i]->val;
571 if(tab[i]->sum_neighbour==0)
572 tab[i]->wg=0;
573 //printf("%d:%f/%f=%f\n",i,tab[i]->sum_neighbour,tab[i]->val,tab[i]->wg);
579 Very slow: explore all possibilities
580 tab: comm_matrix at the considered level (used to evaluate a grouping)
581 tab_node: array of the node to group
582 parent: node to which attached the computed group
583 id: current considered node of tab_node
584 arity: number of children of parent (i.e.) size of the group to compute
585 best_val: current value of th grouping
586 cur_group: current grouping
588 void group(double **tab,tree_t *tab_node,tree_t *parent,int id,int arity, int n,double *best_val,tree_t **cur_group,int N){
589 double val;
590 int i;
593 //if we have found enough noide in the group
594 if(n==arity){
595 // evaluate this group
596 val=eval_grouping(tab,cur_group,arity,N);
597 // If we improve compared to previous grouping: uodate the children of parent accordingly
598 if(val<*best_val){
599 *best_val=val;
600 for(i=0;i<arity;i++){
601 parent->child[i]=cur_group[i];
603 parent->arity=arity;
605 return;
610 // If we need more node in the group
611 // Continue to explore avilable nodes
612 for(i=id+1;i<N;i++){
613 // If this node is allready in a group: skip it
614 if(tab_node[i].parent)
615 continue;
616 //Otherwise, add it to the group at place n
617 cur_group[n]=&tab_node[i];
618 //printf("%d<-%d\n",n,i);
619 //recursively add the next element to this group
620 group(tab,tab_node,parent,i,arity,n+1,best_val,cur_group,N);
625 tab: comm_matrix at the considered level (use to evaluate a grouping)
626 tab_node: array of the node to group
627 parent: node to which attached the computed group
628 id: current considered node of tab_node
629 arity: number of children of parent (i.e.) size of the group to compute
630 best_val: current value of th grouping
631 cur_group: current grouping
632 N: size of tab and tab_node. i.e. number of nodes at the considered level
634 void fast_group(double **tab,tree_t *tab_node,tree_t *parent,int id,int arity, int n,
635 double *best_val,tree_t **cur_group,int N, int *nb_groups,int max_groups){
636 double val;
637 int i;
639 //printf("Max groups=%d\n",max_groups);
641 //if we have found enough node in the group
642 if(n==arity){
643 (*nb_groups)++;
644 // evaluate this group
645 val=eval_grouping(tab,cur_group,arity,N);
646 // If we improve compared to previous grouping: uodate the children of parent accordingly
647 if(val<*best_val){
648 *best_val=val;
649 for(i=0;i<arity;i++){
650 parent->child[i]=cur_group[i];
652 parent->arity=arity;
654 return;
659 // If we need more node in the group
660 // Continue to explore avilable nodes
661 for(i=id+1;i<N;i++){
662 // If this node is allready in a group: skip it
663 if(tab_node[i].parent)
664 continue;
665 //Otherwise, add it to the group at place n
666 cur_group[n]=&tab_node[i];
667 //printf("%d<-%d %d/%d\n",n,i,*nb_groups,max_groups);
668 //exit(-1);
669 //recursively add the next element to this group
670 fast_group(tab,tab_node,parent,i,arity,n+1,best_val,cur_group,N,nb_groups,max_groups);
671 if(*nb_groups>max_groups)
672 return;
679 void fast_grouping(double **tab,tree_t *tab_node, tree_t *new_tab_node, int arity,int N, int M,long int k){
680 tree_t **cur_group;
681 int l,i;
682 double best_val,val=0;
683 int nb_groups;
685 cur_group=(tree_t**)malloc(sizeof(tree_t*)*arity);
686 for(l=0;l<M;l++){
687 best_val=DBL_MAX;
688 nb_groups=0;
689 //printf("k%d/%d, k=%ld\n",l,M,k);
690 /* select the best greedy grouping among the 10 first one*/
691 //fast_group(tab,tab_node,&new_tab_node[l],-1,arity,0,&best_val,cur_group,N,&nb_groups,MAX(2,(int)(50-log2(k))-M/10));
692 fast_group(tab,tab_node,&new_tab_node[l],-1,arity,0,&best_val,cur_group,N,&nb_groups,MAX(1,(int)(50-CmiLog2(k))-M/10));
693 val+=best_val;
694 for(i=0;i<new_tab_node[l].arity;i++){
695 new_tab_node[l].child[i]->parent=&new_tab_node[l];
697 update_val(tab,&new_tab_node[l],N);
700 free(cur_group);
702 printf("val=%f\n",val);
703 //exit(-1);
705 #ifdef DEBUG
706 display_grouping(new_tab_node,M,arity,val);
707 #endif
713 int adjacency_asc(const void* x1,const void* x2){
715 adjacency_t *e1,*e2;
717 e1=((adjacency_t*)x1);
718 e2=((adjacency_t*)x2);
721 return e1->val<e2->val?-1:1;
725 int adjacency_dsc(const void* x1,const void* x2){
727 adjacency_t *e1,*e2;
729 e1=((adjacency_t*)x1);
730 e2=((adjacency_t*)x2);
733 return e1->val>e2->val?-1:1;
735 void super_fast_grouping(double **tab,tree_t *tab_node, tree_t *new_tab_node, int arity,int N, int M,int k){
736 double val=0;
737 adjacency_t *graph;
738 int i,j,e,l,nb_groups;
739 double duration;
741 assert(arity==2);
743 TIC;
744 graph=(adjacency_t*)malloc(sizeof(adjacency_t)*((N*N-N)/2));
745 e=0;
746 for(i=0;i<N;i++){
747 for(j=i+1;j<N;j++){
748 graph[e].i=i;
749 graph[e].j=j;
750 graph[e].val=tab[i][j];
751 e++;
754 duration=TOC;
755 printf("linearization=%fs\n",duration);
758 assert(e==(N*N-N)/2);
759 TIC;
760 qsort(graph,e,sizeof(adjacency_t),adjacency_dsc);
761 duration=TOC;
763 printf("sorting=%fs\n",duration);
765 TIC;
767 TIC;
768 l=0;
769 nb_groups=0;
770 for(i=0;i<e&&l<M;i++){
771 if(try_add_edge(tab,tab_node,&new_tab_node[l],arity,graph[i].i,graph[i].j,N,&nb_groups)){
772 l++;
776 for(l=0;l<M;l++){
777 update_val(tab,&new_tab_node[l],N);
778 val+=new_tab_node[l].val;
781 duration=TOC;
782 printf("Grouping=%fs\n",duration);
784 printf("val=%f\n",val);
786 free(graph);
788 #ifdef DEBUG
789 display_grouping(new_tab_node,M,arity,val);
790 #endif
795 double **build_cost_matrix(double **comm_matrix, double* obj_weight, double comm_speed, int N){
796 double **res,avg;
797 int i,j;
799 if(!obj_weight)
800 return comm_matrix;
802 res=(double**)malloc(N*sizeof(double*));
803 for(i=0;i<N;i++)
804 res[i]=(double*)malloc(N*sizeof(double));
806 avg=0;
807 for(i=0;i<N;i++)
808 avg+=obj_weight[i];
809 avg/=N;
811 printf("avg=%f\n",avg);
813 for(i=0;i<N;i++)
814 for(j=0;j<N;j++)
815 if(i==j)
816 res[i][j]=0;
817 else
818 res[i][j]=1e-4*comm_matrix[i][j]/comm_speed-fabs(avg-(obj_weight[i]+obj_weight[j])/2);
820 return res;
827 comm_matrix: comm_matrix at the considered level (use to evaluate a grouping) of size N*N
828 tab_node: array of the node to group
829 new_tab_node: array of nodes at the next level (the parents of the node in tab_node once the grouping will be done).
830 arity: number of children of parent (i.e.) size of the group to compute
831 N: size of tab and tab_node. i.e. number of nodes at the considered level
832 M: size of new_tab_node (i.e) the number of parents
833 Hence we have: M*arity=N
834 */void group_nodes(double **comm_matrix,tree_t *tab_node, tree_t *new_tab_node, int depth, int arity,int N, int M, double* obj_weigth, double comm_speed){
835 tree_t **cur_group;
836 int j,l,n;
837 long int k;
838 group_list_t list,**best_selection,**tab_group;
839 double best_val,last_best;
840 int timeout;
841 double **tab; /*cost matrix taking into account the communiocation cost but also the weight of the object*/
842 double duration;
844 TIC;
846 /* might return comm_matrix (if obj_weight==NULL): do not free this tab in this case*/
847 tab=build_cost_matrix(comm_matrix,obj_weigth,comm_speed,N);
851 k=choose(N,arity);
852 printf("Number of groups:%ld\n",k);
854 if(k>30000||depth>5){
855 #ifdef DEBUG
856 printf("Fast Grouping...\n");
857 #endif
859 double duration;
861 TIC;
862 if(arity<=2){
863 //super_fast_grouping(tab,tab_node,new_tab_node,arity,N,M,k);
864 bucket_grouping(tab,tab_node,new_tab_node,arity,N,M,k);
865 }else{
866 fast_grouping(tab,tab_node,new_tab_node,arity,N,M,k);
868 duration=TOC;
870 printf("Fast grouping duration=%f\n",duration);
872 //exit(-1);
873 }else{
874 #ifdef DEBUG
875 printf("Grouping nodes...\n");
876 #endif
877 list.next=NULL;
878 list.val=0;//number of elements in the list
879 cur_group=(tree_t**)malloc(sizeof(tree_t*)*arity);
880 best_selection=(group_list_t **)malloc(sizeof(group_list_t*)*M);
882 list_all_possible_groups(tab,tab_node,0,arity,0,cur_group,N,&list);
883 n=(int)list.val;
884 assert(n==k);
885 tab_group=(group_list_t**)malloc(sizeof(group_list_t)*n);
886 list_to_tab(list.next,tab_group,n);
887 #ifdef DEBUG
888 printf("List to tab done\n");
889 #endif
890 best_val=DBL_MAX;
892 // perform the pack mapping fist
893 timeout=select_independent_groups(tab_group,n,arity,M,&best_val,best_selection,1,0.1);
894 #ifdef DEBUG
895 if(timeout){
896 printf("Packed mapping timeout!\n");
898 #endif
899 // give this mapping an exra credit (in general MPI application are made such that neighbour process communicates more than distant ones)
900 best_val/=1.001;
901 #ifdef DEBUG
902 printf("Packing computed\n");
903 #endif
904 // perform a mapping trying to use group that cost less first
905 qsort(tab_group,n,sizeof(group_list_t*),group_list_asc);
906 last_best=best_val;
907 timeout=select_independent_groups(tab_group,n,arity,M,&best_val,best_selection,10,0.1);
908 #ifdef DEBUG
909 if(timeout){
910 printf("Cost less first timeout!\n");
911 }else if(last_best>best_val){
912 printf("Cost less first Impoved solution\n");
914 printf("----\n");
915 #endif
916 // perform a mapping trying to minimize the use of groups that cost a lot
917 qsort(tab_group,n,sizeof(group_list_t*),group_list_dsc);
918 last_best=best_val;
919 timeout=select_independent_groups_by_largest_index(tab_group,n,arity,M,&best_val,best_selection,10,0.1);
920 #ifdef DEBUG
921 if(timeout){
922 printf("Cost most last timeout!\n");
923 }else if(last_best>best_val){
924 printf("Cost most last impoved solution\n");
926 #endif
927 if(n<10000){
928 // perform a mapping in the weighted degree order
929 #ifdef DEBUG
930 printf("----WG----\n");
931 #endif
932 compute_weighted_degree(tab_group,n,arity);
933 #ifdef DEBUG
934 printf("Weigted degree computed\n");
935 #endif
936 qsort(tab_group,n,sizeof(group_list_t*),weighted_degree_dsc);
937 //display_tab_group(tab_group,n,arity);
938 last_best=best_val;
939 timeout=select_independent_groups(tab_group,n,arity,M,&best_val,best_selection,10,0.1);
940 #ifdef DEBUG
941 if(timeout){
942 printf("WG timeout!\n");
943 }else if(last_best>best_val){
944 printf("WG impoved solution\n");
946 #endif
949 qsort(best_selection,M,sizeof(group_list_t*),group_list_id);
951 for(l=0;l<M;l++){
952 for(j=0;j<arity;j++){
953 new_tab_node[l].child[j]=best_selection[l]->tab[j];
954 new_tab_node[l].child[j]->parent=&new_tab_node[l];
956 new_tab_node[l].arity=arity;
958 //printf("arity=%d\n",new_tab_node[l].arity);
959 update_val(tab,&new_tab_node[l],N);
962 delete_group_list((&list)->next);
963 free(best_selection);
964 free(tab_group);
965 free(cur_group);
968 if(tab!=comm_matrix)
969 free_tab_double(tab,N);
971 duration=TOC;
972 printf("Grouping done in %.4fs!\n",duration);
979 void complete_com_mat(double ***tab,int N, int K){
980 double **old_tab,**new_tab;
981 int M,i,j;
983 old_tab=*tab;
985 M=N+K;
986 new_tab=(double**)malloc(M*sizeof(double*));
987 for(i=0;i<M;i++)
988 new_tab[i]=(double*)malloc(M*sizeof(double));
990 *tab=new_tab;
991 for(i=0;i<M;i++){
992 for(j=0;j<M;j++){
993 if((i<N)&&(j<N)){
994 new_tab[i][j]=old_tab[i][j];
995 }else{
996 new_tab[i][j]=0;
1002 void complete_obj_weight(double **tab,int N, int K){
1003 double *old_tab,*new_tab,avg;
1004 int M,i;
1006 old_tab=*tab;
1008 if(!old_tab)
1009 return;
1012 avg=0;
1013 for(i=0;i<N;i++)
1014 avg+=old_tab[i];
1015 avg/=N;
1018 M=N+K;
1019 new_tab=(double*)malloc(M*sizeof(double));
1021 *tab=new_tab;
1022 for(i=0;i<M;i++){
1023 if(i<N){
1024 new_tab[i]=old_tab[i];
1025 }else{
1026 new_tab[i]=avg;
1033 void create_dumb_tree(tree_t *node,int depth,tm_topology_t *topology){
1034 tree_t **list_child;
1035 int arity,i;
1038 if(depth==topology->nb_levels-1){
1039 set_node(node,NULL,0,NULL,-1,0,NULL);
1040 return;
1043 arity=topology->arity[depth];
1044 assert(arity>0);
1045 list_child=(tree_t**)calloc(arity,sizeof(tree_t*));
1046 for(i=0;i<arity;i++){
1047 list_child[i]=(tree_t*)malloc(sizeof(tree_t));
1048 create_dumb_tree(list_child[i],depth+1,topology);
1049 list_child[i]->parent=node;
1050 list_child[i]->dumb=1;
1053 set_node(node,list_child,arity,NULL,-1,0,list_child[0]);
1057 void complete_tab_node(tree_t **tab,int N, int K,int depth,tm_topology_t *topology){
1058 tree_t *old_tab,*new_tab;
1059 int M,i;
1060 if(K==0)
1061 return;
1063 old_tab=*tab;
1066 M=N+K;
1067 new_tab=(tree_t*)malloc(M*sizeof(tree_t));
1069 *tab=new_tab;
1070 for(i=0;i<M;i++){
1071 if((i<N)){
1072 clone_tree(&new_tab[i],&old_tab[i]);
1073 }else{
1074 create_dumb_tree(&new_tab[i],depth,topology);
1075 new_tab[i].id=i;
1079 //do not suppress tab if you are at the depth-most level it will be used at the mapping stage
1080 free(old_tab);
1084 void set_deb_tab_child(tree_t *tree, tree_t *child,int depth){
1085 //printf("depth=%d\t%p\t%p\n",depth,child,tree);
1086 if(depth>0)
1087 set_deb_tab_child(tree->tab_child,child,depth-1);
1088 else
1089 tree->tab_child=child;
1095 Build the tree of the matching. It is a bottom up algorithm: it starts from the bottom of the tree on proceed by decreasing the depth
1096 It groups nodes of the matrix tab and link these groups to the nodes of the under level.
1097 Then it calls recursivcely the function to prefrom the grouping at the above level.
1099 tab_node: array of nodes of the under level.
1100 tab: local communication matrix
1101 N: number of nodes. Order of com_mat, size of obj_weight
1102 arity: arity of the nodes of the above level.
1103 depth: current depth of the algorithm
1104 toplogy: description of the hardware topology.
1106 tree_t *build_level_topology(tree_t *tab_node,double **com_mat,int N,int arity,int depth,tm_topology_t *topology, double *obj_weight, double *comm_speed){
1107 int M; /*N/Arity: number the groups*/
1108 int K=0,i;
1109 tree_t *new_tab_node; /*array of node for this level (of size M): there will be linked to the nodes of tab_nodes*/
1110 double **new_com_mat; /*New communication matrix (after grouyping nodes together)*/
1111 tree_t *res; /*resulting tree*/
1112 int completed=0;
1113 double speed; /* communication speed at this level*/
1114 double *new_obj_weight;
1115 if(depth==0){
1116 if((N==1)&&(depth==0))
1117 return &tab_node[0];
1118 else{
1119 fprintf(stderr,"Error: matrix size: %d and depth:%d (should be 1 and -1 respectively)\n",N,depth);
1120 exit(-1);
1125 /* If the number of nodes does not devide the arity: we add K nodes */
1126 if(N%arity!=0){
1127 K=arity*((N/arity)+1)-N;
1128 //printf("****N=%d arity=%d K=%d\n",N,arity,K);
1129 //display_tab(tab,N);
1130 /* add K rows and columns to comm_matrix*/
1131 complete_com_mat(&com_mat,N,K);
1132 /* add K element to the object weight*/
1133 complete_obj_weight(&obj_weight,N,K);
1134 //display_tab(tab,N+K);
1135 /* add a dumb tree to the K new "virtual nodes"*/
1136 complete_tab_node(&tab_node,N,K,depth,topology);
1137 completed=1; /*flag this addition*/
1138 N+=K; /*increase the number of nodes accordingly*/
1139 } //display_tab(tab,N);
1142 M=N/arity;
1143 printf("Depth=%d\tnb_nodes=%d\tnb_groups=%d\tsize of groups(arity)=%d\n",depth,N,M,arity);
1145 /*create the new nodes*/
1146 new_tab_node=(tree_t*)malloc(sizeof(tree_t)*M);
1147 /*intitialize each node*/
1148 for(i=0;i<M;i++){
1149 tree_t **list_child;
1150 list_child=(tree_t**)calloc(arity,sizeof(tree_t*));
1151 set_node(&new_tab_node[i],list_child,arity,NULL,i,0,tab_node);
1154 /*Core of the algorithm: perfrom the grouping*/
1155 if(comm_speed)
1156 speed=comm_speed[depth];
1157 else
1158 speed=-1;
1159 group_nodes(com_mat,tab_node,new_tab_node,depth,arity,N,M,obj_weight,speed);
1161 /*based on that grouping aggregate the communication matrix*/
1162 new_com_mat=aggregate_com_mat(new_tab_node,com_mat,M);
1163 /*based on that grouping aggregate the object weight matrix*/
1164 new_obj_weight=aggregate_obj_weight(new_tab_node,obj_weight,M);
1166 /* set ID of virtual nodes to -1*/
1167 for(i=N-K;i<N;i++)
1168 tab_node[i].id=-1;
1170 //for(i=0;i<N;i++)
1171 // display_node(&tab_node[i]);
1173 //display_tab(new_com_mat,M);
1175 /* decrease depth and compute arity of the above level*/
1176 depth--;
1177 if(depth>0)
1178 arity = topology->arity[depth-1];
1179 else
1180 arity=1;
1181 // assume all objects have the same arity
1182 res = build_level_topology(new_tab_node,new_com_mat,M,arity,depth,topology,new_obj_weight,comm_speed);
1184 set_deb_tab_child(res,tab_node,depth);
1187 if(completed){
1188 free_tab_double(com_mat,N);
1189 free(obj_weight);
1191 /* cppcheck-suppress deallocDealloc */
1192 free_tab_double(new_com_mat,M);
1193 free(new_obj_weight);
1195 return res;
1200 double speed(int depth){
1201 // Bertha values
1202 //double tab[5]={21,9,4.5,2.5,0.001};
1203 //double tab[5]={1,1,1,1,1};
1204 //double tab[6]={100000,10000,1000,500,100,10};
1205 double tab[5]={100000,10000,1000,500,10};
1207 return 1.0/tab[depth];
1208 //return 10*log(depth+2);
1209 //return (depth+1);
1210 //return (long int)pow(100,depth);
1213 tree_t * build_tree_from_topology(tm_topology_t *topology,double **tab,int N,double *obj_weight, double *comm_speed){
1214 int depth,i;
1215 tree_t *res,*tab_node;
1218 tab_node=(tree_t*)malloc(sizeof(tree_t)*N);
1219 for(i=0;i<N;i++)
1220 set_node(&tab_node[i],NULL,0,NULL,i,0,NULL);
1223 depth = topology->nb_levels -1;
1224 printf("nb_levels=%d\n",depth+1);
1225 // assume all objects have the same arity
1226 res = build_level_topology(tab_node,tab,N,topology->arity[depth-1],depth,topology, obj_weight, comm_speed);
1227 printf("Build tree done!\n");
1228 return res;