cloog_loop_merge: properly handle union domains
[cloog.git] / source / loop.c
blob176a6558cc7b202db2266ce37e32d9d346d0bb47
2 /**-------------------------------------------------------------------**
3 ** CLooG **
4 **-------------------------------------------------------------------**
5 ** loop.c **
6 **-------------------------------------------------------------------**
7 ** First version: october 26th 2001 **
8 **-------------------------------------------------------------------**/
11 /******************************************************************************
12 * CLooG : the Chunky Loop Generator (experimental) *
13 ******************************************************************************
14 * *
15 * Copyright (C) 2001-2005 Cedric Bastoul *
16 * *
17 * This library is free software; you can redistribute it and/or *
18 * modify it under the terms of the GNU Lesser General Public *
19 * License as published by the Free Software Foundation; either *
20 * version 2.1 of the License, or (at your option) any later version. *
21 * *
22 * This library is distributed in the hope that it will be useful, *
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
25 * Lesser General Public License for more details. *
26 * *
27 * You should have received a copy of the GNU Lesser General Public *
28 * License along with this library; if not, write to the Free Software *
29 * Foundation, Inc., 51 Franklin Street, Fifth Floor, *
30 * Boston, MA 02110-1301 USA *
31 * *
32 * CLooG, the Chunky Loop Generator *
33 * Written by Cedric Bastoul, Cedric.Bastoul@inria.fr *
34 * *
35 ******************************************************************************/
36 /* CAUTION: the english used for comments is probably the worst you ever read,
37 * please feel free to correct and improve it !
40 # include <stdlib.h>
41 # include <stdio.h>
42 # include "../include/cloog/cloog.h"
44 #define ALLOC(type) (type*)malloc(sizeof(type))
47 /******************************************************************************
48 * Memory leaks hunting *
49 ******************************************************************************/
52 /**
53 * These functions and global variables are devoted to memory leaks hunting: we
54 * want to know at each moment how many CloogLoop structures had been allocated
55 * (cloog_loop_allocated) and how many had been freed (cloog_loop_freed).
56 * Each time a CloogLoog structure is allocated, a call to the function
57 * cloog_loop_leak_up() must be carried out, and respectively
58 * cloog_loop_leak_down() when a CloogLoop structure is freed. The special
59 * variable cloog_loop_max gives the maximal number of CloogLoop structures
60 * simultaneously alive (i.e. allocated and non-freed) in memory.
61 * - July 3rd->11th 2003: first version (memory leaks hunt and correction).
65 static void cloog_loop_leak_up(CloogState *state)
67 state->loop_allocated++;
68 if ((state->loop_allocated - state->loop_freed) > state->loop_max)
69 state->loop_max = state->loop_allocated - state->loop_freed;
73 static void cloog_loop_leak_down(CloogState *state)
75 state->loop_freed++;
79 /******************************************************************************
80 * Structure display function *
81 ******************************************************************************/
84 /**
85 * cloog_loop_print_structure function:
86 * Displays a loop structure in a way that trends to be understandable without
87 * falling in a deep depression or, for the lucky ones, getting a headache...
88 * Written by Olivier Chorier, Luc Marchaud, Pierre Martin and Romain Tartiere.
89 * - April 24th 2005: Initial version.
90 * - May 21rd 2005: - New parameter `F' for destination file (ie stdout),
91 * - Minor tweaks.
92 * - May 26th 2005: Memory leak hunt.
93 * - June 2nd 2005: (Ced) Integration and minor fixes.
94 * -June 22nd 2005: (Ced) Adaptation for GMP.
96 void cloog_loop_print_structure(FILE * file, CloogLoop * loop, int level)
97 { int i, j, first=1 ;
99 if (loop)
100 { /* Go to the right level. */
101 for (i=0; i<level; i++)
102 fprintf(file,"|\t") ;
104 fprintf(file,"+-- CloogLoop\n") ;
107 /* For each loop. */
108 while (loop)
109 { if (!first)
110 { /* Go to the right level. */
111 for (i=0; i<level; i++)
112 fprintf(file,"|\t") ;
114 fprintf(file,"| CloogLoop\n") ;
116 else
117 first = 0 ;
119 /* A blank line. */
120 for(j=0; j<=level+1; j++)
121 fprintf(file,"|\t") ;
122 fprintf(file,"\n") ;
124 /* Print the domain. */
125 cloog_domain_print_structure(file, loop->domain, level+1, "CloogDomain");
127 /* Print the stride. */
128 for(j=0; j<=level; j++)
129 fprintf(file,"|\t") ;
130 if (loop->stride) {
131 fprintf(file, "Stride: ");
132 cloog_int_print(file, loop->stride->stride);
133 fprintf(file, "\n");
134 fprintf(file, "Offset: ");
135 cloog_int_print(file, loop->stride->offset);
136 fprintf(file, "\n");
139 /* A blank line. */
140 for(j=0; j<=level+1; j++)
141 fprintf(file,"|\t") ;
142 fprintf(file,"\n") ;
144 /* Print the block. */
145 cloog_block_print_structure(file,loop->block,level+1) ;
147 /* A blank line. */
148 for (i=0; i<=level+1; i++)
149 fprintf(file,"|\t") ;
150 fprintf(file,"\n") ;
152 /* Print inner if any. */
153 if (loop->inner)
154 cloog_loop_print_structure(file,loop->inner,level+1) ;
156 /* And let's go for the next one. */
157 loop = loop->next ;
159 /* One more time something that is here only for a better look. */
160 if (!loop)
161 { /* Two blank lines if this is the end of the linked list. */
162 for (j=0; j<2; j++)
163 { for (i=0; i<=level; i++)
164 fprintf(file,"|\t") ;
166 fprintf(file,"\n") ;
169 else
170 { /* A special blank line if the is a next loop. */
171 for (i=0; i<=level; i++)
172 fprintf(file,"|\t") ;
173 fprintf(file,"V\n") ;
180 * cloog_loop_print function:
181 * This function prints the content of a CloogLoop structure (start) into a
182 * file (file, possibly stdout).
183 * - June 2nd 2005: Now this very old function (probably as old as CLooG) is
184 * only a frontend to cloog_loop_print_structure, with a quite
185 * better human-readable representation.
187 void cloog_loop_print(FILE * file, CloogLoop * loop)
188 { cloog_loop_print_structure(file,loop,0) ;
192 /******************************************************************************
193 * Memory deallocation function *
194 ******************************************************************************/
198 * cloog_loop_free function:
199 * This function frees the allocated memory for a CloogLoop structure (loop),
200 * and frees its inner loops and its next loops.
201 * - June 22nd 2005: Adaptation for GMP.
203 void cloog_loop_free(CloogLoop * loop)
204 { CloogLoop * next ;
206 while (loop != NULL) {
207 cloog_loop_leak_down(loop->state);
209 next = loop->next ;
210 cloog_domain_free(loop->domain) ;
211 cloog_domain_free(loop->unsimplified);
212 cloog_block_free(loop->block) ;
213 if (loop->inner != NULL)
214 cloog_loop_free(loop->inner) ;
216 cloog_stride_free(loop->stride);
217 free(loop) ;
218 loop = next ;
224 * cloog_loop_free_parts function:
225 * This function frees the allocated memory for some parts of a CloogLoop
226 * structure (loop), each other argument is a boolean having to be set to 1 if
227 * we want to free the corresponding part, 0 otherwise. This function applies
228 * the same freeing policy to its inner ans next loops recursively.
229 * - July 3rd 2003: first version.
230 * - June 22nd 2005: Adaptation for GMP.
232 void cloog_loop_free_parts(loop, domain, block, inner, next)
233 CloogLoop * loop ;
234 int domain, block, inner, next ;
235 { CloogLoop * follow ;
237 while (loop != NULL) {
238 cloog_loop_leak_down(loop->state);
239 follow = loop->next ;
241 if (domain)
242 cloog_domain_free(loop->domain) ;
244 if (block)
245 cloog_block_free(loop->block) ;
247 if ((inner) && (loop->inner != NULL))
248 cloog_loop_free_parts(loop->inner,domain,block,inner,1) ;
250 cloog_stride_free(loop->stride);
251 free(loop) ;
252 if (next)
253 loop = follow ;
254 else
255 loop = NULL ;
260 /******************************************************************************
261 * Reading functions *
262 ******************************************************************************/
266 * Construct a CloogLoop structure from a given iteration domain
267 * and statement number.
269 CloogLoop *cloog_loop_from_domain(CloogState *state, CloogDomain *domain,
270 int number)
272 int nb_iterators;
273 CloogLoop * loop ;
274 CloogStatement * statement ;
276 /* Memory allocation and information reading for the first domain: */
277 loop = cloog_loop_malloc(state);
278 /* domain. */
279 loop->domain = domain;
280 if (loop->domain != NULL)
281 nb_iterators = cloog_domain_dimension(loop->domain);
282 else
283 nb_iterators = 0 ;
284 /* included statement block. */
285 statement = cloog_statement_alloc(state, number + 1);
286 loop->block = cloog_block_alloc(statement, 0, NULL, nb_iterators);
288 return loop ;
293 * cloog_loop_read function:
294 * This function reads loop data from a file (foo, possibly stdin) and
295 * returns a pointer to a CloogLoop structure containing the read information.
296 * This function can be used only for input file reading, when one loop is
297 * associated with one statement.
298 * - number is the statement block number carried by the loop (-1 if none).
299 * - nb_parameters is the number of parameters.
301 * - September 9th 2002: first version.
302 * - April 16th 2005: adaptation to new CloogStatement struct (with number).
303 * - June 11th 2005: adaptation to new CloogBlock structure.
304 * - June 22nd 2005: Adaptation for GMP.
306 CloogLoop *cloog_loop_read(CloogState *state,
307 FILE *foo, int number, int nb_parameters)
309 int op1, op2, op3;
310 char s[MAX_STRING];
311 CloogDomain *domain;
313 domain = cloog_domain_union_read(state, foo, nb_parameters);
315 /* To read that stupid "0 0 0" line. */
316 while (fgets(s,MAX_STRING,foo) == 0) ;
317 while ((*s=='#' || *s=='\n') || (sscanf(s," %d %d %d",&op1,&op2,&op3)<3))
318 fgets(s,MAX_STRING,foo) ;
320 return cloog_loop_from_domain(state, domain, number);
324 /******************************************************************************
325 * Processing functions *
326 ******************************************************************************/
330 * cloog_loop_malloc function:
331 * This function allocates the memory space for a CloogLoop structure and
332 * sets its fields with default values. Then it returns a pointer to the
333 * allocated space.
334 * - November 21th 2005: first version.
336 CloogLoop *cloog_loop_malloc(CloogState *state)
337 { CloogLoop * loop ;
339 /* Memory allocation for the CloogLoop structure. */
340 loop = (CloogLoop *)malloc(sizeof(CloogLoop)) ;
341 if (loop == NULL)
342 cloog_die("memory overflow.\n");
343 cloog_loop_leak_up(state);
346 /* We set the various fields with default values. */
347 loop->state = state;
348 loop->domain = NULL ;
349 loop->unsimplified = NULL;
350 loop->block = NULL ;
351 loop->usr = NULL;
352 loop->inner = NULL ;
353 loop->next = NULL ;
354 loop->otl = 0;
355 loop->stride = NULL;
357 return loop ;
362 * cloog_loop_alloc function:
363 * This function allocates the memory space for a CloogLoop structure and
364 * sets its fields with those given as input. Then it returns a pointer to the
365 * allocated space.
366 * - October 27th 2001: first version.
367 * - June 22nd 2005: Adaptation for GMP.
368 * - November 21th 2005: use of cloog_loop_malloc.
370 CloogLoop *cloog_loop_alloc(CloogState *state,
371 CloogDomain *domain, int otl, CloogStride *stride,
372 CloogBlock *block, CloogLoop *inner, CloogLoop *next)
373 { CloogLoop * loop ;
375 loop = cloog_loop_malloc(state);
377 loop->domain = domain ;
378 loop->block = block ;
379 loop->inner = inner ;
380 loop->next = next ;
381 loop->otl = otl;
382 loop->stride = cloog_stride_copy(stride);
384 return(loop) ;
389 * cloog_loop_add function:
390 * This function adds a CloogLoop structure (loop) at a given place (now) of a
391 * NULL terminated list of CloogLoop structures. The beginning of this list
392 * is (start). This function updates (now) to (loop), and updates (start) if the
393 * added element is the first one -that is when (start) is NULL-.
394 * - October 28th 2001: first version.
396 void cloog_loop_add(CloogLoop ** start, CloogLoop ** now, CloogLoop * loop)
397 { if (*start == NULL)
398 { *start = loop ;
399 *now = *start ;
401 else
402 { (*now)->next = loop ;
403 *now = (*now)->next ;
409 * cloog_loop_add function:
410 * This function adds a CloogLoop structure (loop) at a given place (now) of a
411 * NULL terminated list of CloogLoop structures. The beginning of this list
412 * is (start). This function updates (now) to the end of the loop list (loop),
413 * and updates (start) if the added element is the first one -that is when
414 * (start) is NULL-.
415 * - September 9th 2005: first version.
417 void cloog_loop_add_list(CloogLoop ** start, CloogLoop ** now, CloogLoop * loop)
418 { if (*start == NULL)
419 { *start = loop ;
420 *now = *start ;
422 else
423 { (*now)->next = loop ;
424 *now = (*now)->next ;
427 while ((*now)->next != NULL)
428 *now = (*now)->next ;
433 * cloog_loop_copy function:
434 * This function returns a copy of the CloogLoop structure given as input. In
435 * fact, there is just new allocations for the CloogLoop structures, but their
436 * contents are the same.
437 * - October 28th 2001: first version.
438 * - July 3rd->11th 2003: memory leaks hunt and correction.
440 CloogLoop * cloog_loop_copy(CloogLoop * source)
441 { CloogLoop * loop ;
442 CloogBlock * block ;
443 CloogDomain * domain ;
445 loop = NULL ;
446 if (source != NULL)
447 { domain = cloog_domain_copy(source->domain) ;
448 block = cloog_block_copy(source->block) ;
449 loop = cloog_loop_alloc(source->state, domain, source->otl,
450 source->stride, block, NULL, NULL);
451 loop->usr = source->usr;
452 loop->inner = cloog_loop_copy(source->inner) ;
453 loop->next = cloog_loop_copy(source->next) ;
455 return(loop) ;
460 * cloog_loop_add_disjoint function:
461 * This function adds some CloogLoop structures at a given place (now) of a
462 * NULL terminated list of CloogLoop structures. The beginning of this list
463 * is (start). (loop) can be an union of polyhedra, this function separates the
464 * union into a list of *disjoint* polyhedra then adds the list. This function
465 * updates (now) to the end of the list and updates (start) if first added
466 * element is the first of the principal list -that is when (start) is NULL-.
467 * (loop) can be freed by this function, basically when its domain is actually
468 * a union of polyhedra, but don't worry, all the useful data are now stored
469 * inside the list (start). We do not use PolyLib's Domain_Disjoint function,
470 * since the number of union components is often higher (thus code size too).
471 * - October 28th 2001: first version.
472 * - November 14th 2001: bug correction (this one was hard to find !).
473 * - July 3rd->11th 2003: memory leaks hunt and correction.
474 * - June 22nd 2005: Adaptation for GMP.
475 * - October 27th 2005: (debug) included blocks were not copied for new loops.
477 void cloog_loop_add_disjoint(start, now, loop)
478 CloogLoop ** start, ** now, * loop ;
480 CloogLoop * sep, * inner ;
481 CloogDomain *domain, *seen, *temp, *rest;
482 CloogBlock * block ;
484 if (cloog_domain_isconvex(loop->domain))
485 cloog_loop_add(start,now,loop) ;
486 else {
487 domain = cloog_domain_simplify_union(loop->domain);
488 loop->domain = NULL ;
490 /* We separate the first element of the rest of the union. */
491 domain = cloog_domain_cut_first(domain, &rest);
493 /* This first element is the first of the list of disjoint polyhedra. */
494 sep = cloog_loop_alloc(loop->state, domain, 0, NULL,
495 loop->block, loop->inner, NULL);
496 cloog_loop_add(start,now,sep) ;
498 seen = cloog_domain_copy(domain);
499 while (!cloog_domain_isempty(domain = rest)) {
500 temp = cloog_domain_cut_first(domain, &rest);
501 domain = cloog_domain_difference(temp, seen);
502 cloog_domain_free(temp);
504 if (cloog_domain_isempty(domain)) {
505 cloog_domain_free(domain);
506 continue;
509 /* Each new loop will have its own life, for instance we can free its
510 * inner loop and included block. Then each one must have its own copy
511 * of both 'inner' and 'block'.
513 inner = cloog_loop_copy(loop->inner) ;
514 block = cloog_block_copy(loop->block) ;
516 sep = cloog_loop_alloc(loop->state, cloog_domain_copy(domain),
517 0, NULL, block, inner, NULL);
518 /* domain can be an union too. If so: recursion. */
519 if (cloog_domain_isconvex(domain))
520 cloog_loop_add(start,now,sep) ;
521 else
522 cloog_loop_add_disjoint(start,now,sep) ;
524 if (cloog_domain_isempty(rest)) {
525 cloog_domain_free(domain);
526 break;
529 seen = cloog_domain_union(seen, domain);
531 cloog_domain_free(rest);
532 cloog_domain_free(seen);
533 cloog_loop_free_parts(loop,0,0,0,0) ;
539 * cloog_loop_disjoint function:
540 * This function returns a list of loops such that each loop with non-convex
541 * domain in the input list (loop) is separated into several loops where the
542 * domains are the components of the union of *disjoint* polyhedra equivalent
543 * to the original non-convex domain. See cloog_loop_add_disjoint comments
544 * for more details.
545 * - September 16th 2005: first version.
547 CloogLoop * cloog_loop_disjoint(CloogLoop * loop)
548 { CloogLoop *res=NULL, * now=NULL, * next ;
550 /* Because this is often the case, don't waste time ! */
551 if (loop && !loop->next && cloog_domain_isconvex(loop->domain))
552 return loop ;
554 while (loop != NULL)
555 { next = loop->next ;
556 loop->next = NULL ;
557 cloog_loop_add_disjoint(&res,&now,loop) ;
558 loop = next ;
561 return res ;
566 * cloog_loop_restrict function:
567 * This function returns the (loop) in the context of (context): it makes the
568 * intersection between the (loop) domain and the (context), then it returns
569 * a pointer to a new loop, with this intersection as domain.
571 * - October 27th 2001: first version.
572 * - June 15th 2005: a memory leak fixed (domain was not freed when empty).
573 * - June 22nd 2005: Adaptation for GMP.
575 CloogLoop *cloog_loop_restrict(CloogLoop *loop, CloogDomain *context)
576 { int new_dimension ;
577 CloogDomain * domain, * extended_context, * new_domain ;
578 CloogLoop * new_loop ;
580 domain = loop->domain ;
581 if (cloog_domain_dimension(domain) > cloog_domain_dimension(context))
583 new_dimension = cloog_domain_dimension(domain);
584 extended_context = cloog_domain_extend(context, new_dimension);
585 new_domain = cloog_domain_intersection(extended_context,loop->domain) ;
586 cloog_domain_free(extended_context) ;
588 else
589 new_domain = cloog_domain_intersection(context,loop->domain) ;
591 if (cloog_domain_isempty(new_domain))
592 { cloog_domain_free(new_domain) ;
593 return(NULL) ;
595 else {
596 new_loop = cloog_loop_alloc(loop->state, new_domain,
597 0, NULL, loop->block, loop->inner, NULL);
598 return(new_loop) ;
604 * Call cloog_loop_restrict on each loop in the list "loop" and return
605 * the concatenated result.
607 CloogLoop *cloog_loop_restrict_all(CloogLoop *loop, CloogDomain *context)
609 CloogLoop *next;
610 CloogLoop *res = NULL;
611 CloogLoop **res_next = &res;
613 for (; loop; loop = next) {
614 next = loop->next;
616 *res_next = cloog_loop_restrict(loop, context);
617 if (*res_next) {
618 res_next = &(*res_next)->next;
619 cloog_loop_free_parts(loop, 1, 0, 0, 0);
620 } else {
621 loop->next = NULL;
622 cloog_loop_free(loop);
626 return res;
631 * Restrict the domains of the inner loops of each loop l in the given
632 * list of loops to the domain of the loop l. If the domains of all
633 * inner loops of a given loop l turn out to be empty, then remove l
634 * from the list.
636 CloogLoop *cloog_loop_restrict_inner(CloogLoop *loop)
638 CloogLoop *next;
639 CloogLoop *res;
640 CloogLoop **res_next = &res;
642 for (; loop; loop = next) {
643 next = loop->next;
645 loop->inner = cloog_loop_restrict_all(loop->inner, loop->domain);
646 if (loop->inner) {
647 *res_next = loop;
648 res_next = &(*res_next)->next;
649 } else {
650 loop->next = NULL;
651 cloog_loop_free(loop);
655 *res_next = NULL;
657 return res;
661 * cloog_loop_project function:
662 * This function returns the projection of (loop) on the (level) first
663 * dimensions (outer loops). It makes the projection of the (loop) domain,
664 * then it returns a pointer to a new loop, with this projection as domain.
666 * - October 27th 2001: first version.
667 * - July 3rd->11th 2003: memory leaks hunt and correction.
668 * - June 22nd 2005: Adaptation for GMP.
670 CloogLoop * cloog_loop_project(CloogLoop * loop, int level)
672 CloogDomain * new_domain ;
673 CloogLoop * new_loop, * copy ;
675 copy = cloog_loop_alloc(loop->state, loop->domain, loop->otl, loop->stride,
676 loop->block, loop->inner, NULL);
678 if (cloog_domain_dimension(loop->domain) == level)
679 new_domain = cloog_domain_copy(loop->domain) ;
680 else
681 new_domain = cloog_domain_project(loop->domain, level);
683 new_loop = cloog_loop_alloc(loop->state, new_domain, 0, NULL,
684 NULL, copy, NULL);
686 return(new_loop) ;
691 * Call cloog_loop_project on each loop in the list "loop" and return
692 * the concatenated result.
694 CloogLoop *cloog_loop_project_all(CloogLoop *loop, int level)
696 CloogLoop *next;
697 CloogLoop *res = NULL;
698 CloogLoop **res_next = &res;
700 for (; loop; loop = next) {
701 next = loop->next;
703 *res_next = cloog_loop_project(loop, level);
704 res_next = &(*res_next)->next;
705 cloog_loop_free_parts(loop, 0, 0, 0, 0);
708 return res;
713 * cloog_loop_concat function:
714 * This function returns a pointer to the concatenation of the
715 * CloogLoop lists given as input.
716 * - October 28th 2001: first version.
718 CloogLoop * cloog_loop_concat(CloogLoop * a, CloogLoop * b)
719 { CloogLoop * loop, * temp ;
721 loop = a ;
722 temp = loop ;
723 if (loop != NULL)
724 { while (temp->next != NULL)
725 temp = temp->next ;
726 temp->next = b ;
728 else
729 loop = b ;
731 return(loop) ;
736 * cloog_loop_combine:
737 * Combine consecutive loops with identical domains into
738 * a single loop with the concatenation of their inner loops
739 * as inner loop.
741 CloogLoop *cloog_loop_combine(CloogLoop *loop)
743 CloogLoop *first, *second;
745 for (first = loop; first; first = first->next) {
746 while (first->next) {
747 if (!cloog_domain_lazy_equal(first->domain, first->next->domain))
748 break;
749 second = first->next;
750 first->inner = cloog_loop_concat(first->inner, second->inner);
751 first->next = second->next;
752 cloog_loop_free_parts(second, 1, 0, 0, 0);
756 return loop;
760 * Remove loops from list that have an empty domain.
762 CloogLoop *cloog_loop_remove_empty_domain_loops(CloogLoop *loop)
764 CloogLoop *l, *res, *next, **res_next;
766 res = NULL;
767 res_next = &res;
768 for (l = loop; l; l = next) {
769 next = l->next;
770 if (cloog_domain_isempty(l->domain))
771 cloog_loop_free_parts(l, 1, 1, 1, 0);
772 else {
773 *res_next = l;
774 res_next = &(*res_next)->next;
777 *res_next = NULL;
779 return res;
782 CloogLoop *cloog_loop_decompose_inner(CloogLoop *loop,
783 int level, int scalar, int *scaldims, int nb_scattdims);
785 /* For each loop with only one inner loop, replace the domain
786 * of the loop with the projection of the domain of the inner
787 * loop. To increase the number of loops with a single inner
788 * we first decompose the inner loops into strongly connected
789 * components.
791 CloogLoop *cloog_loop_specialize(CloogLoop *loop,
792 int level, int scalar, int *scaldims, int nb_scattdims)
794 int dim;
795 CloogDomain *domain;
796 CloogLoop *l;
798 loop = cloog_loop_decompose_inner(loop, level, scalar,
799 scaldims, nb_scattdims);
801 for (l = loop; l; l = l->next) {
802 if (l->inner->next)
803 continue;
804 if (!cloog_domain_isconvex(l->inner->domain))
805 continue;
807 dim = cloog_domain_dimension(l->domain);
808 domain = cloog_domain_project(l->inner->domain, dim);
809 if (cloog_domain_isconvex(domain)) {
810 cloog_domain_free(l->domain);
811 l->domain = domain;
812 } else {
813 cloog_domain_free(domain);
817 return cloog_loop_remove_empty_domain_loops(loop);
821 * cloog_loop_separate function:
822 * This function implements the Quillere algorithm for separation of multiple
823 * loops: for a given set of polyhedra (loop), it computes a set of disjoint
824 * polyhedra such that the unions of these sets are equal, and returns this set.
825 * - October 28th 2001: first version.
826 * - November 14th 2001: elimination of some unused blocks.
827 * - August 13th 2002: (debug) in the case of union of polyhedra for one
828 * loop, redundant constraints are fired.
829 * - July 3rd->11th 2003: memory leaks hunt and correction.
830 * - June 22nd 2005: Adaptation for GMP.
831 * - October 16th 2005: Removal of the non-shared constraint elimination when
832 * there is only one loop in the list (seems to work
833 * without now, DomainSimplify may have been improved).
834 * The problem was visible with test/iftest2.cloog.
836 CloogLoop * cloog_loop_separate(CloogLoop * loop)
837 { int lazy_equal=0, disjoint = 0;
838 CloogLoop * new_loop, * new_inner, * res, * now, * temp, * Q,
839 * inner, * old /*, * previous, * next*/ ;
840 CloogDomain *UQ, *domain;
842 if (loop == NULL)
843 return NULL ;
845 loop = cloog_loop_combine(loop);
847 if (loop->next == NULL)
848 return cloog_loop_disjoint(loop) ;
850 UQ = cloog_domain_copy(loop->domain) ;
851 domain = cloog_domain_copy(loop->domain) ;
852 res = cloog_loop_alloc(loop->state, domain, 0, NULL,
853 loop->block, loop->inner, NULL);
855 old = loop ;
856 while((loop = loop->next) != NULL)
857 { temp = NULL ;
859 /* For all Q, add Q-loop associated with the blocks of Q alone,
860 * and Q inter loop associated with the blocks of Q and loop.
862 for (Q = res; Q; Q = Q->next) {
863 /* Add (Q inter loop). */
864 if ((disjoint = cloog_domain_lazy_disjoint(Q->domain,loop->domain)))
865 domain = NULL ;
866 else
867 { if ((lazy_equal = cloog_domain_lazy_equal(Q->domain,loop->domain)))
868 domain = cloog_domain_copy(Q->domain) ;
869 else
870 domain = cloog_domain_intersection(Q->domain,loop->domain) ;
872 if (!cloog_domain_isempty(domain))
873 { new_inner = cloog_loop_concat(cloog_loop_copy(Q->inner),
874 cloog_loop_copy(loop->inner)) ;
875 new_loop = cloog_loop_alloc(loop->state, domain, 0, NULL,
876 NULL, new_inner, NULL);
877 cloog_loop_add_disjoint(&temp,&now,new_loop) ;
879 else {
880 disjoint = 1;
881 cloog_domain_free(domain);
885 /* Add (Q - loop). */
886 if (disjoint)
887 domain = cloog_domain_copy(Q->domain) ;
888 else
889 { if (lazy_equal)
890 domain = cloog_domain_empty(Q->domain);
891 else
892 domain = cloog_domain_difference(Q->domain,loop->domain) ;
895 if (!cloog_domain_isempty(domain)) {
896 new_loop = cloog_loop_alloc(loop->state, domain, 0, NULL,
897 NULL, Q->inner, NULL);
898 cloog_loop_add_disjoint(&temp,&now,new_loop) ;
900 else
901 { cloog_domain_free(domain) ;
902 /* If Q->inner is no more useful, we can free it. */
903 inner = Q->inner ;
904 Q->inner = NULL ;
905 cloog_loop_free(inner) ;
909 /* Add loop-UQ associated with the blocks of loop alone.*/
910 if (cloog_domain_lazy_disjoint(loop->domain,UQ))
911 domain = cloog_domain_copy(loop->domain) ;
912 else
913 { if (cloog_domain_lazy_equal(loop->domain,UQ))
914 domain = cloog_domain_empty(UQ);
915 else
916 domain = cloog_domain_difference(loop->domain,UQ) ;
919 if (!cloog_domain_isempty(domain)) {
920 new_loop = cloog_loop_alloc(loop->state, domain, 0, NULL,
921 NULL, loop->inner, NULL);
922 cloog_loop_add_disjoint(&temp,&now,new_loop) ;
924 else
925 { cloog_domain_free(domain) ;
926 /* If loop->inner is no more useful, we can free it. */
927 cloog_loop_free(loop->inner) ;
930 loop->inner = NULL ;
932 if (loop->next != NULL)
933 UQ = cloog_domain_union(UQ, cloog_domain_copy(loop->domain));
934 else
935 cloog_domain_free(UQ);
937 cloog_loop_free_parts(res,1,0,0,1) ;
939 res = temp ;
941 cloog_loop_free_parts(old,1,0,0,1) ;
943 return(res) ;
947 static CloogDomain *bounding_domain(CloogDomain *dom, CloogOptions *options)
949 if (options->sh)
950 return cloog_domain_simple_convex(dom);
951 else
952 return cloog_domain_convex(dom);
957 * cloog_loop_merge function:
958 * This function is the 'soft' version of loop_separate if we are looking for
959 * a code much simpler (and less efficicient). This function returns the new
960 * CloogLoop list.
961 * - October 29th 2001: first version.
962 * - July 3rd->11th 2003: memory leaks hunt and correction.
963 * - June 22nd 2005: Adaptation for GMP.
965 CloogLoop *cloog_loop_merge(CloogLoop *loop, int level, CloogOptions *options)
967 CloogLoop *res, *new_inner, *old;
968 CloogDomain *new_domain, *temp;
970 if (loop == NULL)
971 return loop;
973 if (loop->next == NULL && cloog_domain_isconvex(loop->domain))
974 return loop;
976 old = loop;
977 temp = loop->domain;
978 loop->domain = NULL;
979 new_inner = loop->inner;
981 for (loop = loop->next; loop; loop = loop->next) {
982 temp = cloog_domain_union(temp, loop->domain);
983 loop->domain = NULL;
984 new_inner = cloog_loop_concat(new_inner, loop->inner);
987 new_domain = bounding_domain(temp, options);
989 if (level > 0 && !cloog_domain_is_bounded(new_domain, level) &&
990 cloog_domain_is_bounded(temp, level)) {
991 CloogDomain *splitter, *t2;
993 cloog_domain_free(new_domain);
994 splitter = cloog_domain_bound_splitter(temp, level);
996 res = NULL;
997 while (!cloog_domain_isconvex(splitter)) {
998 CloogDomain *first, *rest;
999 first = cloog_domain_cut_first(splitter, &rest);
1000 splitter = rest;
1001 t2 = cloog_domain_intersection(first, temp);
1002 cloog_domain_free(first);
1004 new_domain = bounding_domain(t2, options);
1005 cloog_domain_free(t2);
1007 if (cloog_domain_isempty(new_domain)) {
1008 cloog_domain_free(new_domain);
1009 continue;
1011 res = cloog_loop_alloc(old->state, new_domain, 0, NULL,
1012 NULL, cloog_loop_copy(new_inner), res);
1015 t2 = cloog_domain_intersection(splitter, temp);
1016 cloog_domain_free(splitter);
1018 new_domain = bounding_domain(t2, options);
1019 cloog_domain_free(t2);
1021 if (cloog_domain_isempty(new_domain)) {
1022 cloog_domain_free(new_domain);
1023 cloog_loop_free(new_inner);
1024 } else
1025 res = cloog_loop_alloc(old->state, new_domain, 0, NULL,
1026 NULL, new_inner, res);
1027 } else {
1028 res = cloog_loop_alloc(old->state, new_domain, 0, NULL,
1029 NULL, new_inner, NULL);
1031 cloog_domain_free(temp);
1033 cloog_loop_free_parts(old, 0, 0, 0, 1);
1035 return res;
1039 static int cloog_loop_count(CloogLoop *loop)
1041 int nb_loops;
1043 for (nb_loops = 0; loop; loop = loop->next)
1044 nb_loops++;
1046 return nb_loops;
1051 * cloog_loop_sort function:
1052 * Adaptation from LoopGen 0.4 by F. Quillere. This function sorts a list of
1053 * parameterized disjoint polyhedra, in order to not have lexicographic order
1054 * violation (see Quillere paper).
1055 * - September 16th 2005: inclusion of cloog_loop_number (October 29th 2001).
1057 CloogLoop *cloog_loop_sort(CloogLoop *loop, int level)
1059 CloogLoop *res, *now, **loop_array;
1060 CloogDomain **doms;
1061 int i, nb_loops=0, * permut ;
1063 /* There is no need to sort the parameter domains. */
1064 if (!level)
1065 return loop;
1067 /* We will need to know how many loops are in the list. */
1068 nb_loops = cloog_loop_count(loop);
1070 /* If there is only one loop, it's the end. */
1071 if (nb_loops == 1)
1072 return(loop) ;
1074 /* We have to allocate memory for some useful components:
1075 * - loop_array: the loop array,
1076 * - doms: the array of domains to sort,
1077 * - permut: will give us a possible sort (maybe not the only one).
1079 loop_array = (CloogLoop **)malloc(nb_loops*sizeof(CloogLoop *)) ;
1080 doms = (CloogDomain **)malloc(nb_loops*sizeof(CloogDomain *));
1081 permut = (int *)malloc(nb_loops*sizeof(int)) ;
1083 /* We fill up the loop and domain arrays. */
1084 for (i=0;i<nb_loops;i++,loop=loop->next)
1085 { loop_array[i] = loop ;
1086 doms[i] = loop_array[i]->domain;
1089 /* cloog_domain_sort will fill up permut. */
1090 cloog_domain_sort(doms, nb_loops, level, permut);
1092 /* With permut and loop_array we build the sorted list. */
1093 res = NULL ;
1094 for (i=0;i<nb_loops;i++)
1095 { /* To avoid pointer looping... loop_add will rebuild the list. */
1096 loop_array[permut[i]-1]->next = NULL ;
1097 cloog_loop_add(&res,&now,loop_array[permut[i]-1]) ;
1100 free(permut) ;
1101 free(doms);
1102 free(loop_array) ;
1104 return res;
1109 * cloog_loop_nest function:
1110 * This function changes the loop list in such a way that we have no more than
1111 * one dimension added by level. It returns an equivalent loop list with
1112 * this property.
1113 * - October 29th 2001: first version.
1114 * - July 3rd->11th 2003: memory leaks hunt and correction.
1115 * - June 22nd 2005: Adaptation for GMP.
1116 * - November 21th 2005: (debug) now OK when cloog_loop_restrict returns NULL.
1118 CloogLoop *cloog_loop_nest(CloogLoop *loop, CloogDomain *context, int level)
1119 { int l ;
1120 CloogLoop * p, * temp, * res, * now, * next ;
1121 CloogDomain * new_domain ;
1123 loop = cloog_loop_disjoint(loop);
1125 res = NULL ;
1126 /* Each domain is changed by its intersection with the context. */
1127 while (loop != NULL)
1128 { p = cloog_loop_restrict(loop, context);
1129 next = loop->next ;
1131 if (p != NULL)
1132 { cloog_loop_free_parts(loop,1,0,0,0) ;
1134 temp = cloog_loop_alloc(p->state, p->domain, 0, NULL,
1135 p->block, p->inner, NULL);
1137 /* If the intersection dimension is too big, we make projections smaller
1138 * and smaller, and each projection includes the preceding projection
1139 * (thus, in the target list, dimensions are added one by one).
1141 if (cloog_domain_dimension(p->domain) >= level)
1142 for (l = cloog_domain_dimension(p->domain); l >= level; l--) {
1143 new_domain = cloog_domain_project(p->domain, l);
1144 temp = cloog_loop_alloc(p->state, new_domain, 0, NULL,
1145 NULL, temp, NULL);
1148 /* p is no more useful (but its content yes !). */
1149 cloog_loop_free_parts(p,0,0,0,0) ;
1151 cloog_loop_add(&res,&now,temp) ;
1153 else
1154 cloog_loop_free_parts(loop,1,1,1,0) ;
1156 loop = next ;
1159 return(res) ;
1163 /* Check if the domains of the inner loops impose a stride constraint
1164 * on the given level.
1165 * The core of the search is implemented in cloog_domain_list_stride.
1166 * Here, we simply construct a list of domains to pass to this function
1167 * and if a stride is found, we adjust the lower bounds by calling
1168 * cloog_domain_stride_lower_bound.
1170 static int cloog_loop_variable_offset_stride(CloogLoop *loop, int level)
1172 CloogDomainList *list = NULL;
1173 CloogLoop *inner;
1174 CloogStride *stride;
1176 for (inner = loop->inner; inner; inner = inner->next) {
1177 CloogDomainList *entry = ALLOC(CloogDomainList);
1178 entry->domain = cloog_domain_copy(inner->domain);
1179 entry->next = list;
1180 list = entry;
1183 stride = cloog_domain_list_stride(list, level);
1185 cloog_domain_list_free(list);
1187 if (!stride)
1188 return 0;
1190 loop->stride = stride;
1191 loop->domain = cloog_domain_stride_lower_bound(loop->domain, level, stride);
1193 return 1;
1198 * cloog_loop_stride function:
1199 * This function will find the stride of a loop for the iterator at the column
1200 * number 'level' in the constraint matrix. It will update the lower bound of
1201 * the iterator accordingly. Basically, the function will try to find in the
1202 * inner loops a common condition on this iterator for the inner loop iterators
1203 * to be integral. For instance, let us consider a loop with the iterator i,
1204 * the iteration domain -4<=i<=n, and its two inner loops with the iterator j.
1205 * The first inner loop has the constraint 3j=i, and the second one has the
1206 * constraint 6j=i. Then the common constraint on i for j to be integral is
1207 * i%3=0, the stride for i is 3. Lastly, we have to find the new lower bound
1208 * for i: the first value satisfying the common constraint: -3. At the end, the
1209 * iteration domain for i is -3<=i<=n and the stride for i is 3.
1211 * The algorithm implemented in this function only allows for strides
1212 * on loops with a lower bound that has a constant remainder on division
1213 * by the stride. Before initiating this procedure, we first check
1214 * if we can find a stride with a lower bound with a variable offset in
1215 * cloog_loop_variable_offset_stride.
1217 * - loop is the loop including the iteration domain of the considered iterator,
1218 * - level is the column number of the iterator in the matrix of contraints.
1220 * - June 29th 2003: first version (work in progress since June 26th 2003).
1221 * - July 14th 2003: simpler version.
1222 * - June 22nd 2005: Adaptation for GMP (from S. Verdoolaege's 0.12.1 version).
1224 void cloog_loop_stride(CloogLoop * loop, int level)
1225 { int first_search ;
1226 cloog_int_t stride, ref_offset, offset, potential;
1227 CloogLoop * inner ;
1229 if (!cloog_domain_can_stride(loop->domain, level))
1230 return;
1232 if (cloog_loop_variable_offset_stride(loop, level))
1233 return;
1235 cloog_int_init(stride);
1236 cloog_int_init(ref_offset);
1237 cloog_int_init(offset);
1238 cloog_int_init(potential);
1240 cloog_int_set_si(ref_offset, 0);
1241 cloog_int_set_si(offset, 0);
1243 /* Default stride. */
1244 cloog_int_set_si(stride, 1);
1245 first_search = 1 ;
1246 inner = loop->inner ;
1248 while (inner != NULL)
1249 { /* If the minimun stride has not been found yet, find the stride. */
1250 if ((first_search) || (!cloog_int_is_one(stride)))
1252 cloog_domain_stride(inner->domain, level, &potential, &offset);
1253 if (!cloog_int_is_one(potential) && (!first_search))
1254 { /* Offsets must be the same for common stride. */
1255 cloog_int_gcd(stride, potential, stride);
1256 if (!cloog_int_is_zero(stride)) {
1257 cloog_int_fdiv_r(offset, offset, stride);
1258 cloog_int_fdiv_r(ref_offset, ref_offset, stride);
1260 if (cloog_int_ne(offset,ref_offset))
1261 cloog_int_set_si(stride, 1);
1263 else {
1264 cloog_int_set(stride, potential);
1265 cloog_int_set(ref_offset, offset);
1268 first_search = 0 ;
1271 inner = inner->next ;
1274 if (cloog_int_is_zero(stride))
1275 cloog_int_set_si(stride, 1);
1277 /* Update the values if necessary. */
1278 if (!cloog_int_is_one(stride))
1279 { /* Update the stride value. */
1280 if (!cloog_int_is_zero(offset))
1281 cloog_int_sub(offset, stride, offset);
1282 loop->stride = cloog_stride_alloc(stride, offset);
1283 loop->domain = cloog_domain_stride_lower_bound(loop->domain, level,
1284 loop->stride);
1287 cloog_int_clear(stride);
1288 cloog_int_clear(ref_offset);
1289 cloog_int_clear(offset);
1290 cloog_int_clear(potential);
1294 void cloog_loop_otl(CloogLoop *loop, int level)
1296 if (cloog_domain_is_otl(loop->domain, level))
1297 loop->otl = 1;
1302 * cloog_loop_stop function:
1303 * This function implements the 'stop' option : each domain of each loop
1304 * in the list 'loop' is replaced by 'context'. 'context' should be the
1305 * domain of the outer loop. By using this method, there are no more dimensions
1306 * to scan and the simplification step will automaticaly remove the domains
1307 * since they are the same as the corresponding contexts. The effect of this
1308 * function is to stop the code generation at the level this function is called,
1309 * the resulting code do not consider the next dimensions.
1310 * - January 11th 2005: first version.
1312 CloogLoop * cloog_loop_stop(CloogLoop * loop, CloogDomain * context)
1313 { if (loop == NULL)
1314 return NULL ;
1315 else
1316 { cloog_domain_free(loop->domain) ;
1317 loop->domain = cloog_domain_copy(context) ;
1318 loop->next = cloog_loop_stop(loop->next, context) ;
1321 return loop ;
1325 static int level_is_constant(int level, int scalar, int *scaldims, int nb_scattdims)
1327 return level && (level+scalar <= nb_scattdims) && (scaldims[level+scalar-1]);
1332 * Compare the constant dimensions of loops 'l1' and 'l2' starting at 'scalar'
1333 * and return -1 if the vector of constant dimensions of 'l1' is smaller
1334 * than that of 'l2', 0 if they are the same and +1 if that of 'l1' is
1335 * greater than that of 'l2'.
1336 * This function should be called on the innermost loop (the loop
1337 * containing a block).
1338 * \param l1 Loop to be compared with l2.
1339 * \param l2 Loop to be compared with l1.
1340 * \param level Current non-scalar dimension.
1341 * \param scaldims Boolean array saying whether a dimension is scalar or not.
1342 * \param nb_scattdims Size of the scaldims array.
1343 * \param scalar Current scalar dimension.
1344 * \return -1 if (l1 < l2), 0 if (l1 == l2) and +1 if (l1 > l2)
1346 int cloog_loop_constant_cmp(CloogLoop *l1, CloogLoop *l2, int level,
1347 int *scaldims, int nb_scattdims, int scalar)
1349 CloogBlock *b1, *b2;
1350 b1 = l1->block;
1351 b2 = l2->block;
1352 while (level_is_constant(level, scalar, scaldims, nb_scattdims)) {
1353 int cmp = cloog_int_cmp(b1->scaldims[scalar], b2->scaldims[scalar]);
1354 if (cmp)
1355 return cmp;
1356 scalar++;
1358 return 0;
1363 * cloog_loop_scalar_gt function:
1364 * This function returns 1 if loop 'l1' is greater than loop 'l2' for the
1365 * scalar dimension vector that begins at dimension 'scalar', 0 otherwise. What
1366 * we want to know is whether a loop is scheduled before another one or not.
1367 * This function solves the problem when the considered dimension for scheduling
1368 * is a scalar dimension. Since there may be a succession of scalar dimensions,
1369 * this function will reason about the vector of scalar dimension that begins
1370 * at dimension 'level+scalar' and finish to the first non-scalar dimension.
1371 * \param l1 Loop to be compared with l2.
1372 * \param l2 Loop to be compared with l1.
1373 * \param level Current non-scalar dimension.
1374 * \param scaldims Boolean array saying whether a dimension is scalar or not.
1375 * \param nb_scattdims Size of the scaldims array.
1376 * \param scalar Current scalar dimension.
1377 * \return 1 if (l1 > l2), 0 otherwise.
1379 * - September 9th 2005: first version.
1380 * - October 15nd 2007: now "greater than" instead of "greater or equal".
1382 int cloog_loop_scalar_gt(l1, l2, level, scaldims, nb_scattdims, scalar)
1383 CloogLoop * l1, * l2 ;
1384 int level, * scaldims, nb_scattdims, scalar ;
1386 return cloog_loop_constant_cmp(l1, l2, level, scaldims, nb_scattdims, scalar) > 0;
1391 * cloog_loop_scalar_eq function:
1392 * This function returns 1 if loop 'l1' is equal to loop 'l2' for the scalar
1393 * dimension vector that begins at dimension 'scalar', 0 otherwise. What we want
1394 * to know is whether two loops are scheduled for the same time or not.
1395 * This function solves the problem when the considered dimension for scheduling
1396 * is a scalar dimension. Since there may be a succession of scalar dimensions,
1397 * this function will reason about the vector of scalar dimension that begins
1398 * at dimension 'level+scalar' and finish to the first non-scalar dimension.
1399 * - l1 and l2 are the loops to compare,
1400 * - level is the current non-scalar dimension,
1401 * - scaldims is the boolean array saying whether a dimension is scalar or not,
1402 * - nb_scattdims is the size of the scaldims array,
1403 * - scalar is the current scalar dimension.
1405 * - September 9th 2005 : first version.
1407 int cloog_loop_scalar_eq(l1, l2, level, scaldims, nb_scattdims, scalar)
1408 CloogLoop * l1, * l2 ;
1409 int level, * scaldims, nb_scattdims, scalar ;
1411 return cloog_loop_constant_cmp(l1, l2, level, scaldims, nb_scattdims, scalar) == 0;
1416 * cloog_loop_scalar_sort function:
1417 * This function sorts a linked list of loops (loop) with respect to the
1418 * scalar dimension vector that begins at dimension 'scalar'. Since there may
1419 * be a succession of scalar dimensions, this function will reason about the
1420 * vector of scalar dimension that begins at dimension 'level+scalar' and
1421 * finish to the first non-scalar dimension.
1422 * \param loop Loop list to sort.
1423 * \param level Current non-scalar dimension.
1424 * \param scaldims Boolean array saying whether a dimension is scalar or not.
1425 * \param nb_scattdims Size of the scaldims array.
1426 * \param scalar Current scalar dimension.
1427 * \return A pointer to the sorted list.
1429 * - July 2nd 2005: first developments.
1430 * - September 2nd 2005: first version.
1431 * - October 15nd 2007: complete rewrite to remove bugs, now a bubble sort.
1433 CloogLoop * cloog_loop_scalar_sort(loop, level, scaldims, nb_scattdims, scalar)
1434 CloogLoop * loop ;
1435 int level, * scaldims, nb_scattdims, scalar ;
1436 { int ok ;
1437 CloogLoop **current;
1439 do {
1440 ok = 1;
1441 for (current = &loop; (*current)->next; current = &(*current)->next) {
1442 CloogLoop *next = (*current)->next;
1443 if (cloog_loop_scalar_gt(*current,next,level,scaldims,nb_scattdims,scalar)) {
1444 ok = 0;
1445 (*current)->next = next->next;
1446 next->next = *current;
1447 *current = next;
1450 } while (!ok);
1452 return loop ;
1457 * cloog_loop_generate_backtrack function:
1458 * adaptation from LoopGen 0.4 by F. Quillere. This function implements the
1459 * backtrack of the Quillere et al. algorithm (see the Quillere paper).
1460 * It eliminates unused iterations of the current level for the new one. See the
1461 * example called linearity-1-1 example with and without this part for an idea.
1462 * - October 26th 2001: first version in cloog_loop_generate_general.
1463 * - July 31th 2002: (debug) no more parasite loops (REALLY hard !).
1464 * - October 30th 2005: extraction from cloog_loop_generate_general.
1466 CloogLoop *cloog_loop_generate_backtrack(CloogLoop *loop,
1467 int level, CloogOptions *options)
1469 CloogDomain * domain ;
1470 CloogLoop * now, * now2, * next, * next2, * end, * temp, * l, * inner,
1471 * new_loop ;
1473 temp = loop ;
1474 loop = NULL ;
1476 while (temp != NULL)
1477 { l = NULL ;
1478 inner = temp->inner ;
1480 while (inner != NULL)
1481 { next = inner->next ;
1482 /* This 'if' and its first part is the debug of july 31th 2002. */
1483 if (inner->block != NULL) {
1484 end = cloog_loop_alloc(temp->state, inner->domain, 0, NULL,
1485 inner->block, NULL, NULL);
1486 domain = cloog_domain_copy(temp->domain) ;
1487 new_loop = cloog_loop_alloc(temp->state, domain, 0, NULL,
1488 NULL, end, NULL);
1490 else
1491 new_loop = cloog_loop_project(inner, level);
1493 cloog_loop_free_parts(inner,0,0,0,0) ;
1494 cloog_loop_add(&l,&now2,new_loop) ;
1495 inner = next ;
1498 temp->inner = NULL ;
1500 if (l != NULL)
1501 { l = cloog_loop_separate(l) ;
1502 l = cloog_loop_sort(l, level);
1503 while (l != NULL) {
1504 l->stride = cloog_stride_copy(l->stride);
1505 cloog_loop_add(&loop,&now,l) ;
1506 l = l->next ;
1509 next2 = temp->next ;
1510 cloog_loop_free_parts(temp,1,0,0,0) ;
1511 temp = next2 ;
1514 return loop ;
1519 * Return 1 if we need to continue recursing to the specified level.
1521 int cloog_loop_more(CloogLoop *loop, int level, int scalar, int nb_scattdims)
1523 return level + scalar <= nb_scattdims ||
1524 cloog_domain_dimension(loop->domain) >= level;
1528 * Return 1 if the domains of all loops in the given linked list
1529 * have a fixed value at the given level.
1530 * In principle, there would be no need to check that the fixed value is
1531 * the same for each of these loops because this function is only
1532 * called on a component. However, not all backends perform a proper
1533 * decomposition into components.
1535 int cloog_loop_is_constant(CloogLoop *loop, int level)
1537 cloog_int_t c1, c2;
1538 int r = 1;
1540 cloog_int_init(c1);
1541 cloog_int_init(c2);
1543 if (!cloog_domain_lazy_isconstant(loop->domain, level - 1, &c1))
1544 r = 0;
1546 for (loop = loop->next; r && loop; loop = loop->next) {
1547 if (!cloog_domain_lazy_isconstant(loop->domain, level - 1, &c2))
1548 r = 0;
1549 else if (cloog_int_ne(c1, c2))
1550 r = 0;
1553 cloog_int_clear(c1);
1554 cloog_int_clear(c2);
1556 return r;
1560 * Assuming all domains in the given linked list of loop
1561 * have a fixed values at level, return a single loop with
1562 * a domain corresponding to this fixed value and with as
1563 * list of inner loops the concatenation of all inner loops
1564 * in the original list.
1566 CloogLoop *cloog_loop_constant(CloogLoop *loop, int level)
1568 CloogLoop *res, *inner, *tmp;
1569 CloogDomain *domain, *context, *t;
1571 if (!loop)
1572 return loop;
1574 inner = loop->inner;
1575 for (tmp = loop->next; tmp; tmp = tmp->next)
1576 inner = cloog_loop_concat(inner, tmp->inner);
1578 domain = cloog_domain_copy(loop->domain);
1579 domain = cloog_domain_simple_convex(t = domain);
1580 cloog_domain_free(t);
1581 context = cloog_domain_project(domain, level - 1);
1582 context = cloog_domain_extend(t = context, level);
1583 cloog_domain_free(t);
1584 domain = cloog_domain_simplify(t = domain, context);
1585 cloog_domain_free(t);
1586 cloog_domain_free(context);
1588 res = cloog_loop_alloc(loop->state, domain, 0, NULL, NULL, inner, NULL);
1590 cloog_loop_free_parts(loop, 1, 0, 0, 1);
1592 return res;
1595 CloogLoop *cloog_loop_generate_restricted_or_stop(CloogLoop *loop,
1596 CloogDomain *context,
1597 int level, int scalar, int *scaldims, int nb_scattdims,
1598 CloogOptions *options);
1601 * cloog_loop_generate_general function:
1602 * Adaptation from LoopGen 0.4 by F. Quillere. This function implements the
1603 * Quillere algorithm for polyhedron scanning from step 3 to 5.
1604 * (see the Quillere paper).
1605 * - loop is the loop for which we have to generate a scanning code,
1606 * - level is the current non-scalar dimension,
1607 * - scalar is the current scalar dimension,
1608 * - scaldims is the boolean array saying whether a dimension is scalar or not,
1609 * - nb_scattdims is the size of the scaldims array,
1610 * - options are the general code generation options.
1612 * - October 26th 2001: first version.
1613 * - July 3rd->11th 2003: memory leaks hunt and correction.
1614 * - June 22nd 2005: Adaptation for GMP.
1615 * - September 2nd 2005: The function have been cutted out in two pieces:
1616 * cloog_loop_generate and this one, in order to handle
1617 * the scalar dimension case more efficiently with
1618 * cloog_loop_generate_scalar.
1619 * - November 15th 2005: (debug) the result of the cloog_loop_generate call may
1620 * be a list of polyhedra (especially if stop option is
1621 * used): cloog_loop_add_list instead of cloog_loop_add.
1623 CloogLoop *cloog_loop_generate_general(CloogLoop *loop,
1624 int level, int scalar, int *scaldims, int nb_scattdims,
1625 CloogOptions *options)
1627 CloogLoop * res, * now, * temp, * l, * new_loop, * inner, * now2, * end,
1628 * next, * into ;
1629 CloogDomain * domain ;
1630 int separate = 0;
1632 /* 3. Separate all projections into disjoint polyhedra. */
1633 if (level > 0 && cloog_loop_is_constant(loop, level))
1634 res = cloog_loop_constant(loop, level);
1635 else if ((options->f > level+scalar) || (options->f < 0))
1636 res = cloog_loop_merge(loop, level, options);
1637 else {
1638 res = cloog_loop_separate(loop);
1639 separate = 1;
1642 /* 3b. -correction- sort the loops to determine their textual order. */
1643 res = cloog_loop_sort(res, level);
1645 res = cloog_loop_restrict_inner(res);
1647 if (separate)
1648 res = cloog_loop_specialize(res, level, scalar, scaldims, nb_scattdims);
1650 /* 4. Recurse for each loop with the current domain as context. */
1651 temp = res ;
1652 res = NULL ;
1653 if (!level || (level+scalar < options->l) || (options->l < 0))
1654 while(temp != NULL)
1655 { if (level && options->strides)
1656 cloog_loop_stride(temp, level);
1657 if (level && options->otl)
1658 cloog_loop_otl(temp, level);
1659 inner = temp->inner ;
1660 domain = temp->domain ;
1661 into = NULL ;
1662 while (inner != NULL)
1663 { /* 4b. -ced- recurse for each sub-list of non terminal loops. */
1664 if (cloog_loop_more(inner, level + 1, scalar, nb_scattdims)) {
1665 end = inner;
1666 while ((end->next != NULL) &&
1667 cloog_loop_more(end->next, level + 1, scalar, nb_scattdims))
1668 end = end->next ;
1670 next = end->next ;
1671 end->next = NULL ;
1673 l = cloog_loop_generate_restricted_or_stop(inner, domain,
1674 level + 1, scalar, scaldims, nb_scattdims, options);
1676 if (l != NULL)
1677 cloog_loop_add_list(&into,&now,l) ;
1679 inner = next ;
1681 else
1682 { cloog_loop_add(&into,&now,inner) ;
1683 inner = inner->next ;
1686 next = temp->next ;
1687 temp->next = NULL ;
1688 temp->inner = into ;
1689 cloog_loop_add(&res,&now2,temp) ;
1690 temp = next ;
1692 else
1693 while (temp != NULL)
1694 { next = temp->next ;
1695 l = cloog_loop_nest(temp->inner, temp->domain, level+1);
1696 new_loop = cloog_loop_alloc(temp->state, temp->domain, 0, NULL,
1697 NULL, l, NULL);
1698 temp->inner = NULL ;
1699 temp->next = NULL ;
1700 cloog_loop_free_parts(temp,0,0,0,0) ;
1701 cloog_loop_add(&res,&now,new_loop) ;
1702 temp = next ;
1705 /* 5. eliminate unused iterations of the current level for the new one. See
1706 * the example called linearity-1-1 example with and without this part
1707 * for an idea.
1709 if (options->backtrack && level &&
1710 ((level+scalar < options->l) || (options->l < 0)) &&
1711 ((options->f <= level+scalar) && !(options->f < 0)))
1712 res = cloog_loop_generate_backtrack(res, level, options);
1714 /* Pray for my new paper to be accepted somewhere since the following stuff
1715 * is really amazing :-) !
1716 * Far long later: The paper has been accepted to PACT 2004 :-))). But there
1717 * are still some bugs and I have no time to fix them. Thus now you have to
1718 * pray for me to get an academic position for that really amazing stuff :-) !
1719 * Later again: OK, I get my academic position, but still I have not enough
1720 * time to fix and clean this part... Pray again :-) !!!
1722 /* res = cloog_loop_unisolate(res,level) ;*/
1724 return(res) ;
1728 CloogLoop *cloog_loop_generate_restricted(CloogLoop *loop,
1729 int level, int scalar, int *scaldims, int nb_scattdims,
1730 CloogOptions *options);
1734 * cloog_loop_generate_scalar function:
1735 * This function applies the simplified code generation scheme in the trivial
1736 * case of scalar dimensions. When dealing with scalar dimensions, there is
1737 * no need of costly polyhedral operations for separation or sorting: sorting
1738 * is a question of comparing scalar vectors and separation amounts to consider
1739 * only loops with the same scalar vector for the next step of the code
1740 * generation process. This function achieves the separation/sorting process
1741 * for the vector of scalar dimension that begins at dimension 'level+scalar'
1742 * and finish to the first non-scalar dimension.
1743 * - loop is the loop for which we have to generate a scanning code,
1744 * - level is the current non-scalar dimension,
1745 * - scalar is the current scalar dimension,
1746 * - scaldims is the boolean array saying whether a dimension is scalar or not,
1747 * - nb_scattdims is the size of the scaldims array,
1748 * - options are the general code generation options.
1750 * - September 2nd 2005: First version.
1752 CloogLoop *cloog_loop_generate_scalar(CloogLoop *loop,
1753 int level, int scalar, int *scaldims, int nb_scattdims,
1754 CloogOptions *options)
1755 { CloogLoop * res, * now, * temp, * l, * end, * next, * ref ;
1756 int scalar_new;
1758 /* We sort the loop list with respect to the current scalar vector. */
1759 res = cloog_loop_scalar_sort(loop,level,scaldims,nb_scattdims,scalar) ;
1761 scalar_new = scalar + scaldims[level + scalar - 1];
1763 temp = res ;
1764 res = NULL ;
1765 while (temp != NULL)
1766 { /* Then we will appy the general code generation process to each sub-list
1767 * of loops with the same scalar vector.
1769 end = temp ;
1770 ref = temp ;
1772 while((end->next != NULL) &&
1773 cloog_loop_more(end->next, level, scalar_new, nb_scattdims) &&
1774 cloog_loop_scalar_eq(ref,end->next,level,scaldims,nb_scattdims,scalar))
1775 end = end->next ;
1777 next = end->next ;
1778 end->next = NULL ;
1780 /* For the next dimension, scalar value is updated by adding the scalar
1781 * vector size, which is stored at scaldims[level+scalar-1].
1783 if (cloog_loop_more(temp, level, scalar_new, nb_scattdims)) {
1784 l = cloog_loop_generate_restricted(temp, level, scalar_new,
1785 scaldims, nb_scattdims, options);
1787 if (l != NULL)
1788 cloog_loop_add_list(&res, &now, l);
1789 } else
1790 cloog_loop_add(&res, &now, temp);
1792 temp = next ;
1795 return res ;
1799 /* Compare loop with the next loop based on their constant dimensions.
1800 * The result is < 0, == 0 or > 0 depending on whether the constant
1801 * dimensions of loop are lexicographically smaller, equal or greater
1802 * than those of loop->next.
1803 * If loop is the last in the list, then it is assumed to be smaller
1804 * than the "next" one.
1806 static int cloog_loop_next_scal_cmp(CloogLoop *loop)
1808 int i;
1809 int nb_scaldims;
1811 if (!loop->next)
1812 return -1;
1814 nb_scaldims = loop->block->nb_scaldims;
1815 if (loop->next->block->nb_scaldims < nb_scaldims)
1816 nb_scaldims = loop->next->block->nb_scaldims;
1818 for (i = 0; i < nb_scaldims; ++i) {
1819 int cmp = cloog_int_cmp(loop->block->scaldims[i],
1820 loop->next->block->scaldims[i]);
1821 if (cmp)
1822 return cmp;
1824 return loop->block->nb_scaldims - loop->next->block->nb_scaldims;
1828 /* Check whether the globally constant dimensions of a and b
1829 * have the same value for all globally constant dimensions
1830 * that are situated before any (locally) non-constant dimension.
1832 static int cloog_loop_equal_prefix(CloogLoop *a, CloogLoop *b,
1833 int *scaldims, int nb_scattdims)
1835 int i;
1836 int cst = 0;
1837 int dim = 0;
1839 for (i = 0; i < nb_scattdims; ++i) {
1840 if (!scaldims[i]) {
1841 dim++;
1842 continue;
1844 if (!cloog_int_eq(a->block->scaldims[cst], b->block->scaldims[cst]))
1845 break;
1846 cst++;
1848 for (i = i + 1; i < nb_scattdims; ++i) {
1849 if (scaldims[i])
1850 continue;
1851 if (!cloog_domain_lazy_isconstant(a->domain, dim, NULL))
1852 return 0;
1853 /* No need to check that dim is also constant in b and that the
1854 * constant values are equal. That will happen during the check
1855 * whether the two domains are equal.
1857 dim++;
1859 return 1;
1863 /* Try to block adjacent loops in the loop list "loop".
1864 * We only attempt blocking if the constant dimensions of the loops
1865 * in the least are (not necessarily strictly) increasing.
1866 * Then we look for a sublist such that the first (begin) has constant
1867 * dimensions strictly larger than the previous loop in the complete
1868 * list and such that the loop (end) after the last loop in the sublist
1869 * has constant dimensions strictly larger than the last loop in the sublist.
1870 * Furthermore, all loops in the sublist should have the same domain
1871 * (with globally constant dimensions removed) and the difference
1872 * (if any) in constant dimensions may only occur after all the
1873 * (locally) constant dimensions.
1874 * If we find such a sublist, then the blocks of all but the first
1875 * are merged into the block of the first.
1877 * Note that this function can only be called before the global
1878 * blocklist has been created because it may otherwise modify and destroy
1879 * elements on that list.
1881 CloogLoop *cloog_loop_block(CloogLoop *loop, int *scaldims, int nb_scattdims)
1883 CloogLoop *begin, *end, *l;
1884 int begin_after_previous;
1885 int end_after_previous;
1887 if (!loop->next)
1888 return loop;
1889 for (begin = loop; begin; begin = begin->next) {
1890 if (!begin->block || !begin->block->scaldims)
1891 return loop;
1892 if (cloog_loop_next_scal_cmp(begin) > 0)
1893 return loop;
1896 begin_after_previous = 1;
1897 for (begin = loop; begin; begin = begin->next) {
1898 if (!begin_after_previous) {
1899 begin_after_previous = cloog_loop_next_scal_cmp(begin) < 0;
1900 continue;
1903 end_after_previous = cloog_loop_next_scal_cmp(begin) < 0;
1904 for (end = begin->next; end; end = end->next) {
1905 if (!cloog_loop_equal_prefix(begin, end, scaldims, nb_scattdims))
1906 break;
1907 if (!cloog_domain_lazy_equal(begin->domain, end->domain))
1908 break;
1909 end_after_previous = cloog_loop_next_scal_cmp(end) < 0;
1911 if (end != begin->next && end_after_previous) {
1912 for (l = begin->next; l != end; l = begin->next) {
1913 cloog_block_merge(begin->block, l->block);
1914 begin->next = l->next;
1915 cloog_loop_free_parts(l, 1, 0, 1, 0);
1919 begin_after_previous = cloog_loop_next_scal_cmp(begin) < 0;
1922 return loop;
1927 * Check whether for any fixed iteration of the outer loops,
1928 * there is an iteration of loop1 that is lexicographically greater
1929 * than an iteration of loop2.
1930 * Return 1 if there exists (or may exist) such a pair.
1931 * Return 0 if all iterations of loop1 are lexicographically smaller
1932 * than the iterations of loop2.
1933 * If no iteration is lexicographically greater, but if there are
1934 * iterations that are equal to iterations of loop2, then return "def".
1935 * This is useful for ensuring that such statements are not reordered.
1936 * Some users, including the test_run target in test, expect
1937 * the statements at a given point to be run in the original order.
1938 * Passing the value "0" for "def" would allow such statements to be reordered
1939 * and would allow for the detection of more components.
1941 int cloog_loop_follows(CloogLoop *loop1, CloogLoop *loop2,
1942 int level, int scalar, int *scaldims, int nb_scattdims, int def)
1944 int dim1, dim2;
1946 dim1 = cloog_domain_dimension(loop1->domain);
1947 dim2 = cloog_domain_dimension(loop2->domain);
1948 while ((level <= dim1 && level <= dim2) ||
1949 level_is_constant(level, scalar, scaldims, nb_scattdims)) {
1950 if (level_is_constant(level, scalar, scaldims, nb_scattdims)) {
1951 int cmp = cloog_loop_constant_cmp(loop1, loop2, level, scaldims,
1952 nb_scattdims, scalar);
1953 if (cmp > 0)
1954 return 1;
1955 if (cmp < 0)
1956 return 0;
1957 scalar += scaldims[level + scalar - 1];
1958 } else {
1959 int follows = cloog_domain_follows(loop1->domain, loop2->domain,
1960 level);
1961 if (follows > 0)
1962 return 1;
1963 if (follows < 0)
1964 return 0;
1965 level++;
1969 return def;
1973 /* Structure for representing the nodes in the graph being traversed
1974 * using Tarjan's algorithm.
1975 * index represents the order in which nodes are visited.
1976 * min_index is the index of the root of a (sub)component.
1977 * on_stack indicates whether the node is currently on the stack.
1979 struct cloog_loop_sort_node {
1980 int index;
1981 int min_index;
1982 int on_stack;
1984 /* Structure for representing the graph being traversed
1985 * using Tarjan's algorithm.
1986 * len is the number of nodes
1987 * node is an array of nodes
1988 * stack contains the nodes on the path from the root to the current node
1989 * sp is the stack pointer
1990 * index is the index of the last node visited
1991 * order contains the elements of the components separated by -1
1992 * op represents the current position in order
1994 struct cloog_loop_sort {
1995 int len;
1996 struct cloog_loop_sort_node *node;
1997 int *stack;
1998 int sp;
1999 int index;
2000 int *order;
2001 int op;
2004 /* Allocate and initialize cloog_loop_sort structure.
2006 static struct cloog_loop_sort *cloog_loop_sort_alloc(int len)
2008 struct cloog_loop_sort *s;
2009 int i;
2011 s = (struct cloog_loop_sort *)malloc(sizeof(struct cloog_loop_sort));
2012 assert(s);
2013 s->len = len;
2014 s->node = (struct cloog_loop_sort_node *)
2015 malloc(len * sizeof(struct cloog_loop_sort_node));
2016 assert(s->node);
2017 for (i = 0; i < len; ++i)
2018 s->node[i].index = -1;
2019 s->stack = (int *)malloc(len * sizeof(int));
2020 assert(s->stack);
2021 s->order = (int *)malloc(2 * len * sizeof(int));
2022 assert(s->order);
2024 s->sp = 0;
2025 s->index = 0;
2026 s->op = 0;
2028 return s;
2031 /* Free cloog_loop_sort structure.
2033 static void cloog_loop_sort_free(struct cloog_loop_sort *s)
2035 free(s->node);
2036 free(s->stack);
2037 free(s->order);
2038 free(s);
2042 /* Check whether for any fixed iteration of the outer loops,
2043 * there is an iteration of loop1 that is lexicographically greater
2044 * than an iteration of loop2, where the iteration domains are
2045 * available in the inner loops of the arguments.
2047 * By using this functions to detect components, we ensure that
2048 * two CloogLoops appear in the same component if some iterations of
2049 * each loop should be executed before some iterations of the other loop.
2050 * Since we also want two CloogLoops that have exactly the same
2051 * iteration domain at the current level to be placed in the same component,
2052 * we first check if these domains are indeed the same.
2054 static int inner_loop_follows(CloogLoop *loop1, CloogLoop *loop2,
2055 int level, int scalar, int *scaldims, int nb_scattdims, int def)
2057 int f;
2059 f = cloog_domain_lazy_equal(loop1->domain, loop2->domain);
2060 if (!f)
2061 f = cloog_loop_follows(loop1->inner, loop2->inner,
2062 level, scalar, scaldims, nb_scattdims, def);
2064 return f;
2068 /* Perform Tarjan's algorithm for computing the strongly connected components
2069 * in the graph with the individual CloogLoops as vertices.
2070 * Two CloopLoops appear in the same component if they both (indirectly)
2071 * "follow" each other, where the following relation is determined
2072 * by the follows function.
2074 static void cloog_loop_components_tarjan(struct cloog_loop_sort *s,
2075 CloogLoop **loop_array, int i, int level, int scalar, int *scaldims,
2076 int nb_scattdims,
2077 int (*follows)(CloogLoop *loop1, CloogLoop *loop2,
2078 int level, int scalar, int *scaldims, int nb_scattdims, int def))
2080 int j;
2082 s->node[i].index = s->index;
2083 s->node[i].min_index = s->index;
2084 s->node[i].on_stack = 1;
2085 s->index++;
2086 s->stack[s->sp++] = i;
2088 for (j = s->len - 1; j >= 0; --j) {
2089 int f;
2091 if (j == i)
2092 continue;
2093 if (s->node[j].index >= 0 &&
2094 (!s->node[j].on_stack ||
2095 s->node[j].index > s->node[i].min_index))
2096 continue;
2098 f = follows(loop_array[i], loop_array[j],
2099 level, scalar, scaldims, nb_scattdims, i > j);
2100 if (!f)
2101 continue;
2103 if (s->node[j].index < 0) {
2104 cloog_loop_components_tarjan(s, loop_array, j, level, scalar,
2105 scaldims, nb_scattdims, follows);
2106 if (s->node[j].min_index < s->node[i].min_index)
2107 s->node[i].min_index = s->node[j].min_index;
2108 } else if (s->node[j].index < s->node[i].min_index)
2109 s->node[i].min_index = s->node[j].index;
2112 if (s->node[i].index != s->node[i].min_index)
2113 return;
2115 do {
2116 j = s->stack[--s->sp];
2117 s->node[j].on_stack = 0;
2118 s->order[s->op++] = j;
2119 } while (j != i);
2120 s->order[s->op++] = -1;
2124 static int qsort_index_cmp(const void *p1, const void *p2)
2126 return *(int *)p1 - *(int *)p2;
2129 /* Sort the elements of the component starting at list.
2130 * The list is terminated by a -1.
2132 static void sort_component(int *list)
2134 int len;
2136 for (len = 0; list[len] != -1; ++len)
2139 qsort(list, len, sizeof(int), qsort_index_cmp);
2142 /* Given an array of indices "list" into the "loop_array" array,
2143 * terminated by -1, construct a linked list of the corresponding
2144 * entries and put the result in *res.
2145 * The value returned is the number of CloogLoops in the (linked) list
2147 static int extract_component(CloogLoop **loop_array, int *list, CloogLoop **res)
2149 int i = 0;
2151 sort_component(list);
2152 while (list[i] != -1) {
2153 *res = loop_array[list[i]];
2154 res = &(*res)->next;
2155 ++i;
2157 *res = NULL;
2159 return i;
2164 * Call cloog_loop_generate_scalar or cloog_loop_generate_general
2165 * on each of the strongly connected components in the list of CloogLoops
2166 * pointed to by "loop".
2168 * We use Tarjan's algorithm to find the strongly connected components.
2169 * Note that this algorithm also topologically sorts the components.
2171 * The components are treated separately to avoid spurious separations.
2172 * The concatentation of the results may contain successive loops
2173 * with the same bounds, so we try to combine such loops.
2175 CloogLoop *cloog_loop_generate_components(CloogLoop *loop,
2176 int level, int scalar, int *scaldims, int nb_scattdims,
2177 CloogOptions *options)
2179 int i, nb_loops;
2180 CloogLoop *tmp;
2181 CloogLoop *res, **res_next;
2182 CloogLoop **loop_array;
2183 struct cloog_loop_sort *s;
2185 if (level == 0 || !loop->next)
2186 return cloog_loop_generate_general(loop, level, scalar,
2187 scaldims, nb_scattdims, options);
2189 nb_loops = cloog_loop_count(loop);
2191 loop_array = (CloogLoop **)malloc(nb_loops * sizeof(CloogLoop *));
2192 assert(loop_array);
2194 for (i = 0, tmp = loop; i < nb_loops; i++, tmp = tmp->next)
2195 loop_array[i] = tmp;
2197 s = cloog_loop_sort_alloc(nb_loops);
2198 for (i = nb_loops - 1; i >= 0; --i) {
2199 if (s->node[i].index >= 0)
2200 continue;
2201 cloog_loop_components_tarjan(s, loop_array, i, level, scalar, scaldims,
2202 nb_scattdims, &inner_loop_follows);
2205 i = 0;
2206 res = NULL;
2207 res_next = &res;
2208 while (nb_loops) {
2209 int n = extract_component(loop_array, &s->order[i], &tmp);
2210 i += n + 1;
2211 nb_loops -= n;
2212 *res_next = cloog_loop_generate_general(tmp, level, scalar,
2213 scaldims, nb_scattdims, options);
2214 while (*res_next)
2215 res_next = &(*res_next)->next;
2218 cloog_loop_sort_free(s);
2220 free(loop_array);
2222 res = cloog_loop_combine(res);
2224 return res;
2228 /* For each loop in the list "loop", decompose the list of
2229 * inner loops into strongly connected components and put
2230 * the components into separate loops at the top level.
2232 CloogLoop *cloog_loop_decompose_inner(CloogLoop *loop,
2233 int level, int scalar, int *scaldims, int nb_scattdims)
2235 CloogLoop *l, *tmp;
2236 CloogLoop **loop_array;
2237 int i, n_loops, max_loops = 0;
2238 struct cloog_loop_sort *s;
2240 for (l = loop; l; l = l->next) {
2241 n_loops = cloog_loop_count(l->inner);
2242 if (max_loops < n_loops)
2243 max_loops = n_loops;
2246 if (max_loops <= 1)
2247 return loop;
2249 loop_array = (CloogLoop **)malloc(max_loops * sizeof(CloogLoop *));
2250 assert(loop_array);
2252 for (l = loop; l; l = l->next) {
2253 int n;
2255 for (i = 0, tmp = l->inner; tmp; i++, tmp = tmp->next)
2256 loop_array[i] = tmp;
2257 n_loops = i;
2258 if (n_loops <= 1)
2259 continue;
2261 s = cloog_loop_sort_alloc(n_loops);
2262 for (i = n_loops - 1; i >= 0; --i) {
2263 if (s->node[i].index >= 0)
2264 continue;
2265 cloog_loop_components_tarjan(s, loop_array, i, level, scalar,
2266 scaldims, nb_scattdims, &cloog_loop_follows);
2269 n = extract_component(loop_array, s->order, &l->inner);
2270 n_loops -= n;
2271 i = n + 1;
2272 while (n_loops) {
2273 CloogLoop *inner;
2275 n = extract_component(loop_array, &s->order[i], &inner);
2276 n_loops -= n;
2277 i += n + 1;
2278 tmp = cloog_loop_alloc(l->state, cloog_domain_copy(l->domain),
2279 l->otl, l->stride, l->block, inner, l->next);
2280 l->next = tmp;
2281 l = tmp;
2284 cloog_loop_sort_free(s);
2287 free(loop_array);
2289 return loop;
2293 CloogLoop *cloog_loop_generate_restricted(CloogLoop *loop,
2294 int level, int scalar, int *scaldims, int nb_scattdims,
2295 CloogOptions *options)
2297 /* To save both time and memory, we switch here depending on whether the
2298 * current dimension is scalar (simplified processing) or not (general
2299 * processing).
2301 if (level_is_constant(level, scalar, scaldims, nb_scattdims))
2302 return cloog_loop_generate_scalar(loop, level, scalar,
2303 scaldims, nb_scattdims, options);
2305 * 2. Compute the projection of each polyhedron onto the outermost
2306 * loop variable and the parameters.
2308 loop = cloog_loop_project_all(loop, level);
2310 return cloog_loop_generate_components(loop, level, scalar, scaldims,
2311 nb_scattdims, options);
2315 CloogLoop *cloog_loop_generate_restricted_or_stop(CloogLoop *loop,
2316 CloogDomain *context,
2317 int level, int scalar, int *scaldims, int nb_scattdims,
2318 CloogOptions *options)
2320 /* If the user asked to stop code generation at this level, let's stop. */
2321 if ((options->stop >= 0) && (level+scalar >= options->stop+1))
2322 return cloog_loop_stop(loop,context) ;
2324 return cloog_loop_generate_restricted(loop, level, scalar, scaldims,
2325 nb_scattdims, options);
2330 * cloog_loop_generate function:
2331 * Adaptation from LoopGen 0.4 by F. Quillere. This function implements the
2332 * Quillere algorithm for polyhedron scanning from step 1 to 2.
2333 * (see the Quillere paper).
2334 * - loop is the loop for which we have to generate a scanning code,
2335 * - context is the context of the current loop (constraints on parameter and/or
2336 * on outer loop counters),
2337 * - level is the current non-scalar dimension,
2338 * - scalar is the current scalar dimension,
2339 * - scaldims is the boolean array saying whether a dimension is scalar or not,
2340 * - nb_scattdims is the size of the scaldims array,
2341 * - options are the general code generation options.
2343 * - October 26th 2001: first version.
2344 * - July 3rd->11th 2003: memory leaks hunt and correction.
2345 * - June 15th 2005: a memory leak fixed (loop was not entirely freed when
2346 * the result of cloog_loop_restrict was NULL).
2347 * - June 22nd 2005: Adaptation for GMP.
2348 * - September 2nd 2005: The function have been cutted out in two pieces:
2349 * cloog_loop_generate and this one, in order to handle
2350 * the scalar dimension case more efficiently with
2351 * cloog_loop_generate_scalar.
2352 * - November 15th 2005: (debug) Condition for stop option no more take care of
2353 * further scalar dimensions.
2355 CloogLoop *cloog_loop_generate(CloogLoop *loop, CloogDomain *context,
2356 int level, int scalar, int *scaldims, int nb_scattdims,
2357 CloogOptions *options)
2359 /* 1. Replace each polyhedron by its intersection with the context.
2361 loop = cloog_loop_restrict_all(loop, context);
2362 if (!loop)
2363 return NULL;
2365 return cloog_loop_generate_restricted_or_stop(loop, context,
2366 level, scalar, scaldims, nb_scattdims, options);
2371 * Internal function for simplifying a single loop in a list of loops.
2372 * See cloog_loop_simplify.
2374 static CloogLoop *loop_simplify(CloogLoop *loop, CloogDomain *context,
2375 int level, int nb_scattdims, CloogOptions *options)
2377 int domain_dim;
2378 CloogBlock * new_block ;
2379 CloogLoop *simplified, *inner;
2380 CloogDomain * domain, * simp, * inter, * extended_context ;
2382 if (!cloog_domain_isconvex(loop->domain))
2383 loop->domain = cloog_domain_simplify_union(loop->domain);
2385 domain = loop->domain ;
2387 domain_dim = cloog_domain_dimension(domain);
2388 extended_context = cloog_domain_extend(context, domain_dim);
2389 inter = cloog_domain_intersection(domain,extended_context) ;
2390 simp = cloog_domain_simplify(domain, extended_context);
2391 cloog_domain_free(extended_context) ;
2393 /* If the constraint system is never true, go to the next one. */
2394 if (cloog_domain_never_integral(simp)) {
2395 cloog_loop_free(loop->inner);
2396 cloog_domain_free(inter);
2397 cloog_domain_free(simp);
2398 return NULL;
2401 inner = cloog_loop_simplify(loop->inner, inter, level+1, nb_scattdims,
2402 options);
2404 if ((inner == NULL) && (loop->block == NULL)) {
2405 cloog_domain_free(inter);
2406 cloog_domain_free(simp);
2407 return NULL;
2410 new_block = cloog_block_copy(loop->block) ;
2412 simplified = cloog_loop_alloc(loop->state, simp, loop->otl, loop->stride,
2413 new_block, inner, NULL);
2415 /* Only save the domains, if their level is still a scattering level. */
2416 if (options->save_domains && level <= nb_scattdims)
2417 simplified->unsimplified = inter;
2418 else
2419 cloog_domain_free(inter);
2421 return(simplified) ;
2426 * cloog_loop_simplify function:
2427 * This function implements the part 6. of the Quillere algorithm, it
2428 * recursively simplifies each loop in the context of the preceding loop domain.
2429 * It returns a pointer to the simplified loop list.
2430 * The cloog_domain_simplify (DomainSimplify) behaviour is really bad with
2431 * polyhedra union and some really awful sidesteppings were written, I plan
2432 * to solve that...
2433 * - October 31th 2001: first version.
2434 * - July 3rd->11th 2003: memory leaks hunt and correction.
2435 * - April 16th 2005: a memory leak fixed (extended_context was not freed).
2436 * - June 15th 2005: a memory leak fixed (loop was not conveniently freed
2437 * when the constraint system is never true).
2438 * - October 27th 2005: - this function called before cloog_loop_fast_simplify
2439 * is now the official cloog_loop_simplify function in
2440 * replacement of a slower and more complex one (after
2441 * deep changes in the pretty printer).
2442 * - we use cloog_loop_disjoint to fix the problem when
2443 * simplifying gives a union of polyhedra (before, it
2444 * was under the responsibility of the pretty printer).
2446 CloogLoop *cloog_loop_simplify(CloogLoop *loop, CloogDomain *context, int level,
2447 int nb_scattdims, CloogOptions *options)
2449 CloogLoop *now;
2450 CloogLoop *res = NULL;
2451 CloogLoop **next = &res;
2453 for (now = loop; now; now = now->next) {
2454 *next = loop_simplify(now, context, level, nb_scattdims, options);
2456 now->inner = NULL; /* For loop integrity. */
2457 cloog_domain_free(now->domain);
2458 now->domain = NULL;
2460 if (*next)
2461 next = &(*next)->next;
2463 cloog_loop_free(loop);
2465 /* Examples like test/iftest2.cloog give unions of polyhedra after
2466 * simplifying, thus we have to make them disjoint. Another good reason to
2467 * put the simplifying step in the Quillere backtrack.
2469 res = cloog_loop_disjoint(res);
2471 return res;
2476 * cloog_loop_scatter function:
2477 * This function add the scattering (scheduling) informations in a loop.
2479 void cloog_loop_scatter(CloogLoop * loop, CloogScattering *scatt)
2481 loop->domain = cloog_domain_scatter(loop->domain, scatt);