2 /**-------------------------------------------------------------------**
4 **-------------------------------------------------------------------**
6 **-------------------------------------------------------------------**
7 ** First version: october 26th 2001 **
8 **-------------------------------------------------------------------**/
11 /******************************************************************************
12 * CLooG : the Chunky Loop Generator (experimental) *
13 ******************************************************************************
15 * Copyright (C) 2001-2005 Cedric Bastoul *
17 * This library is free software; you can redistribute it and/or *
18 * modify it under the terms of the GNU Lesser General Public *
19 * License as published by the Free Software Foundation; either *
20 * version 2.1 of the License, or (at your option) any later version. *
22 * This library is distributed in the hope that it will be useful, *
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
25 * Lesser General Public License for more details. *
27 * You should have received a copy of the GNU Lesser General Public *
28 * License along with this library; if not, write to the Free Software *
29 * Foundation, Inc., 51 Franklin Street, Fifth Floor, *
30 * Boston, MA 02110-1301 USA *
32 * CLooG, the Chunky Loop Generator *
33 * Written by Cedric Bastoul, Cedric.Bastoul@inria.fr *
35 ******************************************************************************/
36 /* CAUTION: the english used for comments is probably the worst you ever read,
37 * please feel free to correct and improve it !
42 # include "../include/cloog/cloog.h"
44 #define ALLOC(type) (type*)malloc(sizeof(type))
47 /******************************************************************************
48 * Memory leaks hunting *
49 ******************************************************************************/
53 * These functions and global variables are devoted to memory leaks hunting: we
54 * want to know at each moment how many CloogLoop structures had been allocated
55 * (cloog_loop_allocated) and how many had been freed (cloog_loop_freed).
56 * Each time a CloogLoog structure is allocated, a call to the function
57 * cloog_loop_leak_up() must be carried out, and respectively
58 * cloog_loop_leak_down() when a CloogLoop structure is freed. The special
59 * variable cloog_loop_max gives the maximal number of CloogLoop structures
60 * simultaneously alive (i.e. allocated and non-freed) in memory.
61 * - July 3rd->11th 2003: first version (memory leaks hunt and correction).
65 static void cloog_loop_leak_up(CloogState
*state
)
67 state
->loop_allocated
++;
68 if ((state
->loop_allocated
- state
->loop_freed
) > state
->loop_max
)
69 state
->loop_max
= state
->loop_allocated
- state
->loop_freed
;
73 static void cloog_loop_leak_down(CloogState
*state
)
79 /******************************************************************************
80 * Structure display function *
81 ******************************************************************************/
85 * cloog_loop_print_structure function:
86 * Displays a loop structure in a way that trends to be understandable without
87 * falling in a deep depression or, for the lucky ones, getting a headache...
88 * Written by Olivier Chorier, Luc Marchaud, Pierre Martin and Romain Tartiere.
89 * - April 24th 2005: Initial version.
90 * - May 21rd 2005: - New parameter `F' for destination file (ie stdout),
92 * - May 26th 2005: Memory leak hunt.
93 * - June 2nd 2005: (Ced) Integration and minor fixes.
94 * -June 22nd 2005: (Ced) Adaptation for GMP.
96 void cloog_loop_print_structure(FILE * file
, CloogLoop
* loop
, int level
)
100 { /* Go to the right level. */
101 for (i
=0; i
<level
; i
++)
102 fprintf(file
,"|\t") ;
104 fprintf(file
,"+-- CloogLoop\n") ;
110 { /* Go to the right level. */
111 for (i
=0; i
<level
; i
++)
112 fprintf(file
,"|\t") ;
114 fprintf(file
,"| CloogLoop\n") ;
120 for(j
=0; j
<=level
+1; j
++)
121 fprintf(file
,"|\t") ;
124 /* Print the domain. */
125 cloog_domain_print_structure(file
, loop
->domain
, level
+1, "CloogDomain");
127 /* Print the stride. */
128 for(j
=0; j
<=level
; j
++)
129 fprintf(file
,"|\t") ;
131 fprintf(file
, "Stride: ");
132 cloog_int_print(file
, loop
->stride
->stride
);
134 fprintf(file
, "Offset: ");
135 cloog_int_print(file
, loop
->stride
->offset
);
140 for(j
=0; j
<=level
+1; j
++)
141 fprintf(file
,"|\t") ;
144 /* Print the block. */
145 cloog_block_print_structure(file
,loop
->block
,level
+1) ;
148 for (i
=0; i
<=level
+1; i
++)
149 fprintf(file
,"|\t") ;
152 /* Print inner if any. */
154 cloog_loop_print_structure(file
,loop
->inner
,level
+1) ;
156 /* And let's go for the next one. */
159 /* One more time something that is here only for a better look. */
161 { /* Two blank lines if this is the end of the linked list. */
163 { for (i
=0; i
<=level
; i
++)
164 fprintf(file
,"|\t") ;
170 { /* A special blank line if the is a next loop. */
171 for (i
=0; i
<=level
; i
++)
172 fprintf(file
,"|\t") ;
173 fprintf(file
,"V\n") ;
180 * cloog_loop_print function:
181 * This function prints the content of a CloogLoop structure (start) into a
182 * file (file, possibly stdout).
183 * - June 2nd 2005: Now this very old function (probably as old as CLooG) is
184 * only a frontend to cloog_loop_print_structure, with a quite
185 * better human-readable representation.
187 void cloog_loop_print(FILE * file
, CloogLoop
* loop
)
188 { cloog_loop_print_structure(file
,loop
,0) ;
192 /******************************************************************************
193 * Memory deallocation function *
194 ******************************************************************************/
198 * cloog_loop_free function:
199 * This function frees the allocated memory for a CloogLoop structure (loop),
200 * and frees its inner loops and its next loops.
201 * - June 22nd 2005: Adaptation for GMP.
203 void cloog_loop_free(CloogLoop
* loop
)
206 while (loop
!= NULL
) {
207 cloog_loop_leak_down(loop
->state
);
210 cloog_domain_free(loop
->domain
) ;
211 cloog_domain_free(loop
->unsimplified
);
212 cloog_block_free(loop
->block
) ;
213 if (loop
->inner
!= NULL
)
214 cloog_loop_free(loop
->inner
) ;
216 cloog_stride_free(loop
->stride
);
224 * cloog_loop_free_parts function:
225 * This function frees the allocated memory for some parts of a CloogLoop
226 * structure (loop), each other argument is a boolean having to be set to 1 if
227 * we want to free the corresponding part, 0 otherwise. This function applies
228 * the same freeing policy to its inner ans next loops recursively.
229 * - July 3rd 2003: first version.
230 * - June 22nd 2005: Adaptation for GMP.
232 void cloog_loop_free_parts(loop
, domain
, block
, inner
, next
)
234 int domain
, block
, inner
, next
;
235 { CloogLoop
* follow
;
237 while (loop
!= NULL
) {
238 cloog_loop_leak_down(loop
->state
);
239 follow
= loop
->next
;
242 cloog_domain_free(loop
->domain
) ;
245 cloog_block_free(loop
->block
) ;
247 if ((inner
) && (loop
->inner
!= NULL
))
248 cloog_loop_free_parts(loop
->inner
,domain
,block
,inner
,1) ;
250 cloog_domain_free(loop
->unsimplified
);
251 cloog_stride_free(loop
->stride
);
261 /******************************************************************************
262 * Reading functions *
263 ******************************************************************************/
267 * Construct a CloogLoop structure from a given iteration domain
268 * and statement number.
270 CloogLoop
*cloog_loop_from_domain(CloogState
*state
, CloogDomain
*domain
,
275 CloogStatement
* statement
;
277 /* Memory allocation and information reading for the first domain: */
278 loop
= cloog_loop_malloc(state
);
280 loop
->domain
= domain
;
281 if (loop
->domain
!= NULL
)
282 nb_iterators
= cloog_domain_dimension(loop
->domain
);
285 /* included statement block. */
286 statement
= cloog_statement_alloc(state
, number
+ 1);
287 loop
->block
= cloog_block_alloc(statement
, 0, NULL
, nb_iterators
);
294 * cloog_loop_read function:
295 * This function reads loop data from a file (foo, possibly stdin) and
296 * returns a pointer to a CloogLoop structure containing the read information.
297 * This function can be used only for input file reading, when one loop is
298 * associated with one statement.
299 * - number is the statement block number carried by the loop (-1 if none).
300 * - nb_parameters is the number of parameters.
302 * - September 9th 2002: first version.
303 * - April 16th 2005: adaptation to new CloogStatement struct (with number).
304 * - June 11th 2005: adaptation to new CloogBlock structure.
305 * - June 22nd 2005: Adaptation for GMP.
307 CloogLoop
*cloog_loop_read(CloogState
*state
,
308 FILE *foo
, int number
, int nb_parameters
)
314 domain
= cloog_domain_union_read(state
, foo
, nb_parameters
);
316 /* To read that stupid "0 0 0" line. */
317 while (fgets(s
,MAX_STRING
,foo
) == 0) ;
318 while ((*s
=='#' || *s
=='\n') || (sscanf(s
," %d %d %d",&op1
,&op2
,&op3
)<3))
319 if (fgets(s
, MAX_STRING
, foo
))
322 return cloog_loop_from_domain(state
, domain
, number
);
326 /******************************************************************************
327 * Processing functions *
328 ******************************************************************************/
332 * cloog_loop_malloc function:
333 * This function allocates the memory space for a CloogLoop structure and
334 * sets its fields with default values. Then it returns a pointer to the
336 * - November 21th 2005: first version.
338 CloogLoop
*cloog_loop_malloc(CloogState
*state
)
341 /* Memory allocation for the CloogLoop structure. */
342 loop
= (CloogLoop
*)malloc(sizeof(CloogLoop
)) ;
344 cloog_die("memory overflow.\n");
345 cloog_loop_leak_up(state
);
348 /* We set the various fields with default values. */
350 loop
->domain
= NULL
;
351 loop
->unsimplified
= NULL
;
364 * cloog_loop_alloc function:
365 * This function allocates the memory space for a CloogLoop structure and
366 * sets its fields with those given as input. Then it returns a pointer to the
368 * - October 27th 2001: first version.
369 * - June 22nd 2005: Adaptation for GMP.
370 * - November 21th 2005: use of cloog_loop_malloc.
372 CloogLoop
*cloog_loop_alloc(CloogState
*state
,
373 CloogDomain
*domain
, int otl
, CloogStride
*stride
,
374 CloogBlock
*block
, CloogLoop
*inner
, CloogLoop
*next
)
377 loop
= cloog_loop_malloc(state
);
379 loop
->domain
= domain
;
380 loop
->block
= block
;
381 loop
->inner
= inner
;
384 loop
->stride
= cloog_stride_copy(stride
);
391 * cloog_loop_add function:
392 * This function adds a CloogLoop structure (loop) at a given place (now) of a
393 * NULL terminated list of CloogLoop structures. The beginning of this list
394 * is (start). This function updates (now) to (loop), and updates (start) if the
395 * added element is the first one -that is when (start) is NULL-.
396 * - October 28th 2001: first version.
398 void cloog_loop_add(CloogLoop
** start
, CloogLoop
** now
, CloogLoop
* loop
)
399 { if (*start
== NULL
)
404 { (*now
)->next
= loop
;
405 *now
= (*now
)->next
;
411 * cloog_loop_add function:
412 * This function adds a CloogLoop structure (loop) at a given place (now) of a
413 * NULL terminated list of CloogLoop structures. The beginning of this list
414 * is (start). This function updates (now) to the end of the loop list (loop),
415 * and updates (start) if the added element is the first one -that is when
417 * - September 9th 2005: first version.
419 void cloog_loop_add_list(CloogLoop
** start
, CloogLoop
** now
, CloogLoop
* loop
)
420 { if (*start
== NULL
)
425 { (*now
)->next
= loop
;
426 *now
= (*now
)->next
;
429 while ((*now
)->next
!= NULL
)
430 *now
= (*now
)->next
;
435 * cloog_loop_copy function:
436 * This function returns a copy of the CloogLoop structure given as input. In
437 * fact, there is just new allocations for the CloogLoop structures, but their
438 * contents are the same.
439 * - October 28th 2001: first version.
440 * - July 3rd->11th 2003: memory leaks hunt and correction.
442 CloogLoop
* cloog_loop_copy(CloogLoop
* source
)
445 CloogDomain
* domain
;
449 { domain
= cloog_domain_copy(source
->domain
) ;
450 block
= cloog_block_copy(source
->block
) ;
451 loop
= cloog_loop_alloc(source
->state
, domain
, source
->otl
,
452 source
->stride
, block
, NULL
, NULL
);
453 loop
->usr
= source
->usr
;
454 loop
->inner
= cloog_loop_copy(source
->inner
) ;
455 loop
->next
= cloog_loop_copy(source
->next
) ;
462 * cloog_loop_add_disjoint function:
463 * This function adds some CloogLoop structures at a given place (now) of a
464 * NULL terminated list of CloogLoop structures. The beginning of this list
465 * is (start). (loop) can be an union of polyhedra, this function separates the
466 * union into a list of *disjoint* polyhedra then adds the list. This function
467 * updates (now) to the end of the list and updates (start) if first added
468 * element is the first of the principal list -that is when (start) is NULL-.
469 * (loop) can be freed by this function, basically when its domain is actually
470 * a union of polyhedra, but don't worry, all the useful data are now stored
471 * inside the list (start). We do not use PolyLib's Domain_Disjoint function,
472 * since the number of union components is often higher (thus code size too).
473 * - October 28th 2001: first version.
474 * - November 14th 2001: bug correction (this one was hard to find !).
475 * - July 3rd->11th 2003: memory leaks hunt and correction.
476 * - June 22nd 2005: Adaptation for GMP.
477 * - October 27th 2005: (debug) included blocks were not copied for new loops.
479 void cloog_loop_add_disjoint(start
, now
, loop
)
480 CloogLoop
** start
, ** now
, * loop
;
482 CloogLoop
* sep
, * inner
;
483 CloogDomain
*domain
, *seen
, *temp
, *rest
;
486 if (cloog_domain_isconvex(loop
->domain
))
487 cloog_loop_add(start
,now
,loop
) ;
489 domain
= cloog_domain_simplify_union(loop
->domain
);
490 loop
->domain
= NULL
;
492 /* We separate the first element of the rest of the union. */
493 domain
= cloog_domain_cut_first(domain
, &rest
);
495 /* This first element is the first of the list of disjoint polyhedra. */
496 sep
= cloog_loop_alloc(loop
->state
, domain
, 0, NULL
,
497 loop
->block
, loop
->inner
, NULL
);
498 cloog_loop_add(start
,now
,sep
) ;
500 seen
= cloog_domain_copy(domain
);
501 while (!cloog_domain_isempty(domain
= rest
)) {
502 temp
= cloog_domain_cut_first(domain
, &rest
);
503 domain
= cloog_domain_difference(temp
, seen
);
504 cloog_domain_free(temp
);
506 if (cloog_domain_isempty(domain
)) {
507 cloog_domain_free(domain
);
511 /* Each new loop will have its own life, for instance we can free its
512 * inner loop and included block. Then each one must have its own copy
513 * of both 'inner' and 'block'.
515 inner
= cloog_loop_copy(loop
->inner
) ;
516 block
= cloog_block_copy(loop
->block
) ;
518 sep
= cloog_loop_alloc(loop
->state
, cloog_domain_copy(domain
),
519 0, NULL
, block
, inner
, NULL
);
520 /* domain can be an union too. If so: recursion. */
521 if (cloog_domain_isconvex(domain
))
522 cloog_loop_add(start
,now
,sep
) ;
524 cloog_loop_add_disjoint(start
,now
,sep
) ;
526 if (cloog_domain_isempty(rest
)) {
527 cloog_domain_free(domain
);
531 seen
= cloog_domain_union(seen
, domain
);
533 cloog_domain_free(rest
);
534 cloog_domain_free(seen
);
535 cloog_loop_free_parts(loop
,0,0,0,0) ;
541 * cloog_loop_disjoint function:
542 * This function returns a list of loops such that each loop with non-convex
543 * domain in the input list (loop) is separated into several loops where the
544 * domains are the components of the union of *disjoint* polyhedra equivalent
545 * to the original non-convex domain. See cloog_loop_add_disjoint comments
547 * - September 16th 2005: first version.
549 CloogLoop
* cloog_loop_disjoint(CloogLoop
* loop
)
550 { CloogLoop
*res
=NULL
, * now
=NULL
, * next
;
552 /* Because this is often the case, don't waste time ! */
553 if (loop
&& !loop
->next
&& cloog_domain_isconvex(loop
->domain
))
557 { next
= loop
->next
;
559 cloog_loop_add_disjoint(&res
,&now
,loop
) ;
568 * cloog_loop_restrict function:
569 * This function returns the (loop) in the context of (context): it makes the
570 * intersection between the (loop) domain and the (context), then it returns
571 * a pointer to a new loop, with this intersection as domain.
573 * - October 27th 2001: first version.
574 * - June 15th 2005: a memory leak fixed (domain was not freed when empty).
575 * - June 22nd 2005: Adaptation for GMP.
577 CloogLoop
*cloog_loop_restrict(CloogLoop
*loop
, CloogDomain
*context
)
578 { int new_dimension
;
579 CloogDomain
* domain
, * extended_context
, * new_domain
;
580 CloogLoop
* new_loop
;
582 domain
= loop
->domain
;
583 if (cloog_domain_dimension(domain
) > cloog_domain_dimension(context
))
585 new_dimension
= cloog_domain_dimension(domain
);
586 extended_context
= cloog_domain_extend(context
, new_dimension
);
587 new_domain
= cloog_domain_intersection(extended_context
,loop
->domain
) ;
588 cloog_domain_free(extended_context
) ;
591 new_domain
= cloog_domain_intersection(context
,loop
->domain
) ;
593 if (cloog_domain_isempty(new_domain
))
594 { cloog_domain_free(new_domain
) ;
598 new_loop
= cloog_loop_alloc(loop
->state
, new_domain
,
599 0, NULL
, loop
->block
, loop
->inner
, NULL
);
606 * Call cloog_loop_restrict on each loop in the list "loop" and return
607 * the concatenated result.
609 CloogLoop
*cloog_loop_restrict_all(CloogLoop
*loop
, CloogDomain
*context
)
612 CloogLoop
*res
= NULL
;
613 CloogLoop
**res_next
= &res
;
615 for (; loop
; loop
= next
) {
618 *res_next
= cloog_loop_restrict(loop
, context
);
620 res_next
= &(*res_next
)->next
;
621 cloog_loop_free_parts(loop
, 1, 0, 0, 0);
624 cloog_loop_free(loop
);
633 * Restrict the domains of the inner loops of each loop l in the given
634 * list of loops to the domain of the loop l. If the domains of all
635 * inner loops of a given loop l turn out to be empty, then remove l
638 CloogLoop
*cloog_loop_restrict_inner(CloogLoop
*loop
)
642 CloogLoop
**res_next
= &res
;
644 for (; loop
; loop
= next
) {
647 loop
->inner
= cloog_loop_restrict_all(loop
->inner
, loop
->domain
);
650 res_next
= &(*res_next
)->next
;
653 cloog_loop_free(loop
);
663 * cloog_loop_project function:
664 * This function returns the projection of (loop) on the (level) first
665 * dimensions (outer loops). It makes the projection of the (loop) domain,
666 * then it returns a pointer to a new loop, with this projection as domain.
668 * - October 27th 2001: first version.
669 * - July 3rd->11th 2003: memory leaks hunt and correction.
670 * - June 22nd 2005: Adaptation for GMP.
672 CloogLoop
* cloog_loop_project(CloogLoop
* loop
, int level
)
674 CloogDomain
* new_domain
;
675 CloogLoop
* new_loop
, * copy
;
677 copy
= cloog_loop_alloc(loop
->state
, loop
->domain
, loop
->otl
, loop
->stride
,
678 loop
->block
, loop
->inner
, NULL
);
680 if (cloog_domain_dimension(loop
->domain
) == level
)
681 new_domain
= cloog_domain_copy(loop
->domain
) ;
683 new_domain
= cloog_domain_project(loop
->domain
, level
);
685 new_loop
= cloog_loop_alloc(loop
->state
, new_domain
, 0, NULL
,
693 * Call cloog_loop_project on each loop in the list "loop" and return
694 * the concatenated result.
696 CloogLoop
*cloog_loop_project_all(CloogLoop
*loop
, int level
)
699 CloogLoop
*res
= NULL
;
700 CloogLoop
**res_next
= &res
;
702 for (; loop
; loop
= next
) {
705 *res_next
= cloog_loop_project(loop
, level
);
706 res_next
= &(*res_next
)->next
;
707 cloog_loop_free_parts(loop
, 0, 0, 0, 0);
715 * cloog_loop_concat function:
716 * This function returns a pointer to the concatenation of the
717 * CloogLoop lists given as input.
718 * - October 28th 2001: first version.
720 CloogLoop
* cloog_loop_concat(CloogLoop
* a
, CloogLoop
* b
)
721 { CloogLoop
* loop
, * temp
;
726 { while (temp
->next
!= NULL
)
738 * cloog_loop_combine:
739 * Combine consecutive loops with identical domains into
740 * a single loop with the concatenation of their inner loops
743 CloogLoop
*cloog_loop_combine(CloogLoop
*loop
)
745 CloogLoop
*first
, *second
;
747 for (first
= loop
; first
; first
= first
->next
) {
748 while (first
->next
) {
749 if (!cloog_domain_lazy_equal(first
->domain
, first
->next
->domain
))
751 second
= first
->next
;
752 first
->inner
= cloog_loop_concat(first
->inner
, second
->inner
);
753 first
->next
= second
->next
;
754 cloog_loop_free_parts(second
, 1, 0, 0, 0);
762 * Remove loops from list that have an empty domain.
764 CloogLoop
*cloog_loop_remove_empty_domain_loops(CloogLoop
*loop
)
766 CloogLoop
*l
, *res
, *next
, **res_next
;
770 for (l
= loop
; l
; l
= next
) {
772 if (cloog_domain_isempty(l
->domain
))
773 cloog_loop_free_parts(l
, 1, 1, 1, 0);
776 res_next
= &(*res_next
)->next
;
784 CloogLoop
*cloog_loop_decompose_inner(CloogLoop
*loop
,
785 int level
, int scalar
, int *scaldims
, int nb_scattdims
);
787 /* For each loop with only one inner loop, replace the domain
788 * of the loop with the projection of the domain of the inner
789 * loop. To increase the number of loops with a single inner
790 * we first decompose the inner loops into strongly connected
793 CloogLoop
*cloog_loop_specialize(CloogLoop
*loop
,
794 int level
, int scalar
, int *scaldims
, int nb_scattdims
)
800 loop
= cloog_loop_decompose_inner(loop
, level
, scalar
,
801 scaldims
, nb_scattdims
);
803 for (l
= loop
; l
; l
= l
->next
) {
806 if (!cloog_domain_isconvex(l
->inner
->domain
))
809 dim
= cloog_domain_dimension(l
->domain
);
810 domain
= cloog_domain_project(l
->inner
->domain
, dim
);
811 if (cloog_domain_isconvex(domain
)) {
812 cloog_domain_free(l
->domain
);
815 cloog_domain_free(domain
);
819 return cloog_loop_remove_empty_domain_loops(loop
);
822 /* For each loop with only one inner loop, propagate the bounds from
823 * the inner loop domain to the outer loop domain. This is especially
824 * useful if the inner loop domain has a non-trivial stride which
825 * results in an update of the lower bound.
827 CloogLoop
*cloog_loop_propagate_lower_bound(CloogLoop
*loop
, int level
)
830 CloogDomain
*domain
, *t
;
833 for (l
= loop
; l
; l
= l
->next
) {
836 if (!cloog_domain_isconvex(l
->inner
->domain
))
839 dim
= cloog_domain_dimension(l
->domain
);
840 domain
= cloog_domain_project(l
->inner
->domain
, dim
);
841 if (cloog_domain_isconvex(domain
)) {
842 t
= cloog_domain_intersection(domain
, l
->domain
);
843 cloog_domain_free(l
->domain
);
846 cloog_domain_free(domain
);
853 * cloog_loop_separate function:
854 * This function implements the Quillere algorithm for separation of multiple
855 * loops: for a given set of polyhedra (loop), it computes a set of disjoint
856 * polyhedra such that the unions of these sets are equal, and returns this set.
857 * - October 28th 2001: first version.
858 * - November 14th 2001: elimination of some unused blocks.
859 * - August 13th 2002: (debug) in the case of union of polyhedra for one
860 * loop, redundant constraints are fired.
861 * - July 3rd->11th 2003: memory leaks hunt and correction.
862 * - June 22nd 2005: Adaptation for GMP.
863 * - October 16th 2005: Removal of the non-shared constraint elimination when
864 * there is only one loop in the list (seems to work
865 * without now, DomainSimplify may have been improved).
866 * The problem was visible with test/iftest2.cloog.
868 CloogLoop
* cloog_loop_separate(CloogLoop
* loop
)
869 { int lazy_equal
=0, disjoint
= 0;
870 CloogLoop
* new_loop
, * new_inner
, * res
, * now
, * temp
, * Q
,
871 * inner
, * old
/*, * previous, * next*/ ;
872 CloogDomain
*UQ
, *domain
;
877 loop
= cloog_loop_combine(loop
);
879 if (loop
->next
== NULL
)
880 return cloog_loop_disjoint(loop
) ;
882 UQ
= cloog_domain_copy(loop
->domain
) ;
883 domain
= cloog_domain_copy(loop
->domain
) ;
884 res
= cloog_loop_alloc(loop
->state
, domain
, 0, NULL
,
885 loop
->block
, loop
->inner
, NULL
);
888 while((loop
= loop
->next
) != NULL
)
891 /* For all Q, add Q-loop associated with the blocks of Q alone,
892 * and Q inter loop associated with the blocks of Q and loop.
894 for (Q
= res
; Q
; Q
= Q
->next
) {
895 /* Add (Q inter loop). */
896 if ((disjoint
= cloog_domain_lazy_disjoint(Q
->domain
,loop
->domain
)))
899 { if ((lazy_equal
= cloog_domain_lazy_equal(Q
->domain
,loop
->domain
)))
900 domain
= cloog_domain_copy(Q
->domain
) ;
902 domain
= cloog_domain_intersection(Q
->domain
,loop
->domain
) ;
904 if (!cloog_domain_isempty(domain
))
905 { new_inner
= cloog_loop_concat(cloog_loop_copy(Q
->inner
),
906 cloog_loop_copy(loop
->inner
)) ;
907 new_loop
= cloog_loop_alloc(loop
->state
, domain
, 0, NULL
,
908 NULL
, new_inner
, NULL
);
909 cloog_loop_add_disjoint(&temp
,&now
,new_loop
) ;
913 cloog_domain_free(domain
);
917 /* Add (Q - loop). */
919 domain
= cloog_domain_copy(Q
->domain
) ;
922 domain
= cloog_domain_empty(Q
->domain
);
924 domain
= cloog_domain_difference(Q
->domain
,loop
->domain
) ;
927 if (!cloog_domain_isempty(domain
)) {
928 new_loop
= cloog_loop_alloc(loop
->state
, domain
, 0, NULL
,
929 NULL
, Q
->inner
, NULL
);
930 cloog_loop_add_disjoint(&temp
,&now
,new_loop
) ;
933 { cloog_domain_free(domain
) ;
934 /* If Q->inner is no more useful, we can free it. */
937 cloog_loop_free(inner
) ;
941 /* Add loop-UQ associated with the blocks of loop alone.*/
942 if (cloog_domain_lazy_disjoint(loop
->domain
,UQ
))
943 domain
= cloog_domain_copy(loop
->domain
) ;
945 { if (cloog_domain_lazy_equal(loop
->domain
,UQ
))
946 domain
= cloog_domain_empty(UQ
);
948 domain
= cloog_domain_difference(loop
->domain
,UQ
) ;
951 if (!cloog_domain_isempty(domain
)) {
952 new_loop
= cloog_loop_alloc(loop
->state
, domain
, 0, NULL
,
953 NULL
, loop
->inner
, NULL
);
954 cloog_loop_add_disjoint(&temp
,&now
,new_loop
) ;
957 { cloog_domain_free(domain
) ;
958 /* If loop->inner is no more useful, we can free it. */
959 cloog_loop_free(loop
->inner
) ;
964 if (loop
->next
!= NULL
)
965 UQ
= cloog_domain_union(UQ
, cloog_domain_copy(loop
->domain
));
967 cloog_domain_free(UQ
);
969 cloog_loop_free_parts(res
,1,0,0,1) ;
973 cloog_loop_free_parts(old
,1,0,0,1) ;
979 static CloogDomain
*bounding_domain(CloogDomain
*dom
, CloogOptions
*options
)
982 return cloog_domain_simple_convex(dom
);
984 return cloog_domain_convex(dom
);
989 * cloog_loop_merge function:
990 * This function is the 'soft' version of loop_separate if we are looking for
991 * a code much simpler (and less efficicient). This function returns the new
993 * - October 29th 2001: first version.
994 * - July 3rd->11th 2003: memory leaks hunt and correction.
995 * - June 22nd 2005: Adaptation for GMP.
997 CloogLoop
*cloog_loop_merge(CloogLoop
*loop
, int level
, CloogOptions
*options
)
999 CloogLoop
*res
, *new_inner
, *old
;
1000 CloogDomain
*new_domain
, *temp
;
1005 if (loop
->next
== NULL
&& cloog_domain_isconvex(loop
->domain
))
1009 temp
= loop
->domain
;
1010 loop
->domain
= NULL
;
1011 new_inner
= loop
->inner
;
1013 for (loop
= loop
->next
; loop
; loop
= loop
->next
) {
1014 temp
= cloog_domain_union(temp
, loop
->domain
);
1015 loop
->domain
= NULL
;
1016 new_inner
= cloog_loop_concat(new_inner
, loop
->inner
);
1019 new_domain
= bounding_domain(temp
, options
);
1021 if (level
> 0 && !cloog_domain_is_bounded(new_domain
, level
) &&
1022 cloog_domain_is_bounded(temp
, level
)) {
1023 CloogDomain
*splitter
, *t2
;
1025 cloog_domain_free(new_domain
);
1026 splitter
= cloog_domain_bound_splitter(temp
, level
);
1029 while (!cloog_domain_isconvex(splitter
)) {
1030 CloogDomain
*first
, *rest
;
1031 first
= cloog_domain_cut_first(splitter
, &rest
);
1033 t2
= cloog_domain_intersection(first
, temp
);
1034 cloog_domain_free(first
);
1036 new_domain
= bounding_domain(t2
, options
);
1037 cloog_domain_free(t2
);
1039 if (cloog_domain_isempty(new_domain
)) {
1040 cloog_domain_free(new_domain
);
1043 res
= cloog_loop_alloc(old
->state
, new_domain
, 0, NULL
,
1044 NULL
, cloog_loop_copy(new_inner
), res
);
1047 t2
= cloog_domain_intersection(splitter
, temp
);
1048 cloog_domain_free(splitter
);
1050 new_domain
= bounding_domain(t2
, options
);
1051 cloog_domain_free(t2
);
1053 if (cloog_domain_isempty(new_domain
)) {
1054 cloog_domain_free(new_domain
);
1055 cloog_loop_free(new_inner
);
1057 res
= cloog_loop_alloc(old
->state
, new_domain
, 0, NULL
,
1058 NULL
, new_inner
, res
);
1060 res
= cloog_loop_alloc(old
->state
, new_domain
, 0, NULL
,
1061 NULL
, new_inner
, NULL
);
1063 cloog_domain_free(temp
);
1065 cloog_loop_free_parts(old
, 0, 0, 0, 1);
1071 static int cloog_loop_count(CloogLoop
*loop
)
1075 for (nb_loops
= 0; loop
; loop
= loop
->next
)
1083 * cloog_loop_sort function:
1084 * Adaptation from LoopGen 0.4 by F. Quillere. This function sorts a list of
1085 * parameterized disjoint polyhedra, in order to not have lexicographic order
1086 * violation (see Quillere paper).
1087 * - September 16th 2005: inclusion of cloog_loop_number (October 29th 2001).
1089 CloogLoop
*cloog_loop_sort(CloogLoop
*loop
, int level
)
1091 CloogLoop
*res
, *now
, **loop_array
;
1093 int i
, nb_loops
=0, * permut
;
1095 /* There is no need to sort the parameter domains. */
1099 /* We will need to know how many loops are in the list. */
1100 nb_loops
= cloog_loop_count(loop
);
1102 /* If there is only one loop, it's the end. */
1106 /* We have to allocate memory for some useful components:
1107 * - loop_array: the loop array,
1108 * - doms: the array of domains to sort,
1109 * - permut: will give us a possible sort (maybe not the only one).
1111 loop_array
= (CloogLoop
**)malloc(nb_loops
*sizeof(CloogLoop
*)) ;
1112 doms
= (CloogDomain
**)malloc(nb_loops
*sizeof(CloogDomain
*));
1113 permut
= (int *)malloc(nb_loops
*sizeof(int)) ;
1115 /* We fill up the loop and domain arrays. */
1116 for (i
=0;i
<nb_loops
;i
++,loop
=loop
->next
)
1117 { loop_array
[i
] = loop
;
1118 doms
[i
] = loop_array
[i
]->domain
;
1121 /* cloog_domain_sort will fill up permut. */
1122 cloog_domain_sort(doms
, nb_loops
, level
, permut
);
1124 /* With permut and loop_array we build the sorted list. */
1126 for (i
=0;i
<nb_loops
;i
++)
1127 { /* To avoid pointer looping... loop_add will rebuild the list. */
1128 loop_array
[permut
[i
]-1]->next
= NULL
;
1129 cloog_loop_add(&res
,&now
,loop_array
[permut
[i
]-1]) ;
1141 * cloog_loop_nest function:
1142 * This function changes the loop list in such a way that we have no more than
1143 * one dimension added by level. It returns an equivalent loop list with
1145 * - October 29th 2001: first version.
1146 * - July 3rd->11th 2003: memory leaks hunt and correction.
1147 * - June 22nd 2005: Adaptation for GMP.
1148 * - November 21th 2005: (debug) now OK when cloog_loop_restrict returns NULL.
1150 CloogLoop
*cloog_loop_nest(CloogLoop
*loop
, CloogDomain
*context
, int level
)
1152 CloogLoop
* p
, * temp
, * res
, * now
, * next
;
1153 CloogDomain
* new_domain
;
1155 loop
= cloog_loop_disjoint(loop
);
1158 /* Each domain is changed by its intersection with the context. */
1159 while (loop
!= NULL
)
1160 { p
= cloog_loop_restrict(loop
, context
);
1164 { cloog_loop_free_parts(loop
,1,0,0,0) ;
1166 temp
= cloog_loop_alloc(p
->state
, p
->domain
, 0, NULL
,
1167 p
->block
, p
->inner
, NULL
);
1169 /* If the intersection dimension is too big, we make projections smaller
1170 * and smaller, and each projection includes the preceding projection
1171 * (thus, in the target list, dimensions are added one by one).
1173 if (cloog_domain_dimension(p
->domain
) >= level
)
1174 for (l
= cloog_domain_dimension(p
->domain
); l
>= level
; l
--) {
1175 new_domain
= cloog_domain_project(p
->domain
, l
);
1176 temp
= cloog_loop_alloc(p
->state
, new_domain
, 0, NULL
,
1180 /* p is no more useful (but its content yes !). */
1181 cloog_loop_free_parts(p
,0,0,0,0) ;
1183 cloog_loop_add(&res
,&now
,temp
) ;
1186 cloog_loop_free_parts(loop
,1,1,1,0) ;
1195 /* Check if the domains of the inner loops impose a stride constraint
1196 * on the given level.
1197 * The core of the search is implemented in cloog_domain_list_stride.
1198 * Here, we simply construct a list of domains to pass to this function
1199 * and if a stride is found, we adjust the lower bounds by calling
1200 * cloog_domain_stride_lower_bound.
1202 static int cloog_loop_variable_offset_stride(CloogLoop
*loop
, int level
)
1204 CloogDomainList
*list
= NULL
;
1206 CloogStride
*stride
;
1208 for (inner
= loop
->inner
; inner
; inner
= inner
->next
) {
1209 CloogDomainList
*entry
= ALLOC(CloogDomainList
);
1210 entry
->domain
= cloog_domain_copy(inner
->domain
);
1215 stride
= cloog_domain_list_stride(list
, level
);
1217 cloog_domain_list_free(list
);
1222 loop
->stride
= stride
;
1223 loop
->domain
= cloog_domain_stride_lower_bound(loop
->domain
, level
, stride
);
1230 * cloog_loop_stride function:
1231 * This function will find the stride of a loop for the iterator at the column
1232 * number 'level' in the constraint matrix. It will update the lower bound of
1233 * the iterator accordingly. Basically, the function will try to find in the
1234 * inner loops a common condition on this iterator for the inner loop iterators
1235 * to be integral. For instance, let us consider a loop with the iterator i,
1236 * the iteration domain -4<=i<=n, and its two inner loops with the iterator j.
1237 * The first inner loop has the constraint 3j=i, and the second one has the
1238 * constraint 6j=i. Then the common constraint on i for j to be integral is
1239 * i%3=0, the stride for i is 3. Lastly, we have to find the new lower bound
1240 * for i: the first value satisfying the common constraint: -3. At the end, the
1241 * iteration domain for i is -3<=i<=n and the stride for i is 3.
1243 * The algorithm implemented in this function only allows for strides
1244 * on loops with a lower bound that has a constant remainder on division
1245 * by the stride. Before initiating this procedure, we first check
1246 * if we can find a stride with a lower bound with a variable offset in
1247 * cloog_loop_variable_offset_stride.
1249 * - loop is the loop including the iteration domain of the considered iterator,
1250 * - level is the column number of the iterator in the matrix of contraints.
1252 * - June 29th 2003: first version (work in progress since June 26th 2003).
1253 * - July 14th 2003: simpler version.
1254 * - June 22nd 2005: Adaptation for GMP (from S. Verdoolaege's 0.12.1 version).
1256 void cloog_loop_stride(CloogLoop
* loop
, int level
)
1257 { int first_search
;
1258 cloog_int_t stride
, ref_offset
, offset
, potential
;
1261 if (!cloog_domain_can_stride(loop
->domain
, level
))
1264 if (cloog_loop_variable_offset_stride(loop
, level
))
1267 cloog_int_init(stride
);
1268 cloog_int_init(ref_offset
);
1269 cloog_int_init(offset
);
1270 cloog_int_init(potential
);
1272 cloog_int_set_si(ref_offset
, 0);
1273 cloog_int_set_si(offset
, 0);
1275 /* Default stride. */
1276 cloog_int_set_si(stride
, 1);
1278 inner
= loop
->inner
;
1280 while (inner
!= NULL
)
1281 { /* If the minimun stride has not been found yet, find the stride. */
1282 if ((first_search
) || (!cloog_int_is_one(stride
)))
1284 cloog_domain_stride(inner
->domain
, level
, &potential
, &offset
);
1285 if (!cloog_int_is_one(potential
) && (!first_search
))
1286 { /* Offsets must be the same for common stride. */
1287 cloog_int_gcd(stride
, potential
, stride
);
1288 if (!cloog_int_is_zero(stride
)) {
1289 cloog_int_fdiv_r(offset
, offset
, stride
);
1290 cloog_int_fdiv_r(ref_offset
, ref_offset
, stride
);
1292 if (cloog_int_ne(offset
,ref_offset
))
1293 cloog_int_set_si(stride
, 1);
1296 cloog_int_set(stride
, potential
);
1297 cloog_int_set(ref_offset
, offset
);
1303 inner
= inner
->next
;
1306 if (cloog_int_is_zero(stride
))
1307 cloog_int_set_si(stride
, 1);
1309 /* Update the values if necessary. */
1310 if (!cloog_int_is_one(stride
))
1311 { /* Update the stride value. */
1312 if (!cloog_int_is_zero(offset
))
1313 cloog_int_sub(offset
, stride
, offset
);
1314 loop
->stride
= cloog_stride_alloc(stride
, offset
);
1315 loop
->domain
= cloog_domain_stride_lower_bound(loop
->domain
, level
,
1319 cloog_int_clear(stride
);
1320 cloog_int_clear(ref_offset
);
1321 cloog_int_clear(offset
);
1322 cloog_int_clear(potential
);
1326 void cloog_loop_otl(CloogLoop
*loop
, int level
)
1328 if (cloog_domain_is_otl(loop
->domain
, level
))
1334 * cloog_loop_stop function:
1335 * This function implements the 'stop' option : each domain of each loop
1336 * in the list 'loop' is replaced by 'context'. 'context' should be the
1337 * domain of the outer loop. By using this method, there are no more dimensions
1338 * to scan and the simplification step will automaticaly remove the domains
1339 * since they are the same as the corresponding contexts. The effect of this
1340 * function is to stop the code generation at the level this function is called,
1341 * the resulting code do not consider the next dimensions.
1342 * - January 11th 2005: first version.
1344 CloogLoop
* cloog_loop_stop(CloogLoop
* loop
, CloogDomain
* context
)
1348 { cloog_domain_free(loop
->domain
) ;
1349 loop
->domain
= cloog_domain_copy(context
) ;
1350 loop
->next
= cloog_loop_stop(loop
->next
, context
) ;
1357 static int level_is_constant(int level
, int scalar
, int *scaldims
, int nb_scattdims
)
1359 return level
&& (level
+scalar
<= nb_scattdims
) && (scaldims
[level
+scalar
-1]);
1364 * Compare the constant dimensions of loops 'l1' and 'l2' starting at 'scalar'
1365 * and return -1 if the vector of constant dimensions of 'l1' is smaller
1366 * than that of 'l2', 0 if they are the same and +1 if that of 'l1' is
1367 * greater than that of 'l2'.
1368 * This function should be called on the innermost loop (the loop
1369 * containing a block).
1370 * \param l1 Loop to be compared with l2.
1371 * \param l2 Loop to be compared with l1.
1372 * \param level Current non-scalar dimension.
1373 * \param scaldims Boolean array saying whether a dimension is scalar or not.
1374 * \param nb_scattdims Size of the scaldims array.
1375 * \param scalar Current scalar dimension.
1376 * \return -1 if (l1 < l2), 0 if (l1 == l2) and +1 if (l1 > l2)
1378 int cloog_loop_constant_cmp(CloogLoop
*l1
, CloogLoop
*l2
, int level
,
1379 int *scaldims
, int nb_scattdims
, int scalar
)
1381 CloogBlock
*b1
, *b2
;
1384 while (level_is_constant(level
, scalar
, scaldims
, nb_scattdims
)) {
1385 int cmp
= cloog_int_cmp(b1
->scaldims
[scalar
], b2
->scaldims
[scalar
]);
1395 * cloog_loop_scalar_gt function:
1396 * This function returns 1 if loop 'l1' is greater than loop 'l2' for the
1397 * scalar dimension vector that begins at dimension 'scalar', 0 otherwise. What
1398 * we want to know is whether a loop is scheduled before another one or not.
1399 * This function solves the problem when the considered dimension for scheduling
1400 * is a scalar dimension. Since there may be a succession of scalar dimensions,
1401 * this function will reason about the vector of scalar dimension that begins
1402 * at dimension 'level+scalar' and finish to the first non-scalar dimension.
1403 * \param l1 Loop to be compared with l2.
1404 * \param l2 Loop to be compared with l1.
1405 * \param level Current non-scalar dimension.
1406 * \param scaldims Boolean array saying whether a dimension is scalar or not.
1407 * \param nb_scattdims Size of the scaldims array.
1408 * \param scalar Current scalar dimension.
1409 * \return 1 if (l1 > l2), 0 otherwise.
1411 * - September 9th 2005: first version.
1412 * - October 15nd 2007: now "greater than" instead of "greater or equal".
1414 int cloog_loop_scalar_gt(l1
, l2
, level
, scaldims
, nb_scattdims
, scalar
)
1415 CloogLoop
* l1
, * l2
;
1416 int level
, * scaldims
, nb_scattdims
, scalar
;
1418 return cloog_loop_constant_cmp(l1
, l2
, level
, scaldims
, nb_scattdims
, scalar
) > 0;
1423 * cloog_loop_scalar_eq function:
1424 * This function returns 1 if loop 'l1' is equal to loop 'l2' for the scalar
1425 * dimension vector that begins at dimension 'scalar', 0 otherwise. What we want
1426 * to know is whether two loops are scheduled for the same time or not.
1427 * This function solves the problem when the considered dimension for scheduling
1428 * is a scalar dimension. Since there may be a succession of scalar dimensions,
1429 * this function will reason about the vector of scalar dimension that begins
1430 * at dimension 'level+scalar' and finish to the first non-scalar dimension.
1431 * - l1 and l2 are the loops to compare,
1432 * - level is the current non-scalar dimension,
1433 * - scaldims is the boolean array saying whether a dimension is scalar or not,
1434 * - nb_scattdims is the size of the scaldims array,
1435 * - scalar is the current scalar dimension.
1437 * - September 9th 2005 : first version.
1439 int cloog_loop_scalar_eq(l1
, l2
, level
, scaldims
, nb_scattdims
, scalar
)
1440 CloogLoop
* l1
, * l2
;
1441 int level
, * scaldims
, nb_scattdims
, scalar
;
1443 return cloog_loop_constant_cmp(l1
, l2
, level
, scaldims
, nb_scattdims
, scalar
) == 0;
1448 * cloog_loop_scalar_sort function:
1449 * This function sorts a linked list of loops (loop) with respect to the
1450 * scalar dimension vector that begins at dimension 'scalar'. Since there may
1451 * be a succession of scalar dimensions, this function will reason about the
1452 * vector of scalar dimension that begins at dimension 'level+scalar' and
1453 * finish to the first non-scalar dimension.
1454 * \param loop Loop list to sort.
1455 * \param level Current non-scalar dimension.
1456 * \param scaldims Boolean array saying whether a dimension is scalar or not.
1457 * \param nb_scattdims Size of the scaldims array.
1458 * \param scalar Current scalar dimension.
1459 * \return A pointer to the sorted list.
1461 * - July 2nd 2005: first developments.
1462 * - September 2nd 2005: first version.
1463 * - October 15nd 2007: complete rewrite to remove bugs, now a bubble sort.
1465 CloogLoop
* cloog_loop_scalar_sort(loop
, level
, scaldims
, nb_scattdims
, scalar
)
1467 int level
, * scaldims
, nb_scattdims
, scalar
;
1469 CloogLoop
**current
;
1473 for (current
= &loop
; (*current
)->next
; current
= &(*current
)->next
) {
1474 CloogLoop
*next
= (*current
)->next
;
1475 if (cloog_loop_scalar_gt(*current
,next
,level
,scaldims
,nb_scattdims
,scalar
)) {
1477 (*current
)->next
= next
->next
;
1478 next
->next
= *current
;
1489 * cloog_loop_generate_backtrack function:
1490 * adaptation from LoopGen 0.4 by F. Quillere. This function implements the
1491 * backtrack of the Quillere et al. algorithm (see the Quillere paper).
1492 * It eliminates unused iterations of the current level for the new one. See the
1493 * example called linearity-1-1 example with and without this part for an idea.
1494 * - October 26th 2001: first version in cloog_loop_generate_general.
1495 * - July 31th 2002: (debug) no more parasite loops (REALLY hard !).
1496 * - October 30th 2005: extraction from cloog_loop_generate_general.
1498 CloogLoop
*cloog_loop_generate_backtrack(CloogLoop
*loop
,
1499 int level
, CloogOptions
*options
)
1501 CloogDomain
* domain
;
1502 CloogLoop
* now
, * now2
, * next
, * next2
, * end
, * temp
, * l
, * inner
,
1508 while (temp
!= NULL
)
1510 inner
= temp
->inner
;
1512 while (inner
!= NULL
)
1513 { next
= inner
->next
;
1514 /* This 'if' and its first part is the debug of july 31th 2002. */
1515 if (inner
->block
!= NULL
) {
1516 end
= cloog_loop_alloc(temp
->state
, inner
->domain
, 0, NULL
,
1517 inner
->block
, NULL
, NULL
);
1518 domain
= cloog_domain_copy(temp
->domain
) ;
1519 new_loop
= cloog_loop_alloc(temp
->state
, domain
, 0, NULL
,
1523 new_loop
= cloog_loop_project(inner
, level
);
1525 cloog_loop_free_parts(inner
,0,0,0,0) ;
1526 cloog_loop_add(&l
,&now2
,new_loop
) ;
1530 temp
->inner
= NULL
;
1533 { l
= cloog_loop_separate(l
) ;
1534 l
= cloog_loop_sort(l
, level
);
1536 l
->stride
= cloog_stride_copy(l
->stride
);
1537 cloog_loop_add(&loop
,&now
,l
) ;
1541 next2
= temp
->next
;
1542 cloog_loop_free_parts(temp
,1,0,0,0) ;
1551 * Return 1 if we need to continue recursing to the specified level.
1553 int cloog_loop_more(CloogLoop
*loop
, int level
, int scalar
, int nb_scattdims
)
1555 return level
+ scalar
<= nb_scattdims
||
1556 cloog_domain_dimension(loop
->domain
) >= level
;
1560 * Return 1 if the domains of all loops in the given linked list
1561 * have a fixed value at the given level.
1562 * In principle, there would be no need to check that the fixed value is
1563 * the same for each of these loops because this function is only
1564 * called on a component. However, not all backends perform a proper
1565 * decomposition into components.
1567 int cloog_loop_is_constant(CloogLoop
*loop
, int level
)
1575 if (!cloog_domain_lazy_isconstant(loop
->domain
, level
- 1, &c1
))
1578 for (loop
= loop
->next
; r
&& loop
; loop
= loop
->next
) {
1579 if (!cloog_domain_lazy_isconstant(loop
->domain
, level
- 1, &c2
))
1581 else if (cloog_int_ne(c1
, c2
))
1585 cloog_int_clear(c1
);
1586 cloog_int_clear(c2
);
1592 * Assuming all domains in the given linked list of loop
1593 * have a fixed values at level, return a single loop with
1594 * a domain corresponding to this fixed value and with as
1595 * list of inner loops the concatenation of all inner loops
1596 * in the original list.
1598 CloogLoop
*cloog_loop_constant(CloogLoop
*loop
, int level
)
1600 CloogLoop
*res
, *inner
, *tmp
;
1601 CloogDomain
*domain
, *t
;
1606 inner
= loop
->inner
;
1607 domain
= loop
->domain
;
1608 for (tmp
= loop
->next
; tmp
; tmp
= tmp
->next
) {
1609 inner
= cloog_loop_concat(inner
, tmp
->inner
);
1610 domain
= cloog_domain_union(domain
, tmp
->domain
);
1613 domain
= cloog_domain_simple_convex(t
= domain
);
1614 cloog_domain_free(t
);
1616 res
= cloog_loop_alloc(loop
->state
, domain
, 0, NULL
, NULL
, inner
, NULL
);
1618 cloog_loop_free_parts(loop
, 0, 0, 0, 1);
1624 /* Unroll the given loop at the given level, provided it is allowed
1625 * by cloog_domain_can_unroll.
1626 * If so, we return a list of loops, one for each iteration of the original
1627 * loop. Otherwise, we simply return the original loop.
1629 static CloogLoop
*loop_unroll(CloogLoop
*loop
, int level
)
1634 CloogConstraint
*lb
;
1635 CloogLoop
*res
= NULL
;
1636 CloogLoop
**next_res
= &res
;
1637 CloogDomain
*domain
;
1641 can_unroll
= cloog_domain_can_unroll(loop
->domain
, level
, &n
, &lb
);
1649 for (cloog_int_set_si(i
, 0); cloog_int_lt(i
, n
); cloog_int_add_ui(i
, i
, 1)) {
1650 domain
= cloog_domain_copy(loop
->domain
);
1651 domain
= cloog_domain_fixed_offset(domain
, level
, lb
, i
);
1652 inner
= cloog_loop_copy(loop
->inner
);
1653 inner
= cloog_loop_restrict_all(inner
, domain
);
1655 cloog_domain_free(domain
);
1658 *next_res
= cloog_loop_alloc(loop
->state
, domain
, 1, NULL
, NULL
,
1660 next_res
= &(*next_res
)->next
;
1665 cloog_constraint_release(lb
);
1667 cloog_loop_free(loop
);
1673 /* Unroll all loops in the given list at the given level, provided
1674 * they can be unrolled.
1676 CloogLoop
*cloog_loop_unroll(CloogLoop
*loop
, int level
)
1678 CloogLoop
*now
, *next
;
1679 CloogLoop
*res
= NULL
;
1680 CloogLoop
**next_res
= &res
;
1682 for (now
= loop
; now
; now
= next
) {
1686 *next_res
= loop_unroll(now
, level
);
1689 next_res
= &(*next_res
)->next
;
1695 CloogLoop
*cloog_loop_generate_restricted_or_stop(CloogLoop
*loop
,
1696 CloogDomain
*context
,
1697 int level
, int scalar
, int *scaldims
, int nb_scattdims
,
1698 CloogOptions
*options
);
1700 CloogLoop
*cloog_loop_recurse(CloogLoop
*loop
,
1701 int level
, int scalar
, int *scaldims
, int nb_scattdims
,
1702 int constant
, CloogOptions
*options
);
1706 * Recurse on the inner loops of the given single loop.
1708 * - loop is the loop for which we have to generate scanning code,
1709 * - level is the current non-scalar dimension,
1710 * - scalar is the current scalar dimension,
1711 * - scaldims is the boolean array saying whether a dimension is scalar or not,
1712 * - nb_scattdims is the size of the scaldims array,
1713 * - constant is true if the loop is known to be executed at most once
1714 * - options are the general code generation options.
1716 static CloogLoop
*loop_recurse(CloogLoop
*loop
,
1717 int level
, int scalar
, int *scaldims
, int nb_scattdims
,
1718 int constant
, CloogOptions
*options
)
1720 CloogLoop
*inner
, *into
, *end
, *next
, *l
, *now
;
1721 CloogDomain
*domain
;
1723 if (level
&& options
->strides
&& !constant
)
1724 cloog_loop_stride(loop
, level
);
1727 options
->first_unroll
>= 0 && level
+ scalar
>= options
->first_unroll
) {
1728 loop
= cloog_loop_unroll(loop
, level
);
1730 return cloog_loop_recurse(loop
, level
, scalar
, scaldims
,
1731 nb_scattdims
, 1, options
);
1734 if (level
&& options
->otl
)
1735 cloog_loop_otl(loop
, level
);
1736 inner
= loop
->inner
;
1737 domain
= cloog_domain_copy(loop
->domain
);
1738 domain
= cloog_domain_add_stride_constraint(domain
, loop
->stride
);
1740 while (inner
!= NULL
)
1741 { /* 4b. -ced- recurse for each sub-list of non terminal loops. */
1742 if (cloog_loop_more(inner
, level
+ 1, scalar
, nb_scattdims
)) {
1744 while ((end
->next
!= NULL
) &&
1745 cloog_loop_more(end
->next
, level
+ 1, scalar
, nb_scattdims
))
1751 l
= cloog_loop_generate_restricted_or_stop(inner
, domain
,
1752 level
+ 1, scalar
, scaldims
, nb_scattdims
, options
);
1755 cloog_loop_add_list(&into
,&now
,l
) ;
1760 { cloog_loop_add(&into
,&now
,inner
) ;
1761 inner
= inner
->next
;
1765 cloog_domain_free(domain
);
1772 * Recurse on the inner loops of each of the loops in the loop list.
1774 * - loop is the loop list for which we have to generate scanning code,
1775 * - level is the current non-scalar dimension,
1776 * - scalar is the current scalar dimension,
1777 * - scaldims is the boolean array saying whether a dimension is scalar or not,
1778 * - nb_scattdims is the size of the scaldims array,
1779 * - constant is true if the loop is known to be executed at most once
1780 * - options are the general code generation options.
1782 CloogLoop
*cloog_loop_recurse(CloogLoop
*loop
,
1783 int level
, int scalar
, int *scaldims
, int nb_scattdims
,
1784 int constant
, CloogOptions
*options
)
1786 CloogLoop
*now
, *next
;
1787 CloogLoop
*res
= NULL
;
1788 CloogLoop
**next_res
= &res
;
1790 for (now
= loop
; now
; now
= next
) {
1794 *next_res
= loop_recurse(now
, level
, scalar
, scaldims
, nb_scattdims
,
1798 next_res
= &(*next_res
)->next
;
1805 /* Get the max across all 'first' depths for statements in this
1806 * stmt list, and the max across all 'last' depths */
1807 void cloog_statement_get_fl(CloogStatement
*s
, int *f
, int *l
,
1808 CloogOptions
*options
)
1810 if (s
== NULL
) return;
1814 if (options
->fs
!= NULL
&& options
->ls
!= NULL
) {
1815 fs
= options
->fs
[s
->number
-1];
1816 ls
= options
->ls
[s
->number
-1];
1817 *f
= (fs
> *f
)? fs
: *f
;
1818 *l
= (ls
> *l
)? ls
: *l
;
1824 cloog_statement_get_fl(s
->next
, f
, l
, options
);
1827 /* Get the max across all 'first' depths for statements under
1828 * this loop, and the max across all 'last' depths */
1829 void cloog_loop_get_fl(CloogLoop
*loop
, int *f
, int *l
,
1830 CloogOptions
*options
)
1832 if (loop
== NULL
) return;
1834 CloogBlock
*block
= loop
->block
;
1836 if (block
!= NULL
&& block
->statement
!= NULL
) {
1837 cloog_statement_get_fl(block
->statement
, f
, l
, options
);
1840 cloog_loop_get_fl(loop
->inner
, f
, l
, options
);
1841 cloog_loop_get_fl(loop
->next
, f
, l
, options
);
1845 * cloog_loop_generate_general function:
1846 * Adaptation from LoopGen 0.4 by F. Quillere. This function implements the
1847 * Quillere algorithm for polyhedron scanning from step 3 to 5.
1848 * (see the Quillere paper).
1849 * - loop is the loop for which we have to generate a scanning code,
1850 * - level is the current non-scalar dimension,
1851 * - scalar is the current scalar dimension,
1852 * - scaldims is the boolean array saying whether a dimension is scalar or not,
1853 * - nb_scattdims is the size of the scaldims array,
1854 * - options are the general code generation options.
1856 * - October 26th 2001: first version.
1857 * - July 3rd->11th 2003: memory leaks hunt and correction.
1858 * - June 22nd 2005: Adaptation for GMP.
1859 * - September 2nd 2005: The function have been cutted out in two pieces:
1860 * cloog_loop_generate and this one, in order to handle
1861 * the scalar dimension case more efficiently with
1862 * cloog_loop_generate_scalar.
1863 * - November 15th 2005: (debug) the result of the cloog_loop_generate call may
1864 * be a list of polyhedra (especially if stop option is
1865 * used): cloog_loop_add_list instead of cloog_loop_add.
1866 * - May 31, 2012: statement-wise first and last depth for loop separation
1867 * if options->fs and option->ls arrays are set, it will override what has
1868 * been supplied via "-f/-l"
1871 CloogLoop
*cloog_loop_generate_general(CloogLoop
*loop
,
1872 int level
, int scalar
, int *scaldims
, int nb_scattdims
,
1873 CloogOptions
*options
)
1875 CloogLoop
*res
, *now
, *temp
, *l
, *new_loop
, *next
;
1884 /* Get the -f and -l for each statement */
1885 cloog_loop_get_fl(loop
, &first
, &last
, options
);
1887 /* If stmt-wise options are not set or set inconsistently, use -f/-l ones globally */
1888 if (first
<= 0 || last
< first
) {
1893 /* 3. Separate all projections into disjoint polyhedra. */
1894 if (level
> 0 && cloog_loop_is_constant(loop
, level
)) {
1895 res
= cloog_loop_constant(loop
, level
);
1897 }else if ((first
> level
+scalar
) || (first
< 0)) {
1898 res
= cloog_loop_merge(loop
, level
, options
);
1900 res
= cloog_loop_separate(loop
);
1904 /* 3b. -correction- sort the loops to determine their textual order. */
1905 res
= cloog_loop_sort(res
, level
);
1907 res
= cloog_loop_restrict_inner(res
);
1910 res
= cloog_loop_specialize(res
, level
, scalar
, scaldims
, nb_scattdims
);
1912 /* 4. Recurse for each loop with the current domain as context. */
1915 if (!level
|| (level
+scalar
< last
) || (last
< 0))
1916 res
= cloog_loop_recurse(temp
, level
, scalar
, scaldims
, nb_scattdims
,
1919 while (temp
!= NULL
)
1920 { next
= temp
->next
;
1921 l
= cloog_loop_nest(temp
->inner
, temp
->domain
, level
+1);
1922 new_loop
= cloog_loop_alloc(temp
->state
, temp
->domain
, 0, NULL
,
1924 temp
->inner
= NULL
;
1926 cloog_loop_free_parts(temp
,0,0,0,0) ;
1927 cloog_loop_add(&res
,&now
,new_loop
) ;
1931 if (options
->strides
)
1932 res
= cloog_loop_propagate_lower_bound(res
, level
);
1934 /* 5. eliminate unused iterations of the current level for the new one. See
1935 * the example called linearity-1-1 example with and without this part
1938 if (options
->backtrack
&& level
&&
1939 ((level
+scalar
< last
) || (last
< 0)) &&
1940 ((first
<= level
+scalar
) && !(first
< 0)))
1941 res
= cloog_loop_generate_backtrack(res
, level
, options
);
1943 /* Pray for my new paper to be accepted somewhere since the following stuff
1944 * is really amazing :-) !
1945 * Far long later: The paper has been accepted to PACT 2004 :-))). But there
1946 * are still some bugs and I have no time to fix them. Thus now you have to
1947 * pray for me to get an academic position for that really amazing stuff :-) !
1948 * Later again: OK, I get my academic position, but still I have not enough
1949 * time to fix and clean this part... Pray again :-) !!!
1951 /* res = cloog_loop_unisolate(res,level) ;*/
1957 CloogLoop
*cloog_loop_generate_restricted(CloogLoop
*loop
,
1958 int level
, int scalar
, int *scaldims
, int nb_scattdims
,
1959 CloogOptions
*options
);
1963 * cloog_loop_generate_scalar function:
1964 * This function applies the simplified code generation scheme in the trivial
1965 * case of scalar dimensions. When dealing with scalar dimensions, there is
1966 * no need of costly polyhedral operations for separation or sorting: sorting
1967 * is a question of comparing scalar vectors and separation amounts to consider
1968 * only loops with the same scalar vector for the next step of the code
1969 * generation process. This function achieves the separation/sorting process
1970 * for the vector of scalar dimension that begins at dimension 'level+scalar'
1971 * and finish to the first non-scalar dimension.
1972 * - loop is the loop for which we have to generate a scanning code,
1973 * - level is the current non-scalar dimension,
1974 * - scalar is the current scalar dimension,
1975 * - scaldims is the boolean array saying whether a dimension is scalar or not,
1976 * - nb_scattdims is the size of the scaldims array,
1977 * - options are the general code generation options.
1979 * - September 2nd 2005: First version.
1981 CloogLoop
*cloog_loop_generate_scalar(CloogLoop
*loop
,
1982 int level
, int scalar
, int *scaldims
, int nb_scattdims
,
1983 CloogOptions
*options
)
1984 { CloogLoop
* res
, * now
, * temp
, * l
, * end
, * next
, * ref
;
1987 /* We sort the loop list with respect to the current scalar vector. */
1988 res
= cloog_loop_scalar_sort(loop
,level
,scaldims
,nb_scattdims
,scalar
) ;
1990 scalar_new
= scalar
+ scaldims
[level
+ scalar
- 1];
1994 while (temp
!= NULL
)
1995 { /* Then we will appy the general code generation process to each sub-list
1996 * of loops with the same scalar vector.
2001 while((end
->next
!= NULL
) &&
2002 cloog_loop_more(end
->next
, level
, scalar_new
, nb_scattdims
) &&
2003 cloog_loop_scalar_eq(ref
,end
->next
,level
,scaldims
,nb_scattdims
,scalar
))
2009 /* For the next dimension, scalar value is updated by adding the scalar
2010 * vector size, which is stored at scaldims[level+scalar-1].
2012 if (cloog_loop_more(temp
, level
, scalar_new
, nb_scattdims
)) {
2013 l
= cloog_loop_generate_restricted(temp
, level
, scalar_new
,
2014 scaldims
, nb_scattdims
, options
);
2017 cloog_loop_add_list(&res
, &now
, l
);
2019 cloog_loop_add(&res
, &now
, temp
);
2028 /* Compare loop with the next loop based on their constant dimensions.
2029 * The result is < 0, == 0 or > 0 depending on whether the constant
2030 * dimensions of loop are lexicographically smaller, equal or greater
2031 * than those of loop->next.
2032 * If loop is the last in the list, then it is assumed to be smaller
2033 * than the "next" one.
2035 static int cloog_loop_next_scal_cmp(CloogLoop
*loop
)
2043 nb_scaldims
= loop
->block
->nb_scaldims
;
2044 if (loop
->next
->block
->nb_scaldims
< nb_scaldims
)
2045 nb_scaldims
= loop
->next
->block
->nb_scaldims
;
2047 for (i
= 0; i
< nb_scaldims
; ++i
) {
2048 int cmp
= cloog_int_cmp(loop
->block
->scaldims
[i
],
2049 loop
->next
->block
->scaldims
[i
]);
2053 return loop
->block
->nb_scaldims
- loop
->next
->block
->nb_scaldims
;
2057 /* Check whether the globally constant dimensions of a and b
2058 * have the same value for all globally constant dimensions
2059 * that are situated before any (locally) non-constant dimension.
2061 static int cloog_loop_equal_prefix(CloogLoop
*a
, CloogLoop
*b
,
2062 int *scaldims
, int nb_scattdims
)
2068 for (i
= 0; i
< nb_scattdims
; ++i
) {
2073 if (!cloog_int_eq(a
->block
->scaldims
[cst
], b
->block
->scaldims
[cst
]))
2077 for (i
= i
+ 1; i
< nb_scattdims
; ++i
) {
2080 if (!cloog_domain_lazy_isconstant(a
->domain
, dim
, NULL
))
2082 /* No need to check that dim is also constant in b and that the
2083 * constant values are equal. That will happen during the check
2084 * whether the two domains are equal.
2092 /* Try to block adjacent loops in the loop list "loop".
2093 * We only attempt blocking if the constant dimensions of the loops
2094 * in the least are (not necessarily strictly) increasing.
2095 * Then we look for a sublist such that the first (begin) has constant
2096 * dimensions strictly larger than the previous loop in the complete
2097 * list and such that the loop (end) after the last loop in the sublist
2098 * has constant dimensions strictly larger than the last loop in the sublist.
2099 * Furthermore, all loops in the sublist should have the same domain
2100 * (with globally constant dimensions removed) and the difference
2101 * (if any) in constant dimensions may only occur after all the
2102 * (locally) constant dimensions.
2103 * If we find such a sublist, then the blocks of all but the first
2104 * are merged into the block of the first.
2106 * Note that this function can only be called before the global
2107 * blocklist has been created because it may otherwise modify and destroy
2108 * elements on that list.
2110 CloogLoop
*cloog_loop_block(CloogLoop
*loop
, int *scaldims
, int nb_scattdims
)
2112 CloogLoop
*begin
, *end
, *l
;
2113 int begin_after_previous
;
2114 int end_after_previous
;
2118 for (begin
= loop
; begin
; begin
= begin
->next
) {
2119 if (!begin
->block
|| !begin
->block
->scaldims
)
2121 if (cloog_loop_next_scal_cmp(begin
) > 0)
2125 begin_after_previous
= 1;
2126 for (begin
= loop
; begin
; begin
= begin
->next
) {
2127 if (!begin_after_previous
) {
2128 begin_after_previous
= cloog_loop_next_scal_cmp(begin
) < 0;
2132 end_after_previous
= cloog_loop_next_scal_cmp(begin
) < 0;
2133 for (end
= begin
->next
; end
; end
= end
->next
) {
2134 if (!cloog_loop_equal_prefix(begin
, end
, scaldims
, nb_scattdims
))
2136 if (!cloog_domain_lazy_equal(begin
->domain
, end
->domain
))
2138 end_after_previous
= cloog_loop_next_scal_cmp(end
) < 0;
2140 if (end
!= begin
->next
&& end_after_previous
) {
2141 for (l
= begin
->next
; l
!= end
; l
= begin
->next
) {
2142 cloog_block_merge(begin
->block
, l
->block
);
2143 begin
->next
= l
->next
;
2144 cloog_loop_free_parts(l
, 1, 0, 1, 0);
2148 begin_after_previous
= cloog_loop_next_scal_cmp(begin
) < 0;
2156 * Check whether for any fixed iteration of the outer loops,
2157 * there is an iteration of loop1 that is lexicographically greater
2158 * than an iteration of loop2.
2159 * Return 1 if there exists (or may exist) such a pair.
2160 * Return 0 if all iterations of loop1 are lexicographically smaller
2161 * than the iterations of loop2.
2162 * If no iteration is lexicographically greater, but if there are
2163 * iterations that are equal to iterations of loop2, then return "def".
2164 * This is useful for ensuring that such statements are not reordered.
2165 * Some users, including the test_run target in test, expect
2166 * the statements at a given point to be run in the original order.
2167 * Passing the value "0" for "def" would allow such statements to be reordered
2168 * and would allow for the detection of more components.
2170 int cloog_loop_follows(CloogLoop
*loop1
, CloogLoop
*loop2
,
2171 int level
, int scalar
, int *scaldims
, int nb_scattdims
, int def
)
2175 dim1
= cloog_domain_dimension(loop1
->domain
);
2176 dim2
= cloog_domain_dimension(loop2
->domain
);
2177 while ((level
<= dim1
&& level
<= dim2
) ||
2178 level_is_constant(level
, scalar
, scaldims
, nb_scattdims
)) {
2179 if (level_is_constant(level
, scalar
, scaldims
, nb_scattdims
)) {
2180 int cmp
= cloog_loop_constant_cmp(loop1
, loop2
, level
, scaldims
,
2181 nb_scattdims
, scalar
);
2186 scalar
+= scaldims
[level
+ scalar
- 1];
2188 int follows
= cloog_domain_follows(loop1
->domain
, loop2
->domain
,
2202 /* Structure for representing the nodes in the graph being traversed
2203 * using Tarjan's algorithm.
2204 * index represents the order in which nodes are visited.
2205 * min_index is the index of the root of a (sub)component.
2206 * on_stack indicates whether the node is currently on the stack.
2208 struct cloog_loop_sort_node
{
2213 /* Structure for representing the graph being traversed
2214 * using Tarjan's algorithm.
2215 * len is the number of nodes
2216 * node is an array of nodes
2217 * stack contains the nodes on the path from the root to the current node
2218 * sp is the stack pointer
2219 * index is the index of the last node visited
2220 * order contains the elements of the components separated by -1
2221 * op represents the current position in order
2223 struct cloog_loop_sort
{
2225 struct cloog_loop_sort_node
*node
;
2233 /* Allocate and initialize cloog_loop_sort structure.
2235 static struct cloog_loop_sort
*cloog_loop_sort_alloc(int len
)
2237 struct cloog_loop_sort
*s
;
2240 s
= (struct cloog_loop_sort
*)malloc(sizeof(struct cloog_loop_sort
));
2243 s
->node
= (struct cloog_loop_sort_node
*)
2244 malloc(len
* sizeof(struct cloog_loop_sort_node
));
2246 for (i
= 0; i
< len
; ++i
)
2247 s
->node
[i
].index
= -1;
2248 s
->stack
= (int *)malloc(len
* sizeof(int));
2250 s
->order
= (int *)malloc(2 * len
* sizeof(int));
2260 /* Free cloog_loop_sort structure.
2262 static void cloog_loop_sort_free(struct cloog_loop_sort
*s
)
2271 /* Check whether for any fixed iteration of the outer loops,
2272 * there is an iteration of loop1 that is lexicographically greater
2273 * than an iteration of loop2, where the iteration domains are
2274 * available in the inner loops of the arguments.
2276 * By using this functions to detect components, we ensure that
2277 * two CloogLoops appear in the same component if some iterations of
2278 * each loop should be executed before some iterations of the other loop.
2279 * Since we also want two CloogLoops that have exactly the same
2280 * iteration domain at the current level to be placed in the same component,
2281 * we first check if these domains are indeed the same.
2283 static int inner_loop_follows(CloogLoop
*loop1
, CloogLoop
*loop2
,
2284 int level
, int scalar
, int *scaldims
, int nb_scattdims
, int def
)
2288 f
= cloog_domain_lazy_equal(loop1
->domain
, loop2
->domain
);
2290 f
= cloog_loop_follows(loop1
->inner
, loop2
->inner
,
2291 level
, scalar
, scaldims
, nb_scattdims
, def
);
2297 /* Perform Tarjan's algorithm for computing the strongly connected components
2298 * in the graph with the individual CloogLoops as vertices.
2299 * Two CloopLoops appear in the same component if they both (indirectly)
2300 * "follow" each other, where the following relation is determined
2301 * by the follows function.
2303 static void cloog_loop_components_tarjan(struct cloog_loop_sort
*s
,
2304 CloogLoop
**loop_array
, int i
, int level
, int scalar
, int *scaldims
,
2306 int (*follows
)(CloogLoop
*loop1
, CloogLoop
*loop2
,
2307 int level
, int scalar
, int *scaldims
, int nb_scattdims
, int def
))
2311 s
->node
[i
].index
= s
->index
;
2312 s
->node
[i
].min_index
= s
->index
;
2313 s
->node
[i
].on_stack
= 1;
2315 s
->stack
[s
->sp
++] = i
;
2317 for (j
= s
->len
- 1; j
>= 0; --j
) {
2322 if (s
->node
[j
].index
>= 0 &&
2323 (!s
->node
[j
].on_stack
||
2324 s
->node
[j
].index
> s
->node
[i
].min_index
))
2327 f
= follows(loop_array
[i
], loop_array
[j
],
2328 level
, scalar
, scaldims
, nb_scattdims
, i
> j
);
2332 if (s
->node
[j
].index
< 0) {
2333 cloog_loop_components_tarjan(s
, loop_array
, j
, level
, scalar
,
2334 scaldims
, nb_scattdims
, follows
);
2335 if (s
->node
[j
].min_index
< s
->node
[i
].min_index
)
2336 s
->node
[i
].min_index
= s
->node
[j
].min_index
;
2337 } else if (s
->node
[j
].index
< s
->node
[i
].min_index
)
2338 s
->node
[i
].min_index
= s
->node
[j
].index
;
2341 if (s
->node
[i
].index
!= s
->node
[i
].min_index
)
2345 j
= s
->stack
[--s
->sp
];
2346 s
->node
[j
].on_stack
= 0;
2347 s
->order
[s
->op
++] = j
;
2349 s
->order
[s
->op
++] = -1;
2353 static int qsort_index_cmp(const void *p1
, const void *p2
)
2355 return *(int *)p1
- *(int *)p2
;
2358 /* Sort the elements of the component starting at list.
2359 * The list is terminated by a -1.
2361 static void sort_component(int *list
)
2365 for (len
= 0; list
[len
] != -1; ++len
)
2368 qsort(list
, len
, sizeof(int), qsort_index_cmp
);
2371 /* Given an array of indices "list" into the "loop_array" array,
2372 * terminated by -1, construct a linked list of the corresponding
2373 * entries and put the result in *res.
2374 * The value returned is the number of CloogLoops in the (linked) list
2376 static int extract_component(CloogLoop
**loop_array
, int *list
, CloogLoop
**res
)
2380 sort_component(list
);
2381 while (list
[i
] != -1) {
2382 *res
= loop_array
[list
[i
]];
2383 res
= &(*res
)->next
;
2393 * Call cloog_loop_generate_scalar or cloog_loop_generate_general
2394 * on each of the strongly connected components in the list of CloogLoops
2395 * pointed to by "loop".
2397 * We use Tarjan's algorithm to find the strongly connected components.
2398 * Note that this algorithm also topologically sorts the components.
2400 * The components are treated separately to avoid spurious separations.
2401 * The concatentation of the results may contain successive loops
2402 * with the same bounds, so we try to combine such loops.
2404 CloogLoop
*cloog_loop_generate_components(CloogLoop
*loop
,
2405 int level
, int scalar
, int *scaldims
, int nb_scattdims
,
2406 CloogOptions
*options
)
2410 CloogLoop
*res
, **res_next
;
2411 CloogLoop
**loop_array
;
2412 struct cloog_loop_sort
*s
;
2414 if (level
== 0 || !loop
->next
)
2415 return cloog_loop_generate_general(loop
, level
, scalar
,
2416 scaldims
, nb_scattdims
, options
);
2418 nb_loops
= cloog_loop_count(loop
);
2420 loop_array
= (CloogLoop
**)malloc(nb_loops
* sizeof(CloogLoop
*));
2423 for (i
= 0, tmp
= loop
; i
< nb_loops
; i
++, tmp
= tmp
->next
)
2424 loop_array
[i
] = tmp
;
2426 s
= cloog_loop_sort_alloc(nb_loops
);
2427 for (i
= nb_loops
- 1; i
>= 0; --i
) {
2428 if (s
->node
[i
].index
>= 0)
2430 cloog_loop_components_tarjan(s
, loop_array
, i
, level
, scalar
, scaldims
,
2431 nb_scattdims
, &inner_loop_follows
);
2438 int n
= extract_component(loop_array
, &s
->order
[i
], &tmp
);
2441 *res_next
= cloog_loop_generate_general(tmp
, level
, scalar
,
2442 scaldims
, nb_scattdims
, options
);
2444 res_next
= &(*res_next
)->next
;
2447 cloog_loop_sort_free(s
);
2451 res
= cloog_loop_combine(res
);
2457 /* For each loop in the list "loop", decompose the list of
2458 * inner loops into strongly connected components and put
2459 * the components into separate loops at the top level.
2461 CloogLoop
*cloog_loop_decompose_inner(CloogLoop
*loop
,
2462 int level
, int scalar
, int *scaldims
, int nb_scattdims
)
2465 CloogLoop
**loop_array
;
2466 int i
, n_loops
, max_loops
= 0;
2467 struct cloog_loop_sort
*s
;
2469 for (l
= loop
; l
; l
= l
->next
) {
2470 n_loops
= cloog_loop_count(l
->inner
);
2471 if (max_loops
< n_loops
)
2472 max_loops
= n_loops
;
2478 loop_array
= (CloogLoop
**)malloc(max_loops
* sizeof(CloogLoop
*));
2481 for (l
= loop
; l
; l
= l
->next
) {
2484 for (i
= 0, tmp
= l
->inner
; tmp
; i
++, tmp
= tmp
->next
)
2485 loop_array
[i
] = tmp
;
2490 s
= cloog_loop_sort_alloc(n_loops
);
2491 for (i
= n_loops
- 1; i
>= 0; --i
) {
2492 if (s
->node
[i
].index
>= 0)
2494 cloog_loop_components_tarjan(s
, loop_array
, i
, level
, scalar
,
2495 scaldims
, nb_scattdims
, &cloog_loop_follows
);
2498 n
= extract_component(loop_array
, s
->order
, &l
->inner
);
2504 n
= extract_component(loop_array
, &s
->order
[i
], &inner
);
2507 tmp
= cloog_loop_alloc(l
->state
, cloog_domain_copy(l
->domain
),
2508 l
->otl
, l
->stride
, l
->block
, inner
, l
->next
);
2513 cloog_loop_sort_free(s
);
2522 CloogLoop
*cloog_loop_generate_restricted(CloogLoop
*loop
,
2523 int level
, int scalar
, int *scaldims
, int nb_scattdims
,
2524 CloogOptions
*options
)
2526 /* To save both time and memory, we switch here depending on whether the
2527 * current dimension is scalar (simplified processing) or not (general
2530 if (level_is_constant(level
, scalar
, scaldims
, nb_scattdims
))
2531 return cloog_loop_generate_scalar(loop
, level
, scalar
,
2532 scaldims
, nb_scattdims
, options
);
2534 * 2. Compute the projection of each polyhedron onto the outermost
2535 * loop variable and the parameters.
2537 loop
= cloog_loop_project_all(loop
, level
);
2539 return cloog_loop_generate_components(loop
, level
, scalar
, scaldims
,
2540 nb_scattdims
, options
);
2544 CloogLoop
*cloog_loop_generate_restricted_or_stop(CloogLoop
*loop
,
2545 CloogDomain
*context
,
2546 int level
, int scalar
, int *scaldims
, int nb_scattdims
,
2547 CloogOptions
*options
)
2549 /* If the user asked to stop code generation at this level, let's stop. */
2550 if ((options
->stop
>= 0) && (level
+scalar
>= options
->stop
+1))
2551 return cloog_loop_stop(loop
,context
) ;
2553 return cloog_loop_generate_restricted(loop
, level
, scalar
, scaldims
,
2554 nb_scattdims
, options
);
2559 * cloog_loop_generate function:
2560 * Adaptation from LoopGen 0.4 by F. Quillere. This function implements the
2561 * Quillere algorithm for polyhedron scanning from step 1 to 2.
2562 * (see the Quillere paper).
2563 * - loop is the loop for which we have to generate a scanning code,
2564 * - context is the context of the current loop (constraints on parameter and/or
2565 * on outer loop counters),
2566 * - level is the current non-scalar dimension,
2567 * - scalar is the current scalar dimension,
2568 * - scaldims is the boolean array saying whether a dimension is scalar or not,
2569 * - nb_scattdims is the size of the scaldims array,
2570 * - options are the general code generation options.
2572 * - October 26th 2001: first version.
2573 * - July 3rd->11th 2003: memory leaks hunt and correction.
2574 * - June 15th 2005: a memory leak fixed (loop was not entirely freed when
2575 * the result of cloog_loop_restrict was NULL).
2576 * - June 22nd 2005: Adaptation for GMP.
2577 * - September 2nd 2005: The function have been cutted out in two pieces:
2578 * cloog_loop_generate and this one, in order to handle
2579 * the scalar dimension case more efficiently with
2580 * cloog_loop_generate_scalar.
2581 * - November 15th 2005: (debug) Condition for stop option no more take care of
2582 * further scalar dimensions.
2584 CloogLoop
*cloog_loop_generate(CloogLoop
*loop
, CloogDomain
*context
,
2585 int level
, int scalar
, int *scaldims
, int nb_scattdims
,
2586 CloogOptions
*options
)
2588 /* 1. Replace each polyhedron by its intersection with the context.
2590 loop
= cloog_loop_restrict_all(loop
, context
);
2594 return cloog_loop_generate_restricted_or_stop(loop
, context
,
2595 level
, scalar
, scaldims
, nb_scattdims
, options
);
2600 * Internal function for simplifying a single loop in a list of loops.
2601 * See cloog_loop_simplify.
2603 static CloogLoop
*loop_simplify(CloogLoop
*loop
, CloogDomain
*context
,
2604 int level
, int nb_scattdims
, CloogOptions
*options
)
2607 CloogBlock
* new_block
;
2608 CloogLoop
*simplified
, *inner
;
2609 CloogDomain
* domain
, * simp
, * inter
, * extended_context
;
2611 domain
= loop
->domain
;
2613 domain_dim
= cloog_domain_dimension(domain
);
2614 extended_context
= cloog_domain_extend(context
, domain_dim
);
2615 inter
= cloog_domain_intersection(domain
,extended_context
) ;
2616 simp
= cloog_domain_simplify(domain
, extended_context
);
2617 cloog_domain_free(extended_context
) ;
2619 /* If the constraint system is never true, go to the next one. */
2620 if (cloog_domain_never_integral(simp
)) {
2621 cloog_loop_free(loop
->inner
);
2622 cloog_domain_free(inter
);
2623 cloog_domain_free(simp
);
2627 inner
= cloog_loop_simplify(loop
->inner
, inter
, level
+1, nb_scattdims
,
2630 if ((inner
== NULL
) && (loop
->block
== NULL
)) {
2631 cloog_domain_free(inter
);
2632 cloog_domain_free(simp
);
2636 new_block
= cloog_block_copy(loop
->block
) ;
2638 simplified
= cloog_loop_alloc(loop
->state
, simp
, loop
->otl
, loop
->stride
,
2639 new_block
, inner
, NULL
);
2641 if (options
->save_domains
) {
2642 inter
= cloog_domain_add_stride_constraint(inter
, loop
->stride
);
2643 if (domain_dim
> nb_scattdims
) {
2645 inter
= cloog_domain_project(t
= inter
, nb_scattdims
);
2646 cloog_domain_free(t
);
2648 simplified
->unsimplified
= inter
;
2650 cloog_domain_free(inter
);
2652 return(simplified
) ;
2657 * cloog_loop_simplify function:
2658 * This function implements the part 6. of the Quillere algorithm, it
2659 * recursively simplifies each loop in the context of the preceding loop domain.
2660 * It returns a pointer to the simplified loop list.
2661 * The cloog_domain_simplify (DomainSimplify) behaviour is really bad with
2662 * polyhedra union and some really awful sidesteppings were written, I plan
2664 * - October 31th 2001: first version.
2665 * - July 3rd->11th 2003: memory leaks hunt and correction.
2666 * - April 16th 2005: a memory leak fixed (extended_context was not freed).
2667 * - June 15th 2005: a memory leak fixed (loop was not conveniently freed
2668 * when the constraint system is never true).
2669 * - October 27th 2005: - this function called before cloog_loop_fast_simplify
2670 * is now the official cloog_loop_simplify function in
2671 * replacement of a slower and more complex one (after
2672 * deep changes in the pretty printer).
2673 * - we use cloog_loop_disjoint to fix the problem when
2674 * simplifying gives a union of polyhedra (before, it
2675 * was under the responsibility of the pretty printer).
2677 CloogLoop
*cloog_loop_simplify(CloogLoop
*loop
, CloogDomain
*context
, int level
,
2678 int nb_scattdims
, CloogOptions
*options
)
2681 CloogLoop
*res
= NULL
;
2682 CloogLoop
**next
= &res
;
2685 for (now
= loop
; now
; now
= now
->next
)
2686 if (!cloog_domain_isconvex(now
->domain
)) {
2687 now
->domain
= cloog_domain_simplify_union(now
->domain
);
2688 if (!cloog_domain_isconvex(now
->domain
))
2692 /* If the input of CLooG contains any union domains, then they
2693 * may not have been split yet at this point. Do so now as the
2694 * clast construction assumes there are no union domains.
2697 loop
= cloog_loop_disjoint(loop
);
2699 for (now
= loop
; now
; now
= now
->next
) {
2700 *next
= loop_simplify(now
, context
, level
, nb_scattdims
, options
);
2702 now
->inner
= NULL
; /* For loop integrity. */
2703 cloog_domain_free(now
->domain
);
2707 next
= &(*next
)->next
;
2709 cloog_loop_free(loop
);
2716 * cloog_loop_scatter function:
2717 * This function add the scattering (scheduling) informations in a loop.
2719 void cloog_loop_scatter(CloogLoop
* loop
, CloogScattering
*scatt
)
2721 loop
->domain
= cloog_domain_scatter(loop
->domain
, scatt
);