statement-wise -f/-l options for loop separation
[cloog.git] / source / loop.c
blob9592503d971138d4da0de913012686023a69e876
2 /**-------------------------------------------------------------------**
3 ** CLooG **
4 **-------------------------------------------------------------------**
5 ** loop.c **
6 **-------------------------------------------------------------------**
7 ** First version: october 26th 2001 **
8 **-------------------------------------------------------------------**/
11 /******************************************************************************
12 * CLooG : the Chunky Loop Generator (experimental) *
13 ******************************************************************************
14 * *
15 * Copyright (C) 2001-2005 Cedric Bastoul *
16 * *
17 * This library is free software; you can redistribute it and/or *
18 * modify it under the terms of the GNU Lesser General Public *
19 * License as published by the Free Software Foundation; either *
20 * version 2.1 of the License, or (at your option) any later version. *
21 * *
22 * This library is distributed in the hope that it will be useful, *
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
25 * Lesser General Public License for more details. *
26 * *
27 * You should have received a copy of the GNU Lesser General Public *
28 * License along with this library; if not, write to the Free Software *
29 * Foundation, Inc., 51 Franklin Street, Fifth Floor, *
30 * Boston, MA 02110-1301 USA *
31 * *
32 * CLooG, the Chunky Loop Generator *
33 * Written by Cedric Bastoul, Cedric.Bastoul@inria.fr *
34 * *
35 ******************************************************************************/
36 /* CAUTION: the english used for comments is probably the worst you ever read,
37 * please feel free to correct and improve it !
40 # include <stdlib.h>
41 # include <stdio.h>
42 # include "../include/cloog/cloog.h"
44 #define ALLOC(type) (type*)malloc(sizeof(type))
47 /******************************************************************************
48 * Memory leaks hunting *
49 ******************************************************************************/
52 /**
53 * These functions and global variables are devoted to memory leaks hunting: we
54 * want to know at each moment how many CloogLoop structures had been allocated
55 * (cloog_loop_allocated) and how many had been freed (cloog_loop_freed).
56 * Each time a CloogLoog structure is allocated, a call to the function
57 * cloog_loop_leak_up() must be carried out, and respectively
58 * cloog_loop_leak_down() when a CloogLoop structure is freed. The special
59 * variable cloog_loop_max gives the maximal number of CloogLoop structures
60 * simultaneously alive (i.e. allocated and non-freed) in memory.
61 * - July 3rd->11th 2003: first version (memory leaks hunt and correction).
65 static void cloog_loop_leak_up(CloogState *state)
67 state->loop_allocated++;
68 if ((state->loop_allocated - state->loop_freed) > state->loop_max)
69 state->loop_max = state->loop_allocated - state->loop_freed;
73 static void cloog_loop_leak_down(CloogState *state)
75 state->loop_freed++;
79 /******************************************************************************
80 * Structure display function *
81 ******************************************************************************/
84 /**
85 * cloog_loop_print_structure function:
86 * Displays a loop structure in a way that trends to be understandable without
87 * falling in a deep depression or, for the lucky ones, getting a headache...
88 * Written by Olivier Chorier, Luc Marchaud, Pierre Martin and Romain Tartiere.
89 * - April 24th 2005: Initial version.
90 * - May 21rd 2005: - New parameter `F' for destination file (ie stdout),
91 * - Minor tweaks.
92 * - May 26th 2005: Memory leak hunt.
93 * - June 2nd 2005: (Ced) Integration and minor fixes.
94 * -June 22nd 2005: (Ced) Adaptation for GMP.
96 void cloog_loop_print_structure(FILE * file, CloogLoop * loop, int level)
97 { int i, j, first=1 ;
99 if (loop)
100 { /* Go to the right level. */
101 for (i=0; i<level; i++)
102 fprintf(file,"|\t") ;
104 fprintf(file,"+-- CloogLoop\n") ;
107 /* For each loop. */
108 while (loop)
109 { if (!first)
110 { /* Go to the right level. */
111 for (i=0; i<level; i++)
112 fprintf(file,"|\t") ;
114 fprintf(file,"| CloogLoop\n") ;
116 else
117 first = 0 ;
119 /* A blank line. */
120 for(j=0; j<=level+1; j++)
121 fprintf(file,"|\t") ;
122 fprintf(file,"\n") ;
124 /* Print the domain. */
125 cloog_domain_print_structure(file, loop->domain, level+1, "CloogDomain");
127 /* Print the stride. */
128 for(j=0; j<=level; j++)
129 fprintf(file,"|\t") ;
130 if (loop->stride) {
131 fprintf(file, "Stride: ");
132 cloog_int_print(file, loop->stride->stride);
133 fprintf(file, "\n");
134 fprintf(file, "Offset: ");
135 cloog_int_print(file, loop->stride->offset);
136 fprintf(file, "\n");
139 /* A blank line. */
140 for(j=0; j<=level+1; j++)
141 fprintf(file,"|\t") ;
142 fprintf(file,"\n") ;
144 /* Print the block. */
145 cloog_block_print_structure(file,loop->block,level+1) ;
147 /* A blank line. */
148 for (i=0; i<=level+1; i++)
149 fprintf(file,"|\t") ;
150 fprintf(file,"\n") ;
152 /* Print inner if any. */
153 if (loop->inner)
154 cloog_loop_print_structure(file,loop->inner,level+1) ;
156 /* And let's go for the next one. */
157 loop = loop->next ;
159 /* One more time something that is here only for a better look. */
160 if (!loop)
161 { /* Two blank lines if this is the end of the linked list. */
162 for (j=0; j<2; j++)
163 { for (i=0; i<=level; i++)
164 fprintf(file,"|\t") ;
166 fprintf(file,"\n") ;
169 else
170 { /* A special blank line if the is a next loop. */
171 for (i=0; i<=level; i++)
172 fprintf(file,"|\t") ;
173 fprintf(file,"V\n") ;
180 * cloog_loop_print function:
181 * This function prints the content of a CloogLoop structure (start) into a
182 * file (file, possibly stdout).
183 * - June 2nd 2005: Now this very old function (probably as old as CLooG) is
184 * only a frontend to cloog_loop_print_structure, with a quite
185 * better human-readable representation.
187 void cloog_loop_print(FILE * file, CloogLoop * loop)
188 { cloog_loop_print_structure(file,loop,0) ;
192 /******************************************************************************
193 * Memory deallocation function *
194 ******************************************************************************/
198 * cloog_loop_free function:
199 * This function frees the allocated memory for a CloogLoop structure (loop),
200 * and frees its inner loops and its next loops.
201 * - June 22nd 2005: Adaptation for GMP.
203 void cloog_loop_free(CloogLoop * loop)
204 { CloogLoop * next ;
206 while (loop != NULL) {
207 cloog_loop_leak_down(loop->state);
209 next = loop->next ;
210 cloog_domain_free(loop->domain) ;
211 cloog_domain_free(loop->unsimplified);
212 cloog_block_free(loop->block) ;
213 if (loop->inner != NULL)
214 cloog_loop_free(loop->inner) ;
216 cloog_stride_free(loop->stride);
217 free(loop) ;
218 loop = next ;
224 * cloog_loop_free_parts function:
225 * This function frees the allocated memory for some parts of a CloogLoop
226 * structure (loop), each other argument is a boolean having to be set to 1 if
227 * we want to free the corresponding part, 0 otherwise. This function applies
228 * the same freeing policy to its inner ans next loops recursively.
229 * - July 3rd 2003: first version.
230 * - June 22nd 2005: Adaptation for GMP.
232 void cloog_loop_free_parts(loop, domain, block, inner, next)
233 CloogLoop * loop ;
234 int domain, block, inner, next ;
235 { CloogLoop * follow ;
237 while (loop != NULL) {
238 cloog_loop_leak_down(loop->state);
239 follow = loop->next ;
241 if (domain)
242 cloog_domain_free(loop->domain) ;
244 if (block)
245 cloog_block_free(loop->block) ;
247 if ((inner) && (loop->inner != NULL))
248 cloog_loop_free_parts(loop->inner,domain,block,inner,1) ;
250 cloog_domain_free(loop->unsimplified);
251 cloog_stride_free(loop->stride);
252 free(loop) ;
253 if (next)
254 loop = follow ;
255 else
256 loop = NULL ;
261 /******************************************************************************
262 * Reading functions *
263 ******************************************************************************/
267 * Construct a CloogLoop structure from a given iteration domain
268 * and statement number.
270 CloogLoop *cloog_loop_from_domain(CloogState *state, CloogDomain *domain,
271 int number)
273 int nb_iterators;
274 CloogLoop * loop ;
275 CloogStatement * statement ;
277 /* Memory allocation and information reading for the first domain: */
278 loop = cloog_loop_malloc(state);
279 /* domain. */
280 loop->domain = domain;
281 if (loop->domain != NULL)
282 nb_iterators = cloog_domain_dimension(loop->domain);
283 else
284 nb_iterators = 0 ;
285 /* included statement block. */
286 statement = cloog_statement_alloc(state, number + 1);
287 loop->block = cloog_block_alloc(statement, 0, NULL, nb_iterators);
289 return loop ;
294 * cloog_loop_read function:
295 * This function reads loop data from a file (foo, possibly stdin) and
296 * returns a pointer to a CloogLoop structure containing the read information.
297 * This function can be used only for input file reading, when one loop is
298 * associated with one statement.
299 * - number is the statement block number carried by the loop (-1 if none).
300 * - nb_parameters is the number of parameters.
302 * - September 9th 2002: first version.
303 * - April 16th 2005: adaptation to new CloogStatement struct (with number).
304 * - June 11th 2005: adaptation to new CloogBlock structure.
305 * - June 22nd 2005: Adaptation for GMP.
307 CloogLoop *cloog_loop_read(CloogState *state,
308 FILE *foo, int number, int nb_parameters)
310 int op1, op2, op3;
311 char s[MAX_STRING];
312 CloogDomain *domain;
314 domain = cloog_domain_union_read(state, foo, nb_parameters);
316 /* To read that stupid "0 0 0" line. */
317 while (fgets(s,MAX_STRING,foo) == 0) ;
318 while ((*s=='#' || *s=='\n') || (sscanf(s," %d %d %d",&op1,&op2,&op3)<3))
319 fgets(s,MAX_STRING,foo) ;
321 return cloog_loop_from_domain(state, domain, number);
325 /******************************************************************************
326 * Processing functions *
327 ******************************************************************************/
331 * cloog_loop_malloc function:
332 * This function allocates the memory space for a CloogLoop structure and
333 * sets its fields with default values. Then it returns a pointer to the
334 * allocated space.
335 * - November 21th 2005: first version.
337 CloogLoop *cloog_loop_malloc(CloogState *state)
338 { CloogLoop * loop ;
340 /* Memory allocation for the CloogLoop structure. */
341 loop = (CloogLoop *)malloc(sizeof(CloogLoop)) ;
342 if (loop == NULL)
343 cloog_die("memory overflow.\n");
344 cloog_loop_leak_up(state);
347 /* We set the various fields with default values. */
348 loop->state = state;
349 loop->domain = NULL ;
350 loop->unsimplified = NULL;
351 loop->block = NULL ;
352 loop->usr = NULL;
353 loop->inner = NULL ;
354 loop->next = NULL ;
355 loop->otl = 0;
356 loop->stride = NULL;
358 return loop ;
363 * cloog_loop_alloc function:
364 * This function allocates the memory space for a CloogLoop structure and
365 * sets its fields with those given as input. Then it returns a pointer to the
366 * allocated space.
367 * - October 27th 2001: first version.
368 * - June 22nd 2005: Adaptation for GMP.
369 * - November 21th 2005: use of cloog_loop_malloc.
371 CloogLoop *cloog_loop_alloc(CloogState *state,
372 CloogDomain *domain, int otl, CloogStride *stride,
373 CloogBlock *block, CloogLoop *inner, CloogLoop *next)
374 { CloogLoop * loop ;
376 loop = cloog_loop_malloc(state);
378 loop->domain = domain ;
379 loop->block = block ;
380 loop->inner = inner ;
381 loop->next = next ;
382 loop->otl = otl;
383 loop->stride = cloog_stride_copy(stride);
385 return(loop) ;
390 * cloog_loop_add function:
391 * This function adds a CloogLoop structure (loop) at a given place (now) of a
392 * NULL terminated list of CloogLoop structures. The beginning of this list
393 * is (start). This function updates (now) to (loop), and updates (start) if the
394 * added element is the first one -that is when (start) is NULL-.
395 * - October 28th 2001: first version.
397 void cloog_loop_add(CloogLoop ** start, CloogLoop ** now, CloogLoop * loop)
398 { if (*start == NULL)
399 { *start = loop ;
400 *now = *start ;
402 else
403 { (*now)->next = loop ;
404 *now = (*now)->next ;
410 * cloog_loop_add function:
411 * This function adds a CloogLoop structure (loop) at a given place (now) of a
412 * NULL terminated list of CloogLoop structures. The beginning of this list
413 * is (start). This function updates (now) to the end of the loop list (loop),
414 * and updates (start) if the added element is the first one -that is when
415 * (start) is NULL-.
416 * - September 9th 2005: first version.
418 void cloog_loop_add_list(CloogLoop ** start, CloogLoop ** now, CloogLoop * loop)
419 { if (*start == NULL)
420 { *start = loop ;
421 *now = *start ;
423 else
424 { (*now)->next = loop ;
425 *now = (*now)->next ;
428 while ((*now)->next != NULL)
429 *now = (*now)->next ;
434 * cloog_loop_copy function:
435 * This function returns a copy of the CloogLoop structure given as input. In
436 * fact, there is just new allocations for the CloogLoop structures, but their
437 * contents are the same.
438 * - October 28th 2001: first version.
439 * - July 3rd->11th 2003: memory leaks hunt and correction.
441 CloogLoop * cloog_loop_copy(CloogLoop * source)
442 { CloogLoop * loop ;
443 CloogBlock * block ;
444 CloogDomain * domain ;
446 loop = NULL ;
447 if (source != NULL)
448 { domain = cloog_domain_copy(source->domain) ;
449 block = cloog_block_copy(source->block) ;
450 loop = cloog_loop_alloc(source->state, domain, source->otl,
451 source->stride, block, NULL, NULL);
452 loop->usr = source->usr;
453 loop->inner = cloog_loop_copy(source->inner) ;
454 loop->next = cloog_loop_copy(source->next) ;
456 return(loop) ;
461 * cloog_loop_add_disjoint function:
462 * This function adds some CloogLoop structures at a given place (now) of a
463 * NULL terminated list of CloogLoop structures. The beginning of this list
464 * is (start). (loop) can be an union of polyhedra, this function separates the
465 * union into a list of *disjoint* polyhedra then adds the list. This function
466 * updates (now) to the end of the list and updates (start) if first added
467 * element is the first of the principal list -that is when (start) is NULL-.
468 * (loop) can be freed by this function, basically when its domain is actually
469 * a union of polyhedra, but don't worry, all the useful data are now stored
470 * inside the list (start). We do not use PolyLib's Domain_Disjoint function,
471 * since the number of union components is often higher (thus code size too).
472 * - October 28th 2001: first version.
473 * - November 14th 2001: bug correction (this one was hard to find !).
474 * - July 3rd->11th 2003: memory leaks hunt and correction.
475 * - June 22nd 2005: Adaptation for GMP.
476 * - October 27th 2005: (debug) included blocks were not copied for new loops.
478 void cloog_loop_add_disjoint(start, now, loop)
479 CloogLoop ** start, ** now, * loop ;
481 CloogLoop * sep, * inner ;
482 CloogDomain *domain, *seen, *temp, *rest;
483 CloogBlock * block ;
485 if (cloog_domain_isconvex(loop->domain))
486 cloog_loop_add(start,now,loop) ;
487 else {
488 domain = cloog_domain_simplify_union(loop->domain);
489 loop->domain = NULL ;
491 /* We separate the first element of the rest of the union. */
492 domain = cloog_domain_cut_first(domain, &rest);
494 /* This first element is the first of the list of disjoint polyhedra. */
495 sep = cloog_loop_alloc(loop->state, domain, 0, NULL,
496 loop->block, loop->inner, NULL);
497 cloog_loop_add(start,now,sep) ;
499 seen = cloog_domain_copy(domain);
500 while (!cloog_domain_isempty(domain = rest)) {
501 temp = cloog_domain_cut_first(domain, &rest);
502 domain = cloog_domain_difference(temp, seen);
503 cloog_domain_free(temp);
505 if (cloog_domain_isempty(domain)) {
506 cloog_domain_free(domain);
507 continue;
510 /* Each new loop will have its own life, for instance we can free its
511 * inner loop and included block. Then each one must have its own copy
512 * of both 'inner' and 'block'.
514 inner = cloog_loop_copy(loop->inner) ;
515 block = cloog_block_copy(loop->block) ;
517 sep = cloog_loop_alloc(loop->state, cloog_domain_copy(domain),
518 0, NULL, block, inner, NULL);
519 /* domain can be an union too. If so: recursion. */
520 if (cloog_domain_isconvex(domain))
521 cloog_loop_add(start,now,sep) ;
522 else
523 cloog_loop_add_disjoint(start,now,sep) ;
525 if (cloog_domain_isempty(rest)) {
526 cloog_domain_free(domain);
527 break;
530 seen = cloog_domain_union(seen, domain);
532 cloog_domain_free(rest);
533 cloog_domain_free(seen);
534 cloog_loop_free_parts(loop,0,0,0,0) ;
540 * cloog_loop_disjoint function:
541 * This function returns a list of loops such that each loop with non-convex
542 * domain in the input list (loop) is separated into several loops where the
543 * domains are the components of the union of *disjoint* polyhedra equivalent
544 * to the original non-convex domain. See cloog_loop_add_disjoint comments
545 * for more details.
546 * - September 16th 2005: first version.
548 CloogLoop * cloog_loop_disjoint(CloogLoop * loop)
549 { CloogLoop *res=NULL, * now=NULL, * next ;
551 /* Because this is often the case, don't waste time ! */
552 if (loop && !loop->next && cloog_domain_isconvex(loop->domain))
553 return loop ;
555 while (loop != NULL)
556 { next = loop->next ;
557 loop->next = NULL ;
558 cloog_loop_add_disjoint(&res,&now,loop) ;
559 loop = next ;
562 return res ;
567 * cloog_loop_restrict function:
568 * This function returns the (loop) in the context of (context): it makes the
569 * intersection between the (loop) domain and the (context), then it returns
570 * a pointer to a new loop, with this intersection as domain.
572 * - October 27th 2001: first version.
573 * - June 15th 2005: a memory leak fixed (domain was not freed when empty).
574 * - June 22nd 2005: Adaptation for GMP.
576 CloogLoop *cloog_loop_restrict(CloogLoop *loop, CloogDomain *context)
577 { int new_dimension ;
578 CloogDomain * domain, * extended_context, * new_domain ;
579 CloogLoop * new_loop ;
581 domain = loop->domain ;
582 if (cloog_domain_dimension(domain) > cloog_domain_dimension(context))
584 new_dimension = cloog_domain_dimension(domain);
585 extended_context = cloog_domain_extend(context, new_dimension);
586 new_domain = cloog_domain_intersection(extended_context,loop->domain) ;
587 cloog_domain_free(extended_context) ;
589 else
590 new_domain = cloog_domain_intersection(context,loop->domain) ;
592 if (cloog_domain_isempty(new_domain))
593 { cloog_domain_free(new_domain) ;
594 return(NULL) ;
596 else {
597 new_loop = cloog_loop_alloc(loop->state, new_domain,
598 0, NULL, loop->block, loop->inner, NULL);
599 return(new_loop) ;
605 * Call cloog_loop_restrict on each loop in the list "loop" and return
606 * the concatenated result.
608 CloogLoop *cloog_loop_restrict_all(CloogLoop *loop, CloogDomain *context)
610 CloogLoop *next;
611 CloogLoop *res = NULL;
612 CloogLoop **res_next = &res;
614 for (; loop; loop = next) {
615 next = loop->next;
617 *res_next = cloog_loop_restrict(loop, context);
618 if (*res_next) {
619 res_next = &(*res_next)->next;
620 cloog_loop_free_parts(loop, 1, 0, 0, 0);
621 } else {
622 loop->next = NULL;
623 cloog_loop_free(loop);
627 return res;
632 * Restrict the domains of the inner loops of each loop l in the given
633 * list of loops to the domain of the loop l. If the domains of all
634 * inner loops of a given loop l turn out to be empty, then remove l
635 * from the list.
637 CloogLoop *cloog_loop_restrict_inner(CloogLoop *loop)
639 CloogLoop *next;
640 CloogLoop *res;
641 CloogLoop **res_next = &res;
643 for (; loop; loop = next) {
644 next = loop->next;
646 loop->inner = cloog_loop_restrict_all(loop->inner, loop->domain);
647 if (loop->inner) {
648 *res_next = loop;
649 res_next = &(*res_next)->next;
650 } else {
651 loop->next = NULL;
652 cloog_loop_free(loop);
656 *res_next = NULL;
658 return res;
662 * cloog_loop_project function:
663 * This function returns the projection of (loop) on the (level) first
664 * dimensions (outer loops). It makes the projection of the (loop) domain,
665 * then it returns a pointer to a new loop, with this projection as domain.
667 * - October 27th 2001: first version.
668 * - July 3rd->11th 2003: memory leaks hunt and correction.
669 * - June 22nd 2005: Adaptation for GMP.
671 CloogLoop * cloog_loop_project(CloogLoop * loop, int level)
673 CloogDomain * new_domain ;
674 CloogLoop * new_loop, * copy ;
676 copy = cloog_loop_alloc(loop->state, loop->domain, loop->otl, loop->stride,
677 loop->block, loop->inner, NULL);
679 if (cloog_domain_dimension(loop->domain) == level)
680 new_domain = cloog_domain_copy(loop->domain) ;
681 else
682 new_domain = cloog_domain_project(loop->domain, level);
684 new_loop = cloog_loop_alloc(loop->state, new_domain, 0, NULL,
685 NULL, copy, NULL);
687 return(new_loop) ;
692 * Call cloog_loop_project on each loop in the list "loop" and return
693 * the concatenated result.
695 CloogLoop *cloog_loop_project_all(CloogLoop *loop, int level)
697 CloogLoop *next;
698 CloogLoop *res = NULL;
699 CloogLoop **res_next = &res;
701 for (; loop; loop = next) {
702 next = loop->next;
704 *res_next = cloog_loop_project(loop, level);
705 res_next = &(*res_next)->next;
706 cloog_loop_free_parts(loop, 0, 0, 0, 0);
709 return res;
714 * cloog_loop_concat function:
715 * This function returns a pointer to the concatenation of the
716 * CloogLoop lists given as input.
717 * - October 28th 2001: first version.
719 CloogLoop * cloog_loop_concat(CloogLoop * a, CloogLoop * b)
720 { CloogLoop * loop, * temp ;
722 loop = a ;
723 temp = loop ;
724 if (loop != NULL)
725 { while (temp->next != NULL)
726 temp = temp->next ;
727 temp->next = b ;
729 else
730 loop = b ;
732 return(loop) ;
737 * cloog_loop_combine:
738 * Combine consecutive loops with identical domains into
739 * a single loop with the concatenation of their inner loops
740 * as inner loop.
742 CloogLoop *cloog_loop_combine(CloogLoop *loop)
744 CloogLoop *first, *second;
746 for (first = loop; first; first = first->next) {
747 while (first->next) {
748 if (!cloog_domain_lazy_equal(first->domain, first->next->domain))
749 break;
750 second = first->next;
751 first->inner = cloog_loop_concat(first->inner, second->inner);
752 first->next = second->next;
753 cloog_loop_free_parts(second, 1, 0, 0, 0);
757 return loop;
761 * Remove loops from list that have an empty domain.
763 CloogLoop *cloog_loop_remove_empty_domain_loops(CloogLoop *loop)
765 CloogLoop *l, *res, *next, **res_next;
767 res = NULL;
768 res_next = &res;
769 for (l = loop; l; l = next) {
770 next = l->next;
771 if (cloog_domain_isempty(l->domain))
772 cloog_loop_free_parts(l, 1, 1, 1, 0);
773 else {
774 *res_next = l;
775 res_next = &(*res_next)->next;
778 *res_next = NULL;
780 return res;
783 CloogLoop *cloog_loop_decompose_inner(CloogLoop *loop,
784 int level, int scalar, int *scaldims, int nb_scattdims);
786 /* For each loop with only one inner loop, replace the domain
787 * of the loop with the projection of the domain of the inner
788 * loop. To increase the number of loops with a single inner
789 * we first decompose the inner loops into strongly connected
790 * components.
792 CloogLoop *cloog_loop_specialize(CloogLoop *loop,
793 int level, int scalar, int *scaldims, int nb_scattdims)
795 int dim;
796 CloogDomain *domain;
797 CloogLoop *l;
799 loop = cloog_loop_decompose_inner(loop, level, scalar,
800 scaldims, nb_scattdims);
802 for (l = loop; l; l = l->next) {
803 if (l->inner->next)
804 continue;
805 if (!cloog_domain_isconvex(l->inner->domain))
806 continue;
808 dim = cloog_domain_dimension(l->domain);
809 domain = cloog_domain_project(l->inner->domain, dim);
810 if (cloog_domain_isconvex(domain)) {
811 cloog_domain_free(l->domain);
812 l->domain = domain;
813 } else {
814 cloog_domain_free(domain);
818 return cloog_loop_remove_empty_domain_loops(loop);
821 /* For each loop with only one inner loop, propagate the bounds from
822 * the inner loop domain to the outer loop domain. This is especially
823 * useful if the inner loop domain has a non-trivial stride which
824 * results in an update of the lower bound.
826 CloogLoop *cloog_loop_propagate_lower_bound(CloogLoop *loop, int level)
828 int dim;
829 CloogDomain *domain, *t;
830 CloogLoop *l;
832 for (l = loop; l; l = l->next) {
833 if (l->inner->next)
834 continue;
835 if (!cloog_domain_isconvex(l->inner->domain))
836 continue;
838 dim = cloog_domain_dimension(l->domain);
839 domain = cloog_domain_project(l->inner->domain, dim);
840 if (cloog_domain_isconvex(domain)) {
841 t = cloog_domain_intersection(domain, l->domain);
842 cloog_domain_free(l->domain);
843 l->domain = t;
845 cloog_domain_free(domain);
848 return loop;
852 * cloog_loop_separate function:
853 * This function implements the Quillere algorithm for separation of multiple
854 * loops: for a given set of polyhedra (loop), it computes a set of disjoint
855 * polyhedra such that the unions of these sets are equal, and returns this set.
856 * - October 28th 2001: first version.
857 * - November 14th 2001: elimination of some unused blocks.
858 * - August 13th 2002: (debug) in the case of union of polyhedra for one
859 * loop, redundant constraints are fired.
860 * - July 3rd->11th 2003: memory leaks hunt and correction.
861 * - June 22nd 2005: Adaptation for GMP.
862 * - October 16th 2005: Removal of the non-shared constraint elimination when
863 * there is only one loop in the list (seems to work
864 * without now, DomainSimplify may have been improved).
865 * The problem was visible with test/iftest2.cloog.
867 CloogLoop * cloog_loop_separate(CloogLoop * loop)
868 { int lazy_equal=0, disjoint = 0;
869 CloogLoop * new_loop, * new_inner, * res, * now, * temp, * Q,
870 * inner, * old /*, * previous, * next*/ ;
871 CloogDomain *UQ, *domain;
873 if (loop == NULL)
874 return NULL ;
876 loop = cloog_loop_combine(loop);
878 if (loop->next == NULL)
879 return cloog_loop_disjoint(loop) ;
881 UQ = cloog_domain_copy(loop->domain) ;
882 domain = cloog_domain_copy(loop->domain) ;
883 res = cloog_loop_alloc(loop->state, domain, 0, NULL,
884 loop->block, loop->inner, NULL);
886 old = loop ;
887 while((loop = loop->next) != NULL)
888 { temp = NULL ;
890 /* For all Q, add Q-loop associated with the blocks of Q alone,
891 * and Q inter loop associated with the blocks of Q and loop.
893 for (Q = res; Q; Q = Q->next) {
894 /* Add (Q inter loop). */
895 if ((disjoint = cloog_domain_lazy_disjoint(Q->domain,loop->domain)))
896 domain = NULL ;
897 else
898 { if ((lazy_equal = cloog_domain_lazy_equal(Q->domain,loop->domain)))
899 domain = cloog_domain_copy(Q->domain) ;
900 else
901 domain = cloog_domain_intersection(Q->domain,loop->domain) ;
903 if (!cloog_domain_isempty(domain))
904 { new_inner = cloog_loop_concat(cloog_loop_copy(Q->inner),
905 cloog_loop_copy(loop->inner)) ;
906 new_loop = cloog_loop_alloc(loop->state, domain, 0, NULL,
907 NULL, new_inner, NULL);
908 cloog_loop_add_disjoint(&temp,&now,new_loop) ;
910 else {
911 disjoint = 1;
912 cloog_domain_free(domain);
916 /* Add (Q - loop). */
917 if (disjoint)
918 domain = cloog_domain_copy(Q->domain) ;
919 else
920 { if (lazy_equal)
921 domain = cloog_domain_empty(Q->domain);
922 else
923 domain = cloog_domain_difference(Q->domain,loop->domain) ;
926 if (!cloog_domain_isempty(domain)) {
927 new_loop = cloog_loop_alloc(loop->state, domain, 0, NULL,
928 NULL, Q->inner, NULL);
929 cloog_loop_add_disjoint(&temp,&now,new_loop) ;
931 else
932 { cloog_domain_free(domain) ;
933 /* If Q->inner is no more useful, we can free it. */
934 inner = Q->inner ;
935 Q->inner = NULL ;
936 cloog_loop_free(inner) ;
940 /* Add loop-UQ associated with the blocks of loop alone.*/
941 if (cloog_domain_lazy_disjoint(loop->domain,UQ))
942 domain = cloog_domain_copy(loop->domain) ;
943 else
944 { if (cloog_domain_lazy_equal(loop->domain,UQ))
945 domain = cloog_domain_empty(UQ);
946 else
947 domain = cloog_domain_difference(loop->domain,UQ) ;
950 if (!cloog_domain_isempty(domain)) {
951 new_loop = cloog_loop_alloc(loop->state, domain, 0, NULL,
952 NULL, loop->inner, NULL);
953 cloog_loop_add_disjoint(&temp,&now,new_loop) ;
955 else
956 { cloog_domain_free(domain) ;
957 /* If loop->inner is no more useful, we can free it. */
958 cloog_loop_free(loop->inner) ;
961 loop->inner = NULL ;
963 if (loop->next != NULL)
964 UQ = cloog_domain_union(UQ, cloog_domain_copy(loop->domain));
965 else
966 cloog_domain_free(UQ);
968 cloog_loop_free_parts(res,1,0,0,1) ;
970 res = temp ;
972 cloog_loop_free_parts(old,1,0,0,1) ;
974 return(res) ;
978 static CloogDomain *bounding_domain(CloogDomain *dom, CloogOptions *options)
980 if (options->sh)
981 return cloog_domain_simple_convex(dom);
982 else
983 return cloog_domain_convex(dom);
988 * cloog_loop_merge function:
989 * This function is the 'soft' version of loop_separate if we are looking for
990 * a code much simpler (and less efficicient). This function returns the new
991 * CloogLoop list.
992 * - October 29th 2001: first version.
993 * - July 3rd->11th 2003: memory leaks hunt and correction.
994 * - June 22nd 2005: Adaptation for GMP.
996 CloogLoop *cloog_loop_merge(CloogLoop *loop, int level, CloogOptions *options)
998 CloogLoop *res, *new_inner, *old;
999 CloogDomain *new_domain, *temp;
1001 if (loop == NULL)
1002 return loop;
1004 if (loop->next == NULL && cloog_domain_isconvex(loop->domain))
1005 return loop;
1007 old = loop;
1008 temp = loop->domain;
1009 loop->domain = NULL;
1010 new_inner = loop->inner;
1012 for (loop = loop->next; loop; loop = loop->next) {
1013 temp = cloog_domain_union(temp, loop->domain);
1014 loop->domain = NULL;
1015 new_inner = cloog_loop_concat(new_inner, loop->inner);
1018 new_domain = bounding_domain(temp, options);
1020 if (level > 0 && !cloog_domain_is_bounded(new_domain, level) &&
1021 cloog_domain_is_bounded(temp, level)) {
1022 CloogDomain *splitter, *t2;
1024 cloog_domain_free(new_domain);
1025 splitter = cloog_domain_bound_splitter(temp, level);
1027 res = NULL;
1028 while (!cloog_domain_isconvex(splitter)) {
1029 CloogDomain *first, *rest;
1030 first = cloog_domain_cut_first(splitter, &rest);
1031 splitter = rest;
1032 t2 = cloog_domain_intersection(first, temp);
1033 cloog_domain_free(first);
1035 new_domain = bounding_domain(t2, options);
1036 cloog_domain_free(t2);
1038 if (cloog_domain_isempty(new_domain)) {
1039 cloog_domain_free(new_domain);
1040 continue;
1042 res = cloog_loop_alloc(old->state, new_domain, 0, NULL,
1043 NULL, cloog_loop_copy(new_inner), res);
1046 t2 = cloog_domain_intersection(splitter, temp);
1047 cloog_domain_free(splitter);
1049 new_domain = bounding_domain(t2, options);
1050 cloog_domain_free(t2);
1052 if (cloog_domain_isempty(new_domain)) {
1053 cloog_domain_free(new_domain);
1054 cloog_loop_free(new_inner);
1055 } else
1056 res = cloog_loop_alloc(old->state, new_domain, 0, NULL,
1057 NULL, new_inner, res);
1058 } else {
1059 res = cloog_loop_alloc(old->state, new_domain, 0, NULL,
1060 NULL, new_inner, NULL);
1062 cloog_domain_free(temp);
1064 cloog_loop_free_parts(old, 0, 0, 0, 1);
1066 return res;
1070 static int cloog_loop_count(CloogLoop *loop)
1072 int nb_loops;
1074 for (nb_loops = 0; loop; loop = loop->next)
1075 nb_loops++;
1077 return nb_loops;
1082 * cloog_loop_sort function:
1083 * Adaptation from LoopGen 0.4 by F. Quillere. This function sorts a list of
1084 * parameterized disjoint polyhedra, in order to not have lexicographic order
1085 * violation (see Quillere paper).
1086 * - September 16th 2005: inclusion of cloog_loop_number (October 29th 2001).
1088 CloogLoop *cloog_loop_sort(CloogLoop *loop, int level)
1090 CloogLoop *res, *now, **loop_array;
1091 CloogDomain **doms;
1092 int i, nb_loops=0, * permut ;
1094 /* There is no need to sort the parameter domains. */
1095 if (!level)
1096 return loop;
1098 /* We will need to know how many loops are in the list. */
1099 nb_loops = cloog_loop_count(loop);
1101 /* If there is only one loop, it's the end. */
1102 if (nb_loops == 1)
1103 return(loop) ;
1105 /* We have to allocate memory for some useful components:
1106 * - loop_array: the loop array,
1107 * - doms: the array of domains to sort,
1108 * - permut: will give us a possible sort (maybe not the only one).
1110 loop_array = (CloogLoop **)malloc(nb_loops*sizeof(CloogLoop *)) ;
1111 doms = (CloogDomain **)malloc(nb_loops*sizeof(CloogDomain *));
1112 permut = (int *)malloc(nb_loops*sizeof(int)) ;
1114 /* We fill up the loop and domain arrays. */
1115 for (i=0;i<nb_loops;i++,loop=loop->next)
1116 { loop_array[i] = loop ;
1117 doms[i] = loop_array[i]->domain;
1120 /* cloog_domain_sort will fill up permut. */
1121 cloog_domain_sort(doms, nb_loops, level, permut);
1123 /* With permut and loop_array we build the sorted list. */
1124 res = NULL ;
1125 for (i=0;i<nb_loops;i++)
1126 { /* To avoid pointer looping... loop_add will rebuild the list. */
1127 loop_array[permut[i]-1]->next = NULL ;
1128 cloog_loop_add(&res,&now,loop_array[permut[i]-1]) ;
1131 free(permut) ;
1132 free(doms);
1133 free(loop_array) ;
1135 return res;
1140 * cloog_loop_nest function:
1141 * This function changes the loop list in such a way that we have no more than
1142 * one dimension added by level. It returns an equivalent loop list with
1143 * this property.
1144 * - October 29th 2001: first version.
1145 * - July 3rd->11th 2003: memory leaks hunt and correction.
1146 * - June 22nd 2005: Adaptation for GMP.
1147 * - November 21th 2005: (debug) now OK when cloog_loop_restrict returns NULL.
1149 CloogLoop *cloog_loop_nest(CloogLoop *loop, CloogDomain *context, int level)
1150 { int l ;
1151 CloogLoop * p, * temp, * res, * now, * next ;
1152 CloogDomain * new_domain ;
1154 loop = cloog_loop_disjoint(loop);
1156 res = NULL ;
1157 /* Each domain is changed by its intersection with the context. */
1158 while (loop != NULL)
1159 { p = cloog_loop_restrict(loop, context);
1160 next = loop->next ;
1162 if (p != NULL)
1163 { cloog_loop_free_parts(loop,1,0,0,0) ;
1165 temp = cloog_loop_alloc(p->state, p->domain, 0, NULL,
1166 p->block, p->inner, NULL);
1168 /* If the intersection dimension is too big, we make projections smaller
1169 * and smaller, and each projection includes the preceding projection
1170 * (thus, in the target list, dimensions are added one by one).
1172 if (cloog_domain_dimension(p->domain) >= level)
1173 for (l = cloog_domain_dimension(p->domain); l >= level; l--) {
1174 new_domain = cloog_domain_project(p->domain, l);
1175 temp = cloog_loop_alloc(p->state, new_domain, 0, NULL,
1176 NULL, temp, NULL);
1179 /* p is no more useful (but its content yes !). */
1180 cloog_loop_free_parts(p,0,0,0,0) ;
1182 cloog_loop_add(&res,&now,temp) ;
1184 else
1185 cloog_loop_free_parts(loop,1,1,1,0) ;
1187 loop = next ;
1190 return(res) ;
1194 /* Check if the domains of the inner loops impose a stride constraint
1195 * on the given level.
1196 * The core of the search is implemented in cloog_domain_list_stride.
1197 * Here, we simply construct a list of domains to pass to this function
1198 * and if a stride is found, we adjust the lower bounds by calling
1199 * cloog_domain_stride_lower_bound.
1201 static int cloog_loop_variable_offset_stride(CloogLoop *loop, int level)
1203 CloogDomainList *list = NULL;
1204 CloogLoop *inner;
1205 CloogStride *stride;
1207 for (inner = loop->inner; inner; inner = inner->next) {
1208 CloogDomainList *entry = ALLOC(CloogDomainList);
1209 entry->domain = cloog_domain_copy(inner->domain);
1210 entry->next = list;
1211 list = entry;
1214 stride = cloog_domain_list_stride(list, level);
1216 cloog_domain_list_free(list);
1218 if (!stride)
1219 return 0;
1221 loop->stride = stride;
1222 loop->domain = cloog_domain_stride_lower_bound(loop->domain, level, stride);
1224 return 1;
1229 * cloog_loop_stride function:
1230 * This function will find the stride of a loop for the iterator at the column
1231 * number 'level' in the constraint matrix. It will update the lower bound of
1232 * the iterator accordingly. Basically, the function will try to find in the
1233 * inner loops a common condition on this iterator for the inner loop iterators
1234 * to be integral. For instance, let us consider a loop with the iterator i,
1235 * the iteration domain -4<=i<=n, and its two inner loops with the iterator j.
1236 * The first inner loop has the constraint 3j=i, and the second one has the
1237 * constraint 6j=i. Then the common constraint on i for j to be integral is
1238 * i%3=0, the stride for i is 3. Lastly, we have to find the new lower bound
1239 * for i: the first value satisfying the common constraint: -3. At the end, the
1240 * iteration domain for i is -3<=i<=n and the stride for i is 3.
1242 * The algorithm implemented in this function only allows for strides
1243 * on loops with a lower bound that has a constant remainder on division
1244 * by the stride. Before initiating this procedure, we first check
1245 * if we can find a stride with a lower bound with a variable offset in
1246 * cloog_loop_variable_offset_stride.
1248 * - loop is the loop including the iteration domain of the considered iterator,
1249 * - level is the column number of the iterator in the matrix of contraints.
1251 * - June 29th 2003: first version (work in progress since June 26th 2003).
1252 * - July 14th 2003: simpler version.
1253 * - June 22nd 2005: Adaptation for GMP (from S. Verdoolaege's 0.12.1 version).
1255 void cloog_loop_stride(CloogLoop * loop, int level)
1256 { int first_search ;
1257 cloog_int_t stride, ref_offset, offset, potential;
1258 CloogLoop * inner ;
1260 if (!cloog_domain_can_stride(loop->domain, level))
1261 return;
1263 if (cloog_loop_variable_offset_stride(loop, level))
1264 return;
1266 cloog_int_init(stride);
1267 cloog_int_init(ref_offset);
1268 cloog_int_init(offset);
1269 cloog_int_init(potential);
1271 cloog_int_set_si(ref_offset, 0);
1272 cloog_int_set_si(offset, 0);
1274 /* Default stride. */
1275 cloog_int_set_si(stride, 1);
1276 first_search = 1 ;
1277 inner = loop->inner ;
1279 while (inner != NULL)
1280 { /* If the minimun stride has not been found yet, find the stride. */
1281 if ((first_search) || (!cloog_int_is_one(stride)))
1283 cloog_domain_stride(inner->domain, level, &potential, &offset);
1284 if (!cloog_int_is_one(potential) && (!first_search))
1285 { /* Offsets must be the same for common stride. */
1286 cloog_int_gcd(stride, potential, stride);
1287 if (!cloog_int_is_zero(stride)) {
1288 cloog_int_fdiv_r(offset, offset, stride);
1289 cloog_int_fdiv_r(ref_offset, ref_offset, stride);
1291 if (cloog_int_ne(offset,ref_offset))
1292 cloog_int_set_si(stride, 1);
1294 else {
1295 cloog_int_set(stride, potential);
1296 cloog_int_set(ref_offset, offset);
1299 first_search = 0 ;
1302 inner = inner->next ;
1305 if (cloog_int_is_zero(stride))
1306 cloog_int_set_si(stride, 1);
1308 /* Update the values if necessary. */
1309 if (!cloog_int_is_one(stride))
1310 { /* Update the stride value. */
1311 if (!cloog_int_is_zero(offset))
1312 cloog_int_sub(offset, stride, offset);
1313 loop->stride = cloog_stride_alloc(stride, offset);
1314 loop->domain = cloog_domain_stride_lower_bound(loop->domain, level,
1315 loop->stride);
1318 cloog_int_clear(stride);
1319 cloog_int_clear(ref_offset);
1320 cloog_int_clear(offset);
1321 cloog_int_clear(potential);
1325 void cloog_loop_otl(CloogLoop *loop, int level)
1327 if (cloog_domain_is_otl(loop->domain, level))
1328 loop->otl = 1;
1333 * cloog_loop_stop function:
1334 * This function implements the 'stop' option : each domain of each loop
1335 * in the list 'loop' is replaced by 'context'. 'context' should be the
1336 * domain of the outer loop. By using this method, there are no more dimensions
1337 * to scan and the simplification step will automaticaly remove the domains
1338 * since they are the same as the corresponding contexts. The effect of this
1339 * function is to stop the code generation at the level this function is called,
1340 * the resulting code do not consider the next dimensions.
1341 * - January 11th 2005: first version.
1343 CloogLoop * cloog_loop_stop(CloogLoop * loop, CloogDomain * context)
1344 { if (loop == NULL)
1345 return NULL ;
1346 else
1347 { cloog_domain_free(loop->domain) ;
1348 loop->domain = cloog_domain_copy(context) ;
1349 loop->next = cloog_loop_stop(loop->next, context) ;
1352 return loop ;
1356 static int level_is_constant(int level, int scalar, int *scaldims, int nb_scattdims)
1358 return level && (level+scalar <= nb_scattdims) && (scaldims[level+scalar-1]);
1363 * Compare the constant dimensions of loops 'l1' and 'l2' starting at 'scalar'
1364 * and return -1 if the vector of constant dimensions of 'l1' is smaller
1365 * than that of 'l2', 0 if they are the same and +1 if that of 'l1' is
1366 * greater than that of 'l2'.
1367 * This function should be called on the innermost loop (the loop
1368 * containing a block).
1369 * \param l1 Loop to be compared with l2.
1370 * \param l2 Loop to be compared with l1.
1371 * \param level Current non-scalar dimension.
1372 * \param scaldims Boolean array saying whether a dimension is scalar or not.
1373 * \param nb_scattdims Size of the scaldims array.
1374 * \param scalar Current scalar dimension.
1375 * \return -1 if (l1 < l2), 0 if (l1 == l2) and +1 if (l1 > l2)
1377 int cloog_loop_constant_cmp(CloogLoop *l1, CloogLoop *l2, int level,
1378 int *scaldims, int nb_scattdims, int scalar)
1380 CloogBlock *b1, *b2;
1381 b1 = l1->block;
1382 b2 = l2->block;
1383 while (level_is_constant(level, scalar, scaldims, nb_scattdims)) {
1384 int cmp = cloog_int_cmp(b1->scaldims[scalar], b2->scaldims[scalar]);
1385 if (cmp)
1386 return cmp;
1387 scalar++;
1389 return 0;
1394 * cloog_loop_scalar_gt function:
1395 * This function returns 1 if loop 'l1' is greater than loop 'l2' for the
1396 * scalar dimension vector that begins at dimension 'scalar', 0 otherwise. What
1397 * we want to know is whether a loop is scheduled before another one or not.
1398 * This function solves the problem when the considered dimension for scheduling
1399 * is a scalar dimension. Since there may be a succession of scalar dimensions,
1400 * this function will reason about the vector of scalar dimension that begins
1401 * at dimension 'level+scalar' and finish to the first non-scalar dimension.
1402 * \param l1 Loop to be compared with l2.
1403 * \param l2 Loop to be compared with l1.
1404 * \param level Current non-scalar dimension.
1405 * \param scaldims Boolean array saying whether a dimension is scalar or not.
1406 * \param nb_scattdims Size of the scaldims array.
1407 * \param scalar Current scalar dimension.
1408 * \return 1 if (l1 > l2), 0 otherwise.
1410 * - September 9th 2005: first version.
1411 * - October 15nd 2007: now "greater than" instead of "greater or equal".
1413 int cloog_loop_scalar_gt(l1, l2, level, scaldims, nb_scattdims, scalar)
1414 CloogLoop * l1, * l2 ;
1415 int level, * scaldims, nb_scattdims, scalar ;
1417 return cloog_loop_constant_cmp(l1, l2, level, scaldims, nb_scattdims, scalar) > 0;
1422 * cloog_loop_scalar_eq function:
1423 * This function returns 1 if loop 'l1' is equal to loop 'l2' for the scalar
1424 * dimension vector that begins at dimension 'scalar', 0 otherwise. What we want
1425 * to know is whether two loops are scheduled for the same time or not.
1426 * This function solves the problem when the considered dimension for scheduling
1427 * is a scalar dimension. Since there may be a succession of scalar dimensions,
1428 * this function will reason about the vector of scalar dimension that begins
1429 * at dimension 'level+scalar' and finish to the first non-scalar dimension.
1430 * - l1 and l2 are the loops to compare,
1431 * - level is the current non-scalar dimension,
1432 * - scaldims is the boolean array saying whether a dimension is scalar or not,
1433 * - nb_scattdims is the size of the scaldims array,
1434 * - scalar is the current scalar dimension.
1436 * - September 9th 2005 : first version.
1438 int cloog_loop_scalar_eq(l1, l2, level, scaldims, nb_scattdims, scalar)
1439 CloogLoop * l1, * l2 ;
1440 int level, * scaldims, nb_scattdims, scalar ;
1442 return cloog_loop_constant_cmp(l1, l2, level, scaldims, nb_scattdims, scalar) == 0;
1447 * cloog_loop_scalar_sort function:
1448 * This function sorts a linked list of loops (loop) with respect to the
1449 * scalar dimension vector that begins at dimension 'scalar'. Since there may
1450 * be a succession of scalar dimensions, this function will reason about the
1451 * vector of scalar dimension that begins at dimension 'level+scalar' and
1452 * finish to the first non-scalar dimension.
1453 * \param loop Loop list to sort.
1454 * \param level Current non-scalar dimension.
1455 * \param scaldims Boolean array saying whether a dimension is scalar or not.
1456 * \param nb_scattdims Size of the scaldims array.
1457 * \param scalar Current scalar dimension.
1458 * \return A pointer to the sorted list.
1460 * - July 2nd 2005: first developments.
1461 * - September 2nd 2005: first version.
1462 * - October 15nd 2007: complete rewrite to remove bugs, now a bubble sort.
1464 CloogLoop * cloog_loop_scalar_sort(loop, level, scaldims, nb_scattdims, scalar)
1465 CloogLoop * loop ;
1466 int level, * scaldims, nb_scattdims, scalar ;
1467 { int ok ;
1468 CloogLoop **current;
1470 do {
1471 ok = 1;
1472 for (current = &loop; (*current)->next; current = &(*current)->next) {
1473 CloogLoop *next = (*current)->next;
1474 if (cloog_loop_scalar_gt(*current,next,level,scaldims,nb_scattdims,scalar)) {
1475 ok = 0;
1476 (*current)->next = next->next;
1477 next->next = *current;
1478 *current = next;
1481 } while (!ok);
1483 return loop ;
1488 * cloog_loop_generate_backtrack function:
1489 * adaptation from LoopGen 0.4 by F. Quillere. This function implements the
1490 * backtrack of the Quillere et al. algorithm (see the Quillere paper).
1491 * It eliminates unused iterations of the current level for the new one. See the
1492 * example called linearity-1-1 example with and without this part for an idea.
1493 * - October 26th 2001: first version in cloog_loop_generate_general.
1494 * - July 31th 2002: (debug) no more parasite loops (REALLY hard !).
1495 * - October 30th 2005: extraction from cloog_loop_generate_general.
1497 CloogLoop *cloog_loop_generate_backtrack(CloogLoop *loop,
1498 int level, CloogOptions *options)
1500 CloogDomain * domain ;
1501 CloogLoop * now, * now2, * next, * next2, * end, * temp, * l, * inner,
1502 * new_loop ;
1504 temp = loop ;
1505 loop = NULL ;
1507 while (temp != NULL)
1508 { l = NULL ;
1509 inner = temp->inner ;
1511 while (inner != NULL)
1512 { next = inner->next ;
1513 /* This 'if' and its first part is the debug of july 31th 2002. */
1514 if (inner->block != NULL) {
1515 end = cloog_loop_alloc(temp->state, inner->domain, 0, NULL,
1516 inner->block, NULL, NULL);
1517 domain = cloog_domain_copy(temp->domain) ;
1518 new_loop = cloog_loop_alloc(temp->state, domain, 0, NULL,
1519 NULL, end, NULL);
1521 else
1522 new_loop = cloog_loop_project(inner, level);
1524 cloog_loop_free_parts(inner,0,0,0,0) ;
1525 cloog_loop_add(&l,&now2,new_loop) ;
1526 inner = next ;
1529 temp->inner = NULL ;
1531 if (l != NULL)
1532 { l = cloog_loop_separate(l) ;
1533 l = cloog_loop_sort(l, level);
1534 while (l != NULL) {
1535 l->stride = cloog_stride_copy(l->stride);
1536 cloog_loop_add(&loop,&now,l) ;
1537 l = l->next ;
1540 next2 = temp->next ;
1541 cloog_loop_free_parts(temp,1,0,0,0) ;
1542 temp = next2 ;
1545 return loop ;
1550 * Return 1 if we need to continue recursing to the specified level.
1552 int cloog_loop_more(CloogLoop *loop, int level, int scalar, int nb_scattdims)
1554 return level + scalar <= nb_scattdims ||
1555 cloog_domain_dimension(loop->domain) >= level;
1559 * Return 1 if the domains of all loops in the given linked list
1560 * have a fixed value at the given level.
1561 * In principle, there would be no need to check that the fixed value is
1562 * the same for each of these loops because this function is only
1563 * called on a component. However, not all backends perform a proper
1564 * decomposition into components.
1566 int cloog_loop_is_constant(CloogLoop *loop, int level)
1568 cloog_int_t c1, c2;
1569 int r = 1;
1571 cloog_int_init(c1);
1572 cloog_int_init(c2);
1574 if (!cloog_domain_lazy_isconstant(loop->domain, level - 1, &c1))
1575 r = 0;
1577 for (loop = loop->next; r && loop; loop = loop->next) {
1578 if (!cloog_domain_lazy_isconstant(loop->domain, level - 1, &c2))
1579 r = 0;
1580 else if (cloog_int_ne(c1, c2))
1581 r = 0;
1584 cloog_int_clear(c1);
1585 cloog_int_clear(c2);
1587 return r;
1591 * Assuming all domains in the given linked list of loop
1592 * have a fixed values at level, return a single loop with
1593 * a domain corresponding to this fixed value and with as
1594 * list of inner loops the concatenation of all inner loops
1595 * in the original list.
1597 CloogLoop *cloog_loop_constant(CloogLoop *loop, int level)
1599 CloogLoop *res, *inner, *tmp;
1600 CloogDomain *domain, *t;
1602 if (!loop)
1603 return loop;
1605 inner = loop->inner;
1606 domain = loop->domain;
1607 for (tmp = loop->next; tmp; tmp = tmp->next) {
1608 inner = cloog_loop_concat(inner, tmp->inner);
1609 domain = cloog_domain_union(domain, tmp->domain);
1612 domain = cloog_domain_simple_convex(t = domain);
1613 cloog_domain_free(t);
1615 res = cloog_loop_alloc(loop->state, domain, 0, NULL, NULL, inner, NULL);
1617 cloog_loop_free_parts(loop, 0, 0, 0, 1);
1619 return res;
1623 /* Unroll the given loop at the given level, provided it is allowed
1624 * by cloog_domain_can_unroll.
1625 * If so, we return a list of loops, one for each iteration of the original
1626 * loop. Otherwise, we simply return the original loop.
1628 static CloogLoop *loop_unroll(CloogLoop *loop, int level)
1630 int can_unroll;
1631 cloog_int_t i;
1632 cloog_int_t n;
1633 CloogConstraint *lb;
1634 CloogLoop *res = NULL;
1635 CloogLoop **next_res = &res;
1636 CloogDomain *domain;
1637 CloogLoop *inner;
1639 cloog_int_init(n);
1640 can_unroll = cloog_domain_can_unroll(loop->domain, level, &n, &lb);
1641 if (!can_unroll) {
1642 cloog_int_clear(n);
1643 return loop;
1646 cloog_int_init(i);
1648 for (cloog_int_set_si(i, 0); cloog_int_lt(i, n); cloog_int_add_ui(i, i, 1)) {
1649 domain = cloog_domain_copy(loop->domain);
1650 domain = cloog_domain_fixed_offset(domain, level, lb, i);
1651 inner = cloog_loop_copy(loop->inner);
1652 inner = cloog_loop_restrict_all(inner, domain);
1653 if (!inner) {
1654 cloog_domain_free(domain);
1655 continue;
1657 *next_res = cloog_loop_alloc(loop->state, domain, 1, NULL, NULL,
1658 inner, NULL);
1659 next_res = &(*next_res)->next;
1662 cloog_int_clear(i);
1663 cloog_int_clear(n);
1664 cloog_constraint_release(lb);
1666 cloog_loop_free(loop);
1668 return res;
1672 /* Unroll all loops in the given list at the given level, provided
1673 * they can be unrolled.
1675 CloogLoop *cloog_loop_unroll(CloogLoop *loop, int level)
1677 CloogLoop *now, *next;
1678 CloogLoop *res = NULL;
1679 CloogLoop **next_res = &res;
1681 for (now = loop; now; now = next) {
1682 next = now->next;
1683 now->next = NULL;
1685 *next_res = loop_unroll(now, level);
1687 while (*next_res)
1688 next_res = &(*next_res)->next;
1691 return res;
1694 CloogLoop *cloog_loop_generate_restricted_or_stop(CloogLoop *loop,
1695 CloogDomain *context,
1696 int level, int scalar, int *scaldims, int nb_scattdims,
1697 CloogOptions *options);
1699 CloogLoop *cloog_loop_recurse(CloogLoop *loop,
1700 int level, int scalar, int *scaldims, int nb_scattdims,
1701 int constant, CloogOptions *options);
1705 * Recurse on the inner loops of the given single loop.
1707 * - loop is the loop for which we have to generate scanning code,
1708 * - level is the current non-scalar dimension,
1709 * - scalar is the current scalar dimension,
1710 * - scaldims is the boolean array saying whether a dimension is scalar or not,
1711 * - nb_scattdims is the size of the scaldims array,
1712 * - constant is true if the loop is known to be executed at most once
1713 * - options are the general code generation options.
1715 static CloogLoop *loop_recurse(CloogLoop *loop,
1716 int level, int scalar, int *scaldims, int nb_scattdims,
1717 int constant, CloogOptions *options)
1719 CloogLoop *inner, *into, *end, *next, *l, *now;
1720 CloogDomain *domain;
1722 if (level && options->strides && !constant)
1723 cloog_loop_stride(loop, level);
1725 if (!constant &&
1726 options->first_unroll >= 0 && level + scalar >= options->first_unroll) {
1727 loop = cloog_loop_unroll(loop, level);
1728 if (loop->next)
1729 return cloog_loop_recurse(loop, level, scalar, scaldims,
1730 nb_scattdims, 1, options);
1733 if (level && options->otl)
1734 cloog_loop_otl(loop, level);
1735 inner = loop->inner;
1736 domain = cloog_domain_copy(loop->domain);
1737 domain = cloog_domain_add_stride_constraint(domain, loop->stride);
1738 into = NULL ;
1739 while (inner != NULL)
1740 { /* 4b. -ced- recurse for each sub-list of non terminal loops. */
1741 if (cloog_loop_more(inner, level + 1, scalar, nb_scattdims)) {
1742 end = inner;
1743 while ((end->next != NULL) &&
1744 cloog_loop_more(end->next, level + 1, scalar, nb_scattdims))
1745 end = end->next ;
1747 next = end->next ;
1748 end->next = NULL ;
1750 l = cloog_loop_generate_restricted_or_stop(inner, domain,
1751 level + 1, scalar, scaldims, nb_scattdims, options);
1753 if (l != NULL)
1754 cloog_loop_add_list(&into,&now,l) ;
1756 inner = next ;
1758 else
1759 { cloog_loop_add(&into,&now,inner) ;
1760 inner = inner->next ;
1764 cloog_domain_free(domain);
1765 loop->inner = into;
1766 return loop;
1771 * Recurse on the inner loops of each of the loops in the loop list.
1773 * - loop is the loop list for which we have to generate scanning code,
1774 * - level is the current non-scalar dimension,
1775 * - scalar is the current scalar dimension,
1776 * - scaldims is the boolean array saying whether a dimension is scalar or not,
1777 * - nb_scattdims is the size of the scaldims array,
1778 * - constant is true if the loop is known to be executed at most once
1779 * - options are the general code generation options.
1781 CloogLoop *cloog_loop_recurse(CloogLoop *loop,
1782 int level, int scalar, int *scaldims, int nb_scattdims,
1783 int constant, CloogOptions *options)
1785 CloogLoop *now, *next;
1786 CloogLoop *res = NULL;
1787 CloogLoop **next_res = &res;
1789 for (now = loop; now; now = next) {
1790 next = now->next;
1791 now->next = NULL;
1793 *next_res = loop_recurse(now, level, scalar, scaldims, nb_scattdims,
1794 constant, options);
1796 while (*next_res)
1797 next_res = &(*next_res)->next;
1800 return res;
1804 /* Get the max across all 'first' depths for statements in this
1805 * stmt list, and the max across all 'last' depths */
1806 void cloog_statement_get_fl(CloogStatement *s, int *f, int *l,
1807 CloogOptions *options)
1809 if (s == NULL) return;
1811 int fs, ls;
1813 if (options->fs != NULL && options->ls != NULL) {
1814 fs = options->fs[s->number-1];
1815 ls = options->ls[s->number-1];
1816 *f = (fs > *f)? fs: *f;
1817 *l = (ls > *l)? ls: *l;
1818 }else{
1819 *f = -1;
1820 *l = -1;
1823 cloog_statement_get_fl(s->next, f, l, options);
1826 /* Get the max across all 'first' depths for statements under
1827 * this loop, and the max across all 'last' depths */
1828 void cloog_loop_get_fl(CloogLoop *loop, int *f, int *l,
1829 CloogOptions *options)
1831 if (loop == NULL) return;
1833 CloogBlock *block = loop->block;
1835 if (block != NULL && block->statement != NULL) {
1836 cloog_statement_get_fl(block->statement, f, l, options);
1839 cloog_loop_get_fl(loop->inner, f, l, options);
1840 cloog_loop_get_fl(loop->next, f, l, options);
1844 * cloog_loop_generate_general function:
1845 * Adaptation from LoopGen 0.4 by F. Quillere. This function implements the
1846 * Quillere algorithm for polyhedron scanning from step 3 to 5.
1847 * (see the Quillere paper).
1848 * - loop is the loop for which we have to generate a scanning code,
1849 * - level is the current non-scalar dimension,
1850 * - scalar is the current scalar dimension,
1851 * - scaldims is the boolean array saying whether a dimension is scalar or not,
1852 * - nb_scattdims is the size of the scaldims array,
1853 * - options are the general code generation options.
1855 * - October 26th 2001: first version.
1856 * - July 3rd->11th 2003: memory leaks hunt and correction.
1857 * - June 22nd 2005: Adaptation for GMP.
1858 * - September 2nd 2005: The function have been cutted out in two pieces:
1859 * cloog_loop_generate and this one, in order to handle
1860 * the scalar dimension case more efficiently with
1861 * cloog_loop_generate_scalar.
1862 * - November 15th 2005: (debug) the result of the cloog_loop_generate call may
1863 * be a list of polyhedra (especially if stop option is
1864 * used): cloog_loop_add_list instead of cloog_loop_add.
1865 * - May 31, 2012: statement-wise first and last depth for loop separation
1866 * if options->fs and option->ls arrays are set, it will override what has
1867 * been supplied via "-f/-l"
1870 CloogLoop *cloog_loop_generate_general(CloogLoop *loop,
1871 int level, int scalar, int *scaldims, int nb_scattdims,
1872 CloogOptions *options)
1874 CloogLoop *res, *now, *temp, *l, *new_loop, *next;
1875 int separate = 0;
1876 int constant = 0;
1878 int first = -1;
1879 int last = -1;
1881 now = NULL;
1883 /* Get the -f and -l for each statement */
1884 cloog_loop_get_fl(loop, &first, &last, options);
1886 /* If stmt-wise options are not set or set inconsistently, use -f/-l ones globally */
1887 if (first <= 0 || last < first) {
1888 first = options->f;
1889 last = options->l;
1892 /* 3. Separate all projections into disjoint polyhedra. */
1893 if (level > 0 && cloog_loop_is_constant(loop, level)) {
1894 res = cloog_loop_constant(loop, level);
1895 constant = 1;
1896 }else if ((first > level+scalar) || (first < 0)) {
1897 res = cloog_loop_merge(loop, level, options);
1898 }else{
1899 res = cloog_loop_separate(loop);
1900 separate = 1;
1903 /* 3b. -correction- sort the loops to determine their textual order. */
1904 res = cloog_loop_sort(res, level);
1906 res = cloog_loop_restrict_inner(res);
1908 if (separate)
1909 res = cloog_loop_specialize(res, level, scalar, scaldims, nb_scattdims);
1911 /* 4. Recurse for each loop with the current domain as context. */
1912 temp = res ;
1913 res = NULL ;
1914 if (!level || (level+scalar < last) || (last < 0))
1915 res = cloog_loop_recurse(temp, level, scalar, scaldims, nb_scattdims,
1916 constant, options);
1917 else
1918 while (temp != NULL)
1919 { next = temp->next ;
1920 l = cloog_loop_nest(temp->inner, temp->domain, level+1);
1921 new_loop = cloog_loop_alloc(temp->state, temp->domain, 0, NULL,
1922 NULL, l, NULL);
1923 temp->inner = NULL ;
1924 temp->next = NULL ;
1925 cloog_loop_free_parts(temp,0,0,0,0) ;
1926 cloog_loop_add(&res,&now,new_loop) ;
1927 temp = next ;
1930 if (options->strides)
1931 res = cloog_loop_propagate_lower_bound(res, level);
1933 /* 5. eliminate unused iterations of the current level for the new one. See
1934 * the example called linearity-1-1 example with and without this part
1935 * for an idea.
1937 if (options->backtrack && level &&
1938 ((level+scalar < last) || (last < 0)) &&
1939 ((first <= level+scalar) && !(first < 0)))
1940 res = cloog_loop_generate_backtrack(res, level, options);
1942 /* Pray for my new paper to be accepted somewhere since the following stuff
1943 * is really amazing :-) !
1944 * Far long later: The paper has been accepted to PACT 2004 :-))). But there
1945 * are still some bugs and I have no time to fix them. Thus now you have to
1946 * pray for me to get an academic position for that really amazing stuff :-) !
1947 * Later again: OK, I get my academic position, but still I have not enough
1948 * time to fix and clean this part... Pray again :-) !!!
1950 /* res = cloog_loop_unisolate(res,level) ;*/
1952 return(res) ;
1956 CloogLoop *cloog_loop_generate_restricted(CloogLoop *loop,
1957 int level, int scalar, int *scaldims, int nb_scattdims,
1958 CloogOptions *options);
1962 * cloog_loop_generate_scalar function:
1963 * This function applies the simplified code generation scheme in the trivial
1964 * case of scalar dimensions. When dealing with scalar dimensions, there is
1965 * no need of costly polyhedral operations for separation or sorting: sorting
1966 * is a question of comparing scalar vectors and separation amounts to consider
1967 * only loops with the same scalar vector for the next step of the code
1968 * generation process. This function achieves the separation/sorting process
1969 * for the vector of scalar dimension that begins at dimension 'level+scalar'
1970 * and finish to the first non-scalar dimension.
1971 * - loop is the loop for which we have to generate a scanning code,
1972 * - level is the current non-scalar dimension,
1973 * - scalar is the current scalar dimension,
1974 * - scaldims is the boolean array saying whether a dimension is scalar or not,
1975 * - nb_scattdims is the size of the scaldims array,
1976 * - options are the general code generation options.
1978 * - September 2nd 2005: First version.
1980 CloogLoop *cloog_loop_generate_scalar(CloogLoop *loop,
1981 int level, int scalar, int *scaldims, int nb_scattdims,
1982 CloogOptions *options)
1983 { CloogLoop * res, * now, * temp, * l, * end, * next, * ref ;
1984 int scalar_new;
1986 /* We sort the loop list with respect to the current scalar vector. */
1987 res = cloog_loop_scalar_sort(loop,level,scaldims,nb_scattdims,scalar) ;
1989 scalar_new = scalar + scaldims[level + scalar - 1];
1991 temp = res ;
1992 res = NULL ;
1993 while (temp != NULL)
1994 { /* Then we will appy the general code generation process to each sub-list
1995 * of loops with the same scalar vector.
1997 end = temp ;
1998 ref = temp ;
2000 while((end->next != NULL) &&
2001 cloog_loop_more(end->next, level, scalar_new, nb_scattdims) &&
2002 cloog_loop_scalar_eq(ref,end->next,level,scaldims,nb_scattdims,scalar))
2003 end = end->next ;
2005 next = end->next ;
2006 end->next = NULL ;
2008 /* For the next dimension, scalar value is updated by adding the scalar
2009 * vector size, which is stored at scaldims[level+scalar-1].
2011 if (cloog_loop_more(temp, level, scalar_new, nb_scattdims)) {
2012 l = cloog_loop_generate_restricted(temp, level, scalar_new,
2013 scaldims, nb_scattdims, options);
2015 if (l != NULL)
2016 cloog_loop_add_list(&res, &now, l);
2017 } else
2018 cloog_loop_add(&res, &now, temp);
2020 temp = next ;
2023 return res ;
2027 /* Compare loop with the next loop based on their constant dimensions.
2028 * The result is < 0, == 0 or > 0 depending on whether the constant
2029 * dimensions of loop are lexicographically smaller, equal or greater
2030 * than those of loop->next.
2031 * If loop is the last in the list, then it is assumed to be smaller
2032 * than the "next" one.
2034 static int cloog_loop_next_scal_cmp(CloogLoop *loop)
2036 int i;
2037 int nb_scaldims;
2039 if (!loop->next)
2040 return -1;
2042 nb_scaldims = loop->block->nb_scaldims;
2043 if (loop->next->block->nb_scaldims < nb_scaldims)
2044 nb_scaldims = loop->next->block->nb_scaldims;
2046 for (i = 0; i < nb_scaldims; ++i) {
2047 int cmp = cloog_int_cmp(loop->block->scaldims[i],
2048 loop->next->block->scaldims[i]);
2049 if (cmp)
2050 return cmp;
2052 return loop->block->nb_scaldims - loop->next->block->nb_scaldims;
2056 /* Check whether the globally constant dimensions of a and b
2057 * have the same value for all globally constant dimensions
2058 * that are situated before any (locally) non-constant dimension.
2060 static int cloog_loop_equal_prefix(CloogLoop *a, CloogLoop *b,
2061 int *scaldims, int nb_scattdims)
2063 int i;
2064 int cst = 0;
2065 int dim = 0;
2067 for (i = 0; i < nb_scattdims; ++i) {
2068 if (!scaldims[i]) {
2069 dim++;
2070 continue;
2072 if (!cloog_int_eq(a->block->scaldims[cst], b->block->scaldims[cst]))
2073 break;
2074 cst++;
2076 for (i = i + 1; i < nb_scattdims; ++i) {
2077 if (scaldims[i])
2078 continue;
2079 if (!cloog_domain_lazy_isconstant(a->domain, dim, NULL))
2080 return 0;
2081 /* No need to check that dim is also constant in b and that the
2082 * constant values are equal. That will happen during the check
2083 * whether the two domains are equal.
2085 dim++;
2087 return 1;
2091 /* Try to block adjacent loops in the loop list "loop".
2092 * We only attempt blocking if the constant dimensions of the loops
2093 * in the least are (not necessarily strictly) increasing.
2094 * Then we look for a sublist such that the first (begin) has constant
2095 * dimensions strictly larger than the previous loop in the complete
2096 * list and such that the loop (end) after the last loop in the sublist
2097 * has constant dimensions strictly larger than the last loop in the sublist.
2098 * Furthermore, all loops in the sublist should have the same domain
2099 * (with globally constant dimensions removed) and the difference
2100 * (if any) in constant dimensions may only occur after all the
2101 * (locally) constant dimensions.
2102 * If we find such a sublist, then the blocks of all but the first
2103 * are merged into the block of the first.
2105 * Note that this function can only be called before the global
2106 * blocklist has been created because it may otherwise modify and destroy
2107 * elements on that list.
2109 CloogLoop *cloog_loop_block(CloogLoop *loop, int *scaldims, int nb_scattdims)
2111 CloogLoop *begin, *end, *l;
2112 int begin_after_previous;
2113 int end_after_previous;
2115 if (!loop->next)
2116 return loop;
2117 for (begin = loop; begin; begin = begin->next) {
2118 if (!begin->block || !begin->block->scaldims)
2119 return loop;
2120 if (cloog_loop_next_scal_cmp(begin) > 0)
2121 return loop;
2124 begin_after_previous = 1;
2125 for (begin = loop; begin; begin = begin->next) {
2126 if (!begin_after_previous) {
2127 begin_after_previous = cloog_loop_next_scal_cmp(begin) < 0;
2128 continue;
2131 end_after_previous = cloog_loop_next_scal_cmp(begin) < 0;
2132 for (end = begin->next; end; end = end->next) {
2133 if (!cloog_loop_equal_prefix(begin, end, scaldims, nb_scattdims))
2134 break;
2135 if (!cloog_domain_lazy_equal(begin->domain, end->domain))
2136 break;
2137 end_after_previous = cloog_loop_next_scal_cmp(end) < 0;
2139 if (end != begin->next && end_after_previous) {
2140 for (l = begin->next; l != end; l = begin->next) {
2141 cloog_block_merge(begin->block, l->block);
2142 begin->next = l->next;
2143 cloog_loop_free_parts(l, 1, 0, 1, 0);
2147 begin_after_previous = cloog_loop_next_scal_cmp(begin) < 0;
2150 return loop;
2155 * Check whether for any fixed iteration of the outer loops,
2156 * there is an iteration of loop1 that is lexicographically greater
2157 * than an iteration of loop2.
2158 * Return 1 if there exists (or may exist) such a pair.
2159 * Return 0 if all iterations of loop1 are lexicographically smaller
2160 * than the iterations of loop2.
2161 * If no iteration is lexicographically greater, but if there are
2162 * iterations that are equal to iterations of loop2, then return "def".
2163 * This is useful for ensuring that such statements are not reordered.
2164 * Some users, including the test_run target in test, expect
2165 * the statements at a given point to be run in the original order.
2166 * Passing the value "0" for "def" would allow such statements to be reordered
2167 * and would allow for the detection of more components.
2169 int cloog_loop_follows(CloogLoop *loop1, CloogLoop *loop2,
2170 int level, int scalar, int *scaldims, int nb_scattdims, int def)
2172 int dim1, dim2;
2174 dim1 = cloog_domain_dimension(loop1->domain);
2175 dim2 = cloog_domain_dimension(loop2->domain);
2176 while ((level <= dim1 && level <= dim2) ||
2177 level_is_constant(level, scalar, scaldims, nb_scattdims)) {
2178 if (level_is_constant(level, scalar, scaldims, nb_scattdims)) {
2179 int cmp = cloog_loop_constant_cmp(loop1, loop2, level, scaldims,
2180 nb_scattdims, scalar);
2181 if (cmp > 0)
2182 return 1;
2183 if (cmp < 0)
2184 return 0;
2185 scalar += scaldims[level + scalar - 1];
2186 } else {
2187 int follows = cloog_domain_follows(loop1->domain, loop2->domain,
2188 level);
2189 if (follows > 0)
2190 return 1;
2191 if (follows < 0)
2192 return 0;
2193 level++;
2197 return def;
2201 /* Structure for representing the nodes in the graph being traversed
2202 * using Tarjan's algorithm.
2203 * index represents the order in which nodes are visited.
2204 * min_index is the index of the root of a (sub)component.
2205 * on_stack indicates whether the node is currently on the stack.
2207 struct cloog_loop_sort_node {
2208 int index;
2209 int min_index;
2210 int on_stack;
2212 /* Structure for representing the graph being traversed
2213 * using Tarjan's algorithm.
2214 * len is the number of nodes
2215 * node is an array of nodes
2216 * stack contains the nodes on the path from the root to the current node
2217 * sp is the stack pointer
2218 * index is the index of the last node visited
2219 * order contains the elements of the components separated by -1
2220 * op represents the current position in order
2222 struct cloog_loop_sort {
2223 int len;
2224 struct cloog_loop_sort_node *node;
2225 int *stack;
2226 int sp;
2227 int index;
2228 int *order;
2229 int op;
2232 /* Allocate and initialize cloog_loop_sort structure.
2234 static struct cloog_loop_sort *cloog_loop_sort_alloc(int len)
2236 struct cloog_loop_sort *s;
2237 int i;
2239 s = (struct cloog_loop_sort *)malloc(sizeof(struct cloog_loop_sort));
2240 assert(s);
2241 s->len = len;
2242 s->node = (struct cloog_loop_sort_node *)
2243 malloc(len * sizeof(struct cloog_loop_sort_node));
2244 assert(s->node);
2245 for (i = 0; i < len; ++i)
2246 s->node[i].index = -1;
2247 s->stack = (int *)malloc(len * sizeof(int));
2248 assert(s->stack);
2249 s->order = (int *)malloc(2 * len * sizeof(int));
2250 assert(s->order);
2252 s->sp = 0;
2253 s->index = 0;
2254 s->op = 0;
2256 return s;
2259 /* Free cloog_loop_sort structure.
2261 static void cloog_loop_sort_free(struct cloog_loop_sort *s)
2263 free(s->node);
2264 free(s->stack);
2265 free(s->order);
2266 free(s);
2270 /* Check whether for any fixed iteration of the outer loops,
2271 * there is an iteration of loop1 that is lexicographically greater
2272 * than an iteration of loop2, where the iteration domains are
2273 * available in the inner loops of the arguments.
2275 * By using this functions to detect components, we ensure that
2276 * two CloogLoops appear in the same component if some iterations of
2277 * each loop should be executed before some iterations of the other loop.
2278 * Since we also want two CloogLoops that have exactly the same
2279 * iteration domain at the current level to be placed in the same component,
2280 * we first check if these domains are indeed the same.
2282 static int inner_loop_follows(CloogLoop *loop1, CloogLoop *loop2,
2283 int level, int scalar, int *scaldims, int nb_scattdims, int def)
2285 int f;
2287 f = cloog_domain_lazy_equal(loop1->domain, loop2->domain);
2288 if (!f)
2289 f = cloog_loop_follows(loop1->inner, loop2->inner,
2290 level, scalar, scaldims, nb_scattdims, def);
2292 return f;
2296 /* Perform Tarjan's algorithm for computing the strongly connected components
2297 * in the graph with the individual CloogLoops as vertices.
2298 * Two CloopLoops appear in the same component if they both (indirectly)
2299 * "follow" each other, where the following relation is determined
2300 * by the follows function.
2302 static void cloog_loop_components_tarjan(struct cloog_loop_sort *s,
2303 CloogLoop **loop_array, int i, int level, int scalar, int *scaldims,
2304 int nb_scattdims,
2305 int (*follows)(CloogLoop *loop1, CloogLoop *loop2,
2306 int level, int scalar, int *scaldims, int nb_scattdims, int def))
2308 int j;
2310 s->node[i].index = s->index;
2311 s->node[i].min_index = s->index;
2312 s->node[i].on_stack = 1;
2313 s->index++;
2314 s->stack[s->sp++] = i;
2316 for (j = s->len - 1; j >= 0; --j) {
2317 int f;
2319 if (j == i)
2320 continue;
2321 if (s->node[j].index >= 0 &&
2322 (!s->node[j].on_stack ||
2323 s->node[j].index > s->node[i].min_index))
2324 continue;
2326 f = follows(loop_array[i], loop_array[j],
2327 level, scalar, scaldims, nb_scattdims, i > j);
2328 if (!f)
2329 continue;
2331 if (s->node[j].index < 0) {
2332 cloog_loop_components_tarjan(s, loop_array, j, level, scalar,
2333 scaldims, nb_scattdims, follows);
2334 if (s->node[j].min_index < s->node[i].min_index)
2335 s->node[i].min_index = s->node[j].min_index;
2336 } else if (s->node[j].index < s->node[i].min_index)
2337 s->node[i].min_index = s->node[j].index;
2340 if (s->node[i].index != s->node[i].min_index)
2341 return;
2343 do {
2344 j = s->stack[--s->sp];
2345 s->node[j].on_stack = 0;
2346 s->order[s->op++] = j;
2347 } while (j != i);
2348 s->order[s->op++] = -1;
2352 static int qsort_index_cmp(const void *p1, const void *p2)
2354 return *(int *)p1 - *(int *)p2;
2357 /* Sort the elements of the component starting at list.
2358 * The list is terminated by a -1.
2360 static void sort_component(int *list)
2362 int len;
2364 for (len = 0; list[len] != -1; ++len)
2367 qsort(list, len, sizeof(int), qsort_index_cmp);
2370 /* Given an array of indices "list" into the "loop_array" array,
2371 * terminated by -1, construct a linked list of the corresponding
2372 * entries and put the result in *res.
2373 * The value returned is the number of CloogLoops in the (linked) list
2375 static int extract_component(CloogLoop **loop_array, int *list, CloogLoop **res)
2377 int i = 0;
2379 sort_component(list);
2380 while (list[i] != -1) {
2381 *res = loop_array[list[i]];
2382 res = &(*res)->next;
2383 ++i;
2385 *res = NULL;
2387 return i;
2392 * Call cloog_loop_generate_scalar or cloog_loop_generate_general
2393 * on each of the strongly connected components in the list of CloogLoops
2394 * pointed to by "loop".
2396 * We use Tarjan's algorithm to find the strongly connected components.
2397 * Note that this algorithm also topologically sorts the components.
2399 * The components are treated separately to avoid spurious separations.
2400 * The concatentation of the results may contain successive loops
2401 * with the same bounds, so we try to combine such loops.
2403 CloogLoop *cloog_loop_generate_components(CloogLoop *loop,
2404 int level, int scalar, int *scaldims, int nb_scattdims,
2405 CloogOptions *options)
2407 int i, nb_loops;
2408 CloogLoop *tmp;
2409 CloogLoop *res, **res_next;
2410 CloogLoop **loop_array;
2411 struct cloog_loop_sort *s;
2413 if (level == 0 || !loop->next)
2414 return cloog_loop_generate_general(loop, level, scalar,
2415 scaldims, nb_scattdims, options);
2417 nb_loops = cloog_loop_count(loop);
2419 loop_array = (CloogLoop **)malloc(nb_loops * sizeof(CloogLoop *));
2420 assert(loop_array);
2422 for (i = 0, tmp = loop; i < nb_loops; i++, tmp = tmp->next)
2423 loop_array[i] = tmp;
2425 s = cloog_loop_sort_alloc(nb_loops);
2426 for (i = nb_loops - 1; i >= 0; --i) {
2427 if (s->node[i].index >= 0)
2428 continue;
2429 cloog_loop_components_tarjan(s, loop_array, i, level, scalar, scaldims,
2430 nb_scattdims, &inner_loop_follows);
2433 i = 0;
2434 res = NULL;
2435 res_next = &res;
2436 while (nb_loops) {
2437 int n = extract_component(loop_array, &s->order[i], &tmp);
2438 i += n + 1;
2439 nb_loops -= n;
2440 *res_next = cloog_loop_generate_general(tmp, level, scalar,
2441 scaldims, nb_scattdims, options);
2442 while (*res_next)
2443 res_next = &(*res_next)->next;
2446 cloog_loop_sort_free(s);
2448 free(loop_array);
2450 res = cloog_loop_combine(res);
2452 return res;
2456 /* For each loop in the list "loop", decompose the list of
2457 * inner loops into strongly connected components and put
2458 * the components into separate loops at the top level.
2460 CloogLoop *cloog_loop_decompose_inner(CloogLoop *loop,
2461 int level, int scalar, int *scaldims, int nb_scattdims)
2463 CloogLoop *l, *tmp;
2464 CloogLoop **loop_array;
2465 int i, n_loops, max_loops = 0;
2466 struct cloog_loop_sort *s;
2468 for (l = loop; l; l = l->next) {
2469 n_loops = cloog_loop_count(l->inner);
2470 if (max_loops < n_loops)
2471 max_loops = n_loops;
2474 if (max_loops <= 1)
2475 return loop;
2477 loop_array = (CloogLoop **)malloc(max_loops * sizeof(CloogLoop *));
2478 assert(loop_array);
2480 for (l = loop; l; l = l->next) {
2481 int n;
2483 for (i = 0, tmp = l->inner; tmp; i++, tmp = tmp->next)
2484 loop_array[i] = tmp;
2485 n_loops = i;
2486 if (n_loops <= 1)
2487 continue;
2489 s = cloog_loop_sort_alloc(n_loops);
2490 for (i = n_loops - 1; i >= 0; --i) {
2491 if (s->node[i].index >= 0)
2492 continue;
2493 cloog_loop_components_tarjan(s, loop_array, i, level, scalar,
2494 scaldims, nb_scattdims, &cloog_loop_follows);
2497 n = extract_component(loop_array, s->order, &l->inner);
2498 n_loops -= n;
2499 i = n + 1;
2500 while (n_loops) {
2501 CloogLoop *inner;
2503 n = extract_component(loop_array, &s->order[i], &inner);
2504 n_loops -= n;
2505 i += n + 1;
2506 tmp = cloog_loop_alloc(l->state, cloog_domain_copy(l->domain),
2507 l->otl, l->stride, l->block, inner, l->next);
2508 l->next = tmp;
2509 l = tmp;
2512 cloog_loop_sort_free(s);
2515 free(loop_array);
2517 return loop;
2521 CloogLoop *cloog_loop_generate_restricted(CloogLoop *loop,
2522 int level, int scalar, int *scaldims, int nb_scattdims,
2523 CloogOptions *options)
2525 /* To save both time and memory, we switch here depending on whether the
2526 * current dimension is scalar (simplified processing) or not (general
2527 * processing).
2529 if (level_is_constant(level, scalar, scaldims, nb_scattdims))
2530 return cloog_loop_generate_scalar(loop, level, scalar,
2531 scaldims, nb_scattdims, options);
2533 * 2. Compute the projection of each polyhedron onto the outermost
2534 * loop variable and the parameters.
2536 loop = cloog_loop_project_all(loop, level);
2538 return cloog_loop_generate_components(loop, level, scalar, scaldims,
2539 nb_scattdims, options);
2543 CloogLoop *cloog_loop_generate_restricted_or_stop(CloogLoop *loop,
2544 CloogDomain *context,
2545 int level, int scalar, int *scaldims, int nb_scattdims,
2546 CloogOptions *options)
2548 /* If the user asked to stop code generation at this level, let's stop. */
2549 if ((options->stop >= 0) && (level+scalar >= options->stop+1))
2550 return cloog_loop_stop(loop,context) ;
2552 return cloog_loop_generate_restricted(loop, level, scalar, scaldims,
2553 nb_scattdims, options);
2558 * cloog_loop_generate function:
2559 * Adaptation from LoopGen 0.4 by F. Quillere. This function implements the
2560 * Quillere algorithm for polyhedron scanning from step 1 to 2.
2561 * (see the Quillere paper).
2562 * - loop is the loop for which we have to generate a scanning code,
2563 * - context is the context of the current loop (constraints on parameter and/or
2564 * on outer loop counters),
2565 * - level is the current non-scalar dimension,
2566 * - scalar is the current scalar dimension,
2567 * - scaldims is the boolean array saying whether a dimension is scalar or not,
2568 * - nb_scattdims is the size of the scaldims array,
2569 * - options are the general code generation options.
2571 * - October 26th 2001: first version.
2572 * - July 3rd->11th 2003: memory leaks hunt and correction.
2573 * - June 15th 2005: a memory leak fixed (loop was not entirely freed when
2574 * the result of cloog_loop_restrict was NULL).
2575 * - June 22nd 2005: Adaptation for GMP.
2576 * - September 2nd 2005: The function have been cutted out in two pieces:
2577 * cloog_loop_generate and this one, in order to handle
2578 * the scalar dimension case more efficiently with
2579 * cloog_loop_generate_scalar.
2580 * - November 15th 2005: (debug) Condition for stop option no more take care of
2581 * further scalar dimensions.
2583 CloogLoop *cloog_loop_generate(CloogLoop *loop, CloogDomain *context,
2584 int level, int scalar, int *scaldims, int nb_scattdims,
2585 CloogOptions *options)
2587 /* 1. Replace each polyhedron by its intersection with the context.
2589 loop = cloog_loop_restrict_all(loop, context);
2590 if (!loop)
2591 return NULL;
2593 return cloog_loop_generate_restricted_or_stop(loop, context,
2594 level, scalar, scaldims, nb_scattdims, options);
2599 * Internal function for simplifying a single loop in a list of loops.
2600 * See cloog_loop_simplify.
2602 static CloogLoop *loop_simplify(CloogLoop *loop, CloogDomain *context,
2603 int level, int nb_scattdims, CloogOptions *options)
2605 int domain_dim;
2606 CloogBlock * new_block ;
2607 CloogLoop *simplified, *inner;
2608 CloogDomain * domain, * simp, * inter, * extended_context ;
2610 domain = loop->domain ;
2612 domain_dim = cloog_domain_dimension(domain);
2613 extended_context = cloog_domain_extend(context, domain_dim);
2614 inter = cloog_domain_intersection(domain,extended_context) ;
2615 simp = cloog_domain_simplify(domain, extended_context);
2616 cloog_domain_free(extended_context) ;
2618 /* If the constraint system is never true, go to the next one. */
2619 if (cloog_domain_never_integral(simp)) {
2620 cloog_loop_free(loop->inner);
2621 cloog_domain_free(inter);
2622 cloog_domain_free(simp);
2623 return NULL;
2626 inner = cloog_loop_simplify(loop->inner, inter, level+1, nb_scattdims,
2627 options);
2629 if ((inner == NULL) && (loop->block == NULL)) {
2630 cloog_domain_free(inter);
2631 cloog_domain_free(simp);
2632 return NULL;
2635 new_block = cloog_block_copy(loop->block) ;
2637 simplified = cloog_loop_alloc(loop->state, simp, loop->otl, loop->stride,
2638 new_block, inner, NULL);
2640 if (options->save_domains) {
2641 inter = cloog_domain_add_stride_constraint(inter, loop->stride);
2642 if (domain_dim > nb_scattdims) {
2643 CloogDomain *t;
2644 inter = cloog_domain_project(t = inter, nb_scattdims);
2645 cloog_domain_free(t);
2647 simplified->unsimplified = inter;
2648 } else
2649 cloog_domain_free(inter);
2651 return(simplified) ;
2656 * cloog_loop_simplify function:
2657 * This function implements the part 6. of the Quillere algorithm, it
2658 * recursively simplifies each loop in the context of the preceding loop domain.
2659 * It returns a pointer to the simplified loop list.
2660 * The cloog_domain_simplify (DomainSimplify) behaviour is really bad with
2661 * polyhedra union and some really awful sidesteppings were written, I plan
2662 * to solve that...
2663 * - October 31th 2001: first version.
2664 * - July 3rd->11th 2003: memory leaks hunt and correction.
2665 * - April 16th 2005: a memory leak fixed (extended_context was not freed).
2666 * - June 15th 2005: a memory leak fixed (loop was not conveniently freed
2667 * when the constraint system is never true).
2668 * - October 27th 2005: - this function called before cloog_loop_fast_simplify
2669 * is now the official cloog_loop_simplify function in
2670 * replacement of a slower and more complex one (after
2671 * deep changes in the pretty printer).
2672 * - we use cloog_loop_disjoint to fix the problem when
2673 * simplifying gives a union of polyhedra (before, it
2674 * was under the responsibility of the pretty printer).
2676 CloogLoop *cloog_loop_simplify(CloogLoop *loop, CloogDomain *context, int level,
2677 int nb_scattdims, CloogOptions *options)
2679 CloogLoop *now;
2680 CloogLoop *res = NULL;
2681 CloogLoop **next = &res;
2682 int need_split = 0;
2684 for (now = loop; now; now = now->next)
2685 if (!cloog_domain_isconvex(now->domain)) {
2686 now->domain = cloog_domain_simplify_union(now->domain);
2687 if (!cloog_domain_isconvex(now->domain))
2688 need_split = 1;
2691 /* If the input of CLooG contains any union domains, then they
2692 * may not have been split yet at this point. Do so now as the
2693 * clast construction assumes there are no union domains.
2695 if (need_split)
2696 loop = cloog_loop_disjoint(loop);
2698 for (now = loop; now; now = now->next) {
2699 *next = loop_simplify(now, context, level, nb_scattdims, options);
2701 now->inner = NULL; /* For loop integrity. */
2702 cloog_domain_free(now->domain);
2703 now->domain = NULL;
2705 if (*next)
2706 next = &(*next)->next;
2708 cloog_loop_free(loop);
2710 return res;
2715 * cloog_loop_scatter function:
2716 * This function add the scattering (scheduling) informations in a loop.
2718 void cloog_loop_scatter(CloogLoop * loop, CloogScattering *scatt)
2720 loop->domain = cloog_domain_scatter(loop->domain, scatt);