isl_access_info: change interface for specifying restrictions
[isl.git] / isl_schedule.c
blob60d80af98b1664f57c1892b32b4cdf50906f26a4
1 /*
2 * Copyright 2011 INRIA Saclay
4 * Use of this software is governed by the GNU LGPLv2.1 license
6 * Written by Sven Verdoolaege, INRIA Saclay - Ile-de-France,
7 * Parc Club Orsay Universite, ZAC des vignes, 4 rue Jacques Monod,
8 * 91893 Orsay, France
9 */
11 #include <isl_ctx_private.h>
12 #include <isl_map_private.h>
13 #include <isl_space_private.h>
14 #include <isl/hash.h>
15 #include <isl/constraint.h>
16 #include <isl/schedule.h>
17 #include <isl_mat_private.h>
18 #include <isl/set.h>
19 #include <isl/seq.h>
20 #include <isl_tab.h>
21 #include <isl_dim_map.h>
22 #include <isl_hmap_map_basic_set.h>
23 #include <isl_qsort.h>
24 #include <isl_schedule_private.h>
25 #include <isl_band_private.h>
26 #include <isl_list_private.h>
27 #include <isl_options_private.h>
30 * The scheduling algorithm implemented in this file was inspired by
31 * Bondhugula et al., "Automatic Transformations for Communication-Minimized
32 * Parallelization and Locality Optimization in the Polyhedral Model".
36 /* Internal information about a node that is used during the construction
37 * of a schedule.
38 * dim represents the space in which the domain lives
39 * sched is a matrix representation of the schedule being constructed
40 * for this node
41 * sched_map is an isl_map representation of the same (partial) schedule
42 * sched_map may be NULL
43 * rank is the number of linearly independent rows in the linear part
44 * of sched
45 * the columns of cmap represent a change of basis for the schedule
46 * coefficients; the first rank columns span the linear part of
47 * the schedule rows
48 * start is the first variable in the LP problem in the sequences that
49 * represents the schedule coefficients of this node
50 * nvar is the dimension of the domain
51 * nparam is the number of parameters or 0 if we are not constructing
52 * a parametric schedule
54 * scc is the index of SCC (or WCC) this node belongs to
56 * band contains the band index for each of the rows of the schedule.
57 * band_id is used to differentiate between separate bands at the same
58 * level within the same parent band, i.e., bands that are separated
59 * by the parent band or bands that are independent of each other.
60 * zero contains a boolean for each of the rows of the schedule,
61 * indicating whether the corresponding scheduling dimension results
62 * in zero dependence distances within its band and with respect
63 * to the proximity edges.
65 * index, min_index and on_stack are used during the SCC detection
66 * index represents the order in which nodes are visited.
67 * min_index is the index of the root of a (sub)component.
68 * on_stack indicates whether the node is currently on the stack.
70 struct isl_sched_node {
71 isl_space *dim;
72 isl_mat *sched;
73 isl_map *sched_map;
74 int rank;
75 isl_mat *cmap;
76 int start;
77 int nvar;
78 int nparam;
80 int scc;
82 int *band;
83 int *band_id;
84 int *zero;
86 /* scc detection */
87 int index;
88 int min_index;
89 int on_stack;
92 static int node_has_dim(const void *entry, const void *val)
94 struct isl_sched_node *node = (struct isl_sched_node *)entry;
95 isl_space *dim = (isl_space *)val;
97 return isl_space_is_equal(node->dim, dim);
100 /* An edge in the dependence graph. An edge may be used to
101 * ensure validity of the generated schedule, to minimize the dependence
102 * distance or both
104 * map is the dependence relation
105 * src is the source node
106 * dst is the sink node
107 * validity is set if the edge is used to ensure correctness
108 * proximity is set if the edge is used to minimize dependence distances
110 * For validity edges, start and end mark the sequence of inequality
111 * constraints in the LP problem that encode the validity constraint
112 * corresponding to this edge.
114 struct isl_sched_edge {
115 isl_map *map;
117 struct isl_sched_node *src;
118 struct isl_sched_node *dst;
120 int validity;
121 int proximity;
123 int start;
124 int end;
127 /* Internal information about the dependence graph used during
128 * the construction of the schedule.
130 * intra_hmap is a cache, mapping dependence relations to their dual,
131 * for dependences from a node to itself
132 * inter_hmap is a cache, mapping dependence relations to their dual,
133 * for dependences between distinct nodes
135 * n is the number of nodes
136 * node is the list of nodes
137 * maxvar is the maximal number of variables over all nodes
138 * n_row is the current (maximal) number of linearly independent
139 * rows in the node schedules
140 * n_total_row is the current number of rows in the node schedules
141 * n_band is the current number of completed bands
142 * band_start is the starting row in the node schedules of the current band
143 * root is set if this graph is the original dependence graph,
144 * without any splitting
146 * sorted contains a list of node indices sorted according to the
147 * SCC to which a node belongs
149 * n_edge is the number of edges
150 * edge is the list of edges
151 * edge_table contains pointers into the edge array, hashed on the source
152 * and sink spaces; the table only contains edges that represent
153 * validity constraints (and that may or may not also represent proximity
154 * constraints)
156 * node_table contains pointers into the node array, hashed on the space
158 * region contains a list of variable sequences that should be non-trivial
160 * lp contains the (I)LP problem used to obtain new schedule rows
162 * src_scc and dst_scc are the source and sink SCCs of an edge with
163 * conflicting constraints
165 * scc, sp, index and stack are used during the detection of SCCs
166 * scc is the number of the next SCC
167 * stack contains the nodes on the path from the root to the current node
168 * sp is the stack pointer
169 * index is the index of the last node visited
171 struct isl_sched_graph {
172 isl_hmap_map_basic_set *intra_hmap;
173 isl_hmap_map_basic_set *inter_hmap;
175 struct isl_sched_node *node;
176 int n;
177 int maxvar;
178 int n_row;
180 int *sorted;
182 int n_band;
183 int n_total_row;
184 int band_start;
186 int root;
188 struct isl_sched_edge *edge;
189 int n_edge;
190 struct isl_hash_table *edge_table;
192 struct isl_hash_table *node_table;
193 struct isl_region *region;
195 isl_basic_set *lp;
197 int src_scc;
198 int dst_scc;
200 /* scc detection */
201 int scc;
202 int sp;
203 int index;
204 int *stack;
207 /* Initialize node_table based on the list of nodes.
209 static int graph_init_table(isl_ctx *ctx, struct isl_sched_graph *graph)
211 int i;
213 graph->node_table = isl_hash_table_alloc(ctx, graph->n);
214 if (!graph->node_table)
215 return -1;
217 for (i = 0; i < graph->n; ++i) {
218 struct isl_hash_table_entry *entry;
219 uint32_t hash;
221 hash = isl_space_get_hash(graph->node[i].dim);
222 entry = isl_hash_table_find(ctx, graph->node_table, hash,
223 &node_has_dim,
224 graph->node[i].dim, 1);
225 if (!entry)
226 return -1;
227 entry->data = &graph->node[i];
230 return 0;
233 /* Return a pointer to the node that lives within the given space,
234 * or NULL if there is no such node.
236 static struct isl_sched_node *graph_find_node(isl_ctx *ctx,
237 struct isl_sched_graph *graph, __isl_keep isl_space *dim)
239 struct isl_hash_table_entry *entry;
240 uint32_t hash;
242 hash = isl_space_get_hash(dim);
243 entry = isl_hash_table_find(ctx, graph->node_table, hash,
244 &node_has_dim, dim, 0);
246 return entry ? entry->data : NULL;
249 static int edge_has_src_and_dst(const void *entry, const void *val)
251 const struct isl_sched_edge *edge = entry;
252 const struct isl_sched_edge *temp = val;
254 return edge->src == temp->src && edge->dst == temp->dst;
257 /* Initialize edge_table based on the list of edges.
258 * Only edges with validity set are added to the table.
260 static int graph_init_edge_table(isl_ctx *ctx, struct isl_sched_graph *graph)
262 int i;
264 graph->edge_table = isl_hash_table_alloc(ctx, graph->n_edge);
265 if (!graph->edge_table)
266 return -1;
268 for (i = 0; i < graph->n_edge; ++i) {
269 struct isl_hash_table_entry *entry;
270 uint32_t hash;
272 if (!graph->edge[i].validity)
273 continue;
275 hash = isl_hash_init();
276 hash = isl_hash_builtin(hash, graph->edge[i].src);
277 hash = isl_hash_builtin(hash, graph->edge[i].dst);
278 entry = isl_hash_table_find(ctx, graph->edge_table, hash,
279 &edge_has_src_and_dst,
280 &graph->edge[i], 1);
281 if (!entry)
282 return -1;
283 entry->data = &graph->edge[i];
286 return 0;
289 /* Check whether the dependence graph has a (validity) edge
290 * between the given two nodes.
292 static int graph_has_edge(struct isl_sched_graph *graph,
293 struct isl_sched_node *src, struct isl_sched_node *dst)
295 isl_ctx *ctx = isl_space_get_ctx(src->dim);
296 struct isl_hash_table_entry *entry;
297 uint32_t hash;
298 struct isl_sched_edge temp = { .src = src, .dst = dst };
299 struct isl_sched_edge *edge;
300 int empty;
302 hash = isl_hash_init();
303 hash = isl_hash_builtin(hash, temp.src);
304 hash = isl_hash_builtin(hash, temp.dst);
305 entry = isl_hash_table_find(ctx, graph->edge_table, hash,
306 &edge_has_src_and_dst, &temp, 0);
307 if (!entry)
308 return 0;
310 edge = entry->data;
311 empty = isl_map_plain_is_empty(edge->map);
312 if (empty < 0)
313 return -1;
315 return !empty;
318 static int graph_alloc(isl_ctx *ctx, struct isl_sched_graph *graph,
319 int n_node, int n_edge)
321 int i;
323 graph->n = n_node;
324 graph->n_edge = n_edge;
325 graph->node = isl_calloc_array(ctx, struct isl_sched_node, graph->n);
326 graph->sorted = isl_calloc_array(ctx, int, graph->n);
327 graph->region = isl_alloc_array(ctx, struct isl_region, graph->n);
328 graph->stack = isl_alloc_array(ctx, int, graph->n);
329 graph->edge = isl_calloc_array(ctx,
330 struct isl_sched_edge, graph->n_edge);
332 graph->intra_hmap = isl_hmap_map_basic_set_alloc(ctx, 2 * n_edge);
333 graph->inter_hmap = isl_hmap_map_basic_set_alloc(ctx, 2 * n_edge);
335 if (!graph->node || !graph->region || !graph->stack || !graph->edge ||
336 !graph->sorted)
337 return -1;
339 for(i = 0; i < graph->n; ++i)
340 graph->sorted[i] = i;
342 return 0;
345 static void graph_free(isl_ctx *ctx, struct isl_sched_graph *graph)
347 int i;
349 isl_hmap_map_basic_set_free(ctx, graph->intra_hmap);
350 isl_hmap_map_basic_set_free(ctx, graph->inter_hmap);
352 for (i = 0; i < graph->n; ++i) {
353 isl_space_free(graph->node[i].dim);
354 isl_mat_free(graph->node[i].sched);
355 isl_map_free(graph->node[i].sched_map);
356 isl_mat_free(graph->node[i].cmap);
357 if (graph->root) {
358 free(graph->node[i].band);
359 free(graph->node[i].band_id);
360 free(graph->node[i].zero);
363 free(graph->node);
364 free(graph->sorted);
365 for (i = 0; i < graph->n_edge; ++i)
366 isl_map_free(graph->edge[i].map);
367 free(graph->edge);
368 free(graph->region);
369 free(graph->stack);
370 isl_hash_table_free(ctx, graph->edge_table);
371 isl_hash_table_free(ctx, graph->node_table);
372 isl_basic_set_free(graph->lp);
375 /* Add a new node to the graph representing the given set.
377 static int extract_node(__isl_take isl_set *set, void *user)
379 int nvar, nparam;
380 isl_ctx *ctx;
381 isl_space *dim;
382 isl_mat *sched;
383 struct isl_sched_graph *graph = user;
384 int *band, *band_id, *zero;
386 ctx = isl_set_get_ctx(set);
387 dim = isl_set_get_space(set);
388 isl_set_free(set);
389 nvar = isl_space_dim(dim, isl_dim_set);
390 nparam = isl_space_dim(dim, isl_dim_param);
391 if (!ctx->opt->schedule_parametric)
392 nparam = 0;
393 sched = isl_mat_alloc(ctx, 0, 1 + nparam + nvar);
394 graph->node[graph->n].dim = dim;
395 graph->node[graph->n].nvar = nvar;
396 graph->node[graph->n].nparam = nparam;
397 graph->node[graph->n].sched = sched;
398 graph->node[graph->n].sched_map = NULL;
399 band = isl_alloc_array(ctx, int, graph->n_edge + nvar);
400 graph->node[graph->n].band = band;
401 band_id = isl_calloc_array(ctx, int, graph->n_edge + nvar);
402 graph->node[graph->n].band_id = band_id;
403 zero = isl_calloc_array(ctx, int, graph->n_edge + nvar);
404 graph->node[graph->n].zero = zero;
405 graph->n++;
407 if (!sched || !band || !band_id || !zero)
408 return -1;
410 return 0;
413 /* Add a new edge to the graph based on the given map.
414 * Edges are first extracted from the validity dependences,
415 * from which the edge_table is constructed.
416 * Afterwards, the proximity dependences are added. If a proximity
417 * dependence relation happens to be identical to one of the
418 * validity dependence relations added before, then we don't create
419 * a new edge, but instead mark the original edge as also representing
420 * a proximity dependence.
422 static int extract_edge(__isl_take isl_map *map, void *user)
424 isl_ctx *ctx = isl_map_get_ctx(map);
425 struct isl_sched_graph *graph = user;
426 struct isl_sched_node *src, *dst;
427 isl_space *dim;
429 dim = isl_space_domain(isl_map_get_space(map));
430 src = graph_find_node(ctx, graph, dim);
431 isl_space_free(dim);
432 dim = isl_space_range(isl_map_get_space(map));
433 dst = graph_find_node(ctx, graph, dim);
434 isl_space_free(dim);
436 if (!src || !dst) {
437 isl_map_free(map);
438 return 0;
441 graph->edge[graph->n_edge].src = src;
442 graph->edge[graph->n_edge].dst = dst;
443 graph->edge[graph->n_edge].map = map;
444 graph->edge[graph->n_edge].validity = !graph->edge_table;
445 graph->edge[graph->n_edge].proximity = !!graph->edge_table;
446 graph->n_edge++;
448 if (graph->edge_table) {
449 uint32_t hash;
450 struct isl_hash_table_entry *entry;
451 struct isl_sched_edge *edge;
452 int is_equal;
454 hash = isl_hash_init();
455 hash = isl_hash_builtin(hash, src);
456 hash = isl_hash_builtin(hash, dst);
457 entry = isl_hash_table_find(ctx, graph->edge_table, hash,
458 &edge_has_src_and_dst,
459 &graph->edge[graph->n_edge - 1], 0);
460 if (!entry)
461 return 0;
462 edge = entry->data;
463 is_equal = isl_map_plain_is_equal(map, edge->map);
464 if (is_equal < 0)
465 return -1;
466 if (!is_equal)
467 return 0;
469 graph->n_edge--;
470 edge->proximity = 1;
471 isl_map_free(map);
474 return 0;
477 /* Check whether there is a validity dependence from src to dst,
478 * forcing dst to follow src.
480 static int node_follows(struct isl_sched_graph *graph,
481 struct isl_sched_node *dst, struct isl_sched_node *src)
483 return graph_has_edge(graph, src, dst);
486 /* Perform Tarjan's algorithm for computing the strongly connected components
487 * in the dependence graph (only validity edges).
488 * If directed is not set, we consider the graph to be undirected and
489 * we effectively compute the (weakly) connected components.
491 static int detect_sccs_tarjan(struct isl_sched_graph *g, int i, int directed)
493 int j;
495 g->node[i].index = g->index;
496 g->node[i].min_index = g->index;
497 g->node[i].on_stack = 1;
498 g->index++;
499 g->stack[g->sp++] = i;
501 for (j = g->n - 1; j >= 0; --j) {
502 int f;
504 if (j == i)
505 continue;
506 if (g->node[j].index >= 0 &&
507 (!g->node[j].on_stack ||
508 g->node[j].index > g->node[i].min_index))
509 continue;
511 f = node_follows(g, &g->node[i], &g->node[j]);
512 if (f < 0)
513 return -1;
514 if (!f && !directed) {
515 f = node_follows(g, &g->node[j], &g->node[i]);
516 if (f < 0)
517 return -1;
519 if (!f)
520 continue;
521 if (g->node[j].index < 0) {
522 detect_sccs_tarjan(g, j, directed);
523 if (g->node[j].min_index < g->node[i].min_index)
524 g->node[i].min_index = g->node[j].min_index;
525 } else if (g->node[j].index < g->node[i].min_index)
526 g->node[i].min_index = g->node[j].index;
529 if (g->node[i].index != g->node[i].min_index)
530 return 0;
532 do {
533 j = g->stack[--g->sp];
534 g->node[j].on_stack = 0;
535 g->node[j].scc = g->scc;
536 } while (j != i);
537 g->scc++;
539 return 0;
542 static int detect_ccs(struct isl_sched_graph *graph, int directed)
544 int i;
546 graph->index = 0;
547 graph->sp = 0;
548 graph->scc = 0;
549 for (i = graph->n - 1; i >= 0; --i)
550 graph->node[i].index = -1;
552 for (i = graph->n - 1; i >= 0; --i) {
553 if (graph->node[i].index >= 0)
554 continue;
555 if (detect_sccs_tarjan(graph, i, directed) < 0)
556 return -1;
559 return 0;
562 /* Apply Tarjan's algorithm to detect the strongly connected components
563 * in the dependence graph.
565 static int detect_sccs(struct isl_sched_graph *graph)
567 return detect_ccs(graph, 1);
570 /* Apply Tarjan's algorithm to detect the (weakly) connected components
571 * in the dependence graph.
573 static int detect_wccs(struct isl_sched_graph *graph)
575 return detect_ccs(graph, 0);
578 static int cmp_scc(const void *a, const void *b, void *data)
580 struct isl_sched_graph *graph = data;
581 const int *i1 = a;
582 const int *i2 = b;
584 return graph->node[*i1].scc - graph->node[*i2].scc;
587 /* Sort the elements of graph->sorted according to the corresponding SCCs.
589 static void sort_sccs(struct isl_sched_graph *graph)
591 isl_quicksort(graph->sorted, graph->n, sizeof(int), &cmp_scc, graph);
594 /* Given a dependence relation R from a node to itself,
595 * construct the set of coefficients of valid constraints for elements
596 * in that dependence relation.
597 * In particular, the result contains tuples of coefficients
598 * c_0, c_n, c_x such that
600 * c_0 + c_n n + c_x y - c_x x >= 0 for each (x,y) in R
602 * or, equivalently,
604 * c_0 + c_n n + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
606 * We choose here to compute the dual of delta R.
607 * Alternatively, we could have computed the dual of R, resulting
608 * in a set of tuples c_0, c_n, c_x, c_y, and then
609 * plugged in (c_0, c_n, c_x, -c_x).
611 static __isl_give isl_basic_set *intra_coefficients(
612 struct isl_sched_graph *graph, __isl_take isl_map *map)
614 isl_ctx *ctx = isl_map_get_ctx(map);
615 isl_set *delta;
616 isl_basic_set *coef;
618 if (isl_hmap_map_basic_set_has(ctx, graph->intra_hmap, map))
619 return isl_hmap_map_basic_set_get(ctx, graph->intra_hmap, map);
621 delta = isl_set_remove_divs(isl_map_deltas(isl_map_copy(map)));
622 coef = isl_set_coefficients(delta);
623 isl_hmap_map_basic_set_set(ctx, graph->intra_hmap, map,
624 isl_basic_set_copy(coef));
626 return coef;
629 /* Given a dependence relation R, * construct the set of coefficients
630 * of valid constraints for elements in that dependence relation.
631 * In particular, the result contains tuples of coefficients
632 * c_0, c_n, c_x, c_y such that
634 * c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
637 static __isl_give isl_basic_set *inter_coefficients(
638 struct isl_sched_graph *graph, __isl_take isl_map *map)
640 isl_ctx *ctx = isl_map_get_ctx(map);
641 isl_set *set;
642 isl_basic_set *coef;
644 if (isl_hmap_map_basic_set_has(ctx, graph->inter_hmap, map))
645 return isl_hmap_map_basic_set_get(ctx, graph->inter_hmap, map);
647 set = isl_map_wrap(isl_map_remove_divs(isl_map_copy(map)));
648 coef = isl_set_coefficients(set);
649 isl_hmap_map_basic_set_set(ctx, graph->inter_hmap, map,
650 isl_basic_set_copy(coef));
652 return coef;
655 /* Add constraints to graph->lp that force validity for the given
656 * dependence from a node i to itself.
657 * That is, add constraints that enforce
659 * (c_i_0 + c_i_n n + c_i_x y) - (c_i_0 + c_i_n n + c_i_x x)
660 * = c_i_x (y - x) >= 0
662 * for each (x,y) in R.
663 * We obtain general constraints on coefficients (c_0, c_n, c_x)
664 * of valid constraints for (y - x) and then plug in (0, 0, c_i_x^+ - c_i_x^-),
665 * where c_i_x = c_i_x^+ - c_i_x^-, with c_i_x^+ and c_i_x^- non-negative.
666 * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
668 * Actually, we do not construct constraints for the c_i_x themselves,
669 * but for the coefficients of c_i_x written as a linear combination
670 * of the columns in node->cmap.
672 static int add_intra_validity_constraints(struct isl_sched_graph *graph,
673 struct isl_sched_edge *edge)
675 unsigned total;
676 isl_map *map = isl_map_copy(edge->map);
677 isl_ctx *ctx = isl_map_get_ctx(map);
678 isl_space *dim;
679 isl_dim_map *dim_map;
680 isl_basic_set *coef;
681 struct isl_sched_node *node = edge->src;
683 coef = intra_coefficients(graph, map);
685 dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef)));
687 coef = isl_basic_set_transform_dims(coef, isl_dim_set,
688 isl_space_dim(dim, isl_dim_set), isl_mat_copy(node->cmap));
690 total = isl_basic_set_total_dim(graph->lp);
691 dim_map = isl_dim_map_alloc(ctx, total);
692 isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 1, 2,
693 isl_space_dim(dim, isl_dim_set), 1,
694 node->nvar, -1);
695 isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 2, 2,
696 isl_space_dim(dim, isl_dim_set), 1,
697 node->nvar, 1);
698 graph->lp = isl_basic_set_extend_constraints(graph->lp,
699 coef->n_eq, coef->n_ineq);
700 graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
701 coef, dim_map);
702 isl_space_free(dim);
704 return 0;
707 /* Add constraints to graph->lp that force validity for the given
708 * dependence from node i to node j.
709 * That is, add constraints that enforce
711 * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) >= 0
713 * for each (x,y) in R.
714 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
715 * of valid constraints for R and then plug in
716 * (c_j_0 - c_i_0, c_j_n^+ - c_j_n^- - (c_i_n^+ - c_i_n^-),
717 * c_j_x^+ - c_j_x^- - (c_i_x^+ - c_i_x^-)),
718 * where c_* = c_*^+ - c_*^-, with c_*^+ and c_*^- non-negative.
719 * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
721 * Actually, we do not construct constraints for the c_*_x themselves,
722 * but for the coefficients of c_*_x written as a linear combination
723 * of the columns in node->cmap.
725 static int add_inter_validity_constraints(struct isl_sched_graph *graph,
726 struct isl_sched_edge *edge)
728 unsigned total;
729 isl_map *map = isl_map_copy(edge->map);
730 isl_ctx *ctx = isl_map_get_ctx(map);
731 isl_space *dim;
732 isl_dim_map *dim_map;
733 isl_basic_set *coef;
734 struct isl_sched_node *src = edge->src;
735 struct isl_sched_node *dst = edge->dst;
737 coef = inter_coefficients(graph, map);
739 dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef)));
741 coef = isl_basic_set_transform_dims(coef, isl_dim_set,
742 isl_space_dim(dim, isl_dim_set), isl_mat_copy(src->cmap));
743 coef = isl_basic_set_transform_dims(coef, isl_dim_set,
744 isl_space_dim(dim, isl_dim_set) + src->nvar,
745 isl_mat_copy(dst->cmap));
747 total = isl_basic_set_total_dim(graph->lp);
748 dim_map = isl_dim_map_alloc(ctx, total);
750 isl_dim_map_range(dim_map, dst->start, 0, 0, 0, 1, 1);
751 isl_dim_map_range(dim_map, dst->start + 1, 2, 1, 1, dst->nparam, -1);
752 isl_dim_map_range(dim_map, dst->start + 2, 2, 1, 1, dst->nparam, 1);
753 isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 1, 2,
754 isl_space_dim(dim, isl_dim_set) + src->nvar, 1,
755 dst->nvar, -1);
756 isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 2, 2,
757 isl_space_dim(dim, isl_dim_set) + src->nvar, 1,
758 dst->nvar, 1);
760 isl_dim_map_range(dim_map, src->start, 0, 0, 0, 1, -1);
761 isl_dim_map_range(dim_map, src->start + 1, 2, 1, 1, src->nparam, 1);
762 isl_dim_map_range(dim_map, src->start + 2, 2, 1, 1, src->nparam, -1);
763 isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 1, 2,
764 isl_space_dim(dim, isl_dim_set), 1,
765 src->nvar, 1);
766 isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 2, 2,
767 isl_space_dim(dim, isl_dim_set), 1,
768 src->nvar, -1);
770 edge->start = graph->lp->n_ineq;
771 graph->lp = isl_basic_set_extend_constraints(graph->lp,
772 coef->n_eq, coef->n_ineq);
773 graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
774 coef, dim_map);
775 isl_space_free(dim);
776 edge->end = graph->lp->n_ineq;
778 return 0;
781 /* Add constraints to graph->lp that bound the dependence distance for the given
782 * dependence from a node i to itself.
783 * If s = 1, we add the constraint
785 * c_i_x (y - x) <= m_0 + m_n n
787 * or
789 * -c_i_x (y - x) + m_0 + m_n n >= 0
791 * for each (x,y) in R.
792 * If s = -1, we add the constraint
794 * -c_i_x (y - x) <= m_0 + m_n n
796 * or
798 * c_i_x (y - x) + m_0 + m_n n >= 0
800 * for each (x,y) in R.
801 * We obtain general constraints on coefficients (c_0, c_n, c_x)
802 * of valid constraints for (y - x) and then plug in (m_0, m_n, -s * c_i_x),
803 * with each coefficient (except m_0) represented as a pair of non-negative
804 * coefficients.
806 * Actually, we do not construct constraints for the c_i_x themselves,
807 * but for the coefficients of c_i_x written as a linear combination
808 * of the columns in node->cmap.
810 static int add_intra_proximity_constraints(struct isl_sched_graph *graph,
811 struct isl_sched_edge *edge, int s)
813 unsigned total;
814 unsigned nparam;
815 isl_map *map = isl_map_copy(edge->map);
816 isl_ctx *ctx = isl_map_get_ctx(map);
817 isl_space *dim;
818 isl_dim_map *dim_map;
819 isl_basic_set *coef;
820 struct isl_sched_node *node = edge->src;
822 coef = intra_coefficients(graph, map);
824 dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef)));
826 coef = isl_basic_set_transform_dims(coef, isl_dim_set,
827 isl_space_dim(dim, isl_dim_set), isl_mat_copy(node->cmap));
829 nparam = isl_space_dim(node->dim, isl_dim_param);
830 total = isl_basic_set_total_dim(graph->lp);
831 dim_map = isl_dim_map_alloc(ctx, total);
832 isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
833 isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
834 isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
835 isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 1, 2,
836 isl_space_dim(dim, isl_dim_set), 1,
837 node->nvar, s);
838 isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 2, 2,
839 isl_space_dim(dim, isl_dim_set), 1,
840 node->nvar, -s);
841 graph->lp = isl_basic_set_extend_constraints(graph->lp,
842 coef->n_eq, coef->n_ineq);
843 graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
844 coef, dim_map);
845 isl_space_free(dim);
847 return 0;
850 /* Add constraints to graph->lp that bound the dependence distance for the given
851 * dependence from node i to node j.
852 * If s = 1, we add the constraint
854 * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)
855 * <= m_0 + m_n n
857 * or
859 * -(c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x) +
860 * m_0 + m_n n >= 0
862 * for each (x,y) in R.
863 * If s = -1, we add the constraint
865 * -((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x))
866 * <= m_0 + m_n n
868 * or
870 * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) +
871 * m_0 + m_n n >= 0
873 * for each (x,y) in R.
874 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
875 * of valid constraints for R and then plug in
876 * (m_0 - s*c_j_0 + s*c_i_0, m_n - s*c_j_n + s*c_i_n,
877 * -s*c_j_x+s*c_i_x)
878 * with each coefficient (except m_0, c_j_0 and c_i_0)
879 * represented as a pair of non-negative coefficients.
881 * Actually, we do not construct constraints for the c_*_x themselves,
882 * but for the coefficients of c_*_x written as a linear combination
883 * of the columns in node->cmap.
885 static int add_inter_proximity_constraints(struct isl_sched_graph *graph,
886 struct isl_sched_edge *edge, int s)
888 unsigned total;
889 unsigned nparam;
890 isl_map *map = isl_map_copy(edge->map);
891 isl_ctx *ctx = isl_map_get_ctx(map);
892 isl_space *dim;
893 isl_dim_map *dim_map;
894 isl_basic_set *coef;
895 struct isl_sched_node *src = edge->src;
896 struct isl_sched_node *dst = edge->dst;
898 coef = inter_coefficients(graph, map);
900 dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef)));
902 coef = isl_basic_set_transform_dims(coef, isl_dim_set,
903 isl_space_dim(dim, isl_dim_set), isl_mat_copy(src->cmap));
904 coef = isl_basic_set_transform_dims(coef, isl_dim_set,
905 isl_space_dim(dim, isl_dim_set) + src->nvar,
906 isl_mat_copy(dst->cmap));
908 nparam = isl_space_dim(src->dim, isl_dim_param);
909 total = isl_basic_set_total_dim(graph->lp);
910 dim_map = isl_dim_map_alloc(ctx, total);
912 isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
913 isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
914 isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
916 isl_dim_map_range(dim_map, dst->start, 0, 0, 0, 1, -s);
917 isl_dim_map_range(dim_map, dst->start + 1, 2, 1, 1, dst->nparam, s);
918 isl_dim_map_range(dim_map, dst->start + 2, 2, 1, 1, dst->nparam, -s);
919 isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 1, 2,
920 isl_space_dim(dim, isl_dim_set) + src->nvar, 1,
921 dst->nvar, s);
922 isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 2, 2,
923 isl_space_dim(dim, isl_dim_set) + src->nvar, 1,
924 dst->nvar, -s);
926 isl_dim_map_range(dim_map, src->start, 0, 0, 0, 1, s);
927 isl_dim_map_range(dim_map, src->start + 1, 2, 1, 1, src->nparam, -s);
928 isl_dim_map_range(dim_map, src->start + 2, 2, 1, 1, src->nparam, s);
929 isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 1, 2,
930 isl_space_dim(dim, isl_dim_set), 1,
931 src->nvar, -s);
932 isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 2, 2,
933 isl_space_dim(dim, isl_dim_set), 1,
934 src->nvar, s);
936 graph->lp = isl_basic_set_extend_constraints(graph->lp,
937 coef->n_eq, coef->n_ineq);
938 graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
939 coef, dim_map);
940 isl_space_free(dim);
942 return 0;
945 static int add_all_validity_constraints(struct isl_sched_graph *graph)
947 int i;
949 for (i = 0; i < graph->n_edge; ++i) {
950 struct isl_sched_edge *edge= &graph->edge[i];
951 if (!edge->validity)
952 continue;
953 if (edge->src != edge->dst)
954 continue;
955 if (add_intra_validity_constraints(graph, edge) < 0)
956 return -1;
959 for (i = 0; i < graph->n_edge; ++i) {
960 struct isl_sched_edge *edge = &graph->edge[i];
961 if (!edge->validity)
962 continue;
963 if (edge->src == edge->dst)
964 continue;
965 if (add_inter_validity_constraints(graph, edge) < 0)
966 return -1;
969 return 0;
972 /* Add constraints to graph->lp that bound the dependence distance
973 * for all dependence relations.
974 * If a given proximity dependence is identical to a validity
975 * dependence, then the dependence distance is already bounded
976 * from below (by zero), so we only need to bound the distance
977 * from above.
978 * Otherwise, we need to bound the distance both from above and from below.
980 static int add_all_proximity_constraints(struct isl_sched_graph *graph)
982 int i;
984 for (i = 0; i < graph->n_edge; ++i) {
985 struct isl_sched_edge *edge= &graph->edge[i];
986 if (!edge->proximity)
987 continue;
988 if (edge->src == edge->dst &&
989 add_intra_proximity_constraints(graph, edge, 1) < 0)
990 return -1;
991 if (edge->src != edge->dst &&
992 add_inter_proximity_constraints(graph, edge, 1) < 0)
993 return -1;
994 if (edge->validity)
995 continue;
996 if (edge->src == edge->dst &&
997 add_intra_proximity_constraints(graph, edge, -1) < 0)
998 return -1;
999 if (edge->src != edge->dst &&
1000 add_inter_proximity_constraints(graph, edge, -1) < 0)
1001 return -1;
1004 return 0;
1007 /* Compute a basis for the rows in the linear part of the schedule
1008 * and extend this basis to a full basis. The remaining rows
1009 * can then be used to force linear independence from the rows
1010 * in the schedule.
1012 * In particular, given the schedule rows S, we compute
1014 * S = H Q
1016 * with H the Hermite normal form of S. That is, all but the
1017 * first rank columns of Q are zero and so each row in S is
1018 * a linear combination of the first rank rows of Q.
1019 * The matrix Q is then transposed because we will write the
1020 * coefficients of the next schedule row as a column vector s
1021 * and express this s as a linear combination s = Q c of the
1022 * computed basis.
1024 static int node_update_cmap(struct isl_sched_node *node)
1026 isl_mat *H, *Q;
1027 int n_row = isl_mat_rows(node->sched);
1029 H = isl_mat_sub_alloc(node->sched, 0, n_row,
1030 1 + node->nparam, node->nvar);
1032 H = isl_mat_left_hermite(H, 0, NULL, &Q);
1033 isl_mat_free(node->cmap);
1034 node->cmap = isl_mat_transpose(Q);
1035 node->rank = isl_mat_initial_non_zero_cols(H);
1036 isl_mat_free(H);
1038 if (!node->cmap || node->rank < 0)
1039 return -1;
1040 return 0;
1043 /* Count the number of equality and inequality constraints
1044 * that will be added for the given map.
1045 * If once is set, then we count
1046 * each edge exactly once. Otherwise, we count as follows
1047 * validity -> 1 (>= 0)
1048 * validity+proximity -> 2 (>= 0 and upper bound)
1049 * proximity -> 2 (lower and upper bound)
1051 static int count_map_constraints(struct isl_sched_graph *graph,
1052 struct isl_sched_edge *edge, __isl_take isl_map *map,
1053 int *n_eq, int *n_ineq, int once)
1055 isl_basic_set *coef;
1056 int f = once ? 1 : edge->proximity ? 2 : 1;
1058 if (edge->src == edge->dst)
1059 coef = intra_coefficients(graph, map);
1060 else
1061 coef = inter_coefficients(graph, map);
1062 if (!coef)
1063 return -1;
1064 *n_eq += f * coef->n_eq;
1065 *n_ineq += f * coef->n_ineq;
1066 isl_basic_set_free(coef);
1068 return 0;
1071 /* Count the number of equality and inequality constraints
1072 * that will be added to the main lp problem.
1073 * If once is set, then we count
1074 * each edge exactly once. Otherwise, we count as follows
1075 * validity -> 1 (>= 0)
1076 * validity+proximity -> 2 (>= 0 and upper bound)
1077 * proximity -> 2 (lower and upper bound)
1079 static int count_constraints(struct isl_sched_graph *graph,
1080 int *n_eq, int *n_ineq, int once)
1082 int i;
1084 *n_eq = *n_ineq = 0;
1085 for (i = 0; i < graph->n_edge; ++i) {
1086 struct isl_sched_edge *edge= &graph->edge[i];
1087 isl_map *map = isl_map_copy(edge->map);
1089 if (count_map_constraints(graph, edge, map,
1090 n_eq, n_ineq, once) < 0)
1091 return -1;
1094 return 0;
1097 /* Add constraints that bound the values of the variable and parameter
1098 * coefficients of the schedule.
1100 * The maximal value of the coefficients is defined by the option
1101 * 'schedule_max_coefficient'.
1103 static int add_bound_coefficient_constraints(isl_ctx *ctx,
1104 struct isl_sched_graph *graph)
1106 int i, j, k;
1107 int max_coefficient;
1108 int total;
1110 max_coefficient = ctx->opt->schedule_max_coefficient;
1112 if (max_coefficient == -1)
1113 return 0;
1115 total = isl_basic_set_total_dim(graph->lp);
1117 for (i = 0; i < graph->n; ++i) {
1118 struct isl_sched_node *node = &graph->node[i];
1119 for (j = 0; j < 2 * node->nparam + 2 * node->nvar; ++j) {
1120 int dim;
1121 k = isl_basic_set_alloc_inequality(graph->lp);
1122 if (k < 0)
1123 return -1;
1124 dim = 1 + node->start + 1 + j;
1125 isl_seq_clr(graph->lp->ineq[k], 1 + total);
1126 isl_int_set_si(graph->lp->ineq[k][dim], -1);
1127 isl_int_set_si(graph->lp->ineq[k][0], max_coefficient);
1131 return 0;
1134 /* Construct an ILP problem for finding schedule coefficients
1135 * that result in non-negative, but small dependence distances
1136 * over all dependences.
1137 * In particular, the dependence distances over proximity edges
1138 * are bounded by m_0 + m_n n and we compute schedule coefficients
1139 * with small values (preferably zero) of m_n and m_0.
1141 * All variables of the ILP are non-negative. The actual coefficients
1142 * may be negative, so each coefficient is represented as the difference
1143 * of two non-negative variables. The negative part always appears
1144 * immediately before the positive part.
1145 * Other than that, the variables have the following order
1147 * - sum of positive and negative parts of m_n coefficients
1148 * - m_0
1149 * - sum of positive and negative parts of all c_n coefficients
1150 * (unconstrained when computing non-parametric schedules)
1151 * - sum of positive and negative parts of all c_x coefficients
1152 * - positive and negative parts of m_n coefficients
1153 * - for each node
1154 * - c_i_0
1155 * - positive and negative parts of c_i_n (if parametric)
1156 * - positive and negative parts of c_i_x
1158 * The c_i_x are not represented directly, but through the columns of
1159 * node->cmap. That is, the computed values are for variable t_i_x
1160 * such that c_i_x = Q t_i_x with Q equal to node->cmap.
1162 * The constraints are those from the edges plus two or three equalities
1163 * to express the sums.
1165 * If force_zero is set, then we add equalities to ensure that
1166 * the sum of the m_n coefficients and m_0 are both zero.
1168 static int setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
1169 int force_zero)
1171 int i, j;
1172 int k;
1173 unsigned nparam;
1174 unsigned total;
1175 isl_space *dim;
1176 int parametric;
1177 int param_pos;
1178 int n_eq, n_ineq;
1179 int max_constant_term;
1180 int max_coefficient;
1182 max_constant_term = ctx->opt->schedule_max_constant_term;
1183 max_coefficient = ctx->opt->schedule_max_coefficient;
1185 parametric = ctx->opt->schedule_parametric;
1186 nparam = isl_space_dim(graph->node[0].dim, isl_dim_param);
1187 param_pos = 4;
1188 total = param_pos + 2 * nparam;
1189 for (i = 0; i < graph->n; ++i) {
1190 struct isl_sched_node *node = &graph->node[graph->sorted[i]];
1191 if (node_update_cmap(node) < 0)
1192 return -1;
1193 node->start = total;
1194 total += 1 + 2 * (node->nparam + node->nvar);
1197 if (count_constraints(graph, &n_eq, &n_ineq, 0) < 0)
1198 return -1;
1200 dim = isl_space_set_alloc(ctx, 0, total);
1201 isl_basic_set_free(graph->lp);
1202 n_eq += 2 + parametric + force_zero;
1203 if (max_constant_term != -1)
1204 n_ineq += graph->n;
1205 if (max_coefficient != -1)
1206 for (i = 0; i < graph->n; ++i)
1207 n_ineq += 2 * graph->node[i].nparam +
1208 2 * graph->node[i].nvar;
1210 graph->lp = isl_basic_set_alloc_space(dim, 0, n_eq, n_ineq);
1212 k = isl_basic_set_alloc_equality(graph->lp);
1213 if (k < 0)
1214 return -1;
1215 isl_seq_clr(graph->lp->eq[k], 1 + total);
1216 if (!force_zero)
1217 isl_int_set_si(graph->lp->eq[k][1], -1);
1218 for (i = 0; i < 2 * nparam; ++i)
1219 isl_int_set_si(graph->lp->eq[k][1 + param_pos + i], 1);
1221 if (force_zero) {
1222 k = isl_basic_set_alloc_equality(graph->lp);
1223 if (k < 0)
1224 return -1;
1225 isl_seq_clr(graph->lp->eq[k], 1 + total);
1226 isl_int_set_si(graph->lp->eq[k][2], -1);
1229 if (parametric) {
1230 k = isl_basic_set_alloc_equality(graph->lp);
1231 if (k < 0)
1232 return -1;
1233 isl_seq_clr(graph->lp->eq[k], 1 + total);
1234 isl_int_set_si(graph->lp->eq[k][3], -1);
1235 for (i = 0; i < graph->n; ++i) {
1236 int pos = 1 + graph->node[i].start + 1;
1238 for (j = 0; j < 2 * graph->node[i].nparam; ++j)
1239 isl_int_set_si(graph->lp->eq[k][pos + j], 1);
1243 k = isl_basic_set_alloc_equality(graph->lp);
1244 if (k < 0)
1245 return -1;
1246 isl_seq_clr(graph->lp->eq[k], 1 + total);
1247 isl_int_set_si(graph->lp->eq[k][4], -1);
1248 for (i = 0; i < graph->n; ++i) {
1249 struct isl_sched_node *node = &graph->node[i];
1250 int pos = 1 + node->start + 1 + 2 * node->nparam;
1252 for (j = 0; j < 2 * node->nvar; ++j)
1253 isl_int_set_si(graph->lp->eq[k][pos + j], 1);
1256 if (max_constant_term != -1)
1257 for (i = 0; i < graph->n; ++i) {
1258 struct isl_sched_node *node = &graph->node[i];
1259 k = isl_basic_set_alloc_inequality(graph->lp);
1260 if (k < 0)
1261 return -1;
1262 isl_seq_clr(graph->lp->ineq[k], 1 + total);
1263 isl_int_set_si(graph->lp->ineq[k][1 + node->start], -1);
1264 isl_int_set_si(graph->lp->ineq[k][0], max_constant_term);
1267 if (add_bound_coefficient_constraints(ctx, graph) < 0)
1268 return -1;
1269 if (add_all_validity_constraints(graph) < 0)
1270 return -1;
1271 if (add_all_proximity_constraints(graph) < 0)
1272 return -1;
1274 return 0;
1277 /* Analyze the conflicting constraint found by
1278 * isl_tab_basic_set_non_trivial_lexmin. If it corresponds to the validity
1279 * constraint of one of the edges between distinct nodes, living, moreover
1280 * in distinct SCCs, then record the source and sink SCC as this may
1281 * be a good place to cut between SCCs.
1283 static int check_conflict(int con, void *user)
1285 int i;
1286 struct isl_sched_graph *graph = user;
1288 if (graph->src_scc >= 0)
1289 return 0;
1291 con -= graph->lp->n_eq;
1293 if (con >= graph->lp->n_ineq)
1294 return 0;
1296 for (i = 0; i < graph->n_edge; ++i) {
1297 if (!graph->edge[i].validity)
1298 continue;
1299 if (graph->edge[i].src == graph->edge[i].dst)
1300 continue;
1301 if (graph->edge[i].src->scc == graph->edge[i].dst->scc)
1302 continue;
1303 if (graph->edge[i].start > con)
1304 continue;
1305 if (graph->edge[i].end <= con)
1306 continue;
1307 graph->src_scc = graph->edge[i].src->scc;
1308 graph->dst_scc = graph->edge[i].dst->scc;
1311 return 0;
1314 /* Check whether the next schedule row of the given node needs to be
1315 * non-trivial. Lower-dimensional domains may have some trivial rows,
1316 * but as soon as the number of remaining required non-trivial rows
1317 * is as large as the number or remaining rows to be computed,
1318 * all remaining rows need to be non-trivial.
1320 static int needs_row(struct isl_sched_graph *graph, struct isl_sched_node *node)
1322 return node->nvar - node->rank >= graph->maxvar - graph->n_row;
1325 /* Solve the ILP problem constructed in setup_lp.
1326 * For each node such that all the remaining rows of its schedule
1327 * need to be non-trivial, we construct a non-triviality region.
1328 * This region imposes that the next row is independent of previous rows.
1329 * In particular the coefficients c_i_x are represented by t_i_x
1330 * variables with c_i_x = Q t_i_x and Q a unimodular matrix such that
1331 * its first columns span the rows of the previously computed part
1332 * of the schedule. The non-triviality region enforces that at least
1333 * one of the remaining components of t_i_x is non-zero, i.e.,
1334 * that the new schedule row depends on at least one of the remaining
1335 * columns of Q.
1337 static __isl_give isl_vec *solve_lp(struct isl_sched_graph *graph)
1339 int i;
1340 isl_vec *sol;
1341 isl_basic_set *lp;
1343 for (i = 0; i < graph->n; ++i) {
1344 struct isl_sched_node *node = &graph->node[i];
1345 int skip = node->rank;
1346 graph->region[i].pos = node->start + 1 + 2*(node->nparam+skip);
1347 if (needs_row(graph, node))
1348 graph->region[i].len = 2 * (node->nvar - skip);
1349 else
1350 graph->region[i].len = 0;
1352 lp = isl_basic_set_copy(graph->lp);
1353 sol = isl_tab_basic_set_non_trivial_lexmin(lp, 2, graph->n,
1354 graph->region, &check_conflict, graph);
1355 return sol;
1358 /* Update the schedules of all nodes based on the given solution
1359 * of the LP problem.
1360 * The new row is added to the current band.
1361 * All possibly negative coefficients are encoded as a difference
1362 * of two non-negative variables, so we need to perform the subtraction
1363 * here. Moreover, if use_cmap is set, then the solution does
1364 * not refer to the actual coefficients c_i_x, but instead to variables
1365 * t_i_x such that c_i_x = Q t_i_x and Q is equal to node->cmap.
1366 * In this case, we then also need to perform this multiplication
1367 * to obtain the values of c_i_x.
1369 * If check_zero is set, then the first two coordinates of sol are
1370 * assumed to correspond to the dependence distance. If these two
1371 * coordinates are zero, then the corresponding scheduling dimension
1372 * is marked as being zero distance.
1374 static int update_schedule(struct isl_sched_graph *graph,
1375 __isl_take isl_vec *sol, int use_cmap, int check_zero)
1377 int i, j;
1378 int zero = 0;
1379 isl_vec *csol = NULL;
1381 if (!sol)
1382 goto error;
1383 if (sol->size == 0)
1384 isl_die(sol->ctx, isl_error_internal,
1385 "no solution found", goto error);
1387 if (check_zero)
1388 zero = isl_int_is_zero(sol->el[1]) &&
1389 isl_int_is_zero(sol->el[2]);
1391 for (i = 0; i < graph->n; ++i) {
1392 struct isl_sched_node *node = &graph->node[i];
1393 int pos = node->start;
1394 int row = isl_mat_rows(node->sched);
1396 isl_vec_free(csol);
1397 csol = isl_vec_alloc(sol->ctx, node->nvar);
1398 if (!csol)
1399 goto error;
1401 isl_map_free(node->sched_map);
1402 node->sched_map = NULL;
1403 node->sched = isl_mat_add_rows(node->sched, 1);
1404 if (!node->sched)
1405 goto error;
1406 node->sched = isl_mat_set_element(node->sched, row, 0,
1407 sol->el[1 + pos]);
1408 for (j = 0; j < node->nparam + node->nvar; ++j)
1409 isl_int_sub(sol->el[1 + pos + 1 + 2 * j + 1],
1410 sol->el[1 + pos + 1 + 2 * j + 1],
1411 sol->el[1 + pos + 1 + 2 * j]);
1412 for (j = 0; j < node->nparam; ++j)
1413 node->sched = isl_mat_set_element(node->sched,
1414 row, 1 + j, sol->el[1+pos+1+2*j+1]);
1415 for (j = 0; j < node->nvar; ++j)
1416 isl_int_set(csol->el[j],
1417 sol->el[1+pos+1+2*(node->nparam+j)+1]);
1418 if (use_cmap)
1419 csol = isl_mat_vec_product(isl_mat_copy(node->cmap),
1420 csol);
1421 if (!csol)
1422 goto error;
1423 for (j = 0; j < node->nvar; ++j)
1424 node->sched = isl_mat_set_element(node->sched,
1425 row, 1 + node->nparam + j, csol->el[j]);
1426 node->band[graph->n_total_row] = graph->n_band;
1427 node->zero[graph->n_total_row] = zero;
1429 isl_vec_free(sol);
1430 isl_vec_free(csol);
1432 graph->n_row++;
1433 graph->n_total_row++;
1435 return 0;
1436 error:
1437 isl_vec_free(sol);
1438 isl_vec_free(csol);
1439 return -1;
1442 /* Convert node->sched into a map and return this map.
1443 * We simply add equality constraints that express each output variable
1444 * as the affine combination of parameters and input variables specified
1445 * by the schedule matrix.
1447 * The result is cached in node->sched_map, which needs to be released
1448 * whenever node->sched is updated.
1450 static __isl_give isl_map *node_extract_schedule(struct isl_sched_node *node)
1452 int i, j;
1453 isl_space *dim;
1454 isl_local_space *ls;
1455 isl_basic_map *bmap;
1456 isl_constraint *c;
1457 int nrow, ncol;
1458 isl_int v;
1460 if (node->sched_map)
1461 return isl_map_copy(node->sched_map);
1463 nrow = isl_mat_rows(node->sched);
1464 ncol = isl_mat_cols(node->sched) - 1;
1465 dim = isl_space_from_domain(isl_space_copy(node->dim));
1466 dim = isl_space_add_dims(dim, isl_dim_out, nrow);
1467 bmap = isl_basic_map_universe(isl_space_copy(dim));
1468 ls = isl_local_space_from_space(dim);
1470 isl_int_init(v);
1472 for (i = 0; i < nrow; ++i) {
1473 c = isl_equality_alloc(isl_local_space_copy(ls));
1474 isl_constraint_set_coefficient_si(c, isl_dim_out, i, -1);
1475 isl_mat_get_element(node->sched, i, 0, &v);
1476 isl_constraint_set_constant(c, v);
1477 for (j = 0; j < node->nparam; ++j) {
1478 isl_mat_get_element(node->sched, i, 1 + j, &v);
1479 isl_constraint_set_coefficient(c, isl_dim_param, j, v);
1481 for (j = 0; j < node->nvar; ++j) {
1482 isl_mat_get_element(node->sched,
1483 i, 1 + node->nparam + j, &v);
1484 isl_constraint_set_coefficient(c, isl_dim_in, j, v);
1486 bmap = isl_basic_map_add_constraint(bmap, c);
1489 isl_int_clear(v);
1491 isl_local_space_free(ls);
1493 node->sched_map = isl_map_from_basic_map(bmap);
1494 return isl_map_copy(node->sched_map);
1497 /* Update the given dependence relation based on the current schedule.
1498 * That is, intersect the dependence relation with a map expressing
1499 * that source and sink are executed within the same iteration of
1500 * the current schedule.
1501 * This is not the most efficient way, but this shouldn't be a critical
1502 * operation.
1504 static __isl_give isl_map *specialize(__isl_take isl_map *map,
1505 struct isl_sched_node *src, struct isl_sched_node *dst)
1507 isl_map *src_sched, *dst_sched, *id;
1509 src_sched = node_extract_schedule(src);
1510 dst_sched = node_extract_schedule(dst);
1511 id = isl_map_apply_range(src_sched, isl_map_reverse(dst_sched));
1512 return isl_map_intersect(map, id);
1515 /* Update the dependence relations of all edges based on the current schedule.
1516 * If a dependence is carried completely by the current schedule, then
1517 * it is removed and edge_table is updated accordingly.
1519 static int update_edges(isl_ctx *ctx, struct isl_sched_graph *graph)
1521 int i;
1522 int reset_table = 0;
1524 for (i = graph->n_edge - 1; i >= 0; --i) {
1525 struct isl_sched_edge *edge = &graph->edge[i];
1526 edge->map = specialize(edge->map, edge->src, edge->dst);
1527 if (!edge->map)
1528 return -1;
1530 if (isl_map_plain_is_empty(edge->map)) {
1531 reset_table = 1;
1532 isl_map_free(edge->map);
1533 if (i != graph->n_edge - 1)
1534 graph->edge[i] = graph->edge[graph->n_edge - 1];
1535 graph->n_edge--;
1539 if (reset_table) {
1540 isl_hash_table_free(ctx, graph->edge_table);
1541 graph->edge_table = NULL;
1542 return graph_init_edge_table(ctx, graph);
1545 return 0;
1548 static void next_band(struct isl_sched_graph *graph)
1550 graph->band_start = graph->n_total_row;
1551 graph->n_band++;
1554 /* Topologically sort statements mapped to same schedule iteration
1555 * and add a row to the schedule corresponding to this order.
1557 static int sort_statements(isl_ctx *ctx, struct isl_sched_graph *graph)
1559 int i, j;
1561 if (graph->n <= 1)
1562 return 0;
1564 if (update_edges(ctx, graph) < 0)
1565 return -1;
1567 if (graph->n_edge == 0)
1568 return 0;
1570 if (detect_sccs(graph) < 0)
1571 return -1;
1573 for (i = 0; i < graph->n; ++i) {
1574 struct isl_sched_node *node = &graph->node[i];
1575 int row = isl_mat_rows(node->sched);
1576 int cols = isl_mat_cols(node->sched);
1578 isl_map_free(node->sched_map);
1579 node->sched_map = NULL;
1580 node->sched = isl_mat_add_rows(node->sched, 1);
1581 if (!node->sched)
1582 return -1;
1583 node->sched = isl_mat_set_element_si(node->sched, row, 0,
1584 node->scc);
1585 for (j = 1; j < cols; ++j)
1586 node->sched = isl_mat_set_element_si(node->sched,
1587 row, j, 0);
1588 node->band[graph->n_total_row] = graph->n_band;
1591 graph->n_total_row++;
1592 next_band(graph);
1594 return 0;
1597 /* Construct an isl_schedule based on the computed schedule stored
1598 * in graph and with parameters specified by dim.
1600 static __isl_give isl_schedule *extract_schedule(struct isl_sched_graph *graph,
1601 __isl_take isl_space *dim)
1603 int i;
1604 isl_ctx *ctx;
1605 isl_schedule *sched = NULL;
1607 if (!dim)
1608 return NULL;
1610 ctx = isl_space_get_ctx(dim);
1611 sched = isl_calloc(ctx, struct isl_schedule,
1612 sizeof(struct isl_schedule) +
1613 (graph->n - 1) * sizeof(struct isl_schedule_node));
1614 if (!sched)
1615 goto error;
1617 sched->ref = 1;
1618 sched->n = graph->n;
1619 sched->n_band = graph->n_band;
1620 sched->n_total_row = graph->n_total_row;
1622 for (i = 0; i < sched->n; ++i) {
1623 int r, b;
1624 int *band_end, *band_id, *zero;
1626 band_end = isl_alloc_array(ctx, int, graph->n_band);
1627 band_id = isl_alloc_array(ctx, int, graph->n_band);
1628 zero = isl_alloc_array(ctx, int, graph->n_total_row);
1629 sched->node[i].sched = node_extract_schedule(&graph->node[i]);
1630 sched->node[i].band_end = band_end;
1631 sched->node[i].band_id = band_id;
1632 sched->node[i].zero = zero;
1633 if (!band_end || !band_id || !zero)
1634 goto error;
1636 for (r = 0; r < graph->n_total_row; ++r)
1637 zero[r] = graph->node[i].zero[r];
1638 for (r = b = 0; r < graph->n_total_row; ++r) {
1639 if (graph->node[i].band[r] == b)
1640 continue;
1641 band_end[b++] = r;
1642 if (graph->node[i].band[r] == -1)
1643 break;
1645 if (r == graph->n_total_row)
1646 band_end[b++] = r;
1647 sched->node[i].n_band = b;
1648 for (--b; b >= 0; --b)
1649 band_id[b] = graph->node[i].band_id[b];
1652 sched->dim = dim;
1654 return sched;
1655 error:
1656 isl_space_free(dim);
1657 isl_schedule_free(sched);
1658 return NULL;
1661 /* Copy nodes that satisfy node_pred from the src dependence graph
1662 * to the dst dependence graph.
1664 static int copy_nodes(struct isl_sched_graph *dst, struct isl_sched_graph *src,
1665 int (*node_pred)(struct isl_sched_node *node, int data), int data)
1667 int i;
1669 dst->n = 0;
1670 for (i = 0; i < src->n; ++i) {
1671 if (!node_pred(&src->node[i], data))
1672 continue;
1673 dst->node[dst->n].dim = isl_space_copy(src->node[i].dim);
1674 dst->node[dst->n].nvar = src->node[i].nvar;
1675 dst->node[dst->n].nparam = src->node[i].nparam;
1676 dst->node[dst->n].sched = isl_mat_copy(src->node[i].sched);
1677 dst->node[dst->n].sched_map =
1678 isl_map_copy(src->node[i].sched_map);
1679 dst->node[dst->n].band = src->node[i].band;
1680 dst->node[dst->n].band_id = src->node[i].band_id;
1681 dst->node[dst->n].zero = src->node[i].zero;
1682 dst->n++;
1685 return 0;
1688 /* Copy non-empty edges that satisfy edge_pred from the src dependence graph
1689 * to the dst dependence graph.
1690 * If the source or destination node of the edge is not in the destination
1691 * graph, then it must be a backward proximity edge and it should simply
1692 * be ignored.
1694 static int copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst,
1695 struct isl_sched_graph *src,
1696 int (*edge_pred)(struct isl_sched_edge *edge, int data), int data)
1698 int i;
1700 dst->n_edge = 0;
1701 for (i = 0; i < src->n_edge; ++i) {
1702 struct isl_sched_edge *edge = &src->edge[i];
1703 isl_map *map;
1704 struct isl_sched_node *dst_src, *dst_dst;
1706 if (!edge_pred(edge, data))
1707 continue;
1709 if (isl_map_plain_is_empty(edge->map))
1710 continue;
1712 dst_src = graph_find_node(ctx, dst, edge->src->dim);
1713 dst_dst = graph_find_node(ctx, dst, edge->dst->dim);
1714 if (!dst_src || !dst_dst) {
1715 if (edge->validity)
1716 isl_die(ctx, isl_error_internal,
1717 "backward validity edge", return -1);
1718 continue;
1721 map = isl_map_copy(edge->map);
1723 dst->edge[dst->n_edge].src = dst_src;
1724 dst->edge[dst->n_edge].dst = dst_dst;
1725 dst->edge[dst->n_edge].map = map;
1726 dst->edge[dst->n_edge].validity = edge->validity;
1727 dst->edge[dst->n_edge].proximity = edge->proximity;
1728 dst->n_edge++;
1731 return 0;
1734 /* Given a "src" dependence graph that contains the nodes from "dst"
1735 * that satisfy node_pred, copy the schedule computed in "src"
1736 * for those nodes back to "dst".
1738 static int copy_schedule(struct isl_sched_graph *dst,
1739 struct isl_sched_graph *src,
1740 int (*node_pred)(struct isl_sched_node *node, int data), int data)
1742 int i;
1744 src->n = 0;
1745 for (i = 0; i < dst->n; ++i) {
1746 if (!node_pred(&dst->node[i], data))
1747 continue;
1748 isl_mat_free(dst->node[i].sched);
1749 isl_map_free(dst->node[i].sched_map);
1750 dst->node[i].sched = isl_mat_copy(src->node[src->n].sched);
1751 dst->node[i].sched_map =
1752 isl_map_copy(src->node[src->n].sched_map);
1753 src->n++;
1756 dst->n_total_row = src->n_total_row;
1757 dst->n_band = src->n_band;
1759 return 0;
1762 /* Compute the maximal number of variables over all nodes.
1763 * This is the maximal number of linearly independent schedule
1764 * rows that we need to compute.
1765 * Just in case we end up in a part of the dependence graph
1766 * with only lower-dimensional domains, we make sure we will
1767 * compute the required amount of extra linearly independent rows.
1769 static int compute_maxvar(struct isl_sched_graph *graph)
1771 int i;
1773 graph->maxvar = 0;
1774 for (i = 0; i < graph->n; ++i) {
1775 struct isl_sched_node *node = &graph->node[i];
1776 int nvar;
1778 if (node_update_cmap(node) < 0)
1779 return -1;
1780 nvar = node->nvar + graph->n_row - node->rank;
1781 if (nvar > graph->maxvar)
1782 graph->maxvar = nvar;
1785 return 0;
1788 static int compute_schedule(isl_ctx *ctx, struct isl_sched_graph *graph);
1789 static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph);
1791 /* Compute a schedule for a subgraph of "graph". In particular, for
1792 * the graph composed of nodes that satisfy node_pred and edges that
1793 * that satisfy edge_pred. The caller should precompute the number
1794 * of nodes and edges that satisfy these predicates and pass them along
1795 * as "n" and "n_edge".
1796 * If the subgraph is known to consist of a single component, then wcc should
1797 * be set and then we call compute_schedule_wcc on the constructed subgraph.
1798 * Otherwise, we call compute_schedule, which will check whether the subgraph
1799 * is connected.
1801 static int compute_sub_schedule(isl_ctx *ctx,
1802 struct isl_sched_graph *graph, int n, int n_edge,
1803 int (*node_pred)(struct isl_sched_node *node, int data),
1804 int (*edge_pred)(struct isl_sched_edge *edge, int data),
1805 int data, int wcc)
1807 struct isl_sched_graph split = { 0 };
1809 if (graph_alloc(ctx, &split, n, n_edge) < 0)
1810 goto error;
1811 if (copy_nodes(&split, graph, node_pred, data) < 0)
1812 goto error;
1813 if (graph_init_table(ctx, &split) < 0)
1814 goto error;
1815 if (copy_edges(ctx, &split, graph, edge_pred, data) < 0)
1816 goto error;
1817 if (graph_init_edge_table(ctx, &split) < 0)
1818 goto error;
1819 split.n_row = graph->n_row;
1820 split.n_total_row = graph->n_total_row;
1821 split.n_band = graph->n_band;
1822 split.band_start = graph->band_start;
1824 if (wcc && compute_schedule_wcc(ctx, &split) < 0)
1825 goto error;
1826 if (!wcc && compute_schedule(ctx, &split) < 0)
1827 goto error;
1829 copy_schedule(graph, &split, node_pred, data);
1831 graph_free(ctx, &split);
1832 return 0;
1833 error:
1834 graph_free(ctx, &split);
1835 return -1;
1838 static int node_scc_exactly(struct isl_sched_node *node, int scc)
1840 return node->scc == scc;
1843 static int node_scc_at_most(struct isl_sched_node *node, int scc)
1845 return node->scc <= scc;
1848 static int node_scc_at_least(struct isl_sched_node *node, int scc)
1850 return node->scc >= scc;
1853 static int edge_scc_exactly(struct isl_sched_edge *edge, int scc)
1855 return edge->src->scc == scc && edge->dst->scc == scc;
1858 static int edge_dst_scc_at_most(struct isl_sched_edge *edge, int scc)
1860 return edge->dst->scc <= scc;
1863 static int edge_src_scc_at_least(struct isl_sched_edge *edge, int scc)
1865 return edge->src->scc >= scc;
1868 /* Pad the schedules of all nodes with zero rows such that in the end
1869 * they all have graph->n_total_row rows.
1870 * The extra rows don't belong to any band, so they get assigned band number -1.
1872 static int pad_schedule(struct isl_sched_graph *graph)
1874 int i, j;
1876 for (i = 0; i < graph->n; ++i) {
1877 struct isl_sched_node *node = &graph->node[i];
1878 int row = isl_mat_rows(node->sched);
1879 if (graph->n_total_row > row) {
1880 isl_map_free(node->sched_map);
1881 node->sched_map = NULL;
1883 node->sched = isl_mat_add_zero_rows(node->sched,
1884 graph->n_total_row - row);
1885 if (!node->sched)
1886 return -1;
1887 for (j = row; j < graph->n_total_row; ++j)
1888 node->band[j] = -1;
1891 return 0;
1894 /* Split the current graph into two parts and compute a schedule for each
1895 * part individually. In particular, one part consists of all SCCs up
1896 * to and including graph->src_scc, while the other part contains the other
1897 * SCCS.
1899 * The split is enforced in the schedule by constant rows with two different
1900 * values (0 and 1). These constant rows replace the previously computed rows
1901 * in the current band.
1902 * It would be possible to reuse them as the first rows in the next
1903 * band, but recomputing them may result in better rows as we are looking
1904 * at a smaller part of the dependence graph.
1905 * compute_split_schedule is only called when no zero-distance schedule row
1906 * could be found on the entire graph, so we wark the splitting row as
1907 * non zero-distance.
1909 * The band_id of the second group is set to n, where n is the number
1910 * of nodes in the first group. This ensures that the band_ids over
1911 * the two groups remain disjoint, even if either or both of the two
1912 * groups contain independent components.
1914 static int compute_split_schedule(isl_ctx *ctx, struct isl_sched_graph *graph)
1916 int i, j, n, e1, e2;
1917 int n_total_row, orig_total_row;
1918 int n_band, orig_band;
1919 int drop;
1921 drop = graph->n_total_row - graph->band_start;
1922 graph->n_total_row -= drop;
1923 graph->n_row -= drop;
1925 n = 0;
1926 for (i = 0; i < graph->n; ++i) {
1927 struct isl_sched_node *node = &graph->node[i];
1928 int row = isl_mat_rows(node->sched) - drop;
1929 int cols = isl_mat_cols(node->sched);
1930 int before = node->scc <= graph->src_scc;
1932 if (before)
1933 n++;
1935 isl_map_free(node->sched_map);
1936 node->sched_map = NULL;
1937 node->sched = isl_mat_drop_rows(node->sched,
1938 graph->band_start, drop);
1939 node->sched = isl_mat_add_rows(node->sched, 1);
1940 if (!node->sched)
1941 return -1;
1942 node->sched = isl_mat_set_element_si(node->sched, row, 0,
1943 !before);
1944 for (j = 1; j < cols; ++j)
1945 node->sched = isl_mat_set_element_si(node->sched,
1946 row, j, 0);
1947 node->band[graph->n_total_row] = graph->n_band;
1948 node->zero[graph->n_total_row] = 0;
1951 e1 = e2 = 0;
1952 for (i = 0; i < graph->n_edge; ++i) {
1953 if (graph->edge[i].dst->scc <= graph->src_scc)
1954 e1++;
1955 if (graph->edge[i].src->scc > graph->src_scc)
1956 e2++;
1959 graph->n_total_row++;
1960 next_band(graph);
1962 for (i = 0; i < graph->n; ++i) {
1963 struct isl_sched_node *node = &graph->node[i];
1964 if (node->scc > graph->src_scc)
1965 node->band_id[graph->n_band] = n;
1968 orig_total_row = graph->n_total_row;
1969 orig_band = graph->n_band;
1970 if (compute_sub_schedule(ctx, graph, n, e1,
1971 &node_scc_at_most, &edge_dst_scc_at_most,
1972 graph->src_scc, 0) < 0)
1973 return -1;
1974 n_total_row = graph->n_total_row;
1975 graph->n_total_row = orig_total_row;
1976 n_band = graph->n_band;
1977 graph->n_band = orig_band;
1978 if (compute_sub_schedule(ctx, graph, graph->n - n, e2,
1979 &node_scc_at_least, &edge_src_scc_at_least,
1980 graph->src_scc + 1, 0) < 0)
1981 return -1;
1982 if (n_total_row > graph->n_total_row)
1983 graph->n_total_row = n_total_row;
1984 if (n_band > graph->n_band)
1985 graph->n_band = n_band;
1987 return pad_schedule(graph);
1990 /* Compute the next band of the schedule after updating the dependence
1991 * relations based on the the current schedule.
1993 static int compute_next_band(isl_ctx *ctx, struct isl_sched_graph *graph)
1995 if (update_edges(ctx, graph) < 0)
1996 return -1;
1997 next_band(graph);
1999 return compute_schedule(ctx, graph);
2002 /* Add constraints to graph->lp that force the dependence "map" (which
2003 * is part of the dependence relation of "edge")
2004 * to be respected and attempt to carry it, where the edge is one from
2005 * a node j to itself. "pos" is the sequence number of the given map.
2006 * That is, add constraints that enforce
2008 * (c_j_0 + c_j_n n + c_j_x y) - (c_j_0 + c_j_n n + c_j_x x)
2009 * = c_j_x (y - x) >= e_i
2011 * for each (x,y) in R.
2012 * We obtain general constraints on coefficients (c_0, c_n, c_x)
2013 * of valid constraints for (y - x) and then plug in (-e_i, 0, c_j_x),
2014 * with each coefficient in c_j_x represented as a pair of non-negative
2015 * coefficients.
2017 static int add_intra_constraints(struct isl_sched_graph *graph,
2018 struct isl_sched_edge *edge, __isl_take isl_map *map, int pos)
2020 unsigned total;
2021 isl_ctx *ctx = isl_map_get_ctx(map);
2022 isl_space *dim;
2023 isl_dim_map *dim_map;
2024 isl_basic_set *coef;
2025 struct isl_sched_node *node = edge->src;
2027 coef = intra_coefficients(graph, map);
2029 dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef)));
2031 total = isl_basic_set_total_dim(graph->lp);
2032 dim_map = isl_dim_map_alloc(ctx, total);
2033 isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
2034 isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 1, 2,
2035 isl_space_dim(dim, isl_dim_set), 1,
2036 node->nvar, -1);
2037 isl_dim_map_range(dim_map, node->start + 2 * node->nparam + 2, 2,
2038 isl_space_dim(dim, isl_dim_set), 1,
2039 node->nvar, 1);
2040 graph->lp = isl_basic_set_extend_constraints(graph->lp,
2041 coef->n_eq, coef->n_ineq);
2042 graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
2043 coef, dim_map);
2044 isl_space_free(dim);
2046 return 0;
2049 /* Add constraints to graph->lp that force the dependence "map" (which
2050 * is part of the dependence relation of "edge")
2051 * to be respected and attempt to carry it, where the edge is one from
2052 * node j to node k. "pos" is the sequence number of the given map.
2053 * That is, add constraints that enforce
2055 * (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= e_i
2057 * for each (x,y) in R.
2058 * We obtain general constraints on coefficients (c_0, c_n, c_x)
2059 * of valid constraints for R and then plug in
2060 * (-e_i + c_k_0 - c_j_0, c_k_n - c_j_n, c_k_x - c_j_x)
2061 * with each coefficient (except e_i, c_k_0 and c_j_0)
2062 * represented as a pair of non-negative coefficients.
2064 static int add_inter_constraints(struct isl_sched_graph *graph,
2065 struct isl_sched_edge *edge, __isl_take isl_map *map, int pos)
2067 unsigned total;
2068 isl_ctx *ctx = isl_map_get_ctx(map);
2069 isl_space *dim;
2070 isl_dim_map *dim_map;
2071 isl_basic_set *coef;
2072 struct isl_sched_node *src = edge->src;
2073 struct isl_sched_node *dst = edge->dst;
2075 coef = inter_coefficients(graph, map);
2077 dim = isl_space_domain(isl_space_unwrap(isl_basic_set_get_space(coef)));
2079 total = isl_basic_set_total_dim(graph->lp);
2080 dim_map = isl_dim_map_alloc(ctx, total);
2082 isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
2084 isl_dim_map_range(dim_map, dst->start, 0, 0, 0, 1, 1);
2085 isl_dim_map_range(dim_map, dst->start + 1, 2, 1, 1, dst->nparam, -1);
2086 isl_dim_map_range(dim_map, dst->start + 2, 2, 1, 1, dst->nparam, 1);
2087 isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 1, 2,
2088 isl_space_dim(dim, isl_dim_set) + src->nvar, 1,
2089 dst->nvar, -1);
2090 isl_dim_map_range(dim_map, dst->start + 2 * dst->nparam + 2, 2,
2091 isl_space_dim(dim, isl_dim_set) + src->nvar, 1,
2092 dst->nvar, 1);
2094 isl_dim_map_range(dim_map, src->start, 0, 0, 0, 1, -1);
2095 isl_dim_map_range(dim_map, src->start + 1, 2, 1, 1, src->nparam, 1);
2096 isl_dim_map_range(dim_map, src->start + 2, 2, 1, 1, src->nparam, -1);
2097 isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 1, 2,
2098 isl_space_dim(dim, isl_dim_set), 1,
2099 src->nvar, 1);
2100 isl_dim_map_range(dim_map, src->start + 2 * src->nparam + 2, 2,
2101 isl_space_dim(dim, isl_dim_set), 1,
2102 src->nvar, -1);
2104 graph->lp = isl_basic_set_extend_constraints(graph->lp,
2105 coef->n_eq, coef->n_ineq);
2106 graph->lp = isl_basic_set_add_constraints_dim_map(graph->lp,
2107 coef, dim_map);
2108 isl_space_free(dim);
2110 return 0;
2113 /* Add constraints to graph->lp that force all dependence
2114 * to be respected and attempt to carry it.
2116 static int add_all_constraints(struct isl_sched_graph *graph)
2118 int i, j;
2119 int pos;
2121 pos = 0;
2122 for (i = 0; i < graph->n_edge; ++i) {
2123 struct isl_sched_edge *edge= &graph->edge[i];
2124 for (j = 0; j < edge->map->n; ++j) {
2125 isl_basic_map *bmap;
2126 isl_map *map;
2128 bmap = isl_basic_map_copy(edge->map->p[j]);
2129 map = isl_map_from_basic_map(bmap);
2131 if (edge->src == edge->dst &&
2132 add_intra_constraints(graph, edge, map, pos) < 0)
2133 return -1;
2134 if (edge->src != edge->dst &&
2135 add_inter_constraints(graph, edge, map, pos) < 0)
2136 return -1;
2137 ++pos;
2141 return 0;
2144 /* Count the number of equality and inequality constraints
2145 * that will be added to the carry_lp problem.
2146 * If once is set, then we count
2147 * each edge exactly once. Otherwise, we count as follows
2148 * validity -> 1 (>= 0)
2149 * validity+proximity -> 2 (>= 0 and upper bound)
2150 * proximity -> 2 (lower and upper bound)
2152 static int count_all_constraints(struct isl_sched_graph *graph,
2153 int *n_eq, int *n_ineq, int once)
2155 int i, j;
2157 *n_eq = *n_ineq = 0;
2158 for (i = 0; i < graph->n_edge; ++i) {
2159 struct isl_sched_edge *edge= &graph->edge[i];
2160 for (j = 0; j < edge->map->n; ++j) {
2161 isl_basic_map *bmap;
2162 isl_map *map;
2164 bmap = isl_basic_map_copy(edge->map->p[j]);
2165 map = isl_map_from_basic_map(bmap);
2167 if (count_map_constraints(graph, edge, map,
2168 n_eq, n_ineq, once) < 0)
2169 return -1;
2173 return 0;
2176 /* Construct an LP problem for finding schedule coefficients
2177 * such that the schedule carries as many dependences as possible.
2178 * In particular, for each dependence i, we bound the dependence distance
2179 * from below by e_i, with 0 <= e_i <= 1 and then maximize the sum
2180 * of all e_i's. Dependence with e_i = 0 in the solution are simply
2181 * respected, while those with e_i > 0 (in practice e_i = 1) are carried.
2182 * Note that if the dependence relation is a union of basic maps,
2183 * then we have to consider each basic map individually as it may only
2184 * be possible to carry the dependences expressed by some of those
2185 * basic maps and not all off them.
2186 * Below, we consider each of those basic maps as a separate "edge".
2188 * All variables of the LP are non-negative. The actual coefficients
2189 * may be negative, so each coefficient is represented as the difference
2190 * of two non-negative variables. The negative part always appears
2191 * immediately before the positive part.
2192 * Other than that, the variables have the following order
2194 * - sum of (1 - e_i) over all edges
2195 * - sum of positive and negative parts of all c_n coefficients
2196 * (unconstrained when computing non-parametric schedules)
2197 * - sum of positive and negative parts of all c_x coefficients
2198 * - for each edge
2199 * - e_i
2200 * - for each node
2201 * - c_i_0
2202 * - positive and negative parts of c_i_n (if parametric)
2203 * - positive and negative parts of c_i_x
2205 * The constraints are those from the edges plus three equalities
2206 * to express the sums and n_edge inequalities to express e_i <= 1.
2208 static int setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
2210 int i, j;
2211 int k;
2212 isl_space *dim;
2213 unsigned total;
2214 int n_eq, n_ineq;
2215 int n_edge;
2217 n_edge = 0;
2218 for (i = 0; i < graph->n_edge; ++i)
2219 n_edge += graph->edge[i].map->n;
2221 total = 3 + n_edge;
2222 for (i = 0; i < graph->n; ++i) {
2223 struct isl_sched_node *node = &graph->node[graph->sorted[i]];
2224 node->start = total;
2225 total += 1 + 2 * (node->nparam + node->nvar);
2228 if (count_all_constraints(graph, &n_eq, &n_ineq, 1) < 0)
2229 return -1;
2231 dim = isl_space_set_alloc(ctx, 0, total);
2232 isl_basic_set_free(graph->lp);
2233 n_eq += 3;
2234 n_ineq += n_edge;
2235 graph->lp = isl_basic_set_alloc_space(dim, 0, n_eq, n_ineq);
2236 graph->lp = isl_basic_set_set_rational(graph->lp);
2238 k = isl_basic_set_alloc_equality(graph->lp);
2239 if (k < 0)
2240 return -1;
2241 isl_seq_clr(graph->lp->eq[k], 1 + total);
2242 isl_int_set_si(graph->lp->eq[k][0], -n_edge);
2243 isl_int_set_si(graph->lp->eq[k][1], 1);
2244 for (i = 0; i < n_edge; ++i)
2245 isl_int_set_si(graph->lp->eq[k][4 + i], 1);
2247 k = isl_basic_set_alloc_equality(graph->lp);
2248 if (k < 0)
2249 return -1;
2250 isl_seq_clr(graph->lp->eq[k], 1 + total);
2251 isl_int_set_si(graph->lp->eq[k][2], -1);
2252 for (i = 0; i < graph->n; ++i) {
2253 int pos = 1 + graph->node[i].start + 1;
2255 for (j = 0; j < 2 * graph->node[i].nparam; ++j)
2256 isl_int_set_si(graph->lp->eq[k][pos + j], 1);
2259 k = isl_basic_set_alloc_equality(graph->lp);
2260 if (k < 0)
2261 return -1;
2262 isl_seq_clr(graph->lp->eq[k], 1 + total);
2263 isl_int_set_si(graph->lp->eq[k][3], -1);
2264 for (i = 0; i < graph->n; ++i) {
2265 struct isl_sched_node *node = &graph->node[i];
2266 int pos = 1 + node->start + 1 + 2 * node->nparam;
2268 for (j = 0; j < 2 * node->nvar; ++j)
2269 isl_int_set_si(graph->lp->eq[k][pos + j], 1);
2272 for (i = 0; i < n_edge; ++i) {
2273 k = isl_basic_set_alloc_inequality(graph->lp);
2274 if (k < 0)
2275 return -1;
2276 isl_seq_clr(graph->lp->ineq[k], 1 + total);
2277 isl_int_set_si(graph->lp->ineq[k][4 + i], -1);
2278 isl_int_set_si(graph->lp->ineq[k][0], 1);
2281 if (add_all_constraints(graph) < 0)
2282 return -1;
2284 return 0;
2287 /* If the schedule_split_scaled option is set and if the linear
2288 * parts of the scheduling rows for all nodes in the graphs have
2289 * non-trivial common divisor, then split off the constant term
2290 * from the linear part.
2291 * The constant term is then placed in a separate band and
2292 * the linear part is reduced.
2294 static int split_scaled(isl_ctx *ctx, struct isl_sched_graph *graph)
2296 int i;
2297 int row;
2298 isl_int gcd, gcd_i;
2300 if (!ctx->opt->schedule_split_scaled)
2301 return 0;
2302 if (graph->n <= 1)
2303 return 0;
2305 isl_int_init(gcd);
2306 isl_int_init(gcd_i);
2308 isl_int_set_si(gcd, 0);
2310 row = isl_mat_rows(graph->node[0].sched) - 1;
2312 for (i = 0; i < graph->n; ++i) {
2313 struct isl_sched_node *node = &graph->node[i];
2314 int cols = isl_mat_cols(node->sched);
2316 isl_seq_gcd(node->sched->row[row] + 1, cols - 1, &gcd_i);
2317 isl_int_gcd(gcd, gcd, gcd_i);
2320 isl_int_clear(gcd_i);
2322 if (isl_int_cmp_si(gcd, 1) <= 0) {
2323 isl_int_clear(gcd);
2324 return 0;
2327 next_band(graph);
2329 for (i = 0; i < graph->n; ++i) {
2330 struct isl_sched_node *node = &graph->node[i];
2332 isl_map_free(node->sched_map);
2333 node->sched_map = NULL;
2334 node->sched = isl_mat_add_zero_rows(node->sched, 1);
2335 if (!node->sched)
2336 goto error;
2337 isl_int_fdiv_r(node->sched->row[row + 1][0],
2338 node->sched->row[row][0], gcd);
2339 isl_int_fdiv_q(node->sched->row[row][0],
2340 node->sched->row[row][0], gcd);
2341 isl_int_mul(node->sched->row[row][0],
2342 node->sched->row[row][0], gcd);
2343 node->sched = isl_mat_scale_down_row(node->sched, row, gcd);
2344 if (!node->sched)
2345 goto error;
2346 node->band[graph->n_total_row] = graph->n_band;
2349 graph->n_total_row++;
2351 isl_int_clear(gcd);
2352 return 0;
2353 error:
2354 isl_int_clear(gcd);
2355 return -1;
2358 /* Construct a schedule row for each node such that as many dependences
2359 * as possible are carried and then continue with the next band.
2361 static int carry_dependences(isl_ctx *ctx, struct isl_sched_graph *graph)
2363 int i;
2364 int n_edge;
2365 isl_vec *sol;
2366 isl_basic_set *lp;
2368 n_edge = 0;
2369 for (i = 0; i < graph->n_edge; ++i)
2370 n_edge += graph->edge[i].map->n;
2372 if (setup_carry_lp(ctx, graph) < 0)
2373 return -1;
2375 lp = isl_basic_set_copy(graph->lp);
2376 sol = isl_tab_basic_set_non_neg_lexmin(lp);
2377 if (!sol)
2378 return -1;
2380 if (sol->size == 0) {
2381 isl_vec_free(sol);
2382 isl_die(ctx, isl_error_internal,
2383 "error in schedule construction", return -1);
2386 if (isl_int_cmp_si(sol->el[1], n_edge) >= 0) {
2387 isl_vec_free(sol);
2388 isl_die(ctx, isl_error_unknown,
2389 "unable to carry dependences", return -1);
2392 if (update_schedule(graph, sol, 0, 0) < 0)
2393 return -1;
2395 if (split_scaled(ctx, graph) < 0)
2396 return -1;
2398 return compute_next_band(ctx, graph);
2401 /* Are there any validity edges in the graph?
2403 static int has_validity_edges(struct isl_sched_graph *graph)
2405 int i;
2407 for (i = 0; i < graph->n_edge; ++i)
2408 if (graph->edge[i].validity)
2409 return 1;
2411 return 0;
2414 /* Should we apply a Feautrier step?
2415 * That is, did the user request the Feautrier algorithm and are
2416 * there any validity dependences (left)?
2418 static int need_feautrier_step(isl_ctx *ctx, struct isl_sched_graph *graph)
2420 if (ctx->opt->schedule_algorithm != ISL_SCHEDULE_ALGORITHM_FEAUTRIER)
2421 return 0;
2423 return has_validity_edges(graph);
2426 /* Compute a schedule for a connected dependence graph using Feautrier's
2427 * multi-dimensional scheduling algorithm.
2428 * The original algorithm is described in [1].
2429 * The main idea is to minimize the number of scheduling dimensions, by
2430 * trying to satisfy as many dependences as possible per scheduling dimension.
2432 * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
2433 * Problem, Part II: Multi-Dimensional Time.
2434 * In Intl. Journal of Parallel Programming, 1992.
2436 static int compute_schedule_wcc_feautrier(isl_ctx *ctx,
2437 struct isl_sched_graph *graph)
2439 return carry_dependences(ctx, graph);
2442 /* Compute a schedule for a connected dependence graph.
2443 * We try to find a sequence of as many schedule rows as possible that result
2444 * in non-negative dependence distances (independent of the previous rows
2445 * in the sequence, i.e., such that the sequence is tilable).
2446 * If we can't find any more rows we either
2447 * - split between SCCs and start over (assuming we found an interesting
2448 * pair of SCCs between which to split)
2449 * - continue with the next band (assuming the current band has at least
2450 * one row)
2451 * - try to carry as many dependences as possible and continue with the next
2452 * band
2454 * If Feautrier's algorithm is selected, we first recursively try to satisfy
2455 * as many validity dependences as possible. When all validity dependences
2456 * are satisfied we extend the schedule to a full-dimensional schedule.
2458 * If we manage to complete the schedule, we finish off by topologically
2459 * sorting the statements based on the remaining dependences.
2461 * If ctx->opt->schedule_outer_zero_distance is set, then we force the
2462 * outermost dimension in the current band to be zero distance. If this
2463 * turns out to be impossible, we fall back on the general scheme above
2464 * and try to carry as many dependences as possible.
2466 static int compute_schedule_wcc(isl_ctx *ctx, struct isl_sched_graph *graph)
2468 int force_zero = 0;
2470 if (detect_sccs(graph) < 0)
2471 return -1;
2472 sort_sccs(graph);
2474 if (compute_maxvar(graph) < 0)
2475 return -1;
2477 if (need_feautrier_step(ctx, graph))
2478 return compute_schedule_wcc_feautrier(ctx, graph);
2480 if (ctx->opt->schedule_outer_zero_distance)
2481 force_zero = 1;
2483 while (graph->n_row < graph->maxvar) {
2484 isl_vec *sol;
2486 graph->src_scc = -1;
2487 graph->dst_scc = -1;
2489 if (setup_lp(ctx, graph, force_zero) < 0)
2490 return -1;
2491 sol = solve_lp(graph);
2492 if (!sol)
2493 return -1;
2494 if (sol->size == 0) {
2495 isl_vec_free(sol);
2496 if (!ctx->opt->schedule_maximize_band_depth &&
2497 graph->n_total_row > graph->band_start)
2498 return compute_next_band(ctx, graph);
2499 if (graph->src_scc >= 0)
2500 return compute_split_schedule(ctx, graph);
2501 if (graph->n_total_row > graph->band_start)
2502 return compute_next_band(ctx, graph);
2503 return carry_dependences(ctx, graph);
2505 if (update_schedule(graph, sol, 1, 1) < 0)
2506 return -1;
2507 force_zero = 0;
2510 if (graph->n_total_row > graph->band_start)
2511 next_band(graph);
2512 return sort_statements(ctx, graph);
2515 /* Add a row to the schedules that separates the SCCs and move
2516 * to the next band.
2518 static int split_on_scc(struct isl_sched_graph *graph)
2520 int i;
2522 for (i = 0; i < graph->n; ++i) {
2523 struct isl_sched_node *node = &graph->node[i];
2524 int row = isl_mat_rows(node->sched);
2526 isl_map_free(node->sched_map);
2527 node->sched_map = NULL;
2528 node->sched = isl_mat_add_zero_rows(node->sched, 1);
2529 node->sched = isl_mat_set_element_si(node->sched, row, 0,
2530 node->scc);
2531 if (!node->sched)
2532 return -1;
2533 node->band[graph->n_total_row] = graph->n_band;
2536 graph->n_total_row++;
2537 next_band(graph);
2539 return 0;
2542 /* Compute a schedule for each component (identified by node->scc)
2543 * of the dependence graph separately and then combine the results.
2544 * Depending on the setting of schedule_fuse, a component may be
2545 * either weakly or strongly connected.
2547 * The band_id is adjusted such that each component has a separate id.
2548 * Note that the band_id may have already been set to a value different
2549 * from zero by compute_split_schedule.
2551 static int compute_component_schedule(isl_ctx *ctx,
2552 struct isl_sched_graph *graph)
2554 int wcc, i;
2555 int n, n_edge;
2556 int n_total_row, orig_total_row;
2557 int n_band, orig_band;
2559 if (ctx->opt->schedule_fuse == ISL_SCHEDULE_FUSE_MIN)
2560 split_on_scc(graph);
2562 n_total_row = 0;
2563 orig_total_row = graph->n_total_row;
2564 n_band = 0;
2565 orig_band = graph->n_band;
2566 for (i = 0; i < graph->n; ++i)
2567 graph->node[i].band_id[graph->n_band] += graph->node[i].scc;
2568 for (wcc = 0; wcc < graph->scc; ++wcc) {
2569 n = 0;
2570 for (i = 0; i < graph->n; ++i)
2571 if (graph->node[i].scc == wcc)
2572 n++;
2573 n_edge = 0;
2574 for (i = 0; i < graph->n_edge; ++i)
2575 if (graph->edge[i].src->scc == wcc &&
2576 graph->edge[i].dst->scc == wcc)
2577 n_edge++;
2579 if (compute_sub_schedule(ctx, graph, n, n_edge,
2580 &node_scc_exactly,
2581 &edge_scc_exactly, wcc, 1) < 0)
2582 return -1;
2583 if (graph->n_total_row > n_total_row)
2584 n_total_row = graph->n_total_row;
2585 graph->n_total_row = orig_total_row;
2586 if (graph->n_band > n_band)
2587 n_band = graph->n_band;
2588 graph->n_band = orig_band;
2591 graph->n_total_row = n_total_row;
2592 graph->n_band = n_band;
2594 return pad_schedule(graph);
2597 /* Compute a schedule for the given dependence graph.
2598 * We first check if the graph is connected (through validity dependences)
2599 * and, if not, compute a schedule for each component separately.
2600 * If schedule_fuse is set to minimal fusion, then we check for strongly
2601 * connected components instead and compute a separate schedule for
2602 * each such strongly connected component.
2604 static int compute_schedule(isl_ctx *ctx, struct isl_sched_graph *graph)
2606 if (ctx->opt->schedule_fuse == ISL_SCHEDULE_FUSE_MIN) {
2607 if (detect_sccs(graph) < 0)
2608 return -1;
2609 } else {
2610 if (detect_wccs(graph) < 0)
2611 return -1;
2614 if (graph->scc > 1)
2615 return compute_component_schedule(ctx, graph);
2617 return compute_schedule_wcc(ctx, graph);
2620 /* Compute a schedule for the given union of domains that respects
2621 * all the validity dependences.
2622 * If the default isl scheduling algorithm is used, it tries to minimize
2623 * the dependence distances over the proximity dependences.
2624 * If Feautrier's scheduling algorithm is used, the proximity dependence
2625 * distances are only minimized during the extension to a full-dimensional
2626 * schedule.
2628 __isl_give isl_schedule *isl_union_set_compute_schedule(
2629 __isl_take isl_union_set *domain,
2630 __isl_take isl_union_map *validity,
2631 __isl_take isl_union_map *proximity)
2633 isl_ctx *ctx = isl_union_set_get_ctx(domain);
2634 isl_space *dim;
2635 struct isl_sched_graph graph = { 0 };
2636 isl_schedule *sched;
2638 domain = isl_union_set_align_params(domain,
2639 isl_union_map_get_space(validity));
2640 domain = isl_union_set_align_params(domain,
2641 isl_union_map_get_space(proximity));
2642 dim = isl_union_set_get_space(domain);
2643 validity = isl_union_map_align_params(validity, isl_space_copy(dim));
2644 proximity = isl_union_map_align_params(proximity, dim);
2646 if (!domain)
2647 goto error;
2649 graph.n = isl_union_set_n_set(domain);
2650 if (graph.n == 0)
2651 goto empty;
2652 if (graph_alloc(ctx, &graph, graph.n,
2653 isl_union_map_n_map(validity) + isl_union_map_n_map(proximity)) < 0)
2654 goto error;
2655 graph.root = 1;
2656 graph.n = 0;
2657 if (isl_union_set_foreach_set(domain, &extract_node, &graph) < 0)
2658 goto error;
2659 if (graph_init_table(ctx, &graph) < 0)
2660 goto error;
2661 graph.n_edge = 0;
2662 if (isl_union_map_foreach_map(validity, &extract_edge, &graph) < 0)
2663 goto error;
2664 if (graph_init_edge_table(ctx, &graph) < 0)
2665 goto error;
2666 if (isl_union_map_foreach_map(proximity, &extract_edge, &graph) < 0)
2667 goto error;
2669 if (compute_schedule(ctx, &graph) < 0)
2670 goto error;
2672 empty:
2673 sched = extract_schedule(&graph, isl_union_set_get_space(domain));
2675 graph_free(ctx, &graph);
2676 isl_union_set_free(domain);
2677 isl_union_map_free(validity);
2678 isl_union_map_free(proximity);
2680 return sched;
2681 error:
2682 graph_free(ctx, &graph);
2683 isl_union_set_free(domain);
2684 isl_union_map_free(validity);
2685 isl_union_map_free(proximity);
2686 return NULL;
2689 void *isl_schedule_free(__isl_take isl_schedule *sched)
2691 int i;
2692 if (!sched)
2693 return NULL;
2695 if (--sched->ref > 0)
2696 return NULL;
2698 for (i = 0; i < sched->n; ++i) {
2699 isl_map_free(sched->node[i].sched);
2700 free(sched->node[i].band_end);
2701 free(sched->node[i].band_id);
2702 free(sched->node[i].zero);
2704 isl_space_free(sched->dim);
2705 isl_band_list_free(sched->band_forest);
2706 free(sched);
2707 return NULL;
2710 isl_ctx *isl_schedule_get_ctx(__isl_keep isl_schedule *schedule)
2712 return schedule ? isl_space_get_ctx(schedule->dim) : NULL;
2715 __isl_give isl_union_map *isl_schedule_get_map(__isl_keep isl_schedule *sched)
2717 int i;
2718 isl_union_map *umap;
2720 if (!sched)
2721 return NULL;
2723 umap = isl_union_map_empty(isl_space_copy(sched->dim));
2724 for (i = 0; i < sched->n; ++i)
2725 umap = isl_union_map_add_map(umap,
2726 isl_map_copy(sched->node[i].sched));
2728 return umap;
2731 static __isl_give isl_band_list *construct_band_list(
2732 __isl_keep isl_schedule *schedule, __isl_keep isl_band *parent,
2733 int band_nr, int *parent_active, int n_active);
2735 /* Construct an isl_band structure for the band in the given schedule
2736 * with sequence number band_nr for the n_active nodes marked by active.
2737 * If the nodes don't have a band with the given sequence number,
2738 * then a band without members is created.
2740 * Because of the way the schedule is constructed, we know that
2741 * the position of the band inside the schedule of a node is the same
2742 * for all active nodes.
2744 static __isl_give isl_band *construct_band(__isl_keep isl_schedule *schedule,
2745 __isl_keep isl_band *parent,
2746 int band_nr, int *active, int n_active)
2748 int i, j;
2749 isl_ctx *ctx = isl_schedule_get_ctx(schedule);
2750 isl_band *band;
2751 unsigned start, end;
2753 band = isl_calloc_type(ctx, isl_band);
2754 if (!band)
2755 return NULL;
2757 band->ref = 1;
2758 band->schedule = schedule;
2759 band->parent = parent;
2761 for (i = 0; i < schedule->n; ++i)
2762 if (active[i] && schedule->node[i].n_band > band_nr + 1)
2763 break;
2765 if (i < schedule->n) {
2766 band->children = construct_band_list(schedule, band,
2767 band_nr + 1, active, n_active);
2768 if (!band->children)
2769 goto error;
2772 for (i = 0; i < schedule->n; ++i)
2773 if (active[i])
2774 break;
2776 if (i >= schedule->n)
2777 isl_die(ctx, isl_error_internal,
2778 "band without active statements", goto error);
2780 start = band_nr ? schedule->node[i].band_end[band_nr - 1] : 0;
2781 end = band_nr < schedule->node[i].n_band ?
2782 schedule->node[i].band_end[band_nr] : start;
2783 band->n = end - start;
2785 band->zero = isl_alloc_array(ctx, int, band->n);
2786 if (!band->zero)
2787 goto error;
2789 for (j = 0; j < band->n; ++j)
2790 band->zero[j] = schedule->node[i].zero[start + j];
2792 band->map = isl_union_map_empty(isl_space_copy(schedule->dim));
2793 for (i = 0; i < schedule->n; ++i) {
2794 isl_map *map;
2795 unsigned n_out;
2797 if (!active[i])
2798 continue;
2800 map = isl_map_copy(schedule->node[i].sched);
2801 n_out = isl_map_dim(map, isl_dim_out);
2802 map = isl_map_project_out(map, isl_dim_out, end, n_out - end);
2803 map = isl_map_project_out(map, isl_dim_out, 0, start);
2804 band->map = isl_union_map_union(band->map,
2805 isl_union_map_from_map(map));
2807 if (!band->map)
2808 goto error;
2810 return band;
2811 error:
2812 isl_band_free(band);
2813 return NULL;
2816 /* Construct a list of bands that start at the same position (with
2817 * sequence number band_nr) in the schedules of the nodes that
2818 * were active in the parent band.
2820 * A separate isl_band structure is created for each band_id
2821 * and for each node that does not have a band with sequence
2822 * number band_nr. In the latter case, a band without members
2823 * is created.
2824 * This ensures that if a band has any children, then each node
2825 * that was active in the band is active in exactly one of the children.
2827 static __isl_give isl_band_list *construct_band_list(
2828 __isl_keep isl_schedule *schedule, __isl_keep isl_band *parent,
2829 int band_nr, int *parent_active, int n_active)
2831 int i, j;
2832 isl_ctx *ctx = isl_schedule_get_ctx(schedule);
2833 int *active;
2834 int n_band;
2835 isl_band_list *list;
2837 n_band = 0;
2838 for (i = 0; i < n_active; ++i) {
2839 for (j = 0; j < schedule->n; ++j) {
2840 if (!parent_active[j])
2841 continue;
2842 if (schedule->node[j].n_band <= band_nr)
2843 continue;
2844 if (schedule->node[j].band_id[band_nr] == i) {
2845 n_band++;
2846 break;
2850 for (j = 0; j < schedule->n; ++j)
2851 if (schedule->node[j].n_band <= band_nr)
2852 n_band++;
2854 if (n_band == 1) {
2855 isl_band *band;
2856 list = isl_band_list_alloc(ctx, n_band);
2857 band = construct_band(schedule, parent, band_nr,
2858 parent_active, n_active);
2859 return isl_band_list_add(list, band);
2862 active = isl_alloc_array(ctx, int, schedule->n);
2863 if (!active)
2864 return NULL;
2866 list = isl_band_list_alloc(ctx, n_band);
2868 for (i = 0; i < n_active; ++i) {
2869 int n = 0;
2870 isl_band *band;
2872 for (j = 0; j < schedule->n; ++j) {
2873 active[j] = parent_active[j] &&
2874 schedule->node[j].n_band > band_nr &&
2875 schedule->node[j].band_id[band_nr] == i;
2876 if (active[j])
2877 n++;
2879 if (n == 0)
2880 continue;
2882 band = construct_band(schedule, parent, band_nr, active, n);
2884 list = isl_band_list_add(list, band);
2886 for (i = 0; i < schedule->n; ++i) {
2887 isl_band *band;
2888 if (!parent_active[i])
2889 continue;
2890 if (schedule->node[i].n_band > band_nr)
2891 continue;
2892 for (j = 0; j < schedule->n; ++j)
2893 active[j] = j == i;
2894 band = construct_band(schedule, parent, band_nr, active, 1);
2895 list = isl_band_list_add(list, band);
2898 free(active);
2900 return list;
2903 /* Construct a band forest representation of the schedule and
2904 * return the list of roots.
2906 static __isl_give isl_band_list *construct_forest(
2907 __isl_keep isl_schedule *schedule)
2909 int i;
2910 isl_ctx *ctx = isl_schedule_get_ctx(schedule);
2911 isl_band_list *forest;
2912 int *active;
2914 active = isl_alloc_array(ctx, int, schedule->n);
2915 if (!active)
2916 return NULL;
2918 for (i = 0; i < schedule->n; ++i)
2919 active[i] = 1;
2921 forest = construct_band_list(schedule, NULL, 0, active, schedule->n);
2923 free(active);
2925 return forest;
2928 /* Return the roots of a band forest representation of the schedule.
2930 __isl_give isl_band_list *isl_schedule_get_band_forest(
2931 __isl_keep isl_schedule *schedule)
2933 if (!schedule)
2934 return NULL;
2935 if (!schedule->band_forest)
2936 schedule->band_forest = construct_forest(schedule);
2937 return isl_band_list_dup(schedule->band_forest);
2940 static __isl_give isl_printer *print_band_list(__isl_take isl_printer *p,
2941 __isl_keep isl_band_list *list);
2943 static __isl_give isl_printer *print_band(__isl_take isl_printer *p,
2944 __isl_keep isl_band *band)
2946 isl_band_list *children;
2948 p = isl_printer_start_line(p);
2949 p = isl_printer_print_union_map(p, band->map);
2950 p = isl_printer_end_line(p);
2952 if (!isl_band_has_children(band))
2953 return p;
2955 children = isl_band_get_children(band);
2957 p = isl_printer_indent(p, 4);
2958 p = print_band_list(p, children);
2959 p = isl_printer_indent(p, -4);
2961 isl_band_list_free(children);
2963 return p;
2966 static __isl_give isl_printer *print_band_list(__isl_take isl_printer *p,
2967 __isl_keep isl_band_list *list)
2969 int i, n;
2971 n = isl_band_list_n_band(list);
2972 for (i = 0; i < n; ++i) {
2973 isl_band *band;
2974 band = isl_band_list_get_band(list, i);
2975 p = print_band(p, band);
2976 isl_band_free(band);
2979 return p;
2982 __isl_give isl_printer *isl_printer_print_schedule(__isl_take isl_printer *p,
2983 __isl_keep isl_schedule *schedule)
2985 isl_band_list *forest;
2987 forest = isl_schedule_get_band_forest(schedule);
2989 p = print_band_list(p, forest);
2991 isl_band_list_free(forest);
2993 return p;
2996 void isl_schedule_dump(__isl_keep isl_schedule *schedule)
2998 isl_printer *printer;
3000 if (!schedule)
3001 return;
3003 printer = isl_printer_to_file(isl_schedule_get_ctx(schedule), stderr);
3004 printer = isl_printer_print_schedule(printer, schedule);
3006 isl_printer_free(printer);