isl_schedule_constraints: add prefix schedule constraint
[isl.git] / isl_scheduler.c
blob94c61837eac93df36bde37f3223441311587cc3d
1 /*
2 * Copyright 2011 INRIA Saclay
3 * Copyright 2012-2014 Ecole Normale Superieure
4 * Copyright 2015-2016 Sven Verdoolaege
5 * Copyright 2016 INRIA Paris
6 * Copyright 2017 Sven Verdoolaege
8 * Use of this software is governed by the MIT license
10 * Written by Sven Verdoolaege, INRIA Saclay - Ile-de-France,
11 * Parc Club Orsay Universite, ZAC des vignes, 4 rue Jacques Monod,
12 * 91893 Orsay, France
13 * and Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
14 * and Centre de Recherche Inria de Paris, 2 rue Simone Iff - Voie DQ12,
15 * CS 42112, 75589 Paris Cedex 12, France
18 #include <string.h>
20 #include <isl_ctx_private.h>
21 #include <isl_map_private.h>
22 #include <isl_space_private.h>
23 #include <isl_aff_private.h>
24 #include <isl/hash.h>
25 #include <isl/constraint.h>
26 #include <isl/schedule.h>
27 #include <isl_schedule_constraints.h>
28 #include <isl/schedule_node.h>
29 #include <isl_mat_private.h>
30 #include <isl_vec_private.h>
31 #include <isl/set.h>
32 #include <isl_union_set_private.h>
33 #include <isl_seq.h>
34 #include <isl_tab.h>
35 #include <isl_dim_map.h>
36 #include <isl/map_to_basic_set.h>
37 #include <isl_sort.h>
38 #include <isl_options_private.h>
39 #include <isl_tarjan.h>
40 #include <isl_morph.h>
41 #include <isl/ilp.h>
42 #include <isl_val_private.h>
45 * The scheduling algorithm implemented in this file was inspired by
46 * Bondhugula et al., "Automatic Transformations for Communication-Minimized
47 * Parallelization and Locality Optimization in the Polyhedral Model".
49 * For a detailed description of the variant implemented in isl,
50 * see Verdoolaege and Janssens, "Scheduling for PPCG" (2017).
54 /* Extract the linear part, i.e., the coefficients of the input variables
55 * and the local variables (if any), from the affine expression "ma".
57 static __isl_give isl_mat *extract_linear(__isl_take isl_multi_aff *ma)
59 int i, n, n_param, n_var, n_div;
60 isl_ctx *ctx;
61 isl_mat *rows;
63 ma = isl_multi_aff_align_divs(ma);
64 if (!ma)
65 return NULL;
67 ctx = isl_multi_aff_get_ctx(ma);
68 n = isl_multi_aff_dim(ma, isl_dim_out);
69 n_param = isl_multi_aff_dim(ma, isl_dim_param);
70 n_var = isl_multi_aff_dim(ma, isl_dim_in);
71 n_div = 0;
72 if (n > 0)
73 n_div = isl_aff_dim(ma->p[0], isl_dim_div);
74 rows = isl_mat_alloc(ctx, n, n_var + n_div);
75 if (!rows)
76 goto error;
77 for (i = 0; i < n; ++i)
78 isl_seq_cpy(rows->row[i],
79 ma->p[i]->v->el + 1 + 1 + n_param, n_var + n_div);
80 isl_multi_aff_free(ma);
81 return rows;
82 error:
83 isl_multi_aff_free(ma);
84 return NULL;
/* The type of ILP constraints that are added
 * for an intra-statement consecutivity constraint.
 */
enum isl_sched_intra_state {
	/* linear combination of outer rows */
	isl_sched_intra_outer = 0,
	/* equal to some inner row */
	isl_sched_intra_inner,
	/* unrestricted */
	isl_sched_intra_free,
};
100 /* A linked list of intra-statement consecutivity constraints
101 * for a particular statement.
103 * "id" is the tuple identifier of the isl_multi_aff from
104 * which the constraint is derived. It may be NULL if the isl_multi_aff
105 * did not have a tuple identifier.
106 * "outer": the rows that should be covered by the outer part of the schedule.
107 * "inner": the desired inner schedule rows.
108 * "n_inner": the number of rows in "inner".
109 * "outer" and "inter" are expressed in terms of the compressed domain space.
111 * "state": the type of ILP constraint that is added.
112 * "n_fixed": the number of rows of "inner" that have already been taken
113 * into account. A negative value means that this intra-statement
114 * consecutivity constraint can no longer be imposed.
115 * "band_n_fixed": the number of rows of "inner" that had already been taken
116 * into account at the start of the current band.
117 * If "n_fixed" is greater than 0, then "first_fixed" is
118 * the index of the schedule row that corresponds to
119 * the first row of "inner".
121 * "next": next constraint in the linked list.
123 struct isl_sched_intra {
124 isl_id *id;
125 isl_mat *outer;
126 isl_mat *inner;
127 int n_inner;
129 enum isl_sched_intra_state state;
130 int n_fixed;
131 int band_n_fixed;
132 int first_fixed;
134 struct isl_sched_intra *next;
137 /* Internal information about a node that is used during the construction
138 * of a schedule.
139 * space represents the original space in which the domain lives;
140 * that is, the space is not affected by compression
141 * sched is a matrix representation of the schedule being constructed
142 * for this node; if compressed is set, then this schedule is
143 * defined over the compressed domain space
144 * band_sched is an isl_map representation of the schedule of the current band
145 * band_sched may be NULL; if compressed is set, then this map
146 * is defined over the uncompressed domain space
147 * rank is the number of linearly independent rows in the linear part
148 * of sched
149 * the rows of "vmap" represent a change of basis for the node
150 * variables; the first rank rows span the linear part of
151 * the schedule rows; the remaining rows are linearly independent
152 * the rows of "indep" represent linear combinations of the schedule
153 * coefficients that are non-zero when the schedule coefficients are
154 * linearly independent of previously computed schedule rows.
155 * start is the first variable in the LP problem in the sequences that
156 * represents the schedule coefficients of this node
157 * nvar is the dimension of the (compressed) domain
158 * nparam is the number of parameters or 0 if we are not constructing
159 * a parametric schedule
161 * If compressed is set, then hull represents the constraints
162 * that were used to derive the compression, while compress and
163 * decompress map the original space to the compressed space and
164 * vice versa.
166 * scc is the index of SCC (or WCC) this node belongs to
168 * "cluster" is only used inside extract_clusters and identifies
169 * the cluster of SCCs that the node belongs to.
171 * coincident contains a boolean for each of the rows of the schedule,
172 * indicating whether the corresponding scheduling dimension satisfies
173 * the coincidence constraints in the sense that the corresponding
174 * dependence distances are zero.
176 * If the schedule_treat_coalescing option is set, then
177 * "sizes" contains the sizes of the (compressed) instance set
178 * in each direction. If there is no fixed size in a given direction,
179 * then the corresponding size value is set to infinity.
180 * If the schedule_treat_coalescing option or the schedule_max_coefficient
181 * option is set, then "max" contains the maximal values for
182 * schedule coefficients of the (compressed) variables. If no bound
183 * needs to be imposed on a particular variable, then the corresponding
184 * value is negative.
185 * If not NULL, then "bounds" contains a non-parametric set
186 * in the compressed space that is bounded by the size in each direction.
187 * "intra" is a linked list of intra-statement consecutivity constraints,
188 * with the highest priority constraints appearing first.
189 * If the node belongs to a graph that is derived through splitting,
190 * then the "intra" list is shared with the node in the original graph.
192 struct isl_sched_node {
193 isl_space *space;
194 int compressed;
195 isl_set *hull;
196 isl_multi_aff *compress;
197 isl_multi_aff *decompress;
198 isl_mat *sched;
199 isl_map *band_sched;
200 int rank;
201 isl_mat *indep;
202 isl_mat *vmap;
203 int start;
204 int nvar;
205 int nparam;
207 int scc;
208 int cluster;
210 int *coincident;
212 isl_multi_val *sizes;
213 isl_basic_set *bounds;
214 isl_vec *max;
216 struct isl_sched_intra *intra;
219 static int node_has_tuples(const void *entry, const void *val)
221 struct isl_sched_node *node = (struct isl_sched_node *)entry;
222 isl_space *space = (isl_space *) val;
224 return isl_space_has_equal_tuples(node->space, space);
227 static int node_scc_exactly(struct isl_sched_node *node, int scc)
229 return node->scc == scc;
232 static int node_scc_at_most(struct isl_sched_node *node, int scc)
234 return node->scc <= scc;
237 static int node_scc_at_least(struct isl_sched_node *node, int scc)
239 return node->scc >= scc;
/* The type of ILP constraint that is added
 * for an inter-statement consecutivity constraint.
 *
 * In practice, the "failed" and the "free" state have the same effect.
 * They both result in the constraint being ignored in later steps.
 */
enum isl_sched_inter_state {
	/* The inter-statement consecutivity constraint failed.
	 * NOTE(review): the original description of this state is cut short;
	 * presumably the constraint could not be imposed -- confirm.
	 */
	isl_sched_inter_failed = -1,
	/* No constraint has been added or only zero-distance constraints. */
	isl_sched_inter_init = 0,
	/* The one-distance constraint has been added. */
	isl_sched_inter_inner = 1,
	/* No more constraints are added. */
	isl_sched_inter_free = 2,
};
260 /* An edge in the dependence graph. An edge may be used to
261 * ensure validity of the generated schedule, to minimize the dependence
262 * distance or both
264 * map is the dependence relation, with i -> j in the map if j depends on i
265 * tagged_condition and tagged_validity contain the union of all tagged
266 * condition or conditional validity dependence relations that
267 * specialize the dependence relation "map"; that is,
268 * if (i -> a) -> (j -> b) is an element of "tagged_condition"
269 * or "tagged_validity", then i -> j is an element of "map".
270 * If these fields are NULL, then they represent the empty relation.
271 * src is the source node
272 * dst is the sink node
274 * types is a bit vector containing the types of this edge.
275 * validity is set if the edge is used to ensure correctness
276 * coincidence is used to enforce zero dependence distances
277 * proximity is set if the edge is used to minimize dependence distances
278 * condition is set if the edge represents a condition
279 * for a conditional validity schedule constraint
280 * local can only be set for condition edges and indicates that
281 * the dependence distance over the edge should be zero
282 * conditional_validity is set if the edge is used to conditionally
283 * ensure correctness
284 * consecutivity is set if the edge is used to make pairs of instances
285 * consecutive at a given level. A consecutivity edge is exclusively
286 * used to represent a single consecutivity constraint.
288 * For validity edges, start and end mark the sequence of inequality
289 * constraints in the LP problem that encode the validity constraint
290 * corresponding to this edge.
292 * For consecutivity edges, "src_intra" and "dst_intra" point
293 * to the corresponding intra-statement consecutivity constraints
294 * in "src" and "dst". "state" reflects the type of ILP constraints
295 * that have been imposed. "band_state" is the state at the start
296 * of the current band.
298 * During clustering, an edge may be marked "no_merge" if it should
299 * not be used to merge clusters.
300 * The weight is also only used during clustering and it is
301 * an indication of how many schedule dimensions on either side
302 * of the schedule constraints can be aligned.
303 * If the weight is negative, then this means that this edge was postponed
304 * by has_bounded_distances or any_no_merge. The original weight can
305 * be retrieved by adding 1 + graph->max_weight, with "graph"
306 * the graph containing this edge.
308 struct isl_sched_edge {
309 isl_map *map;
310 isl_union_map *tagged_condition;
311 isl_union_map *tagged_validity;
313 struct isl_sched_node *src;
314 struct isl_sched_node *dst;
315 struct isl_sched_intra *src_intra;
316 struct isl_sched_intra *dst_intra;
318 unsigned types;
320 int start;
321 int end;
323 enum isl_sched_inter_state state;
324 enum isl_sched_inter_state band_state;
326 int no_merge;
327 int weight;
330 /* Is "edge" marked as being of type "type"?
332 static int is_type(struct isl_sched_edge *edge, enum isl_edge_type type)
334 return ISL_FL_ISSET(edge->types, 1 << type);
337 /* Mark "edge" as being of type "type".
339 static void set_type(struct isl_sched_edge *edge, enum isl_edge_type type)
341 ISL_FL_SET(edge->types, 1 << type);
344 /* No longer mark "edge" as being of type "type"?
346 static void clear_type(struct isl_sched_edge *edge, enum isl_edge_type type)
348 ISL_FL_CLR(edge->types, 1 << type);
351 /* Is "edge" marked as a validity edge?
353 static int is_validity(struct isl_sched_edge *edge)
355 return is_type(edge, isl_edge_validity);
358 /* Mark "edge" as a validity edge.
360 static void set_validity(struct isl_sched_edge *edge)
362 set_type(edge, isl_edge_validity);
365 /* Is "edge" marked as a proximity edge?
367 static int is_proximity(struct isl_sched_edge *edge)
369 return is_type(edge, isl_edge_proximity);
372 /* Is "edge" marked as a local edge?
374 static int is_local(struct isl_sched_edge *edge)
376 return is_type(edge, isl_edge_local);
379 /* Mark "edge" as a local edge.
381 static void set_local(struct isl_sched_edge *edge)
383 set_type(edge, isl_edge_local);
386 /* No longer mark "edge" as a local edge.
388 static void clear_local(struct isl_sched_edge *edge)
390 clear_type(edge, isl_edge_local);
393 /* Is "edge" marked as a coincidence edge?
395 static int is_coincidence(struct isl_sched_edge *edge)
397 return is_type(edge, isl_edge_coincidence);
400 /* Is "edge" marked as a condition edge?
402 static int is_condition(struct isl_sched_edge *edge)
404 return is_type(edge, isl_edge_condition);
407 /* Is "edge" marked as a conditional validity edge?
409 static int is_conditional_validity(struct isl_sched_edge *edge)
411 return is_type(edge, isl_edge_conditional_validity);
414 /* Is "edge" marked as a consecutivity edge?
416 static int is_consecutivity(struct isl_sched_edge *edge)
418 return is_type(edge, isl_edge_consecutivity);
/* Is "edge" of a type that can appear multiple times between
 * the same pair of nodes?
 *
 * Condition edges and conditional validity edges may have tagged
 * dependence relations, in which case an edge is added for each
 * pair of tags.
 * Consecutivity edges are treated as a multi-edge type as well.
 */
static int is_multi_edge_type(struct isl_sched_edge *edge)
{
	if (is_condition(edge))
		return 1;
	if (is_conditional_validity(edge))
		return 1;
	return is_consecutivity(edge) != 0;
}
434 /* Internal information about the dependence graph used during
435 * the construction of the schedule.
437 * intra_hmap is a cache, mapping dependence relations to their dual,
438 * for dependences from a node to itself, possibly without
439 * coefficients for the parameters
440 * intra_hmap_param is a cache, mapping dependence relations to their dual,
441 * for dependences from a node to itself, including coefficients
442 * for the parameters
443 * inter_hmap is a cache, mapping dependence relations to their dual,
444 * for dependences between distinct nodes
445 * if compression is involved then the key for these maps
446 * is the original, uncompressed dependence relation, while
447 * the value is the dual of the compressed dependence relation.
449 * prefix is the schedule prefix specified by the user.
450 * This field may be NULL if no (non-trivial) schedule prefix
451 * was specified.
453 * n is the number of nodes
454 * node is the list of nodes
455 * maxvar is the maximal number of variables over all nodes
456 * max_row is the allocated number of rows in the schedule
457 * n_row is the current (maximal) number of linearly independent
458 * rows in the node schedules
459 * n_total_row is the current number of rows in the node schedules
460 * band_start is the starting row in the node schedules of the current band
461 * root is set to the original dependence graph from which this graph
462 * is derived through splitting. If this graph is not the result of
463 * splitting, then the root field points to the graph itself.
465 * sorted contains a list of node indices sorted according to the
466 * SCC to which a node belongs
468 * n_edge is the number of edges
469 * edge is the list of edges
470 * max_edge contains the maximal number of edges of each type;
471 * in particular, it contains the number of edges in the inital graph.
472 * edge_table contains pointers into the edge array, hashed on the source
473 * and sink spaces; there is one such table for each type;
474 * a given edge may be referenced from more than one table
475 * if the corresponding relation appears in more than one of the
476 * sets of dependences; however, for each type there is only
477 * a single edge between a given pair of source and sink space
478 * in the entire graph
480 * node_table contains pointers into the node array, hashed on the space tuples
482 * "region" contains a list of variable sequences with constraints
483 * that need to be satisfied.
484 * "n_region" contains the size of the allocated array.
486 * lp contains the (I)LP problem used to obtain new schedule rows
488 * src_scc and dst_scc are the source and sink SCCs of an edge with
489 * conflicting constraints
491 * scc represents the number of components
492 * weak is set if the components are weakly connected
494 * max_weight is used during clustering and represents the maximal
495 * weight of the relevant proximity edges.
497 struct isl_sched_graph {
498 isl_map_to_basic_set *intra_hmap;
499 isl_map_to_basic_set *intra_hmap_param;
500 isl_map_to_basic_set *inter_hmap;
502 isl_multi_union_pw_aff *prefix;
504 struct isl_sched_node *node;
505 int n;
506 int maxvar;
507 int max_row;
508 int n_row;
510 int *sorted;
512 int n_total_row;
513 int band_start;
515 struct isl_sched_graph *root;
517 struct isl_sched_edge *edge;
518 int n_edge;
519 int max_edge[isl_edge_last_table + 1];
520 struct isl_hash_table *edge_table[isl_edge_last_table + 1];
522 struct isl_hash_table *node_table;
523 int n_region;
524 struct isl_ilp_region *region;
526 isl_basic_set *lp;
528 int src_scc;
529 int dst_scc;
531 int scc;
532 int weak;
534 int max_weight;
537 /* Initialize node_table based on the list of nodes.
539 static int graph_init_table(isl_ctx *ctx, struct isl_sched_graph *graph)
541 int i;
543 graph->node_table = isl_hash_table_alloc(ctx, graph->n);
544 if (!graph->node_table)
545 return -1;
547 for (i = 0; i < graph->n; ++i) {
548 struct isl_hash_table_entry *entry;
549 uint32_t hash;
551 hash = isl_space_get_tuple_hash(graph->node[i].space);
552 entry = isl_hash_table_find(ctx, graph->node_table, hash,
553 &node_has_tuples,
554 graph->node[i].space, 1);
555 if (!entry)
556 return -1;
557 entry->data = &graph->node[i];
560 return 0;
563 /* Return a pointer to the node that lives within the given space,
564 * an invalid node if there is no such node, or NULL in case of error.
566 static struct isl_sched_node *graph_find_node(isl_ctx *ctx,
567 struct isl_sched_graph *graph, __isl_keep isl_space *space)
569 struct isl_hash_table_entry *entry;
570 uint32_t hash;
572 if (!space)
573 return NULL;
575 hash = isl_space_get_tuple_hash(space);
576 entry = isl_hash_table_find(ctx, graph->node_table, hash,
577 &node_has_tuples, space, 0);
579 return entry ? entry->data : graph->node + graph->n;
582 /* Is "node" a node in "graph"?
584 static int is_node(struct isl_sched_graph *graph,
585 struct isl_sched_node *node)
587 return node && node >= &graph->node[0] && node < &graph->node[graph->n];
590 static int edge_has_src_and_dst(const void *entry, const void *val)
592 const struct isl_sched_edge *edge = entry;
593 const struct isl_sched_edge *temp = val;
595 return edge->src == temp->src && edge->dst == temp->dst;
598 /* Add the given edge to graph->edge_table[type].
600 static isl_stat graph_edge_table_add(isl_ctx *ctx,
601 struct isl_sched_graph *graph, enum isl_edge_type type,
602 struct isl_sched_edge *edge)
604 struct isl_hash_table_entry *entry;
605 uint32_t hash;
607 hash = isl_hash_init();
608 hash = isl_hash_builtin(hash, edge->src);
609 hash = isl_hash_builtin(hash, edge->dst);
610 entry = isl_hash_table_find(ctx, graph->edge_table[type], hash,
611 &edge_has_src_and_dst, edge, 1);
612 if (!entry)
613 return isl_stat_error;
614 entry->data = edge;
616 return isl_stat_ok;
619 /* Add "edge" to all relevant edge tables.
620 * That is, for every type of the edge, add it to the corresponding table.
622 static isl_stat graph_edge_tables_add(isl_ctx *ctx,
623 struct isl_sched_graph *graph, struct isl_sched_edge *edge)
625 enum isl_edge_type t;
627 for (t = isl_edge_first; t <= isl_edge_last_table; ++t) {
628 if (!is_type(edge, t))
629 continue;
630 if (graph_edge_table_add(ctx, graph, t, edge) < 0)
631 return isl_stat_error;
634 return isl_stat_ok;
637 /* Allocate the edge_tables based on the maximal number of edges of
638 * each type.
640 static int graph_init_edge_tables(isl_ctx *ctx, struct isl_sched_graph *graph)
642 int i;
644 for (i = 0; i <= isl_edge_last_table; ++i) {
645 graph->edge_table[i] = isl_hash_table_alloc(ctx,
646 graph->max_edge[i]);
647 if (!graph->edge_table[i])
648 return -1;
651 return 0;
654 /* If graph->edge_table[type] contains an edge from the given source
655 * to the given destination, then return the hash table entry of this edge.
656 * Otherwise, return NULL.
658 static struct isl_hash_table_entry *graph_find_edge_entry(
659 struct isl_sched_graph *graph,
660 enum isl_edge_type type,
661 struct isl_sched_node *src, struct isl_sched_node *dst)
663 isl_ctx *ctx = isl_space_get_ctx(src->space);
664 uint32_t hash;
665 struct isl_sched_edge temp = { .src = src, .dst = dst };
667 hash = isl_hash_init();
668 hash = isl_hash_builtin(hash, temp.src);
669 hash = isl_hash_builtin(hash, temp.dst);
670 return isl_hash_table_find(ctx, graph->edge_table[type], hash,
671 &edge_has_src_and_dst, &temp, 0);
675 /* If graph->edge_table[type] contains an edge from the given source
676 * to the given destination, then return this edge.
677 * Otherwise, return NULL.
679 static struct isl_sched_edge *graph_find_edge(struct isl_sched_graph *graph,
680 enum isl_edge_type type,
681 struct isl_sched_node *src, struct isl_sched_node *dst)
683 struct isl_hash_table_entry *entry;
685 entry = graph_find_edge_entry(graph, type, src, dst);
686 if (!entry)
687 return NULL;
689 return entry->data;
692 /* Check whether the dependence graph has an edge of the given type
693 * between the given two nodes.
695 static isl_bool graph_has_edge(struct isl_sched_graph *graph,
696 enum isl_edge_type type,
697 struct isl_sched_node *src, struct isl_sched_node *dst)
699 struct isl_sched_edge *edge;
700 isl_bool empty;
702 edge = graph_find_edge(graph, type, src, dst);
703 if (!edge)
704 return 0;
706 empty = isl_map_plain_is_empty(edge->map);
707 if (empty < 0)
708 return isl_bool_error;
710 return !empty;
713 /* Look for any edge with the same src, dst and map fields as "model".
714 * Do not look for matching edges of consecutivity constraints or
715 * matching consecutivity edges.
717 * Return the matching edge if one can be found.
718 * Return "model" if no matching edge is found.
719 * Return NULL on error.
721 static struct isl_sched_edge *graph_find_matching_edge(
722 struct isl_sched_graph *graph, struct isl_sched_edge *model)
724 enum isl_edge_type i;
725 struct isl_sched_edge *edge;
727 if (is_consecutivity(model))
728 return model;
729 for (i = isl_edge_first; i <= isl_edge_last_table; ++i) {
730 int is_equal;
732 if (i == isl_edge_consecutivity)
733 continue;
734 edge = graph_find_edge(graph, i, model->src, model->dst);
735 if (!edge)
736 continue;
737 is_equal = isl_map_plain_is_equal(model->map, edge->map);
738 if (is_equal < 0)
739 return NULL;
740 if (is_equal)
741 return edge;
744 return model;
747 /* Remove the given edge from all the edge_tables that refer to it.
749 static void graph_remove_edge(struct isl_sched_graph *graph,
750 struct isl_sched_edge *edge)
752 isl_ctx *ctx = isl_map_get_ctx(edge->map);
753 enum isl_edge_type i;
755 for (i = isl_edge_first; i <= isl_edge_last_table; ++i) {
756 struct isl_hash_table_entry *entry;
758 entry = graph_find_edge_entry(graph, i, edge->src, edge->dst);
759 if (!entry)
760 continue;
761 if (entry->data != edge)
762 continue;
763 isl_hash_table_remove(ctx, graph->edge_table[i], entry);
767 /* Check whether the dependence graph has any edge
768 * between the given two nodes.
770 static isl_bool graph_has_any_edge(struct isl_sched_graph *graph,
771 struct isl_sched_node *src, struct isl_sched_node *dst)
773 enum isl_edge_type i;
774 isl_bool r;
776 for (i = isl_edge_first; i <= isl_edge_last_table; ++i) {
777 r = graph_has_edge(graph, i, src, dst);
778 if (r < 0 || r)
779 return r;
782 return r;
785 /* Check whether the dependence graph has a validity edge
786 * between the given two nodes.
788 * Conditional validity edges are essentially validity edges that
789 * can be ignored if the corresponding condition edges are iteration private.
790 * Here, we are only checking for the presence of validity
791 * edges, so we need to consider the conditional validity edges too.
792 * In particular, this function is used during the detection
793 * of strongly connected components and we cannot ignore
794 * conditional validity edges during this detection.
796 static isl_bool graph_has_validity_edge(struct isl_sched_graph *graph,
797 struct isl_sched_node *src, struct isl_sched_node *dst)
799 isl_bool r;
801 r = graph_has_edge(graph, isl_edge_validity, src, dst);
802 if (r < 0 || r)
803 return r;
805 return graph_has_edge(graph, isl_edge_conditional_validity, src, dst);
808 /* Perform all the required memory allocations for a schedule graph "graph"
809 * with "n_node" nodes and "n_edge" edge and initialize the corresponding
810 * fields.
811 * "n_consecutive" is the number of consecutivity constraints.
812 * The number of regions introduced per intra-statement consecutivity
813 * constraint can vary between one and three, while an additional
814 * single region is introduced per inter-statement consecutivity constraint.
815 * Only allocate a single entry
816 * per consecutivity constraint for now, relying on graph_extend_region
817 * to extend the list of regions when needed.
819 static isl_stat graph_alloc(isl_ctx *ctx, struct isl_sched_graph *graph,
820 int n_node, int n_edge, int n_consecutive)
822 int i;
824 graph->n = n_node;
825 graph->n_edge = n_edge;
826 graph->node = isl_calloc_array(ctx, struct isl_sched_node, graph->n);
827 graph->sorted = isl_calloc_array(ctx, int, graph->n);
828 graph->n_region = n_consecutive + graph->n;
829 graph->region = isl_calloc_array(ctx, struct isl_ilp_region,
830 graph->n_region);
831 graph->edge = isl_calloc_array(ctx,
832 struct isl_sched_edge, graph->n_edge);
834 graph->intra_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
835 graph->intra_hmap_param = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
836 graph->inter_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
838 if (!graph->node || !graph->region || (graph->n_edge && !graph->edge) ||
839 !graph->sorted)
840 return isl_stat_error;
842 for(i = 0; i < graph->n; ++i)
843 graph->sorted[i] = i;
845 return isl_stat_ok;
848 /* Extend the size of graph->region to contain at least "n" elements,
849 * clearing the additionally allocated elements.
851 static isl_stat graph_extend_region(isl_ctx *ctx, struct isl_sched_graph *graph,
852 int n)
854 struct isl_ilp_region *region;
856 if (graph->n_region >= n)
857 return isl_stat_ok;
859 n = (n + 1) * 3 / 2;
860 region = isl_realloc_array(ctx, graph->region, struct isl_ilp_region,
862 if (!region)
863 return isl_stat_error;
865 memset(region + graph->n_region, 0,
866 (n - graph->n_region) * sizeof(struct isl_ilp_region));
867 graph->region = region;
868 graph->n_region = n;
870 return isl_stat_ok;
873 /* Free the memory associated to node "node" in "graph".
874 * The "coincident" and the "intra" fields are shared by nodes in a graph and
875 * its subgraph.
876 * They therefore only need to be freed for the original dependence graph,
877 * i.e., one that is not the result of splitting.
879 static void clear_node(struct isl_sched_graph *graph,
880 struct isl_sched_node *node)
882 isl_space_free(node->space);
883 isl_set_free(node->hull);
884 isl_multi_aff_free(node->compress);
885 isl_multi_aff_free(node->decompress);
886 isl_mat_free(node->sched);
887 isl_map_free(node->band_sched);
888 isl_mat_free(node->indep);
889 isl_mat_free(node->vmap);
890 isl_multi_val_free(node->sizes);
891 isl_basic_set_free(node->bounds);
892 isl_vec_free(node->max);
894 if (graph->root != graph)
895 return;
897 free(node->coincident);
898 while (node->intra) {
899 struct isl_sched_intra *intra = node->intra;
901 node->intra = intra->next;
902 isl_id_free(intra->id);
903 isl_mat_free(intra->outer);
904 isl_mat_free(intra->inner);
905 free(intra);
909 static void graph_free(isl_ctx *ctx, struct isl_sched_graph *graph)
911 int i;
913 isl_map_to_basic_set_free(graph->intra_hmap);
914 isl_map_to_basic_set_free(graph->intra_hmap_param);
915 isl_map_to_basic_set_free(graph->inter_hmap);
917 if (graph->node)
918 for (i = 0; i < graph->n; ++i)
919 clear_node(graph, &graph->node[i]);
920 free(graph->node);
921 free(graph->sorted);
922 if (graph->edge)
923 for (i = 0; i < graph->n_edge; ++i) {
924 isl_map_free(graph->edge[i].map);
925 isl_union_map_free(graph->edge[i].tagged_condition);
926 isl_union_map_free(graph->edge[i].tagged_validity);
928 free(graph->edge);
929 free(graph->region);
930 for (i = 0; i <= isl_edge_last_table; ++i)
931 isl_hash_table_free(ctx, graph->edge_table[i]);
932 isl_hash_table_free(ctx, graph->node_table);
933 isl_basic_set_free(graph->lp);
935 isl_multi_union_pw_aff_free(graph->prefix);
938 /* For each "set" on which this function is called, increment
939 * graph->n by one and update graph->maxvar.
941 static isl_stat init_n_maxvar(__isl_take isl_set *set, void *user)
943 struct isl_sched_graph *graph = user;
944 int nvar = isl_set_dim(set, isl_dim_set);
946 graph->n++;
947 if (nvar > graph->maxvar)
948 graph->maxvar = nvar;
950 isl_set_free(set);
952 return isl_stat_ok;
955 /* Compute the number of rows that should be allocated for the schedule.
956 * In particular, we need one row for each variable or one row
957 * for each basic map in the dependences.
958 * Note that it is practically impossible to exhaust both
959 * the number of dependences and the number of variables.
960 * If any prefix schedule was specified, then the initial rows
961 * are initialized from this prefix. Since the prefix may be
962 * completely trivial, it needs to be taken into account separately.
964 static isl_stat compute_max_row(struct isl_sched_graph *graph,
965 __isl_keep isl_schedule_constraints *sc)
967 int n_prefix, n_edge;
968 isl_stat r;
969 isl_union_set *domain;
970 isl_multi_union_pw_aff *prefix;
972 graph->n = 0;
973 graph->maxvar = 0;
974 domain = isl_schedule_constraints_get_domain(sc);
975 r = isl_union_set_foreach_set(domain, &init_n_maxvar, graph);
976 isl_union_set_free(domain);
977 if (r < 0)
978 return isl_stat_error;
979 prefix = isl_schedule_constraints_get_prefix(sc);
980 n_prefix = isl_multi_union_pw_aff_dim(prefix, isl_dim_set);
981 isl_multi_union_pw_aff_free(prefix);
982 if (!prefix)
983 return isl_stat_error;
984 n_edge = isl_schedule_constraints_n_basic_map(sc);
985 if (n_edge < 0)
986 return isl_stat_error;
987 graph->max_row = n_prefix + n_edge + graph->maxvar;
989 return isl_stat_ok;
992 /* Does "bset" have any defining equalities for its set variables?
994 static isl_bool has_any_defining_equality(__isl_keep isl_basic_set *bset)
996 int i, n;
998 if (!bset)
999 return isl_bool_error;
1001 n = isl_basic_set_dim(bset, isl_dim_set);
1002 for (i = 0; i < n; ++i) {
1003 isl_bool has;
1005 has = isl_basic_set_has_defining_equality(bset, isl_dim_set, i,
1006 NULL);
1007 if (has < 0 || has)
1008 return has;
1011 return isl_bool_false;
1014 /* Set the entries of node->max to the value of the schedule_max_coefficient
1015 * option, if set.
1017 static isl_stat set_max_coefficient(isl_ctx *ctx, struct isl_sched_node *node)
1019 int max;
1021 max = isl_options_get_schedule_max_coefficient(ctx);
1022 if (max == -1)
1023 return isl_stat_ok;
1025 node->max = isl_vec_alloc(ctx, node->nvar);
1026 node->max = isl_vec_set_si(node->max, max);
1027 if (!node->max)
1028 return isl_stat_error;
1030 return isl_stat_ok;
1033 /* Set the entries of node->max to the minimum of the schedule_max_coefficient
1034 * option (if set) and half of the minimum of the sizes in the other
1035 * dimensions. Round up when computing the half such that
1036 * if the minimum of the sizes is one, half of the size is taken to be one
1037 * rather than zero.
1038 * If the global minimum is unbounded (i.e., if both
1039 * the schedule_max_coefficient is not set and the sizes in the other
1040 * dimensions are unbounded), then store a negative value.
1041 * If the schedule coefficient is close to the size of the instance set
1042 * in another dimension, then the schedule may represent a loop
1043 * coalescing transformation (especially if the coefficient
1044 * in that other dimension is one). Forcing the coefficient to be
1045 * smaller than or equal to half the minimal size should avoid this
1046 * situation.
1048 static isl_stat compute_max_coefficient(isl_ctx *ctx,
1049 struct isl_sched_node *node)
1051 int max;
1052 int i, j;
1053 isl_vec *v;
1055 max = isl_options_get_schedule_max_coefficient(ctx);
1056 v = isl_vec_alloc(ctx, node->nvar);
1057 if (!v)
1058 return isl_stat_error;
1060 for (i = 0; i < node->nvar; ++i) {
1061 isl_int_set_si(v->el[i], max);
1062 isl_int_mul_si(v->el[i], v->el[i], 2);
1065 for (i = 0; i < node->nvar; ++i) {
1066 isl_val *size;
1068 size = isl_multi_val_get_val(node->sizes, i);
1069 if (!size)
1070 goto error;
1071 if (!isl_val_is_int(size)) {
1072 isl_val_free(size);
1073 continue;
1075 for (j = 0; j < node->nvar; ++j) {
1076 if (j == i)
1077 continue;
1078 if (isl_int_is_neg(v->el[j]) ||
1079 isl_int_gt(v->el[j], size->n))
1080 isl_int_set(v->el[j], size->n);
1082 isl_val_free(size);
1085 for (i = 0; i < node->nvar; ++i)
1086 isl_int_cdiv_q_ui(v->el[i], v->el[i], 2);
1088 node->max = v;
1089 return isl_stat_ok;
1090 error:
1091 isl_vec_free(v);
1092 return isl_stat_error;
1095 /* Compute and return the size of "set" in dimension "dim".
1096 * The size is taken to be the difference in values for that variable
1097 * for fixed values of the other variables.
1098 * This assumes that "set" is convex.
1099 * In particular, the variable is first isolated from the other variables
1100 * in the range of a map
1102 * [i_0, ..., i_dim-1, i_dim+1, ...] -> [i_dim]
1104 * and then duplicated
1106 * [i_0, ..., i_dim-1, i_dim+1, ...] -> [[i_dim] -> [i_dim']]
1108 * The shared variables are then projected out and the maximal value
1109 * of i_dim' - i_dim is computed.
1111 static __isl_give isl_val *compute_size(__isl_take isl_set *set, int dim)
1113 isl_map *map;
1114 isl_local_space *ls;
1115 isl_aff *obj;
1116 isl_val *v;
1118 map = isl_set_project_onto_map(set, isl_dim_set, dim, 1);
1119 map = isl_map_project_out(map, isl_dim_in, dim, 1);
1120 map = isl_map_range_product(map, isl_map_copy(map));
1121 map = isl_set_unwrap(isl_map_range(map));
1122 set = isl_map_deltas(map);
1123 ls = isl_local_space_from_space(isl_set_get_space(set));
1124 obj = isl_aff_var_on_domain(ls, isl_dim_set, 0);
1125 v = isl_set_max_val(set, obj);
1126 isl_aff_free(obj);
1127 isl_set_free(set);
1129 return v;
1132 /* Compute the size of the instance set "set" of "node", after compression,
1133 * as well as bounds on the corresponding coefficients, if needed.
1135 * The sizes are needed when the schedule_treat_coalescing option is set.
1136 * The bounds are needed when the schedule_treat_coalescing option or
1137 * the schedule_max_coefficient option is set.
1139 * If the schedule_treat_coalescing option is not set, then at most
1140 * the bounds need to be set and this is done in set_max_coefficient.
1141 * Otherwise, compress the domain if needed, compute the size
1142 * in each direction and store the results in node->size.
1143 * If the domain is not convex, then the sizes are computed
1144 * on a convex superset in order to avoid picking up sizes
1145 * that are valid for the individual disjuncts, but not for
1146 * the domain as a whole.
1147 * Finally, set the bounds on the coefficients based on the sizes
1148 * and the schedule_max_coefficient option in compute_max_coefficient.
1150 static isl_stat compute_sizes_and_max(isl_ctx *ctx, struct isl_sched_node *node,
1151 __isl_take isl_set *set)
1153 int j, n;
1154 isl_multi_val *mv;
1156 if (!isl_options_get_schedule_treat_coalescing(ctx)) {
1157 isl_set_free(set);
1158 return set_max_coefficient(ctx, node);
1161 if (node->compressed)
1162 set = isl_set_preimage_multi_aff(set,
1163 isl_multi_aff_copy(node->decompress));
1164 set = isl_set_from_basic_set(isl_set_simple_hull(set));
1165 mv = isl_multi_val_zero(isl_set_get_space(set));
1166 n = isl_set_dim(set, isl_dim_set);
1167 for (j = 0; j < n; ++j) {
1168 isl_val *v;
1170 v = compute_size(isl_set_copy(set), j);
1171 mv = isl_multi_val_set_val(mv, j, v);
1173 node->sizes = mv;
1174 isl_set_free(set);
1175 if (!node->sizes)
1176 return isl_stat_error;
1177 return compute_max_coefficient(ctx, node);
1180 /* Add a new node to the graph representing the given instance set.
1181 * "nvar" is the (possibly compressed) number of variables and
1182 * may be smaller than then number of set variables in "set"
1183 * if "compressed" is set.
1184 * If "compressed" is set, then "hull" represents the constraints
1185 * that were used to derive the compression, while "compress" and
1186 * "decompress" map the original space to the compressed space and
1187 * vice versa.
1188 * If "compressed" is not set, then "hull", "compress" and "decompress"
1189 * should be NULL.
1191 * Compute the size of the instance set and bounds on the coefficients,
1192 * if needed.
1194 static isl_stat add_node(struct isl_sched_graph *graph,
1195 __isl_take isl_set *set, int nvar, int compressed,
1196 __isl_take isl_set *hull, __isl_take isl_multi_aff *compress,
1197 __isl_take isl_multi_aff *decompress)
1199 int nparam;
1200 isl_ctx *ctx;
1201 isl_mat *sched;
1202 isl_space *space;
1203 int *coincident;
1204 struct isl_sched_node *node;
1206 if (!set)
1207 return isl_stat_error;
1209 ctx = isl_set_get_ctx(set);
1210 nparam = isl_set_dim(set, isl_dim_param);
1211 if (!ctx->opt->schedule_parametric)
1212 nparam = 0;
1213 sched = isl_mat_alloc(ctx, 0, 1 + nparam + nvar);
1214 node = &graph->node[graph->n];
1215 graph->n++;
1216 space = isl_set_get_space(set);
1217 node->space = space;
1218 node->nvar = nvar;
1219 node->nparam = nparam;
1220 node->sched = sched;
1221 node->band_sched = NULL;
1222 coincident = isl_calloc_array(ctx, int, graph->max_row);
1223 node->coincident = coincident;
1224 node->compressed = compressed;
1225 node->hull = hull;
1226 node->compress = compress;
1227 node->decompress = decompress;
1228 if (compute_sizes_and_max(ctx, node, set) < 0)
1229 return isl_stat_error;
1231 if (!space || !sched || (graph->max_row && !coincident))
1232 return isl_stat_error;
1233 if (compressed && (!hull || !compress || !decompress))
1234 return isl_stat_error;
1236 return isl_stat_ok;
1239 /* Construct an identifier for node "node", which will represent "set".
1240 * The name of the identifier is either "compressed" or
1241 * "compressed_<name>", with <name> the name of the space of "set".
1242 * The user pointer of the identifier points to "node".
1244 static __isl_give isl_id *construct_compressed_id(__isl_keep isl_set *set,
1245 struct isl_sched_node *node)
1247 isl_bool has_name;
1248 isl_ctx *ctx;
1249 isl_id *id;
1250 isl_printer *p;
1251 const char *name;
1252 char *id_name;
1254 has_name = isl_set_has_tuple_name(set);
1255 if (has_name < 0)
1256 return NULL;
1258 ctx = isl_set_get_ctx(set);
1259 if (!has_name)
1260 return isl_id_alloc(ctx, "compressed", node);
1262 p = isl_printer_to_str(ctx);
1263 name = isl_set_get_tuple_name(set);
1264 p = isl_printer_print_str(p, "compressed_");
1265 p = isl_printer_print_str(p, name);
1266 id_name = isl_printer_get_str(p);
1267 isl_printer_free(p);
1269 id = isl_id_alloc(ctx, id_name, node);
1270 free(id_name);
1272 return id;
1275 /* Add a new node to the graph representing the given set.
1277 * If any of the set variables is defined by an equality, then
1278 * we perform variable compression such that we can perform
1279 * the scheduling on the compressed domain.
1280 * In this case, an identifier is used that references the new node
1281 * such that each compressed space is unique and
1282 * such that the node can be recovered from the compressed space.
1284 static isl_stat extract_node(__isl_take isl_set *set, void *user)
1286 int nvar;
1287 isl_bool has_equality;
1288 isl_id *id;
1289 isl_basic_set *hull;
1290 isl_set *hull_set;
1291 isl_morph *morph;
1292 isl_multi_aff *compress, *decompress;
1293 struct isl_sched_graph *graph = user;
1295 hull = isl_set_affine_hull(isl_set_copy(set));
1296 hull = isl_basic_set_remove_divs(hull);
1297 nvar = isl_set_dim(set, isl_dim_set);
1298 has_equality = has_any_defining_equality(hull);
1300 if (has_equality < 0)
1301 goto error;
1302 if (!has_equality) {
1303 isl_basic_set_free(hull);
1304 return add_node(graph, set, nvar, 0, NULL, NULL, NULL);
1307 id = construct_compressed_id(set, &graph->node[graph->n]);
1308 morph = isl_basic_set_variable_compression_with_id(hull,
1309 isl_dim_set, id);
1310 isl_id_free(id);
1311 nvar = isl_morph_ran_dim(morph, isl_dim_set);
1312 compress = isl_morph_get_var_multi_aff(morph);
1313 morph = isl_morph_inverse(morph);
1314 decompress = isl_morph_get_var_multi_aff(morph);
1315 isl_morph_free(morph);
1317 hull_set = isl_set_from_basic_set(hull);
1318 return add_node(graph, set, nvar, 1, hull_set, compress, decompress);
1319 error:
1320 isl_basic_set_free(hull);
1321 isl_set_free(set);
1322 return isl_stat_error;
1325 struct isl_extract_edge_data {
1326 enum isl_edge_type type;
1327 struct isl_sched_graph *graph;
1330 /* Merge edge2 into edge1, freeing the contents of edge2.
1331 * Return 0 on success and -1 on failure.
1333 * edge1 and edge2 are assumed to have the same value for the map field.
1335 static int merge_edge(struct isl_sched_edge *edge1,
1336 struct isl_sched_edge *edge2)
1338 edge1->types |= edge2->types;
1339 isl_map_free(edge2->map);
1341 if (is_condition(edge2)) {
1342 if (!edge1->tagged_condition)
1343 edge1->tagged_condition = edge2->tagged_condition;
1344 else
1345 edge1->tagged_condition =
1346 isl_union_map_union(edge1->tagged_condition,
1347 edge2->tagged_condition);
1350 if (is_conditional_validity(edge2)) {
1351 if (!edge1->tagged_validity)
1352 edge1->tagged_validity = edge2->tagged_validity;
1353 else
1354 edge1->tagged_validity =
1355 isl_union_map_union(edge1->tagged_validity,
1356 edge2->tagged_validity);
1359 if (is_condition(edge2) && !edge1->tagged_condition)
1360 return -1;
1361 if (is_conditional_validity(edge2) && !edge1->tagged_validity)
1362 return -1;
1364 return 0;
1367 /* Insert dummy tags in domain and range of "map".
1369 * In particular, if "map" is of the form
1371 * A -> B
1373 * then return
1375 * [A -> dummy_tag] -> [B -> dummy_tag]
1377 * where the dummy_tags are identical and equal to any dummy tags
1378 * introduced by any other call to this function.
1380 static __isl_give isl_map *insert_dummy_tags(__isl_take isl_map *map)
1382 static char dummy;
1383 isl_ctx *ctx;
1384 isl_id *id;
1385 isl_space *space;
1386 isl_set *domain, *range;
1388 ctx = isl_map_get_ctx(map);
1390 id = isl_id_alloc(ctx, NULL, &dummy);
1391 space = isl_space_params(isl_map_get_space(map));
1392 space = isl_space_set_from_params(space);
1393 space = isl_space_set_tuple_id(space, isl_dim_set, id);
1394 space = isl_space_map_from_set(space);
1396 domain = isl_map_wrap(map);
1397 range = isl_map_wrap(isl_map_universe(space));
1398 map = isl_map_from_domain_and_range(domain, range);
1399 map = isl_map_zip(map);
1401 return map;
1404 /* Return a map in the same space as that of "map" that relates
1405 * the elements with equal schedule prefix.
1406 * Use the original schedule prefix specified by the user and
1407 * not the linear information extracted from it for the purpose
1408 * of avoiding redundant rows in the generated schedule.
1410 static __isl_give isl_map *extract_equal_prefix(struct isl_sched_graph *graph,
1411 __isl_keep isl_map *map)
1413 isl_space *space;
1414 isl_union_map *umap;
1415 isl_multi_union_pw_aff *prefix;
1417 space = isl_map_get_space(map);
1418 map = isl_map_universe(isl_space_copy(space));
1419 umap = isl_union_map_from_map(map);
1420 prefix = isl_multi_union_pw_aff_copy(graph->prefix);
1421 umap = isl_union_map_eq_at_multi_union_pw_aff(umap, prefix);
1422 map = isl_union_map_extract_map(umap, space);
1423 isl_union_map_free(umap);
1425 return map;
1428 /* Given that at least one of "src" or "dst" is compressed, return
1429 * a map between the spaces of these nodes restricted to the affine
1430 * hull that was used in the compression.
1432 static __isl_give isl_map *extract_hull(struct isl_sched_node *src,
1433 struct isl_sched_node *dst)
1435 isl_set *dom, *ran;
1437 if (src->compressed)
1438 dom = isl_set_copy(src->hull);
1439 else
1440 dom = isl_set_universe(isl_space_copy(src->space));
1441 if (dst->compressed)
1442 ran = isl_set_copy(dst->hull);
1443 else
1444 ran = isl_set_universe(isl_space_copy(dst->space));
1446 return isl_map_from_domain_and_range(dom, ran);
1449 /* Intersect the domains of the nested relations in domain and range
1450 * of "tagged" with "map".
1452 static __isl_give isl_map *map_intersect_domains(__isl_take isl_map *tagged,
1453 __isl_keep isl_map *map)
1455 isl_set *set;
1457 tagged = isl_map_zip(tagged);
1458 set = isl_map_wrap(isl_map_copy(map));
1459 tagged = isl_map_intersect_domain(tagged, set);
1460 tagged = isl_map_zip(tagged);
1461 return tagged;
1464 /* Return a pointer to the node that lives in the domain space of "map",
1465 * an invalid node if there is no such node, or NULL in case of error.
1467 static struct isl_sched_node *find_domain_node(isl_ctx *ctx,
1468 struct isl_sched_graph *graph, __isl_keep isl_map *map)
1470 struct isl_sched_node *node;
1471 isl_space *space;
1473 space = isl_space_domain(isl_map_get_space(map));
1474 node = graph_find_node(ctx, graph, space);
1475 isl_space_free(space);
1477 return node;
1480 /* Return a pointer to the node that lives in the range space of "map",
1481 * an invalid node if there is no such node, or NULL in case of error.
1483 static struct isl_sched_node *find_range_node(isl_ctx *ctx,
1484 struct isl_sched_graph *graph, __isl_keep isl_map *map)
1486 struct isl_sched_node *node;
1487 isl_space *space;
1489 space = isl_space_range(isl_map_get_space(map));
1490 node = graph_find_node(ctx, graph, space);
1491 isl_space_free(space);
1493 return node;
1496 /* Refrain from adding a new edge based on "map" to "graph".
1497 * Instead, just free the map and tell the caller
1498 * no edge was added.
1499 * "tagged" is either a copy of "map" with additional tags or NULL.
1501 static struct isl_sched_edge *skip_edge(struct isl_sched_graph *graph,
1502 __isl_take isl_map *map, __isl_take isl_map *tagged)
1504 isl_map_free(map);
1505 isl_map_free(tagged);
1507 return &graph->edge[graph->n_edge];
1510 /* Add a new edge to the graph based on the given map
1511 * and add it to graph->edge_table[type].
1512 * If a dependence relation of a given type happens to be identical
1513 * to one of the dependence relations of a type that was added before,
1514 * then we don't create a new edge, but instead mark the original edge
1515 * as also representing a dependence of the current type.
1516 * No such merging is performed on consecutivity edges.
1517 * If no corresponding source or destination nodes can be found,
1518 * then no edge is created.
1519 * Return a pointer to the new or merged edge if an edge was created or
1520 * updated. Return an invalid edge otherwise.
1521 * Return NULL on error.
1523 * Edges of type isl_edge_condition or isl_edge_conditional_validity
1524 * may be specified as "tagged" dependence relations. That is, "map"
1525 * may contain elements (i -> a) -> (j -> b), where i -> j denotes
1526 * the dependence on iterations and a and b are tags.
1527 * edge->map is set to the relation containing the elements i -> j,
1528 * while edge->tagged_condition and edge->tagged_validity contain
1529 * the union of all the "map" relations
1530 * for which extract_edge is called that result in the same edge->map.
1532 * If the source or the destination node is compressed, then
1533 * intersect both "map" and "tagged" with the constraints that
1534 * were used to construct the compression.
1535 * This ensures that there are no schedule constraints defined
1536 * outside of these domains, while the scheduler no longer has
1537 * any control over those outside parts.
1539 * If a (non-trivial) prefix schedule was specified by the user,
1540 * then only retain dependences between instances with equal
1541 * prefix values. If the specified prefix schedule was incomplete,
1542 * then this may result in the removal of all dependences.
1544 static struct isl_sched_edge *add_edge(struct isl_sched_graph *graph,
1545 enum isl_edge_type type, __isl_take isl_map *map)
1547 isl_bool empty;
1548 isl_ctx *ctx = isl_map_get_ctx(map);
1549 struct isl_sched_node *src, *dst;
1550 struct isl_sched_edge *edge;
1551 isl_map *tagged = NULL;
1553 if (type == isl_edge_condition ||
1554 type == isl_edge_conditional_validity) {
1555 if (isl_map_can_zip(map)) {
1556 tagged = isl_map_copy(map);
1557 map = isl_set_unwrap(isl_map_domain(isl_map_zip(map)));
1558 } else {
1559 tagged = insert_dummy_tags(isl_map_copy(map));
1563 if (graph->prefix) {
1564 isl_map *equal_prefix;
1565 equal_prefix = extract_equal_prefix(graph, map);
1566 if (tagged)
1567 tagged = map_intersect_domains(tagged, equal_prefix);
1568 map = isl_map_intersect(map, equal_prefix);
1571 src = find_domain_node(ctx, graph, map);
1572 dst = find_range_node(ctx, graph, map);
1574 if (!src || !dst)
1575 goto error;
1576 if (!is_node(graph, src) || !is_node(graph, dst))
1577 return skip_edge(graph, map, tagged);
1579 if (src->compressed || dst->compressed) {
1580 isl_map *hull;
1581 hull = extract_hull(src, dst);
1582 if (tagged)
1583 tagged = map_intersect_domains(tagged, hull);
1584 map = isl_map_intersect(map, hull);
1587 empty = isl_map_plain_is_empty(map);
1588 if (empty < 0)
1589 goto error;
1590 if (empty)
1591 return skip_edge(graph, map, tagged);
1593 graph->edge[graph->n_edge].src = src;
1594 graph->edge[graph->n_edge].dst = dst;
1595 graph->edge[graph->n_edge].map = map;
1596 graph->edge[graph->n_edge].types = 0;
1597 graph->edge[graph->n_edge].tagged_condition = NULL;
1598 graph->edge[graph->n_edge].tagged_validity = NULL;
1599 set_type(&graph->edge[graph->n_edge], type);
1600 if (type == isl_edge_condition)
1601 graph->edge[graph->n_edge].tagged_condition =
1602 isl_union_map_from_map(tagged);
1603 if (type == isl_edge_conditional_validity)
1604 graph->edge[graph->n_edge].tagged_validity =
1605 isl_union_map_from_map(tagged);
1607 edge = graph_find_matching_edge(graph, &graph->edge[graph->n_edge]);
1608 if (!edge) {
1609 graph->n_edge++;
1610 return NULL;
1612 if (edge == &graph->edge[graph->n_edge])
1613 edge = &graph->edge[graph->n_edge++];
1614 else if (merge_edge(edge, &graph->edge[graph->n_edge]) < 0)
1615 return NULL;
1617 if (graph_edge_table_add(ctx, graph, type, edge) < 0)
1618 return NULL;
1619 return edge;
1620 error:
1621 isl_map_free(map);
1622 isl_map_free(tagged);
1623 return NULL;
1626 /* Is "edge" an edge in "graph"?
1628 static int is_edge(struct isl_sched_graph *graph,
1629 struct isl_sched_edge *edge)
1631 return edge &&
1632 edge >= graph->edge && edge < graph->edge + graph->n_edge;
1635 /* Add a new edge to the graph based on the given map
1636 * and add it to data->graph->edge_table[data->type].
1638 static isl_stat extract_edge(__isl_take isl_map *map, void *user)
1640 struct isl_extract_edge_data *data = user;
1641 struct isl_sched_graph *graph = data->graph;
1642 struct isl_sched_edge *edge;
1644 edge = add_edge(graph, data->type, map);
1645 return edge ? isl_stat_ok : isl_stat_error;
1648 /* Insert an intra-statement consecutivity constraint with
1649 * identifier "id" (may be NULL),
1650 * outer part "outer" and inner part "inner" in front of the list of
1651 * intra-statement consecutivity constraints of "node".
1652 * "outer" is replaced by a basis because only the spanned
1653 * space is relevant and not the individual rows.
1655 static isl_stat insert_intra(struct isl_sched_node *node,
1656 __isl_take isl_id *id, __isl_take isl_mat *outer,
1657 __isl_take isl_mat *inner)
1659 isl_ctx *ctx;
1660 struct isl_sched_intra *intra;
1662 outer = isl_mat_row_basis(outer);
1663 if (!outer || !inner)
1664 goto error;
1666 ctx = isl_mat_get_ctx(outer);
1667 intra = isl_calloc_type(ctx, struct isl_sched_intra);
1668 if (!intra)
1669 goto error;
1670 intra->n_inner = isl_mat_rows(inner);
1671 intra->id = id;
1672 intra->outer = outer;
1673 intra->inner = inner;
1674 intra->next = node->intra;
1675 node->intra = intra;
1676 return isl_stat_ok;
1677 error:
1678 isl_id_free(id);
1679 isl_mat_free(outer);
1680 isl_mat_free(inner);
1681 return isl_stat_error;
1684 /* Does the sequence of linear combinations "lin" with outer rows "outer"
1685 * represent a valid intra-statement consecutivity constraint for a node
1686 * with "nvar" variables?
1688 * If the number of columns is greater than the number of variables,
1689 * then the isl_multi_aff from which this linear part was extracted
1690 * involves some local variables, meaning that it is a quasi-affine
1691 * expression rather than an affine expression.
1692 * These are not allowed.
1694 * The inner part of "lin" needs to be of full row-rank and
1695 * needs to be linearly independent of "outer".
1696 * That is, rank(lin) needs to be equal to rank(outer) + rank(inner),
1697 * while rank(inner) needs to be equal to the number of rows of the inner part.
1698 * Furthermore, the inner part needs to contain at least one row.
1700 static isl_bool valid_intra(__isl_keep isl_mat *lin, __isl_keep isl_mat *outer,
1701 int nvar)
1703 int n_col;
1704 int rank, rank_outer;
1705 int n_inner;
1707 if (!lin || !outer)
1708 return isl_bool_error;
1710 n_col = isl_mat_cols(lin);
1711 if (n_col != nvar)
1712 return isl_bool_false;
1714 n_inner = isl_mat_rows(lin) - isl_mat_rows(outer);
1715 if (n_inner < 1)
1716 return isl_bool_false;
1717 rank = isl_mat_rank(lin);
1718 rank_outer = isl_mat_rank(outer);
1719 if (rank < 0 || rank_outer < 0)
1720 return isl_bool_error;
1721 if (rank != rank_outer + n_inner)
1722 return isl_bool_false;
1724 return isl_bool_true;
1727 /* Insert the intra-statement consecutivity constraint "lin"
1728 * with identifier "id" (may be NULL) and
1729 * outer part "outer" in front of the list of
1730 * intra-statement consecutivity constraints of "node",
1731 * provided it is a valid constraint.
1733 static isl_stat insert_valid_intra(struct isl_sched_node *node,
1734 __isl_take isl_id *id, __isl_take isl_mat *lin,
1735 __isl_take isl_mat *outer)
1737 isl_bool valid;
1739 valid = valid_intra(lin, outer, node->nvar);
1740 if (valid >= 0 && valid) {
1741 isl_mat *inner;
1742 int n_outer;
1744 n_outer = isl_mat_rows(outer);
1745 inner = isl_mat_drop_rows(lin, 0, n_outer);
1746 return insert_intra(node, id, outer, inner);
1749 isl_id_free(id);
1750 isl_mat_free(lin);
1751 isl_mat_free(outer);
1752 return valid >= 0 ? isl_stat_ok : isl_stat_error;
1755 /* Insert the intra-statement consecutivity constraint "ma"
1756 * in front of the list of intra-statement consecutivity constraints
1757 * of "node", provided it is a valid constraint.
1759 * "ma" maps the (uncompressed) space of "node" to a product space
1760 * of outer and inner parts.
1761 * Only the linear parts of the affine expressions are relevant.
1762 * If the node is compressed, reformulate the constraints in terms
1763 * of the compressed domain,
1764 * extract the linear parts and store them in "node",
1765 * provided they represent a valid constraint.
1766 * If "ma" has a tuple identifier, then keep track of it as well.
1768 static isl_stat node_insert_intra(struct isl_sched_node *node,
1769 __isl_take isl_multi_aff *ma)
1771 isl_bool has_id;
1772 isl_id *id;
1773 isl_space *space;
1774 isl_mat *lin, *outer;
1775 int n_outer, n_inner;
1777 space = isl_multi_aff_get_space(ma);
1778 space = isl_space_unwrap(isl_space_range(space));
1779 n_outer = isl_space_dim(space, isl_dim_in);
1780 n_inner = isl_space_dim(space, isl_dim_out);
1781 isl_space_free(space);
1782 if (!space)
1783 goto error;
1785 has_id = isl_multi_aff_has_tuple_id(ma, isl_dim_out);
1786 if (has_id < 0)
1787 goto error;
1788 if (!has_id) {
1789 id = NULL;
1790 } else {
1791 id = isl_multi_aff_get_tuple_id(ma, isl_dim_out);
1792 if (!id)
1793 goto error;
1796 if (node->compressed)
1797 ma = isl_multi_aff_pullback_multi_aff(ma,
1798 isl_multi_aff_copy(node->decompress));
1799 lin = extract_linear(ma);
1800 outer = isl_mat_drop_rows(isl_mat_copy(lin), n_outer, n_inner);
1801 return insert_valid_intra(node, id, lin, outer);
1802 error:
1803 isl_multi_aff_free(ma);
1804 return isl_stat_error;
1807 /* Insert the intra-statement consecutivity constraint "ma"
1808 * in front of the list of intra-statement consecutivity constraints
1809 * of the corresponding node of "graph", provided there is such a node.
1811 static isl_stat graph_insert_intra(struct isl_sched_graph *graph,
1812 __isl_take isl_multi_aff *ma)
1814 struct isl_sched_node *node;
1815 isl_ctx *ctx;
1816 isl_space *space;
1818 space = isl_multi_aff_get_domain_space(ma);
1819 ctx = isl_multi_aff_get_ctx(ma);
1820 node = graph_find_node(ctx, graph, space);
1821 isl_space_free(space);
1822 if (!node)
1823 goto error;
1824 if (is_node(graph, node))
1825 return node_insert_intra(node, ma);
1826 isl_multi_aff_free(ma);
1827 return isl_stat_ok;
1828 error:
1829 isl_multi_aff_free(ma);
1830 return isl_stat_error;
1833 /* Store the intra-statement consecutivity constraints of "sc"
1834 * in the appropriate nodes of "graph".
1836 * If there are multiple constraints per node, then
1837 * the constraints are successively inserted in front of
1838 * the per-node list.
1839 * Start from the last intra-statement consecutivity constraint
1840 * to ensure that the final order of the per-node constraints
1841 * is the same as in the original list of constraints.
1843 static isl_stat graph_set_intra(struct isl_sched_graph *graph,
1844 __isl_keep isl_schedule_constraints *sc)
1846 isl_multi_aff_list *intra;
1847 int i, n;
1849 intra = isl_schedule_constraints_get_intra_consecutivity(sc);
1850 if (!intra)
1851 return isl_stat_error;
1852 n = isl_multi_aff_list_n_multi_aff(intra);
1853 for (i = n - 1; i >= 0; --i) {
1854 isl_multi_aff *ma;
1856 ma = isl_multi_aff_list_get_multi_aff(intra, i);
1857 if (graph_insert_intra(graph, ma) < 0)
1858 goto error;
1860 isl_multi_aff_list_free(intra);
1862 return isl_stat_ok;
1863 error:
1864 isl_multi_aff_list_free(intra);
1865 return isl_stat_error;
1868 /* Return the intra-statement consecutivity constraint
1869 * referenced by "node" that has identifier "id".
1870 * Return NULL if no such constraint can be found.
1872 static struct isl_sched_intra *find_intra(struct isl_sched_node *node,
1873 __isl_take isl_id *id)
1875 struct isl_sched_intra *intra;
1877 for (intra = node->intra; intra; intra = intra->next) {
1878 if (intra->id == id)
1879 break;
1882 isl_id_free(id);
1883 return intra;
1886 /* Clear the dependence relation of "edge" and remove
1887 * it from the edge tables of "graph".
1889 static isl_stat clear_edge(struct isl_sched_graph *graph,
1890 struct isl_sched_edge *edge)
1892 isl_space *space;
1894 space = isl_map_get_space(edge->map);
1895 isl_map_free(edge->map);
1896 edge->map = isl_map_empty(space);
1897 if (!edge->map)
1898 return isl_stat_error;
1900 graph_remove_edge(graph, edge);
1902 return isl_stat_ok;
1905 /* Add an edge to "graph" corresponding to the inter-statement
1906 * consecutivity constraint "map" that references
1907 * the intra-statement consecutivity constraints identified
1908 * by "id_src" and "id_dst", if those intra-statement
1909 * consecutivity constraints can be found and have the same number
1910 * of rows in their inner parts.
1912 * The edge is first created and then possibly disabled
1913 * if the intra-statement consecutivity constraints cannot be found.
1914 * A side effect of the edge creation is that the nodes are identified and
1915 * the intra-statement consecutivity constraints need to be looked up
1916 * in those nodes.
1918 static isl_stat graph_add_inter_split(struct isl_sched_graph *graph,
1919 __isl_take isl_map *map, __isl_take isl_id *id_src,
1920 __isl_take isl_id *id_dst)
1922 struct isl_sched_edge *edge;
1924 edge = add_edge(graph, isl_edge_consecutivity, map);
1925 if (!edge)
1926 goto error;
1927 edge->state = isl_sched_inter_init;
1928 edge->src_intra = find_intra(edge->src, id_src);
1929 edge->dst_intra = find_intra(edge->dst, id_dst);
1931 if (edge->src_intra && edge->dst_intra &&
1932 edge->src_intra->n_inner == edge->dst_intra->n_inner)
1933 return isl_stat_ok;
1935 return clear_edge(graph, edge);
1936 error:
1937 isl_id_free(id_src);
1938 isl_id_free(id_dst);
1939 return isl_stat_error;
1942 /* Add an edge to "graph" corresponding to the inter-statement
1943 * consecutivity constraint "map", if it represents a valid constraint.
1945 * Extract out the relation between statement instances and
1946 * the pair of intra-statement consecutivity constraint identifiers.
1948 static isl_stat add_inter_consecutivity(__isl_take isl_map *map, void *user)
1950 struct isl_sched_graph *graph = user;
1951 isl_space *space;
1952 isl_bool has_id;
1953 isl_id *id_src, *id_dst;
1955 space = isl_map_get_space(map);
1956 space = isl_space_factor_range(space);
1957 has_id = isl_space_has_tuple_id(space, isl_dim_in);
1958 if (has_id >= 0 && has_id)
1959 has_id = isl_space_has_tuple_id(space, isl_dim_out);
1960 if (has_id < 0 || !has_id) {
1961 isl_space_free(space);
1962 isl_map_free(map);
1963 return has_id < 0 ? isl_stat_error : isl_stat_ok;
1965 id_src = isl_space_get_tuple_id(space, isl_dim_in);
1966 id_dst = isl_space_get_tuple_id(space, isl_dim_out);
1967 isl_space_free(space);
1969 map = isl_map_factor_domain(map);
1971 return graph_add_inter_split(graph, map, id_src, id_dst);
1974 /* Add edges to "graph" corresponding to the valid inter-statement
1975 * consecutivity constraints of "sc".
1977 static isl_stat graph_set_inter(struct isl_sched_graph *graph,
1978 __isl_keep isl_schedule_constraints *sc)
1980 isl_map_list *inter;
1981 isl_stat r;
1983 inter = isl_schedule_constraints_get_inter_consecutivity(sc);
1984 r = isl_map_list_foreach(inter, &add_inter_consecutivity, graph);
1985 isl_map_list_free(inter);
1987 return r;
1990 /* Extract (a basis for) the purely linear part of "ma",
1991 * i.e., the coefficients of the input variables but not the local variables.
1993 * There may be linear combinations of the elements of "ma"
1994 * that do not involve local variables, while the elements themselves
1995 * do involve local variables.
1996 * Perform Gaussian elimination to remove local variables from
1997 * as many rows as possible and subsequently remove the remaining rows
1998 * involving local variables as well as the columns corresponding
1999 * to the local variables.
2001 static __isl_give isl_mat *extract_pure_linear(__isl_take isl_multi_aff *ma)
2003 int i, n, n_var, n_div;
2004 isl_mat *rows;
2006 if (!ma)
2007 return NULL;
2009 n_var = isl_multi_aff_dim(ma, isl_dim_in);
2011 rows = extract_linear(ma);
2012 rows = isl_mat_reverse_gauss(rows);
2014 if (!rows)
2015 return NULL;
2017 n = isl_mat_rows(rows);
2018 n_div = isl_mat_cols(rows) - n_var;
2019 for (i = n - 1; i >= 0; --i)
2020 if (isl_seq_first_non_zero(rows->row[i] + n_var, n_div) == -1)
2021 break;
2022 rows = isl_mat_drop_rows(rows, i + 1, n - (i + 1));
2023 rows = isl_mat_drop_cols(rows, n_var, n_div);
2025 return rows;
2028 /* Extend "complement" with the complement of the purely linear part of "ma".
2030 static isl_stat extend_prefix_complement(__isl_take isl_set *dom,
2031 __isl_take isl_multi_aff *ma, void *user)
2033 isl_mat *complement_ma;
2034 isl_mat **complement = user;
2036 isl_set_free(dom);
2038 complement_ma = isl_mat_row_complement(extract_pure_linear(ma));
2040 *complement = isl_mat_concat(*complement, complement_ma);
2042 return *complement ? isl_stat_ok : isl_stat_error;
2045 /* Extract a linear prefix schedule from "pma" that is valid
2046 * for all pieces.
2047 * In particular, if there are multiple pieces, then the result
2048 * contains linear combinations that have a fixed value in all pieces.
2049 * That is, if there is a direction that is not fixed in one or more pieces,
2050 * then it is also not fixed by the entire piecewise expression.
2051 * A direction that is not fixed needs to have a component along
2052 * the orthogonal complement of the fixed directions.
2053 * Collect these orthogonal complements over all pieces and
2054 * compute the complement of the result to obtain the desired directions.
2056 * If "pma" is empty (which indicates a missing, and therefore invalid,
2057 * prefix schedule), then the result will contain a basis for all directions,
2058 * being the complement of an empty complement.
2060 static __isl_give isl_mat *extract_prefix_pw_multi_aff(
2061 __isl_keep isl_pw_multi_aff *pma)
2063 int nvar;
2064 isl_ctx *ctx;
2065 isl_mat *complement;
2067 if (!pma)
2068 return NULL;
2070 ctx = isl_pw_multi_aff_get_ctx(pma);
2071 nvar = isl_pw_multi_aff_dim(pma, isl_dim_in);
2072 complement = isl_mat_alloc(ctx, 0, nvar);
2074 if (isl_pw_multi_aff_foreach_piece(pma, &extend_prefix_complement,
2075 &complement) < 0)
2076 complement = isl_mat_free(complement);
2078 return isl_mat_row_complement(complement);
2081 /* Extract a prefix schedule for "node" from "mupa" and add
2082 * it to node->sched.
2084 * "mupa" is formulated in terms of the original (uncompressed) spaces,
2085 * while node->sched is formulated in terms of the potentially compressed
2086 * space. If "node" is compressed, then the expression corresponding
2087 * to "node" therefore needs to be transformed first.
2089 * The prefix stored in node->sched is only used to avoid linearly
2090 * dependent schedule rows from being generated. Only the linear
2091 * part of the prefix is therefore relevant. Use zero for
2092 * the coefficients of the constant term and the parameters.
2093 * The extracted linear part may have fewer rows than "mupa",
2094 * either because of linear dependences or because some element
2095 * of "mupa" involve local variables.
2096 * Extend the number of rows of the linear part to the number
2097 * of elements in "mupa" to ensure that all nodes have the same
2098 * number of rows.
2100 * If "mupa" does not contain a prefix schedule for "node",
2101 * then it is invalid. In the current implementation, this will
2102 * cause the scheduler to not construct any further schedule rows
2103 * for "node".
2105 static isl_stat extract_prefix(struct isl_sched_node *node,
2106 __isl_keep isl_multi_union_pw_aff *mupa)
2108 isl_space *space;
2109 isl_multi_pw_aff *mpa;
2110 isl_pw_multi_aff *pma;
2111 isl_mat *prefix;
2112 int n, n_prefix;
2114 if (!mupa)
2115 return isl_stat_error;
2117 space = isl_space_copy(node->space);
2118 mpa = isl_multi_union_pw_aff_extract_multi_pw_aff(mupa, space);
2120 if (node->compress)
2121 mpa = isl_multi_pw_aff_pullback_multi_aff(mpa,
2122 isl_multi_aff_copy(node->decompress));
2124 pma = isl_pw_multi_aff_from_multi_pw_aff(mpa);
2126 prefix = extract_prefix_pw_multi_aff(pma);
2128 prefix = isl_mat_insert_zero_cols(prefix, 0, 1 + node->nparam);
2129 n_prefix = isl_multi_union_pw_aff_dim(mupa, isl_dim_set);
2130 n = isl_mat_rows(prefix);
2131 prefix = isl_mat_add_zero_rows(prefix, n_prefix - n);
2132 node->sched = isl_mat_concat(node->sched, prefix);
2134 isl_pw_multi_aff_free(pma);
2136 if (!node->sched)
2137 return isl_stat_error;
2139 return isl_stat_ok;
2142 /* Check if any (non-trivial) prefix schedule was specified in "sc".
2143 * If so, store a copy in "graph" for later simplification
2144 * of dependence relations and extract the linear parts
2145 * in the respective nodes.
2146 * These linear parts are considered as an initial outer band.
2147 * Their only effect is to try and prevent rows in the generated schedule
2148 * from being linear combinations of the prefix.
2150 * Since the prefix schedule cannot be assumed to be linearly
2151 * independent on all nodes, graph->n_row is not incremented.
2152 * Note that the ranks of the nodes will get updated regardless and
2153 * graph->maxvar is computed based on these ranks. The test for
2154 * whether more schedule rows are required in compute_schedule_wcc
2155 * therefore does take the prefix into account.
2157 * The prefix schedule specified by the user is required to
2158 * be complete on the domain. An invalid prefix will result
2159 * in nodes being essentially removed from consideration.
2161 static isl_stat handle_prefix(struct isl_sched_graph *graph,
2162 __isl_keep isl_schedule_constraints *sc)
2164 int i;
2165 int n;
2166 isl_multi_union_pw_aff *mupa;
2168 mupa = isl_schedule_constraints_get_prefix(sc);
2169 if (!mupa)
2170 return isl_stat_error;
2171 n = isl_multi_union_pw_aff_dim(mupa, isl_dim_set);
2172 if (n == 0) {
2173 isl_multi_union_pw_aff_free(mupa);
2174 return isl_stat_ok;
2177 graph->prefix = mupa;
2179 for (i = 0; i < graph->n; ++i) {
2180 if (extract_prefix(&graph->node[i], mupa) < 0)
2181 return isl_stat_error;
2184 graph->n_total_row = n;
2185 graph->band_start = graph->n_total_row;
2187 return isl_stat_ok;
2190 /* Initialize the schedule graph "graph" from the schedule constraints "sc".
2192 * The context is included in the domain before the nodes of
2193 * the graphs are extracted in order to be able to exploit
2194 * any possible additional equalities.
2195 * Note that this intersection is only performed locally here.
2197 static isl_stat graph_init(struct isl_sched_graph *graph,
2198 __isl_keep isl_schedule_constraints *sc)
2200 isl_ctx *ctx;
2201 isl_union_set *domain;
2202 isl_multi_aff_list *intra;
2203 isl_union_map *c;
2204 struct isl_extract_edge_data data;
2205 enum isl_edge_type i;
2206 isl_stat r;
2207 int n_intra, n_inter;
2209 if (!sc)
2210 return isl_stat_error;
2212 ctx = isl_schedule_constraints_get_ctx(sc);
2214 domain = isl_schedule_constraints_get_domain(sc);
2215 graph->n = isl_union_set_n_set(domain);
2216 isl_union_set_free(domain);
2218 intra = isl_schedule_constraints_get_intra_consecutivity(sc);
2219 n_intra = isl_multi_aff_list_n_multi_aff(intra);
2220 isl_multi_aff_list_free(intra);
2221 n_inter = isl_schedule_constraints_n_inter_consecutivity_map(sc);
2223 if (graph_alloc(ctx, graph, graph->n,
2224 isl_schedule_constraints_n_map(sc), n_intra + n_inter) < 0)
2225 return isl_stat_error;
2227 if (compute_max_row(graph, sc) < 0)
2228 return isl_stat_error;
2229 graph->root = graph;
2230 graph->n = 0;
2231 domain = isl_schedule_constraints_get_domain(sc);
2232 domain = isl_union_set_intersect_params(domain,
2233 isl_schedule_constraints_get_context(sc));
2234 r = isl_union_set_foreach_set(domain, &extract_node, graph);
2235 isl_union_set_free(domain);
2236 if (r < 0)
2237 return isl_stat_error;
2238 if (graph_init_table(ctx, graph) < 0)
2239 return isl_stat_error;
2240 if (graph_set_intra(graph, sc) < 0)
2241 return isl_stat_error;
2242 if (handle_prefix(graph, sc) < 0)
2243 return isl_stat_error;
2244 for (i = isl_edge_first; i <= isl_edge_last_sc; ++i) {
2245 c = isl_schedule_constraints_get(sc, i);
2246 graph->max_edge[i] = isl_union_map_n_map(c);
2247 isl_union_map_free(c);
2248 if (!c)
2249 return isl_stat_error;
2251 graph->max_edge[isl_edge_consecutivity] = n_inter;
2252 if (graph_init_edge_tables(ctx, graph) < 0)
2253 return isl_stat_error;
2254 graph->n_edge = 0;
2255 data.graph = graph;
2256 for (i = isl_edge_first; i <= isl_edge_last_sc; ++i) {
2257 isl_stat r;
2259 data.type = i;
2260 c = isl_schedule_constraints_get(sc, i);
2261 r = isl_union_map_foreach_map(c, &extract_edge, &data);
2262 isl_union_map_free(c);
2263 if (r < 0)
2264 return isl_stat_error;
2266 if (graph_set_inter(graph, sc) < 0)
2267 return isl_stat_error;
2269 return isl_stat_ok;
2272 /* Check whether there is any dependence from node[j] to node[i]
2273 * or from node[i] to node[j].
2275 static isl_bool node_follows_weak(int i, int j, void *user)
2277 isl_bool f;
2278 struct isl_sched_graph *graph = user;
2280 f = graph_has_any_edge(graph, &graph->node[j], &graph->node[i]);
2281 if (f < 0 || f)
2282 return f;
2283 return graph_has_any_edge(graph, &graph->node[i], &graph->node[j]);
2286 /* Check whether there is a (conditional) validity dependence from node[j]
2287 * to node[i], forcing node[i] to follow node[j].
2289 static isl_bool node_follows_strong(int i, int j, void *user)
2291 struct isl_sched_graph *graph = user;
2293 return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]);
2296 /* Is there a (conditional) validity dependence from node[j] to node[i],
2297 * forcing node[i] to follow node[j] or are the nodes related
2298 * through an inter-statement consecutivity constraint?
2300 static isl_bool node_follows_strong_or_consecutive(int i, int j, void *user)
2302 struct isl_sched_graph *graph = user;
2303 isl_bool r;
2304 struct isl_sched_node *node_i, *node_j;
2306 r = node_follows_strong(i, j, user);
2307 if (r < 0 || r)
2308 return r;
2310 node_i = &graph->node[i];
2311 node_j = &graph->node[j];
2312 r = graph_has_edge(graph, isl_edge_consecutivity, node_i, node_j);
2313 if (r < 0 || r)
2314 return r;
2315 return graph_has_edge(graph, isl_edge_consecutivity, node_j, node_i);
2318 /* Use Tarjan's algorithm for computing the strongly connected components
2319 * in the dependence graph only considering those edges defined by "follows".
2321 static isl_stat detect_ccs(isl_ctx *ctx, struct isl_sched_graph *graph,
2322 isl_bool (*follows)(int i, int j, void *user))
2324 int i, n;
2325 struct isl_tarjan_graph *g = NULL;
2327 g = isl_tarjan_graph_init(ctx, graph->n, follows, graph);
2328 if (!g)
2329 return isl_stat_error;
2331 graph->scc = 0;
2332 i = 0;
2333 n = graph->n;
2334 while (n) {
2335 while (g->order[i] != -1) {
2336 graph->node[g->order[i]].scc = graph->scc;
2337 --n;
2338 ++i;
2340 ++i;
2341 graph->scc++;
2344 isl_tarjan_graph_free(g);
2346 return isl_stat_ok;
2349 /* Apply Tarjan's algorithm to detect the strongly connected components
2350 * in the dependence graph.
2351 * Only consider the (conditional) validity dependences and clear "weak".
2353 static isl_stat detect_sccs(isl_ctx *ctx, struct isl_sched_graph *graph)
2355 graph->weak = 0;
2356 return detect_ccs(ctx, graph, &node_follows_strong);
2359 /* Apply Tarjan's algorithm to detect the strongly connected components
2360 * in the dependence graph, but combine components that are linked
2361 * through inter-statement consecutivity constraints.
2362 * Only consider the (conditional) validity dependences and clear "weak".
2364 static isl_stat detect_sccs_grouping_consecutive(isl_ctx *ctx,
2365 struct isl_sched_graph *graph)
2367 graph->weak = 0;
2368 return detect_ccs(ctx, graph, &node_follows_strong_or_consecutive);
2371 /* Apply Tarjan's algorithm to detect the (weakly) connected components
2372 * in the dependence graph.
2373 * Consider all dependences and set "weak".
2375 static isl_stat detect_wccs(isl_ctx *ctx, struct isl_sched_graph *graph)
2377 graph->weak = 1;
2378 return detect_ccs(ctx, graph, &node_follows_weak);
2381 static int cmp_scc(const void *a, const void *b, void *data)
2383 struct isl_sched_graph *graph = data;
2384 const int *i1 = a;
2385 const int *i2 = b;
2387 return graph->node[*i1].scc - graph->node[*i2].scc;
2390 /* Sort the elements of graph->sorted according to the corresponding SCCs.
2392 static int sort_sccs(struct isl_sched_graph *graph)
2394 return isl_sort(graph->sorted, graph->n, sizeof(int), &cmp_scc, graph);
2397 /* Return a non-parametric set in the compressed space of "node" that is
2398 * bounded by the size in each direction
2400 * { [x] : -S_i <= x_i <= S_i }
2402 * If S_i is infinity in direction i, then there are no constraints
2403 * in that direction.
2405 * Cache the result in node->bounds.
2407 static __isl_give isl_basic_set *get_size_bounds(struct isl_sched_node *node)
2409 isl_space *space;
2410 isl_basic_set *bounds;
2411 int i;
2412 unsigned nparam;
2414 if (node->bounds)
2415 return isl_basic_set_copy(node->bounds);
2417 if (node->compressed)
2418 space = isl_multi_aff_get_domain_space(node->decompress);
2419 else
2420 space = isl_space_copy(node->space);
2421 nparam = isl_space_dim(space, isl_dim_param);
2422 space = isl_space_drop_dims(space, isl_dim_param, 0, nparam);
2423 bounds = isl_basic_set_universe(space);
2425 for (i = 0; i < node->nvar; ++i) {
2426 isl_val *size;
2428 size = isl_multi_val_get_val(node->sizes, i);
2429 if (!size)
2430 return isl_basic_set_free(bounds);
2431 if (!isl_val_is_int(size)) {
2432 isl_val_free(size);
2433 continue;
2435 bounds = isl_basic_set_upper_bound_val(bounds, isl_dim_set, i,
2436 isl_val_copy(size));
2437 bounds = isl_basic_set_lower_bound_val(bounds, isl_dim_set, i,
2438 isl_val_neg(size));
2441 node->bounds = isl_basic_set_copy(bounds);
2442 return bounds;
2445 /* Drop some constraints from "delta" that could be exploited
2446 * to construct loop coalescing schedules.
2447 * In particular, drop those constraint that bound the difference
2448 * to the size of the domain.
2449 * First project out the parameters to improve the effectiveness.
2451 static __isl_give isl_set *drop_coalescing_constraints(
2452 __isl_take isl_set *delta, struct isl_sched_node *node)
2454 unsigned nparam;
2455 isl_basic_set *bounds;
2457 bounds = get_size_bounds(node);
2459 nparam = isl_set_dim(delta, isl_dim_param);
2460 delta = isl_set_project_out(delta, isl_dim_param, 0, nparam);
2461 delta = isl_set_remove_divs(delta);
2462 delta = isl_set_plain_gist_basic_set(delta, bounds);
2463 return delta;
2466 /* Given a dependence relation R from "node" to itself,
2467 * construct the set of coefficients of valid constraints for elements
2468 * in that dependence relation.
2469 * In particular, the result contains tuples of coefficients
2470 * c_0, c_n, c_x such that
2472 * c_0 + c_n n + c_x y - c_x x >= 0 for each (x,y) in R
2474 * or, equivalently,
2476 * c_0 + c_n n + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
2478 * We choose here to compute the dual of delta R.
2479 * Alternatively, we could have computed the dual of R, resulting
2480 * in a set of tuples c_0, c_n, c_x, c_y, and then
2481 * plugged in (c_0, c_n, c_x, -c_x).
2483 * If "need_param" is set, then the resulting coefficients effectively
2484 * include coefficients for the parameters c_n. Otherwise, they may
2485 * have been projected out already.
2486 * Since the constraints may be different for these two cases,
2487 * they are stored in separate caches.
2488 * In particular, if no parameter coefficients are required and
2489 * the schedule_treat_coalescing option is set, then the parameters
2490 * are projected out and some constraints that could be exploited
2491 * to construct coalescing schedules are removed before the dual
2492 * is computed.
2494 * If "node" has been compressed, then the dependence relation
2495 * is also compressed before the set of coefficients is computed.
2497 static __isl_give isl_basic_set *intra_coefficients(
2498 struct isl_sched_graph *graph, struct isl_sched_node *node,
2499 __isl_take isl_map *map, int need_param)
2501 isl_ctx *ctx;
2502 isl_set *delta;
2503 isl_map *key;
2504 isl_basic_set *coef;
2505 isl_maybe_isl_basic_set m;
2506 isl_map_to_basic_set **hmap = &graph->intra_hmap;
2507 int treat;
2509 if (!map)
2510 return NULL;
2512 ctx = isl_map_get_ctx(map);
2513 treat = !need_param && isl_options_get_schedule_treat_coalescing(ctx);
2514 if (!treat)
2515 hmap = &graph->intra_hmap_param;
2516 m = isl_map_to_basic_set_try_get(*hmap, map);
2517 if (m.valid < 0 || m.valid) {
2518 isl_map_free(map);
2519 return m.value;
2522 key = isl_map_copy(map);
2523 if (node->compressed) {
2524 map = isl_map_preimage_domain_multi_aff(map,
2525 isl_multi_aff_copy(node->decompress));
2526 map = isl_map_preimage_range_multi_aff(map,
2527 isl_multi_aff_copy(node->decompress));
2529 delta = isl_map_deltas(map);
2530 if (treat)
2531 delta = drop_coalescing_constraints(delta, node);
2532 delta = isl_set_remove_divs(delta);
2533 coef = isl_set_coefficients(delta);
2534 *hmap = isl_map_to_basic_set_set(*hmap, key, isl_basic_set_copy(coef));
2536 return coef;
2539 /* Given a dependence relation R, construct the set of coefficients
2540 * of valid constraints for elements in that dependence relation.
2541 * In particular, the result contains tuples of coefficients
2542 * c_0, c_n, c_x, c_y such that
2544 * c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
2546 * If the source or destination nodes of "edge" have been compressed,
2547 * then the dependence relation is also compressed before
2548 * the set of coefficients is computed.
2550 static __isl_give isl_basic_set *inter_coefficients(
2551 struct isl_sched_graph *graph, struct isl_sched_edge *edge,
2552 __isl_take isl_map *map)
2554 isl_set *set;
2555 isl_map *key;
2556 isl_basic_set *coef;
2557 isl_maybe_isl_basic_set m;
2559 m = isl_map_to_basic_set_try_get(graph->inter_hmap, map);
2560 if (m.valid < 0 || m.valid) {
2561 isl_map_free(map);
2562 return m.value;
2565 key = isl_map_copy(map);
2566 if (edge->src->compressed)
2567 map = isl_map_preimage_domain_multi_aff(map,
2568 isl_multi_aff_copy(edge->src->decompress));
2569 if (edge->dst->compressed)
2570 map = isl_map_preimage_range_multi_aff(map,
2571 isl_multi_aff_copy(edge->dst->decompress));
2572 set = isl_map_wrap(isl_map_remove_divs(map));
2573 coef = isl_set_coefficients(set);
2574 graph->inter_hmap = isl_map_to_basic_set_set(graph->inter_hmap, key,
2575 isl_basic_set_copy(coef));
2577 return coef;
2580 /* Return the position of the coefficients of the variables in
2581 * the coefficients constraints "coef".
2583 * The space of "coef" is of the form
2585 * { coefficients[[cst, params] -> S] }
2587 * Return the position of S.
2589 static int coef_var_offset(__isl_keep isl_basic_set *coef)
2591 int offset;
2592 isl_space *space;
2594 space = isl_space_unwrap(isl_basic_set_get_space(coef));
2595 offset = isl_space_dim(space, isl_dim_in);
2596 isl_space_free(space);
2598 return offset;
2601 /* Return the offset of the coefficient of the constant term of "node"
2602 * within the (I)LP.
2604 * Within each node, the coefficients have the following order:
2605 * - positive and negative parts of c_i_x
2606 * - c_i_n (if parametric)
2607 * - c_i_0
2609 static int node_cst_coef_offset(struct isl_sched_node *node)
2611 return node->start + 2 * node->nvar + node->nparam;
2614 /* Return the offset of the coefficients of the parameters of "node"
2615 * within the (I)LP.
2617 * Within each node, the coefficients have the following order:
2618 * - positive and negative parts of c_i_x
2619 * - c_i_n (if parametric)
2620 * - c_i_0
2622 static int node_par_coef_offset(struct isl_sched_node *node)
2624 return node->start + 2 * node->nvar;
2627 /* Return the offset of the coefficients of the variables of "node"
2628 * within the (I)LP.
2630 * Within each node, the coefficients have the following order:
2631 * - positive and negative parts of c_i_x
2632 * - c_i_n (if parametric)
2633 * - c_i_0
2635 static int node_var_coef_offset(struct isl_sched_node *node)
2637 return node->start;
2640 /* Return the position of the pair of variables encoding
2641 * coefficient "i" of "node".
2643 * The order of these variable pairs is the opposite of
2644 * that of the coefficients, with 2 variables per coefficient.
2646 static int node_var_coef_pos(struct isl_sched_node *node, int i)
2648 return node_var_coef_offset(node) + 2 * (node->nvar - 1 - i);
2651 /* Construct an isl_dim_map for mapping constraints on coefficients
2652 * for "node" to the corresponding positions in graph->lp.
2653 * "offset" is the offset of the coefficients for the variables
2654 * in the input constraints.
2655 * "s" is the sign of the mapping.
2657 * The input constraints are given in terms of the coefficients
2658 * (c_0, c_x) or (c_0, c_n, c_x).
2659 * The mapping produced by this function essentially plugs in
2660 * (0, c_i_x^+ - c_i_x^-) if s = 1 and
2661 * (0, -c_i_x^+ + c_i_x^-) if s = -1 or
2662 * (0, 0, c_i_x^+ - c_i_x^-) if s = 1 and
2663 * (0, 0, -c_i_x^+ + c_i_x^-) if s = -1.
2664 * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
2665 * Furthermore, the order of these pairs is the opposite of that
2666 * of the corresponding coefficients.
2668 * The caller can extend the mapping to also map the other coefficients
2669 * (and therefore not plug in 0).
2671 static __isl_give isl_dim_map *intra_dim_map(isl_ctx *ctx,
2672 struct isl_sched_graph *graph, struct isl_sched_node *node,
2673 int offset, int s)
2675 int pos;
2676 unsigned total;
2677 isl_dim_map *dim_map;
2679 if (!node || !graph->lp)
2680 return NULL;
2682 total = isl_basic_set_total_dim(graph->lp);
2683 pos = node_var_coef_pos(node, 0);
2684 dim_map = isl_dim_map_alloc(ctx, total);
2685 isl_dim_map_range(dim_map, pos, -2, offset, 1, node->nvar, -s);
2686 isl_dim_map_range(dim_map, pos + 1, -2, offset, 1, node->nvar, s);
2688 return dim_map;
2691 /* Construct an isl_dim_map for mapping constraints on coefficients
2692 * for "src" (node i) and "dst" (node j) to the corresponding positions
2693 * in graph->lp.
2694 * "offset" is the offset of the coefficients for the variables of "src"
2695 * in the input constraints.
2696 * "s" is the sign of the mapping.
2698 * The input constraints are given in terms of the coefficients
2699 * (c_0, c_n, c_x, c_y).
2700 * The mapping produced by this function essentially plugs in
2701 * (c_j_0 - c_i_0, c_j_n - c_i_n,
2702 * -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-) if s = 1 and
2703 * (-c_j_0 + c_i_0, -c_j_n + c_i_n,
2704 * c_i_x^+ - c_i_x^-, -(c_j_x^+ - c_j_x^-)) if s = -1.
2705 * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
2706 * Furthermore, the order of these pairs is the opposite of that
2707 * of the corresponding coefficients.
2709 * The caller can further extend the mapping.
2711 static __isl_give isl_dim_map *inter_dim_map(isl_ctx *ctx,
2712 struct isl_sched_graph *graph, struct isl_sched_node *src,
2713 struct isl_sched_node *dst, int offset, int s)
2715 int pos;
2716 unsigned total;
2717 isl_dim_map *dim_map;
2719 if (!src || !dst || !graph->lp)
2720 return NULL;
2722 total = isl_basic_set_total_dim(graph->lp);
2723 dim_map = isl_dim_map_alloc(ctx, total);
2725 pos = node_cst_coef_offset(dst);
2726 isl_dim_map_range(dim_map, pos, 0, 0, 0, 1, s);
2727 pos = node_par_coef_offset(dst);
2728 isl_dim_map_range(dim_map, pos, 1, 1, 1, dst->nparam, s);
2729 pos = node_var_coef_pos(dst, 0);
2730 isl_dim_map_range(dim_map, pos, -2, offset + src->nvar, 1,
2731 dst->nvar, -s);
2732 isl_dim_map_range(dim_map, pos + 1, -2, offset + src->nvar, 1,
2733 dst->nvar, s);
2735 pos = node_cst_coef_offset(src);
2736 isl_dim_map_range(dim_map, pos, 0, 0, 0, 1, -s);
2737 pos = node_par_coef_offset(src);
2738 isl_dim_map_range(dim_map, pos, 1, 1, 1, src->nparam, -s);
2739 pos = node_var_coef_pos(src, 0);
2740 isl_dim_map_range(dim_map, pos, -2, offset, 1, src->nvar, s);
2741 isl_dim_map_range(dim_map, pos + 1, -2, offset, 1, src->nvar, -s);
2743 return dim_map;
2746 /* Add the constraints from "src" to "dst" using "dim_map",
2747 * after making sure there is enough room in "dst" for the extra constraints.
2749 static __isl_give isl_basic_set *add_constraints_dim_map(
2750 __isl_take isl_basic_set *dst, __isl_take isl_basic_set *src,
2751 __isl_take isl_dim_map *dim_map)
2753 int n_eq, n_ineq;
2755 n_eq = isl_basic_set_n_equality(src);
2756 n_ineq = isl_basic_set_n_inequality(src);
2757 dst = isl_basic_set_extend_constraints(dst, n_eq, n_ineq);
2758 dst = isl_basic_set_add_constraints_dim_map(dst, src, dim_map);
2759 return dst;
2762 /* Add constraints to graph->lp that force validity for the given
2763 * dependence from a node i to itself.
2764 * That is, add constraints that enforce
2766 * (c_i_0 + c_i_n n + c_i_x y) - (c_i_0 + c_i_n n + c_i_x x)
2767 * = c_i_x (y - x) >= 0
2769 * for each (x,y) in R.
2770 * We obtain general constraints on coefficients (c_0, c_x)
2771 * of valid constraints for (y - x) and then plug in (0, c_i_x^+ - c_i_x^-),
2772 * where c_i_x = c_i_x^+ - c_i_x^-, with c_i_x^+ and c_i_x^- non-negative.
2773 * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
2774 * Note that the result of intra_coefficients may also contain
2775 * parameter coefficients c_n, in which case 0 is plugged in for them as well.
2777 static isl_stat add_intra_validity_constraints(struct isl_sched_graph *graph,
2778 struct isl_sched_edge *edge)
2780 int offset;
2781 isl_map *map = isl_map_copy(edge->map);
2782 isl_ctx *ctx = isl_map_get_ctx(map);
2783 isl_dim_map *dim_map;
2784 isl_basic_set *coef;
2785 struct isl_sched_node *node = edge->src;
2787 coef = intra_coefficients(graph, node, map, 0);
2789 offset = coef_var_offset(coef);
2791 if (!coef)
2792 return isl_stat_error;
2794 dim_map = intra_dim_map(ctx, graph, node, offset, 1);
2795 graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
2797 return isl_stat_ok;
2800 /* Add constraints to graph->lp that force validity for the given
2801 * dependence from node i to node j.
2802 * That is, add constraints that enforce
2804 * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) >= 0
2806 * for each (x,y) in R.
2807 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
2808 * of valid constraints for R and then plug in
2809 * (c_j_0 - c_i_0, c_j_n - c_i_n, -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-),
2810 * where c_* = c_*^+ - c_*^-, with c_*^+ and c_*^- non-negative.
2811 * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
2813 static isl_stat add_inter_validity_constraints(struct isl_sched_graph *graph,
2814 struct isl_sched_edge *edge)
2816 int offset;
2817 isl_map *map;
2818 isl_ctx *ctx;
2819 isl_dim_map *dim_map;
2820 isl_basic_set *coef;
2821 struct isl_sched_node *src = edge->src;
2822 struct isl_sched_node *dst = edge->dst;
2824 if (!graph->lp)
2825 return isl_stat_error;
2827 map = isl_map_copy(edge->map);
2828 ctx = isl_map_get_ctx(map);
2829 coef = inter_coefficients(graph, edge, map);
2831 offset = coef_var_offset(coef);
2833 if (!coef)
2834 return isl_stat_error;
2836 dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);
2838 edge->start = graph->lp->n_ineq;
2839 graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
2840 if (!graph->lp)
2841 return isl_stat_error;
2842 edge->end = graph->lp->n_ineq;
2844 return isl_stat_ok;
2847 /* Add constraints to graph->lp that bound the dependence distance for the given
2848 * dependence from a node i to itself.
2849 * If s = 1, we add the constraint
2851 * c_i_x (y - x) <= m_0 + m_n n
2853 * or
2855 * -c_i_x (y - x) + m_0 + m_n n >= 0
2857 * for each (x,y) in R.
2858 * If s = -1, we add the constraint
2860 * -c_i_x (y - x) <= m_0 + m_n n
2862 * or
2864 * c_i_x (y - x) + m_0 + m_n n >= 0
2866 * for each (x,y) in R.
2867 * We obtain general constraints on coefficients (c_0, c_n, c_x)
2868 * of valid constraints for (y - x) and then plug in (m_0, m_n, -s * c_i_x),
2869 * with each coefficient (except m_0) represented as a pair of non-negative
2870 * coefficients.
2873 * If "local" is set, then we add constraints
2875 * c_i_x (y - x) <= 0
2877 * or
2879 * -c_i_x (y - x) <= 0
2881 * instead, forcing the dependence distance to be (less than or) equal to 0.
2882 * That is, we plug in (0, 0, -s * c_i_x),
2883 * intra_coefficients is not required to have c_n in its result when
2884 * "local" is set. If they are missing, then (0, -s * c_i_x) is plugged in.
2885 * Note that dependences marked local are treated as validity constraints
2886 * by add_all_validity_constraints and therefore also have
2887 * their distances bounded by 0 from below.
2889 static isl_stat add_intra_proximity_constraints(struct isl_sched_graph *graph,
2890 struct isl_sched_edge *edge, int s, int local)
2892 int offset;
2893 unsigned nparam;
2894 isl_map *map = isl_map_copy(edge->map);
2895 isl_ctx *ctx = isl_map_get_ctx(map);
2896 isl_dim_map *dim_map;
2897 isl_basic_set *coef;
2898 struct isl_sched_node *node = edge->src;
2900 coef = intra_coefficients(graph, node, map, !local);
2902 offset = coef_var_offset(coef);
2904 if (!coef)
2905 return isl_stat_error;
2907 nparam = isl_space_dim(node->space, isl_dim_param);
2908 dim_map = intra_dim_map(ctx, graph, node, offset, -s);
2910 if (!local) {
2911 isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
2912 isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
2913 isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
2915 graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
2917 return isl_stat_ok;
2920 /* Add constraints to graph->lp that bound the dependence distance for the given
2921 * dependence from node i to node j.
2922 * If s = 1, we add the constraint
2924 * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)
2925 * <= m_0 + m_n n
2927 * or
2929 * -(c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x) +
2930 * m_0 + m_n n >= 0
2932 * for each (x,y) in R.
2933 * If s = -1, we add the constraint
2935 * -((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x))
2936 * <= m_0 + m_n n
2938 * or
2940 * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) +
2941 * m_0 + m_n n >= 0
2943 * for each (x,y) in R.
2944 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
2945 * of valid constraints for R and then plug in
2946 * (m_0 - s*c_j_0 + s*c_i_0, m_n - s*c_j_n + s*c_i_n,
2947 * s*c_i_x, -s*c_j_x)
2948 * with each coefficient (except m_0, c_*_0 and c_*_n)
2949 * represented as a pair of non-negative coefficients.
2952 * If "local" is set (and s = 1), then we add constraints
2954 * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) <= 0
2956 * or
2958 * -((c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x)) >= 0
2960 * instead, forcing the dependence distance to be (less than or) equal to 0.
2961 * That is, we plug in
2962 * (-s*c_j_0 + s*c_i_0, -s*c_j_n + s*c_i_n, s*c_i_x, -s*c_j_x).
2963 * Note that dependences marked local are treated as validity constraints
2964 * by add_all_validity_constraints and therefore also have
2965 * their distances bounded by 0 from below.
2967 static isl_stat add_inter_proximity_constraints(struct isl_sched_graph *graph,
2968 struct isl_sched_edge *edge, int s, int local)
2970 int offset;
2971 unsigned nparam;
2972 isl_map *map = isl_map_copy(edge->map);
2973 isl_ctx *ctx = isl_map_get_ctx(map);
2974 isl_dim_map *dim_map;
2975 isl_basic_set *coef;
2976 struct isl_sched_node *src = edge->src;
2977 struct isl_sched_node *dst = edge->dst;
2979 coef = inter_coefficients(graph, edge, map);
2981 offset = coef_var_offset(coef);
2983 if (!coef)
2984 return isl_stat_error;
2986 nparam = isl_space_dim(src->space, isl_dim_param);
2987 dim_map = inter_dim_map(ctx, graph, src, dst, offset, -s);
2989 if (!local) {
2990 isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
2991 isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
2992 isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
2995 graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
2997 return isl_stat_ok;
/* Does the distance over "edge" need to be forced to zero?
 * This is the case if "edge" is marked as a local edge or,
 * when "use_coincidence" is set, if it is a coincidence edge.
 *
 * The result is either 0 or 1.
 */
static int force_zero(struct isl_sched_edge *edge, int use_coincidence)
{
	if (is_local(edge))
		return 1;
	if (!use_coincidence)
		return 0;
	return is_coincidence(edge) != 0;
}
3010 /* Add all validity constraints to graph->lp.
3012 * An edge that is forced to be local needs to have its dependence
3013 * distances equal to zero. We take care of bounding them by 0 from below
3014 * here. add_all_proximity_constraints takes care of bounding them by 0
3015 * from above.
3017 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
3018 * Otherwise, we ignore them.
3020 static int add_all_validity_constraints(struct isl_sched_graph *graph,
3021 int use_coincidence)
3023 int i;
3025 for (i = 0; i < graph->n_edge; ++i) {
3026 struct isl_sched_edge *edge = &graph->edge[i];
3027 int zero;
3029 zero = force_zero(edge, use_coincidence);
3030 if (!is_validity(edge) && !zero)
3031 continue;
3032 if (edge->src != edge->dst)
3033 continue;
3034 if (add_intra_validity_constraints(graph, edge) < 0)
3035 return -1;
3038 for (i = 0; i < graph->n_edge; ++i) {
3039 struct isl_sched_edge *edge = &graph->edge[i];
3040 int zero;
3042 zero = force_zero(edge, use_coincidence);
3043 if (!is_validity(edge) && !zero)
3044 continue;
3045 if (edge->src == edge->dst)
3046 continue;
3047 if (add_inter_validity_constraints(graph, edge) < 0)
3048 return -1;
3051 return 0;
3054 /* Add constraints to graph->lp that bound the dependence distance
3055 * for all dependence relations.
3056 * If a given proximity dependence is identical to a validity
3057 * dependence, then the dependence distance is already bounded
3058 * from below (by zero), so we only need to bound the distance
3059 * from above. (This includes the case of "local" dependences
3060 * which are treated as validity dependence by add_all_validity_constraints.)
3061 * Otherwise, we need to bound the distance both from above and from below.
3063 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
3064 * Otherwise, we ignore them.
3066 static int add_all_proximity_constraints(struct isl_sched_graph *graph,
3067 int use_coincidence)
3069 int i;
3071 for (i = 0; i < graph->n_edge; ++i) {
3072 struct isl_sched_edge *edge = &graph->edge[i];
3073 int zero;
3075 zero = force_zero(edge, use_coincidence);
3076 if (!is_proximity(edge) && !zero)
3077 continue;
3078 if (edge->src == edge->dst &&
3079 add_intra_proximity_constraints(graph, edge, 1, zero) < 0)
3080 return -1;
3081 if (edge->src != edge->dst &&
3082 add_inter_proximity_constraints(graph, edge, 1, zero) < 0)
3083 return -1;
3084 if (is_validity(edge) || zero)
3085 continue;
3086 if (edge->src == edge->dst &&
3087 add_intra_proximity_constraints(graph, edge, -1, 0) < 0)
3088 return -1;
3089 if (edge->src != edge->dst &&
3090 add_inter_proximity_constraints(graph, edge, -1, 0) < 0)
3091 return -1;
3094 return 0;
3097 /* Normalize the rows of "indep" such that all rows are lexicographically
3098 * positive and such that each row contains as many final zeros as possible,
3099 * given the choice for the previous rows.
3100 * Do this by performing elementary row operations.
3102 static __isl_give isl_mat *normalize_independent(__isl_take isl_mat *indep)
3104 indep = isl_mat_reverse_gauss(indep);
3105 indep = isl_mat_lexnonneg_rows(indep);
3106 return indep;
3109 /* Extract the linear part of the current schedule for node "node".
3111 static __isl_give isl_mat *extract_linear_schedule(struct isl_sched_node *node)
3113 int n_row = isl_mat_rows(node->sched);
3115 return isl_mat_sub_alloc(node->sched, 0, n_row,
3116 1 + node->nparam, node->nvar);
3119 /* Compute a basis for the rows in the linear part of the schedule
3120 * and extend this basis to a full basis. The remaining rows
3121 * can then be used to force linear independence from the rows
3122 * in the schedule.
3124 * In particular, given the schedule rows S, we compute
3126 * S = H Q
3127 * S U = H
3129 * with H the Hermite normal form of S. That is, all but the
3130 * first rank columns of H are zero and so each row in S is
3131 * a linear combination of the first rank rows of Q.
3132 * The matrix Q can be used as a variable transformation
3133 * that isolates the directions of S in the first rank rows.
3134 * Transposing S U = H yields
3136 * U^T S^T = H^T
3138 * with all but the first rank rows of H^T zero.
3139 * The last rows of U^T are therefore linear combinations
3140 * of schedule coefficients that are all zero on schedule
3141 * coefficients that are linearly dependent on the rows of S.
3142 * At least one of these combinations is non-zero on
3143 * linearly independent schedule coefficients.
3144 * The rows are normalized to involve as few of the last
3145 * coefficients as possible and to have a positive initial value.
3147 static int node_update_vmap(struct isl_sched_node *node)
3149 isl_mat *H, *U, *Q;
3151 H = extract_linear_schedule(node);
3153 H = isl_mat_left_hermite(H, 0, &U, &Q);
3154 isl_mat_free(node->indep);
3155 isl_mat_free(node->vmap);
3156 node->vmap = Q;
3157 node->indep = isl_mat_transpose(U);
3158 node->rank = isl_mat_initial_non_zero_cols(H);
3159 node->indep = isl_mat_drop_rows(node->indep, 0, node->rank);
3160 node->indep = normalize_independent(node->indep);
3161 isl_mat_free(H);
3163 if (!node->indep || !node->vmap || node->rank < 0)
3164 return -1;
3165 return 0;
/* Is "edge" a validity edge or a conditional validity edge?
 */
static int is_any_validity(struct isl_sched_edge *edge)
{
	if (is_validity(edge))
		return 1;
	return is_conditional_validity(edge) != 0;
}
/* Return the number of times the constraints of "edge" are counted.
 *
 *	validity		-> 1	(>= 0)
 *	validity+proximity	-> 2	(>= 0 and upper bound)
 *	proximity		-> 2	(lower and upper bound)
 *	local(+any)		-> 2	(>= 0 and <= 0)
 *
 * An edge that is only a conditional validity edge counts zero times
 * because it is only checked afterwards.
 *
 * Coincidence edges are handled as local edges if "use_coincidence" is set
 * and are ignored otherwise.
 */
static int edge_multiplicity(struct isl_sched_edge *edge, int use_coincidence)
{
	int two_sided;

	two_sided = is_proximity(edge) || force_zero(edge, use_coincidence);
	if (two_sided)
		return 2;
	return is_validity(edge) ? 1 : 0;
}
3198 /* How many times should the constraints in "edge" be counted
3199 * as a parametric intra-node constraint?
3201 * Only proximity edges that are not forced zero need
3202 * coefficient constraints that include coefficients for parameters.
3203 * If the edge is also a validity edge, then only
3204 * an upper bound is introduced. Otherwise, both lower and upper bounds
3205 * are introduced.
3207 static int parametric_intra_edge_multiplicity(struct isl_sched_edge *edge,
3208 int use_coincidence)
3210 if (edge->src != edge->dst)
3211 return 0;
3212 if (!is_proximity(edge))
3213 return 0;
3214 if (force_zero(edge, use_coincidence))
3215 return 0;
3216 if (is_validity(edge))
3217 return 1;
3218 else
3219 return 2;
3222 /* Add "f" times the number of equality and inequality constraints of "bset"
3223 * to "n_eq" and "n_ineq" and free "bset".
3225 static isl_stat update_count(__isl_take isl_basic_set *bset,
3226 int f, int *n_eq, int *n_ineq)
3228 if (!bset)
3229 return isl_stat_error;
3231 *n_eq += isl_basic_set_n_equality(bset);
3232 *n_ineq += isl_basic_set_n_inequality(bset);
3233 isl_basic_set_free(bset);
3235 return isl_stat_ok;
3238 /* Count the number of equality and inequality constraints
3239 * that will be added for the given map.
3241 * The edges that require parameter coefficients are counted separately.
3243 * "use_coincidence" is set if we should take into account coincidence edges.
3245 static isl_stat count_map_constraints(struct isl_sched_graph *graph,
3246 struct isl_sched_edge *edge, __isl_take isl_map *map,
3247 int *n_eq, int *n_ineq, int use_coincidence)
3249 isl_map *copy;
3250 isl_basic_set *coef;
3251 int f = edge_multiplicity(edge, use_coincidence);
3252 int fp = parametric_intra_edge_multiplicity(edge, use_coincidence);
3254 if (f == 0) {
3255 isl_map_free(map);
3256 return isl_stat_ok;
3259 if (edge->src != edge->dst) {
3260 coef = inter_coefficients(graph, edge, map);
3261 return update_count(coef, f, n_eq, n_ineq);
3264 if (fp > 0) {
3265 copy = isl_map_copy(map);
3266 coef = intra_coefficients(graph, edge->src, copy, 1);
3267 if (update_count(coef, fp, n_eq, n_ineq) < 0)
3268 goto error;
3271 if (f > fp) {
3272 copy = isl_map_copy(map);
3273 coef = intra_coefficients(graph, edge->src, copy, 0);
3274 if (update_count(coef, f - fp, n_eq, n_ineq) < 0)
3275 goto error;
3278 isl_map_free(map);
3279 return isl_stat_ok;
3280 error:
3281 isl_map_free(map);
3282 return isl_stat_error;
3285 /* Count the number of equality and inequality constraints
3286 * that will be added to the main lp problem.
3287 * We count as follows
3288 * validity -> 1 (>= 0)
3289 * validity+proximity -> 2 (>= 0 and upper bound)
3290 * proximity -> 2 (lower and upper bound)
3291 * local(+any) -> 2 (>= 0 and <= 0)
3293 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
3294 * Otherwise, we ignore them.
3296 static int count_constraints(struct isl_sched_graph *graph,
3297 int *n_eq, int *n_ineq, int use_coincidence)
3299 int i;
3301 *n_eq = *n_ineq = 0;
3302 for (i = 0; i < graph->n_edge; ++i) {
3303 struct isl_sched_edge *edge = &graph->edge[i];
3304 isl_map *map = isl_map_copy(edge->map);
3306 if (count_map_constraints(graph, edge, map, n_eq, n_ineq,
3307 use_coincidence) < 0)
3308 return -1;
3311 return 0;
3314 /* Count the number of constraints that will be added by
3315 * add_bound_constant_constraints to bound the values of the constant terms
3316 * and increment *n_eq and *n_ineq accordingly.
3318 * In practice, add_bound_constant_constraints only adds inequalities.
3320 static isl_stat count_bound_constant_constraints(isl_ctx *ctx,
3321 struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
3323 if (isl_options_get_schedule_max_constant_term(ctx) == -1)
3324 return isl_stat_ok;
3326 *n_ineq += graph->n;
3328 return isl_stat_ok;
3331 /* Add constraints to bound the values of the constant terms in the schedule,
3332 * if requested by the user.
3334 * The maximal value of the constant terms is defined by the option
3335 * "schedule_max_constant_term".
3337 static isl_stat add_bound_constant_constraints(isl_ctx *ctx,
3338 struct isl_sched_graph *graph)
3340 int i, k;
3341 int max;
3342 int total;
3344 max = isl_options_get_schedule_max_constant_term(ctx);
3345 if (max == -1)
3346 return isl_stat_ok;
3348 total = isl_basic_set_dim(graph->lp, isl_dim_set);
3350 for (i = 0; i < graph->n; ++i) {
3351 struct isl_sched_node *node = &graph->node[i];
3352 int pos;
3354 k = isl_basic_set_alloc_inequality(graph->lp);
3355 if (k < 0)
3356 return isl_stat_error;
3357 isl_seq_clr(graph->lp->ineq[k], 1 + total);
3358 pos = node_cst_coef_offset(node);
3359 isl_int_set_si(graph->lp->ineq[k][1 + pos], -1);
3360 isl_int_set_si(graph->lp->ineq[k][0], max);
3363 return isl_stat_ok;
3366 /* Count the number of constraints that will be added by
3367 * add_bound_coefficient_constraints and increment *n_eq and *n_ineq
3368 * accordingly.
3370 * In practice, add_bound_coefficient_constraints only adds inequalities.
3372 static int count_bound_coefficient_constraints(isl_ctx *ctx,
3373 struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
3375 int i;
3377 if (isl_options_get_schedule_max_coefficient(ctx) == -1 &&
3378 !isl_options_get_schedule_treat_coalescing(ctx))
3379 return 0;
3381 for (i = 0; i < graph->n; ++i)
3382 *n_ineq += graph->node[i].nparam + 2 * graph->node[i].nvar;
3384 return 0;
3387 /* Add constraints to graph->lp that bound the values of
3388 * the parameter schedule coefficients of "node" to "max" and
3389 * the variable schedule coefficients to the corresponding entry
3390 * in node->max.
3391 * In either case, a negative value means that no bound needs to be imposed.
3393 * For parameter coefficients, this amounts to adding a constraint
3395 * c_n <= max
3397 * i.e.,
3399 * -c_n + max >= 0
3401 * The variables coefficients are, however, not represented directly.
3402 * Instead, the variable coefficients c_x are written as differences
3403 * c_x = c_x^+ - c_x^-.
3404 * That is,
3406 * -max_i <= c_x_i <= max_i
3408 * is encoded as
3410 * -max_i <= c_x_i^+ - c_x_i^- <= max_i
3412 * or
3414 * -(c_x_i^+ - c_x_i^-) + max_i >= 0
3415 * c_x_i^+ - c_x_i^- + max_i >= 0
3417 static isl_stat node_add_coefficient_constraints(isl_ctx *ctx,
3418 struct isl_sched_graph *graph, struct isl_sched_node *node, int max)
3420 int i, j, k;
3421 int total;
3422 isl_vec *ineq;
3424 total = isl_basic_set_dim(graph->lp, isl_dim_set);
3426 for (j = 0; j < node->nparam; ++j) {
3427 int dim;
3429 if (max < 0)
3430 continue;
3432 k = isl_basic_set_alloc_inequality(graph->lp);
3433 if (k < 0)
3434 return isl_stat_error;
3435 dim = 1 + node_par_coef_offset(node) + j;
3436 isl_seq_clr(graph->lp->ineq[k], 1 + total);
3437 isl_int_set_si(graph->lp->ineq[k][dim], -1);
3438 isl_int_set_si(graph->lp->ineq[k][0], max);
3441 ineq = isl_vec_alloc(ctx, 1 + total);
3442 ineq = isl_vec_clr(ineq);
3443 if (!ineq)
3444 return isl_stat_error;
3445 for (i = 0; i < node->nvar; ++i) {
3446 int pos = 1 + node_var_coef_pos(node, i);
3448 if (isl_int_is_neg(node->max->el[i]))
3449 continue;
3451 isl_int_set_si(ineq->el[pos], 1);
3452 isl_int_set_si(ineq->el[pos + 1], -1);
3453 isl_int_set(ineq->el[0], node->max->el[i]);
3455 k = isl_basic_set_alloc_inequality(graph->lp);
3456 if (k < 0)
3457 goto error;
3458 isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);
3460 isl_seq_neg(ineq->el + pos, ineq->el + pos, 2);
3461 k = isl_basic_set_alloc_inequality(graph->lp);
3462 if (k < 0)
3463 goto error;
3464 isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);
3466 isl_seq_clr(ineq->el + pos, 2);
3468 isl_vec_free(ineq);
3470 return isl_stat_ok;
3471 error:
3472 isl_vec_free(ineq);
3473 return isl_stat_error;
3476 /* Add constraints that bound the values of the variable and parameter
3477 * coefficients of the schedule.
3479 * The maximal value of the coefficients is defined by the option
3480 * 'schedule_max_coefficient' and the entries in node->max.
3481 * These latter entries are only set if either the schedule_max_coefficient
3482 * option or the schedule_treat_coalescing option is set.
3484 static isl_stat add_bound_coefficient_constraints(isl_ctx *ctx,
3485 struct isl_sched_graph *graph)
3487 int i;
3488 int max;
3490 max = isl_options_get_schedule_max_coefficient(ctx);
3492 if (max == -1 && !isl_options_get_schedule_treat_coalescing(ctx))
3493 return isl_stat_ok;
3495 for (i = 0; i < graph->n; ++i) {
3496 struct isl_sched_node *node = &graph->node[i];
3498 if (node_add_coefficient_constraints(ctx, graph, node, max) < 0)
3499 return isl_stat_error;
3502 return isl_stat_ok;
3505 /* Add a constraint to graph->lp that equates the value at position
3506 * "sum_pos" to the sum of the "n" values starting at "first".
3508 static isl_stat add_sum_constraint(struct isl_sched_graph *graph,
3509 int sum_pos, int first, int n)
3511 int i, k;
3512 int total;
3514 total = isl_basic_set_dim(graph->lp, isl_dim_set);
3516 k = isl_basic_set_alloc_equality(graph->lp);
3517 if (k < 0)
3518 return isl_stat_error;
3519 isl_seq_clr(graph->lp->eq[k], 1 + total);
3520 isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
3521 for (i = 0; i < n; ++i)
3522 isl_int_set_si(graph->lp->eq[k][1 + first + i], 1);
3524 return isl_stat_ok;
3527 /* Add a constraint to graph->lp that equates the value at position
3528 * "sum_pos" to the sum of the parameter coefficients of all nodes.
3530 static isl_stat add_param_sum_constraint(struct isl_sched_graph *graph,
3531 int sum_pos)
3533 int i, j, k;
3534 int total;
3536 total = isl_basic_set_dim(graph->lp, isl_dim_set);
3538 k = isl_basic_set_alloc_equality(graph->lp);
3539 if (k < 0)
3540 return isl_stat_error;
3541 isl_seq_clr(graph->lp->eq[k], 1 + total);
3542 isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
3543 for (i = 0; i < graph->n; ++i) {
3544 int pos = 1 + node_par_coef_offset(&graph->node[i]);
3546 for (j = 0; j < graph->node[i].nparam; ++j)
3547 isl_int_set_si(graph->lp->eq[k][pos + j], 1);
3550 return isl_stat_ok;
3553 /* Add a constraint to graph->lp that equates the value at position
3554 * "sum_pos" to the sum of the variable coefficients of all nodes.
3556 static isl_stat add_var_sum_constraint(struct isl_sched_graph *graph,
3557 int sum_pos)
3559 int i, j, k;
3560 int total;
3562 total = isl_basic_set_dim(graph->lp, isl_dim_set);
3564 k = isl_basic_set_alloc_equality(graph->lp);
3565 if (k < 0)
3566 return isl_stat_error;
3567 isl_seq_clr(graph->lp->eq[k], 1 + total);
3568 isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
3569 for (i = 0; i < graph->n; ++i) {
3570 struct isl_sched_node *node = &graph->node[i];
3571 int pos = 1 + node_var_coef_offset(node);
3573 for (j = 0; j < 2 * node->nvar; ++j)
3574 isl_int_set_si(graph->lp->eq[k][pos + j], 1);
3577 return isl_stat_ok;
3580 /* Construct an ILP problem for finding schedule coefficients
3581 * that result in non-negative, but small dependence distances
3582 * over all dependences.
3583 * In particular, the dependence distances over proximity edges
3584 * are bounded by m_0 + m_n n and we compute schedule coefficients
3585 * with small values (preferably zero) of m_n and m_0.
3587 * All variables of the ILP are non-negative. The actual coefficients
3588 * may be negative, so each coefficient is represented as the difference
3589 * of two non-negative variables. The negative part always appears
3590 * immediately before the positive part.
3591 * Other than that, the variables have the following order
3593 * - sum of positive and negative parts of m_n coefficients
3594 * - m_0
3595 * - sum of all c_n coefficients
3596 * (unconstrained when computing non-parametric schedules)
3597 * - sum of positive and negative parts of all c_x coefficients
3598 * - positive and negative parts of m_n coefficients
3599 * - for each node
3600 * - positive and negative parts of c_i_x, in opposite order
3601 * - c_i_n (if parametric)
3602 * - c_i_0
3604 * The constraints are those from the edges plus two or three equalities
3605 * to express the sums.
3607 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
3608 * Otherwise, we ignore them.
3610 static isl_stat setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
3611 int use_coincidence)
3613 int i;
3614 unsigned nparam;
3615 unsigned total;
3616 isl_space *space;
3617 int parametric;
3618 int param_pos;
3619 int n_eq, n_ineq;
3621 parametric = ctx->opt->schedule_parametric;
3622 nparam = isl_space_dim(graph->node[0].space, isl_dim_param);
3623 param_pos = 4;
3624 total = param_pos + 2 * nparam;
3625 for (i = 0; i < graph->n; ++i) {
3626 struct isl_sched_node *node = &graph->node[graph->sorted[i]];
3627 if (node_update_vmap(node) < 0)
3628 return isl_stat_error;
3629 node->start = total;
3630 total += 1 + node->nparam + 2 * node->nvar;
3633 if (count_constraints(graph, &n_eq, &n_ineq, use_coincidence) < 0)
3634 return isl_stat_error;
3635 if (count_bound_constant_constraints(ctx, graph, &n_eq, &n_ineq) < 0)
3636 return isl_stat_error;
3637 if (count_bound_coefficient_constraints(ctx, graph, &n_eq, &n_ineq) < 0)
3638 return isl_stat_error;
3640 space = isl_space_set_alloc(ctx, 0, total);
3641 isl_basic_set_free(graph->lp);
3642 n_eq += 2 + parametric;
3644 graph->lp = isl_basic_set_alloc_space(space, 0, n_eq, n_ineq);
3646 if (add_sum_constraint(graph, 0, param_pos, 2 * nparam) < 0)
3647 return isl_stat_error;
3648 if (parametric && add_param_sum_constraint(graph, 2) < 0)
3649 return isl_stat_error;
3650 if (add_var_sum_constraint(graph, 3) < 0)
3651 return isl_stat_error;
3652 if (add_bound_constant_constraints(ctx, graph) < 0)
3653 return isl_stat_error;
3654 if (add_bound_coefficient_constraints(ctx, graph) < 0)
3655 return isl_stat_error;
3656 if (add_all_validity_constraints(graph, use_coincidence) < 0)
3657 return isl_stat_error;
3658 if (add_all_proximity_constraints(graph, use_coincidence) < 0)
3659 return isl_stat_error;
3661 return isl_stat_ok;
3664 /* Analyze the conflicting constraint found by
3665 * isl_tab_basic_set_constrained_lexmin. If it corresponds to the validity
3666 * constraint of one of the edges between distinct nodes, living, moreover
3667 * in distinct SCCs, then record the source and sink SCC as this may
3668 * be a good place to cut between SCCs.
3670 static int check_conflict(int con, void *user)
3672 int i;
3673 struct isl_sched_graph *graph = user;
3675 if (graph->src_scc >= 0)
3676 return 0;
3678 con -= graph->lp->n_eq;
3680 if (con >= graph->lp->n_ineq)
3681 return 0;
3683 for (i = 0; i < graph->n_edge; ++i) {
3684 if (!is_validity(&graph->edge[i]))
3685 continue;
3686 if (graph->edge[i].src == graph->edge[i].dst)
3687 continue;
3688 if (graph->edge[i].src->scc == graph->edge[i].dst->scc)
3689 continue;
3690 if (graph->edge[i].start > con)
3691 continue;
3692 if (graph->edge[i].end <= con)
3693 continue;
3694 graph->src_scc = graph->edge[i].src->scc;
3695 graph->dst_scc = graph->edge[i].dst->scc;
3698 return 0;
3701 /* Check whether the next schedule row of the given node needs to be
3702 * non-trivial. Lower-dimensional domains may have some trivial rows,
3703 * but as soon as the number of remaining required non-trivial rows
3704 * is as large as the number or remaining rows to be computed,
3705 * all remaining rows need to be non-trivial.
3707 static int needs_row(struct isl_sched_graph *graph, struct isl_sched_node *node)
3709 return node->nvar - node->rank >= graph->maxvar - graph->n_row;
3712 /* Take a linear combination "lin" in terms of the schedule coefficients c_i
3713 * and express it in terms of the variables of the ILP problem
3714 * as constructed by setup_lp.
3715 * In particular, in the ILP, the schedule coefficients are represented by
3716 * pairs of non-negative variables c^+_i - c^-_i, with c^-_i appearing
3717 * before c^+_i. Furthermore,
3718 * the pairs of non-negative variables representing the coefficients
3719 * are stored in the opposite order.
3721 static __isl_give isl_mat *linear_to_lp(__isl_keep isl_mat *lin)
3723 isl_ctx *ctx;
3724 isl_mat *mat;
3725 int i, j, n, n_var;
3727 if (!lin)
3728 return NULL;
3730 ctx = isl_mat_get_ctx(lin);
3731 n = isl_mat_rows(lin);
3732 n_var = isl_mat_cols(lin);
3733 mat = isl_mat_alloc(ctx, n, 2 * n_var);
3734 if (!mat)
3735 return NULL;
3736 for (i = 0; i < n; ++i) {
3737 for (j = 0; j < n_var; ++j) {
3738 int nj = n_var - 1 - j;
3739 isl_int_neg(mat->row[i][2 * nj], lin->row[i][j]);
3740 isl_int_set(mat->row[i][2 * nj + 1], lin->row[i][j]);
3744 return mat;
3747 /* Clear all memory associated to "region" and reset the fields
3748 * to their default values.
3750 static void clear_region(struct isl_ilp_region *region)
3752 region->has_non_zero = 0;
3753 region->has_fixed = 0;
3754 region->optional = 0;
3755 region->failed = 0;
3756 region->disjunctive = 0;
3757 region->conditional = 0;
3758 region->non_zero = isl_mat_free(region->non_zero);
3759 region->fixed = isl_mat_free(region->fixed);
3760 region->fixed_val = isl_vec_free(region->fixed_val);
3761 region->user = NULL;
3764 /* Clear all "n" regions of "graph" and return -1.
3766 static int clear_regions(struct isl_sched_graph *graph, int n)
3768 int i;
3770 for (i = 0; i < n; ++i)
3771 clear_region(&graph->region[i]);
3772 return -1;
3775 /* Set the fixed-value constraint of "region" to force
3776 * the linear combinations "zero" to be zero on the schedule coefficients.
3777 * "zero" is expressed in terms of the schedule coefficients and
3778 * needs to be expanded to the ILP variables first.
3780 static void set_zero(struct isl_ilp_region *region, __isl_keep isl_mat *zero)
3782 int len;
3783 isl_ctx *ctx;
3785 ctx = isl_mat_get_ctx(zero);
3786 len = isl_mat_rows(zero);
3787 region->has_fixed = 1;
3788 region->fixed = linear_to_lp(zero);
3789 region->fixed_val = isl_vec_zero(ctx, len);
3792 /* Set the fixed value constraint of "region" to
3793 * force the next schedule row to be equal to row "pos" of the inner part
3794 * of intra-statement consecutivity constraint "intra" of node "node"
3795 * plus some linear combination of the schedule rows
3796 * prior to the one that corresponds to the first inner row and/or
3797 * rows of the inner part prior to "pos".
3798 * What is not allowed in the linear combination are any linearly
3799 * independent schedule rows that appear between rows that
3800 * correspond to rows of the inner part.
3801 * "outer_complement" is the orthogonal complement of [T_0; G], with
3802 * T_0 the schedule computed so far and G the outer part of "intra".
3803 * Since the outer part has been covered by T_0 at this stage,
3804 * "outer_complement" is effectively the orthogonal complement of T_0.
3806 * Let T_1 be the part of the schedule computed so far (T_0) that
3807 * does not include any rows corresponding to rows of the inner part.
3808 * Let H_< be the inner part before row "pos" and H_= the row
3809 * corresponding to "pos".
3810 * The next schedule row c should be equal to H_= plus a linear combination
3811 * of [T_1; H_<]
3812 * Let U be the orthogonal complement of [T_1; H_<; H_=] and
3813 * let U' be the orthogonal complement of [T_1; H_<].
3814 * Then c U should be zero, while c U' should be equal to H_= U'.
3815 * The latter condition can be refined to c U'' = H_= U'' with
3816 * U'' a basis extension of U to cover U'.
3817 * This means that the remaining row U'' is such that H_= U''
3818 * is not zero. U'' contains exactly one row because the rank of U'
3819 * is one greater than that of U. This assumes that T_1 is
3820 * linearly independent of H, but if it is not then consecutivity
3821 * cannot be achieved anyway.
3823 * If no inner rows have been covered so far, then T_0 = T_1 and
3824 * U' can be obtained as "outer_complement".
3825 * Otherwise, it is computed from T_1 and H_<.
3827 * The fixed field of "region" is set to [U''; U], while
3828 * the fixed_val field is set to H_= U'' followed by zeros.
3830 static void set_equal(struct isl_ilp_region *region,
3831 struct isl_sched_node *node, struct isl_sched_intra *intra, int pos,
3832 __isl_take isl_mat *complement)
3834 int n;
3835 isl_vec *val;
3836 isl_vec *row;
3837 isl_mat *inner, *mat, *ext_complement;
3839 mat = extract_linear_schedule(node);
3840 if (intra->n_fixed > 0) {
3841 n = isl_mat_rows(mat);
3842 mat = isl_mat_drop_rows(mat,
3843 intra->first_fixed, n - intra->first_fixed);
3844 inner = isl_mat_copy(intra->inner);
3845 inner = isl_mat_drop_rows(inner, intra->n_fixed,
3846 intra->n_inner - intra->n_fixed);
3847 mat = isl_mat_concat(mat, inner);
3848 isl_mat_free(complement);
3849 complement = isl_mat_row_complement(isl_mat_copy(mat));
3851 row = isl_mat_get_row(intra->inner, pos);
3852 mat = isl_mat_vec_concat(mat, isl_vec_copy(row));
3853 ext_complement = isl_mat_row_complement(mat);
3854 complement = isl_mat_row_basis_extension(isl_mat_copy(ext_complement),
3855 complement);
3857 val = isl_mat_vec_product(isl_mat_copy(complement), row);
3858 mat = isl_mat_concat(complement, ext_complement);
3859 val = isl_vec_zero_extend(val, isl_mat_rows(mat));
3860 region->has_fixed = 1;
3861 region->fixed = linear_to_lp(mat);
3862 region->fixed_val = val;
3863 isl_mat_free(mat);
3866 /* Given a node and an intra-statement consecutivity constraint
3867 * on that node, construct a matrix that contains
3868 * - the linear part of the current schedule
3869 * - the outer part of the constraint, if "add_outer" is set
3870 * - the inner part of the constraint, if "add_inner" is set
3872 static __isl_give isl_mat *intra_rows(struct isl_sched_node *node,
3873 struct isl_sched_intra *intra, int add_outer, int add_inner)
3875 isl_mat *mat;
3877 mat = extract_linear_schedule(node);
3878 if (add_outer)
3879 mat = isl_mat_concat(mat, isl_mat_copy(intra->outer));
3880 if (add_inner)
3881 mat = isl_mat_concat(mat, isl_mat_copy(intra->inner));
3882 return mat;
3885 /* Finish the initialization of graph->region[n] as a region
3886 * corresponding to intra-statement consecutivity constraint "intra"
3887 * for node "node".
3888 * In particular, set the position of the sequence of variables
3889 * to which the region applies, mark the region as optional and
3890 * add a pointer to "intra" to be able to recover the constraint
3891 * from the region.
3893 static int finish_intra(struct isl_sched_graph *graph,
3894 struct isl_sched_node *node, struct isl_sched_intra *intra, int n)
3896 graph->region[n].pos = node_var_coef_offset(node);
3897 graph->region[n].optional = 1;
3898 graph->region[n].user = intra;
3900 return n + 1;
3903 /* Set the non-zero constraint of "region" to "non_zero".
3904 * "non_zero" is expressed in terms of the schedule coefficients.
3905 * Normalize it first and expand it to the ILP variables.
3907 static void set_non_zero(struct isl_ilp_region *region,
3908 __isl_take isl_mat *non_zero)
3910 non_zero = normalize_independent(non_zero);
3911 region->has_non_zero = 1;
3912 region->non_zero = linear_to_lp(non_zero);
3913 isl_mat_free(non_zero);
3916 /* Set the non-zero constraint of "region" to force the schedule row
3917 * to be linearly independent of the combination of the schedule computed
3918 * so far for "node" and the inner rows of "intra".
3919 * Simplify the constraints by exploiting the fact that
3920 * the linear combinations "zero" are all zero on the corresponding
3921 * schedule coefficients.
3923 static void set_independent_to_inner(struct isl_ilp_region *region,
3924 struct isl_sched_node *node, struct isl_sched_intra *intra,
3925 __isl_take isl_mat *zero)
3927 isl_mat *mat;
3929 mat = intra_rows(node, intra, 0, 1);
3930 mat = isl_mat_row_complement(mat);
3931 mat = isl_mat_row_basis_extension(zero, mat);
3932 set_non_zero(region, mat);
3935 /* Add an ILP region to "graph" that forces the next schedule row
3936 * for "node" to be a linear combination of the outer rows of
3937 * intra-statement consecutivity constraint "intra" and
3938 * of the schedule rows computed so far.
3939 * "n" is the current number of ILP regions.
3940 * "outer_complement" is the orthogonal complement of [T_0; G], with
3941 * T_0 the schedule computed so far and G the outer part of "intra".
3942 * Return the updated number of ILP regions.
3944 * The next row being a linear combination of T_0 and G means
3945 * that "outer_complement" needs to be zero on the schedule coefficients.
3946 * The schedule row further needs to be linearly independent of
3947 * the inner part of "intra" (in order for later rows to be set
3948 * equal to the inner part) and of the previous schedule rows
3949 * (in order to make progress).
3951 static int add_linear_combination(struct isl_sched_graph *graph,
3952 struct isl_sched_node *node, struct isl_sched_intra *intra, int n,
3953 __isl_take isl_mat *outer_complement)
3955 set_zero(&graph->region[n], outer_complement);
3956 set_independent_to_inner(&graph->region[n], node, intra,
3957 outer_complement);
3958 return finish_intra(graph, node, intra, n);
3961 /* Add an ILP region to "graph" that forces the next schedule row
3962 * for "node" to be equal to the next inner row of "intra"
3963 * (plus a linear combination of the schedule computed so far,
3964 * except for linearly independent rows that appear in the middle
3965 * of rows that correspond to the inner part of "intra").
3966 * "n" is the current number of ILP regions.
3967 * "outer_complement" is the orthogonal complement of [T_0; G], with
3968 * T_0 the schedule computed so far and G the outer part of "intra".
3969 * Return the updated number of ILP regions.
3971 * Note that the schedule computed so far should be linearly
3972 * independent of the next inner row of "intra", meaning that
3973 * the linear combination does not cancel out the contribution
3974 * of the next inner row of "intra".
3975 * The next schedule row also needs to be
3976 * linearly independent of the remaining inner part of "intra"
3977 * (in order for later rows to be set equal to this remaining part) and
3978 * of the previous schedule rows (in order to make progress).
3979 * However, both of these are linearly independent of the next inner
3980 * row of "intra". Since the next schedule row has a non-zero
3981 * contribution of this next inner row, it is also linearly
3982 * independent of those rows.
3984 static int add_next_inner(struct isl_sched_graph *graph,
3985 struct isl_sched_node *node, struct isl_sched_intra *intra, int n,
3986 __isl_take isl_mat *outer_complement)
3988 set_equal(&graph->region[n], node, intra, intra->n_fixed,
3989 outer_complement);
3990 return finish_intra(graph, node, intra, n);
3993 /* Add an extra ILP region to "graph" that allows the next schedule row
3994 * for "node" to be linearly independent of the combination
3995 * of the schedule computed so far and all rows of "intra".
3996 * "n" is the current number of ILP regions or -1 on error.
3997 * Return the updated number of ILP regions or -1 on error.
3999 * The next schedule row is linearly independent of these rows
4000 * if the orthogonal complement is not zero on the schedule coefficients.
4002 static int allow_linear_independence(struct isl_sched_graph *graph,
4003 struct isl_sched_node *node, struct isl_sched_intra *intra, int n)
4005 isl_mat *mat;
4006 isl_mat *U;
4008 if (n < 0)
4009 return -1;
4011 mat = intra_rows(node, intra, 1, 1);
4012 U = isl_mat_row_complement(mat);
4013 set_non_zero(&graph->region[n], U);
4014 return finish_intra(graph, node, intra, n);
4017 /* Add an extra ILP region to "graph" that allows the next schedule row
4018 * for "node" to be a linear combination
4019 * of the schedule computed so far.
4020 * "n" is the current number of ILP regions or -1 on error.
4021 * Return the updated number of ILP regions or -1 on error.
4023 * The next schedule row is a linear combination of the current schedule
4024 * if its orthogonal complement is zero on the schedule coefficients.
4026 static int allow_linear_combination(struct isl_sched_graph *graph,
4027 struct isl_sched_node *node, struct isl_sched_intra *intra, int n)
4029 isl_mat *mat;
4031 if (n < 0)
4032 return -1;
4034 mat = extract_linear_schedule(node);
4035 mat = isl_mat_row_complement(mat);
4037 set_zero(&graph->region[n], mat);
4039 isl_mat_free(mat);
4041 return finish_intra(graph, node, intra, n);
4044 /* Is "region1" equal to "region2"?
4046 * Two regions are considered equal if they refer to the same sequence
4047 * of variables and if their non-zero and fixed-value constraints
4048 * are the same.
4050 static isl_bool region_is_equal(struct isl_ilp_region *region1,
4051 struct isl_ilp_region *region2)
4053 isl_bool equal;
4055 if (region1->pos != region2->pos)
4056 return isl_bool_false;
4057 if (region1->has_non_zero != region2->has_non_zero)
4058 return isl_bool_false;
4059 if (region1->has_fixed != region2->has_fixed)
4060 return isl_bool_false;
4062 if (region1->has_non_zero) {
4063 equal = isl_mat_is_equal(region1->non_zero, region2->non_zero);
4064 if (equal < 0 || !equal)
4065 return equal;
4068 if (region1->has_fixed) {
4069 equal = isl_mat_is_equal(region1->fixed, region2->fixed);
4070 if (equal < 0 || !equal)
4071 return equal;
4072 equal = isl_vec_is_equal(region1->fixed_val,
4073 region2->fixed_val);
4074 if (equal < 0 || !equal)
4075 return equal;
4078 return isl_bool_true;
4081 /* Is the region at position "pos" a duplicate of any of the regions
4082 * of "graph" starting at "first" and before "pos"?
4084 static isl_bool is_duplicate(struct isl_sched_graph *graph, int first, int pos)
4086 int i;
4088 for (i = first; i < pos; ++i) {
4089 isl_bool equal;
4091 equal = region_is_equal(&graph->region[i], &graph->region[pos]);
4092 if (equal < 0 || equal)
4093 return equal;
4096 return isl_bool_false;
4099 /* Check if the most recently added disjunct, the one at position n - 1,
4100 * is a duplicate of any of the regions in "graph" starting at "first".
4101 * If so, drop this disjunct and return the updated "n".
4102 * Return -1 on error.
4104 static int drop_if_duplicate(struct isl_sched_graph *graph, int first, int n)
4106 isl_bool duplicate;
4108 if (n < 0)
4109 return -1;
4111 duplicate = is_duplicate(graph, first, n - 1);
4112 if (duplicate < 0)
4113 return clear_regions(graph, n);
4114 if (duplicate)
4115 clear_region(&graph->region[--n]);
4117 return n;
4120 /* Add ILP regions for the intra-statement consecutivity constraint "intra"
4121 * on node "node" in "graph".
4122 * "first" is the position of the first intra-statement consecutivity
4123 * constraint ILP region for "node".
4124 * "n" is the current number of ILP regions.
4125 * Return the updated number of ILP regions or -1 on error.
4127 * The type of constraint that needs to be imposed is prescribed by
4128 * intra->state.
4129 * "outer_complement" is the orthogonal complement of [T_0; G].
4130 * "allow_independent" is set if the next schedule row should
4131 * be allowed to be linearly independent of [T_0; G; H].
4133 * The regions introduced for "intra" form a disjunction of at most
4134 * three disjuncts.
4135 * The first option is to make progress on the consecutivity,
4136 * meaning that the next schedule row is either a linear combination
4137 * of the outer rows of the consecutivity constraint or
4138 * equal to the next inner row.
4139 * The second option is for the next schedule row to be linearly
4140 * independent of both the current schedule rows and all
4141 * rows from the consecutivity constraint.
4142 * The caller has already checked whether this is possible.
4143 * The third option is for the next schedule row to be a linear
4144 * combination of the outer schedule rows.
4146 * First check if the next schedule row should be allowed to
4147 * be a linear combination of outer schedule rows.
4148 * This is only allowed if no linear independence constraint
4149 * will be added for this node.
4150 * Furthermore, since the corresponding region is independent
4151 * of the intra-statement consecutivity constraint, it should
4152 * only be considered if this is the first intra-statement consecutivity
4153 * constraint for the node. Otherwise, the same region will already
4154 * have been added as part of the encoding of the first constraint and
4155 * the region would only be reached in cases where it is known that
4156 * it cannot be satisfied.
4158 * If this is not the first intra-statement consecutivity constraint,
4159 * then the other two disjuncts are also checked for being duplicates
4160 * of disjuncts from earlier intra-statement consecutivity constraints
4161 * for the same node. If so, then the duplicates are removed.
4162 * The first disjunct for this constraint (if any is left) is also
4163 * made conditional on the previous disjunction to ensure that
4164 * this disjunction is only considered if all previous disjunctions
4165 * for the same node have failed.
4167 * Finally, the initial disjuncts in the disjunction are marked
4168 * disjunctive.
4170 static int add_state_intra_ilp_regions(isl_ctx *ctx,
4171 struct isl_sched_graph *graph, struct isl_sched_node *node,
4172 struct isl_sched_intra *intra, int first, int n,
4173 __isl_take isl_mat *outer_complement, int allow_independent)
4175 int allow_combination;
4176 int needed;
4177 int conditional = n > first;
4178 int first_disj = n;
4179 int i;
4181 allow_combination = !conditional && !needs_row(graph, node);
4182 needed = n + 1 + allow_independent + allow_combination;
4183 if (graph_extend_region(ctx, graph, needed) < 0)
4184 goto error;
4186 if (intra->state == isl_sched_intra_outer)
4187 n = add_linear_combination(graph, node, intra, n,
4188 outer_complement);
4189 else
4190 n = add_next_inner(graph, node, intra, n, outer_complement);
4191 if (conditional)
4192 n = drop_if_duplicate(graph, first, n);
4194 if (allow_independent) {
4195 n = allow_linear_independence(graph, node, intra, n);
4196 if (conditional)
4197 n = drop_if_duplicate(graph, first, n);
4199 if (allow_combination)
4200 n = allow_linear_combination(graph, node, intra, n);
4202 if (conditional && n > first_disj)
4203 graph->region[first_disj].conditional = 1;
4204 for (i = first_disj; i + 1 < n; ++i)
4205 graph->region[i].disjunctive = 1;
4207 return n;
4208 error:
4209 isl_mat_free(outer_complement);
4210 return clear_regions(graph, n);
4213 /* Add ILP regions for the intra-statement consecutivity constraint "intra"
4214 * on node "node" in "graph".
4215 * "first" is the position of the first intra-statement consecutivity
4216 * constraint ILP region for "node".
4217 * "n" is the current number of ILP regions.
4218 * Return the updated number of ILP regions or -1 on error.
4220 * Let T_0 be the schedule computed so far,
4221 * let G be the outer part of the consecutivity constraint,
4222 * let H be the inner part of the consecutivity constraint, and
4223 * let h be the number of rows of H that still need to be handled.
4225 * If rank(T_0; G; H) < rank(T_0; G) + h,
4226 * then T_0 can no longer be extended with those remaining h rows
4227 * without introducing a linear dependence.
4228 * Mark the constraint as failed.
4230 * Otherwise, if rank(T_0) < rank(T_0; G), then T_0 does not cover G yet and
4231 * T_0 should be extended with a linear combination of G first.
4233 * Otherwise, if the number of rows of H that appear in T_0 is smaller
4234 * than the total number of rows in H, then the next schedule row
4235 * should be equal to the next row of H.
4237 * Otherwise, consecutivity has been achieved and no ILP constraint
4238 * needs to be added.
4240 * In the cases where some ILP regions need to be added,
4241 * check whether rank(T_0; G; H) < dim, in which case
4242 * a schedule row that is linearly independent of T_0; G; H
4243 * is also allowed.
4245 static int add_intra_ilp_regions(isl_ctx *ctx, struct isl_sched_graph *graph,
4246 struct isl_sched_node *node, struct isl_sched_intra *intra,
4247 int first, int n)
4249 isl_mat *mat, *complement, *all;
4250 int allow_independent;
4251 int n_inner, rank_outer, rank_all;
4253 mat = intra_rows(node, intra, 1, 0);
4254 complement = isl_mat_row_complement(mat);
4255 if (!complement)
4256 return clear_regions(graph, n);
4257 rank_outer = isl_mat_cols(complement) - isl_mat_rows(complement);
4259 n_inner = intra->n_inner - intra->n_fixed;
4261 all = intra_rows(node, intra, 1, 1);
4262 rank_all = isl_mat_rank(all);
4263 isl_mat_free(all);
4264 if (rank_all < 0)
4265 goto error;
4267 allow_independent = rank_all < node->nvar;
4269 if (rank_all < rank_outer + n_inner)
4270 intra->n_fixed = -1;
4271 else if (node->rank < rank_outer)
4272 intra->state = isl_sched_intra_outer;
4273 else if (intra->n_fixed < intra->n_inner)
4274 intra->state = isl_sched_intra_inner;
4275 else
4276 intra->state = isl_sched_intra_free;
4277 if (intra->n_fixed >= 0 && intra->state != isl_sched_intra_free)
4278 return add_state_intra_ilp_regions(ctx, graph, node, intra,
4279 first, n, complement, allow_independent);
4281 isl_mat_free(complement);
4282 return n;
4283 error:
4284 isl_mat_free(complement);
4285 return clear_regions(graph, n);
4288 /* Add ILP regions for all active intra-statement consecutivity constraints
4289 * in "graph".
4290 * Return the total number of such regions or -1 is some error occurred.
4292 static int add_graph_intra_ilp_regions(isl_ctx *ctx,
4293 struct isl_sched_graph *graph)
4295 int i, n;
4297 n = 0;
4298 for (i = 0; i < graph->n; ++i) {
4299 struct isl_sched_node *node = &graph->node[i];
4300 struct isl_sched_intra *intra;
4301 int first = n;
4303 for (intra = node->intra; intra; intra = intra->next) {
4304 if (intra->n_fixed < 0)
4305 continue;
4306 n = add_intra_ilp_regions(ctx, graph, node, intra,
4307 first, n);
4308 if (n < 0)
4309 return -1;
4313 return n;
4316 /* Set graph->region[n] to an optional fixed-value constraint with
4317 * linear combinations "eq" and expected value "val"
4318 * that applies to the entire sequence of variables.
4320 static int set_global_eq(struct isl_sched_graph *graph,
4321 struct isl_sched_edge *edge, __isl_take isl_mat *eq,
4322 __isl_take isl_vec *val, int n)
4324 graph->region[n].has_fixed = 1;
4325 graph->region[n].fixed = eq;
4326 graph->region[n].fixed_val = val;
4327 graph->region[n].pos = 0;
4328 graph->region[n].optional = 1;
4329 graph->region[n].user = edge;
4331 return n + 1;
4334 /* Construct the equality constraints on the set of coefficients
4335 * for valid equality constraints for the dependence relation of "edge".
4337 * First compute the affine hull of the dependence relation.
4338 * An equality constraint is valid for the dependence relation
4339 * if it is a linear combination of the (equality) constraints
4340 * of the affine hull.
4341 * This means the coefficients of such an equality constraint
4342 * need to be orthogonal to the orthogonal complement of
4343 * the constraints of the affine hull.
4344 * Return this orthogonal complement E.
4345 * Note that the first element in this matrix corresponds
4346 * to the coefficient of the constant term.
4348 * That is
4350 * E (c_0, c_n, c_x, c_y)^T = 0
4352 * for constraints
4354 * c_0 + c_n n + c_x x + c_y y = 0 for each (x,y) in R
4356 * If the source or destination nodes of "edge" have been compressed,
4357 * then the dependence relation is also compressed before
4358 * the affine hull of the set of coefficients is computed.
4360 static __isl_give isl_mat *inter_coefficients_eq(struct isl_sched_edge *edge)
4362 isl_mat *mat;
4363 isl_map *map;
4364 isl_basic_map *aff;
4366 map = isl_map_copy(edge->map);
4367 if (edge->src->compressed)
4368 map = isl_map_preimage_domain_multi_aff(map,
4369 isl_multi_aff_copy(edge->src->decompress));
4370 if (edge->dst->compressed)
4371 map = isl_map_preimage_range_multi_aff(map,
4372 isl_multi_aff_copy(edge->dst->decompress));
4373 aff = isl_map_affine_hull(map);
4374 aff = isl_basic_map_remove_divs(aff);
4375 mat = isl_basic_map_extract_equalities(aff);
4376 mat = isl_mat_row_complement(mat);
4377 isl_basic_map_free(aff);
4379 return mat;
4382 /* Add an ILP region for the inter-statement consecutivity constraint "edge"
4383 * in "graph" that fixed the dependence distance to zero or
4384 * one (if "one" is set).
4385 * "n" is the current number of ILP regions.
4386 * Return the updated number of ILP regions or -1 on error.
4388 * Construct a matrix E with
4390 * E (c_0, c_n, c_x, c_y)^T = 0
4392 * for constraints
4394 * c_0 + c_n n + c_x x + c_y y = 0 for each (x,y) in R
4396 * In case "one" is set, the constraints need to be of the form
4398 * c_0 + c_n n + c_x x + c_y y = 1 for each (x,y) in R
4400 * i.e.,
4402 * (c_0 - 1) + c_n n + c_x x + c_y y = 0 for each (x,y) in R
4404 * or
4406 * E (c_0 - 1, c_n, c_x, c_y)^T = 0
4408 * i.e.,
4410 * E (c_0, c_n, c_x, c_y)^T = E (1, 0, 0, 0)^T
4412 * That is the linear combinations E need to be equal to either zero
4413 * (if "one" is not set) or the first column of E (if "one" is set);
4415 * Before imposing these constraints, they need to be formulated
4416 * in terms of the ILP variables by plugging in
4417 * (c_j_0 - c_i_0, c_j_n - c_i_n, -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-)
4418 * for (c_0, c_n, c_x, c_y),
4419 * where c_* = c_*^+ - c_*^-, with c_*^+ and c_*^- non-negative.
4420 * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
4421 * This results in constraints
4423 * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) = 0
4425 * or
4427 * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) = 1
4429 * Note that the first column of E corresponds to the coefficient
4430 * of the constant term, while the mapping returned by inter_dim_map
4431 * assumes the presence of a constant term. An extra zero column
4432 * is therefore temporarily inserted to represent this constant term.
4434 static int add_fixed_inter_ilp_region(isl_ctx *ctx,
4435 struct isl_sched_graph *graph, struct isl_sched_edge *edge, int n,
4436 int one)
4438 int offset;
4439 isl_dim_map *dim_map;
4440 isl_mat *eq;
4441 isl_vec *val;
4443 if (graph_extend_region(ctx, graph, n + 1) < 0)
4444 return -1;
4446 eq = inter_coefficients_eq(edge);
4447 val = isl_mat_get_col(eq, 0);
4448 if (!one)
4449 val = isl_vec_set_si(val, 0);
4450 offset = isl_map_offset(edge->map, isl_dim_in);
4451 dim_map = inter_dim_map(ctx, graph, edge->src, edge->dst, offset, 1);
4452 eq = isl_mat_insert_zero_cols(eq, 0, 1);
4453 eq = isl_mat_dim_map(eq, dim_map);
4454 eq = isl_mat_drop_cols(eq, 0, 1);
4455 n = set_global_eq(graph, edge, eq, val, n);
4457 return n;
4460 /* Add an ILP region for the inter-statement consecutivity constraint "edge"
4461 * in "graph" that fixed the dependence distance to zero.
4462 * "n" is the current number of ILP regions.
4463 * Return the updated number of ILP regions or -1 on error.
4465 static int add_outer_inter_ilp_region(isl_ctx *ctx,
4466 struct isl_sched_graph *graph, struct isl_sched_edge *edge, int n)
4468 return add_fixed_inter_ilp_region(ctx, graph, edge, n, 0);
4471 /* Add an ILP region for the inter-statement consecutivity constraint "edge"
4472 * in "graph" that fixed the dependence distance to one.
4473 * "n" is the current number of ILP regions.
4474 * Return the updated number of ILP regions or -1 on error.
4476 * This distance should only be one at the point where both
4477 * corresponding intra-statement consecutivity constraints
4478 * fix the schedule row to be equal to the first inner row.
4479 * If either of these intra-statement consecutivity constraints
4480 * already fixed some inner rows, then this can no longer be achieved.
4482 static int add_inner_inter_ilp_region(isl_ctx *ctx,
4483 struct isl_sched_graph *graph, struct isl_sched_edge *edge, int n)
4485 if (edge->src_intra->n_fixed > 0 || edge->dst_intra->n_fixed > 0) {
4486 edge->state = isl_sched_inter_failed;
4487 return n;
4489 edge->state = isl_sched_inter_inner;
4490 return add_fixed_inter_ilp_region(ctx, graph, edge, n, 1);
4493 /* Add an ILP region for the inter-statement consecutivity constraint "edge"
4494 * in "graph".
4495 * "n" is the current number of ILP regions.
4496 * Return the updated number of ILP regions or -1 on error.
4498 * The dependence distance should be kept at zero as long as
4499 * both corresponding intra-statement consecutivity constraints
4500 * haven't had their outer parts covered by the current schedule.
4501 * The distance should be one when the schedule is made equal
4502 * to the first rows of their inner parts.
4503 * Note that at each level the constraint on dependence distance
4504 * is imposed unconditionally. This means that it is imposed
4505 * even if the regions of the corresponding intra-statement consecutivity
4506 * constraints cannot be satisfied at that level.
4507 * The distance-one constraint is also imposed if those regions
4508 * succeed for the wrong reason, i.e., if the schedule row does
4509 * not get equated to the first inner row but if rather one of
4510 * the other potential disjuncts applies.
4511 * The assumption is that the distance-one constraint somehow aligns
4512 * with the first inner rows and is therefore unlikely to succeed
4513 * if the schedule is not made equal to those first inner rows.
4514 * In any case, the distance-one constraint is applied only once.
4516 static int add_inter_ilp_region(isl_ctx *ctx, struct isl_sched_graph *graph,
4517 struct isl_sched_edge *edge, int n)
4519 enum isl_sched_intra_state src_state, dst_state;
4521 if (edge->state == isl_sched_inter_failed)
4522 return n;
4523 if (edge->state == isl_sched_inter_free)
4524 return n;
4525 if (edge->src_intra->n_fixed < 0 || edge->dst_intra->n_fixed < 0) {
4526 edge->state = isl_sched_inter_failed;
4527 return n;
4529 if (edge->state == isl_sched_inter_inner) {
4530 if (edge->src_intra->n_fixed == 0 ||
4531 edge->dst_intra->n_fixed == 0)
4532 edge->state = isl_sched_inter_failed;
4533 else
4534 edge->state = isl_sched_inter_free;
4535 return n;
4537 src_state = edge->src_intra->state;
4538 dst_state = edge->dst_intra->state;
4539 if (src_state == isl_sched_intra_outer ||
4540 dst_state == isl_sched_intra_outer)
4541 return add_outer_inter_ilp_region(ctx, graph, edge, n);
4542 if (src_state == isl_sched_intra_inner &&
4543 dst_state == isl_sched_intra_inner)
4544 return add_inner_inter_ilp_region(ctx, graph, edge, n);
4545 return n;
4548 /* Add ILP regions for all active inter-statement consecutivity constraints
4549 * in "graph".
4550 * "n" is the current number of ILP regions.
4551 * Return the updated number of ILP regions or -1 on error.
4553 static int add_graph_inter_ilp_regions(isl_ctx *ctx,
4554 struct isl_sched_graph *graph, int n)
4556 int i;
4558 for (i = 0; i < graph->n_edge; ++i) {
4559 struct isl_sched_edge *edge = &graph->edge[i];
4561 if (!is_consecutivity(edge))
4562 continue;
4563 n = add_inter_ilp_region(ctx, graph, edge, n);
4564 if (n < 0)
4565 return -1;
4567 return n;
4570 /* Add ILP regions for all active intra-statement and inter-statement
4571 * consecutivity constraints in "graph".
4572 * Return the total number of such regions or -1 is some error occurred.
4574 static int add_graph_consecutivity_ilp_regions(isl_ctx *ctx,
4575 struct isl_sched_graph *graph)
4577 int n;
4579 n = add_graph_intra_ilp_regions(ctx, graph);
4580 if (n < 0)
4581 return -1;
4582 n = add_graph_inter_ilp_regions(ctx, graph, n);
4584 return n;
4587 /* Does this region try to fix the schedule row to be equal
4588 * to a row of the inner part of the corresponding
4589 * intra-statement consecutivity constraint?
4590 * In particular, was such a constraint imposed and
4591 * is this the region that imposes the constraint
4592 * (rather than any of the potential other disjuncts
4593 * corresponding to the same intra-statement consecutivity constraint)?
4594 * Note that the other disjuncts express either a pure linear combination
4595 * (with a zero fixed_val) or a (pure) linear independence.
4596 * The "user" field was set by finish_intra.
4597 * set_equal puts the row with the non-zero fixed_val in the first position.
4599 static isl_bool fixes_inner(struct isl_ilp_region *region)
4601 struct isl_sched_intra *intra = region->user;
4603 if (intra->state != isl_sched_intra_inner)
4604 return isl_bool_false;
4605 if (!region->has_fixed)
4606 return isl_bool_false;
4607 return isl_bool_not(isl_vec_element_is_zero(region->fixed_val, 0));
4610 /* Extract the subsequence of elements of length "len" starting at "pos"
4611 * from "v".
4613 static __isl_give isl_vec *extract(__isl_keep isl_vec *v, int pos, int len)
4615 isl_ctx *ctx;
4616 isl_vec *r;
4618 if (!v)
4619 return NULL;
4620 ctx = isl_vec_get_ctx(v);
4621 r = isl_vec_alloc(ctx, len);
4622 if (!r)
4623 return NULL;
4624 isl_seq_cpy(r->el, v->el + pos, len);
4625 return r;
4628 /* Given a region that tries to fix the schedule row to be equal
4629 * to a row of the inner part of the corresponding
4630 * intra-statement consecutivity constraint, does "sol"
4631 * satisfy this constraint?
4633 * By definition, such a region imposes some linear combinations
4634 * that need to be equal to some fixed values.
4635 * Note that "sol" is the solution to an ILP problem,
4636 * so the denominator is always 1.
4638 static isl_bool is_fixed(struct isl_ilp_region *region, __isl_keep isl_vec *sol)
4640 int len;
4641 isl_bool fixed;
4642 isl_vec *v;
4644 len = isl_mat_cols(region->fixed);
4645 v = extract(sol, 1 + region->pos, len);
4647 v = isl_mat_vec_product(isl_mat_copy(region->fixed), v);
4648 fixed = isl_vec_is_equal(v, region->fixed_val);
4649 isl_vec_free(v);
4651 return fixed;
4654 /* Update the information on the intra-statement consecutivity constraint
4655 * associated to ILP region "pos" in "graph" based on the region itself and
4656 * on the (non-empty) ILP solution "sol".
4658 * If the entire disjunctive constraint associated to
4659 * an intra-statement consecutivity constraint could not be imposed
4660 * (as witnessed by the region corresponding to the last disjunct
4661 * being marked failed), then mark the intra-statement consecutivity constraint
4662 * as failed.
4663 * Otherwise, if the region was meant to fix the schedule row to be equal
4664 * to a row of the inner part of the corresponding
4665 * intra-statement consecutivity constraint and if this succeeded,
4666 * then update the number of fixed rows of the
4667 * intra-statement consecutivity constraint.
4668 * If this is the first such row, then also keep track of its position.
4670 * Note that the "user" field of the region was set by finish_intra.
4672 static isl_stat update_intra(struct isl_sched_graph *graph, int pos,
4673 __isl_keep isl_vec *sol)
4675 struct isl_ilp_region *region = &graph->region[pos];
4676 struct isl_sched_intra *intra = region->user;
4677 isl_bool fixes;
4679 if (region->failed) {
4680 intra->n_fixed = -1;
4681 return isl_stat_ok;
4684 fixes = fixes_inner(region);
4685 if (fixes < 0)
4686 return isl_stat_error;
4687 if (!fixes)
4688 return isl_stat_ok;
4690 fixes = is_fixed(region, sol);
4691 if (fixes < 0)
4692 return isl_stat_error;
4693 if (!fixes)
4694 return isl_stat_ok;
4696 if (intra->n_fixed == 0)
4697 intra->first_fixed = graph->n_total_row;
4698 intra->n_fixed++;
4700 return isl_stat_ok;
4703 /* Update the information on the inter-statement consecutivity constraint
4704 * associated to ILP region "pos" in "graph" based on the region itself and
4705 * on the (non-empty) ILP solution "sol".
4707 * Check if the region was marked failed and, if so,
4708 * mark the corresponding edge as failed.
4710 * Note that the "user" field of the region was set by set_global_eq.
4712 static isl_stat update_inter(struct isl_sched_graph *graph, int pos)
4714 struct isl_ilp_region *region = &graph->region[pos];
4715 struct isl_sched_edge *edge = region->user;
4717 if (region->failed)
4718 edge->state = isl_sched_inter_failed;
4720 return isl_stat_ok;
4723 /* Update the information on the intra-statement or inter-statement
4724 * consecutivity constraint
4725 * associated to ILP region "pos" in "graph" based on the region itself and
4726 * on the ILP solution "sol".
4728 * If no solution was computed, then no useful information can be extracted
4729 * from the region.
4731 * The "user" field points to either an isl_sched_intra object
4732 * (set by finish_intra) or an isl_sched_edge object
4733 * (set by set_global_eq).
4734 * Use the field to determine whether the region corresponds
4735 * to an intra-statement on inter-statement consecutivity constraint and
4736 * handle it accordingly.
4738 static isl_stat update_consecutivity(struct isl_sched_graph *graph, int pos,
4739 __isl_keep isl_vec *sol)
4741 struct isl_ilp_region *region = &graph->region[pos];
4743 if (!sol)
4744 return isl_stat_error;
4745 if (sol->size == 0)
4746 return isl_stat_ok;
4747 if (!region->user)
4748 return isl_stat_ok;
4750 if (is_edge(graph, region->user))
4751 return update_inter(graph, pos);
4752 else
4753 return update_intra(graph, pos, sol);
4757 /* Solve the ILP problem constructed in setup_lp.
4758 * First construct one or more ILP regions for each active intra-statement
4759 * consecutivity constraint. These (optional) regions try to
4760 * make progress in achieving consecutivity.
4761 * Next, for each node such that all the remaining rows of its schedule
4762 * need to be non-trivial, construct a region with a non-zero constraint.
4763 * This region imposes that the next row is independent of previous rows,
4764 * by enforcing that at least
4765 * one of the linear combinations in the rows of node->indep is non-zero.
4766 * The ILP regions corresponding to intra-statement and inter-statement
4767 * consecutivity constraints are added first to allow more freedom for them
4768 * to be satisfied.
4770 * After a solution has been computed, update the information
4771 * on intra-statement consecutivity constraints based on
4772 * the solution and on failed optional regions.
4774 static __isl_give isl_vec *solve_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
4776 int i, n;
4777 isl_vec *sol;
4778 isl_basic_set *lp;
4780 n = add_graph_consecutivity_ilp_regions(ctx, graph);
4781 if (n < 0)
4782 return NULL;
4783 if (graph_extend_region(ctx, graph, n + graph->n) < 0)
4784 goto error;
4785 for (i = 0; i < graph->n; ++i) {
4786 struct isl_sched_node *node = &graph->node[i];
4788 graph->region[n + i].pos = node_var_coef_offset(node);
4789 graph->region[n + i].has_non_zero = needs_row(graph, node);
4790 if (!graph->region[n + i].has_non_zero)
4791 continue;
4792 graph->region[n + i].non_zero = linear_to_lp(node->indep);
4794 lp = isl_basic_set_copy(graph->lp);
4795 sol = isl_tab_basic_set_constrained_lexmin(lp, 2, n + graph->n,
4796 graph->region, &check_conflict, graph);
4797 for (i = 0; i < n; ++i)
4798 if (update_consecutivity(graph, i, sol) < 0)
4799 return isl_vec_free(sol);
4800 clear_regions(graph, n + graph->n);
4801 return sol;
4802 error:
4803 clear_regions(graph, n + graph->n);
4804 return NULL;
4807 /* Extract the coefficients for the variables of "node" from "sol".
4809 * Each schedule coefficient c_i_x is represented as the difference
4810 * between two non-negative variables c_i_x^+ - c_i_x^-.
4811 * The c_i_x^- appear before their c_i_x^+ counterpart.
4812 * Furthermore, the order of these pairs is the opposite of that
4813 * of the corresponding coefficients.
4815 * Return c_i_x = c_i_x^+ - c_i_x^-
4817 static __isl_give isl_vec *extract_var_coef(struct isl_sched_node *node,
4818 __isl_keep isl_vec *sol)
4820 int i;
4821 int pos;
4822 isl_vec *csol;
4824 if (!sol)
4825 return NULL;
4826 csol = isl_vec_alloc(isl_vec_get_ctx(sol), node->nvar);
4827 if (!csol)
4828 return NULL;
4830 pos = 1 + node_var_coef_offset(node);
4831 for (i = 0; i < node->nvar; ++i)
4832 isl_int_sub(csol->el[node->nvar - 1 - i],
4833 sol->el[pos + 2 * i + 1], sol->el[pos + 2 * i]);
4835 return csol;
4838 /* Update the schedules of all nodes based on the given solution
4839 * of the LP problem.
4840 * The new row is added to the current band.
4841 * All possibly negative coefficients are encoded as a difference
4842 * of two non-negative variables, so we need to perform the subtraction
4843 * here.
4845 * If coincident is set, then the caller guarantees that the new
4846 * row satisfies the coincidence constraints.
4848 static int update_schedule(struct isl_sched_graph *graph,
4849 __isl_take isl_vec *sol, int coincident)
4851 int i, j;
4852 isl_vec *csol = NULL;
4854 if (!sol)
4855 goto error;
4856 if (sol->size == 0)
4857 isl_die(sol->ctx, isl_error_internal,
4858 "no solution found", goto error);
4859 if (graph->n_total_row >= graph->max_row)
4860 isl_die(sol->ctx, isl_error_internal,
4861 "too many schedule rows", goto error);
4863 for (i = 0; i < graph->n; ++i) {
4864 struct isl_sched_node *node = &graph->node[i];
4865 int pos;
4866 int row = isl_mat_rows(node->sched);
4868 isl_vec_free(csol);
4869 csol = extract_var_coef(node, sol);
4870 if (!csol)
4871 goto error;
4873 isl_map_free(node->band_sched);
4874 node->band_sched = NULL;
4875 node->sched = isl_mat_add_rows(node->sched, 1);
4876 if (!node->sched)
4877 goto error;
4878 pos = node_cst_coef_offset(node);
4879 node->sched = isl_mat_set_element(node->sched,
4880 row, 0, sol->el[1 + pos]);
4881 pos = node_par_coef_offset(node);
4882 for (j = 0; j < node->nparam; ++j)
4883 node->sched = isl_mat_set_element(node->sched,
4884 row, 1 + j, sol->el[1 + pos + j]);
4885 for (j = 0; j < node->nvar; ++j)
4886 node->sched = isl_mat_set_element(node->sched,
4887 row, 1 + node->nparam + j, csol->el[j]);
4888 node->coincident[graph->n_total_row] = coincident;
4890 isl_vec_free(sol);
4891 isl_vec_free(csol);
4893 graph->n_row++;
4894 graph->n_total_row++;
4896 return 0;
4897 error:
4898 isl_vec_free(sol);
4899 isl_vec_free(csol);
4900 return -1;
4903 /* Convert row "row" of node->sched into an isl_aff living in "ls"
4904 * and return this isl_aff.
4906 static __isl_give isl_aff *extract_schedule_row(__isl_take isl_local_space *ls,
4907 struct isl_sched_node *node, int row)
4909 int j;
4910 isl_int v;
4911 isl_aff *aff;
4913 isl_int_init(v);
4915 aff = isl_aff_zero_on_domain(ls);
4916 if (isl_mat_get_element(node->sched, row, 0, &v) < 0)
4917 goto error;
4918 aff = isl_aff_set_constant(aff, v);
4919 for (j = 0; j < node->nparam; ++j) {
4920 if (isl_mat_get_element(node->sched, row, 1 + j, &v) < 0)
4921 goto error;
4922 aff = isl_aff_set_coefficient(aff, isl_dim_param, j, v);
4924 for (j = 0; j < node->nvar; ++j) {
4925 if (isl_mat_get_element(node->sched, row,
4926 1 + node->nparam + j, &v) < 0)
4927 goto error;
4928 aff = isl_aff_set_coefficient(aff, isl_dim_in, j, v);
4931 isl_int_clear(v);
4933 return aff;
4934 error:
4935 isl_int_clear(v);
4936 isl_aff_free(aff);
4937 return NULL;
4940 /* Convert the "n" rows starting at "first" of node->sched into a multi_aff
4941 * and return this multi_aff.
4943 * The result is defined over the uncompressed node domain.
4945 static __isl_give isl_multi_aff *node_extract_partial_schedule_multi_aff(
4946 struct isl_sched_node *node, int first, int n)
4948 int i;
4949 isl_space *space;
4950 isl_local_space *ls;
4951 isl_aff *aff;
4952 isl_multi_aff *ma;
4953 int nrow;
4955 if (!node)
4956 return NULL;
4957 nrow = isl_mat_rows(node->sched);
4958 if (node->compressed)
4959 space = isl_multi_aff_get_domain_space(node->decompress);
4960 else
4961 space = isl_space_copy(node->space);
4962 ls = isl_local_space_from_space(isl_space_copy(space));
4963 space = isl_space_from_domain(space);
4964 space = isl_space_add_dims(space, isl_dim_out, n);
4965 ma = isl_multi_aff_zero(space);
4967 for (i = first; i < first + n; ++i) {
4968 aff = extract_schedule_row(isl_local_space_copy(ls), node, i);
4969 ma = isl_multi_aff_set_aff(ma, i - first, aff);
4972 isl_local_space_free(ls);
4974 if (node->compressed)
4975 ma = isl_multi_aff_pullback_multi_aff(ma,
4976 isl_multi_aff_copy(node->compress));
4978 return ma;
4981 /* Convert the part of node->sched that corresponds to the current band
4982 * into a multi_aff and return this multi_aff.
4984 * The result is defined over the uncompressed node domain.
4986 static __isl_give isl_multi_aff *node_extract_band_schedule_multi_aff(
4987 struct isl_sched_graph *graph, struct isl_sched_node *node)
4989 int nrow;
4990 int start;
4992 start = graph->band_start;
4993 nrow = isl_mat_rows(node->sched) - start;
4994 return node_extract_partial_schedule_multi_aff(node, start, nrow);
4997 /* Convert the part of node->sched that corresponds to the current band
4998 * into a map and return this map.
5000 * The result is cached in node->band_sched, which needs to be released
5001 * whenever node->sched is updated.
5002 * It is defined over the uncompressed node domain.
5004 static __isl_give isl_map *node_extract_band_schedule(
5005 struct isl_sched_graph *graph, struct isl_sched_node *node)
5007 if (!node->band_sched) {
5008 isl_multi_aff *ma;
5010 ma = node_extract_band_schedule_multi_aff(graph, node);
5011 node->band_sched = isl_map_from_multi_aff(ma);
5014 return isl_map_copy(node->band_sched);
5017 /* Construct a map that can be used to update a dependence relation
5018 * based on the current band schedule.
5019 * That is, construct a map expressing that source and sink
5020 * are executed within the same iteration of the current band.
5021 * This map can then be intersected with the dependence relation.
5022 * This is not the most efficient way, but this shouldn't be a critical
5023 * operation.
5025 static __isl_give isl_map *specializer(struct isl_sched_graph *graph,
5026 struct isl_sched_node *src, struct isl_sched_node *dst)
5028 isl_map *src_sched, *dst_sched;
5030 src_sched = node_extract_band_schedule(graph, src);
5031 dst_sched = node_extract_band_schedule(graph, dst);
5032 return isl_map_apply_range(src_sched, isl_map_reverse(dst_sched));
5035 /* Intersect the domains of the nested relations in domain and range
5036 * of "umap" with "map".
5038 static __isl_give isl_union_map *intersect_domains(
5039 __isl_take isl_union_map *umap, __isl_keep isl_map *map)
5041 isl_union_set *uset;
5043 umap = isl_union_map_zip(umap);
5044 uset = isl_union_set_from_set(isl_map_wrap(isl_map_copy(map)));
5045 umap = isl_union_map_intersect_domain(umap, uset);
5046 umap = isl_union_map_zip(umap);
5047 return umap;
5050 /* Update the dependence relation of the given edge based
5051 * on the current band schedule.
5052 * If the dependence is carried completely by the current band, then
5053 * it is removed from the edge_tables. It is kept in the list of edges
5054 * as otherwise all edge_tables would have to be recomputed.
5056 * If the edge is of a type that can appear multiple times
5057 * between the same pair of nodes, then it is added to
5058 * the edge table (again). This prevents the situation
5059 * where none of these edges is referenced from the edge table
5060 * because the one that was referenced turned out to be empty and
5061 * was therefore removed from the table.
5063 * If the edge is marked failed or completely handled, then it is
5064 * (only) a consecutivity edge and it can be removed from consideration
5065 * without even updating the dependence relation.
5066 * Other types of edges have the default (isl_sched_inter_init) value
5067 * for this field.
5069 static isl_stat update_edge(isl_ctx *ctx, struct isl_sched_graph *graph,
5070 struct isl_sched_edge *edge)
5072 int empty;
5073 isl_map *id;
5075 if (edge->state == isl_sched_inter_failed)
5076 return clear_edge(graph, edge);
5077 if (edge->state == isl_sched_inter_free)
5078 return clear_edge(graph, edge);
5080 id = specializer(graph, edge->src, edge->dst);
5081 edge->map = isl_map_intersect(edge->map, isl_map_copy(id));
5082 if (!edge->map)
5083 goto error;
5085 if (edge->tagged_condition) {
5086 edge->tagged_condition =
5087 intersect_domains(edge->tagged_condition, id);
5088 if (!edge->tagged_condition)
5089 goto error;
5091 if (edge->tagged_validity) {
5092 edge->tagged_validity =
5093 intersect_domains(edge->tagged_validity, id);
5094 if (!edge->tagged_validity)
5095 goto error;
5098 empty = isl_map_plain_is_empty(edge->map);
5099 if (empty < 0)
5100 goto error;
5101 if (empty) {
5102 graph_remove_edge(graph, edge);
5103 } else if (is_multi_edge_type(edge)) {
5104 if (graph_edge_tables_add(ctx, graph, edge) < 0)
5105 goto error;
5108 isl_map_free(id);
5109 return isl_stat_ok;
5110 error:
5111 isl_map_free(id);
5112 return isl_stat_error;
5115 /* Does the domain of "umap" intersect "uset"?
5117 static int domain_intersects(__isl_keep isl_union_map *umap,
5118 __isl_keep isl_union_set *uset)
5120 int empty;
5122 umap = isl_union_map_copy(umap);
5123 umap = isl_union_map_intersect_domain(umap, isl_union_set_copy(uset));
5124 empty = isl_union_map_is_empty(umap);
5125 isl_union_map_free(umap);
5127 return empty < 0 ? -1 : !empty;
5130 /* Does the range of "umap" intersect "uset"?
5132 static int range_intersects(__isl_keep isl_union_map *umap,
5133 __isl_keep isl_union_set *uset)
5135 int empty;
5137 umap = isl_union_map_copy(umap);
5138 umap = isl_union_map_intersect_range(umap, isl_union_set_copy(uset));
5139 empty = isl_union_map_is_empty(umap);
5140 isl_union_map_free(umap);
5142 return empty < 0 ? -1 : !empty;
5145 /* Are the condition dependences of "edge" local with respect to
5146 * the current band schedule?
5148 * That is, are domain and range of the condition dependences mapped
5149 * to the same point?
5151 * In other words, is the condition false?
5153 static int is_condition_false(struct isl_sched_graph *graph,
5154 struct isl_sched_edge *edge)
5156 isl_union_map *umap;
5157 isl_map *map, *sched, *test;
5158 int empty, local;
5160 empty = isl_union_map_is_empty(edge->tagged_condition);
5161 if (empty < 0 || empty)
5162 return empty;
5164 umap = isl_union_map_copy(edge->tagged_condition);
5165 umap = isl_union_map_zip(umap);
5166 umap = isl_union_set_unwrap(isl_union_map_domain(umap));
5167 map = isl_map_from_union_map(umap);
5169 sched = node_extract_band_schedule(graph, edge->src);
5170 map = isl_map_apply_domain(map, sched);
5171 sched = node_extract_band_schedule(graph, edge->dst);
5172 map = isl_map_apply_range(map, sched);
5174 test = isl_map_identity(isl_map_get_space(map));
5175 local = isl_map_is_subset(map, test);
5176 isl_map_free(map);
5177 isl_map_free(test);
5179 return local;
5182 /* For each conditional validity constraint that is adjacent
5183 * to a condition with domain in condition_source or range in condition_sink,
5184 * turn it into an unconditional validity constraint.
5186 static int unconditionalize_adjacent_validity(struct isl_sched_graph *graph,
5187 __isl_take isl_union_set *condition_source,
5188 __isl_take isl_union_set *condition_sink)
5190 int i;
5192 condition_source = isl_union_set_coalesce(condition_source);
5193 condition_sink = isl_union_set_coalesce(condition_sink);
5195 for (i = 0; i < graph->n_edge; ++i) {
5196 int adjacent;
5197 isl_union_map *validity;
5199 if (!is_conditional_validity(&graph->edge[i]))
5200 continue;
5201 if (is_validity(&graph->edge[i]))
5202 continue;
5204 validity = graph->edge[i].tagged_validity;
5205 adjacent = domain_intersects(validity, condition_sink);
5206 if (adjacent >= 0 && !adjacent)
5207 adjacent = range_intersects(validity, condition_source);
5208 if (adjacent < 0)
5209 goto error;
5210 if (!adjacent)
5211 continue;
5213 set_validity(&graph->edge[i]);
5216 isl_union_set_free(condition_source);
5217 isl_union_set_free(condition_sink);
5218 return 0;
5219 error:
5220 isl_union_set_free(condition_source);
5221 isl_union_set_free(condition_sink);
5222 return -1;
5225 /* Update the dependence relations of all edges based on the current band
5226 * schedule and enforce conditional validity constraints that are adjacent
5227 * to satisfied condition constraints.
5229 * First check if any of the condition constraints are satisfied
5230 * (i.e., not local to the outer schedule) and keep track of
5231 * their domain and range.
5232 * Then update all dependence relations (which removes the non-local
5233 * constraints).
5234 * Finally, if any condition constraints turned out to be satisfied,
5235 * then turn all adjacent conditional validity constraints into
5236 * unconditional validity constraints.
5238 static int update_edges(isl_ctx *ctx, struct isl_sched_graph *graph)
5240 int i;
5241 int any = 0;
5242 isl_union_set *source, *sink;
5244 source = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
5245 sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
5246 for (i = 0; i < graph->n_edge; ++i) {
5247 int local;
5248 isl_union_set *uset;
5249 isl_union_map *umap;
5251 if (!is_condition(&graph->edge[i]))
5252 continue;
5253 if (is_local(&graph->edge[i]))
5254 continue;
5255 local = is_condition_false(graph, &graph->edge[i]);
5256 if (local < 0)
5257 goto error;
5258 if (local)
5259 continue;
5261 any = 1;
5263 umap = isl_union_map_copy(graph->edge[i].tagged_condition);
5264 uset = isl_union_map_domain(umap);
5265 source = isl_union_set_union(source, uset);
5267 umap = isl_union_map_copy(graph->edge[i].tagged_condition);
5268 uset = isl_union_map_range(umap);
5269 sink = isl_union_set_union(sink, uset);
5272 for (i = 0; i < graph->n_edge; ++i) {
5273 if (update_edge(ctx, graph, &graph->edge[i]) < 0)
5274 goto error;
5277 if (any)
5278 return unconditionalize_adjacent_validity(graph, source, sink);
5280 isl_union_set_free(source);
5281 isl_union_set_free(sink);
5282 return 0;
5283 error:
5284 isl_union_set_free(source);
5285 isl_union_set_free(sink);
5286 return -1;
5289 /* Initiate a new band by recording the starting position of the new band and
5290 * by keeping track of the number of inner rows already taken into account for
5291 * all intra-statement consecutivity constraints at this point and
5292 * the states of the all inter-statement consecutivity constraints
5293 * such that they can be reset when the band gets discarded
5294 * in reset_band.
5296 static void next_band(struct isl_sched_graph *graph)
5298 int i;
5300 graph->band_start = graph->n_total_row;
5302 for (i = 0; i < graph->n; ++i) {
5303 struct isl_sched_node *node = &graph->node[i];
5304 struct isl_sched_intra *intra;
5306 for (intra = node->intra; intra; intra = intra->next)
5307 intra->band_n_fixed = intra->n_fixed;
5309 for (i = 0; i < graph->n_edge; ++i)
5310 graph->edge[i].band_state = graph->edge[i].state;
5313 /* Return the union of the universe domains of the nodes in "graph"
5314 * that satisfy "pred".
5316 static __isl_give isl_union_set *isl_sched_graph_domain(isl_ctx *ctx,
5317 struct isl_sched_graph *graph,
5318 int (*pred)(struct isl_sched_node *node, int data), int data)
5320 int i;
5321 isl_set *set;
5322 isl_union_set *dom;
5324 for (i = 0; i < graph->n; ++i)
5325 if (pred(&graph->node[i], data))
5326 break;
5328 if (i >= graph->n)
5329 isl_die(ctx, isl_error_internal,
5330 "empty component", return NULL);
5332 set = isl_set_universe(isl_space_copy(graph->node[i].space));
5333 dom = isl_union_set_from_set(set);
5335 for (i = i + 1; i < graph->n; ++i) {
5336 if (!pred(&graph->node[i], data))
5337 continue;
5338 set = isl_set_universe(isl_space_copy(graph->node[i].space));
5339 dom = isl_union_set_union(dom, isl_union_set_from_set(set));
5342 return dom;
5345 /* Return a list of unions of universe domains, where each element
5346 * in the list corresponds to an SCC (or WCC) indexed by node->scc.
5348 static __isl_give isl_union_set_list *extract_sccs(isl_ctx *ctx,
5349 struct isl_sched_graph *graph)
5351 int i;
5352 isl_union_set_list *filters;
5354 filters = isl_union_set_list_alloc(ctx, graph->scc);
5355 for (i = 0; i < graph->scc; ++i) {
5356 isl_union_set *dom;
5358 dom = isl_sched_graph_domain(ctx, graph, &node_scc_exactly, i);
5359 filters = isl_union_set_list_add(filters, dom);
5362 return filters;
5365 /* Return a list of two unions of universe domains, one for the SCCs up
5366 * to and including graph->src_scc and another for the other SCCs.
5368 static __isl_give isl_union_set_list *extract_split(isl_ctx *ctx,
5369 struct isl_sched_graph *graph)
5371 isl_union_set *dom;
5372 isl_union_set_list *filters;
5374 filters = isl_union_set_list_alloc(ctx, 2);
5375 dom = isl_sched_graph_domain(ctx, graph,
5376 &node_scc_at_most, graph->src_scc);
5377 filters = isl_union_set_list_add(filters, dom);
5378 dom = isl_sched_graph_domain(ctx, graph,
5379 &node_scc_at_least, graph->src_scc + 1);
5380 filters = isl_union_set_list_add(filters, dom);
5382 return filters;
5385 /* Copy nodes that satisfy node_pred from the src dependence graph
5386 * to the dst dependence graph.
5388 * The subgraph into which the nodes are copied will be used
5389 * to create a new band, so the cached value of the current
5390 * band schedule does not need to be copied.
5392 static isl_stat copy_nodes(struct isl_sched_graph *dst,
5393 struct isl_sched_graph *src,
5394 int (*node_pred)(struct isl_sched_node *node, int data), int data)
5396 int i;
5398 dst->n = 0;
5399 for (i = 0; i < src->n; ++i) {
5400 int j;
5402 if (!node_pred(&src->node[i], data))
5403 continue;
5405 j = dst->n;
5406 dst->node[j].space = isl_space_copy(src->node[i].space);
5407 dst->node[j].compressed = src->node[i].compressed;
5408 dst->node[j].hull = isl_set_copy(src->node[i].hull);
5409 dst->node[j].compress =
5410 isl_multi_aff_copy(src->node[i].compress);
5411 dst->node[j].decompress =
5412 isl_multi_aff_copy(src->node[i].decompress);
5413 dst->node[j].nvar = src->node[i].nvar;
5414 dst->node[j].nparam = src->node[i].nparam;
5415 dst->node[j].sched = isl_mat_copy(src->node[i].sched);
5416 dst->node[j].band_sched = NULL;
5417 dst->node[j].coincident = src->node[i].coincident;
5418 dst->node[j].sizes = isl_multi_val_copy(src->node[i].sizes);
5419 dst->node[j].bounds = isl_basic_set_copy(src->node[i].bounds);
5420 dst->node[j].max = isl_vec_copy(src->node[i].max);
5421 dst->node[j].intra = src->node[i].intra;
5422 dst->n++;
5424 if (!dst->node[j].space || !dst->node[j].sched)
5425 return isl_stat_error;
5426 if (dst->node[j].compressed &&
5427 (!dst->node[j].hull || !dst->node[j].compress ||
5428 !dst->node[j].decompress))
5429 return isl_stat_error;
5432 return isl_stat_ok;
5435 /* Copy non-empty edges that satisfy edge_pred from the src dependence graph
5436 * to the dst dependence graph.
5437 * If the source or destination node of the edge is not in the destination
5438 * graph, then it must be a backward proximity edge and it should simply
5439 * be ignored.
5440 * Note that the intra-statement consecutivity constraints are
5441 * shared between the nodes of "src" and "dst".
5442 * If an edge references any intra-statement consecutivity constraints,
5443 * they can therefore simply be copied.
5445 static isl_stat copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst,
5446 struct isl_sched_graph *src,
5447 int (*edge_pred)(struct isl_sched_edge *edge, int data), int data)
5449 int i;
5451 dst->n_edge = 0;
5452 for (i = 0; i < src->n_edge; ++i) {
5453 struct isl_sched_edge *edge = &src->edge[i];
5454 isl_map *map;
5455 isl_union_map *tagged_condition;
5456 isl_union_map *tagged_validity;
5457 struct isl_sched_node *dst_src, *dst_dst;
5458 struct isl_sched_intra *dst_src_intra, *dst_dst_intra;
5460 if (!edge_pred(edge, data))
5461 continue;
5463 if (isl_map_plain_is_empty(edge->map))
5464 continue;
5466 dst_src = graph_find_node(ctx, dst, edge->src->space);
5467 dst_dst = graph_find_node(ctx, dst, edge->dst->space);
5468 if (!dst_src || !dst_dst)
5469 return isl_stat_error;
5470 if (!is_node(dst, dst_src) || !is_node(dst, dst_dst)) {
5471 if (is_validity(edge) || is_conditional_validity(edge))
5472 isl_die(ctx, isl_error_internal,
5473 "backward (conditional) validity edge",
5474 return isl_stat_error);
5475 continue;
5478 dst_src_intra = edge->src_intra;
5479 dst_dst_intra = edge->dst_intra;
5481 map = isl_map_copy(edge->map);
5482 tagged_condition = isl_union_map_copy(edge->tagged_condition);
5483 tagged_validity = isl_union_map_copy(edge->tagged_validity);
5485 dst->edge[dst->n_edge].src = dst_src;
5486 dst->edge[dst->n_edge].dst = dst_dst;
5487 dst->edge[dst->n_edge].src_intra = dst_src_intra;
5488 dst->edge[dst->n_edge].dst_intra = dst_dst_intra;
5489 dst->edge[dst->n_edge].map = map;
5490 dst->edge[dst->n_edge].tagged_condition = tagged_condition;
5491 dst->edge[dst->n_edge].tagged_validity = tagged_validity;
5492 dst->edge[dst->n_edge].types = edge->types;
5493 dst->n_edge++;
5495 if (edge->tagged_condition && !tagged_condition)
5496 return isl_stat_error;
5497 if (edge->tagged_validity && !tagged_validity)
5498 return isl_stat_error;
5500 if (graph_edge_tables_add(ctx, dst,
5501 &dst->edge[dst->n_edge - 1]) < 0)
5502 return isl_stat_error;
5505 return isl_stat_ok;
5508 /* Compute the maximal number of variables over all nodes.
5509 * This is the maximal number of linearly independent schedule
5510 * rows that we need to compute.
5511 * Just in case we end up in a part of the dependence graph
5512 * with only lower-dimensional domains, we make sure we will
5513 * compute the required amount of extra linearly independent rows.
5515 static int compute_maxvar(struct isl_sched_graph *graph)
5517 int i;
5519 graph->maxvar = 0;
5520 for (i = 0; i < graph->n; ++i) {
5521 struct isl_sched_node *node = &graph->node[i];
5522 int nvar;
5524 if (node_update_vmap(node) < 0)
5525 return -1;
5526 nvar = node->nvar + graph->n_row - node->rank;
5527 if (nvar > graph->maxvar)
5528 graph->maxvar = nvar;
5531 return 0;
5534 /* Count the number of (active) intra-statement consecutivity constraints
5535 * associated to "node".
5537 static int node_n_intra(struct isl_sched_node *node)
5539 int n;
5540 struct isl_sched_intra *intra;
5542 n = 0;
5543 for (intra = node->intra; intra; intra = intra->next)
5544 if (intra->n_fixed >= 0)
5545 ++n;
5547 return n;
5550 /* Extract the subgraph of "graph" that consists of the nodes satisfying
5551 * "node_pred" and the edges satisfying "edge_pred" and store
5552 * the result in "sub".
5554 static isl_stat extract_sub_graph(isl_ctx *ctx, struct isl_sched_graph *graph,
5555 int (*node_pred)(struct isl_sched_node *node, int data),
5556 int (*edge_pred)(struct isl_sched_edge *edge, int data),
5557 int data, struct isl_sched_graph *sub)
5559 int i, n = 0, n_edge = 0, n_intra = 0, n_inter = 0;
5560 int t;
5562 for (i = 0; i < graph->n; ++i)
5563 if (node_pred(&graph->node[i], data)) {
5564 ++n;
5565 n_intra += node_n_intra(&graph->node[i]);
5567 for (i = 0; i < graph->n_edge; ++i)
5568 if (edge_pred(&graph->edge[i], data)) {
5569 ++n_edge;
5570 if (is_consecutivity(&graph->edge[i]))
5571 ++n_inter;
5573 if (graph_alloc(ctx, sub, n, n_edge, n_intra + n_inter) < 0)
5574 return isl_stat_error;
5575 sub->root = graph->root;
5576 if (copy_nodes(sub, graph, node_pred, data) < 0)
5577 return isl_stat_error;
5578 if (graph_init_table(ctx, sub) < 0)
5579 return isl_stat_error;
5580 for (t = 0; t <= isl_edge_last_table; ++t)
5581 sub->max_edge[t] = graph->max_edge[t];
5582 if (graph_init_edge_tables(ctx, sub) < 0)
5583 return isl_stat_error;
5584 if (copy_edges(ctx, sub, graph, edge_pred, data) < 0)
5585 return isl_stat_error;
5586 sub->n_row = graph->n_row;
5587 sub->max_row = graph->max_row;
5588 sub->n_total_row = graph->n_total_row;
5589 sub->band_start = graph->band_start;
5591 return isl_stat_ok;
5594 static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node,
5595 struct isl_sched_graph *graph);
5596 static __isl_give isl_schedule_node *compute_schedule_wcc(
5597 isl_schedule_node *node, struct isl_sched_graph *graph);
5599 /* Compute a schedule for a subgraph of "graph". In particular, for
5600 * the graph composed of nodes that satisfy node_pred and edges that
5601 * that satisfy edge_pred.
5602 * If the subgraph is known to consist of a single component, then wcc should
5603 * be set and then we call compute_schedule_wcc on the constructed subgraph.
5604 * Otherwise, we call compute_schedule, which will check whether the subgraph
5605 * is connected.
5607 * The schedule is inserted at "node" and the updated schedule node
5608 * is returned.
5610 static __isl_give isl_schedule_node *compute_sub_schedule(
5611 __isl_take isl_schedule_node *node, isl_ctx *ctx,
5612 struct isl_sched_graph *graph,
5613 int (*node_pred)(struct isl_sched_node *node, int data),
5614 int (*edge_pred)(struct isl_sched_edge *edge, int data),
5615 int data, int wcc)
5617 struct isl_sched_graph split = { 0 };
5619 if (extract_sub_graph(ctx, graph, node_pred, edge_pred, data,
5620 &split) < 0)
5621 goto error;
5623 if (wcc)
5624 node = compute_schedule_wcc(node, &split);
5625 else
5626 node = compute_schedule(node, &split);
5628 graph_free(ctx, &split);
5629 return node;
5630 error:
5631 graph_free(ctx, &split);
5632 return isl_schedule_node_free(node);
5635 static int edge_scc_exactly(struct isl_sched_edge *edge, int scc)
5637 return edge->src->scc == scc && edge->dst->scc == scc;
5640 static int edge_dst_scc_at_most(struct isl_sched_edge *edge, int scc)
5642 return edge->dst->scc <= scc;
5645 static int edge_src_scc_at_least(struct isl_sched_edge *edge, int scc)
5647 return edge->src->scc >= scc;
5650 /* Reset the current band by dropping all its schedule rows and
5651 * resetting the number of inner rows of
5652 * the intra-statement consecutivity constraints already taken into account
5653 * to their original values at the start of the band.
5654 * The states of the inter-statement consecutivity constraints
5655 * are also reset to their values at the start of the band.
5657 static isl_stat reset_band(struct isl_sched_graph *graph)
5659 int i;
5660 int drop;
5662 drop = graph->n_total_row - graph->band_start;
5663 graph->n_total_row -= drop;
5664 graph->n_row -= drop;
5666 for (i = 0; i < graph->n; ++i) {
5667 struct isl_sched_node *node = &graph->node[i];
5668 struct isl_sched_intra *intra;
5670 for (intra = node->intra; intra; intra = intra->next)
5671 intra->n_fixed = intra->band_n_fixed;
5673 isl_map_free(node->band_sched);
5674 node->band_sched = NULL;
5676 node->sched = isl_mat_drop_rows(node->sched,
5677 graph->band_start, drop);
5679 if (!node->sched)
5680 return isl_stat_error;
5682 for (i = 0; i < graph->n_edge; ++i)
5683 graph->edge[i].state = graph->edge[i].band_state;
5685 return isl_stat_ok;
5688 /* Split the current graph into two parts and compute a schedule for each
5689 * part individually. In particular, one part consists of all SCCs up
5690 * to and including graph->src_scc, while the other part contains the other
5691 * SCCs. The split is enforced by a sequence node inserted at position "node"
5692 * in the schedule tree. Return the updated schedule node.
5693 * If either of these two parts consists of a sequence, then it is spliced
5694 * into the sequence containing the two parts.
5696 * The current band is reset. It would be possible to reuse
5697 * the previously computed rows as the first rows in the next
5698 * band, but recomputing them may result in better rows as we are looking
5699 * at a smaller part of the dependence graph.
5701 static __isl_give isl_schedule_node *compute_split_schedule(
5702 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
5704 int is_seq;
5705 isl_ctx *ctx;
5706 isl_union_set_list *filters;
5708 if (!node)
5709 return NULL;
5711 if (reset_band(graph) < 0)
5712 return isl_schedule_node_free(node);
5714 next_band(graph);
5716 ctx = isl_schedule_node_get_ctx(node);
5717 filters = extract_split(ctx, graph);
5718 node = isl_schedule_node_insert_sequence(node, filters);
5719 node = isl_schedule_node_child(node, 1);
5720 node = isl_schedule_node_child(node, 0);
5722 node = compute_sub_schedule(node, ctx, graph,
5723 &node_scc_at_least, &edge_src_scc_at_least,
5724 graph->src_scc + 1, 0);
5725 is_seq = isl_schedule_node_get_type(node) == isl_schedule_node_sequence;
5726 node = isl_schedule_node_parent(node);
5727 node = isl_schedule_node_parent(node);
5728 if (is_seq)
5729 node = isl_schedule_node_sequence_splice_child(node, 1);
5730 node = isl_schedule_node_child(node, 0);
5731 node = isl_schedule_node_child(node, 0);
5732 node = compute_sub_schedule(node, ctx, graph,
5733 &node_scc_at_most, &edge_dst_scc_at_most,
5734 graph->src_scc, 0);
5735 is_seq = isl_schedule_node_get_type(node) == isl_schedule_node_sequence;
5736 node = isl_schedule_node_parent(node);
5737 node = isl_schedule_node_parent(node);
5738 if (is_seq)
5739 node = isl_schedule_node_sequence_splice_child(node, 0);
5741 return node;
5744 /* Insert a band node at position "node" in the schedule tree corresponding
5745 * to the current band in "graph". Mark the band node permutable
5746 * if "permutable" is set.
5747 * The partial schedules and the coincidence property are extracted
5748 * from the graph nodes.
5749 * Return the updated schedule node.
5751 static __isl_give isl_schedule_node *insert_current_band(
5752 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
5753 int permutable)
5755 int i;
5756 int start, end, n;
5757 isl_multi_aff *ma;
5758 isl_multi_pw_aff *mpa;
5759 isl_multi_union_pw_aff *mupa;
5761 if (!node)
5762 return NULL;
5764 if (graph->n < 1)
5765 isl_die(isl_schedule_node_get_ctx(node), isl_error_internal,
5766 "graph should have at least one node",
5767 return isl_schedule_node_free(node));
5769 start = graph->band_start;
5770 end = graph->n_total_row;
5771 n = end - start;
5773 ma = node_extract_partial_schedule_multi_aff(&graph->node[0], start, n);
5774 mpa = isl_multi_pw_aff_from_multi_aff(ma);
5775 mupa = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);
5777 for (i = 1; i < graph->n; ++i) {
5778 isl_multi_union_pw_aff *mupa_i;
5780 ma = node_extract_partial_schedule_multi_aff(&graph->node[i],
5781 start, n);
5782 mpa = isl_multi_pw_aff_from_multi_aff(ma);
5783 mupa_i = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);
5784 mupa = isl_multi_union_pw_aff_union_add(mupa, mupa_i);
5786 node = isl_schedule_node_insert_partial_schedule(node, mupa);
5788 for (i = 0; i < n; ++i)
5789 node = isl_schedule_node_band_member_set_coincident(node, i,
5790 graph->node[0].coincident[start + i]);
5791 node = isl_schedule_node_band_set_permutable(node, permutable);
5793 return node;
5796 /* Update the dependence relations based on the current schedule,
5797 * add the current band to "node" and then continue with the computation
5798 * of the next band.
5799 * Return the updated schedule node.
5801 static __isl_give isl_schedule_node *compute_next_band(
5802 __isl_take isl_schedule_node *node,
5803 struct isl_sched_graph *graph, int permutable)
5805 isl_ctx *ctx;
5807 if (!node)
5808 return NULL;
5810 ctx = isl_schedule_node_get_ctx(node);
5811 if (update_edges(ctx, graph) < 0)
5812 return isl_schedule_node_free(node);
5813 node = insert_current_band(node, graph, permutable);
5814 next_band(graph);
5816 node = isl_schedule_node_child(node, 0);
5817 node = compute_schedule(node, graph);
5818 node = isl_schedule_node_parent(node);
5820 return node;
5823 /* Add the constraints "coef" derived from an edge from "node" to itself
5824 * to graph->lp in order to respect the dependences and to try and carry them.
5825 * "pos" is the sequence number of the edge that needs to be carried.
5826 * "coef" represents general constraints on coefficients (c_0, c_x)
5827 * of valid constraints for (y - x) with x and y instances of the node.
5829 * The constraints added to graph->lp need to enforce
5831 * (c_j_0 + c_j_x y) - (c_j_0 + c_j_x x)
5832 * = c_j_x (y - x) >= e_i
5834 * for each (x,y) in the dependence relation of the edge.
5835 * That is, (-e_i, c_j_x) needs to be plugged in for (c_0, c_x),
5836 * taking into account that each coefficient in c_j_x is represented
5837 * as a pair of non-negative coefficients.
5839 static isl_stat add_intra_constraints(struct isl_sched_graph *graph,
5840 struct isl_sched_node *node, __isl_take isl_basic_set *coef, int pos)
5842 int offset;
5843 isl_ctx *ctx;
5844 isl_dim_map *dim_map;
5846 if (!coef)
5847 return isl_stat_error;
5849 ctx = isl_basic_set_get_ctx(coef);
5850 offset = coef_var_offset(coef);
5851 dim_map = intra_dim_map(ctx, graph, node, offset, 1);
5852 isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
5853 graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
5855 return isl_stat_ok;
5858 /* Add the constraints "coef" derived from an edge from "src" to "dst"
5859 * to graph->lp in order to respect the dependences and to try and carry them.
5860 * "pos" is the sequence number of the edge that needs to be carried or
5861 * -1 if no attempt should be made to carry the dependences.
5862 * "coef" represents general constraints on coefficients (c_0, c_n, c_x, c_y)
5863 * of valid constraints for (x, y) with x and y instances of "src" and "dst".
5865 * The constraints added to graph->lp need to enforce
5867 * (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= e_i
5869 * for each (x,y) in the dependence relation of the edge or
5871 * (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= 0
5873 * if pos is -1.
5874 * That is,
5875 * (-e_i + c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x)
5876 * or
5877 * (c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x)
5878 * needs to be plugged in for (c_0, c_n, c_x, c_y),
5879 * taking into account that each coefficient in c_j_x and c_k_x is represented
5880 * as a pair of non-negative coefficients.
5882 static isl_stat add_inter_constraints(struct isl_sched_graph *graph,
5883 struct isl_sched_node *src, struct isl_sched_node *dst,
5884 __isl_take isl_basic_set *coef, int pos)
5886 int offset;
5887 isl_ctx *ctx;
5888 isl_dim_map *dim_map;
5890 if (!coef)
5891 return isl_stat_error;
5893 ctx = isl_basic_set_get_ctx(coef);
5894 offset = coef_var_offset(coef);
5895 dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);
5896 if (pos >= 0)
5897 isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
5898 graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
5900 return isl_stat_ok;
5903 /* Data structure for keeping track of the data needed
5904 * to exploit non-trivial lineality spaces.
5906 * "any_non_trivial" is true if there are any non-trivial lineality spaces.
5907 * If "any_non_trivial" is not true, then "equivalent" and "mask" may be NULL.
5908 * "equivalent" connects instances to other instances on the same line(s).
5909 * "mask" contains the domain spaces of "equivalent".
5910 * Any instance set not in "mask" does not have a non-trivial lineality space.
5912 struct isl_exploit_lineality_data {
5913 isl_bool any_non_trivial;
5914 isl_union_map *equivalent;
5915 isl_union_set *mask;
5918 /* Data structure collecting information used during the construction
5919 * of an LP for carrying dependences.
5921 * "intra" is a sequence of coefficient constraints for intra-node edges.
5922 * "inter" is a sequence of coefficient constraints for inter-node edges.
5923 * "lineality" contains data used to exploit non-trivial lineality spaces.
5925 struct isl_carry {
5926 isl_basic_set_list *intra;
5927 isl_basic_set_list *inter;
5928 struct isl_exploit_lineality_data lineality;
5931 /* Free all the data stored in "carry".
5933 static void isl_carry_clear(struct isl_carry *carry)
5935 isl_basic_set_list_free(carry->intra);
5936 isl_basic_set_list_free(carry->inter);
5937 isl_union_map_free(carry->lineality.equivalent);
5938 isl_union_set_free(carry->lineality.mask);
5941 /* Return a pointer to the node in "graph" that lives in "space".
5942 * If the requested node has been compressed, then "space"
5943 * corresponds to the compressed space.
5944 * The graph is assumed to have such a node.
5945 * Return NULL in case of error.
5947 * First try and see if "space" is the space of an uncompressed node.
5948 * If so, return that node.
5949 * Otherwise, "space" was constructed by construct_compressed_id and
5950 * contains a user pointer pointing to the node in the tuple id.
5951 * However, this node belongs to the original dependence graph.
5952 * If "graph" is a subgraph of this original dependence graph,
5953 * then the node with the same space still needs to be looked up
5954 * in the current graph.
5956 static struct isl_sched_node *graph_find_compressed_node(isl_ctx *ctx,
5957 struct isl_sched_graph *graph, __isl_keep isl_space *space)
5959 isl_id *id;
5960 struct isl_sched_node *node;
5962 if (!space)
5963 return NULL;
5965 node = graph_find_node(ctx, graph, space);
5966 if (!node)
5967 return NULL;
5968 if (is_node(graph, node))
5969 return node;
5971 id = isl_space_get_tuple_id(space, isl_dim_set);
5972 node = isl_id_get_user(id);
5973 isl_id_free(id);
5975 if (!node)
5976 return NULL;
5978 if (!is_node(graph->root, node))
5979 isl_die(ctx, isl_error_internal,
5980 "space points to invalid node", return NULL);
5981 if (graph != graph->root)
5982 node = graph_find_node(ctx, graph, node->space);
5983 if (!is_node(graph, node))
5984 isl_die(ctx, isl_error_internal,
5985 "unable to find node", return NULL);
5987 return node;
5990 /* Internal data structure for add_all_constraints.
5992 * "graph" is the schedule constraint graph for which an LP problem
5993 * is being constructed.
5994 * "carry_inter" indicates whether inter-node edges should be carried.
5995 * "pos" is the position of the next edge that needs to be carried.
5997 struct isl_add_all_constraints_data {
5998 isl_ctx *ctx;
5999 struct isl_sched_graph *graph;
6000 int carry_inter;
6001 int pos;
6004 /* Add the constraints "coef" derived from an edge from a node to itself
6005 * to data->graph->lp in order to respect the dependences and
6006 * to try and carry them.
6008 * The space of "coef" is of the form
6010 * coefficients[[c_cst] -> S[c_x]]
6012 * with S[c_x] the (compressed) space of the node.
6013 * Extract the node from the space and call add_intra_constraints.
6015 static isl_stat lp_add_intra(__isl_take isl_basic_set *coef, void *user)
6017 struct isl_add_all_constraints_data *data = user;
6018 isl_space *space;
6019 struct isl_sched_node *node;
6021 space = isl_basic_set_get_space(coef);
6022 space = isl_space_range(isl_space_unwrap(space));
6023 node = graph_find_compressed_node(data->ctx, data->graph, space);
6024 isl_space_free(space);
6025 return add_intra_constraints(data->graph, node, coef, data->pos++);
6028 /* Add the constraints "coef" derived from an edge from a node j
6029 * to a node k to data->graph->lp in order to respect the dependences and
6030 * to try and carry them (provided data->carry_inter is set).
6032 * The space of "coef" is of the form
6034 * coefficients[[c_cst, c_n] -> [S_j[c_x] -> S_k[c_y]]]
6036 * with S_j[c_x] and S_k[c_y] the (compressed) spaces of the nodes.
6037 * Extract the nodes from the space and call add_inter_constraints.
6039 static isl_stat lp_add_inter(__isl_take isl_basic_set *coef, void *user)
6041 struct isl_add_all_constraints_data *data = user;
6042 isl_space *space, *dom;
6043 struct isl_sched_node *src, *dst;
6044 int pos;
6046 space = isl_basic_set_get_space(coef);
6047 space = isl_space_unwrap(isl_space_range(isl_space_unwrap(space)));
6048 dom = isl_space_domain(isl_space_copy(space));
6049 src = graph_find_compressed_node(data->ctx, data->graph, dom);
6050 isl_space_free(dom);
6051 space = isl_space_range(space);
6052 dst = graph_find_compressed_node(data->ctx, data->graph, space);
6053 isl_space_free(space);
6055 pos = data->carry_inter ? data->pos++ : -1;
6056 return add_inter_constraints(data->graph, src, dst, coef, pos);
6059 /* Add constraints to graph->lp that force all (conditional) validity
6060 * dependences to be respected and attempt to carry them.
6061 * "intra" is the sequence of coefficient constraints for intra-node edges.
6062 * "inter" is the sequence of coefficient constraints for inter-node edges.
6063 * "carry_inter" indicates whether inter-node edges should be carried or
6064 * only respected.
6066 static isl_stat add_all_constraints(isl_ctx *ctx, struct isl_sched_graph *graph,
6067 __isl_keep isl_basic_set_list *intra,
6068 __isl_keep isl_basic_set_list *inter, int carry_inter)
6070 struct isl_add_all_constraints_data data = { ctx, graph, carry_inter };
6072 data.pos = 0;
6073 if (isl_basic_set_list_foreach(intra, &lp_add_intra, &data) < 0)
6074 return isl_stat_error;
6075 if (isl_basic_set_list_foreach(inter, &lp_add_inter, &data) < 0)
6076 return isl_stat_error;
6077 return isl_stat_ok;
/* Accumulator used by count_all_constraints:
 * "n_eq" is the number of equality constraints seen so far and
 * "n_ineq" is the number of inequality constraints seen so far.
 */
struct isl_sched_count {
	int n_eq;
	int n_ineq;
};
6088 /* Add the number of equality and inequality constraints of "bset"
6089 * to data->n_eq and data->n_ineq.
6091 static isl_stat bset_update_count(__isl_take isl_basic_set *bset, void *user)
6093 struct isl_sched_count *data = user;
6095 return update_count(bset, 1, &data->n_eq, &data->n_ineq);
6098 /* Count the number of equality and inequality constraints
6099 * that will be added to the carry_lp problem.
6100 * We count each edge exactly once.
6101 * "intra" is the sequence of coefficient constraints for intra-node edges.
6102 * "inter" is the sequence of coefficient constraints for inter-node edges.
6104 static isl_stat count_all_constraints(__isl_keep isl_basic_set_list *intra,
6105 __isl_keep isl_basic_set_list *inter, int *n_eq, int *n_ineq)
6107 struct isl_sched_count data;
6109 data.n_eq = data.n_ineq = 0;
6110 if (isl_basic_set_list_foreach(inter, &bset_update_count, &data) < 0)
6111 return isl_stat_error;
6112 if (isl_basic_set_list_foreach(intra, &bset_update_count, &data) < 0)
6113 return isl_stat_error;
6115 *n_eq = data.n_eq;
6116 *n_ineq = data.n_ineq;
6118 return isl_stat_ok;
6121 /* Construct an LP problem for finding schedule coefficients
6122 * such that the schedule carries as many validity dependences as possible.
6123 * In particular, for each dependence i, we bound the dependence distance
6124 * from below by e_i, with 0 <= e_i <= 1 and then maximize the sum
6125 * of all e_i's. Dependences with e_i = 0 in the solution are simply
6126 * respected, while those with e_i > 0 (in practice e_i = 1) are carried.
6127 * "intra" is the sequence of coefficient constraints for intra-node edges.
6128 * "inter" is the sequence of coefficient constraints for inter-node edges.
6129 * "n_edge" is the total number of edges.
6130 * "carry_inter" indicates whether inter-node edges should be carried or
6131 * only respected. That is, if "carry_inter" is not set, then
6132 * no e_i variables are introduced for the inter-node edges.
6134 * All variables of the LP are non-negative. The actual coefficients
6135 * may be negative, so each coefficient is represented as the difference
6136 * of two non-negative variables. The negative part always appears
6137 * immediately before the positive part.
6138 * Other than that, the variables have the following order
6140 * - sum of (1 - e_i) over all edges
6141 * - sum of all c_n coefficients
6142 * (unconstrained when computing non-parametric schedules)
6143 * - sum of positive and negative parts of all c_x coefficients
6144 * - for each edge
6145 * - e_i
6146 * - for each node
6147 * - positive and negative parts of c_i_x, in opposite order
6148 * - c_i_n (if parametric)
6149 * - c_i_0
6151 * The constraints are those from the (validity) edges plus three equalities
6152 * to express the sums and n_edge inequalities to express e_i <= 1.
6154 static isl_stat setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
6155 int n_edge, __isl_keep isl_basic_set_list *intra,
6156 __isl_keep isl_basic_set_list *inter, int carry_inter)
6158 int i;
6159 int k;
6160 isl_space *dim;
6161 unsigned total;
6162 int n_eq, n_ineq;
6164 total = 3 + n_edge;
6165 for (i = 0; i < graph->n; ++i) {
6166 struct isl_sched_node *node = &graph->node[graph->sorted[i]];
6167 node->start = total;
6168 total += 1 + node->nparam + 2 * node->nvar;
6171 if (count_all_constraints(intra, inter, &n_eq, &n_ineq) < 0)
6172 return isl_stat_error;
6174 dim = isl_space_set_alloc(ctx, 0, total);
6175 isl_basic_set_free(graph->lp);
6176 n_eq += 3;
6177 n_ineq += n_edge;
6178 graph->lp = isl_basic_set_alloc_space(dim, 0, n_eq, n_ineq);
6179 graph->lp = isl_basic_set_set_rational(graph->lp);
6181 k = isl_basic_set_alloc_equality(graph->lp);
6182 if (k < 0)
6183 return isl_stat_error;
6184 isl_seq_clr(graph->lp->eq[k], 1 + total);
6185 isl_int_set_si(graph->lp->eq[k][0], -n_edge);
6186 isl_int_set_si(graph->lp->eq[k][1], 1);
6187 for (i = 0; i < n_edge; ++i)
6188 isl_int_set_si(graph->lp->eq[k][4 + i], 1);
6190 if (add_param_sum_constraint(graph, 1) < 0)
6191 return isl_stat_error;
6192 if (add_var_sum_constraint(graph, 2) < 0)
6193 return isl_stat_error;
6195 for (i = 0; i < n_edge; ++i) {
6196 k = isl_basic_set_alloc_inequality(graph->lp);
6197 if (k < 0)
6198 return isl_stat_error;
6199 isl_seq_clr(graph->lp->ineq[k], 1 + total);
6200 isl_int_set_si(graph->lp->ineq[k][4 + i], -1);
6201 isl_int_set_si(graph->lp->ineq[k][0], 1);
6204 if (add_all_constraints(ctx, graph, intra, inter, carry_inter) < 0)
6205 return isl_stat_error;
6207 return isl_stat_ok;
6210 static __isl_give isl_schedule_node *compute_component_schedule(
6211 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
6212 int wcc);
6214 /* If the schedule_split_scaled option is set and if the linear
6215 * parts of the scheduling rows for all nodes in the graphs have
6216 * a non-trivial common divisor, then remove this
6217 * common divisor from the linear part.
6218 * Otherwise, insert a band node directly and continue with
6219 * the construction of the schedule.
6221 * If a non-trivial common divisor is found, then
6222 * the linear part is reduced and the remainder is ignored.
6223 * The pieces of the graph that are assigned different remainders
6224 * form (groups of) strongly connected components within
6225 * the scaled down band. If needed, they can therefore
6226 * be ordered along this remainder in a sequence node.
6227 * However, this ordering is not enforced here in order to allow
6228 * the scheduler to combine some of the strongly connected components.
6230 static __isl_give isl_schedule_node *split_scaled(
6231 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
6233 int i;
6234 int row;
6235 isl_ctx *ctx;
6236 isl_int gcd, gcd_i;
6238 if (!node)
6239 return NULL;
6241 ctx = isl_schedule_node_get_ctx(node);
6242 if (!ctx->opt->schedule_split_scaled)
6243 return compute_next_band(node, graph, 0);
6244 if (graph->n <= 1)
6245 return compute_next_band(node, graph, 0);
6247 isl_int_init(gcd);
6248 isl_int_init(gcd_i);
6250 isl_int_set_si(gcd, 0);
6252 row = isl_mat_rows(graph->node[0].sched) - 1;
6254 for (i = 0; i < graph->n; ++i) {
6255 struct isl_sched_node *node = &graph->node[i];
6256 int cols = isl_mat_cols(node->sched);
6258 isl_seq_gcd(node->sched->row[row] + 1, cols - 1, &gcd_i);
6259 isl_int_gcd(gcd, gcd, gcd_i);
6262 isl_int_clear(gcd_i);
6264 if (isl_int_cmp_si(gcd, 1) <= 0) {
6265 isl_int_clear(gcd);
6266 return compute_next_band(node, graph, 0);
6269 for (i = 0; i < graph->n; ++i) {
6270 struct isl_sched_node *node = &graph->node[i];
6272 isl_int_fdiv_q(node->sched->row[row][0],
6273 node->sched->row[row][0], gcd);
6274 isl_int_mul(node->sched->row[row][0],
6275 node->sched->row[row][0], gcd);
6276 node->sched = isl_mat_scale_down_row(node->sched, row, gcd);
6277 if (!node->sched)
6278 goto error;
6281 isl_int_clear(gcd);
6283 return compute_next_band(node, graph, 0);
6284 error:
6285 isl_int_clear(gcd);
6286 return isl_schedule_node_free(node);
6289 /* Is the schedule row "sol" trivial on node "node"?
6290 * That is, is the solution zero on the dimensions linearly independent of
6291 * the previously found solutions?
6292 * Return 1 if the solution is trivial, 0 if it is not and -1 on error.
6294 * Each coefficient is represented as the difference between
6295 * two non-negative values in "sol".
6296 * We construct the schedule row s and check if it is linearly
6297 * independent of previously computed schedule rows
6298 * by computing T s, with T the linear combinations that are zero
6299 * on linearly dependent schedule rows.
6300 * If the result consists of all zeros, then the solution is trivial.
6302 static int is_trivial(struct isl_sched_node *node, __isl_keep isl_vec *sol)
6304 int trivial;
6305 isl_vec *node_sol;
6307 if (!sol)
6308 return -1;
6309 if (node->nvar == node->rank)
6310 return 0;
6312 node_sol = extract_var_coef(node, sol);
6313 node_sol = isl_mat_vec_product(isl_mat_copy(node->indep), node_sol);
6314 if (!node_sol)
6315 return -1;
6317 trivial = isl_seq_first_non_zero(node_sol->el,
6318 node->nvar - node->rank) == -1;
6320 isl_vec_free(node_sol);
6322 return trivial;
6325 /* Is the schedule row "sol" trivial on any node where it should
6326 * not be trivial?
6327 * Return 1 if any solution is trivial, 0 if they are not and -1 on error.
6329 static int is_any_trivial(struct isl_sched_graph *graph,
6330 __isl_keep isl_vec *sol)
6332 int i;
6334 for (i = 0; i < graph->n; ++i) {
6335 struct isl_sched_node *node = &graph->node[i];
6336 int trivial;
6338 if (!needs_row(graph, node))
6339 continue;
6340 trivial = is_trivial(node, sol);
6341 if (trivial < 0 || trivial)
6342 return trivial;
6345 return 0;
6348 /* Does the schedule represented by "sol" perform loop coalescing on "node"?
6349 * If so, return the position of the coalesced dimension.
6350 * Otherwise, return node->nvar or -1 on error.
6352 * In particular, look for pairs of coefficients c_i and c_j such that
6353 * |c_j/c_i| > ceil(size_i/2), i.e., |c_j| > |c_i * ceil(size_i/2)|.
6354 * If any such pair is found, then return i.
6355 * If size_i is infinity, then no check on c_i needs to be performed.
6357 static int find_node_coalescing(struct isl_sched_node *node,
6358 __isl_keep isl_vec *sol)
6360 int i, j;
6361 isl_int max;
6362 isl_vec *csol;
6364 if (node->nvar <= 1)
6365 return node->nvar;
6367 csol = extract_var_coef(node, sol);
6368 if (!csol)
6369 return -1;
6370 isl_int_init(max);
6371 for (i = 0; i < node->nvar; ++i) {
6372 isl_val *v;
6374 if (isl_int_is_zero(csol->el[i]))
6375 continue;
6376 v = isl_multi_val_get_val(node->sizes, i);
6377 if (!v)
6378 goto error;
6379 if (!isl_val_is_int(v)) {
6380 isl_val_free(v);
6381 continue;
6383 v = isl_val_div_ui(v, 2);
6384 v = isl_val_ceil(v);
6385 if (!v)
6386 goto error;
6387 isl_int_mul(max, v->n, csol->el[i]);
6388 isl_val_free(v);
6390 for (j = 0; j < node->nvar; ++j) {
6391 if (j == i)
6392 continue;
6393 if (isl_int_abs_gt(csol->el[j], max))
6394 break;
6396 if (j < node->nvar)
6397 break;
6400 isl_int_clear(max);
6401 isl_vec_free(csol);
6402 return i;
6403 error:
6404 isl_int_clear(max);
6405 isl_vec_free(csol);
6406 return -1;
6409 /* Force the schedule coefficient at position "pos" of "node" to be zero
6410 * in "tl".
6411 * The coefficient is encoded as the difference between two non-negative
6412 * variables. Force these two variables to have the same value.
6414 static __isl_give isl_tab_lexmin *zero_out_node_coef(
6415 __isl_take isl_tab_lexmin *tl, struct isl_sched_node *node, int pos)
6417 int dim;
6418 isl_ctx *ctx;
6419 isl_vec *eq;
6421 ctx = isl_space_get_ctx(node->space);
6422 dim = isl_tab_lexmin_dim(tl);
6423 if (dim < 0)
6424 return isl_tab_lexmin_free(tl);
6425 eq = isl_vec_alloc(ctx, 1 + dim);
6426 eq = isl_vec_clr(eq);
6427 if (!eq)
6428 return isl_tab_lexmin_free(tl);
6430 pos = 1 + node_var_coef_pos(node, pos);
6431 isl_int_set_si(eq->el[pos], 1);
6432 isl_int_set_si(eq->el[pos + 1], -1);
6433 tl = isl_tab_lexmin_add_eq(tl, eq->el);
6434 isl_vec_free(eq);
6436 return tl;
6439 /* Return the lexicographically smallest rational point in the basic set
6440 * from which "tl" was constructed, double checking that this input set
6441 * was not empty.
6443 static __isl_give isl_vec *non_empty_solution(__isl_keep isl_tab_lexmin *tl)
6445 isl_vec *sol;
6447 sol = isl_tab_lexmin_get_solution(tl);
6448 if (!sol)
6449 return NULL;
6450 if (sol->size == 0)
6451 isl_die(isl_vec_get_ctx(sol), isl_error_internal,
6452 "error in schedule construction",
6453 return isl_vec_free(sol));
6454 return sol;
6457 /* Does the solution "sol" of the LP problem constructed by setup_carry_lp
6458 * carry any of the "n_edge" groups of dependences?
6459 * The value in the first position is the sum of (1 - e_i) over all "n_edge"
6460 * edges, with 0 <= e_i <= 1 equal to 1 when the dependences represented
6461 * by the edge are carried by the solution.
6462 * If the sum of the (1 - e_i) is smaller than "n_edge" then at least
6463 * one of those is carried.
6465 * Note that despite the fact that the problem is solved using a rational
6466 * solver, the solution is guaranteed to be integral.
6467 * Specifically, the dependence distance lower bounds e_i (and therefore
6468 * also their sum) are integers. See Lemma 5 of [1].
6470 * Any potential denominator of the sum is cleared by this function.
6471 * The denominator is not relevant for any of the other elements
6472 * in the solution.
6474 * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
6475 * Problem, Part II: Multi-Dimensional Time.
6476 * In Intl. Journal of Parallel Programming, 1992.
6478 static int carries_dependences(__isl_keep isl_vec *sol, int n_edge)
6480 isl_int_divexact(sol->el[1], sol->el[1], sol->el[0]);
6481 isl_int_set_si(sol->el[0], 1);
6482 return isl_int_cmp_si(sol->el[1], n_edge) < 0;
6485 /* Return the lexicographically smallest rational point in "lp",
6486 * assuming that all variables are non-negative and performing some
6487 * additional sanity checks.
6488 * If "want_integral" is set, then compute the lexicographically smallest
6489 * integer point instead.
6490 * In particular, "lp" should not be empty by construction.
6491 * Double check that this is the case.
6492 * If dependences are not carried for any of the "n_edge" edges,
6493 * then return an empty vector.
6495 * If the schedule_treat_coalescing option is set and
6496 * if the computed schedule performs loop coalescing on a given node,
6497 * i.e., if it is of the form
6499 * c_i i + c_j j + ...
6501 * with |c_j/c_i| >= size_i, then force the coefficient c_i to be zero
6502 * to cut out this solution. Repeat this process until no more loop
6503 * coalescing occurs or until no more dependences can be carried.
6504 * In the latter case, revert to the previously computed solution.
6506 * If the caller requests an integral solution and if coalescing should
6507 * be treated, then perform the coalescing treatment first as
6508 * an integral solution computed before coalescing treatment
6509 * would carry the same number of edges and would therefore probably
6510 * also be coalescing.
6512 * To allow the coalescing treatment to be performed first,
6513 * the initial solution is allowed to be rational and it is only
6514 * cut out (if needed) in the next iteration, if no coalescing measures
6515 * were taken.
6517 static __isl_give isl_vec *non_neg_lexmin(struct isl_sched_graph *graph,
6518 __isl_take isl_basic_set *lp, int n_edge, int want_integral)
6520 int i, pos, cut;
6521 isl_ctx *ctx;
6522 isl_tab_lexmin *tl;
6523 isl_vec *sol = NULL, *prev;
6524 int treat_coalescing;
6525 int try_again;
6527 if (!lp)
6528 return NULL;
6529 ctx = isl_basic_set_get_ctx(lp);
6530 treat_coalescing = isl_options_get_schedule_treat_coalescing(ctx);
6531 tl = isl_tab_lexmin_from_basic_set(lp);
6533 cut = 0;
6534 do {
6535 int integral;
6537 try_again = 0;
6538 if (cut)
6539 tl = isl_tab_lexmin_cut_to_integer(tl);
6540 prev = sol;
6541 sol = non_empty_solution(tl);
6542 if (!sol)
6543 goto error;
6545 integral = isl_int_is_one(sol->el[0]);
6546 if (!carries_dependences(sol, n_edge)) {
6547 if (!prev)
6548 prev = isl_vec_alloc(ctx, 0);
6549 isl_vec_free(sol);
6550 sol = prev;
6551 break;
6553 prev = isl_vec_free(prev);
6554 cut = want_integral && !integral;
6555 if (cut)
6556 try_again = 1;
6557 if (!treat_coalescing)
6558 continue;
6559 for (i = 0; i < graph->n; ++i) {
6560 struct isl_sched_node *node = &graph->node[i];
6562 pos = find_node_coalescing(node, sol);
6563 if (pos < 0)
6564 goto error;
6565 if (pos < node->nvar)
6566 break;
6568 if (i < graph->n) {
6569 try_again = 1;
6570 tl = zero_out_node_coef(tl, &graph->node[i], pos);
6571 cut = 0;
6573 } while (try_again);
6575 isl_tab_lexmin_free(tl);
6577 return sol;
6578 error:
6579 isl_tab_lexmin_free(tl);
6580 isl_vec_free(prev);
6581 isl_vec_free(sol);
6582 return NULL;
6585 /* If "edge" is an edge from a node to itself, then add the corresponding
6586 * dependence relation to "umap".
6587 * If "node" has been compressed, then the dependence relation
6588 * is also compressed first.
6590 static __isl_give isl_union_map *add_intra(__isl_take isl_union_map *umap,
6591 struct isl_sched_edge *edge)
6593 isl_map *map;
6594 struct isl_sched_node *node = edge->src;
6596 if (edge->src != edge->dst)
6597 return umap;
6599 map = isl_map_copy(edge->map);
6600 if (node->compressed) {
6601 map = isl_map_preimage_domain_multi_aff(map,
6602 isl_multi_aff_copy(node->decompress));
6603 map = isl_map_preimage_range_multi_aff(map,
6604 isl_multi_aff_copy(node->decompress));
6606 umap = isl_union_map_add_map(umap, map);
6607 return umap;
6610 /* If "edge" is an edge from a node to another node, then add the corresponding
6611 * dependence relation to "umap".
6612 * If the source or destination nodes of "edge" have been compressed,
6613 * then the dependence relation is also compressed first.
6615 static __isl_give isl_union_map *add_inter(__isl_take isl_union_map *umap,
6616 struct isl_sched_edge *edge)
6618 isl_map *map;
6620 if (edge->src == edge->dst)
6621 return umap;
6623 map = isl_map_copy(edge->map);
6624 if (edge->src->compressed)
6625 map = isl_map_preimage_domain_multi_aff(map,
6626 isl_multi_aff_copy(edge->src->decompress));
6627 if (edge->dst->compressed)
6628 map = isl_map_preimage_range_multi_aff(map,
6629 isl_multi_aff_copy(edge->dst->decompress));
6630 umap = isl_union_map_add_map(umap, map);
6631 return umap;
6634 /* Internal data structure used by union_drop_coalescing_constraints
6635 * to collect bounds on all relevant statements.
6637 * "graph" is the schedule constraint graph for which an LP problem
6638 * is being constructed.
6639 * "bounds" collects the bounds.
6641 struct isl_collect_bounds_data {
6642 isl_ctx *ctx;
6643 struct isl_sched_graph *graph;
6644 isl_union_set *bounds;
6647 /* Add the size bounds for the node with instance deltas in "set"
6648 * to data->bounds.
6650 static isl_stat collect_bounds(__isl_take isl_set *set, void *user)
6652 struct isl_collect_bounds_data *data = user;
6653 struct isl_sched_node *node;
6654 isl_space *space;
6655 isl_set *bounds;
6657 space = isl_set_get_space(set);
6658 isl_set_free(set);
6660 node = graph_find_compressed_node(data->ctx, data->graph, space);
6661 isl_space_free(space);
6663 bounds = isl_set_from_basic_set(get_size_bounds(node));
6664 data->bounds = isl_union_set_add_set(data->bounds, bounds);
6666 return isl_stat_ok;
6669 /* Drop some constraints from "delta" that could be exploited
6670 * to construct loop coalescing schedules.
6671 * In particular, drop those constraint that bound the difference
6672 * to the size of the domain.
6673 * Do this for each set/node in "delta" separately.
6674 * The parameters are assumed to have been projected out by the caller.
6676 static __isl_give isl_union_set *union_drop_coalescing_constraints(isl_ctx *ctx,
6677 struct isl_sched_graph *graph, __isl_take isl_union_set *delta)
6679 struct isl_collect_bounds_data data = { ctx, graph };
6681 data.bounds = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
6682 if (isl_union_set_foreach_set(delta, &collect_bounds, &data) < 0)
6683 data.bounds = isl_union_set_free(data.bounds);
6684 delta = isl_union_set_plain_gist(delta, data.bounds);
6686 return delta;
6689 /* Given a non-trivial lineality space "lineality", add the corresponding
6690 * universe set to data->mask and add a map from elements to
6691 * other elements along the lines in "lineality" to data->equivalent.
6692 * If this is the first time this function gets called
6693 * (data->any_non_trivial is still false), then set data->any_non_trivial and
6694 * initialize data->mask and data->equivalent.
6696 * In particular, if the lineality space is defined by equality constraints
6698 * E x = 0
6700 * then construct an affine mapping
6702 * f : x -> E x
6704 * and compute the equivalence relation of having the same image under f:
6706 * { x -> x' : E x = E x' }
6708 static isl_stat add_non_trivial_lineality(__isl_take isl_basic_set *lineality,
6709 struct isl_exploit_lineality_data *data)
6711 isl_mat *eq;
6712 isl_space *space;
6713 isl_set *univ;
6714 isl_multi_aff *ma;
6715 isl_multi_pw_aff *mpa;
6716 isl_map *map;
6717 int n;
6719 if (!lineality)
6720 return isl_stat_error;
6721 if (isl_basic_set_dim(lineality, isl_dim_div) != 0)
6722 isl_die(isl_basic_set_get_ctx(lineality), isl_error_internal,
6723 "local variables not allowed", goto error);
6725 space = isl_basic_set_get_space(lineality);
6726 if (!data->any_non_trivial) {
6727 data->equivalent = isl_union_map_empty(isl_space_copy(space));
6728 data->mask = isl_union_set_empty(isl_space_copy(space));
6730 data->any_non_trivial = isl_bool_true;
6732 univ = isl_set_universe(isl_space_copy(space));
6733 data->mask = isl_union_set_add_set(data->mask, univ);
6735 eq = isl_basic_set_extract_equalities(lineality);
6736 n = isl_mat_rows(eq);
6737 eq = isl_mat_insert_zero_rows(eq, 0, 1);
6738 eq = isl_mat_set_element_si(eq, 0, 0, 1);
6739 space = isl_space_from_domain(space);
6740 space = isl_space_add_dims(space, isl_dim_out, n);
6741 ma = isl_multi_aff_from_aff_mat(space, eq);
6742 mpa = isl_multi_pw_aff_from_multi_aff(ma);
6743 map = isl_multi_pw_aff_eq_map(mpa, isl_multi_pw_aff_copy(mpa));
6744 data->equivalent = isl_union_map_add_map(data->equivalent, map);
6746 isl_basic_set_free(lineality);
6747 return isl_stat_ok;
6748 error:
6749 isl_basic_set_free(lineality);
6750 return isl_stat_error;
6753 /* Check if the lineality space "set" is non-trivial (i.e., is not just
6754 * the origin or, in other words, satisfies a number of equality constraints
6755 * that is smaller than the dimension of the set).
6756 * If so, extend data->mask and data->equivalent accordingly.
6758 * The input should not have any local variables already, but
6759 * isl_set_remove_divs is called to make sure it does not.
6761 static isl_stat add_lineality(__isl_take isl_set *set, void *user)
6763 struct isl_exploit_lineality_data *data = user;
6764 isl_basic_set *hull;
6765 int dim, n_eq;
6767 set = isl_set_remove_divs(set);
6768 hull = isl_set_unshifted_simple_hull(set);
6769 dim = isl_basic_set_dim(hull, isl_dim_set);
6770 n_eq = isl_basic_set_n_equality(hull);
6771 if (!hull)
6772 return isl_stat_error;
6773 if (dim != n_eq)
6774 return add_non_trivial_lineality(hull, data);
6775 isl_basic_set_free(hull);
6776 return isl_stat_ok;
6779 /* Check if the difference set on intra-node schedule constraints "intra"
6780 * has any non-trivial lineality space.
6781 * If so, then extend the difference set to a difference set
6782 * on equivalent elements. That is, if "intra" is
6784 * { y - x : (x,y) \in V }
6786 * and elements are equivalent if they have the same image under f,
6787 * then return
6789 * { y' - x' : (x,y) \in V and f(x) = f(x') and f(y) = f(y') }
6791 * or, since f is linear,
6793 * { y' - x' : (x,y) \in V and f(y - x) = f(y' - x') }
6795 * The results of the search for non-trivial lineality spaces is stored
6796 * in "data".
6798 static __isl_give isl_union_set *exploit_intra_lineality(
6799 __isl_take isl_union_set *intra,
6800 struct isl_exploit_lineality_data *data)
6802 isl_union_set *lineality;
6803 isl_union_set *uset;
6805 data->any_non_trivial = isl_bool_false;
6806 lineality = isl_union_set_copy(intra);
6807 lineality = isl_union_set_combined_lineality_space(lineality);
6808 if (isl_union_set_foreach_set(lineality, &add_lineality, data) < 0)
6809 data->any_non_trivial = isl_bool_error;
6810 isl_union_set_free(lineality);
6812 if (data->any_non_trivial < 0)
6813 return isl_union_set_free(intra);
6814 if (!data->any_non_trivial)
6815 return intra;
6817 uset = isl_union_set_copy(intra);
6818 intra = isl_union_set_subtract(intra, isl_union_set_copy(data->mask));
6819 uset = isl_union_set_apply(uset, isl_union_map_copy(data->equivalent));
6820 intra = isl_union_set_union(intra, uset);
6822 intra = isl_union_set_remove_divs(intra);
6824 return intra;
6827 /* If the difference set on intra-node schedule constraints was found to have
6828 * any non-trivial lineality space by exploit_intra_lineality,
6829 * as recorded in "data", then extend the inter-node
6830 * schedule constraints "inter" to schedule constraints on equivalent elements.
6831 * That is, if "inter" is V and
6832 * elements are equivalent if they have the same image under f, then return
6834 * { (x', y') : (x,y) \in V and f(x) = f(x') and f(y) = f(y') }
6836 static __isl_give isl_union_map *exploit_inter_lineality(
6837 __isl_take isl_union_map *inter,
6838 struct isl_exploit_lineality_data *data)
6840 isl_union_map *umap;
6842 if (data->any_non_trivial < 0)
6843 return isl_union_map_free(inter);
6844 if (!data->any_non_trivial)
6845 return inter;
6847 umap = isl_union_map_copy(inter);
6848 inter = isl_union_map_subtract_range(inter,
6849 isl_union_set_copy(data->mask));
6850 umap = isl_union_map_apply_range(umap,
6851 isl_union_map_copy(data->equivalent));
6852 inter = isl_union_map_union(inter, umap);
6853 umap = isl_union_map_copy(inter);
6854 inter = isl_union_map_subtract_domain(inter,
6855 isl_union_set_copy(data->mask));
6856 umap = isl_union_map_apply_range(isl_union_map_copy(data->equivalent),
6857 umap);
6858 inter = isl_union_map_union(inter, umap);
6860 inter = isl_union_map_remove_divs(inter);
6862 return inter;
6865 /* For each (conditional) validity edge in "graph",
6866 * add the corresponding dependence relation using "add"
6867 * to a collection of dependence relations and return the result.
6868 * If "coincidence" is set, then coincidence edges are considered as well.
6870 static __isl_give isl_union_map *collect_validity(struct isl_sched_graph *graph,
6871 __isl_give isl_union_map *(*add)(__isl_take isl_union_map *umap,
6872 struct isl_sched_edge *edge), int coincidence)
6874 int i;
6875 isl_space *space;
6876 isl_union_map *umap;
6878 space = isl_space_copy(graph->node[0].space);
6879 umap = isl_union_map_empty(space);
6881 for (i = 0; i < graph->n_edge; ++i) {
6882 struct isl_sched_edge *edge = &graph->edge[i];
6884 if (!is_any_validity(edge) &&
6885 (!coincidence || !is_coincidence(edge)))
6886 continue;
6888 umap = add(umap, edge);
6891 return umap;
6894 /* Project out all parameters from "uset" and return the result.
6896 static __isl_give isl_union_set *union_set_drop_parameters(
6897 __isl_take isl_union_set *uset)
6899 unsigned nparam;
6901 nparam = isl_union_set_dim(uset, isl_dim_param);
6902 return isl_union_set_project_out(uset, isl_dim_param, 0, nparam);
6905 /* For each dependence relation on a (conditional) validity edge
6906 * from a node to itself,
6907 * construct the set of coefficients of valid constraints for elements
6908 * in that dependence relation and collect the results.
6909 * If "coincidence" is set, then coincidence edges are considered as well.
6911 * In particular, for each dependence relation R, constraints
6912 * on coefficients (c_0, c_x) are constructed such that
6914 * c_0 + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
6916 * If the schedule_treat_coalescing option is set, then some constraints
6917 * that could be exploited to construct coalescing schedules
6918 * are removed before the dual is computed, but after the parameters
6919 * have been projected out.
6920 * The entire computation is essentially the same as that performed
6921 * by intra_coefficients, except that it operates on multiple
6922 * edges together and that the parameters are always projected out.
6924 * Additionally, exploit any non-trivial lineality space
6925 * in the difference set after removing coalescing constraints and
6926 * store the results of the non-trivial lineality space detection in "data".
6927 * The procedure is currently run unconditionally, but it is unlikely
6928 * to find any non-trivial lineality spaces if no coalescing constraints
6929 * have been removed.
6931 * Note that if a dependence relation is a union of basic maps,
6932 * then each basic map needs to be treated individually as it may only
6933 * be possible to carry the dependences expressed by some of those
6934 * basic maps and not all of them.
6935 * The collected validity constraints are therefore not coalesced and
6936 * it is assumed that they are not coalesced automatically.
6937 * Duplicate basic maps can be removed, however.
6938 * In particular, if the same basic map appears as a disjunct
6939 * in multiple edges, then it only needs to be carried once.
6941 static __isl_give isl_basic_set_list *collect_intra_validity(isl_ctx *ctx,
6942 struct isl_sched_graph *graph, int coincidence,
6943 struct isl_exploit_lineality_data *data)
6945 isl_union_map *intra;
6946 isl_union_set *delta;
6947 isl_basic_set_list *list;
6949 intra = collect_validity(graph, &add_intra, coincidence);
6950 delta = isl_union_map_deltas(intra);
6951 delta = union_set_drop_parameters(delta);
6952 delta = isl_union_set_remove_divs(delta);
6953 if (isl_options_get_schedule_treat_coalescing(ctx))
6954 delta = union_drop_coalescing_constraints(ctx, graph, delta);
6955 delta = exploit_intra_lineality(delta, data);
6956 list = isl_union_set_get_basic_set_list(delta);
6957 isl_union_set_free(delta);
6959 return isl_basic_set_list_coefficients(list);
6962 /* For each dependence relation on a (conditional) validity edge
6963 * from a node to some other node,
6964 * construct the set of coefficients of valid constraints for elements
6965 * in that dependence relation and collect the results.
6966 * If "coincidence" is set, then coincidence edges are considered as well.
6968 * In particular, for each dependence relation R, constraints
6969 * on coefficients (c_0, c_n, c_x, c_y) are constructed such that
6971 * c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
6973 * This computation is essentially the same as that performed
6974 * by inter_coefficients, except that it operates on multiple
6975 * edges together.
6977 * Additionally, exploit any non-trivial lineality space
6978 * that may have been discovered by collect_intra_validity
6979 * (as stored in "data").
6981 * Note that if a dependence relation is a union of basic maps,
6982 * then each basic map needs to be treated individually as it may only
6983 * be possible to carry the dependences expressed by some of those
6984 * basic maps and not all of them.
6985 * The collected validity constraints are therefore not coalesced and
6986 * it is assumed that they are not coalesced automatically.
6987 * Duplicate basic maps can be removed, however.
6988 * In particular, if the same basic map appears as a disjunct
6989 * in multiple edges, then it only needs to be carried once.
6991 static __isl_give isl_basic_set_list *collect_inter_validity(
6992 struct isl_sched_graph *graph, int coincidence,
6993 struct isl_exploit_lineality_data *data)
6995 isl_union_map *inter;
6996 isl_union_set *wrap;
6997 isl_basic_set_list *list;
6999 inter = collect_validity(graph, &add_inter, coincidence);
7000 inter = exploit_inter_lineality(inter, data);
7001 inter = isl_union_map_remove_divs(inter);
7002 wrap = isl_union_map_wrap(inter);
7003 list = isl_union_set_get_basic_set_list(wrap);
7004 isl_union_set_free(wrap);
7005 return isl_basic_set_list_coefficients(list);
7008 /* Construct an LP problem for finding schedule coefficients
7009 * such that the schedule carries as many of the "n_edge" groups of
7010 * dependences as possible based on the corresponding coefficient
7011 * constraints and return the lexicographically smallest non-trivial solution.
7012 * "intra" is the sequence of coefficient constraints for intra-node edges.
7013 * "inter" is the sequence of coefficient constraints for inter-node edges.
7014 * If "want_integral" is set, then compute an integral solution
7015 * for the coefficients rather than using the numerators
7016 * of a rational solution.
7017 * "carry_inter" indicates whether inter-node edges should be carried or
7018 * only respected.
7020 * If none of the "n_edge" groups can be carried
7021 * then return an empty vector.
7023 static __isl_give isl_vec *compute_carrying_sol_coef(isl_ctx *ctx,
7024 struct isl_sched_graph *graph, int n_edge,
7025 __isl_keep isl_basic_set_list *intra,
7026 __isl_keep isl_basic_set_list *inter, int want_integral,
7027 int carry_inter)
7029 isl_basic_set *lp;
7031 if (setup_carry_lp(ctx, graph, n_edge, intra, inter, carry_inter) < 0)
7032 return NULL;
7034 lp = isl_basic_set_copy(graph->lp);
7035 return non_neg_lexmin(graph, lp, n_edge, want_integral);
7038 /* Construct an LP problem for finding schedule coefficients
7039 * such that the schedule carries as many of the validity dependences
7040 * as possible and
7041 * return the lexicographically smallest non-trivial solution.
7042 * If "fallback" is set, then the carrying is performed as a fallback
7043 * for the Pluto-like scheduler.
7044 * If "coincidence" is set, then try and carry coincidence edges as well.
7046 * The variable "n_edge" stores the number of groups that should be carried.
7047 * If none of the "n_edge" groups can be carried
7048 * then return an empty vector.
7049 * If, moreover, "n_edge" is zero, then the LP problem does not even
7050 * need to be constructed.
7052 * If a fallback solution is being computed, then compute an integral solution
7053 * for the coefficients rather than using the numerators
7054 * of a rational solution.
7056 * If a fallback solution is being computed, if there are any intra-node
7057 * dependences, and if requested by the user, then first try
7058 * to only carry those intra-node dependences.
7059 * If this fails to carry any dependences, then try again
7060 * with the inter-node dependences included.
7062 static __isl_give isl_vec *compute_carrying_sol(isl_ctx *ctx,
7063 struct isl_sched_graph *graph, int fallback, int coincidence)
7065 int n_intra, n_inter;
7066 int n_edge;
7067 struct isl_carry carry = { 0 };
7068 isl_vec *sol;
7070 carry.intra = collect_intra_validity(ctx, graph, coincidence,
7071 &carry.lineality);
7072 carry.inter = collect_inter_validity(graph, coincidence,
7073 &carry.lineality);
7074 if (!carry.intra || !carry.inter)
7075 goto error;
7076 n_intra = isl_basic_set_list_n_basic_set(carry.intra);
7077 n_inter = isl_basic_set_list_n_basic_set(carry.inter);
7079 if (fallback && n_intra > 0 &&
7080 isl_options_get_schedule_carry_self_first(ctx)) {
7081 sol = compute_carrying_sol_coef(ctx, graph, n_intra,
7082 carry.intra, carry.inter, fallback, 0);
7083 if (!sol || sol->size != 0 || n_inter == 0) {
7084 isl_carry_clear(&carry);
7085 return sol;
7087 isl_vec_free(sol);
7090 n_edge = n_intra + n_inter;
7091 if (n_edge == 0) {
7092 isl_carry_clear(&carry);
7093 return isl_vec_alloc(ctx, 0);
7096 sol = compute_carrying_sol_coef(ctx, graph, n_edge,
7097 carry.intra, carry.inter, fallback, 1);
7098 isl_carry_clear(&carry);
7099 return sol;
7100 error:
7101 isl_carry_clear(&carry);
7102 return NULL;
7105 /* Construct a schedule row for each node such that as many validity dependences
7106 * as possible are carried and then continue with the next band.
7107 * If "fallback" is set, then the carrying is performed as a fallback
7108 * for the Pluto-like scheduler.
7109 * If "coincidence" is set, then try and carry coincidence edges as well.
7111 * If there are no validity dependences, then no dependence can be carried and
7112 * the procedure is guaranteed to fail. If there is more than one component,
7113 * then try computing a schedule on each component separately
7114 * to prevent or at least postpone this failure.
7116 * If a schedule row is computed, then check that dependences are carried
7117 * for at least one of the edges.
7119 * If the computed schedule row turns out to be trivial on one or
7120 * more nodes where it should not be trivial, then we throw it away
7121 * and try again on each component separately.
7123 * If there is only one component, then we accept the schedule row anyway,
7124 * but we do not consider it as a complete row and therefore do not
7125 * increment graph->n_row. Note that the ranks of the nodes that
7126 * do get a non-trivial schedule part will get updated regardless and
7127 * graph->maxvar is computed based on these ranks. The test for
7128 * whether more schedule rows are required in compute_schedule_wcc
7129 * is therefore not affected.
7131 * Insert a band corresponding to the schedule row at position "node"
7132 * of the schedule tree and continue with the construction of the schedule.
7133 * This insertion and the continued construction is performed by split_scaled
7134 * after optionally checking for non-trivial common divisors.
7136 static __isl_give isl_schedule_node *carry(__isl_take isl_schedule_node *node,
7137 struct isl_sched_graph *graph, int fallback, int coincidence)
7139 int trivial;
7140 isl_ctx *ctx;
7141 isl_vec *sol;
7143 if (!node)
7144 return NULL;
7146 ctx = isl_schedule_node_get_ctx(node);
7147 sol = compute_carrying_sol(ctx, graph, fallback, coincidence);
7148 if (!sol)
7149 return isl_schedule_node_free(node);
7150 if (sol->size == 0) {
7151 isl_vec_free(sol);
7152 if (graph->scc > 1)
7153 return compute_component_schedule(node, graph, 1);
7154 isl_die(ctx, isl_error_unknown, "unable to carry dependences",
7155 return isl_schedule_node_free(node));
7158 trivial = is_any_trivial(graph, sol);
7159 if (trivial < 0) {
7160 sol = isl_vec_free(sol);
7161 } else if (trivial && graph->scc > 1) {
7162 isl_vec_free(sol);
7163 return compute_component_schedule(node, graph, 1);
7166 if (update_schedule(graph, sol, 0) < 0)
7167 return isl_schedule_node_free(node);
7168 if (trivial)
7169 graph->n_row--;
7171 return split_scaled(node, graph);
7174 /* Construct a schedule row for each node such that as many validity dependences
7175 * as possible are carried and then continue with the next band.
7176 * Do so as a fallback for the Pluto-like scheduler.
7177 * If "coincidence" is set, then try and carry coincidence edges as well.
7179 static __isl_give isl_schedule_node *carry_fallback(
7180 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
7181 int coincidence)
7183 return carry(node, graph, 1, coincidence);
7186 /* Construct a schedule row for each node such that as many validity dependences
7187 * as possible are carried and then continue with the next band.
7188 * Do so for the case where the Feautrier scheduler was selected
7189 * by the user.
7191 static __isl_give isl_schedule_node *carry_feautrier(
7192 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
7194 return carry(node, graph, 0, 0);
7197 /* Construct a schedule row for each node such that as many validity dependences
7198 * as possible are carried and then continue with the next band.
7199 * Do so as a fallback for the Pluto-like scheduler.
7201 static __isl_give isl_schedule_node *carry_dependences(
7202 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
7204 return carry_fallback(node, graph, 0);
7207 /* Construct a schedule row for each node such that as many validity or
7208 * coincidence dependences as possible are carried and
7209 * then continue with the next band.
7210 * Do so as a fallback for the Pluto-like scheduler.
7212 static __isl_give isl_schedule_node *carry_coincidence(
7213 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
7215 return carry_fallback(node, graph, 1);
7218 /* Topologically sort statements mapped to the same schedule iteration
7219 * and add insert a sequence node in front of "node"
7220 * corresponding to this order.
7221 * If "initialized" is set, then it may be assumed that compute_maxvar
7222 * has been called on the current band. Otherwise, call
7223 * compute_maxvar if and before carry_dependences gets called.
7225 * If it turns out to be impossible to sort the statements apart,
7226 * because different dependences impose different orderings
7227 * on the statements, then we extend the schedule such that
7228 * it carries at least one more dependence.
7230 static __isl_give isl_schedule_node *sort_statements(
7231 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
7232 int initialized)
7234 isl_ctx *ctx;
7235 isl_union_set_list *filters;
7237 if (!node)
7238 return NULL;
7240 ctx = isl_schedule_node_get_ctx(node);
7241 if (graph->n < 1)
7242 isl_die(ctx, isl_error_internal,
7243 "graph should have at least one node",
7244 return isl_schedule_node_free(node));
7246 if (graph->n == 1)
7247 return node;
7249 if (update_edges(ctx, graph) < 0)
7250 return isl_schedule_node_free(node);
7252 if (graph->n_edge == 0)
7253 return node;
7255 if (detect_sccs(ctx, graph) < 0)
7256 return isl_schedule_node_free(node);
7258 next_band(graph);
7259 if (graph->scc < graph->n) {
7260 if (!initialized && compute_maxvar(graph) < 0)
7261 return isl_schedule_node_free(node);
7262 return carry_dependences(node, graph);
7265 filters = extract_sccs(ctx, graph);
7266 node = isl_schedule_node_insert_sequence(node, filters);
7268 return node;
7271 /* Are there any (non-empty) (conditional) validity edges in the graph?
7273 static int has_validity_edges(struct isl_sched_graph *graph)
7275 int i;
7277 for (i = 0; i < graph->n_edge; ++i) {
7278 int empty;
7280 empty = isl_map_plain_is_empty(graph->edge[i].map);
7281 if (empty < 0)
7282 return -1;
7283 if (empty)
7284 continue;
7285 if (is_any_validity(&graph->edge[i]))
7286 return 1;
7289 return 0;
7292 /* Should we apply a Feautrier step?
7293 * That is, did the user request the Feautrier algorithm and are
7294 * there any validity dependences (left)?
7296 static int need_feautrier_step(isl_ctx *ctx, struct isl_sched_graph *graph)
7298 if (ctx->opt->schedule_algorithm != ISL_SCHEDULE_ALGORITHM_FEAUTRIER)
7299 return 0;
7301 return has_validity_edges(graph);
7304 /* Compute a schedule for a connected dependence graph using Feautrier's
7305 * multi-dimensional scheduling algorithm and return the updated schedule node.
7307 * The original algorithm is described in [1].
7308 * The main idea is to minimize the number of scheduling dimensions, by
7309 * trying to satisfy as many dependences as possible per scheduling dimension.
7311 * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
7312 * Problem, Part II: Multi-Dimensional Time.
7313 * In Intl. Journal of Parallel Programming, 1992.
7315 static __isl_give isl_schedule_node *compute_schedule_wcc_feautrier(
7316 isl_schedule_node *node, struct isl_sched_graph *graph)
7318 return carry_feautrier(node, graph);
7321 /* Turn off the "local" bit on all (condition) edges.
7323 static void clear_local_edges(struct isl_sched_graph *graph)
7325 int i;
7327 for (i = 0; i < graph->n_edge; ++i)
7328 if (is_condition(&graph->edge[i]))
7329 clear_local(&graph->edge[i]);
7332 /* Does "graph" have both condition and conditional validity edges?
7334 static int need_condition_check(struct isl_sched_graph *graph)
7336 int i;
7337 int any_condition = 0;
7338 int any_conditional_validity = 0;
7340 for (i = 0; i < graph->n_edge; ++i) {
7341 if (is_condition(&graph->edge[i]))
7342 any_condition = 1;
7343 if (is_conditional_validity(&graph->edge[i]))
7344 any_conditional_validity = 1;
7347 return any_condition && any_conditional_validity;
7350 /* Does "graph" contain any coincidence edge?
7352 static int has_any_coincidence(struct isl_sched_graph *graph)
7354 int i;
7356 for (i = 0; i < graph->n_edge; ++i)
7357 if (is_coincidence(&graph->edge[i]))
7358 return 1;
7360 return 0;
7363 /* Extract the final schedule row as a map with the iteration domain
7364 * of "node" as domain.
7366 static __isl_give isl_map *final_row(struct isl_sched_node *node)
7368 isl_multi_aff *ma;
7369 int row;
7371 row = isl_mat_rows(node->sched) - 1;
7372 ma = node_extract_partial_schedule_multi_aff(node, row, 1);
7373 return isl_map_from_multi_aff(ma);
7376 /* Is the conditional validity dependence in the edge with index "edge_index"
7377 * violated by the latest (i.e., final) row of the schedule?
7378 * That is, is i scheduled after j
7379 * for any conditional validity dependence i -> j?
7381 static int is_violated(struct isl_sched_graph *graph, int edge_index)
7383 isl_map *src_sched, *dst_sched, *map;
7384 struct isl_sched_edge *edge = &graph->edge[edge_index];
7385 int empty;
7387 src_sched = final_row(edge->src);
7388 dst_sched = final_row(edge->dst);
7389 map = isl_map_copy(edge->map);
7390 map = isl_map_apply_domain(map, src_sched);
7391 map = isl_map_apply_range(map, dst_sched);
7392 map = isl_map_order_gt(map, isl_dim_in, 0, isl_dim_out, 0);
7393 empty = isl_map_is_empty(map);
7394 isl_map_free(map);
7396 if (empty < 0)
7397 return -1;
7399 return !empty;
7402 /* Does "graph" have any satisfied condition edges that
7403 * are adjacent to the conditional validity constraint with
7404 * domain "conditional_source" and range "conditional_sink"?
7406 * A satisfied condition is one that is not local.
7407 * If a condition was forced to be local already (i.e., marked as local)
7408 * then there is no need to check if it is in fact local.
7410 * Additionally, mark all adjacent condition edges found as local.
7412 static int has_adjacent_true_conditions(struct isl_sched_graph *graph,
7413 __isl_keep isl_union_set *conditional_source,
7414 __isl_keep isl_union_set *conditional_sink)
7416 int i;
7417 int any = 0;
7419 for (i = 0; i < graph->n_edge; ++i) {
7420 int adjacent, local;
7421 isl_union_map *condition;
7423 if (!is_condition(&graph->edge[i]))
7424 continue;
7425 if (is_local(&graph->edge[i]))
7426 continue;
7428 condition = graph->edge[i].tagged_condition;
7429 adjacent = domain_intersects(condition, conditional_sink);
7430 if (adjacent >= 0 && !adjacent)
7431 adjacent = range_intersects(condition,
7432 conditional_source);
7433 if (adjacent < 0)
7434 return -1;
7435 if (!adjacent)
7436 continue;
7438 set_local(&graph->edge[i]);
7440 local = is_condition_false(graph, &graph->edge[i]);
7441 if (local < 0)
7442 return -1;
7443 if (!local)
7444 any = 1;
7447 return any;
7450 /* Are there any violated conditional validity dependences with
7451 * adjacent condition dependences that are not local with respect
7452 * to the current schedule?
7453 * That is, is the conditional validity constraint violated?
7455 * Additionally, mark all those adjacent condition dependences as local.
7456 * We also mark those adjacent condition dependences that were not marked
7457 * as local before, but just happened to be local already. This ensures
7458 * that they remain local if the schedule is recomputed.
7460 * We first collect domain and range of all violated conditional validity
7461 * dependences and then check if there are any adjacent non-local
7462 * condition dependences.
7464 static int has_violated_conditional_constraint(isl_ctx *ctx,
7465 struct isl_sched_graph *graph)
7467 int i;
7468 int any = 0;
7469 isl_union_set *source, *sink;
7471 source = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
7472 sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
7473 for (i = 0; i < graph->n_edge; ++i) {
7474 isl_union_set *uset;
7475 isl_union_map *umap;
7476 int violated;
7478 if (!is_conditional_validity(&graph->edge[i]))
7479 continue;
7481 violated = is_violated(graph, i);
7482 if (violated < 0)
7483 goto error;
7484 if (!violated)
7485 continue;
7487 any = 1;
7489 umap = isl_union_map_copy(graph->edge[i].tagged_validity);
7490 uset = isl_union_map_domain(umap);
7491 source = isl_union_set_union(source, uset);
7492 source = isl_union_set_coalesce(source);
7494 umap = isl_union_map_copy(graph->edge[i].tagged_validity);
7495 uset = isl_union_map_range(umap);
7496 sink = isl_union_set_union(sink, uset);
7497 sink = isl_union_set_coalesce(sink);
7500 if (any)
7501 any = has_adjacent_true_conditions(graph, source, sink);
7503 isl_union_set_free(source);
7504 isl_union_set_free(sink);
7505 return any;
7506 error:
7507 isl_union_set_free(source);
7508 isl_union_set_free(sink);
7509 return -1;
7512 /* Examine the current band (the rows between graph->band_start and
7513 * graph->n_total_row), deciding whether to drop it or add it to "node"
7514 * and then continue with the computation of the next band, if any.
7515 * If "initialized" is set, then it may be assumed that compute_maxvar
7516 * has been called on the current band. Otherwise, call
7517 * compute_maxvar if and before carry_dependences gets called.
7519 * The caller keeps looking for a new row as long as
7520 * graph->n_row < graph->maxvar. If the latest attempt to find
7521 * such a row failed (i.e., we still have graph->n_row < graph->maxvar),
7522 * then we either
7523 * - split between SCCs and start over (assuming we found an interesting
7524 * pair of SCCs between which to split)
7525 * - continue with the next band (assuming the current band has at least
7526 * one row)
7527 * - if there is more than one SCC left, then split along all SCCs
7528 * - if outer coincidence needs to be enforced, then try to carry as many
7529 * validity or coincidence dependences as possible and
7530 * continue with the next band
7531 * - try to carry as many validity dependences as possible and
7532 * continue with the next band
7533 * In each case, we first insert a band node in the schedule tree
7534 * if any rows have been computed.
7536 * If the caller managed to complete the schedule and the current band
7537 * is empty, then finish off by topologically
7538 * sorting the statements based on the remaining dependences.
7539 * If, on the other hand, the current band has at least one row,
7540 * then continue with the next band. Note that this next band
7541 * will necessarily be empty, but the graph may still be split up
7542 * into weakly connected components before arriving back here.
7544 static __isl_give isl_schedule_node *compute_schedule_finish_band(
7545 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
7546 int initialized)
7548 int empty;
7550 if (!node)
7551 return NULL;
7553 empty = graph->n_total_row == graph->band_start;
7554 if (graph->n_row < graph->maxvar) {
7555 isl_ctx *ctx;
7557 ctx = isl_schedule_node_get_ctx(node);
7558 if (!ctx->opt->schedule_maximize_band_depth && !empty)
7559 return compute_next_band(node, graph, 1);
7560 if (graph->src_scc >= 0)
7561 return compute_split_schedule(node, graph);
7562 if (!empty)
7563 return compute_next_band(node, graph, 1);
7564 if (graph->scc > 1)
7565 return compute_component_schedule(node, graph, 1);
7566 if (!initialized && compute_maxvar(graph) < 0)
7567 return isl_schedule_node_free(node);
7568 if (isl_options_get_schedule_outer_coincidence(ctx))
7569 return carry_coincidence(node, graph);
7570 return carry_dependences(node, graph);
7573 if (!empty)
7574 return compute_next_band(node, graph, 1);
7575 return sort_statements(node, graph, initialized);
7578 /* Construct a band of schedule rows for a connected dependence graph.
7579 * The caller is responsible for determining the strongly connected
7580 * components and calling compute_maxvar first.
7582 * We try to find a sequence of as many schedule rows as possible that result
7583 * in non-negative dependence distances (independent of the previous rows
7584 * in the sequence, i.e., such that the sequence is tilable), with as
7585 * many of the initial rows as possible satisfying the coincidence constraints.
7586 * The computation stops if we can't find any more rows or if we have found
7587 * all the rows we wanted to find.
7589 * If ctx->opt->schedule_outer_coincidence is set, then we force the
7590 * outermost dimension to satisfy the coincidence constraints. If this
7591 * turns out to be impossible, we fall back on the general scheme above
7592 * and try to carry as many dependences as possible.
7594 * If "graph" contains both condition and conditional validity dependences,
7595 * then we need to check that that the conditional schedule constraint
7596 * is satisfied, i.e., there are no violated conditional validity dependences
7597 * that are adjacent to any non-local condition dependences.
7598 * If there are, then we mark all those adjacent condition dependences
7599 * as local and recompute the current band. Those dependences that
7600 * are marked local will then be forced to be local.
7601 * The initial computation is performed with no dependences marked as local.
7602 * If we are lucky, then there will be no violated conditional validity
7603 * dependences adjacent to any non-local condition dependences.
7604 * Otherwise, we mark some additional condition dependences as local and
7605 * recompute. We continue this process until there are no violations left or
7606 * until we are no longer able to compute a schedule.
7607 * Since there are only a finite number of dependences,
7608 * there will only be a finite number of iterations.
7610 static isl_stat compute_schedule_wcc_band(isl_ctx *ctx,
7611 struct isl_sched_graph *graph)
7613 int has_coincidence;
7614 int use_coincidence;
7615 int force_coincidence = 0;
7616 int check_conditional;
7618 if (sort_sccs(graph) < 0)
7619 return isl_stat_error;
7621 clear_local_edges(graph);
7622 check_conditional = need_condition_check(graph);
7623 has_coincidence = has_any_coincidence(graph);
7625 if (ctx->opt->schedule_outer_coincidence)
7626 force_coincidence = 1;
7628 use_coincidence = has_coincidence;
7629 while (graph->n_row < graph->maxvar) {
7630 isl_vec *sol;
7631 int violated;
7632 int coincident;
7634 graph->src_scc = -1;
7635 graph->dst_scc = -1;
7637 if (setup_lp(ctx, graph, use_coincidence) < 0)
7638 return isl_stat_error;
7639 sol = solve_lp(ctx, graph);
7640 if (!sol)
7641 return isl_stat_error;
7642 if (sol->size == 0) {
7643 int empty = graph->n_total_row == graph->band_start;
7645 isl_vec_free(sol);
7646 if (use_coincidence && (!force_coincidence || !empty)) {
7647 use_coincidence = 0;
7648 continue;
7650 return isl_stat_ok;
7652 coincident = !has_coincidence || use_coincidence;
7653 if (update_schedule(graph, sol, coincident) < 0)
7654 return isl_stat_error;
7656 if (!check_conditional)
7657 continue;
7658 violated = has_violated_conditional_constraint(ctx, graph);
7659 if (violated < 0)
7660 return isl_stat_error;
7661 if (!violated)
7662 continue;
7663 if (reset_band(graph) < 0)
7664 return isl_stat_error;
7665 use_coincidence = has_coincidence;
7668 return isl_stat_ok;
7671 /* Compute a schedule for a connected dependence graph by considering
7672 * the graph as a whole and return the updated schedule node.
7674 * The actual schedule rows of the current band are computed by
7675 * compute_schedule_wcc_band. compute_schedule_finish_band takes
7676 * care of integrating the band into "node" and continuing
7677 * the computation.
7679 static __isl_give isl_schedule_node *compute_schedule_wcc_whole(
7680 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
7682 isl_ctx *ctx;
7684 if (!node)
7685 return NULL;
7687 ctx = isl_schedule_node_get_ctx(node);
7688 if (compute_schedule_wcc_band(ctx, graph) < 0)
7689 return isl_schedule_node_free(node);
7691 return compute_schedule_finish_band(node, graph, 1);
/* Clustering information used by compute_schedule_wcc_clustering.
 *
 * All entries of "scc" that belong to the same cluster
 * have the same number of schedule rows.
 * In "scc_cluster", each cluster is represented by the index
 * of the first SCC in the cluster.  Initially, each SCC belongs
 * to a cluster containing only that SCC.
 */
struct isl_clustering {
	/* The number of SCCs in the original dependence graph. */
	int n;
	/* Array of "n" elements, each representing an SCC
	 * of the original dependence graph.
	 */
	struct isl_sched_graph *scc;
	/* The merged clusters of SCCs after the clustering has completed. */
	struct isl_sched_graph *cluster;
	/* Maps each SCC index to the cluster to which it belongs. */
	int *scc_cluster;
	/* Temporary data used inside copy_partial: for each SCC,
	 * the number of nodes in the SCC that have already been copied.
	 */
	int *scc_node;
	/* Used by merge_clusters_along_edge to keep track
	 * of which SCCs need to be merged.
	 */
	int *scc_in_merge;
};
7724 /* Initialize the clustering data structure "c" from "graph".
7726 * In particular, allocate memory, extract the SCCs from "graph"
7727 * into c->scc, initialize scc_cluster and construct
7728 * a band of schedule rows for each SCC.
7729 * Within each SCC, there is only one SCC by definition.
7730 * Each SCC initially belongs to a cluster containing only that SCC.
7732 static isl_stat clustering_init(isl_ctx *ctx, struct isl_clustering *c,
7733 struct isl_sched_graph *graph)
7735 int i;
7737 c->n = graph->scc;
7738 c->scc = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
7739 c->cluster = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
7740 c->scc_cluster = isl_calloc_array(ctx, int, c->n);
7741 c->scc_node = isl_calloc_array(ctx, int, c->n);
7742 c->scc_in_merge = isl_calloc_array(ctx, int, c->n);
7743 if (!c->scc || !c->cluster ||
7744 !c->scc_cluster || !c->scc_node || !c->scc_in_merge)
7745 return isl_stat_error;
7747 for (i = 0; i < c->n; ++i) {
7748 if (extract_sub_graph(ctx, graph, &node_scc_exactly,
7749 &edge_scc_exactly, i, &c->scc[i]) < 0)
7750 return isl_stat_error;
7751 c->scc[i].scc = 1;
7752 if (compute_maxvar(&c->scc[i]) < 0)
7753 return isl_stat_error;
7754 if (compute_schedule_wcc_band(ctx, &c->scc[i]) < 0)
7755 return isl_stat_error;
7756 c->scc_cluster[i] = i;
7759 return isl_stat_ok;
7762 /* Free all memory allocated for "c".
7764 static void clustering_free(isl_ctx *ctx, struct isl_clustering *c)
7766 int i;
7768 if (c->scc)
7769 for (i = 0; i < c->n; ++i)
7770 graph_free(ctx, &c->scc[i]);
7771 free(c->scc);
7772 if (c->cluster)
7773 for (i = 0; i < c->n; ++i)
7774 graph_free(ctx, &c->cluster[i]);
7775 free(c->cluster);
7776 free(c->scc_cluster);
7777 free(c->scc_node);
7778 free(c->scc_in_merge);
7781 /* Should we refrain from merging the cluster in "graph" with
7782 * any other cluster?
7783 * In particular, is its current schedule band empty and incomplete.
7785 static int bad_cluster(struct isl_sched_graph *graph)
7787 return graph->n_row < graph->maxvar &&
7788 graph->n_total_row == graph->band_start;
7791 /* Is "edge" a proximity edge with a non-empty dependence relation?
7793 static isl_bool is_non_empty_proximity(struct isl_sched_edge *edge)
7795 if (!is_proximity(edge))
7796 return isl_bool_false;
7797 return isl_bool_not(isl_map_plain_is_empty(edge->map));
7800 /* Return the index of an edge in "graph" that can be used to merge
7801 * two clusters in "c".
7802 * Return graph->n_edge if no such edge can be found.
7803 * Return -1 on error.
7805 * In particular, return a proximity edge between two clusters
7806 * that is not marked "no_merge" and such that neither of the
7807 * two clusters has an incomplete, empty band.
7809 * If there are multiple such edges, then try and find the most
7810 * appropriate edge to use for merging. In particular, pick the edge
7811 * with the greatest weight. If there are multiple of those,
7812 * then pick one with the shortest distance between
7813 * the two cluster representatives.
7815 static int find_proximity(struct isl_sched_graph *graph,
7816 struct isl_clustering *c)
7818 int i, best = graph->n_edge, best_dist, best_weight;
7820 for (i = 0; i < graph->n_edge; ++i) {
7821 struct isl_sched_edge *edge = &graph->edge[i];
7822 int dist, weight;
7823 isl_bool prox;
7825 prox = is_non_empty_proximity(edge);
7826 if (prox < 0)
7827 return -1;
7828 if (!prox)
7829 continue;
7830 if (edge->no_merge)
7831 continue;
7832 if (bad_cluster(&c->scc[edge->src->scc]) ||
7833 bad_cluster(&c->scc[edge->dst->scc]))
7834 continue;
7835 dist = c->scc_cluster[edge->dst->scc] -
7836 c->scc_cluster[edge->src->scc];
7837 if (dist == 0)
7838 continue;
7839 weight = edge->weight;
7840 if (best < graph->n_edge) {
7841 if (best_weight > weight)
7842 continue;
7843 if (best_weight == weight && best_dist <= dist)
7844 continue;
7846 best = i;
7847 best_dist = dist;
7848 best_weight = weight;
7851 return best;
/* Internal data structure used in mark_merge_sccs.
 *
 * It is passed as the "user" argument to cluster_follows.
 */
struct isl_mark_merge_sccs_data {
	/* The dependence graph in which a strongly connected
	 * component is constructed.
	 */
	struct isl_sched_graph *graph;
	/* Maps each SCC index to the cluster to which it belongs. */
	int *scc_cluster;
	/* Indices of the two nodes that are being merged. */
	int src;
	int dst;
};
7868 /* Check whether the cluster containing node "i" depends on the cluster
7869 * containing node "j". If "i" and "j" belong to the same cluster,
7870 * then they are taken to depend on each other to ensure that
7871 * the resulting strongly connected component consists of complete
7872 * clusters. Furthermore, if "i" and "j" are the two nodes that
7873 * are being merged, then they are taken to depend on each other as well.
7874 * Otherwise, check if there is a (conditional) validity dependence
7875 * from node[j] to node[i], forcing node[i] to follow node[j].
7877 static isl_bool cluster_follows(int i, int j, void *user)
7879 struct isl_mark_merge_sccs_data *data = user;
7880 struct isl_sched_graph *graph = data->graph;
7881 int *scc_cluster = data->scc_cluster;
7883 if (data->src == i && data->dst == j)
7884 return isl_bool_true;
7885 if (data->src == j && data->dst == i)
7886 return isl_bool_true;
7887 if (scc_cluster[graph->node[i].scc] == scc_cluster[graph->node[j].scc])
7888 return isl_bool_true;
7890 return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]);
7893 /* Mark all SCCs that belong to either of the two clusters in "c"
7894 * connected by the edge in "graph" with index "edge", or to any
7895 * of the intermediate clusters.
7896 * The marking is recorded in c->scc_in_merge.
7898 * The given edge has been selected for merging two clusters,
7899 * meaning that there is at least a proximity edge between the two nodes.
7900 * However, there may also be (indirect) validity dependences
7901 * between the two nodes. When merging the two clusters, all clusters
7902 * containing one or more of the intermediate nodes along the
7903 * indirect validity dependences need to be merged in as well.
7905 * First collect all such nodes by computing the strongly connected
7906 * component (SCC) containing the two nodes connected by the edge, where
7907 * the two nodes are considered to depend on each other to make
7908 * sure they end up in the same SCC. Similarly, each node is considered
7909 * to depend on every other node in the same cluster to ensure
7910 * that the SCC consists of complete clusters.
7912 * Then the original SCCs that contain any of these nodes are marked
7913 * in c->scc_in_merge.
7915 static isl_stat mark_merge_sccs(isl_ctx *ctx, struct isl_sched_graph *graph,
7916 int edge, struct isl_clustering *c)
7918 struct isl_mark_merge_sccs_data data;
7919 struct isl_tarjan_graph *g;
7920 int i;
7922 for (i = 0; i < c->n; ++i)
7923 c->scc_in_merge[i] = 0;
7925 data.graph = graph;
7926 data.scc_cluster = c->scc_cluster;
7927 data.src = graph->edge[edge].src - graph->node;
7928 data.dst = graph->edge[edge].dst - graph->node;
7930 g = isl_tarjan_graph_component(ctx, graph->n, data.dst,
7931 &cluster_follows, &data);
7932 if (!g)
7933 goto error;
7935 i = g->op;
7936 if (i < 3)
7937 isl_die(ctx, isl_error_internal,
7938 "expecting at least two nodes in component",
7939 goto error);
7940 if (g->order[--i] != -1)
7941 isl_die(ctx, isl_error_internal,
7942 "expecting end of component marker", goto error);
7944 for (--i; i >= 0 && g->order[i] != -1; --i) {
7945 int scc = graph->node[g->order[i]].scc;
7946 c->scc_in_merge[scc] = 1;
7949 isl_tarjan_graph_free(g);
7950 return isl_stat_ok;
7951 error:
7952 isl_tarjan_graph_free(g);
7953 return isl_stat_error;
7956 /* Construct the identifier "cluster_i".
7958 static __isl_give isl_id *cluster_id(isl_ctx *ctx, int i)
7960 char name[40];
7962 snprintf(name, sizeof(name), "cluster_%d", i);
7963 return isl_id_alloc(ctx, name, NULL);
7966 /* Construct the space of the cluster with index "i" containing
7967 * the strongly connected component "scc".
7969 * In particular, construct a space called cluster_i with dimension equal
7970 * to the number of schedule rows in the current band of "scc".
7972 static __isl_give isl_space *cluster_space(struct isl_sched_graph *scc, int i)
7974 int nvar;
7975 isl_space *space;
7976 isl_id *id;
7978 nvar = scc->n_total_row - scc->band_start;
7979 space = isl_space_copy(scc->node[0].space);
7980 space = isl_space_params(space);
7981 space = isl_space_set_from_params(space);
7982 space = isl_space_add_dims(space, isl_dim_set, nvar);
7983 id = cluster_id(isl_space_get_ctx(space), i);
7984 space = isl_space_set_tuple_id(space, isl_dim_set, id);
7986 return space;
7989 /* Collect the domain of the graph for merging clusters.
7991 * In particular, for each cluster with first SCC "i", construct
7992 * a set in the space called cluster_i with dimension equal
7993 * to the number of schedule rows in the current band of the cluster.
7995 static __isl_give isl_union_set *collect_domain(isl_ctx *ctx,
7996 struct isl_sched_graph *graph, struct isl_clustering *c)
7998 int i;
7999 isl_space *space;
8000 isl_union_set *domain;
8002 space = isl_space_params_alloc(ctx, 0);
8003 domain = isl_union_set_empty(space);
8005 for (i = 0; i < graph->scc; ++i) {
8006 isl_space *space;
8008 if (!c->scc_in_merge[i])
8009 continue;
8010 if (c->scc_cluster[i] != i)
8011 continue;
8012 space = cluster_space(&c->scc[i], i);
8013 domain = isl_union_set_add_set(domain, isl_set_universe(space));
8016 return domain;
8019 /* Construct a map from the original instances to the corresponding
8020 * cluster instance in the current bands of the clusters in "c".
8022 static __isl_give isl_union_map *collect_cluster_map(isl_ctx *ctx,
8023 struct isl_sched_graph *graph, struct isl_clustering *c)
8025 int i, j;
8026 isl_space *space;
8027 isl_union_map *cluster_map;
8029 space = isl_space_params_alloc(ctx, 0);
8030 cluster_map = isl_union_map_empty(space);
8031 for (i = 0; i < graph->scc; ++i) {
8032 int start, n;
8033 isl_id *id;
8035 if (!c->scc_in_merge[i])
8036 continue;
8038 id = cluster_id(ctx, c->scc_cluster[i]);
8039 start = c->scc[i].band_start;
8040 n = c->scc[i].n_total_row - start;
8041 for (j = 0; j < c->scc[i].n; ++j) {
8042 isl_multi_aff *ma;
8043 isl_map *map;
8044 struct isl_sched_node *node = &c->scc[i].node[j];
8046 ma = node_extract_partial_schedule_multi_aff(node,
8047 start, n);
8048 ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out,
8049 isl_id_copy(id));
8050 map = isl_map_from_multi_aff(ma);
8051 cluster_map = isl_union_map_add_map(cluster_map, map);
8053 isl_id_free(id);
8056 return cluster_map;
8059 /* Add "umap" to the schedule constraints "sc" of all types of "edge"
8060 * that are not isl_edge_condition or isl_edge_conditional_validity.
8062 static __isl_give isl_schedule_constraints *add_non_conditional_constraints(
8063 struct isl_sched_edge *edge, __isl_keep isl_union_map *umap,
8064 __isl_take isl_schedule_constraints *sc)
8066 enum isl_edge_type t;
8068 if (!sc)
8069 return NULL;
8071 for (t = isl_edge_first; t <= isl_edge_last_sc; ++t) {
8072 if (t == isl_edge_condition ||
8073 t == isl_edge_conditional_validity)
8074 continue;
8075 if (!is_type(edge, t))
8076 continue;
8077 sc = isl_schedule_constraints_add(sc, t,
8078 isl_union_map_copy(umap));
8081 return sc;
8084 /* Add schedule constraints of types isl_edge_condition and
8085 * isl_edge_conditional_validity to "sc" by applying "umap" to
8086 * the domains of the wrapped relations in domain and range
8087 * of the corresponding tagged constraints of "edge".
8089 static __isl_give isl_schedule_constraints *add_conditional_constraints(
8090 struct isl_sched_edge *edge, __isl_keep isl_union_map *umap,
8091 __isl_take isl_schedule_constraints *sc)
8093 enum isl_edge_type t;
8094 isl_union_map *tagged;
8096 for (t = isl_edge_condition; t <= isl_edge_conditional_validity; ++t) {
8097 if (!is_type(edge, t))
8098 continue;
8099 if (t == isl_edge_condition)
8100 tagged = isl_union_map_copy(edge->tagged_condition);
8101 else
8102 tagged = isl_union_map_copy(edge->tagged_validity);
8103 tagged = isl_union_map_zip(tagged);
8104 tagged = isl_union_map_apply_domain(tagged,
8105 isl_union_map_copy(umap));
8106 tagged = isl_union_map_zip(tagged);
8107 sc = isl_schedule_constraints_add(sc, t, tagged);
8108 if (!sc)
8109 return NULL;
8112 return sc;
8115 /* Given a mapping "cluster_map" from the original instances to
8116 * the cluster instances, add schedule constraints on the clusters
8117 * to "sc" corresponding to the original constraints represented by "edge".
8119 * For non-tagged dependence constraints, the cluster constraints
8120 * are obtained by applying "cluster_map" to the edge->map.
8122 * For tagged dependence constraints, "cluster_map" needs to be applied
8123 * to the domains of the wrapped relations in domain and range
8124 * of the tagged dependence constraints. Pick out the mappings
8125 * from these domains from "cluster_map" and construct their product.
8126 * This mapping can then be applied to the pair of domains.
8128 static __isl_give isl_schedule_constraints *collect_edge_constraints(
8129 struct isl_sched_edge *edge, __isl_keep isl_union_map *cluster_map,
8130 __isl_take isl_schedule_constraints *sc)
8132 isl_union_map *umap;
8133 isl_space *space;
8134 isl_union_set *uset;
8135 isl_union_map *umap1, *umap2;
8137 if (!sc)
8138 return NULL;
8140 umap = isl_union_map_from_map(isl_map_copy(edge->map));
8141 umap = isl_union_map_apply_domain(umap,
8142 isl_union_map_copy(cluster_map));
8143 umap = isl_union_map_apply_range(umap,
8144 isl_union_map_copy(cluster_map));
8145 sc = add_non_conditional_constraints(edge, umap, sc);
8146 isl_union_map_free(umap);
8148 if (!sc || (!is_condition(edge) && !is_conditional_validity(edge)))
8149 return sc;
8151 space = isl_space_domain(isl_map_get_space(edge->map));
8152 uset = isl_union_set_from_set(isl_set_universe(space));
8153 umap1 = isl_union_map_copy(cluster_map);
8154 umap1 = isl_union_map_intersect_domain(umap1, uset);
8155 space = isl_space_range(isl_map_get_space(edge->map));
8156 uset = isl_union_set_from_set(isl_set_universe(space));
8157 umap2 = isl_union_map_copy(cluster_map);
8158 umap2 = isl_union_map_intersect_domain(umap2, uset);
8159 umap = isl_union_map_product(umap1, umap2);
8161 sc = add_conditional_constraints(edge, umap, sc);
8163 isl_union_map_free(umap);
8164 return sc;
8167 /* How many of the outer dimensions of the band of size "dim" starting
8168 * at "start" can be freely combined without destroying
8169 * the schedule rows that correspond to the inner part
8170 * of any of the intra-statement consecutivity constraints of "node".
8172 * If the first such row belongs to an outer band,
8173 * then all schedule rows need to be preserved and
8174 * the number of rows that can be freely combined is zero.
8175 * Otherwise, it's the minimal number of rows that lie
8176 * outside the first row corresponding to an inner part.
8177 * Only take into account intra-statement consecutivity constraints
8178 * that are still active.
8180 static int n_outer_in_band(struct isl_sched_node *node, int start, int dim)
8182 struct isl_sched_intra *intra;
8183 int n_outer;
8185 n_outer = dim;
8186 for (intra = node->intra; intra; intra = intra->next) {
8187 if (intra->n_fixed < 0)
8188 continue;
8189 if (intra->n_fixed == 0)
8190 continue;
8191 if (intra->first_fixed <= start)
8192 return 0;
8193 else if (n_outer > intra->first_fixed - start)
8194 n_outer = intra->first_fixed - start;
8197 return n_outer;
8200 /* Construct the space for an intra-statement consecutivity constraint
8201 * on domain "space" with "n_outer" outer expressions and
8202 * "n_inner" inner expressions.
8204 * The returned space is of the form
8206 * space -> [outer -> inner]
8208 static __isl_give isl_space *intra_space(__isl_take isl_space *space,
8209 int n_outer, int n_inner)
8211 isl_space *space2;
8213 space = isl_space_from_domain(space);
8214 space2 = isl_space_copy(space);
8215 space = isl_space_add_dims(space, isl_dim_out, n_outer);
8216 space2 = isl_space_add_dims(space2, isl_dim_out, n_inner);
8217 space = isl_space_range_product(space, space2);
8219 return space;
8222 /* Add an intra-statement consecutivity constraint on the cluster
8223 * with index "cluster" to "sc" that preserves the schedule rows of "node"
8224 * that correspond to inner parts of the original intra-statement
8225 * consecutivity constraints.
8227 * The constraint is an identity affine expression with the inner part
8228 * covering all the rows in the schedule that correspond to
8229 * inner parts of the original intra-statement consecutivity constraints.
8230 * This ensures that those inner parts are not modified and remain innermost
8231 * in the band.
8232 * If the inner part would be empty (this includes the case where there are no
8233 * original intra-statement consecutivity constraints), then
8234 * no constraint needs to be added.
8236 static __isl_give isl_schedule_constraints *collect_intra_consecutivity(
8237 struct isl_sched_node *node, __isl_take isl_schedule_constraints *sc,
8238 int cluster, int start, int dim)
8240 int n_outer;
8241 isl_ctx *ctx;
8242 isl_id *id;
8243 isl_space *space;
8244 isl_multi_aff *ma;
8245 isl_multi_aff_list *list;
8247 n_outer = n_outer_in_band(node, start, dim);
8248 if (n_outer == dim)
8249 return sc;
8251 ctx = isl_schedule_constraints_get_ctx(sc);
8252 id = cluster_id(ctx, cluster);
8253 space = isl_space_set_alloc(ctx, 0, dim);
8254 space = isl_space_set_tuple_id(space, isl_dim_set, id);
8255 space = intra_space(space, n_outer, dim - n_outer);
8256 ma = isl_multi_aff_identity(space);
8258 list = isl_schedule_constraints_get_intra_consecutivity(sc);
8259 list = isl_multi_aff_list_add(list, ma);
8260 sc = isl_schedule_constraints_set_intra_consecutivity(sc, list);
8262 return sc;
8265 /* Given a mapping "cluster_map" from the original instances to
8266 * the cluster instances, add schedule constraints on the clusters
8267 * to "sc" corresponding to all edges in "graph" between nodes that
8268 * belong to SCCs that are marked for merging in "c".
8269 * Also add intra-statement consecutivity constraints
8270 * that preserve the schedule rows that correspond
8271 * to the inner parts of intra-statement consecutivity constraints
8272 * on the original graph.
8273 * Any inter-statement consecutivity constraint forces the two
8274 * corresponding statements to be part of the same cluster.
8275 * There are therefore no cross-cluster inter-statement consecutivity
8276 * constraints.
8278 static __isl_give isl_schedule_constraints *collect_constraints(
8279 struct isl_sched_graph *graph, struct isl_clustering *c,
8280 __isl_keep isl_union_map *cluster_map,
8281 __isl_take isl_schedule_constraints *sc)
8283 int i;
8285 for (i = 0; i < graph->n_edge; ++i) {
8286 struct isl_sched_edge *edge = &graph->edge[i];
8288 if (!c->scc_in_merge[edge->src->scc])
8289 continue;
8290 if (!c->scc_in_merge[edge->dst->scc])
8291 continue;
8292 sc = collect_edge_constraints(edge, cluster_map, sc);
8295 for (i = 0; i < graph->n; ++i) {
8296 struct isl_sched_node *node = &graph->node[i];
8297 struct isl_sched_graph *scc;
8298 int start, dim, cluster;
8300 if (!c->scc_in_merge[node->scc])
8301 continue;
8302 cluster = c->scc_cluster[node->scc];
8303 scc = &c->scc[node->scc];
8304 start = scc->band_start;
8305 dim = scc->n_total_row - start;
8306 sc = collect_intra_consecutivity(node, sc, cluster, start, dim);
8309 return sc;
8312 /* Construct a dependence graph for scheduling clusters with respect
8313 * to each other and store the result in "merge_graph".
8314 * In particular, the nodes of the graph correspond to the schedule
8315 * dimensions of the current bands of those clusters that have been
8316 * marked for merging in "c".
8318 * First construct an isl_schedule_constraints object for this domain
8319 * by transforming the edges in "graph" to the domain.
8320 * Then initialize a dependence graph for scheduling from these
8321 * constraints.
8323 static isl_stat init_merge_graph(isl_ctx *ctx, struct isl_sched_graph *graph,
8324 struct isl_clustering *c, struct isl_sched_graph *merge_graph)
8326 isl_union_set *domain;
8327 isl_union_map *cluster_map;
8328 isl_schedule_constraints *sc;
8329 isl_stat r;
8331 domain = collect_domain(ctx, graph, c);
8332 sc = isl_schedule_constraints_on_domain(domain);
8333 if (!sc)
8334 return isl_stat_error;
8335 cluster_map = collect_cluster_map(ctx, graph, c);
8336 sc = collect_constraints(graph, c, cluster_map, sc);
8337 isl_union_map_free(cluster_map);
8339 r = graph_init(merge_graph, sc);
8341 isl_schedule_constraints_free(sc);
8343 return r;
8346 /* Compute the maximal number of remaining schedule rows that still need
8347 * to be computed for the nodes that belong to clusters with the maximal
8348 * dimension for the current band (i.e., the band that is to be merged).
8349 * Only clusters that are about to be merged are considered.
8350 * "maxvar" is the maximal dimension for the current band.
8351 * "c" contains information about the clusters.
8353 * Return the maximal number of remaining schedule rows or -1 on error.
8355 static int compute_maxvar_max_slack(int maxvar, struct isl_clustering *c)
8357 int i, j;
8358 int max_slack;
8360 max_slack = 0;
8361 for (i = 0; i < c->n; ++i) {
8362 int nvar;
8363 struct isl_sched_graph *scc;
8365 if (!c->scc_in_merge[i])
8366 continue;
8367 scc = &c->scc[i];
8368 nvar = scc->n_total_row - scc->band_start;
8369 if (nvar != maxvar)
8370 continue;
8371 for (j = 0; j < scc->n; ++j) {
8372 struct isl_sched_node *node = &scc->node[j];
8373 int slack;
8375 if (node_update_vmap(node) < 0)
8376 return -1;
8377 slack = node->nvar - node->rank;
8378 if (slack > max_slack)
8379 max_slack = slack;
8383 return max_slack;
8386 /* If there are any clusters where the dimension of the current band
8387 * (i.e., the band that is to be merged) is smaller than "maxvar" and
8388 * if there are any nodes in such a cluster where the number
8389 * of remaining schedule rows that still need to be computed
8390 * is greater than "max_slack", then return the smallest current band
8391 * dimension of all these clusters. Otherwise return the original value
8392 * of "maxvar". Return -1 in case of any error.
8393 * Only clusters that are about to be merged are considered.
8394 * "c" contains information about the clusters.
8396 static int limit_maxvar_to_slack(int maxvar, int max_slack,
8397 struct isl_clustering *c)
8399 int i, j;
8401 for (i = 0; i < c->n; ++i) {
8402 int nvar;
8403 struct isl_sched_graph *scc;
8405 if (!c->scc_in_merge[i])
8406 continue;
8407 scc = &c->scc[i];
8408 nvar = scc->n_total_row - scc->band_start;
8409 if (nvar >= maxvar)
8410 continue;
8411 for (j = 0; j < scc->n; ++j) {
8412 struct isl_sched_node *node = &scc->node[j];
8413 int slack;
8415 if (node_update_vmap(node) < 0)
8416 return -1;
8417 slack = node->nvar - node->rank;
8418 if (slack > max_slack) {
8419 maxvar = nvar;
8420 break;
8425 return maxvar;
8428 /* Adjust merge_graph->maxvar based on the number of remaining schedule rows
8429 * that still need to be computed. In particular, if there is a node
8430 * in a cluster where the dimension of the current band is smaller
8431 * than merge_graph->maxvar, but the number of remaining schedule rows
8432 * is greater than that of any node in a cluster with the maximal
8433 * dimension for the current band (i.e., merge_graph->maxvar),
8434 * then adjust merge_graph->maxvar to the (smallest) current band dimension
8435 * of those clusters. Without this adjustment, the total number of
8436 * schedule dimensions would be increased, resulting in a skewed view
8437 * of the number of coincident dimensions.
8438 * "c" contains information about the clusters.
8440 * If the maximize_band_depth option is set and merge_graph->maxvar is reduced,
8441 * then there is no point in attempting any merge since it will be rejected
8442 * anyway. Set merge_graph->maxvar to zero in such cases.
8444 static isl_stat adjust_maxvar_to_slack(isl_ctx *ctx,
8445 struct isl_sched_graph *merge_graph, struct isl_clustering *c)
8447 int max_slack, maxvar;
8449 max_slack = compute_maxvar_max_slack(merge_graph->maxvar, c);
8450 if (max_slack < 0)
8451 return isl_stat_error;
8452 maxvar = limit_maxvar_to_slack(merge_graph->maxvar, max_slack, c);
8453 if (maxvar < 0)
8454 return isl_stat_error;
8456 if (maxvar < merge_graph->maxvar) {
8457 if (isl_options_get_schedule_maximize_band_depth(ctx))
8458 merge_graph->maxvar = 0;
8459 else
8460 merge_graph->maxvar = maxvar;
8463 return isl_stat_ok;
8466 /* Return the number of coincident dimensions in the current band of "graph",
8467 * where the nodes of "graph" are assumed to be scheduled by a single band.
8469 static int get_n_coincident(struct isl_sched_graph *graph)
8471 int i;
8473 for (i = graph->band_start; i < graph->n_total_row; ++i)
8474 if (!graph->node[0].coincident[i])
8475 break;
8477 return i - graph->band_start;
8480 /* Should the clusters be merged based on the cluster schedule
8481 * in the current (and only) band of "merge_graph", given that
8482 * coincidence should be maximized?
8484 * If the number of coincident schedule dimensions in the merged band
8485 * would be less than the maximal number of coincident schedule dimensions
8486 * in any of the merged clusters, then the clusters should not be merged.
8488 static isl_bool ok_to_merge_coincident(struct isl_clustering *c,
8489 struct isl_sched_graph *merge_graph)
8491 int i;
8492 int n_coincident;
8493 int max_coincident;
8495 max_coincident = 0;
8496 for (i = 0; i < c->n; ++i) {
8497 if (!c->scc_in_merge[i])
8498 continue;
8499 n_coincident = get_n_coincident(&c->scc[i]);
8500 if (n_coincident > max_coincident)
8501 max_coincident = n_coincident;
8504 n_coincident = get_n_coincident(merge_graph);
8506 return n_coincident >= max_coincident;
8509 /* Return the transformation on "node" expressed by the current (and only)
8510 * band of "merge_graph" applied to the clusters in "c".
8512 * First find the representation of "node" in its SCC in "c" and
8513 * extract the transformation expressed by the current band.
8514 * Then extract the transformation applied by "merge_graph"
8515 * to the cluster to which this SCC belongs.
8516 * Combine the two to obtain the complete transformation on the node.
8518 * Note that the range of the first transformation is an anonymous space,
8519 * while the domain of the second is named "cluster_X". The range
8520 * of the former therefore needs to be adjusted before the two
8521 * can be combined.
8523 static __isl_give isl_map *extract_node_transformation(isl_ctx *ctx,
8524 struct isl_sched_node *node, struct isl_clustering *c,
8525 struct isl_sched_graph *merge_graph)
8527 struct isl_sched_node *scc_node, *cluster_node;
8528 int start, n;
8529 isl_id *id;
8530 isl_space *space;
8531 isl_multi_aff *ma, *ma2;
8533 scc_node = graph_find_node(ctx, &c->scc[node->scc], node->space);
8534 if (scc_node && !is_node(&c->scc[node->scc], scc_node))
8535 isl_die(ctx, isl_error_internal, "unable to find node",
8536 return NULL);
8537 start = c->scc[node->scc].band_start;
8538 n = c->scc[node->scc].n_total_row - start;
8539 ma = node_extract_partial_schedule_multi_aff(scc_node, start, n);
8540 space = cluster_space(&c->scc[node->scc], c->scc_cluster[node->scc]);
8541 cluster_node = graph_find_node(ctx, merge_graph, space);
8542 if (cluster_node && !is_node(merge_graph, cluster_node))
8543 isl_die(ctx, isl_error_internal, "unable to find cluster",
8544 space = isl_space_free(space));
8545 id = isl_space_get_tuple_id(space, isl_dim_set);
8546 ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out, id);
8547 isl_space_free(space);
8548 n = merge_graph->n_total_row;
8549 ma2 = node_extract_partial_schedule_multi_aff(cluster_node, 0, n);
8550 ma = isl_multi_aff_pullback_multi_aff(ma2, ma);
8552 return isl_map_from_multi_aff(ma);
8555 /* Give a set of distances "set", are they bounded by a small constant
8556 * in direction "pos"?
8557 * In practice, check if they are bounded by 2 by checking that there
8558 * are no elements with a value greater than or equal to 3 or
8559 * smaller than or equal to -3.
8561 static isl_bool distance_is_bounded(__isl_keep isl_set *set, int pos)
8563 isl_bool bounded;
8564 isl_set *test;
8566 if (!set)
8567 return isl_bool_error;
8569 test = isl_set_copy(set);
8570 test = isl_set_lower_bound_si(test, isl_dim_set, pos, 3);
8571 bounded = isl_set_is_empty(test);
8572 isl_set_free(test);
8574 if (bounded < 0 || !bounded)
8575 return bounded;
8577 test = isl_set_copy(set);
8578 test = isl_set_upper_bound_si(test, isl_dim_set, pos, -3);
8579 bounded = isl_set_is_empty(test);
8580 isl_set_free(test);
8582 return bounded;
8585 /* Does the set "set" have a fixed (but possible parametric) value
8586 * at dimension "pos"?
8588 static isl_bool has_single_value(__isl_keep isl_set *set, int pos)
8590 int n;
8591 isl_bool single;
8593 if (!set)
8594 return isl_bool_error;
8595 set = isl_set_copy(set);
8596 n = isl_set_dim(set, isl_dim_set);
8597 set = isl_set_project_out(set, isl_dim_set, pos + 1, n - (pos + 1));
8598 set = isl_set_project_out(set, isl_dim_set, 0, pos);
8599 single = isl_set_is_singleton(set);
8600 isl_set_free(set);
8602 return single;
8605 /* Does "map" have a fixed (but possible parametric) value
8606 * at dimension "pos" of either its domain or its range?
8608 static isl_bool has_singular_src_or_dst(__isl_keep isl_map *map, int pos)
8610 isl_set *set;
8611 isl_bool single;
8613 set = isl_map_domain(isl_map_copy(map));
8614 single = has_single_value(set, pos);
8615 isl_set_free(set);
8617 if (single < 0 || single)
8618 return single;
8620 set = isl_map_range(isl_map_copy(map));
8621 single = has_single_value(set, pos);
8622 isl_set_free(set);
8624 return single;
8627 /* Does the edge "edge" from "graph" have bounded dependence distances
8628 * in the merged graph "merge_graph" of a selection of clusters in "c"?
8630 * Extract the complete transformations of the source and destination
8631 * nodes of the edge, apply them to the edge constraints and
8632 * compute the differences. Finally, check if these differences are bounded
8633 * in each direction.
8635 * If the dimension of the band is greater than the number of
8636 * dimensions that can be expected to be optimized by the edge
8637 * (based on its weight), then also allow the differences to be unbounded
8638 * in the remaining dimensions, but only if either the source or
8639 * the destination has a fixed value in that direction.
8640 * This allows a statement that produces values that are used by
8641 * several instances of another statement to be merged with that
8642 * other statement.
8643 * However, merging such clusters will introduce an inherently
8644 * large proximity distance inside the merged cluster, meaning
8645 * that proximity distances will no longer be optimized in
8646 * subsequent merges. These merges are therefore only allowed
8647 * after all other possible merges have been tried.
8648 * The first time such a merge is encountered, the weight of the edge
8649 * is replaced by a negative weight. The second time (i.e., after
8650 * all merges over edges with a non-negative weight have been tried),
8651 * the merge is allowed.
8653 static isl_bool has_bounded_distances(isl_ctx *ctx, struct isl_sched_edge *edge,
8654 struct isl_sched_graph *graph, struct isl_clustering *c,
8655 struct isl_sched_graph *merge_graph)
8657 int i, n, n_slack;
8658 isl_bool bounded;
8659 isl_map *map, *t;
8660 isl_set *dist;
8662 map = isl_map_copy(edge->map);
8663 t = extract_node_transformation(ctx, edge->src, c, merge_graph);
8664 map = isl_map_apply_domain(map, t);
8665 t = extract_node_transformation(ctx, edge->dst, c, merge_graph);
8666 map = isl_map_apply_range(map, t);
8667 dist = isl_map_deltas(isl_map_copy(map));
8669 bounded = isl_bool_true;
8670 n = isl_set_dim(dist, isl_dim_set);
8671 n_slack = n - edge->weight;
8672 if (edge->weight < 0)
8673 n_slack -= graph->max_weight + 1;
8674 for (i = 0; i < n; ++i) {
8675 isl_bool bounded_i, singular_i;
8677 bounded_i = distance_is_bounded(dist, i);
8678 if (bounded_i < 0)
8679 goto error;
8680 if (bounded_i)
8681 continue;
8682 if (edge->weight >= 0)
8683 bounded = isl_bool_false;
8684 n_slack--;
8685 if (n_slack < 0)
8686 break;
8687 singular_i = has_singular_src_or_dst(map, i);
8688 if (singular_i < 0)
8689 goto error;
8690 if (singular_i)
8691 continue;
8692 bounded = isl_bool_false;
8693 break;
8695 if (!bounded && i >= n && edge->weight >= 0)
8696 edge->weight -= graph->max_weight + 1;
8697 isl_map_free(map);
8698 isl_set_free(dist);
8700 return bounded;
8701 error:
8702 isl_map_free(map);
8703 isl_set_free(dist);
8704 return isl_bool_error;
8707 /* Should the clusters be merged based on the cluster schedule
8708 * in the current (and only) band of "merge_graph"?
8709 * "graph" is the original dependence graph, while "c" records
8710 * which SCCs are involved in the latest merge.
8712 * In particular, is there at least one proximity constraint
8713 * that is optimized by the merge?
8715 * A proximity constraint is considered to be optimized
8716 * if the dependence distances are small.
8718 static isl_bool ok_to_merge_proximity(isl_ctx *ctx,
8719 struct isl_sched_graph *graph, struct isl_clustering *c,
8720 struct isl_sched_graph *merge_graph)
8722 int i;
8724 for (i = 0; i < graph->n_edge; ++i) {
8725 struct isl_sched_edge *edge = &graph->edge[i];
8726 isl_bool bounded;
8728 if (!is_proximity(edge))
8729 continue;
8730 if (!c->scc_in_merge[edge->src->scc])
8731 continue;
8732 if (!c->scc_in_merge[edge->dst->scc])
8733 continue;
8734 if (c->scc_cluster[edge->dst->scc] ==
8735 c->scc_cluster[edge->src->scc])
8736 continue;
8737 bounded = has_bounded_distances(ctx, edge, graph, c,
8738 merge_graph);
8739 if (bounded < 0 || bounded)
8740 return bounded;
8743 return isl_bool_false;
8746 /* Should the clusters be merged based on the cluster schedule
8747 * in the current (and only) band of "merge_graph"?
8748 * "graph" is the original dependence graph, while "c" records
8749 * which SCCs are involved in the latest merge.
8751 * If the current band is empty, then the clusters should not be merged.
8753 * If the band depth should be maximized and the merge schedule
8754 * is incomplete (meaning that the dimension of some of the schedule
8755 * bands in the original schedule will be reduced), then the clusters
8756 * should not be merged.
8758 * If the schedule_maximize_coincidence option is set, then check that
8759 * the number of coincident schedule dimensions is not reduced.
8761 * Finally, only allow the merge if at least one proximity
8762 * constraint is optimized.
8764 static isl_bool ok_to_merge(isl_ctx *ctx, struct isl_sched_graph *graph,
8765 struct isl_clustering *c, struct isl_sched_graph *merge_graph)
8767 if (merge_graph->n_total_row == merge_graph->band_start)
8768 return isl_bool_false;
8770 if (isl_options_get_schedule_maximize_band_depth(ctx) &&
8771 merge_graph->n_total_row < merge_graph->maxvar)
8772 return isl_bool_false;
8774 if (isl_options_get_schedule_maximize_coincidence(ctx)) {
8775 isl_bool ok;
8777 ok = ok_to_merge_coincident(c, merge_graph);
8778 if (ok < 0 || !ok)
8779 return ok;
8782 return ok_to_merge_proximity(ctx, graph, c, merge_graph);
8785 /* Apply the schedule in "t_node" to the "n" rows starting at "first"
8786 * of the schedule in "node" and return the result.
8788 * That is, essentially compute
8790 * T * N(first:first+n-1)
8792 * taking into account the constant term and the parameter coefficients
8793 * in "t_node".
8795 static __isl_give isl_mat *node_transformation(isl_ctx *ctx,
8796 struct isl_sched_node *t_node, struct isl_sched_node *node,
8797 int first, int n)
8799 int i, j;
8800 isl_mat *t;
8801 int n_row, n_col, n_param, n_var;
8803 n_param = node->nparam;
8804 n_var = node->nvar;
8805 n_row = isl_mat_rows(t_node->sched);
8806 n_col = isl_mat_cols(node->sched);
8807 t = isl_mat_alloc(ctx, n_row, n_col);
8808 if (!t)
8809 return NULL;
8810 for (i = 0; i < n_row; ++i) {
8811 isl_seq_cpy(t->row[i], t_node->sched->row[i], 1 + n_param);
8812 isl_seq_clr(t->row[i] + 1 + n_param, n_var);
8813 for (j = 0; j < n; ++j)
8814 isl_seq_addmul(t->row[i],
8815 t_node->sched->row[i][1 + n_param + j],
8816 node->sched->row[first + j],
8817 1 + n_param + n_var);
8819 return t;
8822 /* Apply the cluster schedule in "t_node" to the current band
8823 * schedule of the nodes in "graph".
8825 * In particular, replace the rows starting at band_start
8826 * by the result of applying the cluster schedule in "t_node"
8827 * to the original rows.
8829 * The coincidence of the schedule is determined by the coincidence
8830 * of the cluster schedule.
8832 static isl_stat transform(isl_ctx *ctx, struct isl_sched_graph *graph,
8833 struct isl_sched_node *t_node)
8835 int i, j;
8836 int n_new;
8837 int start, n;
8839 start = graph->band_start;
8840 n = graph->n_total_row - start;
8842 n_new = isl_mat_rows(t_node->sched);
8843 for (i = 0; i < graph->n; ++i) {
8844 struct isl_sched_node *node = &graph->node[i];
8845 isl_mat *t;
8847 t = node_transformation(ctx, t_node, node, start, n);
8848 node->sched = isl_mat_drop_rows(node->sched, start, n);
8849 node->sched = isl_mat_concat(node->sched, t);
8850 node->band_sched = isl_map_free(node->band_sched);
8851 if (!node->sched)
8852 return isl_stat_error;
8853 for (j = 0; j < n_new; ++j)
8854 node->coincident[start + j] = t_node->coincident[j];
8856 graph->n_total_row -= n;
8857 graph->n_row -= n;
8858 graph->n_total_row += n_new;
8859 graph->n_row += n_new;
8861 return isl_stat_ok;
8864 /* Merge the clusters marked for merging in "c" into a single
8865 * cluster using the cluster schedule in the current band of "merge_graph".
8866 * The representative SCC for the new cluster is the SCC with
8867 * the smallest index.
8869 * The current band schedule of each SCC in the new cluster is obtained
8870 * by applying the schedule of the corresponding original cluster
8871 * to the original band schedule.
8872 * All SCCs in the new cluster have the same number of schedule rows.
8874 static isl_stat merge(isl_ctx *ctx, struct isl_clustering *c,
8875 struct isl_sched_graph *merge_graph)
8877 int i;
8878 int cluster = -1;
8879 isl_space *space;
8881 for (i = 0; i < c->n; ++i) {
8882 struct isl_sched_node *node;
8884 if (!c->scc_in_merge[i])
8885 continue;
8886 if (cluster < 0)
8887 cluster = i;
8888 space = cluster_space(&c->scc[i], c->scc_cluster[i]);
8889 node = graph_find_node(ctx, merge_graph, space);
8890 isl_space_free(space);
8891 if (!node)
8892 return isl_stat_error;
8893 if (!is_node(merge_graph, node))
8894 isl_die(ctx, isl_error_internal,
8895 "unable to find cluster",
8896 return isl_stat_error);
8897 if (transform(ctx, &c->scc[i], node) < 0)
8898 return isl_stat_error;
8899 c->scc_cluster[i] = cluster;
8902 return isl_stat_ok;
8905 /* Try and merge the clusters of SCCs marked in c->scc_in_merge
8906 * by scheduling the current cluster bands with respect to each other.
8908 * Construct a dependence graph with a space for each cluster and
8909 * with the coordinates of each space corresponding to the schedule
8910 * dimensions of the current band of that cluster.
8911 * Construct a cluster schedule in this cluster dependence graph and
8912 * apply it to the current cluster bands if it is applicable
8913 * according to ok_to_merge.
8915 * If the number of remaining schedule dimensions in a cluster
8916 * with a non-maximal current schedule dimension is greater than
8917 * the number of remaining schedule dimensions in clusters
8918 * with a maximal current schedule dimension, then restrict
8919 * the number of rows to be computed in the cluster schedule
8920 * to the minimal such non-maximal current schedule dimension.
8921 * Do this by adjusting merge_graph.maxvar.
8923 * Return isl_bool_true if the clusters have effectively been merged
8924 * into a single cluster.
8926 * Note that since the standard scheduling algorithm minimizes the maximal
8927 * distance over proximity constraints, the proximity constraints between
8928 * the merged clusters may not be optimized any further than what is
8929 * sufficient to bring the distances within the limits of the internal
8930 * proximity constraints inside the individual clusters.
8931 * It may therefore make sense to perform an additional translation step
8932 * to bring the clusters closer to each other, while maintaining
8933 * the linear part of the merging schedule found using the standard
8934 * scheduling algorithm.
8936 static isl_bool try_merge(isl_ctx *ctx, struct isl_sched_graph *graph,
8937 struct isl_clustering *c)
8939 struct isl_sched_graph merge_graph = { 0 };
8940 isl_bool merged;
8942 if (init_merge_graph(ctx, graph, c, &merge_graph) < 0)
8943 goto error;
8945 if (compute_maxvar(&merge_graph) < 0)
8946 goto error;
8947 if (adjust_maxvar_to_slack(ctx, &merge_graph,c) < 0)
8948 goto error;
8949 if (compute_schedule_wcc_band(ctx, &merge_graph) < 0)
8950 goto error;
8951 merged = ok_to_merge(ctx, graph, c, &merge_graph);
8952 if (merged && merge(ctx, c, &merge_graph) < 0)
8953 goto error;
8955 graph_free(ctx, &merge_graph);
8956 return merged;
8957 error:
8958 graph_free(ctx, &merge_graph);
8959 return isl_bool_error;
8962 /* Is there any edge marked "no_merge" between two SCCs that are
8963 * about to be merged (i.e., that are set in "scc_in_merge")?
8964 * "merge_edge" is the proximity edge along which the clusters of SCCs
8965 * are going to be merged.
8967 * If there is any edge between two SCCs with a negative weight,
8968 * while the weight of "merge_edge" is non-negative, then this
8969 * means that the edge was postponed. "merge_edge" should then
8970 * also be postponed since merging along the edge with negative weight should
8971 * be postponed until all edges with non-negative weight have been tried.
8972 * Replace the weight of "merge_edge" by a negative weight as well and
8973 * tell the caller not to attempt a merge.
8975 static int any_no_merge(struct isl_sched_graph *graph, int *scc_in_merge,
8976 struct isl_sched_edge *merge_edge)
8978 int i;
8980 for (i = 0; i < graph->n_edge; ++i) {
8981 struct isl_sched_edge *edge = &graph->edge[i];
8983 if (!scc_in_merge[edge->src->scc])
8984 continue;
8985 if (!scc_in_merge[edge->dst->scc])
8986 continue;
8987 if (edge->no_merge)
8988 return 1;
8989 if (merge_edge->weight >= 0 && edge->weight < 0) {
8990 merge_edge->weight -= graph->max_weight + 1;
8991 return 1;
8995 return 0;
8998 /* Merge the two clusters in "c" connected by the edge in "graph"
8999 * with index "edge" into a single cluster.
9000 * If it turns out to be impossible to merge these two clusters,
9001 * then mark the edge as "no_merge" such that it will not be
9002 * considered again.
9004 * First mark all SCCs that need to be merged. This includes the SCCs
9005 * in the two clusters, but it may also include the SCCs
9006 * of intermediate clusters.
9007 * If there is already a no_merge edge between any pair of such SCCs,
9008 * then simply mark the current edge as no_merge as well.
9009 * Likewise, if any of those edges was postponed by has_bounded_distances,
9010 * then postpone the current edge as well.
9011 * Otherwise, try and merge the clusters and mark "edge" as "no_merge"
9012 * if the clusters did not end up getting merged, unless the non-merge
9013 * is due to the fact that the edge was postponed. This postponement
9014 * can be recognized by a change in weight (from non-negative to negative).
9016 static isl_stat merge_clusters_along_edge(isl_ctx *ctx,
9017 struct isl_sched_graph *graph, int edge, struct isl_clustering *c)
9019 isl_bool merged;
9020 int edge_weight = graph->edge[edge].weight;
9022 if (mark_merge_sccs(ctx, graph, edge, c) < 0)
9023 return isl_stat_error;
9025 if (any_no_merge(graph, c->scc_in_merge, &graph->edge[edge]))
9026 merged = isl_bool_false;
9027 else
9028 merged = try_merge(ctx, graph, c);
9029 if (merged < 0)
9030 return isl_stat_error;
9031 if (!merged && edge_weight == graph->edge[edge].weight)
9032 graph->edge[edge].no_merge = 1;
9034 return isl_stat_ok;
9037 /* Does "node" belong to the cluster identified by "cluster"?
9039 static int node_cluster_exactly(struct isl_sched_node *node, int cluster)
9041 return node->cluster == cluster;
9044 /* Does "edge" connect two nodes belonging to the cluster
9045 * identified by "cluster"?
9047 static int edge_cluster_exactly(struct isl_sched_edge *edge, int cluster)
9049 return edge->src->cluster == cluster && edge->dst->cluster == cluster;
9052 /* Swap the schedule of "node1" and "node2".
9053 * Both nodes have been derived from the same node in a common parent graph.
9054 * Since the "coincident" field is shared with that node
9055 * in the parent graph, there is no need to also swap this field.
9057 static void swap_sched(struct isl_sched_node *node1,
9058 struct isl_sched_node *node2)
9060 isl_mat *sched;
9061 isl_map *band_sched;
9063 sched = node1->sched;
9064 node1->sched = node2->sched;
9065 node2->sched = sched;
9067 band_sched = node1->band_sched;
9068 node1->band_sched = node2->band_sched;
9069 node2->band_sched = band_sched;
9072 /* Copy the current band schedule from the SCCs that form the cluster
9073 * with index "pos" to the actual cluster at position "pos".
9074 * By construction, the index of the first SCC that belongs to the cluster
9075 * is also "pos".
9077 * The order of the nodes inside both the SCCs and the cluster
9078 * is assumed to be same as the order in the original "graph".
9080 * Since the SCC graphs will no longer be used after this function,
9081 * the schedules are actually swapped rather than copied.
9083 static isl_stat copy_partial(struct isl_sched_graph *graph,
9084 struct isl_clustering *c, int pos)
9086 int i, j;
9088 c->cluster[pos].n_total_row = c->scc[pos].n_total_row;
9089 c->cluster[pos].n_row = c->scc[pos].n_row;
9090 c->cluster[pos].maxvar = c->scc[pos].maxvar;
9091 j = 0;
9092 for (i = 0; i < graph->n; ++i) {
9093 int k;
9094 int s;
9096 if (graph->node[i].cluster != pos)
9097 continue;
9098 s = graph->node[i].scc;
9099 k = c->scc_node[s]++;
9100 swap_sched(&c->cluster[pos].node[j], &c->scc[s].node[k]);
9101 if (c->scc[s].maxvar > c->cluster[pos].maxvar)
9102 c->cluster[pos].maxvar = c->scc[s].maxvar;
9103 ++j;
9106 return isl_stat_ok;
9109 /* Is there a (conditional) validity dependence from node[j] to node[i],
9110 * forcing node[i] to follow node[j] or do the nodes belong to the same
9111 * cluster?
9113 static isl_bool node_follows_strong_or_same_cluster(int i, int j, void *user)
9115 struct isl_sched_graph *graph = user;
9117 if (graph->node[i].cluster == graph->node[j].cluster)
9118 return isl_bool_true;
9119 return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]);
9122 /* Extract the merged clusters of SCCs in "graph", sort them, and
9123 * store them in c->clusters. Update c->scc_cluster accordingly.
9125 * First keep track of the cluster containing the SCC to which a node
9126 * belongs in the node itself.
9127 * Then extract the clusters into c->clusters, copying the current
9128 * band schedule from the SCCs that belong to the cluster.
9129 * Do this only once per cluster.
9131 * Finally, topologically sort the clusters and update c->scc_cluster
9132 * to match the new scc numbering. While the SCCs were originally
9133 * sorted already, some SCCs that depend on some other SCCs may
9134 * have been merged with SCCs that appear before these other SCCs.
9135 * A reordering may therefore be required.
9137 static isl_stat extract_clusters(isl_ctx *ctx, struct isl_sched_graph *graph,
9138 struct isl_clustering *c)
9140 int i;
9142 for (i = 0; i < graph->n; ++i)
9143 graph->node[i].cluster = c->scc_cluster[graph->node[i].scc];
9145 for (i = 0; i < graph->scc; ++i) {
9146 if (c->scc_cluster[i] != i)
9147 continue;
9148 if (extract_sub_graph(ctx, graph, &node_cluster_exactly,
9149 &edge_cluster_exactly, i, &c->cluster[i]) < 0)
9150 return isl_stat_error;
9151 c->cluster[i].src_scc = -1;
9152 c->cluster[i].dst_scc = -1;
9153 if (copy_partial(graph, c, i) < 0)
9154 return isl_stat_error;
9157 if (detect_ccs(ctx, graph, &node_follows_strong_or_same_cluster) < 0)
9158 return isl_stat_error;
9159 for (i = 0; i < graph->n; ++i)
9160 c->scc_cluster[graph->node[i].scc] = graph->node[i].cluster;
9162 return isl_stat_ok;
9165 /* Compute weights on the proximity edges of "graph" that can
9166 * be used by find_proximity to find the most appropriate
9167 * proximity edge to use to merge two clusters in "c".
9168 * The weights are also used by has_bounded_distances to determine
9169 * whether the merge should be allowed.
9170 * Store the maximum of the computed weights in graph->max_weight.
9172 * The computed weight is a measure for the number of remaining schedule
9173 * dimensions that can still be completely aligned.
9174 * In particular, compute the number of equalities between
9175 * input dimensions and output dimensions in the proximity constraints.
9176 * The directions that are already handled by outer schedule bands
9177 * are projected out prior to determining this number.
9179 * Edges that will never be considered by find_proximity are ignored.
9181 static isl_stat compute_weights(struct isl_sched_graph *graph,
9182 struct isl_clustering *c)
9184 int i;
9186 graph->max_weight = 0;
9188 for (i = 0; i < graph->n_edge; ++i) {
9189 struct isl_sched_edge *edge = &graph->edge[i];
9190 struct isl_sched_node *src = edge->src;
9191 struct isl_sched_node *dst = edge->dst;
9192 isl_basic_map *hull;
9193 isl_bool prox;
9194 int n_in, n_out;
9196 prox = is_non_empty_proximity(edge);
9197 if (prox < 0)
9198 return isl_stat_error;
9199 if (!prox)
9200 continue;
9201 if (bad_cluster(&c->scc[edge->src->scc]) ||
9202 bad_cluster(&c->scc[edge->dst->scc]))
9203 continue;
9204 if (c->scc_cluster[edge->dst->scc] ==
9205 c->scc_cluster[edge->src->scc])
9206 continue;
9208 hull = isl_map_affine_hull(isl_map_copy(edge->map));
9209 hull = isl_basic_map_transform_dims(hull, isl_dim_in, 0,
9210 isl_mat_copy(src->vmap));
9211 hull = isl_basic_map_transform_dims(hull, isl_dim_out, 0,
9212 isl_mat_copy(dst->vmap));
9213 hull = isl_basic_map_project_out(hull,
9214 isl_dim_in, 0, src->rank);
9215 hull = isl_basic_map_project_out(hull,
9216 isl_dim_out, 0, dst->rank);
9217 hull = isl_basic_map_remove_divs(hull);
9218 n_in = isl_basic_map_dim(hull, isl_dim_in);
9219 n_out = isl_basic_map_dim(hull, isl_dim_out);
9220 hull = isl_basic_map_drop_constraints_not_involving_dims(hull,
9221 isl_dim_in, 0, n_in);
9222 hull = isl_basic_map_drop_constraints_not_involving_dims(hull,
9223 isl_dim_out, 0, n_out);
9224 if (!hull)
9225 return isl_stat_error;
9226 edge->weight = isl_basic_map_n_equality(hull);
9227 isl_basic_map_free(hull);
9229 if (edge->weight > graph->max_weight)
9230 graph->max_weight = edge->weight;
9233 return isl_stat_ok;
9236 /* Call compute_schedule_finish_band on each of the clusters in "c"
9237 * in their topological order. This order is determined by the scc
9238 * fields of the nodes in "graph".
9239 * Combine the results in a sequence expressing the topological order.
9241 * If there is only one cluster left, then there is no need to introduce
9242 * a sequence node. Also, in this case, the cluster necessarily contains
9243 * the SCC at position 0 in the original graph and is therefore also
9244 * stored in the first cluster of "c".
9246 static __isl_give isl_schedule_node *finish_bands_clustering(
9247 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
9248 struct isl_clustering *c)
9250 int i;
9251 isl_ctx *ctx;
9252 isl_union_set_list *filters;
9254 if (graph->scc == 1)
9255 return compute_schedule_finish_band(node, &c->cluster[0], 0);
9257 ctx = isl_schedule_node_get_ctx(node);
9259 filters = extract_sccs(ctx, graph);
9260 node = isl_schedule_node_insert_sequence(node, filters);
9262 for (i = 0; i < graph->scc; ++i) {
9263 int j = c->scc_cluster[i];
9264 node = isl_schedule_node_child(node, i);
9265 node = isl_schedule_node_child(node, 0);
9266 node = compute_schedule_finish_band(node, &c->cluster[j], 0);
9267 node = isl_schedule_node_parent(node);
9268 node = isl_schedule_node_parent(node);
9271 return node;
9274 /* Compute a schedule for a connected dependence graph by first considering
9275 * each strongly connected component (SCC) in the graph separately and then
9276 * incrementally combining them into clusters.
9277 * Return the updated schedule node.
9279 * Initially, each cluster consists of a single SCC, each with its
9280 * own band schedule. The algorithm then tries to merge pairs
9281 * of clusters along a proximity edge until no more suitable
9282 * proximity edges can be found. During this merging, the schedule
9283 * is maintained in the individual SCCs.
9284 * After the merging is completed, the full resulting clusters
9285 * are extracted and in finish_bands_clustering,
9286 * compute_schedule_finish_band is called on each of them to integrate
9287 * the band into "node" and to continue the computation.
9289 * compute_weights initializes the weights that are used by find_proximity.
9291 static __isl_give isl_schedule_node *compute_schedule_wcc_clustering(
9292 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
9294 isl_ctx *ctx;
9295 struct isl_clustering c;
9296 int i;
9298 ctx = isl_schedule_node_get_ctx(node);
9300 if (clustering_init(ctx, &c, graph) < 0)
9301 goto error;
9303 if (compute_weights(graph, &c) < 0)
9304 goto error;
9306 for (;;) {
9307 i = find_proximity(graph, &c);
9308 if (i < 0)
9309 goto error;
9310 if (i >= graph->n_edge)
9311 break;
9312 if (merge_clusters_along_edge(ctx, graph, i, &c) < 0)
9313 goto error;
9316 if (extract_clusters(ctx, graph, &c) < 0)
9317 goto error;
9319 node = finish_bands_clustering(node, graph, &c);
9321 clustering_free(ctx, &c);
9322 return node;
9323 error:
9324 clustering_free(ctx, &c);
9325 return isl_schedule_node_free(node);
9328 /* Compute a schedule for a connected dependence graph and return
9329 * the updated schedule node.
9331 * If Feautrier's algorithm is selected, we first recursively try to satisfy
9332 * as many validity dependences as possible. When all validity dependences
9333 * are satisfied we extend the schedule to a full-dimensional schedule.
9335 * Call compute_schedule_wcc_whole or compute_schedule_wcc_clustering
9336 * depending on whether the user has selected the option to try and
9337 * compute a schedule for the entire (weakly connected) component first.
9338 * If there is only a single strongly connected component (SCC), then
9339 * there is no point in trying to combine SCCs
9340 * in compute_schedule_wcc_clustering, so compute_schedule_wcc_whole
9341 * is called instead.
9342 * Strongly connected components that are connected through
9343 * inter-statement consecutivity constraints are treated as
9344 * a single component to ensure that those constraints can be applied.
9346 static __isl_give isl_schedule_node *compute_schedule_wcc(
9347 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
9349 isl_ctx *ctx;
9351 if (!node)
9352 return NULL;
9354 ctx = isl_schedule_node_get_ctx(node);
9355 if (detect_sccs_grouping_consecutive(ctx, graph) < 0)
9356 return isl_schedule_node_free(node);
9358 if (compute_maxvar(graph) < 0)
9359 return isl_schedule_node_free(node);
9361 if (need_feautrier_step(ctx, graph))
9362 return compute_schedule_wcc_feautrier(node, graph);
9364 if (graph->scc <= 1 || isl_options_get_schedule_whole_component(ctx))
9365 return compute_schedule_wcc_whole(node, graph);
9366 else
9367 return compute_schedule_wcc_clustering(node, graph);
9370 /* Compute a schedule for each group of nodes identified by node->scc
9371 * separately and then combine them in a sequence node (or as set node
9372 * if graph->weak is set) inserted at position "node" of the schedule tree.
9373 * Return the updated schedule node.
9375 * If "wcc" is set then each of the groups belongs to a single
9376 * weakly connected component in the dependence graph so that
9377 * there is no need for compute_sub_schedule to look for weakly
9378 * connected components.
9380 * If a set node would be introduced and if the number of components
9381 * is equal to the number of nodes, then check if the schedule
9382 * is already complete. If so, a redundant set node would be introduced
9383 * (without any further descendants) stating that the statements
9384 * can be executed in arbitrary order, which is also expressed
9385 * by the absence of any node. Refrain from inserting any nodes
9386 * in this case and simply return.
9388 static __isl_give isl_schedule_node *compute_component_schedule(
9389 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
9390 int wcc)
9392 int component;
9393 isl_ctx *ctx;
9394 isl_union_set_list *filters;
9396 if (!node)
9397 return NULL;
9399 if (graph->weak && graph->scc == graph->n) {
9400 if (compute_maxvar(graph) < 0)
9401 return isl_schedule_node_free(node);
9402 if (graph->n_row >= graph->maxvar)
9403 return node;
9406 ctx = isl_schedule_node_get_ctx(node);
9407 filters = extract_sccs(ctx, graph);
9408 if (graph->weak)
9409 node = isl_schedule_node_insert_set(node, filters);
9410 else
9411 node = isl_schedule_node_insert_sequence(node, filters);
9413 for (component = 0; component < graph->scc; ++component) {
9414 node = isl_schedule_node_child(node, component);
9415 node = isl_schedule_node_child(node, 0);
9416 node = compute_sub_schedule(node, ctx, graph,
9417 &node_scc_exactly,
9418 &edge_scc_exactly, component, wcc);
9419 node = isl_schedule_node_parent(node);
9420 node = isl_schedule_node_parent(node);
9423 return node;
9426 /* Compute a schedule for the given dependence graph and insert it at "node".
9427 * Return the updated schedule node.
9429 * We first check if the graph is connected (through validity and conditional
9430 * validity dependences) and, if not, compute a schedule
9431 * for each component separately.
9432 * If the schedule_serialize_sccs option is set, then we check for strongly
9433 * connected components instead and compute a separate schedule for
9434 * each such strongly connected component.
9436 static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node,
9437 struct isl_sched_graph *graph)
9439 isl_ctx *ctx;
9441 if (!node)
9442 return NULL;
9444 ctx = isl_schedule_node_get_ctx(node);
9445 if (isl_options_get_schedule_serialize_sccs(ctx)) {
9446 if (detect_sccs(ctx, graph) < 0)
9447 return isl_schedule_node_free(node);
9448 } else {
9449 if (detect_wccs(ctx, graph) < 0)
9450 return isl_schedule_node_free(node);
9453 if (graph->scc > 1)
9454 return compute_component_schedule(node, graph, 1);
9456 return compute_schedule_wcc(node, graph);
9459 /* Compute a schedule on sc->domain that respects the given schedule
9460 * constraints.
9462 * In particular, the schedule respects all the validity dependences.
9463 * If the default isl scheduling algorithm is used, it tries to minimize
9464 * the dependence distances over the proximity dependences.
9465 * If Feautrier's scheduling algorithm is used, the proximity dependence
9466 * distances are only minimized during the extension to a full-dimensional
9467 * schedule.
9469 * If there are any condition and conditional validity dependences,
9470 * then the conditional validity dependences may be violated inside
9471 * a tilable band, provided they have no adjacent non-local
9472 * condition dependences.
9474 __isl_give isl_schedule *isl_schedule_constraints_compute_schedule(
9475 __isl_take isl_schedule_constraints *sc)
9477 isl_ctx *ctx = isl_schedule_constraints_get_ctx(sc);
9478 struct isl_sched_graph graph = { 0 };
9479 isl_schedule *sched;
9480 isl_schedule_node *node;
9481 isl_union_set *domain;
9483 sc = isl_schedule_constraints_align_params(sc);
9485 domain = isl_schedule_constraints_get_domain(sc);
9486 if (isl_union_set_n_set(domain) == 0) {
9487 isl_schedule_constraints_free(sc);
9488 return isl_schedule_from_domain(domain);
9491 if (graph_init(&graph, sc) < 0)
9492 domain = isl_union_set_free(domain);
9494 node = isl_schedule_node_from_domain(domain);
9495 node = isl_schedule_node_child(node, 0);
9496 if (graph.n > 0)
9497 node = compute_schedule(node, &graph);
9498 sched = isl_schedule_node_get_schedule(node);
9499 isl_schedule_node_free(node);
9501 graph_free(ctx, &graph);
9502 isl_schedule_constraints_free(sc);
9504 return sched;
9507 /* Compute a schedule for the given union of domains that respects
9508 * all the validity dependences and minimizes
9509 * the dependence distances over the proximity dependences.
9511 * This function is kept for backward compatibility.
9513 __isl_give isl_schedule *isl_union_set_compute_schedule(
9514 __isl_take isl_union_set *domain,
9515 __isl_take isl_union_map *validity,
9516 __isl_take isl_union_map *proximity)
9518 isl_schedule_constraints *sc;
9520 sc = isl_schedule_constraints_on_domain(domain);
9521 sc = isl_schedule_constraints_set_validity(sc, validity);
9522 sc = isl_schedule_constraints_set_proximity(sc, proximity);
9524 return isl_schedule_constraints_compute_schedule(sc);