2 * Copyright 2011 INRIA Saclay
3 * Copyright 2012-2014 Ecole Normale Superieure
4 * Copyright 2015-2016 Sven Verdoolaege
5 * Copyright 2016 INRIA Paris
6 * Copyright 2017 Sven Verdoolaege
8 * Use of this software is governed by the MIT license
10 * Written by Sven Verdoolaege, INRIA Saclay - Ile-de-France,
11 * Parc Club Orsay Universite, ZAC des vignes, 4 rue Jacques Monod,
13 * and Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
14 * and Centre de Recherche Inria de Paris, 2 rue Simone Iff - Voie DQ12,
15 * CS 42112, 75589 Paris Cedex 12, France
20 #include <isl_ctx_private.h>
21 #include <isl_map_private.h>
22 #include <isl_space_private.h>
23 #include <isl_aff_private.h>
25 #include <isl/constraint.h>
26 #include <isl/schedule.h>
27 #include <isl_schedule_constraints.h>
28 #include <isl/schedule_node.h>
29 #include <isl_mat_private.h>
30 #include <isl_vec_private.h>
32 #include <isl_union_set_private.h>
35 #include <isl_dim_map.h>
36 #include <isl/map_to_basic_set.h>
38 #include <isl_options_private.h>
39 #include <isl_tarjan.h>
40 #include <isl_morph.h>
42 #include <isl_val_private.h>
45 * The scheduling algorithm implemented in this file was inspired by
46 * Bondhugula et al., "Automatic Transformations for Communication-Minimized
47 * Parallelization and Locality Optimization in the Polyhedral Model".
49 * For a detailed description of the variant implemented in isl,
50 * see Verdoolaege and Janssens, "Scheduling for PPCG" (2017).
54 /* Extract the linear part, i.e., the coefficients of the input variables
55 * and the local variables (if any), from the affine expression "ma".
57 static __isl_give isl_mat
*extract_linear(__isl_take isl_multi_aff
*ma
)
59 int i
, n
, n_param
, n_var
, n_div
;
63 ma
= isl_multi_aff_align_divs(ma
);
67 ctx
= isl_multi_aff_get_ctx(ma
);
68 n
= isl_multi_aff_dim(ma
, isl_dim_out
);
69 n_param
= isl_multi_aff_dim(ma
, isl_dim_param
);
70 n_var
= isl_multi_aff_dim(ma
, isl_dim_in
);
73 n_div
= isl_aff_dim(ma
->p
[0], isl_dim_div
);
74 rows
= isl_mat_alloc(ctx
, n
, n_var
+ n_div
);
77 for (i
= 0; i
< n
; ++i
)
78 isl_seq_cpy(rows
->row
[i
],
79 ma
->p
[i
]->v
->el
+ 1 + 1 + n_param
, n_var
+ n_div
);
80 isl_multi_aff_free(ma
);
83 isl_multi_aff_free(ma
);
87 /* Enumeration for indicating the type of ILP constraints that are added
88 * for an intra-statement consecutivity constraint.
90 * outer: linear combination of outer rows
91 * inner: equal to some inner row
94 enum isl_sched_intra_state
{
95 isl_sched_intra_outer
,
96 isl_sched_intra_inner
,
100 /* A linked list of intra-statement consecutivity constraints
101 * for a particular statement.
103 * "id" is the tuple identifier of the isl_multi_aff from
104 * which the constraint is derived. It may be NULL if the isl_multi_aff
105 * did not have a tuple identifier.
106 * "outer": the rows that should be covered by the outer part of the schedule.
107 * "inner": the desired inner schedule rows.
108 * "n_inner": the number of rows in "inner".
109 * "outer" and "inner" are expressed in terms of the compressed domain space.
111 * "state": the type of ILP constraint that is added.
112 * "n_fixed": the number of rows of "inner" that have already been taken
113 * into account. A negative value means that this intra-statement
114 * consecutivity constraint can no longer be imposed.
115 * "band_n_fixed": the number of rows of "inner" that had already been taken
116 * into account at the start of the current band.
117 * If "n_fixed" is greater than 0, then "first_fixed" is
118 * the index of the schedule row that corresponds to
119 * the first row of "inner".
121 * "next": next constraint in the linked list.
123 struct isl_sched_intra
{
129 enum isl_sched_intra_state state
;
134 struct isl_sched_intra
*next
;
137 /* Internal information about a node that is used during the construction
139 * space represents the original space in which the domain lives;
140 * that is, the space is not affected by compression
141 * sched is a matrix representation of the schedule being constructed
142 * for this node; if compressed is set, then this schedule is
143 * defined over the compressed domain space
144 * band_sched is an isl_map representation of the schedule of the current band
145 * band_sched may be NULL; if compressed is set, then this map
146 * is defined over the uncompressed domain space
147 * rank is the number of linearly independent rows in the linear part
149 * the rows of "vmap" represent a change of basis for the node
150 * variables; the first rank rows span the linear part of
151 * the schedule rows; the remaining rows are linearly independent
152 * the rows of "indep" represent linear combinations of the schedule
153 * coefficients that are non-zero when the schedule coefficients are
154 * linearly independent of previously computed schedule rows.
155 * start is the first variable in the LP problem in the sequence that
156 * represents the schedule coefficients of this node
157 * nvar is the dimension of the (compressed) domain
158 * nparam is the number of parameters or 0 if we are not constructing
159 * a parametric schedule
161 * If compressed is set, then hull represents the constraints
162 * that were used to derive the compression, while compress and
163 * decompress map the original space to the compressed space and
166 * scc is the index of SCC (or WCC) this node belongs to
168 * "cluster" is only used inside extract_clusters and identifies
169 * the cluster of SCCs that the node belongs to.
171 * coincident contains a boolean for each of the rows of the schedule,
172 * indicating whether the corresponding scheduling dimension satisfies
173 * the coincidence constraints in the sense that the corresponding
174 * dependence distances are zero.
176 * If the schedule_treat_coalescing option is set, then
177 * "sizes" contains the sizes of the (compressed) instance set
178 * in each direction. If there is no fixed size in a given direction,
179 * then the corresponding size value is set to infinity.
180 * If the schedule_treat_coalescing option or the schedule_max_coefficient
181 * option is set, then "max" contains the maximal values for
182 * schedule coefficients of the (compressed) variables. If no bound
183 * needs to be imposed on a particular variable, then the corresponding
185 * If not NULL, then "bounds" contains a non-parametric set
186 * in the compressed space that is bounded by the size in each direction.
187 * "intra" is a linked list of intra-statement consecutivity constraints,
188 * with the highest priority constraints appearing first.
189 * If the node belongs to a graph that is derived through splitting,
190 * then the "intra" list is shared with the node in the original graph.
192 struct isl_sched_node
{
196 isl_multi_aff
*compress
;
197 isl_multi_aff
*decompress
;
212 isl_multi_val
*sizes
;
213 isl_basic_set
*bounds
;
216 struct isl_sched_intra
*intra
;
219 static int node_has_tuples(const void *entry
, const void *val
)
221 struct isl_sched_node
*node
= (struct isl_sched_node
*)entry
;
222 isl_space
*space
= (isl_space
*) val
;
224 return isl_space_has_equal_tuples(node
->space
, space
);
227 static int node_scc_exactly(struct isl_sched_node
*node
, int scc
)
229 return node
->scc
== scc
;
232 static int node_scc_at_most(struct isl_sched_node
*node
, int scc
)
234 return node
->scc
<= scc
;
237 static int node_scc_at_least(struct isl_sched_node
*node
, int scc
)
239 return node
->scc
>= scc
;
242 /* Enumeration for indicating the type of ILP constraint that is added
243 * for an inter-statement consecutivity constraint.
245 * failed: the inter-statement consecutivity constraint
246 * init: no constraint has been added or only zero-distance constraints
247 * inner: the one-distance constraint has been added
248 * free: no more constraints are added
250 * In practice, the "failed" and the "free" state have the same effect.
251 * They both result in the constraint being ignored in later steps.
253 enum isl_sched_inter_state
{
254 isl_sched_inter_failed
= -1,
255 isl_sched_inter_init
= 0,
256 isl_sched_inter_inner
,
257 isl_sched_inter_free
,
260 /* An edge in the dependence graph. An edge may be used to
261 * ensure validity of the generated schedule, to minimize the dependence
264 * map is the dependence relation, with i -> j in the map if j depends on i
265 * tagged_condition and tagged_validity contain the union of all tagged
266 * condition or conditional validity dependence relations that
267 * specialize the dependence relation "map"; that is,
268 * if (i -> a) -> (j -> b) is an element of "tagged_condition"
269 * or "tagged_validity", then i -> j is an element of "map".
270 * If these fields are NULL, then they represent the empty relation.
271 * src is the source node
272 * dst is the sink node
274 * types is a bit vector containing the types of this edge.
275 * validity is set if the edge is used to ensure correctness
276 * coincidence is used to enforce zero dependence distances
277 * proximity is set if the edge is used to minimize dependence distances
278 * condition is set if the edge represents a condition
279 * for a conditional validity schedule constraint
280 * local can only be set for condition edges and indicates that
281 * the dependence distance over the edge should be zero
282 * conditional_validity is set if the edge is used to conditionally
284 * consecutivity is set if the edge is used to make pairs of instances
285 * consecutive at a given level. A consecutivity edge is exclusively
286 * used to represent a single consecutivity constraint.
288 * For validity edges, start and end mark the sequence of inequality
289 * constraints in the LP problem that encode the validity constraint
290 * corresponding to this edge.
292 * For consecutivity edges, "src_intra" and "dst_intra" point
293 * to the corresponding intra-statement consecutivity constraints
294 * in "src" and "dst". "state" reflects the type of ILP constraints
295 * that have been imposed. "band_state" is the state at the start
296 * of the current band.
298 * During clustering, an edge may be marked "no_merge" if it should
299 * not be used to merge clusters.
300 * The weight is also only used during clustering and it is
301 * an indication of how many schedule dimensions on either side
302 * of the schedule constraints can be aligned.
303 * If the weight is negative, then this means that this edge was postponed
304 * by has_bounded_distances or any_no_merge. The original weight can
305 * be retrieved by adding 1 + graph->max_weight, with "graph"
306 * the graph containing this edge.
308 struct isl_sched_edge
{
310 isl_union_map
*tagged_condition
;
311 isl_union_map
*tagged_validity
;
313 struct isl_sched_node
*src
;
314 struct isl_sched_node
*dst
;
315 struct isl_sched_intra
*src_intra
;
316 struct isl_sched_intra
*dst_intra
;
323 enum isl_sched_inter_state state
;
324 enum isl_sched_inter_state band_state
;
330 /* Is "edge" marked as being of type "type"?
332 static int is_type(struct isl_sched_edge
*edge
, enum isl_edge_type type
)
334 return ISL_FL_ISSET(edge
->types
, 1 << type
);
337 /* Mark "edge" as being of type "type".
339 static void set_type(struct isl_sched_edge
*edge
, enum isl_edge_type type
)
341 ISL_FL_SET(edge
->types
, 1 << type
);
344 /* No longer mark "edge" as being of type "type"?
346 static void clear_type(struct isl_sched_edge
*edge
, enum isl_edge_type type
)
348 ISL_FL_CLR(edge
->types
, 1 << type
);
351 /* Is "edge" marked as a validity edge?
353 static int is_validity(struct isl_sched_edge
*edge
)
355 return is_type(edge
, isl_edge_validity
);
358 /* Mark "edge" as a validity edge.
360 static void set_validity(struct isl_sched_edge
*edge
)
362 set_type(edge
, isl_edge_validity
);
365 /* Is "edge" marked as a proximity edge?
367 static int is_proximity(struct isl_sched_edge
*edge
)
369 return is_type(edge
, isl_edge_proximity
);
372 /* Is "edge" marked as a local edge?
374 static int is_local(struct isl_sched_edge
*edge
)
376 return is_type(edge
, isl_edge_local
);
379 /* Mark "edge" as a local edge.
381 static void set_local(struct isl_sched_edge
*edge
)
383 set_type(edge
, isl_edge_local
);
386 /* No longer mark "edge" as a local edge.
388 static void clear_local(struct isl_sched_edge
*edge
)
390 clear_type(edge
, isl_edge_local
);
393 /* Is "edge" marked as a coincidence edge?
395 static int is_coincidence(struct isl_sched_edge
*edge
)
397 return is_type(edge
, isl_edge_coincidence
);
400 /* Is "edge" marked as a condition edge?
402 static int is_condition(struct isl_sched_edge
*edge
)
404 return is_type(edge
, isl_edge_condition
);
407 /* Is "edge" marked as a conditional validity edge?
409 static int is_conditional_validity(struct isl_sched_edge
*edge
)
411 return is_type(edge
, isl_edge_conditional_validity
);
414 /* Is "edge" marked as a consecutivity edge?
416 static int is_consecutivity(struct isl_sched_edge
*edge
)
418 return is_type(edge
, isl_edge_consecutivity
);
/* Is "edge" of a type that can appear multiple times between
 * the same pair of nodes?
 *
 * This is the case for condition edges, conditional validity edges
 * and consecutivity edges.
 * Condition edges and conditional validity edges may have tagged
 * dependence relations, in which case more than one edge may be added
 * for the same pair of nodes.
 */
static int is_multi_edge_type(struct isl_sched_edge *edge)
{
	return is_condition(edge) || is_conditional_validity(edge) ||
		is_consecutivity(edge);
}
434 /* Internal information about the dependence graph used during
435 * the construction of the schedule.
437 * intra_hmap is a cache, mapping dependence relations to their dual,
438 * for dependences from a node to itself, possibly without
439 * coefficients for the parameters
440 * intra_hmap_param is a cache, mapping dependence relations to their dual,
441 * for dependences from a node to itself, including coefficients
443 * inter_hmap is a cache, mapping dependence relations to their dual,
444 * for dependences between distinct nodes
445 * if compression is involved then the key for these maps
446 * is the original, uncompressed dependence relation, while
447 * the value is the dual of the compressed dependence relation.
449 * prefix is the schedule prefix specified by the user.
450 * This field may be NULL if no (non-trivial) schedule prefix
453 * n is the number of nodes
454 * node is the list of nodes
455 * maxvar is the maximal number of variables over all nodes
456 * max_row is the allocated number of rows in the schedule
457 * n_row is the current (maximal) number of linearly independent
458 * rows in the node schedules
459 * n_total_row is the current number of rows in the node schedules
460 * band_start is the starting row in the node schedules of the current band
461 * root is set to the original dependence graph from which this graph
462 * is derived through splitting. If this graph is not the result of
463 * splitting, then the root field points to the graph itself.
465 * sorted contains a list of node indices sorted according to the
466 * SCC to which a node belongs
468 * n_edge is the number of edges
469 * edge is the list of edges
470 * max_edge contains the maximal number of edges of each type;
471 * in particular, it contains the number of edges in the initial graph.
472 * edge_table contains pointers into the edge array, hashed on the source
473 * and sink spaces; there is one such table for each type;
474 * a given edge may be referenced from more than one table
475 * if the corresponding relation appears in more than one of the
476 * sets of dependences; however, for each type there is only
477 * a single edge between a given pair of source and sink space
478 * in the entire graph
480 * node_table contains pointers into the node array, hashed on the space tuples
482 * "region" contains a list of variable sequences with constraints
483 * that need to be satisfied.
484 * "n_region" contains the size of the allocated array.
486 * lp contains the (I)LP problem used to obtain new schedule rows
488 * src_scc and dst_scc are the source and sink SCCs of an edge with
489 * conflicting constraints
491 * scc represents the number of components
492 * weak is set if the components are weakly connected
494 * max_weight is used during clustering and represents the maximal
495 * weight of the relevant proximity edges.
497 struct isl_sched_graph
{
498 isl_map_to_basic_set
*intra_hmap
;
499 isl_map_to_basic_set
*intra_hmap_param
;
500 isl_map_to_basic_set
*inter_hmap
;
502 isl_multi_union_pw_aff
*prefix
;
504 struct isl_sched_node
*node
;
515 struct isl_sched_graph
*root
;
517 struct isl_sched_edge
*edge
;
519 int max_edge
[isl_edge_last_table
+ 1];
520 struct isl_hash_table
*edge_table
[isl_edge_last_table
+ 1];
522 struct isl_hash_table
*node_table
;
524 struct isl_ilp_region
*region
;
537 /* Initialize node_table based on the list of nodes.
539 static int graph_init_table(isl_ctx
*ctx
, struct isl_sched_graph
*graph
)
543 graph
->node_table
= isl_hash_table_alloc(ctx
, graph
->n
);
544 if (!graph
->node_table
)
547 for (i
= 0; i
< graph
->n
; ++i
) {
548 struct isl_hash_table_entry
*entry
;
551 hash
= isl_space_get_tuple_hash(graph
->node
[i
].space
);
552 entry
= isl_hash_table_find(ctx
, graph
->node_table
, hash
,
554 graph
->node
[i
].space
, 1);
557 entry
->data
= &graph
->node
[i
];
563 /* Return a pointer to the node that lives within the given space,
564 * an invalid node if there is no such node, or NULL in case of error.
566 static struct isl_sched_node
*graph_find_node(isl_ctx
*ctx
,
567 struct isl_sched_graph
*graph
, __isl_keep isl_space
*space
)
569 struct isl_hash_table_entry
*entry
;
575 hash
= isl_space_get_tuple_hash(space
);
576 entry
= isl_hash_table_find(ctx
, graph
->node_table
, hash
,
577 &node_has_tuples
, space
, 0);
579 return entry
? entry
->data
: graph
->node
+ graph
->n
;
582 /* Is "node" a node in "graph"?
584 static int is_node(struct isl_sched_graph
*graph
,
585 struct isl_sched_node
*node
)
587 return node
&& node
>= &graph
->node
[0] && node
< &graph
->node
[graph
->n
];
590 static int edge_has_src_and_dst(const void *entry
, const void *val
)
592 const struct isl_sched_edge
*edge
= entry
;
593 const struct isl_sched_edge
*temp
= val
;
595 return edge
->src
== temp
->src
&& edge
->dst
== temp
->dst
;
598 /* Add the given edge to graph->edge_table[type].
600 static isl_stat
graph_edge_table_add(isl_ctx
*ctx
,
601 struct isl_sched_graph
*graph
, enum isl_edge_type type
,
602 struct isl_sched_edge
*edge
)
604 struct isl_hash_table_entry
*entry
;
607 hash
= isl_hash_init();
608 hash
= isl_hash_builtin(hash
, edge
->src
);
609 hash
= isl_hash_builtin(hash
, edge
->dst
);
610 entry
= isl_hash_table_find(ctx
, graph
->edge_table
[type
], hash
,
611 &edge_has_src_and_dst
, edge
, 1);
613 return isl_stat_error
;
619 /* Add "edge" to all relevant edge tables.
620 * That is, for every type of the edge, add it to the corresponding table.
622 static isl_stat
graph_edge_tables_add(isl_ctx
*ctx
,
623 struct isl_sched_graph
*graph
, struct isl_sched_edge
*edge
)
625 enum isl_edge_type t
;
627 for (t
= isl_edge_first
; t
<= isl_edge_last_table
; ++t
) {
628 if (!is_type(edge
, t
))
630 if (graph_edge_table_add(ctx
, graph
, t
, edge
) < 0)
631 return isl_stat_error
;
637 /* Allocate the edge_tables based on the maximal number of edges of
640 static int graph_init_edge_tables(isl_ctx
*ctx
, struct isl_sched_graph
*graph
)
644 for (i
= 0; i
<= isl_edge_last_table
; ++i
) {
645 graph
->edge_table
[i
] = isl_hash_table_alloc(ctx
,
647 if (!graph
->edge_table
[i
])
654 /* If graph->edge_table[type] contains an edge from the given source
655 * to the given destination, then return the hash table entry of this edge.
656 * Otherwise, return NULL.
658 static struct isl_hash_table_entry
*graph_find_edge_entry(
659 struct isl_sched_graph
*graph
,
660 enum isl_edge_type type
,
661 struct isl_sched_node
*src
, struct isl_sched_node
*dst
)
663 isl_ctx
*ctx
= isl_space_get_ctx(src
->space
);
665 struct isl_sched_edge temp
= { .src
= src
, .dst
= dst
};
667 hash
= isl_hash_init();
668 hash
= isl_hash_builtin(hash
, temp
.src
);
669 hash
= isl_hash_builtin(hash
, temp
.dst
);
670 return isl_hash_table_find(ctx
, graph
->edge_table
[type
], hash
,
671 &edge_has_src_and_dst
, &temp
, 0);
675 /* If graph->edge_table[type] contains an edge from the given source
676 * to the given destination, then return this edge.
677 * Otherwise, return NULL.
679 static struct isl_sched_edge
*graph_find_edge(struct isl_sched_graph
*graph
,
680 enum isl_edge_type type
,
681 struct isl_sched_node
*src
, struct isl_sched_node
*dst
)
683 struct isl_hash_table_entry
*entry
;
685 entry
= graph_find_edge_entry(graph
, type
, src
, dst
);
692 /* Check whether the dependence graph has an edge of the given type
693 * between the given two nodes.
695 static isl_bool
graph_has_edge(struct isl_sched_graph
*graph
,
696 enum isl_edge_type type
,
697 struct isl_sched_node
*src
, struct isl_sched_node
*dst
)
699 struct isl_sched_edge
*edge
;
702 edge
= graph_find_edge(graph
, type
, src
, dst
);
706 empty
= isl_map_plain_is_empty(edge
->map
);
708 return isl_bool_error
;
713 /* Look for any edge with the same src, dst and map fields as "model".
714 * Do not look for matching edges of consecutivity constraints or
715 * matching consecutivity edges.
717 * Return the matching edge if one can be found.
718 * Return "model" if no matching edge is found.
719 * Return NULL on error.
721 static struct isl_sched_edge
*graph_find_matching_edge(
722 struct isl_sched_graph
*graph
, struct isl_sched_edge
*model
)
724 enum isl_edge_type i
;
725 struct isl_sched_edge
*edge
;
727 if (is_consecutivity(model
))
729 for (i
= isl_edge_first
; i
<= isl_edge_last_table
; ++i
) {
732 if (i
== isl_edge_consecutivity
)
734 edge
= graph_find_edge(graph
, i
, model
->src
, model
->dst
);
737 is_equal
= isl_map_plain_is_equal(model
->map
, edge
->map
);
747 /* Remove the given edge from all the edge_tables that refer to it.
749 static void graph_remove_edge(struct isl_sched_graph
*graph
,
750 struct isl_sched_edge
*edge
)
752 isl_ctx
*ctx
= isl_map_get_ctx(edge
->map
);
753 enum isl_edge_type i
;
755 for (i
= isl_edge_first
; i
<= isl_edge_last_table
; ++i
) {
756 struct isl_hash_table_entry
*entry
;
758 entry
= graph_find_edge_entry(graph
, i
, edge
->src
, edge
->dst
);
761 if (entry
->data
!= edge
)
763 isl_hash_table_remove(ctx
, graph
->edge_table
[i
], entry
);
767 /* Check whether the dependence graph has any edge
768 * between the given two nodes.
770 static isl_bool
graph_has_any_edge(struct isl_sched_graph
*graph
,
771 struct isl_sched_node
*src
, struct isl_sched_node
*dst
)
773 enum isl_edge_type i
;
776 for (i
= isl_edge_first
; i
<= isl_edge_last_table
; ++i
) {
777 r
= graph_has_edge(graph
, i
, src
, dst
);
785 /* Check whether the dependence graph has a validity edge
786 * between the given two nodes.
788 * Conditional validity edges are essentially validity edges that
789 * can be ignored if the corresponding condition edges are iteration private.
790 * Here, we are only checking for the presence of validity
791 * edges, so we need to consider the conditional validity edges too.
792 * In particular, this function is used during the detection
793 * of strongly connected components and we cannot ignore
794 * conditional validity edges during this detection.
796 static isl_bool
graph_has_validity_edge(struct isl_sched_graph
*graph
,
797 struct isl_sched_node
*src
, struct isl_sched_node
*dst
)
801 r
= graph_has_edge(graph
, isl_edge_validity
, src
, dst
);
805 return graph_has_edge(graph
, isl_edge_conditional_validity
, src
, dst
);
808 /* Perform all the required memory allocations for a schedule graph "graph"
809 * with "n_node" nodes and "n_edge" edges and initialize the corresponding
811 * "n_consecutive" is the number of consecutivity constraints.
812 * The number of regions introduced per intra-statement consecutivity
813 * constraint can vary between one and three, while an additional
814 * single region is introduced per inter-statement consecutivity constraint.
815 * Only allocate a single entry
816 * per consecutivity constraint for now, relying on graph_extend_region
817 * to extend the list of regions when needed.
819 static isl_stat
graph_alloc(isl_ctx
*ctx
, struct isl_sched_graph
*graph
,
820 int n_node
, int n_edge
, int n_consecutive
)
825 graph
->n_edge
= n_edge
;
826 graph
->node
= isl_calloc_array(ctx
, struct isl_sched_node
, graph
->n
);
827 graph
->sorted
= isl_calloc_array(ctx
, int, graph
->n
);
828 graph
->n_region
= n_consecutive
+ graph
->n
;
829 graph
->region
= isl_calloc_array(ctx
, struct isl_ilp_region
,
831 graph
->edge
= isl_calloc_array(ctx
,
832 struct isl_sched_edge
, graph
->n_edge
);
834 graph
->intra_hmap
= isl_map_to_basic_set_alloc(ctx
, 2 * n_edge
);
835 graph
->intra_hmap_param
= isl_map_to_basic_set_alloc(ctx
, 2 * n_edge
);
836 graph
->inter_hmap
= isl_map_to_basic_set_alloc(ctx
, 2 * n_edge
);
838 if (!graph
->node
|| !graph
->region
|| (graph
->n_edge
&& !graph
->edge
) ||
840 return isl_stat_error
;
842 for(i
= 0; i
< graph
->n
; ++i
)
843 graph
->sorted
[i
] = i
;
848 /* Extend the size of graph->region to contain at least "n" elements,
849 * clearing the additionally allocated elements.
851 static isl_stat
graph_extend_region(isl_ctx
*ctx
, struct isl_sched_graph
*graph
,
854 struct isl_ilp_region
*region
;
856 if (graph
->n_region
>= n
)
860 region
= isl_realloc_array(ctx
, graph
->region
, struct isl_ilp_region
,
863 return isl_stat_error
;
865 memset(region
+ graph
->n_region
, 0,
866 (n
- graph
->n_region
) * sizeof(struct isl_ilp_region
));
867 graph
->region
= region
;
873 /* Free the memory associated to node "node" in "graph".
874 * The "coincident" and the "intra" fields are shared by nodes in a graph and
876 * They therefore only need to be freed for the original dependence graph,
877 * i.e., one that is not the result of splitting.
879 static void clear_node(struct isl_sched_graph
*graph
,
880 struct isl_sched_node
*node
)
882 isl_space_free(node
->space
);
883 isl_set_free(node
->hull
);
884 isl_multi_aff_free(node
->compress
);
885 isl_multi_aff_free(node
->decompress
);
886 isl_mat_free(node
->sched
);
887 isl_map_free(node
->band_sched
);
888 isl_mat_free(node
->indep
);
889 isl_mat_free(node
->vmap
);
890 isl_multi_val_free(node
->sizes
);
891 isl_basic_set_free(node
->bounds
);
892 isl_vec_free(node
->max
);
894 if (graph
->root
!= graph
)
897 free(node
->coincident
);
898 while (node
->intra
) {
899 struct isl_sched_intra
*intra
= node
->intra
;
901 node
->intra
= intra
->next
;
902 isl_id_free(intra
->id
);
903 isl_mat_free(intra
->outer
);
904 isl_mat_free(intra
->inner
);
909 static void graph_free(isl_ctx
*ctx
, struct isl_sched_graph
*graph
)
913 isl_map_to_basic_set_free(graph
->intra_hmap
);
914 isl_map_to_basic_set_free(graph
->intra_hmap_param
);
915 isl_map_to_basic_set_free(graph
->inter_hmap
);
918 for (i
= 0; i
< graph
->n
; ++i
)
919 clear_node(graph
, &graph
->node
[i
]);
923 for (i
= 0; i
< graph
->n_edge
; ++i
) {
924 isl_map_free(graph
->edge
[i
].map
);
925 isl_union_map_free(graph
->edge
[i
].tagged_condition
);
926 isl_union_map_free(graph
->edge
[i
].tagged_validity
);
930 for (i
= 0; i
<= isl_edge_last_table
; ++i
)
931 isl_hash_table_free(ctx
, graph
->edge_table
[i
]);
932 isl_hash_table_free(ctx
, graph
->node_table
);
933 isl_basic_set_free(graph
->lp
);
935 isl_multi_union_pw_aff_free(graph
->prefix
);
938 /* For each "set" on which this function is called, increment
939 * graph->n by one and update graph->maxvar.
941 static isl_stat
init_n_maxvar(__isl_take isl_set
*set
, void *user
)
943 struct isl_sched_graph
*graph
= user
;
944 int nvar
= isl_set_dim(set
, isl_dim_set
);
947 if (nvar
> graph
->maxvar
)
948 graph
->maxvar
= nvar
;
955 /* Compute the number of rows that should be allocated for the schedule.
956 * In particular, we need one row for each variable or one row
957 * for each basic map in the dependences.
958 * Note that it is practically impossible to exhaust both
959 * the number of dependences and the number of variables.
960 * If any prefix schedule was specified, then the initial rows
961 * are initialized from this prefix. Since the prefix may be
962 * completely trivial, it needs to be taken into account separately.
964 static isl_stat
compute_max_row(struct isl_sched_graph
*graph
,
965 __isl_keep isl_schedule_constraints
*sc
)
967 int n_prefix
, n_edge
;
969 isl_union_set
*domain
;
970 isl_multi_union_pw_aff
*prefix
;
974 domain
= isl_schedule_constraints_get_domain(sc
);
975 r
= isl_union_set_foreach_set(domain
, &init_n_maxvar
, graph
);
976 isl_union_set_free(domain
);
978 return isl_stat_error
;
979 prefix
= isl_schedule_constraints_get_prefix(sc
);
980 n_prefix
= isl_multi_union_pw_aff_dim(prefix
, isl_dim_set
);
981 isl_multi_union_pw_aff_free(prefix
);
983 return isl_stat_error
;
984 n_edge
= isl_schedule_constraints_n_basic_map(sc
);
986 return isl_stat_error
;
987 graph
->max_row
= n_prefix
+ n_edge
+ graph
->maxvar
;
992 /* Does "bset" have any defining equalities for its set variables?
994 static isl_bool
has_any_defining_equality(__isl_keep isl_basic_set
*bset
)
999 return isl_bool_error
;
1001 n
= isl_basic_set_dim(bset
, isl_dim_set
);
1002 for (i
= 0; i
< n
; ++i
) {
1005 has
= isl_basic_set_has_defining_equality(bset
, isl_dim_set
, i
,
1011 return isl_bool_false
;
1014 /* Set the entries of node->max to the value of the schedule_max_coefficient
1017 static isl_stat
set_max_coefficient(isl_ctx
*ctx
, struct isl_sched_node
*node
)
1021 max
= isl_options_get_schedule_max_coefficient(ctx
);
1025 node
->max
= isl_vec_alloc(ctx
, node
->nvar
);
1026 node
->max
= isl_vec_set_si(node
->max
, max
);
1028 return isl_stat_error
;
1033 /* Set the entries of node->max to the minimum of the schedule_max_coefficient
1034 * option (if set) and half of the minimum of the sizes in the other
1035 * dimensions. Round up when computing the half such that
1036 * if the minimum of the sizes is one, half of the size is taken to be one
1038 * If the global minimum is unbounded (i.e., if both
1039 * the schedule_max_coefficient is not set and the sizes in the other
1040 * dimensions are unbounded), then store a negative value.
1041 * If the schedule coefficient is close to the size of the instance set
1042 * in another dimension, then the schedule may represent a loop
1043 * coalescing transformation (especially if the coefficient
1044 * in that other dimension is one). Forcing the coefficient to be
1045 * smaller than or equal to half the minimal size should avoid this
1048 static isl_stat
compute_max_coefficient(isl_ctx
*ctx
,
1049 struct isl_sched_node
*node
)
1055 max
= isl_options_get_schedule_max_coefficient(ctx
);
1056 v
= isl_vec_alloc(ctx
, node
->nvar
);
1058 return isl_stat_error
;
1060 for (i
= 0; i
< node
->nvar
; ++i
) {
1061 isl_int_set_si(v
->el
[i
], max
);
1062 isl_int_mul_si(v
->el
[i
], v
->el
[i
], 2);
1065 for (i
= 0; i
< node
->nvar
; ++i
) {
1068 size
= isl_multi_val_get_val(node
->sizes
, i
);
1071 if (!isl_val_is_int(size
)) {
1075 for (j
= 0; j
< node
->nvar
; ++j
) {
1078 if (isl_int_is_neg(v
->el
[j
]) ||
1079 isl_int_gt(v
->el
[j
], size
->n
))
1080 isl_int_set(v
->el
[j
], size
->n
);
1085 for (i
= 0; i
< node
->nvar
; ++i
)
1086 isl_int_cdiv_q_ui(v
->el
[i
], v
->el
[i
], 2);
1092 return isl_stat_error
;
1095 /* Compute and return the size of "set" in dimension "dim".
1096 * The size is taken to be the difference in values for that variable
1097 * for fixed values of the other variables.
1098 * This assumes that "set" is convex.
1099 * In particular, the variable is first isolated from the other variables
1100 * in the range of a map
1102 * [i_0, ..., i_dim-1, i_dim+1, ...] -> [i_dim]
1104 * and then duplicated
1106 * [i_0, ..., i_dim-1, i_dim+1, ...] -> [[i_dim] -> [i_dim']]
1108 * The shared variables are then projected out and the maximal value
1109 * of i_dim' - i_dim is computed.
1111 static __isl_give isl_val
*compute_size(__isl_take isl_set
*set
, int dim
)
1114 isl_local_space
*ls
;
1118 map
= isl_set_project_onto_map(set
, isl_dim_set
, dim
, 1);
1119 map
= isl_map_project_out(map
, isl_dim_in
, dim
, 1);
1120 map
= isl_map_range_product(map
, isl_map_copy(map
));
1121 map
= isl_set_unwrap(isl_map_range(map
));
1122 set
= isl_map_deltas(map
);
1123 ls
= isl_local_space_from_space(isl_set_get_space(set
));
1124 obj
= isl_aff_var_on_domain(ls
, isl_dim_set
, 0);
1125 v
= isl_set_max_val(set
, obj
);
1132 /* Compute the size of the instance set "set" of "node", after compression,
1133 * as well as bounds on the corresponding coefficients, if needed.
1135 * The sizes are needed when the schedule_treat_coalescing option is set.
1136 * The bounds are needed when the schedule_treat_coalescing option or
1137 * the schedule_max_coefficient option is set.
1139 * If the schedule_treat_coalescing option is not set, then at most
1140 * the bounds need to be set and this is done in set_max_coefficient.
1141 * Otherwise, compress the domain if needed, compute the size
1142 * in each direction and store the results in node->size.
1143 * If the domain is not convex, then the sizes are computed
1144 * on a convex superset in order to avoid picking up sizes
1145 * that are valid for the individual disjuncts, but not for
1146 * the domain as a whole.
1147 * Finally, set the bounds on the coefficients based on the sizes
1148 * and the schedule_max_coefficient option in compute_max_coefficient.
1150 static isl_stat
compute_sizes_and_max(isl_ctx
*ctx
, struct isl_sched_node
*node
,
1151 __isl_take isl_set
*set
)
1156 if (!isl_options_get_schedule_treat_coalescing(ctx
)) {
1158 return set_max_coefficient(ctx
, node
);
1161 if (node
->compressed
)
1162 set
= isl_set_preimage_multi_aff(set
,
1163 isl_multi_aff_copy(node
->decompress
));
1164 set
= isl_set_from_basic_set(isl_set_simple_hull(set
));
1165 mv
= isl_multi_val_zero(isl_set_get_space(set
));
1166 n
= isl_set_dim(set
, isl_dim_set
);
1167 for (j
= 0; j
< n
; ++j
) {
1170 v
= compute_size(isl_set_copy(set
), j
);
1171 mv
= isl_multi_val_set_val(mv
, j
, v
);
1176 return isl_stat_error
;
1177 return compute_max_coefficient(ctx
, node
);
1180 /* Add a new node to the graph representing the given instance set.
1181 * "nvar" is the (possibly compressed) number of variables and
1182 * may be smaller than then number of set variables in "set"
1183 * if "compressed" is set.
1184 * If "compressed" is set, then "hull" represents the constraints
1185 * that were used to derive the compression, while "compress" and
1186 * "decompress" map the original space to the compressed space and
1188 * If "compressed" is not set, then "hull", "compress" and "decompress"
1191 * Compute the size of the instance set and bounds on the coefficients,
1194 static isl_stat
add_node(struct isl_sched_graph
*graph
,
1195 __isl_take isl_set
*set
, int nvar
, int compressed
,
1196 __isl_take isl_set
*hull
, __isl_take isl_multi_aff
*compress
,
1197 __isl_take isl_multi_aff
*decompress
)
1204 struct isl_sched_node
*node
;
1207 return isl_stat_error
;
1209 ctx
= isl_set_get_ctx(set
);
1210 nparam
= isl_set_dim(set
, isl_dim_param
);
1211 if (!ctx
->opt
->schedule_parametric
)
1213 sched
= isl_mat_alloc(ctx
, 0, 1 + nparam
+ nvar
);
1214 node
= &graph
->node
[graph
->n
];
1216 space
= isl_set_get_space(set
);
1217 node
->space
= space
;
1219 node
->nparam
= nparam
;
1220 node
->sched
= sched
;
1221 node
->band_sched
= NULL
;
1222 coincident
= isl_calloc_array(ctx
, int, graph
->max_row
);
1223 node
->coincident
= coincident
;
1224 node
->compressed
= compressed
;
1226 node
->compress
= compress
;
1227 node
->decompress
= decompress
;
1228 if (compute_sizes_and_max(ctx
, node
, set
) < 0)
1229 return isl_stat_error
;
1231 if (!space
|| !sched
|| (graph
->max_row
&& !coincident
))
1232 return isl_stat_error
;
1233 if (compressed
&& (!hull
|| !compress
|| !decompress
))
1234 return isl_stat_error
;
1239 /* Construct an identifier for node "node", which will represent "set".
1240 * The name of the identifier is either "compressed" or
1241 * "compressed_<name>", with <name> the name of the space of "set".
1242 * The user pointer of the identifier points to "node".
1244 static __isl_give isl_id
*construct_compressed_id(__isl_keep isl_set
*set
,
1245 struct isl_sched_node
*node
)
1254 has_name
= isl_set_has_tuple_name(set
);
1258 ctx
= isl_set_get_ctx(set
);
1260 return isl_id_alloc(ctx
, "compressed", node
);
1262 p
= isl_printer_to_str(ctx
);
1263 name
= isl_set_get_tuple_name(set
);
1264 p
= isl_printer_print_str(p
, "compressed_");
1265 p
= isl_printer_print_str(p
, name
);
1266 id_name
= isl_printer_get_str(p
);
1267 isl_printer_free(p
);
1269 id
= isl_id_alloc(ctx
, id_name
, node
);
1275 /* Add a new node to the graph representing the given set.
1277 * If any of the set variables is defined by an equality, then
1278 * we perform variable compression such that we can perform
1279 * the scheduling on the compressed domain.
1280 * In this case, an identifier is used that references the new node
1281 * such that each compressed space is unique and
1282 * such that the node can be recovered from the compressed space.
1284 static isl_stat
extract_node(__isl_take isl_set
*set
, void *user
)
1287 isl_bool has_equality
;
1289 isl_basic_set
*hull
;
1292 isl_multi_aff
*compress
, *decompress
;
1293 struct isl_sched_graph
*graph
= user
;
1295 hull
= isl_set_affine_hull(isl_set_copy(set
));
1296 hull
= isl_basic_set_remove_divs(hull
);
1297 nvar
= isl_set_dim(set
, isl_dim_set
);
1298 has_equality
= has_any_defining_equality(hull
);
1300 if (has_equality
< 0)
1302 if (!has_equality
) {
1303 isl_basic_set_free(hull
);
1304 return add_node(graph
, set
, nvar
, 0, NULL
, NULL
, NULL
);
1307 id
= construct_compressed_id(set
, &graph
->node
[graph
->n
]);
1308 morph
= isl_basic_set_variable_compression_with_id(hull
,
1311 nvar
= isl_morph_ran_dim(morph
, isl_dim_set
);
1312 compress
= isl_morph_get_var_multi_aff(morph
);
1313 morph
= isl_morph_inverse(morph
);
1314 decompress
= isl_morph_get_var_multi_aff(morph
);
1315 isl_morph_free(morph
);
1317 hull_set
= isl_set_from_basic_set(hull
);
1318 return add_node(graph
, set
, nvar
, 1, hull_set
, compress
, decompress
);
1320 isl_basic_set_free(hull
);
1322 return isl_stat_error
;
1325 struct isl_extract_edge_data
{
1326 enum isl_edge_type type
;
1327 struct isl_sched_graph
*graph
;
1330 /* Merge edge2 into edge1, freeing the contents of edge2.
1331 * Return 0 on success and -1 on failure.
1333 * edge1 and edge2 are assumed to have the same value for the map field.
1335 static int merge_edge(struct isl_sched_edge
*edge1
,
1336 struct isl_sched_edge
*edge2
)
1338 edge1
->types
|= edge2
->types
;
1339 isl_map_free(edge2
->map
);
1341 if (is_condition(edge2
)) {
1342 if (!edge1
->tagged_condition
)
1343 edge1
->tagged_condition
= edge2
->tagged_condition
;
1345 edge1
->tagged_condition
=
1346 isl_union_map_union(edge1
->tagged_condition
,
1347 edge2
->tagged_condition
);
1350 if (is_conditional_validity(edge2
)) {
1351 if (!edge1
->tagged_validity
)
1352 edge1
->tagged_validity
= edge2
->tagged_validity
;
1354 edge1
->tagged_validity
=
1355 isl_union_map_union(edge1
->tagged_validity
,
1356 edge2
->tagged_validity
);
1359 if (is_condition(edge2
) && !edge1
->tagged_condition
)
1361 if (is_conditional_validity(edge2
) && !edge1
->tagged_validity
)
1367 /* Insert dummy tags in domain and range of "map".
1369 * In particular, if "map" is of the form
1375 * [A -> dummy_tag] -> [B -> dummy_tag]
1377 * where the dummy_tags are identical and equal to any dummy tags
1378 * introduced by any other call to this function.
1380 static __isl_give isl_map
*insert_dummy_tags(__isl_take isl_map
*map
)
1386 isl_set
*domain
, *range
;
1388 ctx
= isl_map_get_ctx(map
);
1390 id
= isl_id_alloc(ctx
, NULL
, &dummy
);
1391 space
= isl_space_params(isl_map_get_space(map
));
1392 space
= isl_space_set_from_params(space
);
1393 space
= isl_space_set_tuple_id(space
, isl_dim_set
, id
);
1394 space
= isl_space_map_from_set(space
);
1396 domain
= isl_map_wrap(map
);
1397 range
= isl_map_wrap(isl_map_universe(space
));
1398 map
= isl_map_from_domain_and_range(domain
, range
);
1399 map
= isl_map_zip(map
);
1404 /* Return a map in the same space as that of "map" that relates
1405 * the elements with equal schedule prefix.
1406 * Use the original schedule prefix specified by the user and
1407 * not the linear information extracted from it for the purpose
1408 * of avoiding redundant rows in the generated schedule.
1410 static __isl_give isl_map
*extract_equal_prefix(struct isl_sched_graph
*graph
,
1411 __isl_keep isl_map
*map
)
1414 isl_union_map
*umap
;
1415 isl_multi_union_pw_aff
*prefix
;
1417 space
= isl_map_get_space(map
);
1418 map
= isl_map_universe(isl_space_copy(space
));
1419 umap
= isl_union_map_from_map(map
);
1420 prefix
= isl_multi_union_pw_aff_copy(graph
->prefix
);
1421 umap
= isl_union_map_eq_at_multi_union_pw_aff(umap
, prefix
);
1422 map
= isl_union_map_extract_map(umap
, space
);
1423 isl_union_map_free(umap
);
1428 /* Given that at least one of "src" or "dst" is compressed, return
1429 * a map between the spaces of these nodes restricted to the affine
1430 * hull that was used in the compression.
1432 static __isl_give isl_map
*extract_hull(struct isl_sched_node
*src
,
1433 struct isl_sched_node
*dst
)
1437 if (src
->compressed
)
1438 dom
= isl_set_copy(src
->hull
);
1440 dom
= isl_set_universe(isl_space_copy(src
->space
));
1441 if (dst
->compressed
)
1442 ran
= isl_set_copy(dst
->hull
);
1444 ran
= isl_set_universe(isl_space_copy(dst
->space
));
1446 return isl_map_from_domain_and_range(dom
, ran
);
1449 /* Intersect the domains of the nested relations in domain and range
1450 * of "tagged" with "map".
1452 static __isl_give isl_map
*map_intersect_domains(__isl_take isl_map
*tagged
,
1453 __isl_keep isl_map
*map
)
1457 tagged
= isl_map_zip(tagged
);
1458 set
= isl_map_wrap(isl_map_copy(map
));
1459 tagged
= isl_map_intersect_domain(tagged
, set
);
1460 tagged
= isl_map_zip(tagged
);
1464 /* Return a pointer to the node that lives in the domain space of "map",
1465 * an invalid node if there is no such node, or NULL in case of error.
1467 static struct isl_sched_node
*find_domain_node(isl_ctx
*ctx
,
1468 struct isl_sched_graph
*graph
, __isl_keep isl_map
*map
)
1470 struct isl_sched_node
*node
;
1473 space
= isl_space_domain(isl_map_get_space(map
));
1474 node
= graph_find_node(ctx
, graph
, space
);
1475 isl_space_free(space
);
1480 /* Return a pointer to the node that lives in the range space of "map",
1481 * an invalid node if there is no such node, or NULL in case of error.
1483 static struct isl_sched_node
*find_range_node(isl_ctx
*ctx
,
1484 struct isl_sched_graph
*graph
, __isl_keep isl_map
*map
)
1486 struct isl_sched_node
*node
;
1489 space
= isl_space_range(isl_map_get_space(map
));
1490 node
= graph_find_node(ctx
, graph
, space
);
1491 isl_space_free(space
);
1496 /* Refrain from adding a new edge based on "map" to "graph".
1497 * Instead, just free the map and tell the caller
1498 * no edge was added.
1499 * "tagged" is either a copy of "map" with additional tags or NULL.
1501 static struct isl_sched_edge
*skip_edge(struct isl_sched_graph
*graph
,
1502 __isl_take isl_map
*map
, __isl_take isl_map
*tagged
)
1505 isl_map_free(tagged
);
1507 return &graph
->edge
[graph
->n_edge
];
1510 /* Add a new edge to the graph based on the given map
1511 * and add it to graph->edge_table[type].
1512 * If a dependence relation of a given type happens to be identical
1513 * to one of the dependence relations of a type that was added before,
1514 * then we don't create a new edge, but instead mark the original edge
1515 * as also representing a dependence of the current type.
1516 * No such merging is performed on consecutivity edges.
1517 * If no corresponding source or destination nodes can be found,
1518 * then no edge is created.
1519 * Return a pointer to the new or merged edge if an edge was created or
1520 * updated. Return an invalid edge otherwise.
1521 * Return NULL on error.
1523 * Edges of type isl_edge_condition or isl_edge_conditional_validity
1524 * may be specified as "tagged" dependence relations. That is, "map"
1525 * may contain elements (i -> a) -> (j -> b), where i -> j denotes
1526 * the dependence on iterations and a and b are tags.
1527 * edge->map is set to the relation containing the elements i -> j,
1528 * while edge->tagged_condition and edge->tagged_validity contain
1529 * the union of all the "map" relations
1530 * for which extract_edge is called that result in the same edge->map.
1532 * If the source or the destination node is compressed, then
1533 * intersect both "map" and "tagged" with the constraints that
1534 * were used to construct the compression.
1535 * This ensures that there are no schedule constraints defined
1536 * outside of these domains, while the scheduler no longer has
1537 * any control over those outside parts.
1539 * If a (non-trivial) prefix schedule was specified by the user,
1540 * then only retain dependences between instances with equal
1541 * prefix values. If the specified prefix schedule was incomplete,
1542 * then this may result in the removal of all dependences.
1544 static struct isl_sched_edge
*add_edge(struct isl_sched_graph
*graph
,
1545 enum isl_edge_type type
, __isl_take isl_map
*map
)
1548 isl_ctx
*ctx
= isl_map_get_ctx(map
);
1549 struct isl_sched_node
*src
, *dst
;
1550 struct isl_sched_edge
*edge
;
1551 isl_map
*tagged
= NULL
;
1553 if (type
== isl_edge_condition
||
1554 type
== isl_edge_conditional_validity
) {
1555 if (isl_map_can_zip(map
)) {
1556 tagged
= isl_map_copy(map
);
1557 map
= isl_set_unwrap(isl_map_domain(isl_map_zip(map
)));
1559 tagged
= insert_dummy_tags(isl_map_copy(map
));
1563 if (graph
->prefix
) {
1564 isl_map
*equal_prefix
;
1565 equal_prefix
= extract_equal_prefix(graph
, map
);
1567 tagged
= map_intersect_domains(tagged
, equal_prefix
);
1568 map
= isl_map_intersect(map
, equal_prefix
);
1571 src
= find_domain_node(ctx
, graph
, map
);
1572 dst
= find_range_node(ctx
, graph
, map
);
1576 if (!is_node(graph
, src
) || !is_node(graph
, dst
))
1577 return skip_edge(graph
, map
, tagged
);
1579 if (src
->compressed
|| dst
->compressed
) {
1581 hull
= extract_hull(src
, dst
);
1583 tagged
= map_intersect_domains(tagged
, hull
);
1584 map
= isl_map_intersect(map
, hull
);
1587 empty
= isl_map_plain_is_empty(map
);
1591 return skip_edge(graph
, map
, tagged
);
1593 graph
->edge
[graph
->n_edge
].src
= src
;
1594 graph
->edge
[graph
->n_edge
].dst
= dst
;
1595 graph
->edge
[graph
->n_edge
].map
= map
;
1596 graph
->edge
[graph
->n_edge
].types
= 0;
1597 graph
->edge
[graph
->n_edge
].tagged_condition
= NULL
;
1598 graph
->edge
[graph
->n_edge
].tagged_validity
= NULL
;
1599 set_type(&graph
->edge
[graph
->n_edge
], type
);
1600 if (type
== isl_edge_condition
)
1601 graph
->edge
[graph
->n_edge
].tagged_condition
=
1602 isl_union_map_from_map(tagged
);
1603 if (type
== isl_edge_conditional_validity
)
1604 graph
->edge
[graph
->n_edge
].tagged_validity
=
1605 isl_union_map_from_map(tagged
);
1607 edge
= graph_find_matching_edge(graph
, &graph
->edge
[graph
->n_edge
]);
1612 if (edge
== &graph
->edge
[graph
->n_edge
])
1613 edge
= &graph
->edge
[graph
->n_edge
++];
1614 else if (merge_edge(edge
, &graph
->edge
[graph
->n_edge
]) < 0)
1617 if (graph_edge_table_add(ctx
, graph
, type
, edge
) < 0)
1622 isl_map_free(tagged
);
1626 /* Is "edge" an edge in "graph"?
1628 static int is_edge(struct isl_sched_graph
*graph
,
1629 struct isl_sched_edge
*edge
)
1632 edge
>= graph
->edge
&& edge
< graph
->edge
+ graph
->n_edge
;
1635 /* Add a new edge to the graph based on the given map
1636 * and add it to data->graph->edge_table[data->type].
1638 static isl_stat
extract_edge(__isl_take isl_map
*map
, void *user
)
1640 struct isl_extract_edge_data
*data
= user
;
1641 struct isl_sched_graph
*graph
= data
->graph
;
1642 struct isl_sched_edge
*edge
;
1644 edge
= add_edge(graph
, data
->type
, map
);
1645 return edge
? isl_stat_ok
: isl_stat_error
;
1648 /* Insert an intra-statement consecutivity constraint with
1649 * identifier "id" (may be NULL),
1650 * outer part "outer" and inner part "inner" in front of the list of
1651 * intra-statement consecutivity constraints of "node".
1652 * "outer" is replaced by a basis because only the spanned
1653 * space is relevant and not the individual rows.
1655 static isl_stat
insert_intra(struct isl_sched_node
*node
,
1656 __isl_take isl_id
*id
, __isl_take isl_mat
*outer
,
1657 __isl_take isl_mat
*inner
)
1660 struct isl_sched_intra
*intra
;
1662 outer
= isl_mat_row_basis(outer
);
1663 if (!outer
|| !inner
)
1666 ctx
= isl_mat_get_ctx(outer
);
1667 intra
= isl_calloc_type(ctx
, struct isl_sched_intra
);
1670 intra
->n_inner
= isl_mat_rows(inner
);
1672 intra
->outer
= outer
;
1673 intra
->inner
= inner
;
1674 intra
->next
= node
->intra
;
1675 node
->intra
= intra
;
1679 isl_mat_free(outer
);
1680 isl_mat_free(inner
);
1681 return isl_stat_error
;
1684 /* Does the sequence of linear combinations "lin" with outer rows "outer"
1685 * represent a valid intra-statement consecutivity constraint for a node
1686 * with "nvar" variables?
1688 * If the number of columns is greater than the number of variables,
1689 * then the isl_multi_aff from which this linear part was extracted
1690 * involves some local variables, meaning that it is a quasi-affine
1691 * expression rather than an affine expression.
1692 * These are not allowed.
1694 * The inner part of "lin" needs to be of full row-rank and
1695 * needs to be linearly independent of "outer".
1696 * That is, rank(lin) needs to be equal to rank(outer) + rank(inner),
1697 * while rank(inner) needs to be equal to the number of rows of the inner part.
1698 * Furthermore, the inner part needs to contain at least one row.
1700 static isl_bool
valid_intra(__isl_keep isl_mat
*lin
, __isl_keep isl_mat
*outer
,
1704 int rank
, rank_outer
;
1708 return isl_bool_error
;
1710 n_col
= isl_mat_cols(lin
);
1712 return isl_bool_false
;
1714 n_inner
= isl_mat_rows(lin
) - isl_mat_rows(outer
);
1716 return isl_bool_false
;
1717 rank
= isl_mat_rank(lin
);
1718 rank_outer
= isl_mat_rank(outer
);
1719 if (rank
< 0 || rank_outer
< 0)
1720 return isl_bool_error
;
1721 if (rank
!= rank_outer
+ n_inner
)
1722 return isl_bool_false
;
1724 return isl_bool_true
;
1727 /* Insert the intra-statement consecutivity constraint "lin"
1728 * with identifier "id" (may be NULL) and
1729 * outer part "outer" in front of the list of
1730 * intra-statement consecutivity constraints of "node",
1731 * provided it is a valid constraint.
1733 static isl_stat
insert_valid_intra(struct isl_sched_node
*node
,
1734 __isl_take isl_id
*id
, __isl_take isl_mat
*lin
,
1735 __isl_take isl_mat
*outer
)
1739 valid
= valid_intra(lin
, outer
, node
->nvar
);
1740 if (valid
>= 0 && valid
) {
1744 n_outer
= isl_mat_rows(outer
);
1745 inner
= isl_mat_drop_rows(lin
, 0, n_outer
);
1746 return insert_intra(node
, id
, outer
, inner
);
1751 isl_mat_free(outer
);
1752 return valid
>= 0 ? isl_stat_ok
: isl_stat_error
;
1755 /* Insert the intra-statement consecutivity constraint "ma"
1756 * in front of the list of intra-statement consecutivity constraints
1757 * of "node", provided it is a valid constraint.
1759 * "ma" maps the (uncompressed) space of "node" to a product space
1760 * of outer and inner parts.
1761 * Only the linear parts of the affine expressions are relevant.
1762 * If the node is compressed, reformulate the constraints in terms
1763 * of the compressed domain,
1764 * extract the linear parts and store them in "node",
1765 * provided they represent a valid constraint.
1766 * If "ma" has a tuple identifier, then keep track of it as well.
1768 static isl_stat
node_insert_intra(struct isl_sched_node
*node
,
1769 __isl_take isl_multi_aff
*ma
)
1774 isl_mat
*lin
, *outer
;
1775 int n_outer
, n_inner
;
1777 space
= isl_multi_aff_get_space(ma
);
1778 space
= isl_space_unwrap(isl_space_range(space
));
1779 n_outer
= isl_space_dim(space
, isl_dim_in
);
1780 n_inner
= isl_space_dim(space
, isl_dim_out
);
1781 isl_space_free(space
);
1785 has_id
= isl_multi_aff_has_tuple_id(ma
, isl_dim_out
);
1791 id
= isl_multi_aff_get_tuple_id(ma
, isl_dim_out
);
1796 if (node
->compressed
)
1797 ma
= isl_multi_aff_pullback_multi_aff(ma
,
1798 isl_multi_aff_copy(node
->decompress
));
1799 lin
= extract_linear(ma
);
1800 outer
= isl_mat_drop_rows(isl_mat_copy(lin
), n_outer
, n_inner
);
1801 return insert_valid_intra(node
, id
, lin
, outer
);
1803 isl_multi_aff_free(ma
);
1804 return isl_stat_error
;
1807 /* Insert the intra-statement consecutivity constraint "ma"
1808 * in front of the list of intra-statement consecutivity constraints
1809 * of the corresponding node of "graph", provided there is such a node.
1811 static isl_stat
graph_insert_intra(struct isl_sched_graph
*graph
,
1812 __isl_take isl_multi_aff
*ma
)
1814 struct isl_sched_node
*node
;
1818 space
= isl_multi_aff_get_domain_space(ma
);
1819 ctx
= isl_multi_aff_get_ctx(ma
);
1820 node
= graph_find_node(ctx
, graph
, space
);
1821 isl_space_free(space
);
1824 if (is_node(graph
, node
))
1825 return node_insert_intra(node
, ma
);
1826 isl_multi_aff_free(ma
);
1829 isl_multi_aff_free(ma
);
1830 return isl_stat_error
;
1833 /* Store the intra-statement consecutivity constraints of "sc"
1834 * in the appropriate nodes of "graph".
1836 * If there are multiple constraints per node, then
1837 * the constraints are successively inserted in front of
1838 * the per-node list.
1839 * Start from the last intra-statement consecutivity constraint
1840 * to ensure that the final order of the per-node constraints
1841 * is the same as in the original list of constraints.
1843 static isl_stat
graph_set_intra(struct isl_sched_graph
*graph
,
1844 __isl_keep isl_schedule_constraints
*sc
)
1846 isl_multi_aff_list
*intra
;
1849 intra
= isl_schedule_constraints_get_intra_consecutivity(sc
);
1851 return isl_stat_error
;
1852 n
= isl_multi_aff_list_n_multi_aff(intra
);
1853 for (i
= n
- 1; i
>= 0; --i
) {
1856 ma
= isl_multi_aff_list_get_multi_aff(intra
, i
);
1857 if (graph_insert_intra(graph
, ma
) < 0)
1860 isl_multi_aff_list_free(intra
);
1864 isl_multi_aff_list_free(intra
);
1865 return isl_stat_error
;
1868 /* Return the intra-statement consecutivity constraint
1869 * referenced by "node" that has identifier "id".
1870 * Return NULL if no such constraint can be found.
1872 static struct isl_sched_intra
*find_intra(struct isl_sched_node
*node
,
1873 __isl_take isl_id
*id
)
1875 struct isl_sched_intra
*intra
;
1877 for (intra
= node
->intra
; intra
; intra
= intra
->next
) {
1878 if (intra
->id
== id
)
1886 /* Clear the dependence relation of "edge" and remove
1887 * it from the edge tables of "graph".
1889 static isl_stat
clear_edge(struct isl_sched_graph
*graph
,
1890 struct isl_sched_edge
*edge
)
1894 space
= isl_map_get_space(edge
->map
);
1895 isl_map_free(edge
->map
);
1896 edge
->map
= isl_map_empty(space
);
1898 return isl_stat_error
;
1900 graph_remove_edge(graph
, edge
);
1905 /* Add an edge to "graph" corresponding to the inter-statement
1906 * consecutivity constraint "map" that references
1907 * the intra-statement consecutivity constraints identified
1908 * by "id_src" and "id_dst", if those intra-statement
1909 * consecutivity constraints can be found and have the same number
1910 * of rows in their inner parts.
1912 * The edge is first created and then possibly disabled
1913 * if the intra-statement consecutivity constraints cannot be found.
1914 * A side effect of the edge creation is that the nodes are identified and
1915 * the intra-statement consecutivity constraints need to be looked up
1918 static isl_stat
graph_add_inter_split(struct isl_sched_graph
*graph
,
1919 __isl_take isl_map
*map
, __isl_take isl_id
*id_src
,
1920 __isl_take isl_id
*id_dst
)
1922 struct isl_sched_edge
*edge
;
1924 edge
= add_edge(graph
, isl_edge_consecutivity
, map
);
1927 edge
->state
= isl_sched_inter_init
;
1928 edge
->src_intra
= find_intra(edge
->src
, id_src
);
1929 edge
->dst_intra
= find_intra(edge
->dst
, id_dst
);
1931 if (edge
->src_intra
&& edge
->dst_intra
&&
1932 edge
->src_intra
->n_inner
== edge
->dst_intra
->n_inner
)
1935 return clear_edge(graph
, edge
);
1937 isl_id_free(id_src
);
1938 isl_id_free(id_dst
);
1939 return isl_stat_error
;
1942 /* Add an edge to "graph" corresponding to the inter-statement
1943 * consecutivity constraint "map", if it represents a valid constraint.
1945 * Extract out the relation between statement instances and
1946 * the pair of intra-statement consecutivity constraint identifiers.
1948 static isl_stat
add_inter_consecutivity(__isl_take isl_map
*map
, void *user
)
1950 struct isl_sched_graph
*graph
= user
;
1953 isl_id
*id_src
, *id_dst
;
1955 space
= isl_map_get_space(map
);
1956 space
= isl_space_factor_range(space
);
1957 has_id
= isl_space_has_tuple_id(space
, isl_dim_in
);
1958 if (has_id
>= 0 && has_id
)
1959 has_id
= isl_space_has_tuple_id(space
, isl_dim_out
);
1960 if (has_id
< 0 || !has_id
) {
1961 isl_space_free(space
);
1963 return has_id
< 0 ? isl_stat_error
: isl_stat_ok
;
1965 id_src
= isl_space_get_tuple_id(space
, isl_dim_in
);
1966 id_dst
= isl_space_get_tuple_id(space
, isl_dim_out
);
1967 isl_space_free(space
);
1969 map
= isl_map_factor_domain(map
);
1971 return graph_add_inter_split(graph
, map
, id_src
, id_dst
);
1974 /* Add edges to "graph" corresponding to the valid inter-statement
1975 * consecutivity constraints of "sc".
1977 static isl_stat
graph_set_inter(struct isl_sched_graph
*graph
,
1978 __isl_keep isl_schedule_constraints
*sc
)
1980 isl_map_list
*inter
;
1983 inter
= isl_schedule_constraints_get_inter_consecutivity(sc
);
1984 r
= isl_map_list_foreach(inter
, &add_inter_consecutivity
, graph
);
1985 isl_map_list_free(inter
);
1990 /* Extract (a basis for) the purely linear part of "ma",
1991 * i.e., the coefficients of the input variables but not the local variables.
1993 * There may be linear combinations of the elements of "ma"
1994 * that do not involve local variables, while the elements themselves
1995 * do involve local variables.
1996 * Perform Gaussian elimination to remove local variables from
1997 * as many rows as possible and subsequently remove the remaining rows
1998 * involving local variables as well as the columns corresponding
1999 * to the local variables.
2001 static __isl_give isl_mat
*extract_pure_linear(__isl_take isl_multi_aff
*ma
)
2003 int i
, n
, n_var
, n_div
;
2009 n_var
= isl_multi_aff_dim(ma
, isl_dim_in
);
2011 rows
= extract_linear(ma
);
2012 rows
= isl_mat_reverse_gauss(rows
);
2017 n
= isl_mat_rows(rows
);
2018 n_div
= isl_mat_cols(rows
) - n_var
;
2019 for (i
= n
- 1; i
>= 0; --i
)
2020 if (isl_seq_first_non_zero(rows
->row
[i
] + n_var
, n_div
) == -1)
2022 rows
= isl_mat_drop_rows(rows
, i
+ 1, n
- (i
+ 1));
2023 rows
= isl_mat_drop_cols(rows
, n_var
, n_div
);
2028 /* Extend "complement" with the complement of the purely linear part of "ma".
2030 static isl_stat
extend_prefix_complement(__isl_take isl_set
*dom
,
2031 __isl_take isl_multi_aff
*ma
, void *user
)
2033 isl_mat
*complement_ma
;
2034 isl_mat
**complement
= user
;
2038 complement_ma
= isl_mat_row_complement(extract_pure_linear(ma
));
2040 *complement
= isl_mat_concat(*complement
, complement_ma
);
2042 return *complement
? isl_stat_ok
: isl_stat_error
;
2045 /* Extract a linear prefix schedule from "pma" that is valid
2047 * In particular, if there are multiple pieces, then the result
2048 * contains linear combinations that have a fixed value in all pieces.
2049 * That is, if there is a direction that is not fixed in one or more pieces,
2050 * then it is also not fixed by the entire piecewise expression.
2051 * A direction that is not fixed needs to have a component along
2052 * the orthogonal complement of the fixed directions.
2053 * Collect these orthogonal complements over all pieces and
2054 * compute the complement of the result to obtain the desired directions.
2056 * If "pma" is empty (which indicates a missing, and therefore invalid,
2057 * prefix schedule), then the result will contain a basis for all directions,
2058 * being the complement of an empty complement.
2060 static __isl_give isl_mat
*extract_prefix_pw_multi_aff(
2061 __isl_keep isl_pw_multi_aff
*pma
)
2065 isl_mat
*complement
;
2070 ctx
= isl_pw_multi_aff_get_ctx(pma
);
2071 nvar
= isl_pw_multi_aff_dim(pma
, isl_dim_in
);
2072 complement
= isl_mat_alloc(ctx
, 0, nvar
);
2074 if (isl_pw_multi_aff_foreach_piece(pma
, &extend_prefix_complement
,
2076 complement
= isl_mat_free(complement
);
2078 return isl_mat_row_complement(complement
);
2081 /* Extract a prefix schedule for "node" from "mupa" and add
2082 * it to node->sched.
2084 * "mupa" is formulated in terms of the original (uncompressed) spaces,
2085 * while node->sched is formulated in terms of the potentially compressed
2086 * space. If "node" is compressed, then the expression corresponding
2087 * to "node" therefore needs to be transformed first.
2089 * The prefix stored in node->sched is only used to avoid linearly
2090 * dependent schedule rows from being generated. Only the linear
2091 * part of the prefix is therefore relevant. Use zero for
2092 * the coefficients of the constant term and the parameters.
2093 * The extracted linear part may have fewer rows than "mupa",
2094 * either because of linear dependences or because some element
2095 * of "mupa" involve local variables.
2096 * Extend the number of rows of the linear part to the number
2097 * of elements in "mupa" to ensure that all nodes have the same
2100 * If "mupa" does not contain a prefix schedule for "node",
2101 * then it is invalid. In the current implementation, this will
2102 * cause the scheduler to not construct any further schedule rows
2105 static isl_stat
extract_prefix(struct isl_sched_node
*node
,
2106 __isl_keep isl_multi_union_pw_aff
*mupa
)
2109 isl_multi_pw_aff
*mpa
;
2110 isl_pw_multi_aff
*pma
;
2115 return isl_stat_error
;
2117 space
= isl_space_copy(node
->space
);
2118 mpa
= isl_multi_union_pw_aff_extract_multi_pw_aff(mupa
, space
);
2121 mpa
= isl_multi_pw_aff_pullback_multi_aff(mpa
,
2122 isl_multi_aff_copy(node
->decompress
));
2124 pma
= isl_pw_multi_aff_from_multi_pw_aff(mpa
);
2126 prefix
= extract_prefix_pw_multi_aff(pma
);
2128 prefix
= isl_mat_insert_zero_cols(prefix
, 0, 1 + node
->nparam
);
2129 n_prefix
= isl_multi_union_pw_aff_dim(mupa
, isl_dim_set
);
2130 n
= isl_mat_rows(prefix
);
2131 prefix
= isl_mat_add_zero_rows(prefix
, n_prefix
- n
);
2132 node
->sched
= isl_mat_concat(node
->sched
, prefix
);
2134 isl_pw_multi_aff_free(pma
);
2137 return isl_stat_error
;
2142 /* Check if any (non-trivial) prefix schedule was specified in "sc".
2143 * If so, store a copy in "graph" for later simplification
2144 * of dependence relations and extract the linear parts
2145 * in the respective nodes.
2146 * These linear parts are considered as an initial outer band.
2147 * Their only effect is to try and prevent rows in the generated schedule
2148 * from being linear combinations of the prefix.
2150 * Since the prefix schedule cannot be assumed to be linearly
2151 * independent on all nodes, graph->n_row is not incremented.
2152 * Note that the ranks of the nodes will get updated regardless and
2153 * graph->maxvar is computed based on these ranks. The test for
2154 * whether more schedule rows are required in compute_schedule_wcc
2155 * therefore does take the prefix into account.
2157 * The prefix schedule specified by the user is required to
2158 * be complete on the domain. An invalid prefix will result
2159 * in nodes being essentially removed from consideration.
2161 static isl_stat
handle_prefix(struct isl_sched_graph
*graph
,
2162 __isl_keep isl_schedule_constraints
*sc
)
2166 isl_multi_union_pw_aff
*mupa
;
2168 mupa
= isl_schedule_constraints_get_prefix(sc
);
2170 return isl_stat_error
;
2171 n
= isl_multi_union_pw_aff_dim(mupa
, isl_dim_set
);
2173 isl_multi_union_pw_aff_free(mupa
);
2177 graph
->prefix
= mupa
;
2179 for (i
= 0; i
< graph
->n
; ++i
) {
2180 if (extract_prefix(&graph
->node
[i
], mupa
) < 0)
2181 return isl_stat_error
;
2184 graph
->n_total_row
= n
;
2185 graph
->band_start
= graph
->n_total_row
;
2190 /* Initialize the schedule graph "graph" from the schedule constraints "sc".
2192 * The context is included in the domain before the nodes of
2193 * the graphs are extracted in order to be able to exploit
2194 * any possible additional equalities.
2195 * Note that this intersection is only performed locally here.
2197 static isl_stat
graph_init(struct isl_sched_graph
*graph
,
2198 __isl_keep isl_schedule_constraints
*sc
)
2201 isl_union_set
*domain
;
2202 isl_multi_aff_list
*intra
;
2204 struct isl_extract_edge_data data
;
2205 enum isl_edge_type i
;
2207 int n_intra
, n_inter
;
2210 return isl_stat_error
;
2212 ctx
= isl_schedule_constraints_get_ctx(sc
);
2214 domain
= isl_schedule_constraints_get_domain(sc
);
2215 graph
->n
= isl_union_set_n_set(domain
);
2216 isl_union_set_free(domain
);
2218 intra
= isl_schedule_constraints_get_intra_consecutivity(sc
);
2219 n_intra
= isl_multi_aff_list_n_multi_aff(intra
);
2220 isl_multi_aff_list_free(intra
);
2221 n_inter
= isl_schedule_constraints_n_inter_consecutivity_map(sc
);
2223 if (graph_alloc(ctx
, graph
, graph
->n
,
2224 isl_schedule_constraints_n_map(sc
), n_intra
+ n_inter
) < 0)
2225 return isl_stat_error
;
2227 if (compute_max_row(graph
, sc
) < 0)
2228 return isl_stat_error
;
2229 graph
->root
= graph
;
2231 domain
= isl_schedule_constraints_get_domain(sc
);
2232 domain
= isl_union_set_intersect_params(domain
,
2233 isl_schedule_constraints_get_context(sc
));
2234 r
= isl_union_set_foreach_set(domain
, &extract_node
, graph
);
2235 isl_union_set_free(domain
);
2237 return isl_stat_error
;
2238 if (graph_init_table(ctx
, graph
) < 0)
2239 return isl_stat_error
;
2240 if (graph_set_intra(graph
, sc
) < 0)
2241 return isl_stat_error
;
2242 if (handle_prefix(graph
, sc
) < 0)
2243 return isl_stat_error
;
2244 for (i
= isl_edge_first
; i
<= isl_edge_last_sc
; ++i
) {
2245 c
= isl_schedule_constraints_get(sc
, i
);
2246 graph
->max_edge
[i
] = isl_union_map_n_map(c
);
2247 isl_union_map_free(c
);
2249 return isl_stat_error
;
2251 graph
->max_edge
[isl_edge_consecutivity
] = n_inter
;
2252 if (graph_init_edge_tables(ctx
, graph
) < 0)
2253 return isl_stat_error
;
2256 for (i
= isl_edge_first
; i
<= isl_edge_last_sc
; ++i
) {
2260 c
= isl_schedule_constraints_get(sc
, i
);
2261 r
= isl_union_map_foreach_map(c
, &extract_edge
, &data
);
2262 isl_union_map_free(c
);
2264 return isl_stat_error
;
2266 if (graph_set_inter(graph
, sc
) < 0)
2267 return isl_stat_error
;
2272 /* Check whether there is any dependence from node[j] to node[i]
2273 * or from node[i] to node[j].
2275 static isl_bool
node_follows_weak(int i
, int j
, void *user
)
2278 struct isl_sched_graph
*graph
= user
;
2280 f
= graph_has_any_edge(graph
, &graph
->node
[j
], &graph
->node
[i
]);
2283 return graph_has_any_edge(graph
, &graph
->node
[i
], &graph
->node
[j
]);
2286 /* Check whether there is a (conditional) validity dependence from node[j]
2287 * to node[i], forcing node[i] to follow node[j].
2289 static isl_bool
node_follows_strong(int i
, int j
, void *user
)
2291 struct isl_sched_graph
*graph
= user
;
2293 return graph_has_validity_edge(graph
, &graph
->node
[j
], &graph
->node
[i
]);
2296 /* Is there a (conditional) validity dependence from node[j] to node[i],
2297 * forcing node[i] to follow node[j] or are the nodes related
2298 * through an inter-statement consecutivity constraint?
2300 static isl_bool
node_follows_strong_or_consecutive(int i
, int j
, void *user
)
2302 struct isl_sched_graph
*graph
= user
;
2304 struct isl_sched_node
*node_i
, *node_j
;
2306 r
= node_follows_strong(i
, j
, user
);
2310 node_i
= &graph
->node
[i
];
2311 node_j
= &graph
->node
[j
];
2312 r
= graph_has_edge(graph
, isl_edge_consecutivity
, node_i
, node_j
);
2315 return graph_has_edge(graph
, isl_edge_consecutivity
, node_j
, node_i
);
2318 /* Use Tarjan's algorithm for computing the strongly connected components
2319 * in the dependence graph only considering those edges defined by "follows".
2321 static isl_stat
detect_ccs(isl_ctx
*ctx
, struct isl_sched_graph
*graph
,
2322 isl_bool (*follows
)(int i
, int j
, void *user
))
2325 struct isl_tarjan_graph
*g
= NULL
;
2327 g
= isl_tarjan_graph_init(ctx
, graph
->n
, follows
, graph
);
2329 return isl_stat_error
;
2335 while (g
->order
[i
] != -1) {
2336 graph
->node
[g
->order
[i
]].scc
= graph
->scc
;
2344 isl_tarjan_graph_free(g
);
2349 /* Apply Tarjan's algorithm to detect the strongly connected components
2350 * in the dependence graph.
2351 * Only consider the (conditional) validity dependences and clear "weak".
2353 static isl_stat
detect_sccs(isl_ctx
*ctx
, struct isl_sched_graph
*graph
)
2356 return detect_ccs(ctx
, graph
, &node_follows_strong
);
2359 /* Apply Tarjan's algorithm to detect the strongly connected components
2360 * in the dependence graph, but combine components that are linked
2361 * through inter-statement consecutivity constraints.
2362 * Only consider the (conditional) validity dependences and clear "weak".
2364 static isl_stat
detect_sccs_grouping_consecutive(isl_ctx
*ctx
,
2365 struct isl_sched_graph
*graph
)
2368 return detect_ccs(ctx
, graph
, &node_follows_strong_or_consecutive
);
2371 /* Apply Tarjan's algorithm to detect the (weakly) connected components
2372 * in the dependence graph.
2373 * Consider all dependences and set "weak".
2375 static isl_stat
detect_wccs(isl_ctx
*ctx
, struct isl_sched_graph
*graph
)
2378 return detect_ccs(ctx
, graph
, &node_follows_weak
);
2381 static int cmp_scc(const void *a
, const void *b
, void *data
)
2383 struct isl_sched_graph
*graph
= data
;
2387 return graph
->node
[*i1
].scc
- graph
->node
[*i2
].scc
;
2390 /* Sort the elements of graph->sorted according to the corresponding SCCs.
2392 static int sort_sccs(struct isl_sched_graph
*graph
)
2394 return isl_sort(graph
->sorted
, graph
->n
, sizeof(int), &cmp_scc
, graph
);
2397 /* Return a non-parametric set in the compressed space of "node" that is
2398 * bounded by the size in each direction
2400 * { [x] : -S_i <= x_i <= S_i }
2402 * If S_i is infinity in direction i, then there are no constraints
2403 * in that direction.
2405 * Cache the result in node->bounds.
2407 static __isl_give isl_basic_set
*get_size_bounds(struct isl_sched_node
*node
)
2410 isl_basic_set
*bounds
;
2415 return isl_basic_set_copy(node
->bounds
);
2417 if (node
->compressed
)
2418 space
= isl_multi_aff_get_domain_space(node
->decompress
);
2420 space
= isl_space_copy(node
->space
);
2421 nparam
= isl_space_dim(space
, isl_dim_param
);
2422 space
= isl_space_drop_dims(space
, isl_dim_param
, 0, nparam
);
2423 bounds
= isl_basic_set_universe(space
);
2425 for (i
= 0; i
< node
->nvar
; ++i
) {
2428 size
= isl_multi_val_get_val(node
->sizes
, i
);
2430 return isl_basic_set_free(bounds
);
2431 if (!isl_val_is_int(size
)) {
2435 bounds
= isl_basic_set_upper_bound_val(bounds
, isl_dim_set
, i
,
2436 isl_val_copy(size
));
2437 bounds
= isl_basic_set_lower_bound_val(bounds
, isl_dim_set
, i
,
2441 node
->bounds
= isl_basic_set_copy(bounds
);
2445 /* Drop some constraints from "delta" that could be exploited
2446 * to construct loop coalescing schedules.
2447 * In particular, drop those constraint that bound the difference
2448 * to the size of the domain.
2449 * First project out the parameters to improve the effectiveness.
2451 static __isl_give isl_set
*drop_coalescing_constraints(
2452 __isl_take isl_set
*delta
, struct isl_sched_node
*node
)
2455 isl_basic_set
*bounds
;
2457 bounds
= get_size_bounds(node
);
2459 nparam
= isl_set_dim(delta
, isl_dim_param
);
2460 delta
= isl_set_project_out(delta
, isl_dim_param
, 0, nparam
);
2461 delta
= isl_set_remove_divs(delta
);
2462 delta
= isl_set_plain_gist_basic_set(delta
, bounds
);
2466 /* Given a dependence relation R from "node" to itself,
2467 * construct the set of coefficients of valid constraints for elements
2468 * in that dependence relation.
2469 * In particular, the result contains tuples of coefficients
2470 * c_0, c_n, c_x such that
2472 * c_0 + c_n n + c_x y - c_x x >= 0 for each (x,y) in R
2476 * c_0 + c_n n + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
2478 * We choose here to compute the dual of delta R.
2479 * Alternatively, we could have computed the dual of R, resulting
2480 * in a set of tuples c_0, c_n, c_x, c_y, and then
2481 * plugged in (c_0, c_n, c_x, -c_x).
2483 * If "need_param" is set, then the resulting coefficients effectively
2484 * include coefficients for the parameters c_n. Otherwise, they may
2485 * have been projected out already.
2486 * Since the constraints may be different for these two cases,
2487 * they are stored in separate caches.
2488 * In particular, if no parameter coefficients are required and
2489 * the schedule_treat_coalescing option is set, then the parameters
2490 * are projected out and some constraints that could be exploited
2491 * to construct coalescing schedules are removed before the dual
2494 * If "node" has been compressed, then the dependence relation
2495 * is also compressed before the set of coefficients is computed.
2497 static __isl_give isl_basic_set
*intra_coefficients(
2498 struct isl_sched_graph
*graph
, struct isl_sched_node
*node
,
2499 __isl_take isl_map
*map
, int need_param
)
2504 isl_basic_set
*coef
;
2505 isl_maybe_isl_basic_set m
;
2506 isl_map_to_basic_set
**hmap
= &graph
->intra_hmap
;
2512 ctx
= isl_map_get_ctx(map
);
2513 treat
= !need_param
&& isl_options_get_schedule_treat_coalescing(ctx
);
2515 hmap
= &graph
->intra_hmap_param
;
2516 m
= isl_map_to_basic_set_try_get(*hmap
, map
);
2517 if (m
.valid
< 0 || m
.valid
) {
2522 key
= isl_map_copy(map
);
2523 if (node
->compressed
) {
2524 map
= isl_map_preimage_domain_multi_aff(map
,
2525 isl_multi_aff_copy(node
->decompress
));
2526 map
= isl_map_preimage_range_multi_aff(map
,
2527 isl_multi_aff_copy(node
->decompress
));
2529 delta
= isl_map_deltas(map
);
2531 delta
= drop_coalescing_constraints(delta
, node
);
2532 delta
= isl_set_remove_divs(delta
);
2533 coef
= isl_set_coefficients(delta
);
2534 *hmap
= isl_map_to_basic_set_set(*hmap
, key
, isl_basic_set_copy(coef
));
2539 /* Given a dependence relation R, construct the set of coefficients
2540 * of valid constraints for elements in that dependence relation.
2541 * In particular, the result contains tuples of coefficients
2542 * c_0, c_n, c_x, c_y such that
2544 * c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
2546 * If the source or destination nodes of "edge" have been compressed,
2547 * then the dependence relation is also compressed before
2548 * the set of coefficients is computed.
2550 static __isl_give isl_basic_set
*inter_coefficients(
2551 struct isl_sched_graph
*graph
, struct isl_sched_edge
*edge
,
2552 __isl_take isl_map
*map
)
2556 isl_basic_set
*coef
;
2557 isl_maybe_isl_basic_set m
;
2559 m
= isl_map_to_basic_set_try_get(graph
->inter_hmap
, map
);
2560 if (m
.valid
< 0 || m
.valid
) {
2565 key
= isl_map_copy(map
);
2566 if (edge
->src
->compressed
)
2567 map
= isl_map_preimage_domain_multi_aff(map
,
2568 isl_multi_aff_copy(edge
->src
->decompress
));
2569 if (edge
->dst
->compressed
)
2570 map
= isl_map_preimage_range_multi_aff(map
,
2571 isl_multi_aff_copy(edge
->dst
->decompress
));
2572 set
= isl_map_wrap(isl_map_remove_divs(map
));
2573 coef
= isl_set_coefficients(set
);
2574 graph
->inter_hmap
= isl_map_to_basic_set_set(graph
->inter_hmap
, key
,
2575 isl_basic_set_copy(coef
));
2580 /* Return the position of the coefficients of the variables in
2581 * the coefficients constraints "coef".
2583 * The space of "coef" is of the form
2585 * { coefficients[[cst, params] -> S] }
2587 * Return the position of S.
2589 static int coef_var_offset(__isl_keep isl_basic_set
*coef
)
2594 space
= isl_space_unwrap(isl_basic_set_get_space(coef
));
2595 offset
= isl_space_dim(space
, isl_dim_in
);
2596 isl_space_free(space
);
2601 /* Return the offset of the coefficient of the constant term of "node"
2604 * Within each node, the coefficients have the following order:
2605 * - positive and negative parts of c_i_x
2606 * - c_i_n (if parametric)
2609 static int node_cst_coef_offset(struct isl_sched_node
*node
)
2611 return node
->start
+ 2 * node
->nvar
+ node
->nparam
;
2614 /* Return the offset of the coefficients of the parameters of "node"
2617 * Within each node, the coefficients have the following order:
2618 * - positive and negative parts of c_i_x
2619 * - c_i_n (if parametric)
2622 static int node_par_coef_offset(struct isl_sched_node
*node
)
2624 return node
->start
+ 2 * node
->nvar
;
2627 /* Return the offset of the coefficients of the variables of "node"
2630 * Within each node, the coefficients have the following order:
2631 * - positive and negative parts of c_i_x
2632 * - c_i_n (if parametric)
2635 static int node_var_coef_offset(struct isl_sched_node
*node
)
2640 /* Return the position of the pair of variables encoding
2641 * coefficient "i" of "node".
2643 * The order of these variable pairs is the opposite of
2644 * that of the coefficients, with 2 variables per coefficient.
2646 static int node_var_coef_pos(struct isl_sched_node
*node
, int i
)
2648 return node_var_coef_offset(node
) + 2 * (node
->nvar
- 1 - i
);
2651 /* Construct an isl_dim_map for mapping constraints on coefficients
2652 * for "node" to the corresponding positions in graph->lp.
2653 * "offset" is the offset of the coefficients for the variables
2654 * in the input constraints.
2655 * "s" is the sign of the mapping.
2657 * The input constraints are given in terms of the coefficients
2658 * (c_0, c_x) or (c_0, c_n, c_x).
2659 * The mapping produced by this function essentially plugs in
2660 * (0, c_i_x^+ - c_i_x^-) if s = 1 and
2661 * (0, -c_i_x^+ + c_i_x^-) if s = -1 or
2662 * (0, 0, c_i_x^+ - c_i_x^-) if s = 1 and
2663 * (0, 0, -c_i_x^+ + c_i_x^-) if s = -1.
2664 * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
2665 * Furthermore, the order of these pairs is the opposite of that
2666 * of the corresponding coefficients.
2668 * The caller can extend the mapping to also map the other coefficients
2669 * (and therefore not plug in 0).
2671 static __isl_give isl_dim_map
*intra_dim_map(isl_ctx
*ctx
,
2672 struct isl_sched_graph
*graph
, struct isl_sched_node
*node
,
2677 isl_dim_map
*dim_map
;
2679 if (!node
|| !graph
->lp
)
2682 total
= isl_basic_set_total_dim(graph
->lp
);
2683 pos
= node_var_coef_pos(node
, 0);
2684 dim_map
= isl_dim_map_alloc(ctx
, total
);
2685 isl_dim_map_range(dim_map
, pos
, -2, offset
, 1, node
->nvar
, -s
);
2686 isl_dim_map_range(dim_map
, pos
+ 1, -2, offset
, 1, node
->nvar
, s
);
2691 /* Construct an isl_dim_map for mapping constraints on coefficients
2692 * for "src" (node i) and "dst" (node j) to the corresponding positions
2694 * "offset" is the offset of the coefficients for the variables of "src"
2695 * in the input constraints.
2696 * "s" is the sign of the mapping.
2698 * The input constraints are given in terms of the coefficients
2699 * (c_0, c_n, c_x, c_y).
2700 * The mapping produced by this function essentially plugs in
2701 * (c_j_0 - c_i_0, c_j_n - c_i_n,
2702 * -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-) if s = 1 and
2703 * (-c_j_0 + c_i_0, -c_j_n + c_i_n,
2704 * c_i_x^+ - c_i_x^-, -(c_j_x^+ - c_j_x^-)) if s = -1.
2705 * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
2706 * Furthermore, the order of these pairs is the opposite of that
2707 * of the corresponding coefficients.
2709 * The caller can further extend the mapping.
2711 static __isl_give isl_dim_map
*inter_dim_map(isl_ctx
*ctx
,
2712 struct isl_sched_graph
*graph
, struct isl_sched_node
*src
,
2713 struct isl_sched_node
*dst
, int offset
, int s
)
2717 isl_dim_map
*dim_map
;
2719 if (!src
|| !dst
|| !graph
->lp
)
2722 total
= isl_basic_set_total_dim(graph
->lp
);
2723 dim_map
= isl_dim_map_alloc(ctx
, total
);
2725 pos
= node_cst_coef_offset(dst
);
2726 isl_dim_map_range(dim_map
, pos
, 0, 0, 0, 1, s
);
2727 pos
= node_par_coef_offset(dst
);
2728 isl_dim_map_range(dim_map
, pos
, 1, 1, 1, dst
->nparam
, s
);
2729 pos
= node_var_coef_pos(dst
, 0);
2730 isl_dim_map_range(dim_map
, pos
, -2, offset
+ src
->nvar
, 1,
2732 isl_dim_map_range(dim_map
, pos
+ 1, -2, offset
+ src
->nvar
, 1,
2735 pos
= node_cst_coef_offset(src
);
2736 isl_dim_map_range(dim_map
, pos
, 0, 0, 0, 1, -s
);
2737 pos
= node_par_coef_offset(src
);
2738 isl_dim_map_range(dim_map
, pos
, 1, 1, 1, src
->nparam
, -s
);
2739 pos
= node_var_coef_pos(src
, 0);
2740 isl_dim_map_range(dim_map
, pos
, -2, offset
, 1, src
->nvar
, s
);
2741 isl_dim_map_range(dim_map
, pos
+ 1, -2, offset
, 1, src
->nvar
, -s
);
2746 /* Add the constraints from "src" to "dst" using "dim_map",
2747 * after making sure there is enough room in "dst" for the extra constraints.
2749 static __isl_give isl_basic_set
*add_constraints_dim_map(
2750 __isl_take isl_basic_set
*dst
, __isl_take isl_basic_set
*src
,
2751 __isl_take isl_dim_map
*dim_map
)
2755 n_eq
= isl_basic_set_n_equality(src
);
2756 n_ineq
= isl_basic_set_n_inequality(src
);
2757 dst
= isl_basic_set_extend_constraints(dst
, n_eq
, n_ineq
);
2758 dst
= isl_basic_set_add_constraints_dim_map(dst
, src
, dim_map
);
2762 /* Add constraints to graph->lp that force validity for the given
2763 * dependence from a node i to itself.
2764 * That is, add constraints that enforce
2766 * (c_i_0 + c_i_n n + c_i_x y) - (c_i_0 + c_i_n n + c_i_x x)
2767 * = c_i_x (y - x) >= 0
2769 * for each (x,y) in R.
2770 * We obtain general constraints on coefficients (c_0, c_x)
2771 * of valid constraints for (y - x) and then plug in (0, c_i_x^+ - c_i_x^-),
2772 * where c_i_x = c_i_x^+ - c_i_x^-, with c_i_x^+ and c_i_x^- non-negative.
2773 * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
2774 * Note that the result of intra_coefficients may also contain
2775 * parameter coefficients c_n, in which case 0 is plugged in for them as well.
2777 static isl_stat
add_intra_validity_constraints(struct isl_sched_graph
*graph
,
2778 struct isl_sched_edge
*edge
)
2781 isl_map
*map
= isl_map_copy(edge
->map
);
2782 isl_ctx
*ctx
= isl_map_get_ctx(map
);
2783 isl_dim_map
*dim_map
;
2784 isl_basic_set
*coef
;
2785 struct isl_sched_node
*node
= edge
->src
;
2787 coef
= intra_coefficients(graph
, node
, map
, 0);
2789 offset
= coef_var_offset(coef
);
2792 return isl_stat_error
;
2794 dim_map
= intra_dim_map(ctx
, graph
, node
, offset
, 1);
2795 graph
->lp
= add_constraints_dim_map(graph
->lp
, coef
, dim_map
);
2800 /* Add constraints to graph->lp that force validity for the given
2801 * dependence from node i to node j.
2802 * That is, add constraints that enforce
2804 * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) >= 0
2806 * for each (x,y) in R.
2807 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
2808 * of valid constraints for R and then plug in
2809 * (c_j_0 - c_i_0, c_j_n - c_i_n, -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-),
2810 * where c_* = c_*^+ - c_*^-, with c_*^+ and c_*^- non-negative.
2811 * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
2813 static isl_stat
add_inter_validity_constraints(struct isl_sched_graph
*graph
,
2814 struct isl_sched_edge
*edge
)
2819 isl_dim_map
*dim_map
;
2820 isl_basic_set
*coef
;
2821 struct isl_sched_node
*src
= edge
->src
;
2822 struct isl_sched_node
*dst
= edge
->dst
;
2825 return isl_stat_error
;
2827 map
= isl_map_copy(edge
->map
);
2828 ctx
= isl_map_get_ctx(map
);
2829 coef
= inter_coefficients(graph
, edge
, map
);
2831 offset
= coef_var_offset(coef
);
2834 return isl_stat_error
;
2836 dim_map
= inter_dim_map(ctx
, graph
, src
, dst
, offset
, 1);
2838 edge
->start
= graph
->lp
->n_ineq
;
2839 graph
->lp
= add_constraints_dim_map(graph
->lp
, coef
, dim_map
);
2841 return isl_stat_error
;
2842 edge
->end
= graph
->lp
->n_ineq
;
2847 /* Add constraints to graph->lp that bound the dependence distance for the given
2848 * dependence from a node i to itself.
2849 * If s = 1, we add the constraint
2851 * c_i_x (y - x) <= m_0 + m_n n
2855 * -c_i_x (y - x) + m_0 + m_n n >= 0
2857 * for each (x,y) in R.
2858 * If s = -1, we add the constraint
2860 * -c_i_x (y - x) <= m_0 + m_n n
2864 * c_i_x (y - x) + m_0 + m_n n >= 0
2866 * for each (x,y) in R.
2867 * We obtain general constraints on coefficients (c_0, c_n, c_x)
2868 * of valid constraints for (y - x) and then plug in (m_0, m_n, -s * c_i_x),
2869 * with each coefficient (except m_0) represented as a pair of non-negative
2873 * If "local" is set, then we add constraints
2875 * c_i_x (y - x) <= 0
2879 * -c_i_x (y - x) <= 0
2881 * instead, forcing the dependence distance to be (less than or) equal to 0.
2882 * That is, we plug in (0, 0, -s * c_i_x),
2883 * intra_coefficients is not required to have c_n in its result when
2884 * "local" is set. If they are missing, then (0, -s * c_i_x) is plugged in.
2885 * Note that dependences marked local are treated as validity constraints
2886 * by add_all_validity_constraints and therefore also have
2887 * their distances bounded by 0 from below.
2889 static isl_stat
add_intra_proximity_constraints(struct isl_sched_graph
*graph
,
2890 struct isl_sched_edge
*edge
, int s
, int local
)
2894 isl_map
*map
= isl_map_copy(edge
->map
);
2895 isl_ctx
*ctx
= isl_map_get_ctx(map
);
2896 isl_dim_map
*dim_map
;
2897 isl_basic_set
*coef
;
2898 struct isl_sched_node
*node
= edge
->src
;
2900 coef
= intra_coefficients(graph
, node
, map
, !local
);
2902 offset
= coef_var_offset(coef
);
2905 return isl_stat_error
;
2907 nparam
= isl_space_dim(node
->space
, isl_dim_param
);
2908 dim_map
= intra_dim_map(ctx
, graph
, node
, offset
, -s
);
2911 isl_dim_map_range(dim_map
, 1, 0, 0, 0, 1, 1);
2912 isl_dim_map_range(dim_map
, 4, 2, 1, 1, nparam
, -1);
2913 isl_dim_map_range(dim_map
, 5, 2, 1, 1, nparam
, 1);
2915 graph
->lp
= add_constraints_dim_map(graph
->lp
, coef
, dim_map
);
2920 /* Add constraints to graph->lp that bound the dependence distance for the given
2921 * dependence from node i to node j.
2922 * If s = 1, we add the constraint
2924 * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)
2929 * -(c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x) +
2932 * for each (x,y) in R.
2933 * If s = -1, we add the constraint
2935 * -((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x))
2940 * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) +
2943 * for each (x,y) in R.
2944 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
2945 * of valid constraints for R and then plug in
2946 * (m_0 - s*c_j_0 + s*c_i_0, m_n - s*c_j_n + s*c_i_n,
2947 * s*c_i_x, -s*c_j_x)
2948 * with each coefficient (except m_0, c_*_0 and c_*_n)
2949 * represented as a pair of non-negative coefficients.
2952 * If "local" is set (and s = 1), then we add constraints
2954 * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) <= 0
2958 * -((c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x)) >= 0
2960 * instead, forcing the dependence distance to be (less than or) equal to 0.
2961 * That is, we plug in
2962 * (-s*c_j_0 + s*c_i_0, -s*c_j_n + s*c_i_n, s*c_i_x, -s*c_j_x).
2963 * Note that dependences marked local are treated as validity constraints
2964 * by add_all_validity_constraints and therefore also have
2965 * their distances bounded by 0 from below.
2967 static isl_stat
add_inter_proximity_constraints(struct isl_sched_graph
*graph
,
2968 struct isl_sched_edge
*edge
, int s
, int local
)
2972 isl_map
*map
= isl_map_copy(edge
->map
);
2973 isl_ctx
*ctx
= isl_map_get_ctx(map
);
2974 isl_dim_map
*dim_map
;
2975 isl_basic_set
*coef
;
2976 struct isl_sched_node
*src
= edge
->src
;
2977 struct isl_sched_node
*dst
= edge
->dst
;
2979 coef
= inter_coefficients(graph
, edge
, map
);
2981 offset
= coef_var_offset(coef
);
2984 return isl_stat_error
;
2986 nparam
= isl_space_dim(src
->space
, isl_dim_param
);
2987 dim_map
= inter_dim_map(ctx
, graph
, src
, dst
, offset
, -s
);
2990 isl_dim_map_range(dim_map
, 1, 0, 0, 0, 1, 1);
2991 isl_dim_map_range(dim_map
, 4, 2, 1, 1, nparam
, -1);
2992 isl_dim_map_range(dim_map
, 5, 2, 1, 1, nparam
, 1);
2995 graph
->lp
= add_constraints_dim_map(graph
->lp
, coef
, dim_map
);
/* Should the distance over "edge" be forced to zero?
 * That is, is it marked as a local edge?
 * If "use_coincidence" is set, then coincidence edges are treated
 * as local edges.
 */
static int force_zero(struct isl_sched_edge *edge, int use_coincidence)
{
	if (is_local(edge))
		return 1;
	if (!use_coincidence)
		return 0;
	return is_coincidence(edge) ? 1 : 0;
}
3010 /* Add all validity constraints to graph->lp.
3012 * An edge that is forced to be local needs to have its dependence
3013 * distances equal to zero. We take care of bounding them by 0 from below
3014 * here. add_all_proximity_constraints takes care of bounding them by 0
3017 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
3018 * Otherwise, we ignore them.
3020 static int add_all_validity_constraints(struct isl_sched_graph
*graph
,
3021 int use_coincidence
)
3025 for (i
= 0; i
< graph
->n_edge
; ++i
) {
3026 struct isl_sched_edge
*edge
= &graph
->edge
[i
];
3029 zero
= force_zero(edge
, use_coincidence
);
3030 if (!is_validity(edge
) && !zero
)
3032 if (edge
->src
!= edge
->dst
)
3034 if (add_intra_validity_constraints(graph
, edge
) < 0)
3038 for (i
= 0; i
< graph
->n_edge
; ++i
) {
3039 struct isl_sched_edge
*edge
= &graph
->edge
[i
];
3042 zero
= force_zero(edge
, use_coincidence
);
3043 if (!is_validity(edge
) && !zero
)
3045 if (edge
->src
== edge
->dst
)
3047 if (add_inter_validity_constraints(graph
, edge
) < 0)
3054 /* Add constraints to graph->lp that bound the dependence distance
3055 * for all dependence relations.
3056 * If a given proximity dependence is identical to a validity
3057 * dependence, then the dependence distance is already bounded
3058 * from below (by zero), so we only need to bound the distance
3059 * from above. (This includes the case of "local" dependences
3060 * which are treated as validity dependence by add_all_validity_constraints.)
3061 * Otherwise, we need to bound the distance both from above and from below.
3063 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
3064 * Otherwise, we ignore them.
3066 static int add_all_proximity_constraints(struct isl_sched_graph
*graph
,
3067 int use_coincidence
)
3071 for (i
= 0; i
< graph
->n_edge
; ++i
) {
3072 struct isl_sched_edge
*edge
= &graph
->edge
[i
];
3075 zero
= force_zero(edge
, use_coincidence
);
3076 if (!is_proximity(edge
) && !zero
)
3078 if (edge
->src
== edge
->dst
&&
3079 add_intra_proximity_constraints(graph
, edge
, 1, zero
) < 0)
3081 if (edge
->src
!= edge
->dst
&&
3082 add_inter_proximity_constraints(graph
, edge
, 1, zero
) < 0)
3084 if (is_validity(edge
) || zero
)
3086 if (edge
->src
== edge
->dst
&&
3087 add_intra_proximity_constraints(graph
, edge
, -1, 0) < 0)
3089 if (edge
->src
!= edge
->dst
&&
3090 add_inter_proximity_constraints(graph
, edge
, -1, 0) < 0)
3097 /* Normalize the rows of "indep" such that all rows are lexicographically
3098 * positive and such that each row contains as many final zeros as possible,
3099 * given the choice for the previous rows.
3100 * Do this by performing elementary row operations.
3102 static __isl_give isl_mat
*normalize_independent(__isl_take isl_mat
*indep
)
3104 indep
= isl_mat_reverse_gauss(indep
);
3105 indep
= isl_mat_lexnonneg_rows(indep
);
3109 /* Extract the linear part of the current schedule for node "node".
3111 static __isl_give isl_mat
*extract_linear_schedule(struct isl_sched_node
*node
)
3113 int n_row
= isl_mat_rows(node
->sched
);
3115 return isl_mat_sub_alloc(node
->sched
, 0, n_row
,
3116 1 + node
->nparam
, node
->nvar
);
3119 /* Compute a basis for the rows in the linear part of the schedule
3120 * and extend this basis to a full basis. The remaining rows
3121 * can then be used to force linear independence from the rows
3124 * In particular, given the schedule rows S, we compute
3129 * with H the Hermite normal form of S. That is, all but the
3130 * first rank columns of H are zero and so each row in S is
3131 * a linear combination of the first rank rows of Q.
3132 * The matrix Q can be used as a variable transformation
3133 * that isolates the directions of S in the first rank rows.
3134 * Transposing S U = H yields
3138 * with all but the first rank rows of H^T zero.
3139 * The last rows of U^T are therefore linear combinations
3140 * of schedule coefficients that are all zero on schedule
3141 * coefficients that are linearly dependent on the rows of S.
3142 * At least one of these combinations is non-zero on
3143 * linearly independent schedule coefficients.
3144 * The rows are normalized to involve as few of the last
3145 * coefficients as possible and to have a positive initial value.
3147 static int node_update_vmap(struct isl_sched_node
*node
)
3151 H
= extract_linear_schedule(node
);
3153 H
= isl_mat_left_hermite(H
, 0, &U
, &Q
);
3154 isl_mat_free(node
->indep
);
3155 isl_mat_free(node
->vmap
);
3157 node
->indep
= isl_mat_transpose(U
);
3158 node
->rank
= isl_mat_initial_non_zero_cols(H
);
3159 node
->indep
= isl_mat_drop_rows(node
->indep
, 0, node
->rank
);
3160 node
->indep
= normalize_independent(node
->indep
);
3163 if (!node
->indep
|| !node
->vmap
|| node
->rank
< 0)
/* Is "edge" marked as a validity or a conditional validity edge?
 */
static int is_any_validity(struct isl_sched_edge *edge)
{
	if (is_validity(edge))
		return 1;
	return is_conditional_validity(edge) ? 1 : 0;
}
/* How many times should the constraints in "edge" be counted?
 *
 * The count is as follows
 * validity		-> 1 (>= 0)
 * validity+proximity	-> 2 (>= 0 and upper bound)
 * proximity		-> 2 (lower and upper bound)
 * local(+any)		-> 2 (>= 0 and <= 0)
 *
 * If an edge is only marked conditional_validity then it counts
 * as zero since it is only checked afterwards.
 *
 * If "use_coincidence" is set, then coincidence edges are treated
 * as local edges.  Otherwise, they are ignored.
 */
static int edge_multiplicity(struct isl_sched_edge *edge, int use_coincidence)
{
	int bounded_twice;

	bounded_twice = is_proximity(edge) ||
			force_zero(edge, use_coincidence);
	if (bounded_twice)
		return 2;
	return is_validity(edge) ? 1 : 0;
}
3198 /* How many times should the constraints in "edge" be counted
3199 * as a parametric intra-node constraint?
3201 * Only proximity edges that are not forced zero need
3202 * coefficient constraints that include coefficients for parameters.
3203 * If the edge is also a validity edge, then only
3204 * an upper bound is introduced. Otherwise, both lower and upper bounds
3207 static int parametric_intra_edge_multiplicity(struct isl_sched_edge
*edge
,
3208 int use_coincidence
)
3210 if (edge
->src
!= edge
->dst
)
3212 if (!is_proximity(edge
))
3214 if (force_zero(edge
, use_coincidence
))
3216 if (is_validity(edge
))
3222 /* Add "f" times the number of equality and inequality constraints of "bset"
3223 * to "n_eq" and "n_ineq" and free "bset".
3225 static isl_stat
update_count(__isl_take isl_basic_set
*bset
,
3226 int f
, int *n_eq
, int *n_ineq
)
3229 return isl_stat_error
;
3231 *n_eq
+= isl_basic_set_n_equality(bset
);
3232 *n_ineq
+= isl_basic_set_n_inequality(bset
);
3233 isl_basic_set_free(bset
);
3238 /* Count the number of equality and inequality constraints
3239 * that will be added for the given map.
3241 * The edges that require parameter coefficients are counted separately.
3243 * "use_coincidence" is set if we should take into account coincidence edges.
3245 static isl_stat
count_map_constraints(struct isl_sched_graph
*graph
,
3246 struct isl_sched_edge
*edge
, __isl_take isl_map
*map
,
3247 int *n_eq
, int *n_ineq
, int use_coincidence
)
3250 isl_basic_set
*coef
;
3251 int f
= edge_multiplicity(edge
, use_coincidence
);
3252 int fp
= parametric_intra_edge_multiplicity(edge
, use_coincidence
);
3259 if (edge
->src
!= edge
->dst
) {
3260 coef
= inter_coefficients(graph
, edge
, map
);
3261 return update_count(coef
, f
, n_eq
, n_ineq
);
3265 copy
= isl_map_copy(map
);
3266 coef
= intra_coefficients(graph
, edge
->src
, copy
, 1);
3267 if (update_count(coef
, fp
, n_eq
, n_ineq
) < 0)
3272 copy
= isl_map_copy(map
);
3273 coef
= intra_coefficients(graph
, edge
->src
, copy
, 0);
3274 if (update_count(coef
, f
- fp
, n_eq
, n_ineq
) < 0)
3282 return isl_stat_error
;
3285 /* Count the number of equality and inequality constraints
3286 * that will be added to the main lp problem.
3287 * We count as follows
3288 * validity -> 1 (>= 0)
3289 * validity+proximity -> 2 (>= 0 and upper bound)
3290 * proximity -> 2 (lower and upper bound)
3291 * local(+any) -> 2 (>= 0 and <= 0)
3293 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
3294 * Otherwise, we ignore them.
3296 static int count_constraints(struct isl_sched_graph
*graph
,
3297 int *n_eq
, int *n_ineq
, int use_coincidence
)
3301 *n_eq
= *n_ineq
= 0;
3302 for (i
= 0; i
< graph
->n_edge
; ++i
) {
3303 struct isl_sched_edge
*edge
= &graph
->edge
[i
];
3304 isl_map
*map
= isl_map_copy(edge
->map
);
3306 if (count_map_constraints(graph
, edge
, map
, n_eq
, n_ineq
,
3307 use_coincidence
) < 0)
3314 /* Count the number of constraints that will be added by
3315 * add_bound_constant_constraints to bound the values of the constant terms
3316 * and increment *n_eq and *n_ineq accordingly.
3318 * In practice, add_bound_constant_constraints only adds inequalities.
3320 static isl_stat
count_bound_constant_constraints(isl_ctx
*ctx
,
3321 struct isl_sched_graph
*graph
, int *n_eq
, int *n_ineq
)
3323 if (isl_options_get_schedule_max_constant_term(ctx
) == -1)
3326 *n_ineq
+= graph
->n
;
3331 /* Add constraints to bound the values of the constant terms in the schedule,
3332 * if requested by the user.
3334 * The maximal value of the constant terms is defined by the option
3335 * "schedule_max_constant_term".
3337 static isl_stat
add_bound_constant_constraints(isl_ctx
*ctx
,
3338 struct isl_sched_graph
*graph
)
3344 max
= isl_options_get_schedule_max_constant_term(ctx
);
3348 total
= isl_basic_set_dim(graph
->lp
, isl_dim_set
);
3350 for (i
= 0; i
< graph
->n
; ++i
) {
3351 struct isl_sched_node
*node
= &graph
->node
[i
];
3354 k
= isl_basic_set_alloc_inequality(graph
->lp
);
3356 return isl_stat_error
;
3357 isl_seq_clr(graph
->lp
->ineq
[k
], 1 + total
);
3358 pos
= node_cst_coef_offset(node
);
3359 isl_int_set_si(graph
->lp
->ineq
[k
][1 + pos
], -1);
3360 isl_int_set_si(graph
->lp
->ineq
[k
][0], max
);
3366 /* Count the number of constraints that will be added by
3367 * add_bound_coefficient_constraints and increment *n_eq and *n_ineq
3370 * In practice, add_bound_coefficient_constraints only adds inequalities.
3372 static int count_bound_coefficient_constraints(isl_ctx
*ctx
,
3373 struct isl_sched_graph
*graph
, int *n_eq
, int *n_ineq
)
3377 if (isl_options_get_schedule_max_coefficient(ctx
) == -1 &&
3378 !isl_options_get_schedule_treat_coalescing(ctx
))
3381 for (i
= 0; i
< graph
->n
; ++i
)
3382 *n_ineq
+= graph
->node
[i
].nparam
+ 2 * graph
->node
[i
].nvar
;
3387 /* Add constraints to graph->lp that bound the values of
3388 * the parameter schedule coefficients of "node" to "max" and
3389 * the variable schedule coefficients to the corresponding entry
3391 * In either case, a negative value means that no bound needs to be imposed.
3393 * For parameter coefficients, this amounts to adding a constraint
3401 * The variable coefficients are, however, not represented directly.
3402 * Instead, the variable coefficients c_x are written as differences
3403 * c_x = c_x^+ - c_x^-.
3406 * -max_i <= c_x_i <= max_i
3410 * -max_i <= c_x_i^+ - c_x_i^- <= max_i
3414 * -(c_x_i^+ - c_x_i^-) + max_i >= 0
3415 * c_x_i^+ - c_x_i^- + max_i >= 0
3417 static isl_stat
node_add_coefficient_constraints(isl_ctx
*ctx
,
3418 struct isl_sched_graph
*graph
, struct isl_sched_node
*node
, int max
)
3424 total
= isl_basic_set_dim(graph
->lp
, isl_dim_set
);
3426 for (j
= 0; j
< node
->nparam
; ++j
) {
3432 k
= isl_basic_set_alloc_inequality(graph
->lp
);
3434 return isl_stat_error
;
3435 dim
= 1 + node_par_coef_offset(node
) + j
;
3436 isl_seq_clr(graph
->lp
->ineq
[k
], 1 + total
);
3437 isl_int_set_si(graph
->lp
->ineq
[k
][dim
], -1);
3438 isl_int_set_si(graph
->lp
->ineq
[k
][0], max
);
3441 ineq
= isl_vec_alloc(ctx
, 1 + total
);
3442 ineq
= isl_vec_clr(ineq
);
3444 return isl_stat_error
;
3445 for (i
= 0; i
< node
->nvar
; ++i
) {
3446 int pos
= 1 + node_var_coef_pos(node
, i
);
3448 if (isl_int_is_neg(node
->max
->el
[i
]))
3451 isl_int_set_si(ineq
->el
[pos
], 1);
3452 isl_int_set_si(ineq
->el
[pos
+ 1], -1);
3453 isl_int_set(ineq
->el
[0], node
->max
->el
[i
]);
3455 k
= isl_basic_set_alloc_inequality(graph
->lp
);
3458 isl_seq_cpy(graph
->lp
->ineq
[k
], ineq
->el
, 1 + total
);
3460 isl_seq_neg(ineq
->el
+ pos
, ineq
->el
+ pos
, 2);
3461 k
= isl_basic_set_alloc_inequality(graph
->lp
);
3464 isl_seq_cpy(graph
->lp
->ineq
[k
], ineq
->el
, 1 + total
);
3466 isl_seq_clr(ineq
->el
+ pos
, 2);
3473 return isl_stat_error
;
3476 /* Add constraints that bound the values of the variable and parameter
3477 * coefficients of the schedule.
3479 * The maximal value of the coefficients is defined by the option
3480 * 'schedule_max_coefficient' and the entries in node->max.
3481 * These latter entries are only set if either the schedule_max_coefficient
3482 * option or the schedule_treat_coalescing option is set.
3484 static isl_stat
add_bound_coefficient_constraints(isl_ctx
*ctx
,
3485 struct isl_sched_graph
*graph
)
3490 max
= isl_options_get_schedule_max_coefficient(ctx
);
3492 if (max
== -1 && !isl_options_get_schedule_treat_coalescing(ctx
))
3495 for (i
= 0; i
< graph
->n
; ++i
) {
3496 struct isl_sched_node
*node
= &graph
->node
[i
];
3498 if (node_add_coefficient_constraints(ctx
, graph
, node
, max
) < 0)
3499 return isl_stat_error
;
3505 /* Add a constraint to graph->lp that equates the value at position
3506 * "sum_pos" to the sum of the "n" values starting at "first".
3508 static isl_stat
add_sum_constraint(struct isl_sched_graph
*graph
,
3509 int sum_pos
, int first
, int n
)
3514 total
= isl_basic_set_dim(graph
->lp
, isl_dim_set
);
3516 k
= isl_basic_set_alloc_equality(graph
->lp
);
3518 return isl_stat_error
;
3519 isl_seq_clr(graph
->lp
->eq
[k
], 1 + total
);
3520 isl_int_set_si(graph
->lp
->eq
[k
][1 + sum_pos
], -1);
3521 for (i
= 0; i
< n
; ++i
)
3522 isl_int_set_si(graph
->lp
->eq
[k
][1 + first
+ i
], 1);
3527 /* Add a constraint to graph->lp that equates the value at position
3528 * "sum_pos" to the sum of the parameter coefficients of all nodes.
3530 static isl_stat
add_param_sum_constraint(struct isl_sched_graph
*graph
,
3536 total
= isl_basic_set_dim(graph
->lp
, isl_dim_set
);
3538 k
= isl_basic_set_alloc_equality(graph
->lp
);
3540 return isl_stat_error
;
3541 isl_seq_clr(graph
->lp
->eq
[k
], 1 + total
);
3542 isl_int_set_si(graph
->lp
->eq
[k
][1 + sum_pos
], -1);
3543 for (i
= 0; i
< graph
->n
; ++i
) {
3544 int pos
= 1 + node_par_coef_offset(&graph
->node
[i
]);
3546 for (j
= 0; j
< graph
->node
[i
].nparam
; ++j
)
3547 isl_int_set_si(graph
->lp
->eq
[k
][pos
+ j
], 1);
3553 /* Add a constraint to graph->lp that equates the value at position
3554 * "sum_pos" to the sum of the variable coefficients of all nodes.
3556 static isl_stat
add_var_sum_constraint(struct isl_sched_graph
*graph
,
3562 total
= isl_basic_set_dim(graph
->lp
, isl_dim_set
);
3564 k
= isl_basic_set_alloc_equality(graph
->lp
);
3566 return isl_stat_error
;
3567 isl_seq_clr(graph
->lp
->eq
[k
], 1 + total
);
3568 isl_int_set_si(graph
->lp
->eq
[k
][1 + sum_pos
], -1);
3569 for (i
= 0; i
< graph
->n
; ++i
) {
3570 struct isl_sched_node
*node
= &graph
->node
[i
];
3571 int pos
= 1 + node_var_coef_offset(node
);
3573 for (j
= 0; j
< 2 * node
->nvar
; ++j
)
3574 isl_int_set_si(graph
->lp
->eq
[k
][pos
+ j
], 1);
3580 /* Construct an ILP problem for finding schedule coefficients
3581 * that result in non-negative, but small dependence distances
3582 * over all dependences.
3583 * In particular, the dependence distances over proximity edges
3584 * are bounded by m_0 + m_n n and we compute schedule coefficients
3585 * with small values (preferably zero) of m_n and m_0.
3587 * All variables of the ILP are non-negative. The actual coefficients
3588 * may be negative, so each coefficient is represented as the difference
3589 * of two non-negative variables. The negative part always appears
3590 * immediately before the positive part.
3591 * Other than that, the variables have the following order
3593 * - sum of positive and negative parts of m_n coefficients
3595 * - sum of all c_n coefficients
3596 * (unconstrained when computing non-parametric schedules)
3597 * - sum of positive and negative parts of all c_x coefficients
3598 * - positive and negative parts of m_n coefficients
3600 * - positive and negative parts of c_i_x, in opposite order
3601 * - c_i_n (if parametric)
3604 * The constraints are those from the edges plus two or three equalities
3605 * to express the sums.
3607 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
3608 * Otherwise, we ignore them.
3610 static isl_stat
setup_lp(isl_ctx
*ctx
, struct isl_sched_graph
*graph
,
3611 int use_coincidence
)
3621 parametric
= ctx
->opt
->schedule_parametric
;
3622 nparam
= isl_space_dim(graph
->node
[0].space
, isl_dim_param
);
3624 total
= param_pos
+ 2 * nparam
;
3625 for (i
= 0; i
< graph
->n
; ++i
) {
3626 struct isl_sched_node
*node
= &graph
->node
[graph
->sorted
[i
]];
3627 if (node_update_vmap(node
) < 0)
3628 return isl_stat_error
;
3629 node
->start
= total
;
3630 total
+= 1 + node
->nparam
+ 2 * node
->nvar
;
3633 if (count_constraints(graph
, &n_eq
, &n_ineq
, use_coincidence
) < 0)
3634 return isl_stat_error
;
3635 if (count_bound_constant_constraints(ctx
, graph
, &n_eq
, &n_ineq
) < 0)
3636 return isl_stat_error
;
3637 if (count_bound_coefficient_constraints(ctx
, graph
, &n_eq
, &n_ineq
) < 0)
3638 return isl_stat_error
;
3640 space
= isl_space_set_alloc(ctx
, 0, total
);
3641 isl_basic_set_free(graph
->lp
);
3642 n_eq
+= 2 + parametric
;
3644 graph
->lp
= isl_basic_set_alloc_space(space
, 0, n_eq
, n_ineq
);
3646 if (add_sum_constraint(graph
, 0, param_pos
, 2 * nparam
) < 0)
3647 return isl_stat_error
;
3648 if (parametric
&& add_param_sum_constraint(graph
, 2) < 0)
3649 return isl_stat_error
;
3650 if (add_var_sum_constraint(graph
, 3) < 0)
3651 return isl_stat_error
;
3652 if (add_bound_constant_constraints(ctx
, graph
) < 0)
3653 return isl_stat_error
;
3654 if (add_bound_coefficient_constraints(ctx
, graph
) < 0)
3655 return isl_stat_error
;
3656 if (add_all_validity_constraints(graph
, use_coincidence
) < 0)
3657 return isl_stat_error
;
3658 if (add_all_proximity_constraints(graph
, use_coincidence
) < 0)
3659 return isl_stat_error
;
3664 /* Analyze the conflicting constraint found by
3665 * isl_tab_basic_set_constrained_lexmin. If it corresponds to the validity
3666 * constraint of one of the edges between distinct nodes, living, moreover
3667 * in distinct SCCs, then record the source and sink SCC as this may
3668 * be a good place to cut between SCCs.
3670 static int check_conflict(int con
, void *user
)
3673 struct isl_sched_graph
*graph
= user
;
3675 if (graph
->src_scc
>= 0)
3678 con
-= graph
->lp
->n_eq
;
3680 if (con
>= graph
->lp
->n_ineq
)
3683 for (i
= 0; i
< graph
->n_edge
; ++i
) {
3684 if (!is_validity(&graph
->edge
[i
]))
3686 if (graph
->edge
[i
].src
== graph
->edge
[i
].dst
)
3688 if (graph
->edge
[i
].src
->scc
== graph
->edge
[i
].dst
->scc
)
3690 if (graph
->edge
[i
].start
> con
)
3692 if (graph
->edge
[i
].end
<= con
)
3694 graph
->src_scc
= graph
->edge
[i
].src
->scc
;
3695 graph
->dst_scc
= graph
->edge
[i
].dst
->scc
;
3701 /* Check whether the next schedule row of the given node needs to be
3702 * non-trivial. Lower-dimensional domains may have some trivial rows,
3703 * but as soon as the number of remaining required non-trivial rows
3704 * is as large as the number or remaining rows to be computed,
3705 * all remaining rows need to be non-trivial.
3707 static int needs_row(struct isl_sched_graph
*graph
, struct isl_sched_node
*node
)
3709 return node
->nvar
- node
->rank
>= graph
->maxvar
- graph
->n_row
;
3712 /* Take a linear combination "lin" in terms of the schedule coefficients c_i
3713 * and express it in terms of the variables of the ILP problem
3714 * as constructed by setup_lp.
3715 * In particular, in the ILP, the schedule coefficients are represented by
3716 * pairs of non-negative variables c^+_i - c^-_i, with c^-_i appearing
3717 * before c^+_i. Furthermore,
3718 * the pairs of non-negative variables representing the coefficients
3719 * are stored in the opposite order.
3721 static __isl_give isl_mat
*linear_to_lp(__isl_keep isl_mat
*lin
)
3730 ctx
= isl_mat_get_ctx(lin
);
3731 n
= isl_mat_rows(lin
);
3732 n_var
= isl_mat_cols(lin
);
3733 mat
= isl_mat_alloc(ctx
, n
, 2 * n_var
);
3736 for (i
= 0; i
< n
; ++i
) {
3737 for (j
= 0; j
< n_var
; ++j
) {
3738 int nj
= n_var
- 1 - j
;
3739 isl_int_neg(mat
->row
[i
][2 * nj
], lin
->row
[i
][j
]);
3740 isl_int_set(mat
->row
[i
][2 * nj
+ 1], lin
->row
[i
][j
]);
3747 /* Clear all memory associated to "region" and reset the fields
3748 * to their default values.
3750 static void clear_region(struct isl_ilp_region
*region
)
3752 region
->has_non_zero
= 0;
3753 region
->has_fixed
= 0;
3754 region
->optional
= 0;
3756 region
->disjunctive
= 0;
3757 region
->conditional
= 0;
3758 region
->non_zero
= isl_mat_free(region
->non_zero
);
3759 region
->fixed
= isl_mat_free(region
->fixed
);
3760 region
->fixed_val
= isl_vec_free(region
->fixed_val
);
3761 region
->user
= NULL
;
3764 /* Clear all "n" regions of "graph" and return -1.
3766 static int clear_regions(struct isl_sched_graph
*graph
, int n
)
3770 for (i
= 0; i
< n
; ++i
)
3771 clear_region(&graph
->region
[i
]);
3775 /* Set the fixed-value constraint of "region" to force
3776 * the linear combinations "zero" to be zero on the schedule coefficients.
3777 * "zero" is expressed in terms of the schedule coefficients and
3778 * needs to be expanded to the ILP variables first.
3780 static void set_zero(struct isl_ilp_region
*region
, __isl_keep isl_mat
*zero
)
3785 ctx
= isl_mat_get_ctx(zero
);
3786 len
= isl_mat_rows(zero
);
3787 region
->has_fixed
= 1;
3788 region
->fixed
= linear_to_lp(zero
);
3789 region
->fixed_val
= isl_vec_zero(ctx
, len
);
3792 /* Set the fixed value constraint of "region" to
3793 * force the next schedule row to be equal to row "pos" of the inner part
3794 * of intra-statement consecutivity constraint "intra" of node "node"
3795 * plus some linear combination of the schedule rows
3796 * prior to the one that corresponds to the first inner row and/or
3797 * rows of the inner part prior to "pos".
3798 * What is not allowed in the linear combination are any linearly
3799 * independent schedule rows that appear between rows that
3800 * correspond to rows of the inner part.
3801 * "outer_complement" is the orthogonal complement of [T_0; G], with
3802 * T_0 the schedule computed so far and G the outer part of "intra".
3803 * Since the outer part has been covered by T_0 at this stage,
3804 * "outer_complement" is effectively the orthogonal complement of T_0.
3806 * Let T_1 be the part of the schedule computed so far (T_0) that
3807 * does not include any rows corresponding to rows of the inner part.
3808 * Let H_< be the inner part before row "pos" and H_= the row
3809 * corresponding to "pos".
3810 * The next schedule row c should be equal to H_= plus a linear combination
3812 * Let U be the orthogonal complement of [T_1; H_<; H_=] and
3813 * let U' be the orthogonal complement of [T_1; H_<].
3814 * Then c U should be zero, while c U' should be equal to H_= U'.
3815 * The latter condition can be refined to c U'' = H_= U'' with
3816 * U'' a basis extension of U to cover U'.
3817 * This means that the remaining row U'' is such that H_= U''
3818 * is not zero. U'' contains exactly one row because the rank of U'
3819 * is one greater than that of U. This assumes that T_1 is
3820 * linearly independent of H, but if it is not then consecutivity
3821 * cannot be achieved anyway.
3823 * If no inner rows have been covered so far, then T_0 = T_1 and
3824 * U' can be obtained as "outer_complement".
3825 * Otherwise, it is computed from T_1 and H_<.
3827 * The fixed field of "region" is set to [U''; U], while
3828 * the fixed_val field is set to H_= U'' followed by zeros.
3830 static void set_equal(struct isl_ilp_region
*region
,
3831 struct isl_sched_node
*node
, struct isl_sched_intra
*intra
, int pos
,
3832 __isl_take isl_mat
*complement
)
3837 isl_mat
*inner
, *mat
, *ext_complement
;
3839 mat
= extract_linear_schedule(node
);
3840 if (intra
->n_fixed
> 0) {
3841 n
= isl_mat_rows(mat
);
3842 mat
= isl_mat_drop_rows(mat
,
3843 intra
->first_fixed
, n
- intra
->first_fixed
);
3844 inner
= isl_mat_copy(intra
->inner
);
3845 inner
= isl_mat_drop_rows(inner
, intra
->n_fixed
,
3846 intra
->n_inner
- intra
->n_fixed
);
3847 mat
= isl_mat_concat(mat
, inner
);
3848 isl_mat_free(complement
);
3849 complement
= isl_mat_row_complement(isl_mat_copy(mat
));
3851 row
= isl_mat_get_row(intra
->inner
, pos
);
3852 mat
= isl_mat_vec_concat(mat
, isl_vec_copy(row
));
3853 ext_complement
= isl_mat_row_complement(mat
);
3854 complement
= isl_mat_row_basis_extension(isl_mat_copy(ext_complement
),
3857 val
= isl_mat_vec_product(isl_mat_copy(complement
), row
);
3858 mat
= isl_mat_concat(complement
, ext_complement
);
3859 val
= isl_vec_zero_extend(val
, isl_mat_rows(mat
));
3860 region
->has_fixed
= 1;
3861 region
->fixed
= linear_to_lp(mat
);
3862 region
->fixed_val
= val
;
3866 /* Given a node and an intra-statement consecutivity constraint
3867 * on that node, construct a matrix that contains
3868 * - the linear part of the current schedule
3869 * - the outer part of the constraint, if "add_outer" is set
3870 * - the inner part of the constraint, if "add_inner" is set
3872 static __isl_give isl_mat
*intra_rows(struct isl_sched_node
*node
,
3873 struct isl_sched_intra
*intra
, int add_outer
, int add_inner
)
3877 mat
= extract_linear_schedule(node
);
3879 mat
= isl_mat_concat(mat
, isl_mat_copy(intra
->outer
));
3881 mat
= isl_mat_concat(mat
, isl_mat_copy(intra
->inner
));
3885 /* Finish the initialization of graph->region[n] as a region
3886 * corresponding to intra-statement consecutivity constraint "intra"
3888 * In particular, set the position of the sequence of variables
3889 * to which the region applies, mark the region as optional and
3890 * add a pointer to "intra" to be able to recover the constraint
3893 static int finish_intra(struct isl_sched_graph
*graph
,
3894 struct isl_sched_node
*node
, struct isl_sched_intra
*intra
, int n
)
3896 graph
->region
[n
].pos
= node_var_coef_offset(node
);
3897 graph
->region
[n
].optional
= 1;
3898 graph
->region
[n
].user
= intra
;
3903 /* Set the non-zero constraint of "region" to "non_zero".
3904 * "non_zero" is expressed in terms of the schedule coefficients.
3905 * Normalize it first and expand it to the ILP variables.
3907 static void set_non_zero(struct isl_ilp_region
*region
,
3908 __isl_take isl_mat
*non_zero
)
3910 non_zero
= normalize_independent(non_zero
);
3911 region
->has_non_zero
= 1;
3912 region
->non_zero
= linear_to_lp(non_zero
);
3913 isl_mat_free(non_zero
);
3916 /* Set the non-zero constraint of "region" to force the schedule row
3917 * to be linearly independent of the combination of the schedule computed
3918 * so far for "node" and the inner rows of "intra".
3919 * Simplify the constraints by exploiting the fact that
3920 * the linear combinations "zero" are all zero on the corresponding
3921 * schedule coefficients.
3923 static void set_independent_to_inner(struct isl_ilp_region
*region
,
3924 struct isl_sched_node
*node
, struct isl_sched_intra
*intra
,
3925 __isl_take isl_mat
*zero
)
3929 mat
= intra_rows(node
, intra
, 0, 1);
3930 mat
= isl_mat_row_complement(mat
);
3931 mat
= isl_mat_row_basis_extension(zero
, mat
);
3932 set_non_zero(region
, mat
);
3935 /* Add an ILP region to "graph" that forces the next schedule row
3936 * for "node" to be a linear combination of the outer rows of
3937 * intra-statement consecutivity constraint "intra" and
3938 * of the schedule rows computed so far.
3939 * "n" is the current number of ILP regions.
3940 * "outer_complement" is the orthogonal complement of [T_0; G], with
3941 * T_0 the schedule computed so far and G the outer part of "intra".
3942 * Return the updated number of ILP regions.
3944 * The next row being a linear combination of T_0 and G means
3945 * that "outer_complement" needs to be zero on the schedule coefficients.
3946 * The schedule row further needs to be linearly independent of
3947 * the inner part of "intra" (in order for later rows to be set
3948 * equal to the inner part) and of the previous schedule rows
3949 * (in order to make progress).
3951 static int add_linear_combination(struct isl_sched_graph
*graph
,
3952 struct isl_sched_node
*node
, struct isl_sched_intra
*intra
, int n
,
3953 __isl_take isl_mat
*outer_complement
)
3955 set_zero(&graph
->region
[n
], outer_complement
);
3956 set_independent_to_inner(&graph
->region
[n
], node
, intra
,
3958 return finish_intra(graph
, node
, intra
, n
);
3961 /* Add an ILP region to "graph" that forces the next schedule row
3962 * for "node" to be equal to the next inner row of "intra"
3963 * (plus a linear combination of the schedule computed so far,
3964 * except for linearly independent rows that appear in the middle
3965 * of rows that correspond to the inner part of "intra").
3966 * "n" is the current number of ILP regions.
3967 * "outer_complement" is the orthogonal complement of [T_0; G], with
3968 * T_0 the schedule computed so far and G the outer part of "intra".
3969 * Return the updated number of ILP regions.
3971 * Note that the schedule computed so far should be linearly
3972 * independent of the next inner row of "intra", meaning that
3973 * the linear combination does not cancel out the contribution
3974 * of the next inner row of "intra".
3975 * The next schedule row also needs to be
3976 * linearly independent of the remaining inner part of "intra"
3977 * (in order for later rows to be set equal to this remaining part) and
3978 * of the previous schedule rows (in order to make progress).
3979 * However, both of these are linearly independent of the next inner
3980 * row of "intra". Since the next schedule row has a non-zero
3981 * contribution of this next inner row, it is also linearly
3982 * independent of those rows.
3984 static int add_next_inner(struct isl_sched_graph
*graph
,
3985 struct isl_sched_node
*node
, struct isl_sched_intra
*intra
, int n
,
3986 __isl_take isl_mat
*outer_complement
)
3988 set_equal(&graph
->region
[n
], node
, intra
, intra
->n_fixed
,
3990 return finish_intra(graph
, node
, intra
, n
);
3993 /* Add an extra ILP region to "graph" that allows the next schedule row
3994 * for "node" to be linearly independent of the combination
3995 * of the schedule computed so far and all rows of "intra".
3996 * "n" is the current number of ILP regions or -1 on error.
3997 * Return the updated number of ILP regions or -1 on error.
3999 * The next schedule row is linearly independent of these rows
4000 * if the orthogonal complement is not zero on the schedule coefficients.
4002 static int allow_linear_independence(struct isl_sched_graph
*graph
,
4003 struct isl_sched_node
*node
, struct isl_sched_intra
*intra
, int n
)
4011 mat
= intra_rows(node
, intra
, 1, 1);
4012 U
= isl_mat_row_complement(mat
);
4013 set_non_zero(&graph
->region
[n
], U
);
4014 return finish_intra(graph
, node
, intra
, n
);
4017 /* Add an extra ILP region to "graph" that allows the next schedule row
4018 * for "node" to be a linear combination
4019 * of the schedule computed so far.
4020 * "n" is the current number of ILP regions or -1 on error.
4021 * Return the updated number of ILP regions or -1 on error.
4023 * The next schedule row is a linear combination of the current schedule
4024 * if its orthogonal complement is zero on the schedule coefficients.
4026 static int allow_linear_combination(struct isl_sched_graph
*graph
,
4027 struct isl_sched_node
*node
, struct isl_sched_intra
*intra
, int n
)
4034 mat
= extract_linear_schedule(node
);
4035 mat
= isl_mat_row_complement(mat
);
4037 set_zero(&graph
->region
[n
], mat
);
4041 return finish_intra(graph
, node
, intra
, n
);
4044 /* Is "region1" equal to "region2"?
4046 * Two regions are considered equal if they refer to the same sequence
4047 * of variables and if their non-zero and fixed-value constraints
4050 static isl_bool
region_is_equal(struct isl_ilp_region
*region1
,
4051 struct isl_ilp_region
*region2
)
4055 if (region1
->pos
!= region2
->pos
)
4056 return isl_bool_false
;
4057 if (region1
->has_non_zero
!= region2
->has_non_zero
)
4058 return isl_bool_false
;
4059 if (region1
->has_fixed
!= region2
->has_fixed
)
4060 return isl_bool_false
;
4062 if (region1
->has_non_zero
) {
4063 equal
= isl_mat_is_equal(region1
->non_zero
, region2
->non_zero
);
4064 if (equal
< 0 || !equal
)
4068 if (region1
->has_fixed
) {
4069 equal
= isl_mat_is_equal(region1
->fixed
, region2
->fixed
);
4070 if (equal
< 0 || !equal
)
4072 equal
= isl_vec_is_equal(region1
->fixed_val
,
4073 region2
->fixed_val
);
4074 if (equal
< 0 || !equal
)
4078 return isl_bool_true
;
4081 /* Is the region at position "pos" a duplicate of any of the regions
4082 * of "graph" starting at "first" and before "pos"?
4084 static isl_bool
is_duplicate(struct isl_sched_graph
*graph
, int first
, int pos
)
4088 for (i
= first
; i
< pos
; ++i
) {
4091 equal
= region_is_equal(&graph
->region
[i
], &graph
->region
[pos
]);
4092 if (equal
< 0 || equal
)
4096 return isl_bool_false
;
4099 /* Check if the most recently added disjunct, the one at position n - 1,
4100 * is a duplicate of any of the regions in "graph" starting at "first".
4101 * If so, drop this disjunct and return the updated "n".
4102 * Return -1 on error.
4104 static int drop_if_duplicate(struct isl_sched_graph
*graph
, int first
, int n
)
4111 duplicate
= is_duplicate(graph
, first
, n
- 1);
4113 return clear_regions(graph
, n
);
4115 clear_region(&graph
->region
[--n
]);
4120 /* Add ILP regions for the intra-statement consecutivity constraint "intra"
4121 * on node "node" in "graph".
4122 * "first" is the position of the first intra-statement consecutivity
4123 * constraint ILP region for "node".
4124 * "n" is the current number of ILP regions.
4125 * Return the updated number of ILP regions or -1 on error.
4127 * The type of constraint that needs to be imposed is prescribed by
4129 * "outer_complement" is the orthogonal complement of [T_0; G].
4130 * "allow_independent" is set if the next schedule row should
4131 * be allowed to be linearly independent of [T_0; G; H].
4133 * The regions introduced for "intra" form a disjunction of at most
4135 * The first option is to make progress on the consecutivity,
4136 * meaning that the next schedule row is either a linear combination
4137 * of the outer rows of the consecutivity constraint or
4138 * equal to the next inner row.
4139 * The second option is for the next schedule row to be linearly
4140 * independent of both the current schedule rows and all
4141 * rows from the consecutivity constraint.
4142 * The caller has already checked whether this is possible.
4143 * The third option is for the next schedule row to be a linear
4144 * combination of the outer schedule rows.
4146 * First check if the next schedule row should be allowed to
4147 * be a linear combination of outer schedule rows.
4148 * This is only allowed if no linear independence constraint
4149 * will be added for this node.
4150 * Furthermore, since the corresponding region is independent
4151 * of the intra-statement consecutivity constraint, it should
4152 * only be considered if this is the first intra-statement consecutivity
4153 * constraint for the node. Otherwise, the same region will already
4154 * have been added as part of the encoding of the first constraint and
4155 * the region would only be reached in cases where it is known that
4156 * it cannot be satisfied.
4158 * If this is not the first intra-statement consecutivity constraint,
4159 * then the other two disjuncts are also checked for being duplicates
4160 * of disjuncts from earlier intra-statement consecutivity constraints
4161 * for the same node. If so, then the duplicates are removed.
4162 * The first disjunct for this constraint (if any is left) is also
4163 * made conditional on the previous disjunction to ensure that
4164 * this disjunction is only considered if all previous disjunctions
4165 * for the same node have failed.
4167 * Finally, the initial disjuncts in the disjunction are marked
4170 static int add_state_intra_ilp_regions(isl_ctx
*ctx
,
4171 struct isl_sched_graph
*graph
, struct isl_sched_node
*node
,
4172 struct isl_sched_intra
*intra
, int first
, int n
,
4173 __isl_take isl_mat
*outer_complement
, int allow_independent
)
4175 int allow_combination
;
4177 int conditional
= n
> first
;
4181 allow_combination
= !conditional
&& !needs_row(graph
, node
);
4182 needed
= n
+ 1 + allow_independent
+ allow_combination
;
4183 if (graph_extend_region(ctx
, graph
, needed
) < 0)
4186 if (intra
->state
== isl_sched_intra_outer
)
4187 n
= add_linear_combination(graph
, node
, intra
, n
,
4190 n
= add_next_inner(graph
, node
, intra
, n
, outer_complement
);
4192 n
= drop_if_duplicate(graph
, first
, n
);
4194 if (allow_independent
) {
4195 n
= allow_linear_independence(graph
, node
, intra
, n
);
4197 n
= drop_if_duplicate(graph
, first
, n
);
4199 if (allow_combination
)
4200 n
= allow_linear_combination(graph
, node
, intra
, n
);
4202 if (conditional
&& n
> first_disj
)
4203 graph
->region
[first_disj
].conditional
= 1;
4204 for (i
= first_disj
; i
+ 1 < n
; ++i
)
4205 graph
->region
[i
].disjunctive
= 1;
4209 isl_mat_free(outer_complement
);
4210 return clear_regions(graph
, n
);
4213 /* Add ILP regions for the intra-statement consecutivity constraint "intra"
4214 * on node "node" in "graph".
4215 * "first" is the position of the first intra-statement consecutivity
4216 * constraint ILP region for "node".
4217 * "n" is the current number of ILP regions.
4218 * Return the updated number of ILP regions or -1 on error.
4220 * Let T_0 be the schedule computed so far,
4221 * let G be the outer part of the consecutivity constraint,
4222 * let H be the inner part of the consecutivity constraint, and
4223 * let h be the number of rows of H that still need to be handled.
4225 * If rank(T_0; G; H) < rank(T_0; G) + h,
4226 * then T_0 can no longer be extended with those remaining h rows
4227 * without introducing a linear dependence.
4228 * Mark the constraint as failed.
4230 * Otherwise, if rank(T_0) < rank(T_0; G), then T_0 does not cover G yet and
4231 * T_0 should be extended with a linear combination of G first.
4233 * Otherwise, if the number of rows of H that appear in T_0 is smaller
4234 * than the total number of rows in H, then the next schedule row
4235 * should be equal to the next row of H.
4237 * Otherwise, consecutivity has been achieved and no ILP constraint
4238 * needs to be added.
4240 * In the cases where some ILP regions need to be added,
4241 * check whether rank(T_0; G; H) < dim, in which case
4242 * a schedule row that is linearly independent of T_0; G; H
4245 static int add_intra_ilp_regions(isl_ctx
*ctx
, struct isl_sched_graph
*graph
,
4246 struct isl_sched_node
*node
, struct isl_sched_intra
*intra
,
4249 isl_mat
*mat
, *complement
, *all
;
4250 int allow_independent
;
4251 int n_inner
, rank_outer
, rank_all
;
4253 mat
= intra_rows(node
, intra
, 1, 0);
4254 complement
= isl_mat_row_complement(mat
);
4256 return clear_regions(graph
, n
);
4257 rank_outer
= isl_mat_cols(complement
) - isl_mat_rows(complement
);
4259 n_inner
= intra
->n_inner
- intra
->n_fixed
;
4261 all
= intra_rows(node
, intra
, 1, 1);
4262 rank_all
= isl_mat_rank(all
);
4267 allow_independent
= rank_all
< node
->nvar
;
4269 if (rank_all
< rank_outer
+ n_inner
)
4270 intra
->n_fixed
= -1;
4271 else if (node
->rank
< rank_outer
)
4272 intra
->state
= isl_sched_intra_outer
;
4273 else if (intra
->n_fixed
< intra
->n_inner
)
4274 intra
->state
= isl_sched_intra_inner
;
4276 intra
->state
= isl_sched_intra_free
;
4277 if (intra
->n_fixed
>= 0 && intra
->state
!= isl_sched_intra_free
)
4278 return add_state_intra_ilp_regions(ctx
, graph
, node
, intra
,
4279 first
, n
, complement
, allow_independent
);
4281 isl_mat_free(complement
);
4284 isl_mat_free(complement
);
4285 return clear_regions(graph
, n
);
4288 /* Add ILP regions for all active intra-statement consecutivity constraints
4290 * Return the total number of such regions or -1 if some error occurred.
4292 static int add_graph_intra_ilp_regions(isl_ctx
*ctx
,
4293 struct isl_sched_graph
*graph
)
4298 for (i
= 0; i
< graph
->n
; ++i
) {
4299 struct isl_sched_node
*node
= &graph
->node
[i
];
4300 struct isl_sched_intra
*intra
;
4303 for (intra
= node
->intra
; intra
; intra
= intra
->next
) {
4304 if (intra
->n_fixed
< 0)
4306 n
= add_intra_ilp_regions(ctx
, graph
, node
, intra
,
4316 /* Set graph->region[n] to an optional fixed-value constraint with
4317 * linear combinations "eq" and expected value "val"
4318 * that applies to the entire sequence of variables.
4320 static int set_global_eq(struct isl_sched_graph
*graph
,
4321 struct isl_sched_edge
*edge
, __isl_take isl_mat
*eq
,
4322 __isl_take isl_vec
*val
, int n
)
4324 graph
->region
[n
].has_fixed
= 1;
4325 graph
->region
[n
].fixed
= eq
;
4326 graph
->region
[n
].fixed_val
= val
;
4327 graph
->region
[n
].pos
= 0;
4328 graph
->region
[n
].optional
= 1;
4329 graph
->region
[n
].user
= edge
;
4334 /* Construct the equality constraints on the set of coefficients
4335 * for valid equality constraints for the dependence relation of "edge".
4337 * First compute the affine hull of the dependence relation.
4338 * An equality constraint is valid for the dependence relation
4339 * if it is a linear combination of the (equality) constraints
4340 * of the affine hull.
4341 * This means the coefficients of such an equality constraint
4342 * need to be orthogonal to the orthogonal complement of
4343 * the constraints of the affine hull.
4344 * Return this orthogonal complement E.
4345 * Note that the first element in this matrix corresponds
4346 * to the coefficient of the constant term.
4350 * E (c_0, c_n, c_x, c_y)^T = 0
4354 * c_0 + c_n n + c_x x + c_y y = 0 for each (x,y) in R
4356 * If the source or destination nodes of "edge" have been compressed,
4357 * then the dependence relation is also compressed before
4358 * the affine hull of the set of coefficients is computed.
4360 static __isl_give isl_mat
*inter_coefficients_eq(struct isl_sched_edge
*edge
)
4366 map
= isl_map_copy(edge
->map
);
4367 if (edge
->src
->compressed
)
4368 map
= isl_map_preimage_domain_multi_aff(map
,
4369 isl_multi_aff_copy(edge
->src
->decompress
));
4370 if (edge
->dst
->compressed
)
4371 map
= isl_map_preimage_range_multi_aff(map
,
4372 isl_multi_aff_copy(edge
->dst
->decompress
));
4373 aff
= isl_map_affine_hull(map
);
4374 aff
= isl_basic_map_remove_divs(aff
);
4375 mat
= isl_basic_map_extract_equalities(aff
);
4376 mat
= isl_mat_row_complement(mat
);
4377 isl_basic_map_free(aff
);
4382 /* Add an ILP region for the inter-statement consecutivity constraint "edge"
4383 * in "graph" that fixes the dependence distance to zero or
4384 * one (if "one" is set).
4385 * "n" is the current number of ILP regions.
4386 * Return the updated number of ILP regions or -1 on error.
4388 * Construct a matrix E with
4390 * E (c_0, c_n, c_x, c_y)^T = 0
4394 * c_0 + c_n n + c_x x + c_y y = 0 for each (x,y) in R
4396 * In case "one" is set, the constraints need to be of the form
4398 * c_0 + c_n n + c_x x + c_y y = 1 for each (x,y) in R
4402 * (c_0 - 1) + c_n n + c_x x + c_y y = 0 for each (x,y) in R
4406 * E (c_0 - 1, c_n, c_x, c_y)^T = 0
4410 * E (c_0, c_n, c_x, c_y)^T = E (1, 0, 0, 0)^T
4412 * That is the linear combinations E need to be equal to either zero
4413 * (if "one" is not set) or the first column of E (if "one" is set).
4415 * Before imposing these constraints, they need to be formulated
4416 * in terms of the ILP variables by plugging in
4417 * (c_j_0 - c_i_0, c_j_n - c_i_n, -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-)
4418 * for (c_0, c_n, c_x, c_y),
4419 * where c_* = c_*^+ - c_*^-, with c_*^+ and c_*^- non-negative.
4420 * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
4421 * This results in constraints
4423 * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) = 0
4427 * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) = 1
4429 * Note that the first column of E corresponds to the coefficient
4430 * of the constant term, while the mapping returned by inter_dim_map
4431 * assumes the presence of a constant term. An extra zero column
4432 * is therefore temporarily inserted to represent this constant term.
4434 static int add_fixed_inter_ilp_region(isl_ctx
*ctx
,
4435 struct isl_sched_graph
*graph
, struct isl_sched_edge
*edge
, int n
,
4439 isl_dim_map
*dim_map
;
4443 if (graph_extend_region(ctx
, graph
, n
+ 1) < 0)
4446 eq
= inter_coefficients_eq(edge
);
4447 val
= isl_mat_get_col(eq
, 0);
4449 val
= isl_vec_set_si(val
, 0);
4450 offset
= isl_map_offset(edge
->map
, isl_dim_in
);
4451 dim_map
= inter_dim_map(ctx
, graph
, edge
->src
, edge
->dst
, offset
, 1);
4452 eq
= isl_mat_insert_zero_cols(eq
, 0, 1);
4453 eq
= isl_mat_dim_map(eq
, dim_map
);
4454 eq
= isl_mat_drop_cols(eq
, 0, 1);
4455 n
= set_global_eq(graph
, edge
, eq
, val
, n
);
4460 /* Add an ILP region for the inter-statement consecutivity constraint "edge"
4461 * in "graph" that fixes the dependence distance to zero.
4462 * "n" is the current number of ILP regions.
4463 * Return the updated number of ILP regions or -1 on error.
4465 static int add_outer_inter_ilp_region(isl_ctx
*ctx
,
4466 struct isl_sched_graph
*graph
, struct isl_sched_edge
*edge
, int n
)
4468 return add_fixed_inter_ilp_region(ctx
, graph
, edge
, n
, 0);
4471 /* Add an ILP region for the inter-statement consecutivity constraint "edge"
4472 * in "graph" that fixes the dependence distance to one.
4473 * "n" is the current number of ILP regions.
4474 * Return the updated number of ILP regions or -1 on error.
4476 * This distance should only be one at the point where both
4477 * corresponding intra-statement consecutivity constraints
4478 * fix the schedule row to be equal to the first inner row.
4479 * If either of these intra-statement consecutivity constraints
4480 * already fixed some inner rows, then this can no longer be achieved.
4482 static int add_inner_inter_ilp_region(isl_ctx
*ctx
,
4483 struct isl_sched_graph
*graph
, struct isl_sched_edge
*edge
, int n
)
4485 if (edge
->src_intra
->n_fixed
> 0 || edge
->dst_intra
->n_fixed
> 0) {
4486 edge
->state
= isl_sched_inter_failed
;
4489 edge
->state
= isl_sched_inter_inner
;
4490 return add_fixed_inter_ilp_region(ctx
, graph
, edge
, n
, 1);
4493 /* Add an ILP region for the inter-statement consecutivity constraint "edge"
4495 * "n" is the current number of ILP regions.
4496 * Return the updated number of ILP regions or -1 on error.
4498 * The dependence distance should be kept at zero as long as
4499 * both corresponding intra-statement consecutivity constraints
4500 * haven't had their outer parts covered by the current schedule.
4501 * The distance should be one when the schedule is made equal
4502 * to the first rows of their inner parts.
4503 * Note that at each level the constraint on dependence distance
4504 * is imposed unconditionally. This means that it is imposed
4505 * even if the regions of the corresponding intra-statement consecutivity
4506 * constraints cannot be satisfied at that level.
4507 * The distance-one constraint is also imposed if those regions
4508 * succeed for the wrong reason, i.e., if the schedule row does
4509 * not get equated to the first inner row but if rather one of
4510 * the other potential disjuncts applies.
4511 * The assumption is that the distance-one constraint somehow aligns
4512 * with the first inner rows and is therefore unlikely to succeed
4513 * if the schedule is not made equal to those first inner rows.
4514 * In any case, the distance-one constraint is applied only once.
4516 static int add_inter_ilp_region(isl_ctx
*ctx
, struct isl_sched_graph
*graph
,
4517 struct isl_sched_edge
*edge
, int n
)
4519 enum isl_sched_intra_state src_state
, dst_state
;
4521 if (edge
->state
== isl_sched_inter_failed
)
4523 if (edge
->state
== isl_sched_inter_free
)
4525 if (edge
->src_intra
->n_fixed
< 0 || edge
->dst_intra
->n_fixed
< 0) {
4526 edge
->state
= isl_sched_inter_failed
;
4529 if (edge
->state
== isl_sched_inter_inner
) {
4530 if (edge
->src_intra
->n_fixed
== 0 ||
4531 edge
->dst_intra
->n_fixed
== 0)
4532 edge
->state
= isl_sched_inter_failed
;
4534 edge
->state
= isl_sched_inter_free
;
4537 src_state
= edge
->src_intra
->state
;
4538 dst_state
= edge
->dst_intra
->state
;
4539 if (src_state
== isl_sched_intra_outer
||
4540 dst_state
== isl_sched_intra_outer
)
4541 return add_outer_inter_ilp_region(ctx
, graph
, edge
, n
);
4542 if (src_state
== isl_sched_intra_inner
&&
4543 dst_state
== isl_sched_intra_inner
)
4544 return add_inner_inter_ilp_region(ctx
, graph
, edge
, n
);
4548 /* Add ILP regions for all active inter-statement consecutivity constraints
4550 * "n" is the current number of ILP regions.
4551 * Return the updated number of ILP regions or -1 on error.
4553 static int add_graph_inter_ilp_regions(isl_ctx
*ctx
,
4554 struct isl_sched_graph
*graph
, int n
)
4558 for (i
= 0; i
< graph
->n_edge
; ++i
) {
4559 struct isl_sched_edge
*edge
= &graph
->edge
[i
];
4561 if (!is_consecutivity(edge
))
4563 n
= add_inter_ilp_region(ctx
, graph
, edge
, n
);
4570 /* Add ILP regions for all active intra-statement and inter-statement
4571 * consecutivity constraints in "graph".
4572 * Return the total number of such regions or -1 if some error occurred.
4574 static int add_graph_consecutivity_ilp_regions(isl_ctx
*ctx
,
4575 struct isl_sched_graph
*graph
)
4579 n
= add_graph_intra_ilp_regions(ctx
, graph
);
4582 n
= add_graph_inter_ilp_regions(ctx
, graph
, n
);
4587 /* Does this region try to fix the schedule row to be equal
4588 * to a row of the inner part of the corresponding
4589 * intra-statement consecutivity constraint?
4590 * In particular, was such a constraint imposed and
4591 * is this the region that imposes the constraint
4592 * (rather than any of the potential other disjuncts
4593 * corresponding to the same intra-statement consecutivity constraint)?
4594 * Note that the other disjuncts express either a pure linear combination
4595 * (with a zero fixed_val) or a (pure) linear independence.
4596 * The "user" field was set by finish_intra.
4597 * set_equal puts the row with the non-zero fixed_val in the first position.
4599 static isl_bool
fixes_inner(struct isl_ilp_region
*region
)
4601 struct isl_sched_intra
*intra
= region
->user
;
4603 if (intra
->state
!= isl_sched_intra_inner
)
4604 return isl_bool_false
;
4605 if (!region
->has_fixed
)
4606 return isl_bool_false
;
4607 return isl_bool_not(isl_vec_element_is_zero(region
->fixed_val
, 0));
4610 /* Extract the subsequence of elements of length "len" starting at "pos"
4613 static __isl_give isl_vec
*extract(__isl_keep isl_vec
*v
, int pos
, int len
)
4620 ctx
= isl_vec_get_ctx(v
);
4621 r
= isl_vec_alloc(ctx
, len
);
4624 isl_seq_cpy(r
->el
, v
->el
+ pos
, len
);
4628 /* Given a region that tries to fix the schedule row to be equal
4629 * to a row of the inner part of the corresponding
4630 * intra-statement consecutivity constraint, does "sol"
4631 * satisfy this constraint?
4633 * By definition, such a region imposes some linear combinations
4634 * that need to be equal to some fixed values.
4635 * Note that "sol" is the solution to an ILP problem,
4636 * so the denominator is always 1.
4638 static isl_bool
is_fixed(struct isl_ilp_region
*region
, __isl_keep isl_vec
*sol
)
4644 len
= isl_mat_cols(region
->fixed
);
4645 v
= extract(sol
, 1 + region
->pos
, len
);
4647 v
= isl_mat_vec_product(isl_mat_copy(region
->fixed
), v
);
4648 fixed
= isl_vec_is_equal(v
, region
->fixed_val
);
4654 /* Update the information on the intra-statement consecutivity constraint
4655 * associated to ILP region "pos" in "graph" based on the region itself and
4656 * on the (non-empty) ILP solution "sol".
4658 * If the entire disjunctive constraint associated to
4659 * an intra-statement consecutivity constraint could not be imposed
4660 * (as witnessed by the region corresponding to the last disjunct
4661 * being marked failed), then mark the intra-statement consecutivity constraint
4663 * Otherwise, if the region was meant to fix the schedule row to be equal
4664 * to a row of the inner part of the corresponding
4665 * intra-statement consecutivity constraint and if this succeeded,
4666 * then update the number of fixed rows of the
4667 * intra-statement consecutivity constraint.
4668 * If this is the first such row, then also keep track of its position.
4670 * Note that the "user" field of the region was set by finish_intra.
4672 static isl_stat
update_intra(struct isl_sched_graph
*graph
, int pos
,
4673 __isl_keep isl_vec
*sol
)
4675 struct isl_ilp_region
*region
= &graph
->region
[pos
];
4676 struct isl_sched_intra
*intra
= region
->user
;
4679 if (region
->failed
) {
4680 intra
->n_fixed
= -1;
4684 fixes
= fixes_inner(region
);
4686 return isl_stat_error
;
4690 fixes
= is_fixed(region
, sol
);
4692 return isl_stat_error
;
4696 if (intra
->n_fixed
== 0)
4697 intra
->first_fixed
= graph
->n_total_row
;
4703 /* Update the information on the inter-statement consecutivity constraint
4704 * associated to ILP region "pos" in "graph" based on the region itself and
4705 * on the (non-empty) ILP solution "sol".
4707 * Check if the region was marked failed and, if so,
4708 * mark the corresponding edge as failed.
4710 * Note that the "user" field of the region was set by set_global_eq.
4712 static isl_stat
update_inter(struct isl_sched_graph
*graph
, int pos
)
4714 struct isl_ilp_region
*region
= &graph
->region
[pos
];
4715 struct isl_sched_edge
*edge
= region
->user
;
4718 edge
->state
= isl_sched_inter_failed
;
4723 /* Update the information on the intra-statement or inter-statement
4724 * consecutivity constraint
4725 * associated to ILP region "pos" in "graph" based on the region itself and
4726 * on the ILP solution "sol".
4728 * If no solution was computed, then no useful information can be extracted
4731 * The "user" field points to either an isl_sched_intra object
4732 * (set by finish_intra) or an isl_sched_edge object
4733 * (set by set_global_eq).
4734 * Use the field to determine whether the region corresponds
4735 * to an intra-statement or inter-statement consecutivity constraint and
4736 * handle it accordingly.
4738 static isl_stat
update_consecutivity(struct isl_sched_graph
*graph
, int pos
,
4739 __isl_keep isl_vec
*sol
)
4741 struct isl_ilp_region
*region
= &graph
->region
[pos
];
4744 return isl_stat_error
;
4750 if (is_edge(graph
, region
->user
))
4751 return update_inter(graph
, pos
);
4753 return update_intra(graph
, pos
, sol
);
4757 /* Solve the ILP problem constructed in setup_lp.
4758 * First construct one or more ILP regions for each active intra-statement
4759 * consecutivity constraint. These (optional) regions try to
4760 * make progress in achieving consecutivity.
4761 * Next, for each node such that all the remaining rows of its schedule
4762 * need to be non-trivial, construct a region with a non-zero constraint.
4763 * This region imposes that the next row is independent of previous rows,
4764 * by enforcing that at least
4765 * one of the linear combinations in the rows of node->indep is non-zero.
4766 * The ILP regions corresponding to intra-statement and inter-statement
4767 * consecutivity constraints are added first to allow more freedom for them
4770 * After a solution has been computed, update the information
4771 * on intra-statement consecutivity constraints based on
4772 * the solution and on failed optional regions.
4774 static __isl_give isl_vec
*solve_lp(isl_ctx
*ctx
, struct isl_sched_graph
*graph
)
4780 n
= add_graph_consecutivity_ilp_regions(ctx
, graph
);
4783 if (graph_extend_region(ctx
, graph
, n
+ graph
->n
) < 0)
4785 for (i
= 0; i
< graph
->n
; ++i
) {
4786 struct isl_sched_node
*node
= &graph
->node
[i
];
4788 graph
->region
[n
+ i
].pos
= node_var_coef_offset(node
);
4789 graph
->region
[n
+ i
].has_non_zero
= needs_row(graph
, node
);
4790 if (!graph
->region
[n
+ i
].has_non_zero
)
4792 graph
->region
[n
+ i
].non_zero
= linear_to_lp(node
->indep
);
4794 lp
= isl_basic_set_copy(graph
->lp
);
4795 sol
= isl_tab_basic_set_constrained_lexmin(lp
, 2, n
+ graph
->n
,
4796 graph
->region
, &check_conflict
, graph
);
4797 for (i
= 0; i
< n
; ++i
)
4798 if (update_consecutivity(graph
, i
, sol
) < 0)
4799 return isl_vec_free(sol
);
4800 clear_regions(graph
, n
+ graph
->n
);
4803 clear_regions(graph
, n
+ graph
->n
);
4807 /* Extract the coefficients for the variables of "node" from "sol".
4809 * Each schedule coefficient c_i_x is represented as the difference
4810 * between two non-negative variables c_i_x^+ - c_i_x^-.
4811 * The c_i_x^- appear before their c_i_x^+ counterpart.
4812 * Furthermore, the order of these pairs is the opposite of that
4813 * of the corresponding coefficients.
4815 * Return c_i_x = c_i_x^+ - c_i_x^-
4817 static __isl_give isl_vec
*extract_var_coef(struct isl_sched_node
*node
,
4818 __isl_keep isl_vec
*sol
)
4826 csol
= isl_vec_alloc(isl_vec_get_ctx(sol
), node
->nvar
);
4830 pos
= 1 + node_var_coef_offset(node
);
4831 for (i
= 0; i
< node
->nvar
; ++i
)
4832 isl_int_sub(csol
->el
[node
->nvar
- 1 - i
],
4833 sol
->el
[pos
+ 2 * i
+ 1], sol
->el
[pos
+ 2 * i
]);
4838 /* Update the schedules of all nodes based on the given solution
4839 * of the LP problem.
4840 * The new row is added to the current band.
4841 * All possibly negative coefficients are encoded as a difference
4842 * of two non-negative variables, so we need to perform the subtraction
4845 * If coincident is set, then the caller guarantees that the new
4846 * row satisfies the coincidence constraints.
4848 static int update_schedule(struct isl_sched_graph
*graph
,
4849 __isl_take isl_vec
*sol
, int coincident
)
4852 isl_vec
*csol
= NULL
;
4857 isl_die(sol
->ctx
, isl_error_internal
,
4858 "no solution found", goto error
);
4859 if (graph
->n_total_row
>= graph
->max_row
)
4860 isl_die(sol
->ctx
, isl_error_internal
,
4861 "too many schedule rows", goto error
);
4863 for (i
= 0; i
< graph
->n
; ++i
) {
4864 struct isl_sched_node
*node
= &graph
->node
[i
];
4866 int row
= isl_mat_rows(node
->sched
);
4869 csol
= extract_var_coef(node
, sol
);
4873 isl_map_free(node
->band_sched
);
4874 node
->band_sched
= NULL
;
4875 node
->sched
= isl_mat_add_rows(node
->sched
, 1);
4878 pos
= node_cst_coef_offset(node
);
4879 node
->sched
= isl_mat_set_element(node
->sched
,
4880 row
, 0, sol
->el
[1 + pos
]);
4881 pos
= node_par_coef_offset(node
);
4882 for (j
= 0; j
< node
->nparam
; ++j
)
4883 node
->sched
= isl_mat_set_element(node
->sched
,
4884 row
, 1 + j
, sol
->el
[1 + pos
+ j
]);
4885 for (j
= 0; j
< node
->nvar
; ++j
)
4886 node
->sched
= isl_mat_set_element(node
->sched
,
4887 row
, 1 + node
->nparam
+ j
, csol
->el
[j
]);
4888 node
->coincident
[graph
->n_total_row
] = coincident
;
4894 graph
->n_total_row
++;
4903 /* Convert row "row" of node->sched into an isl_aff living in "ls"
4904 * and return this isl_aff.
4906 static __isl_give isl_aff
*extract_schedule_row(__isl_take isl_local_space
*ls
,
4907 struct isl_sched_node
*node
, int row
)
4915 aff
= isl_aff_zero_on_domain(ls
);
4916 if (isl_mat_get_element(node
->sched
, row
, 0, &v
) < 0)
4918 aff
= isl_aff_set_constant(aff
, v
);
4919 for (j
= 0; j
< node
->nparam
; ++j
) {
4920 if (isl_mat_get_element(node
->sched
, row
, 1 + j
, &v
) < 0)
4922 aff
= isl_aff_set_coefficient(aff
, isl_dim_param
, j
, v
);
4924 for (j
= 0; j
< node
->nvar
; ++j
) {
4925 if (isl_mat_get_element(node
->sched
, row
,
4926 1 + node
->nparam
+ j
, &v
) < 0)
4928 aff
= isl_aff_set_coefficient(aff
, isl_dim_in
, j
, v
);
4940 /* Convert the "n" rows starting at "first" of node->sched into a multi_aff
4941 * and return this multi_aff.
4943 * The result is defined over the uncompressed node domain.
4945 static __isl_give isl_multi_aff
*node_extract_partial_schedule_multi_aff(
4946 struct isl_sched_node
*node
, int first
, int n
)
4950 isl_local_space
*ls
;
4957 nrow
= isl_mat_rows(node
->sched
);
4958 if (node
->compressed
)
4959 space
= isl_multi_aff_get_domain_space(node
->decompress
);
4961 space
= isl_space_copy(node
->space
);
4962 ls
= isl_local_space_from_space(isl_space_copy(space
));
4963 space
= isl_space_from_domain(space
);
4964 space
= isl_space_add_dims(space
, isl_dim_out
, n
);
4965 ma
= isl_multi_aff_zero(space
);
4967 for (i
= first
; i
< first
+ n
; ++i
) {
4968 aff
= extract_schedule_row(isl_local_space_copy(ls
), node
, i
);
4969 ma
= isl_multi_aff_set_aff(ma
, i
- first
, aff
);
4972 isl_local_space_free(ls
);
4974 if (node
->compressed
)
4975 ma
= isl_multi_aff_pullback_multi_aff(ma
,
4976 isl_multi_aff_copy(node
->compress
));
4981 /* Convert the part of node->sched that corresponds to the current band
4982 * into a multi_aff and return this multi_aff.
4984 * The result is defined over the uncompressed node domain.
4986 static __isl_give isl_multi_aff
*node_extract_band_schedule_multi_aff(
4987 struct isl_sched_graph
*graph
, struct isl_sched_node
*node
)
4992 start
= graph
->band_start
;
4993 nrow
= isl_mat_rows(node
->sched
) - start
;
4994 return node_extract_partial_schedule_multi_aff(node
, start
, nrow
);
4997 /* Convert the part of node->sched that corresponds to the current band
4998 * into a map and return this map.
5000 * The result is cached in node->band_sched, which needs to be released
5001 * whenever node->sched is updated.
5002 * It is defined over the uncompressed node domain.
5004 static __isl_give isl_map
*node_extract_band_schedule(
5005 struct isl_sched_graph
*graph
, struct isl_sched_node
*node
)
5007 if (!node
->band_sched
) {
5010 ma
= node_extract_band_schedule_multi_aff(graph
, node
);
5011 node
->band_sched
= isl_map_from_multi_aff(ma
);
5014 return isl_map_copy(node
->band_sched
);
5017 /* Construct a map that can be used to update a dependence relation
5018 * based on the current band schedule.
5019 * That is, construct a map expressing that source and sink
5020 * are executed within the same iteration of the current band.
5021 * This map can then be intersected with the dependence relation.
5022 * This is not the most efficient way, but this shouldn't be a critical
5025 static __isl_give isl_map
*specializer(struct isl_sched_graph
*graph
,
5026 struct isl_sched_node
*src
, struct isl_sched_node
*dst
)
5028 isl_map
*src_sched
, *dst_sched
;
5030 src_sched
= node_extract_band_schedule(graph
, src
);
5031 dst_sched
= node_extract_band_schedule(graph
, dst
);
5032 return isl_map_apply_range(src_sched
, isl_map_reverse(dst_sched
));
5035 /* Intersect the domains of the nested relations in domain and range
5036 * of "umap" with "map".
5038 static __isl_give isl_union_map
*intersect_domains(
5039 __isl_take isl_union_map
*umap
, __isl_keep isl_map
*map
)
5041 isl_union_set
*uset
;
5043 umap
= isl_union_map_zip(umap
);
5044 uset
= isl_union_set_from_set(isl_map_wrap(isl_map_copy(map
)));
5045 umap
= isl_union_map_intersect_domain(umap
, uset
);
5046 umap
= isl_union_map_zip(umap
);
5050 /* Update the dependence relation of the given edge based
5051 * on the current band schedule.
5052 * If the dependence is carried completely by the current band, then
5053 * it is removed from the edge_tables. It is kept in the list of edges
5054 * as otherwise all edge_tables would have to be recomputed.
5056 * If the edge is of a type that can appear multiple times
5057 * between the same pair of nodes, then it is added to
5058 * the edge table (again). This prevents the situation
5059 * where none of these edges is referenced from the edge table
5060 * because the one that was referenced turned out to be empty and
5061 * was therefore removed from the table.
5063 * If the edge is marked failed or completely handled, then it is
5064 * (only) a consecutivity edge and it can be removed from consideration
5065 * without even updating the dependence relation.
5066 * Other types of edges have the default (isl_sched_inter_init) value
5069 static isl_stat
update_edge(isl_ctx
*ctx
, struct isl_sched_graph
*graph
,
5070 struct isl_sched_edge
*edge
)
5075 if (edge
->state
== isl_sched_inter_failed
)
5076 return clear_edge(graph
, edge
);
5077 if (edge
->state
== isl_sched_inter_free
)
5078 return clear_edge(graph
, edge
);
5080 id
= specializer(graph
, edge
->src
, edge
->dst
);
5081 edge
->map
= isl_map_intersect(edge
->map
, isl_map_copy(id
));
5085 if (edge
->tagged_condition
) {
5086 edge
->tagged_condition
=
5087 intersect_domains(edge
->tagged_condition
, id
);
5088 if (!edge
->tagged_condition
)
5091 if (edge
->tagged_validity
) {
5092 edge
->tagged_validity
=
5093 intersect_domains(edge
->tagged_validity
, id
);
5094 if (!edge
->tagged_validity
)
5098 empty
= isl_map_plain_is_empty(edge
->map
);
5102 graph_remove_edge(graph
, edge
);
5103 } else if (is_multi_edge_type(edge
)) {
5104 if (graph_edge_tables_add(ctx
, graph
, edge
) < 0)
5112 return isl_stat_error
;
5115 /* Does the domain of "umap" intersect "uset"?
5117 static int domain_intersects(__isl_keep isl_union_map
*umap
,
5118 __isl_keep isl_union_set
*uset
)
5122 umap
= isl_union_map_copy(umap
);
5123 umap
= isl_union_map_intersect_domain(umap
, isl_union_set_copy(uset
));
5124 empty
= isl_union_map_is_empty(umap
);
5125 isl_union_map_free(umap
);
5127 return empty
< 0 ? -1 : !empty
;
5130 /* Does the range of "umap" intersect "uset"?
5132 static int range_intersects(__isl_keep isl_union_map
*umap
,
5133 __isl_keep isl_union_set
*uset
)
5137 umap
= isl_union_map_copy(umap
);
5138 umap
= isl_union_map_intersect_range(umap
, isl_union_set_copy(uset
));
5139 empty
= isl_union_map_is_empty(umap
);
5140 isl_union_map_free(umap
);
5142 return empty
< 0 ? -1 : !empty
;
5145 /* Are the condition dependences of "edge" local with respect to
5146 * the current band schedule?
5148 * That is, are domain and range of the condition dependences mapped
5149 * to the same point?
5151 * In other words, is the condition false?
5153 static int is_condition_false(struct isl_sched_graph
*graph
,
5154 struct isl_sched_edge
*edge
)
5156 isl_union_map
*umap
;
5157 isl_map
*map
, *sched
, *test
;
5160 empty
= isl_union_map_is_empty(edge
->tagged_condition
);
5161 if (empty
< 0 || empty
)
5164 umap
= isl_union_map_copy(edge
->tagged_condition
);
5165 umap
= isl_union_map_zip(umap
);
5166 umap
= isl_union_set_unwrap(isl_union_map_domain(umap
));
5167 map
= isl_map_from_union_map(umap
);
5169 sched
= node_extract_band_schedule(graph
, edge
->src
);
5170 map
= isl_map_apply_domain(map
, sched
);
5171 sched
= node_extract_band_schedule(graph
, edge
->dst
);
5172 map
= isl_map_apply_range(map
, sched
);
5174 test
= isl_map_identity(isl_map_get_space(map
));
5175 local
= isl_map_is_subset(map
, test
);
5182 /* For each conditional validity constraint that is adjacent
5183 * to a condition with domain in condition_source or range in condition_sink,
5184 * turn it into an unconditional validity constraint.
5186 static int unconditionalize_adjacent_validity(struct isl_sched_graph
*graph
,
5187 __isl_take isl_union_set
*condition_source
,
5188 __isl_take isl_union_set
*condition_sink
)
5192 condition_source
= isl_union_set_coalesce(condition_source
);
5193 condition_sink
= isl_union_set_coalesce(condition_sink
);
5195 for (i
= 0; i
< graph
->n_edge
; ++i
) {
5197 isl_union_map
*validity
;
5199 if (!is_conditional_validity(&graph
->edge
[i
]))
5201 if (is_validity(&graph
->edge
[i
]))
5204 validity
= graph
->edge
[i
].tagged_validity
;
5205 adjacent
= domain_intersects(validity
, condition_sink
);
5206 if (adjacent
>= 0 && !adjacent
)
5207 adjacent
= range_intersects(validity
, condition_source
);
5213 set_validity(&graph
->edge
[i
]);
5216 isl_union_set_free(condition_source
);
5217 isl_union_set_free(condition_sink
);
5220 isl_union_set_free(condition_source
);
5221 isl_union_set_free(condition_sink
);
5225 /* Update the dependence relations of all edges based on the current band
5226 * schedule and enforce conditional validity constraints that are adjacent
5227 * to satisfied condition constraints.
5229 * First check if any of the condition constraints are satisfied
5230 * (i.e., not local to the outer schedule) and keep track of
5231 * their domain and range.
5232 * Then update all dependence relations (which removes the non-local
5234 * Finally, if any condition constraints turned out to be satisfied,
5235 * then turn all adjacent conditional validity constraints into
5236 * unconditional validity constraints.
5238 static int update_edges(isl_ctx
*ctx
, struct isl_sched_graph
*graph
)
5242 isl_union_set
*source
, *sink
;
5244 source
= isl_union_set_empty(isl_space_params_alloc(ctx
, 0));
5245 sink
= isl_union_set_empty(isl_space_params_alloc(ctx
, 0));
5246 for (i
= 0; i
< graph
->n_edge
; ++i
) {
5248 isl_union_set
*uset
;
5249 isl_union_map
*umap
;
5251 if (!is_condition(&graph
->edge
[i
]))
5253 if (is_local(&graph
->edge
[i
]))
5255 local
= is_condition_false(graph
, &graph
->edge
[i
]);
5263 umap
= isl_union_map_copy(graph
->edge
[i
].tagged_condition
);
5264 uset
= isl_union_map_domain(umap
);
5265 source
= isl_union_set_union(source
, uset
);
5267 umap
= isl_union_map_copy(graph
->edge
[i
].tagged_condition
);
5268 uset
= isl_union_map_range(umap
);
5269 sink
= isl_union_set_union(sink
, uset
);
5272 for (i
= 0; i
< graph
->n_edge
; ++i
) {
5273 if (update_edge(ctx
, graph
, &graph
->edge
[i
]) < 0)
5278 return unconditionalize_adjacent_validity(graph
, source
, sink
);
5280 isl_union_set_free(source
);
5281 isl_union_set_free(sink
);
5284 isl_union_set_free(source
);
5285 isl_union_set_free(sink
);
5289 /* Initiate a new band by recording the starting position of the new band and
5290 * by keeping track of the number of inner rows already taken into account for
5291 * all intra-statement consecutivity constraints at this point and
5292 * the states of all the inter-statement consecutivity constraints
5293 * such that they can be reset when the band gets discarded
5296 static void next_band(struct isl_sched_graph
*graph
)
5300 graph
->band_start
= graph
->n_total_row
;
5302 for (i
= 0; i
< graph
->n
; ++i
) {
5303 struct isl_sched_node
*node
= &graph
->node
[i
];
5304 struct isl_sched_intra
*intra
;
5306 for (intra
= node
->intra
; intra
; intra
= intra
->next
)
5307 intra
->band_n_fixed
= intra
->n_fixed
;
5309 for (i
= 0; i
< graph
->n_edge
; ++i
)
5310 graph
->edge
[i
].band_state
= graph
->edge
[i
].state
;
5313 /* Return the union of the universe domains of the nodes in "graph"
5314 * that satisfy "pred".
5316 static __isl_give isl_union_set
*isl_sched_graph_domain(isl_ctx
*ctx
,
5317 struct isl_sched_graph
*graph
,
5318 int (*pred
)(struct isl_sched_node
*node
, int data
), int data
)
5324 for (i
= 0; i
< graph
->n
; ++i
)
5325 if (pred(&graph
->node
[i
], data
))
5329 isl_die(ctx
, isl_error_internal
,
5330 "empty component", return NULL
);
5332 set
= isl_set_universe(isl_space_copy(graph
->node
[i
].space
));
5333 dom
= isl_union_set_from_set(set
);
5335 for (i
= i
+ 1; i
< graph
->n
; ++i
) {
5336 if (!pred(&graph
->node
[i
], data
))
5338 set
= isl_set_universe(isl_space_copy(graph
->node
[i
].space
));
5339 dom
= isl_union_set_union(dom
, isl_union_set_from_set(set
));
5345 /* Return a list of unions of universe domains, where each element
5346 * in the list corresponds to an SCC (or WCC) indexed by node->scc.
5348 static __isl_give isl_union_set_list
*extract_sccs(isl_ctx
*ctx
,
5349 struct isl_sched_graph
*graph
)
5352 isl_union_set_list
*filters
;
5354 filters
= isl_union_set_list_alloc(ctx
, graph
->scc
);
5355 for (i
= 0; i
< graph
->scc
; ++i
) {
5358 dom
= isl_sched_graph_domain(ctx
, graph
, &node_scc_exactly
, i
);
5359 filters
= isl_union_set_list_add(filters
, dom
);
5365 /* Return a list of two unions of universe domains, one for the SCCs up
5366 * to and including graph->src_scc and another for the other SCCs.
5368 static __isl_give isl_union_set_list
*extract_split(isl_ctx
*ctx
,
5369 struct isl_sched_graph
*graph
)
5372 isl_union_set_list
*filters
;
5374 filters
= isl_union_set_list_alloc(ctx
, 2);
5375 dom
= isl_sched_graph_domain(ctx
, graph
,
5376 &node_scc_at_most
, graph
->src_scc
);
5377 filters
= isl_union_set_list_add(filters
, dom
);
5378 dom
= isl_sched_graph_domain(ctx
, graph
,
5379 &node_scc_at_least
, graph
->src_scc
+ 1);
5380 filters
= isl_union_set_list_add(filters
, dom
);
5385 /* Copy nodes that satisfy node_pred from the src dependence graph
5386 * to the dst dependence graph.
5388 * The subgraph into which the nodes are copied will be used
5389 * to create a new band, so the cached value of the current
5390 * band schedule does not need to be copied.
5392 static isl_stat
copy_nodes(struct isl_sched_graph
*dst
,
5393 struct isl_sched_graph
*src
,
5394 int (*node_pred
)(struct isl_sched_node
*node
, int data
), int data
)
5399 for (i
= 0; i
< src
->n
; ++i
) {
5402 if (!node_pred(&src
->node
[i
], data
))
5406 dst
->node
[j
].space
= isl_space_copy(src
->node
[i
].space
);
5407 dst
->node
[j
].compressed
= src
->node
[i
].compressed
;
5408 dst
->node
[j
].hull
= isl_set_copy(src
->node
[i
].hull
);
5409 dst
->node
[j
].compress
=
5410 isl_multi_aff_copy(src
->node
[i
].compress
);
5411 dst
->node
[j
].decompress
=
5412 isl_multi_aff_copy(src
->node
[i
].decompress
);
5413 dst
->node
[j
].nvar
= src
->node
[i
].nvar
;
5414 dst
->node
[j
].nparam
= src
->node
[i
].nparam
;
5415 dst
->node
[j
].sched
= isl_mat_copy(src
->node
[i
].sched
);
5416 dst
->node
[j
].band_sched
= NULL
;
5417 dst
->node
[j
].coincident
= src
->node
[i
].coincident
;
5418 dst
->node
[j
].sizes
= isl_multi_val_copy(src
->node
[i
].sizes
);
5419 dst
->node
[j
].bounds
= isl_basic_set_copy(src
->node
[i
].bounds
);
5420 dst
->node
[j
].max
= isl_vec_copy(src
->node
[i
].max
);
5421 dst
->node
[j
].intra
= src
->node
[i
].intra
;
5424 if (!dst
->node
[j
].space
|| !dst
->node
[j
].sched
)
5425 return isl_stat_error
;
5426 if (dst
->node
[j
].compressed
&&
5427 (!dst
->node
[j
].hull
|| !dst
->node
[j
].compress
||
5428 !dst
->node
[j
].decompress
))
5429 return isl_stat_error
;
5435 /* Copy non-empty edges that satisfy edge_pred from the src dependence graph
5436 * to the dst dependence graph.
5437 * If the source or destination node of the edge is not in the destination
5438 * graph, then it must be a backward proximity edge and it should simply be ignored.
5440 * Note that the intra-statement consecutivity constraints are
5441 * shared between the nodes of "src" and "dst".
5442 * If an edge references any intra-statement consecutivity constraints,
5443 * they can therefore simply be copied.
5445 static isl_stat
copy_edges(isl_ctx
*ctx
, struct isl_sched_graph
*dst
,
5446 struct isl_sched_graph
*src
,
5447 int (*edge_pred
)(struct isl_sched_edge
*edge
, int data
), int data
)
5452 for (i
= 0; i
< src
->n_edge
; ++i
) {
5453 struct isl_sched_edge
*edge
= &src
->edge
[i
];
5455 isl_union_map
*tagged_condition
;
5456 isl_union_map
*tagged_validity
;
5457 struct isl_sched_node
*dst_src
, *dst_dst
;
5458 struct isl_sched_intra
*dst_src_intra
, *dst_dst_intra
;
5460 if (!edge_pred(edge
, data
))
5463 if (isl_map_plain_is_empty(edge
->map
))
5466 dst_src
= graph_find_node(ctx
, dst
, edge
->src
->space
);
5467 dst_dst
= graph_find_node(ctx
, dst
, edge
->dst
->space
);
5468 if (!dst_src
|| !dst_dst
)
5469 return isl_stat_error
;
5470 if (!is_node(dst
, dst_src
) || !is_node(dst
, dst_dst
)) {
5471 if (is_validity(edge
) || is_conditional_validity(edge
))
5472 isl_die(ctx
, isl_error_internal
,
5473 "backward (conditional) validity edge",
5474 return isl_stat_error
);
5478 dst_src_intra
= edge
->src_intra
;
5479 dst_dst_intra
= edge
->dst_intra
;
5481 map
= isl_map_copy(edge
->map
);
5482 tagged_condition
= isl_union_map_copy(edge
->tagged_condition
);
5483 tagged_validity
= isl_union_map_copy(edge
->tagged_validity
);
5485 dst
->edge
[dst
->n_edge
].src
= dst_src
;
5486 dst
->edge
[dst
->n_edge
].dst
= dst_dst
;
5487 dst
->edge
[dst
->n_edge
].src_intra
= dst_src_intra
;
5488 dst
->edge
[dst
->n_edge
].dst_intra
= dst_dst_intra
;
5489 dst
->edge
[dst
->n_edge
].map
= map
;
5490 dst
->edge
[dst
->n_edge
].tagged_condition
= tagged_condition
;
5491 dst
->edge
[dst
->n_edge
].tagged_validity
= tagged_validity
;
5492 dst
->edge
[dst
->n_edge
].types
= edge
->types
;
5495 if (edge
->tagged_condition
&& !tagged_condition
)
5496 return isl_stat_error
;
5497 if (edge
->tagged_validity
&& !tagged_validity
)
5498 return isl_stat_error
;
5500 if (graph_edge_tables_add(ctx
, dst
,
5501 &dst
->edge
[dst
->n_edge
- 1]) < 0)
5502 return isl_stat_error
;
5508 /* Compute the maximal number of variables over all nodes.
5509 * This is the maximal number of linearly independent schedule
5510 * rows that we need to compute.
5511 * Just in case we end up in a part of the dependence graph
5512 * with only lower-dimensional domains, we make sure we will
5513 * compute the required amount of extra linearly independent rows.
5515 static int compute_maxvar(struct isl_sched_graph
*graph
)
5520 for (i
= 0; i
< graph
->n
; ++i
) {
5521 struct isl_sched_node
*node
= &graph
->node
[i
];
5524 if (node_update_vmap(node
) < 0)
5526 nvar
= node
->nvar
+ graph
->n_row
- node
->rank
;
5527 if (nvar
> graph
->maxvar
)
5528 graph
->maxvar
= nvar
;
5534 /* Count the number of (active) intra-statement consecutivity constraints
5535 * associated to "node".
5537 static int node_n_intra(struct isl_sched_node
*node
)
5540 struct isl_sched_intra
*intra
;
5543 for (intra
= node
->intra
; intra
; intra
= intra
->next
)
5544 if (intra
->n_fixed
>= 0)
5550 /* Extract the subgraph of "graph" that consists of the nodes satisfying
5551 * "node_pred" and the edges satisfying "edge_pred" and store
5552 * the result in "sub".
5554 static isl_stat
extract_sub_graph(isl_ctx
*ctx
, struct isl_sched_graph
*graph
,
5555 int (*node_pred
)(struct isl_sched_node
*node
, int data
),
5556 int (*edge_pred
)(struct isl_sched_edge
*edge
, int data
),
5557 int data
, struct isl_sched_graph
*sub
)
5559 int i
, n
= 0, n_edge
= 0, n_intra
= 0, n_inter
= 0;
5562 for (i
= 0; i
< graph
->n
; ++i
)
5563 if (node_pred(&graph
->node
[i
], data
)) {
5565 n_intra
+= node_n_intra(&graph
->node
[i
]);
5567 for (i
= 0; i
< graph
->n_edge
; ++i
)
5568 if (edge_pred(&graph
->edge
[i
], data
)) {
5570 if (is_consecutivity(&graph
->edge
[i
]))
5573 if (graph_alloc(ctx
, sub
, n
, n_edge
, n_intra
+ n_inter
) < 0)
5574 return isl_stat_error
;
5575 sub
->root
= graph
->root
;
5576 if (copy_nodes(sub
, graph
, node_pred
, data
) < 0)
5577 return isl_stat_error
;
5578 if (graph_init_table(ctx
, sub
) < 0)
5579 return isl_stat_error
;
5580 for (t
= 0; t
<= isl_edge_last_table
; ++t
)
5581 sub
->max_edge
[t
] = graph
->max_edge
[t
];
5582 if (graph_init_edge_tables(ctx
, sub
) < 0)
5583 return isl_stat_error
;
5584 if (copy_edges(ctx
, sub
, graph
, edge_pred
, data
) < 0)
5585 return isl_stat_error
;
5586 sub
->n_row
= graph
->n_row
;
5587 sub
->max_row
= graph
->max_row
;
5588 sub
->n_total_row
= graph
->n_total_row
;
5589 sub
->band_start
= graph
->band_start
;
5594 static __isl_give isl_schedule_node
*compute_schedule(isl_schedule_node
*node
,
5595 struct isl_sched_graph
*graph
);
5596 static __isl_give isl_schedule_node
*compute_schedule_wcc(
5597 isl_schedule_node
*node
, struct isl_sched_graph
*graph
);
5599 /* Compute a schedule for a subgraph of "graph". In particular, for
5600 * the graph composed of nodes that satisfy node_pred and edges that
5601 * that satisfy edge_pred.
5602 * If the subgraph is known to consist of a single component, then wcc should
5603 * be set and then we call compute_schedule_wcc on the constructed subgraph.
5604 * Otherwise, we call compute_schedule, which will check whether the subgraph
5607 * The schedule is inserted at "node" and the updated schedule node
5610 static __isl_give isl_schedule_node
*compute_sub_schedule(
5611 __isl_take isl_schedule_node
*node
, isl_ctx
*ctx
,
5612 struct isl_sched_graph
*graph
,
5613 int (*node_pred
)(struct isl_sched_node
*node
, int data
),
5614 int (*edge_pred
)(struct isl_sched_edge
*edge
, int data
),
5617 struct isl_sched_graph split
= { 0 };
5619 if (extract_sub_graph(ctx
, graph
, node_pred
, edge_pred
, data
,
5624 node
= compute_schedule_wcc(node
, &split
);
5626 node
= compute_schedule(node
, &split
);
5628 graph_free(ctx
, &split
);
5631 graph_free(ctx
, &split
);
5632 return isl_schedule_node_free(node
);
5635 static int edge_scc_exactly(struct isl_sched_edge
*edge
, int scc
)
5637 return edge
->src
->scc
== scc
&& edge
->dst
->scc
== scc
;
5640 static int edge_dst_scc_at_most(struct isl_sched_edge
*edge
, int scc
)
5642 return edge
->dst
->scc
<= scc
;
5645 static int edge_src_scc_at_least(struct isl_sched_edge
*edge
, int scc
)
5647 return edge
->src
->scc
>= scc
;
5650 /* Reset the current band by dropping all its schedule rows and
5651 * resetting the number of inner rows of
5652 * the intra-statement consecutivity constraints already taken into account
5653 * to their original values at the start of the band.
5654 * The states of the inter-statement consecutivity constraints
5655 * are also reset to their values at the start of the band.
5657 static isl_stat
reset_band(struct isl_sched_graph
*graph
)
5662 drop
= graph
->n_total_row
- graph
->band_start
;
5663 graph
->n_total_row
-= drop
;
5664 graph
->n_row
-= drop
;
5666 for (i
= 0; i
< graph
->n
; ++i
) {
5667 struct isl_sched_node
*node
= &graph
->node
[i
];
5668 struct isl_sched_intra
*intra
;
5670 for (intra
= node
->intra
; intra
; intra
= intra
->next
)
5671 intra
->n_fixed
= intra
->band_n_fixed
;
5673 isl_map_free(node
->band_sched
);
5674 node
->band_sched
= NULL
;
5676 node
->sched
= isl_mat_drop_rows(node
->sched
,
5677 graph
->band_start
, drop
);
5680 return isl_stat_error
;
5682 for (i
= 0; i
< graph
->n_edge
; ++i
)
5683 graph
->edge
[i
].state
= graph
->edge
[i
].band_state
;
5688 /* Split the current graph into two parts and compute a schedule for each
5689 * part individually. In particular, one part consists of all SCCs up
5690 * to and including graph->src_scc, while the other part contains the other
5691 * SCCs. The split is enforced by a sequence node inserted at position "node"
5692 * in the schedule tree. Return the updated schedule node.
5693 * If either of these two parts consists of a sequence, then it is spliced
5694 * into the sequence containing the two parts.
5696 * The current band is reset. It would be possible to reuse
5697 * the previously computed rows as the first rows in the next
5698 * band, but recomputing them may result in better rows as we are looking
5699 * at a smaller part of the dependence graph.
5701 static __isl_give isl_schedule_node
*compute_split_schedule(
5702 __isl_take isl_schedule_node
*node
, struct isl_sched_graph
*graph
)
5706 isl_union_set_list
*filters
;
5711 if (reset_band(graph
) < 0)
5712 return isl_schedule_node_free(node
);
5716 ctx
= isl_schedule_node_get_ctx(node
);
5717 filters
= extract_split(ctx
, graph
);
5718 node
= isl_schedule_node_insert_sequence(node
, filters
);
5719 node
= isl_schedule_node_child(node
, 1);
5720 node
= isl_schedule_node_child(node
, 0);
5722 node
= compute_sub_schedule(node
, ctx
, graph
,
5723 &node_scc_at_least
, &edge_src_scc_at_least
,
5724 graph
->src_scc
+ 1, 0);
5725 is_seq
= isl_schedule_node_get_type(node
) == isl_schedule_node_sequence
;
5726 node
= isl_schedule_node_parent(node
);
5727 node
= isl_schedule_node_parent(node
);
5729 node
= isl_schedule_node_sequence_splice_child(node
, 1);
5730 node
= isl_schedule_node_child(node
, 0);
5731 node
= isl_schedule_node_child(node
, 0);
5732 node
= compute_sub_schedule(node
, ctx
, graph
,
5733 &node_scc_at_most
, &edge_dst_scc_at_most
,
5735 is_seq
= isl_schedule_node_get_type(node
) == isl_schedule_node_sequence
;
5736 node
= isl_schedule_node_parent(node
);
5737 node
= isl_schedule_node_parent(node
);
5739 node
= isl_schedule_node_sequence_splice_child(node
, 0);
5744 /* Insert a band node at position "node" in the schedule tree corresponding
5745 * to the current band in "graph". Mark the band node permutable
5746 * if "permutable" is set.
5747 * The partial schedules and the coincidence property are extracted
5748 * from the graph nodes.
5749 * Return the updated schedule node.
5751 static __isl_give isl_schedule_node
*insert_current_band(
5752 __isl_take isl_schedule_node
*node
, struct isl_sched_graph
*graph
,
5758 isl_multi_pw_aff
*mpa
;
5759 isl_multi_union_pw_aff
*mupa
;
5765 isl_die(isl_schedule_node_get_ctx(node
), isl_error_internal
,
5766 "graph should have at least one node",
5767 return isl_schedule_node_free(node
));
5769 start
= graph
->band_start
;
5770 end
= graph
->n_total_row
;
5773 ma
= node_extract_partial_schedule_multi_aff(&graph
->node
[0], start
, n
);
5774 mpa
= isl_multi_pw_aff_from_multi_aff(ma
);
5775 mupa
= isl_multi_union_pw_aff_from_multi_pw_aff(mpa
);
5777 for (i
= 1; i
< graph
->n
; ++i
) {
5778 isl_multi_union_pw_aff
*mupa_i
;
5780 ma
= node_extract_partial_schedule_multi_aff(&graph
->node
[i
],
5782 mpa
= isl_multi_pw_aff_from_multi_aff(ma
);
5783 mupa_i
= isl_multi_union_pw_aff_from_multi_pw_aff(mpa
);
5784 mupa
= isl_multi_union_pw_aff_union_add(mupa
, mupa_i
);
5786 node
= isl_schedule_node_insert_partial_schedule(node
, mupa
);
5788 for (i
= 0; i
< n
; ++i
)
5789 node
= isl_schedule_node_band_member_set_coincident(node
, i
,
5790 graph
->node
[0].coincident
[start
+ i
]);
5791 node
= isl_schedule_node_band_set_permutable(node
, permutable
);
5796 /* Update the dependence relations based on the current schedule,
5797 * add the current band to "node" and then continue with the computation of the next band.
5799 * Return the updated schedule node.
5801 static __isl_give isl_schedule_node
*compute_next_band(
5802 __isl_take isl_schedule_node
*node
,
5803 struct isl_sched_graph
*graph
, int permutable
)
5810 ctx
= isl_schedule_node_get_ctx(node
);
5811 if (update_edges(ctx
, graph
) < 0)
5812 return isl_schedule_node_free(node
);
5813 node
= insert_current_band(node
, graph
, permutable
);
5816 node
= isl_schedule_node_child(node
, 0);
5817 node
= compute_schedule(node
, graph
);
5818 node
= isl_schedule_node_parent(node
);
5823 /* Add the constraints "coef" derived from an edge from "node" to itself
5824 * to graph->lp in order to respect the dependences and to try and carry them.
5825 * "pos" is the sequence number of the edge that needs to be carried.
5826 * "coef" represents general constraints on coefficients (c_0, c_x)
5827 * of valid constraints for (y - x) with x and y instances of the node.
5829 * The constraints added to graph->lp need to enforce
5831 * (c_j_0 + c_j_x y) - (c_j_0 + c_j_x x)
5832 * = c_j_x (y - x) >= e_i
5834 * for each (x,y) in the dependence relation of the edge.
5835 * That is, (-e_i, c_j_x) needs to be plugged in for (c_0, c_x),
5836 * taking into account that each coefficient in c_j_x is represented
5837 * as a pair of non-negative coefficients.
5839 static isl_stat
add_intra_constraints(struct isl_sched_graph
*graph
,
5840 struct isl_sched_node
*node
, __isl_take isl_basic_set
*coef
, int pos
)
5844 isl_dim_map
*dim_map
;
5847 return isl_stat_error
;
5849 ctx
= isl_basic_set_get_ctx(coef
);
5850 offset
= coef_var_offset(coef
);
5851 dim_map
= intra_dim_map(ctx
, graph
, node
, offset
, 1);
5852 isl_dim_map_range(dim_map
, 3 + pos
, 0, 0, 0, 1, -1);
5853 graph
->lp
= add_constraints_dim_map(graph
->lp
, coef
, dim_map
);
5858 /* Add the constraints "coef" derived from an edge from "src" to "dst"
5859 * to graph->lp in order to respect the dependences and to try and carry them.
5860 * "pos" is the sequence number of the edge that needs to be carried or
5861 * -1 if no attempt should be made to carry the dependences.
5862 * "coef" represents general constraints on coefficients (c_0, c_n, c_x, c_y)
5863 * of valid constraints for (x, y) with x and y instances of "src" and "dst".
5865 * The constraints added to graph->lp need to enforce
5867 * (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= e_i
5869 * for each (x,y) in the dependence relation of the edge or
5871 * (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= 0
5875 * (-e_i + c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x)
5877 * (c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x)
5878 * needs to be plugged in for (c_0, c_n, c_x, c_y),
5879 * taking into account that each coefficient in c_j_x and c_k_x is represented
5880 * as a pair of non-negative coefficients.
5882 static isl_stat
add_inter_constraints(struct isl_sched_graph
*graph
,
5883 struct isl_sched_node
*src
, struct isl_sched_node
*dst
,
5884 __isl_take isl_basic_set
*coef
, int pos
)
5888 isl_dim_map
*dim_map
;
5891 return isl_stat_error
;
5893 ctx
= isl_basic_set_get_ctx(coef
);
5894 offset
= coef_var_offset(coef
);
5895 dim_map
= inter_dim_map(ctx
, graph
, src
, dst
, offset
, 1);
5897 isl_dim_map_range(dim_map
, 3 + pos
, 0, 0, 0, 1, -1);
5898 graph
->lp
= add_constraints_dim_map(graph
->lp
, coef
, dim_map
);
5903 /* Data structure for keeping track of the data needed
5904 * to exploit non-trivial lineality spaces.
5906 * "any_non_trivial" is true if there are any non-trivial lineality spaces.
5907 * If "any_non_trivial" is not true, then "equivalent" and "mask" may be NULL.
5908 * "equivalent" connects instances to other instances on the same line(s).
5909 * "mask" contains the domain spaces of "equivalent".
5910 * Any instance set not in "mask" does not have a non-trivial lineality space.
5912 struct isl_exploit_lineality_data
{
5913 isl_bool any_non_trivial
;
5914 isl_union_map
*equivalent
;
5915 isl_union_set
*mask
;
5918 /* Data structure collecting information used during the construction
5919 * of an LP for carrying dependences.
5921 * "intra" is a sequence of coefficient constraints for intra-node edges.
5922 * "inter" is a sequence of coefficient constraints for inter-node edges.
5923 * "lineality" contains data used to exploit non-trivial lineality spaces.
5926 isl_basic_set_list
*intra
;
5927 isl_basic_set_list
*inter
;
5928 struct isl_exploit_lineality_data lineality
;
5931 /* Free all the data stored in "carry".
5933 static void isl_carry_clear(struct isl_carry
*carry
)
5935 isl_basic_set_list_free(carry
->intra
);
5936 isl_basic_set_list_free(carry
->inter
);
5937 isl_union_map_free(carry
->lineality
.equivalent
);
5938 isl_union_set_free(carry
->lineality
.mask
);
5941 /* Return a pointer to the node in "graph" that lives in "space".
5942 * If the requested node has been compressed, then "space"
5943 * corresponds to the compressed space.
5944 * The graph is assumed to have such a node.
5945 * Return NULL in case of error.
5947 * First try and see if "space" is the space of an uncompressed node.
5948 * If so, return that node.
5949 * Otherwise, "space" was constructed by construct_compressed_id and
5950 * contains a user pointer pointing to the node in the tuple id.
5951 * However, this node belongs to the original dependence graph.
5952 * If "graph" is a subgraph of this original dependence graph,
5953 * then the node with the same space still needs to be looked up
5954 * in the current graph.
5956 static struct isl_sched_node
*graph_find_compressed_node(isl_ctx
*ctx
,
5957 struct isl_sched_graph
*graph
, __isl_keep isl_space
*space
)
5960 struct isl_sched_node
*node
;
5965 node
= graph_find_node(ctx
, graph
, space
);
5968 if (is_node(graph
, node
))
5971 id
= isl_space_get_tuple_id(space
, isl_dim_set
);
5972 node
= isl_id_get_user(id
);
5978 if (!is_node(graph
->root
, node
))
5979 isl_die(ctx
, isl_error_internal
,
5980 "space points to invalid node", return NULL
);
5981 if (graph
!= graph
->root
)
5982 node
= graph_find_node(ctx
, graph
, node
->space
);
5983 if (!is_node(graph
, node
))
5984 isl_die(ctx
, isl_error_internal
,
5985 "unable to find node", return NULL
);
5990 /* Internal data structure for add_all_constraints.
5992 * "graph" is the schedule constraint graph for which an LP problem
5993 * is being constructed.
5994 * "carry_inter" indicates whether inter-node edges should be carried.
5995 * "pos" is the position of the next edge that needs to be carried.
5997 struct isl_add_all_constraints_data
{
5999 struct isl_sched_graph
*graph
;
6004 /* Add the constraints "coef" derived from an edge from a node to itself
6005 * to data->graph->lp in order to respect the dependences and
6006 * to try and carry them.
6008 * The space of "coef" is of the form
6010 * coefficients[[c_cst] -> S[c_x]]
6012 * with S[c_x] the (compressed) space of the node.
6013 * Extract the node from the space and call add_intra_constraints.
6015 static isl_stat
lp_add_intra(__isl_take isl_basic_set
*coef
, void *user
)
6017 struct isl_add_all_constraints_data
*data
= user
;
6019 struct isl_sched_node
*node
;
6021 space
= isl_basic_set_get_space(coef
);
6022 space
= isl_space_range(isl_space_unwrap(space
));
6023 node
= graph_find_compressed_node(data
->ctx
, data
->graph
, space
);
6024 isl_space_free(space
);
6025 return add_intra_constraints(data
->graph
, node
, coef
, data
->pos
++);
6028 /* Add the constraints "coef" derived from an edge from a node j
6029 * to a node k to data->graph->lp in order to respect the dependences and
6030 * to try and carry them (provided data->carry_inter is set).
6032 * The space of "coef" is of the form
6034 * coefficients[[c_cst, c_n] -> [S_j[c_x] -> S_k[c_y]]]
6036 * with S_j[c_x] and S_k[c_y] the (compressed) spaces of the nodes.
6037 * Extract the nodes from the space and call add_inter_constraints.
6039 static isl_stat
lp_add_inter(__isl_take isl_basic_set
*coef
, void *user
)
6041 struct isl_add_all_constraints_data
*data
= user
;
6042 isl_space
*space
, *dom
;
6043 struct isl_sched_node
*src
, *dst
;
6046 space
= isl_basic_set_get_space(coef
);
6047 space
= isl_space_unwrap(isl_space_range(isl_space_unwrap(space
)));
6048 dom
= isl_space_domain(isl_space_copy(space
));
6049 src
= graph_find_compressed_node(data
->ctx
, data
->graph
, dom
);
6050 isl_space_free(dom
);
6051 space
= isl_space_range(space
);
6052 dst
= graph_find_compressed_node(data
->ctx
, data
->graph
, space
);
6053 isl_space_free(space
);
6055 pos
= data
->carry_inter
? data
->pos
++ : -1;
6056 return add_inter_constraints(data
->graph
, src
, dst
, coef
, pos
);
6059 /* Add constraints to graph->lp that force all (conditional) validity
6060 * dependences to be respected and attempt to carry them.
6061 * "intra" is the sequence of coefficient constraints for intra-node edges.
6062 * "inter" is the sequence of coefficient constraints for inter-node edges.
6063 * "carry_inter" indicates whether inter-node edges should be carried or
6066 static isl_stat
add_all_constraints(isl_ctx
*ctx
, struct isl_sched_graph
*graph
,
6067 __isl_keep isl_basic_set_list
*intra
,
6068 __isl_keep isl_basic_set_list
*inter
, int carry_inter
)
6070 struct isl_add_all_constraints_data data
= { ctx
, graph
, carry_inter
};
6073 if (isl_basic_set_list_foreach(intra
, &lp_add_intra
, &data
) < 0)
6074 return isl_stat_error
;
6075 if (isl_basic_set_list_foreach(inter
, &lp_add_inter
, &data
) < 0)
6076 return isl_stat_error
;
6080 /* Internal data structure for count_all_constraints
6081 * for keeping track of the number of equality and inequality constraints.
6083 struct isl_sched_count
{
6088 /* Add the number of equality and inequality constraints of "bset"
6089 * to data->n_eq and data->n_ineq.
6091 static isl_stat
bset_update_count(__isl_take isl_basic_set
*bset
, void *user
)
6093 struct isl_sched_count
*data
= user
;
6095 return update_count(bset
, 1, &data
->n_eq
, &data
->n_ineq
);
6098 /* Count the number of equality and inequality constraints
6099 * that will be added to the carry_lp problem.
6100 * We count each edge exactly once.
6101 * "intra" is the sequence of coefficient constraints for intra-node edges.
6102 * "inter" is the sequence of coefficient constraints for inter-node edges.
6104 static isl_stat
count_all_constraints(__isl_keep isl_basic_set_list
*intra
,
6105 __isl_keep isl_basic_set_list
*inter
, int *n_eq
, int *n_ineq
)
6107 struct isl_sched_count data
;
6109 data
.n_eq
= data
.n_ineq
= 0;
6110 if (isl_basic_set_list_foreach(inter
, &bset_update_count
, &data
) < 0)
6111 return isl_stat_error
;
6112 if (isl_basic_set_list_foreach(intra
, &bset_update_count
, &data
) < 0)
6113 return isl_stat_error
;
6116 *n_ineq
= data
.n_ineq
;
6121 /* Construct an LP problem for finding schedule coefficients
6122 * such that the schedule carries as many validity dependences as possible.
6123 * In particular, for each dependence i, we bound the dependence distance
6124 * from below by e_i, with 0 <= e_i <= 1 and then maximize the sum
6125 * of all e_i's. Dependences with e_i = 0 in the solution are simply
6126 * respected, while those with e_i > 0 (in practice e_i = 1) are carried.
6127 * "intra" is the sequence of coefficient constraints for intra-node edges.
6128 * "inter" is the sequence of coefficient constraints for inter-node edges.
6129 * "n_edge" is the total number of edges.
6130 * "carry_inter" indicates whether inter-node edges should be carried or
6131 * only respected. That is, if "carry_inter" is not set, then
6132 * no e_i variables are introduced for the inter-node edges.
6134 * All variables of the LP are non-negative. The actual coefficients
6135 * may be negative, so each coefficient is represented as the difference
6136 * of two non-negative variables. The negative part always appears
6137 * immediately before the positive part.
6138 * Other than that, the variables have the following order
6140 * - sum of (1 - e_i) over all edges
6141 * - sum of all c_n coefficients
6142 * (unconstrained when computing non-parametric schedules)
6143 * - sum of positive and negative parts of all c_x coefficients
6147 * - positive and negative parts of c_i_x, in opposite order
6148 * - c_i_n (if parametric)
6151 * The constraints are those from the (validity) edges plus three equalities
6152 * to express the sums and n_edge inequalities to express e_i <= 1.
6154 static isl_stat
setup_carry_lp(isl_ctx
*ctx
, struct isl_sched_graph
*graph
,
6155 int n_edge
, __isl_keep isl_basic_set_list
*intra
,
6156 __isl_keep isl_basic_set_list
*inter
, int carry_inter
)
6165 for (i
= 0; i
< graph
->n
; ++i
) {
6166 struct isl_sched_node
*node
= &graph
->node
[graph
->sorted
[i
]];
6167 node
->start
= total
;
6168 total
+= 1 + node
->nparam
+ 2 * node
->nvar
;
6171 if (count_all_constraints(intra
, inter
, &n_eq
, &n_ineq
) < 0)
6172 return isl_stat_error
;
6174 dim
= isl_space_set_alloc(ctx
, 0, total
);
6175 isl_basic_set_free(graph
->lp
);
6178 graph
->lp
= isl_basic_set_alloc_space(dim
, 0, n_eq
, n_ineq
);
6179 graph
->lp
= isl_basic_set_set_rational(graph
->lp
);
6181 k
= isl_basic_set_alloc_equality(graph
->lp
);
6183 return isl_stat_error
;
6184 isl_seq_clr(graph
->lp
->eq
[k
], 1 + total
);
6185 isl_int_set_si(graph
->lp
->eq
[k
][0], -n_edge
);
6186 isl_int_set_si(graph
->lp
->eq
[k
][1], 1);
6187 for (i
= 0; i
< n_edge
; ++i
)
6188 isl_int_set_si(graph
->lp
->eq
[k
][4 + i
], 1);
6190 if (add_param_sum_constraint(graph
, 1) < 0)
6191 return isl_stat_error
;
6192 if (add_var_sum_constraint(graph
, 2) < 0)
6193 return isl_stat_error
;
6195 for (i
= 0; i
< n_edge
; ++i
) {
6196 k
= isl_basic_set_alloc_inequality(graph
->lp
);
6198 return isl_stat_error
;
6199 isl_seq_clr(graph
->lp
->ineq
[k
], 1 + total
);
6200 isl_int_set_si(graph
->lp
->ineq
[k
][4 + i
], -1);
6201 isl_int_set_si(graph
->lp
->ineq
[k
][0], 1);
6204 if (add_all_constraints(ctx
, graph
, intra
, inter
, carry_inter
) < 0)
6205 return isl_stat_error
;
6210 static __isl_give isl_schedule_node
*compute_component_schedule(
6211 __isl_take isl_schedule_node
*node
, struct isl_sched_graph
*graph
,
6214 /* If the schedule_split_scaled option is set and if the linear
6215 * parts of the scheduling rows for all nodes in the graphs have
6216 * a non-trivial common divisor, then remove this
6217 * common divisor from the linear part.
6218 * Otherwise, insert a band node directly and continue with
6219 * the construction of the schedule.
6221 * If a non-trivial common divisor is found, then
6222 * the linear part is reduced and the remainder is ignored.
6223 * The pieces of the graph that are assigned different remainders
6224 * form (groups of) strongly connected components within
6225 * the scaled down band. If needed, they can therefore
6226 * be ordered along this remainder in a sequence node.
6227 * However, this ordering is not enforced here in order to allow
6228 * the scheduler to combine some of the strongly connected components.
6230 static __isl_give isl_schedule_node
*split_scaled(
6231 __isl_take isl_schedule_node
*node
, struct isl_sched_graph
*graph
)
6241 ctx
= isl_schedule_node_get_ctx(node
);
6242 if (!ctx
->opt
->schedule_split_scaled
)
6243 return compute_next_band(node
, graph
, 0);
6245 return compute_next_band(node
, graph
, 0);
6248 isl_int_init(gcd_i
);
6250 isl_int_set_si(gcd
, 0);
6252 row
= isl_mat_rows(graph
->node
[0].sched
) - 1;
6254 for (i
= 0; i
< graph
->n
; ++i
) {
6255 struct isl_sched_node
*node
= &graph
->node
[i
];
6256 int cols
= isl_mat_cols(node
->sched
);
6258 isl_seq_gcd(node
->sched
->row
[row
] + 1, cols
- 1, &gcd_i
);
6259 isl_int_gcd(gcd
, gcd
, gcd_i
);
6262 isl_int_clear(gcd_i
);
6264 if (isl_int_cmp_si(gcd
, 1) <= 0) {
6266 return compute_next_band(node
, graph
, 0);
6269 for (i
= 0; i
< graph
->n
; ++i
) {
6270 struct isl_sched_node
*node
= &graph
->node
[i
];
6272 isl_int_fdiv_q(node
->sched
->row
[row
][0],
6273 node
->sched
->row
[row
][0], gcd
);
6274 isl_int_mul(node
->sched
->row
[row
][0],
6275 node
->sched
->row
[row
][0], gcd
);
6276 node
->sched
= isl_mat_scale_down_row(node
->sched
, row
, gcd
);
6283 return compute_next_band(node
, graph
, 0);
6286 return isl_schedule_node_free(node
);
6289 /* Is the schedule row "sol" trivial on node "node"?
6290 * That is, is the solution zero on the dimensions linearly independent of
6291 * the previously found solutions?
6292 * Return 1 if the solution is trivial, 0 if it is not and -1 on error.
6294 * Each coefficient is represented as the difference between
6295 * two non-negative values in "sol".
6296 * We construct the schedule row s and check if it is linearly
6297 * independent of previously computed schedule rows
6298 * by computing T s, with T the linear combinations that are zero
6299 * on linearly dependent schedule rows.
6300 * If the result consists of all zeros, then the solution is trivial.
6302 static int is_trivial(struct isl_sched_node
*node
, __isl_keep isl_vec
*sol
)
6309 if (node
->nvar
== node
->rank
)
6312 node_sol
= extract_var_coef(node
, sol
);
6313 node_sol
= isl_mat_vec_product(isl_mat_copy(node
->indep
), node_sol
);
6317 trivial
= isl_seq_first_non_zero(node_sol
->el
,
6318 node
->nvar
- node
->rank
) == -1;
6320 isl_vec_free(node_sol
);
6325 /* Is the schedule row "sol" trivial on any node where it should
6327 * Return 1 if any solution is trivial, 0 if they are not and -1 on error.
6329 static int is_any_trivial(struct isl_sched_graph
*graph
,
6330 __isl_keep isl_vec
*sol
)
6334 for (i
= 0; i
< graph
->n
; ++i
) {
6335 struct isl_sched_node
*node
= &graph
->node
[i
];
6338 if (!needs_row(graph
, node
))
6340 trivial
= is_trivial(node
, sol
);
6341 if (trivial
< 0 || trivial
)
6348 /* Does the schedule represented by "sol" perform loop coalescing on "node"?
6349 * If so, return the position of the coalesced dimension.
6350 * Otherwise, return node->nvar or -1 on error.
6352 * In particular, look for pairs of coefficients c_i and c_j such that
6353 * |c_j/c_i| > ceil(size_i/2), i.e., |c_j| > |c_i * ceil(size_i/2)|.
6354 * If any such pair is found, then return i.
6355 * If size_i is infinity, then no check on c_i needs to be performed.
6357 static int find_node_coalescing(struct isl_sched_node
*node
,
6358 __isl_keep isl_vec
*sol
)
6364 if (node
->nvar
<= 1)
6367 csol
= extract_var_coef(node
, sol
);
6371 for (i
= 0; i
< node
->nvar
; ++i
) {
6374 if (isl_int_is_zero(csol
->el
[i
]))
6376 v
= isl_multi_val_get_val(node
->sizes
, i
);
6379 if (!isl_val_is_int(v
)) {
6383 v
= isl_val_div_ui(v
, 2);
6384 v
= isl_val_ceil(v
);
6387 isl_int_mul(max
, v
->n
, csol
->el
[i
]);
6390 for (j
= 0; j
< node
->nvar
; ++j
) {
6393 if (isl_int_abs_gt(csol
->el
[j
], max
))
6409 /* Force the schedule coefficient at position "pos" of "node" to be zero
6411 * The coefficient is encoded as the difference between two non-negative
6412 * variables. Force these two variables to have the same value.
6414 static __isl_give isl_tab_lexmin
*zero_out_node_coef(
6415 __isl_take isl_tab_lexmin
*tl
, struct isl_sched_node
*node
, int pos
)
6421 ctx
= isl_space_get_ctx(node
->space
);
6422 dim
= isl_tab_lexmin_dim(tl
);
6424 return isl_tab_lexmin_free(tl
);
6425 eq
= isl_vec_alloc(ctx
, 1 + dim
);
6426 eq
= isl_vec_clr(eq
);
6428 return isl_tab_lexmin_free(tl
);
6430 pos
= 1 + node_var_coef_pos(node
, pos
);
6431 isl_int_set_si(eq
->el
[pos
], 1);
6432 isl_int_set_si(eq
->el
[pos
+ 1], -1);
6433 tl
= isl_tab_lexmin_add_eq(tl
, eq
->el
);
6439 /* Return the lexicographically smallest rational point in the basic set
6440 * from which "tl" was constructed, double checking that this input set
6443 static __isl_give isl_vec
*non_empty_solution(__isl_keep isl_tab_lexmin
*tl
)
6447 sol
= isl_tab_lexmin_get_solution(tl
);
6451 isl_die(isl_vec_get_ctx(sol
), isl_error_internal
,
6452 "error in schedule construction",
6453 return isl_vec_free(sol
));
6457 /* Does the solution "sol" of the LP problem constructed by setup_carry_lp
6458 * carry any of the "n_edge" groups of dependences?
6459 * The value in the first position is the sum of (1 - e_i) over all "n_edge"
6460 * edges, with 0 <= e_i <= 1 equal to 1 when the dependences represented
6461 * by the edge are carried by the solution.
6462 * If the sum of the (1 - e_i) is smaller than "n_edge" then at least
6463 * one of those is carried.
6465 * Note that despite the fact that the problem is solved using a rational
6466 * solver, the solution is guaranteed to be integral.
6467 * Specifically, the dependence distance lower bounds e_i (and therefore
6468 * also their sum) are integers. See Lemma 5 of [1].
6470 * Any potential denominator of the sum is cleared by this function.
6471 * The denominator is not relevant for any of the other elements
6474 * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
6475 * Problem, Part II: Multi-Dimensional Time.
6476 * In Intl. Journal of Parallel Programming, 1992.
6478 static int carries_dependences(__isl_keep isl_vec
*sol
, int n_edge
)
6480 isl_int_divexact(sol
->el
[1], sol
->el
[1], sol
->el
[0]);
6481 isl_int_set_si(sol
->el
[0], 1);
6482 return isl_int_cmp_si(sol
->el
[1], n_edge
) < 0;
6485 /* Return the lexicographically smallest rational point in "lp",
6486 * assuming that all variables are non-negative and performing some
6487 * additional sanity checks.
6488 * If "want_integral" is set, then compute the lexicographically smallest
6489 * integer point instead.
6490 * In particular, "lp" should not be empty by construction.
6491 * Double check that this is the case.
6492 * If dependences are not carried for any of the "n_edge" edges,
6493 * then return an empty vector.
6495 * If the schedule_treat_coalescing option is set and
6496 * if the computed schedule performs loop coalescing on a given node,
6497 * i.e., if it is of the form
6499 * c_i i + c_j j + ...
6501 * with |c_j/c_i| >= size_i, then force the coefficient c_i to be zero
6502 * to cut out this solution. Repeat this process until no more loop
6503 * coalescing occurs or until no more dependences can be carried.
6504 * In the latter case, revert to the previously computed solution.
6506 * If the caller requests an integral solution and if coalescing should
6507 * be treated, then perform the coalescing treatment first as
6508 * an integral solution computed before coalescing treatment
6509 * would carry the same number of edges and would therefore probably
6510 * also be coalescing.
6512 * To allow the coalescing treatment to be performed first,
6513 * the initial solution is allowed to be rational and it is only
6514 * cut out (if needed) in the next iteration, if no coalescing measures
6517 static __isl_give isl_vec
*non_neg_lexmin(struct isl_sched_graph
*graph
,
6518 __isl_take isl_basic_set
*lp
, int n_edge
, int want_integral
)
6523 isl_vec
*sol
= NULL
, *prev
;
6524 int treat_coalescing
;
6529 ctx
= isl_basic_set_get_ctx(lp
);
6530 treat_coalescing
= isl_options_get_schedule_treat_coalescing(ctx
);
6531 tl
= isl_tab_lexmin_from_basic_set(lp
);
6539 tl
= isl_tab_lexmin_cut_to_integer(tl
);
6541 sol
= non_empty_solution(tl
);
6545 integral
= isl_int_is_one(sol
->el
[0]);
6546 if (!carries_dependences(sol
, n_edge
)) {
6548 prev
= isl_vec_alloc(ctx
, 0);
6553 prev
= isl_vec_free(prev
);
6554 cut
= want_integral
&& !integral
;
6557 if (!treat_coalescing
)
6559 for (i
= 0; i
< graph
->n
; ++i
) {
6560 struct isl_sched_node
*node
= &graph
->node
[i
];
6562 pos
= find_node_coalescing(node
, sol
);
6565 if (pos
< node
->nvar
)
6570 tl
= zero_out_node_coef(tl
, &graph
->node
[i
], pos
);
6573 } while (try_again
);
6575 isl_tab_lexmin_free(tl
);
6579 isl_tab_lexmin_free(tl
);
6585 /* If "edge" is an edge from a node to itself, then add the corresponding
6586 * dependence relation to "umap".
6587 * If "node" has been compressed, then the dependence relation
6588 * is also compressed first.
6590 static __isl_give isl_union_map
*add_intra(__isl_take isl_union_map
*umap
,
6591 struct isl_sched_edge
*edge
)
6594 struct isl_sched_node
*node
= edge
->src
;
6596 if (edge
->src
!= edge
->dst
)
6599 map
= isl_map_copy(edge
->map
);
6600 if (node
->compressed
) {
6601 map
= isl_map_preimage_domain_multi_aff(map
,
6602 isl_multi_aff_copy(node
->decompress
));
6603 map
= isl_map_preimage_range_multi_aff(map
,
6604 isl_multi_aff_copy(node
->decompress
));
6606 umap
= isl_union_map_add_map(umap
, map
);
6610 /* If "edge" is an edge from a node to another node, then add the corresponding
6611 * dependence relation to "umap".
6612 * If the source or destination nodes of "edge" have been compressed,
6613 * then the dependence relation is also compressed first.
6615 static __isl_give isl_union_map
*add_inter(__isl_take isl_union_map
*umap
,
6616 struct isl_sched_edge
*edge
)
6620 if (edge
->src
== edge
->dst
)
6623 map
= isl_map_copy(edge
->map
);
6624 if (edge
->src
->compressed
)
6625 map
= isl_map_preimage_domain_multi_aff(map
,
6626 isl_multi_aff_copy(edge
->src
->decompress
));
6627 if (edge
->dst
->compressed
)
6628 map
= isl_map_preimage_range_multi_aff(map
,
6629 isl_multi_aff_copy(edge
->dst
->decompress
));
6630 umap
= isl_union_map_add_map(umap
, map
);
6634 /* Internal data structure used by union_drop_coalescing_constraints
6635 * to collect bounds on all relevant statements.
6637 * "graph" is the schedule constraint graph for which an LP problem
6638 * is being constructed.
6639 * "bounds" collects the bounds.
6641 struct isl_collect_bounds_data
{
6643 struct isl_sched_graph
*graph
;
6644 isl_union_set
*bounds
;
6647 /* Add the size bounds for the node with instance deltas in "set"
6650 static isl_stat
collect_bounds(__isl_take isl_set
*set
, void *user
)
6652 struct isl_collect_bounds_data
*data
= user
;
6653 struct isl_sched_node
*node
;
6657 space
= isl_set_get_space(set
);
6660 node
= graph_find_compressed_node(data
->ctx
, data
->graph
, space
);
6661 isl_space_free(space
);
6663 bounds
= isl_set_from_basic_set(get_size_bounds(node
));
6664 data
->bounds
= isl_union_set_add_set(data
->bounds
, bounds
);
6669 /* Drop some constraints from "delta" that could be exploited
6670 * to construct loop coalescing schedules.
6671 * In particular, drop those constraint that bound the difference
6672 * to the size of the domain.
6673 * Do this for each set/node in "delta" separately.
6674 * The parameters are assumed to have been projected out by the caller.
6676 static __isl_give isl_union_set
*union_drop_coalescing_constraints(isl_ctx
*ctx
,
6677 struct isl_sched_graph
*graph
, __isl_take isl_union_set
*delta
)
6679 struct isl_collect_bounds_data data
= { ctx
, graph
};
6681 data
.bounds
= isl_union_set_empty(isl_space_params_alloc(ctx
, 0));
6682 if (isl_union_set_foreach_set(delta
, &collect_bounds
, &data
) < 0)
6683 data
.bounds
= isl_union_set_free(data
.bounds
);
6684 delta
= isl_union_set_plain_gist(delta
, data
.bounds
);
6689 /* Given a non-trivial lineality space "lineality", add the corresponding
6690 * universe set to data->mask and add a map from elements to
6691 * other elements along the lines in "lineality" to data->equivalent.
6692 * If this is the first time this function gets called
6693 * (data->any_non_trivial is still false), then set data->any_non_trivial and
6694 * initialize data->mask and data->equivalent.
6696 * In particular, if the lineality space is defined by equality constraints
6700 * then construct an affine mapping
6704 * and compute the equivalence relation of having the same image under f:
6706 * { x -> x' : E x = E x' }
6708 static isl_stat
add_non_trivial_lineality(__isl_take isl_basic_set
*lineality
,
6709 struct isl_exploit_lineality_data
*data
)
6715 isl_multi_pw_aff
*mpa
;
6720 return isl_stat_error
;
6721 if (isl_basic_set_dim(lineality
, isl_dim_div
) != 0)
6722 isl_die(isl_basic_set_get_ctx(lineality
), isl_error_internal
,
6723 "local variables not allowed", goto error
);
6725 space
= isl_basic_set_get_space(lineality
);
6726 if (!data
->any_non_trivial
) {
6727 data
->equivalent
= isl_union_map_empty(isl_space_copy(space
));
6728 data
->mask
= isl_union_set_empty(isl_space_copy(space
));
6730 data
->any_non_trivial
= isl_bool_true
;
6732 univ
= isl_set_universe(isl_space_copy(space
));
6733 data
->mask
= isl_union_set_add_set(data
->mask
, univ
);
6735 eq
= isl_basic_set_extract_equalities(lineality
);
6736 n
= isl_mat_rows(eq
);
6737 eq
= isl_mat_insert_zero_rows(eq
, 0, 1);
6738 eq
= isl_mat_set_element_si(eq
, 0, 0, 1);
6739 space
= isl_space_from_domain(space
);
6740 space
= isl_space_add_dims(space
, isl_dim_out
, n
);
6741 ma
= isl_multi_aff_from_aff_mat(space
, eq
);
6742 mpa
= isl_multi_pw_aff_from_multi_aff(ma
);
6743 map
= isl_multi_pw_aff_eq_map(mpa
, isl_multi_pw_aff_copy(mpa
));
6744 data
->equivalent
= isl_union_map_add_map(data
->equivalent
, map
);
6746 isl_basic_set_free(lineality
);
6749 isl_basic_set_free(lineality
);
6750 return isl_stat_error
;
6753 /* Check if the lineality space "set" is non-trivial (i.e., is not just
6754 * the origin or, in other words, satisfies a number of equality constraints
6755 * that is smaller than the dimension of the set).
6756 * If so, extend data->mask and data->equivalent accordingly.
6758 * The input should not have any local variables already, but
6759 * isl_set_remove_divs is called to make sure it does not.
6761 static isl_stat
add_lineality(__isl_take isl_set
*set
, void *user
)
6763 struct isl_exploit_lineality_data
*data
= user
;
6764 isl_basic_set
*hull
;
6767 set
= isl_set_remove_divs(set
);
6768 hull
= isl_set_unshifted_simple_hull(set
);
6769 dim
= isl_basic_set_dim(hull
, isl_dim_set
);
6770 n_eq
= isl_basic_set_n_equality(hull
);
6772 return isl_stat_error
;
6774 return add_non_trivial_lineality(hull
, data
);
6775 isl_basic_set_free(hull
);
6779 /* Check if the difference set on intra-node schedule constraints "intra"
6780 * has any non-trivial lineality space.
6781 * If so, then extend the difference set to a difference set
6782 * on equivalent elements. That is, if "intra" is
6784 * { y - x : (x,y) \in V }
6786 * and elements are equivalent if they have the same image under f,
6789 * { y' - x' : (x,y) \in V and f(x) = f(x') and f(y) = f(y') }
6791 * or, since f is linear,
6793 * { y' - x' : (x,y) \in V and f(y - x) = f(y' - x') }
6795 * The results of the search for non-trivial lineality spaces is stored
6798 static __isl_give isl_union_set
*exploit_intra_lineality(
6799 __isl_take isl_union_set
*intra
,
6800 struct isl_exploit_lineality_data
*data
)
6802 isl_union_set
*lineality
;
6803 isl_union_set
*uset
;
6805 data
->any_non_trivial
= isl_bool_false
;
6806 lineality
= isl_union_set_copy(intra
);
6807 lineality
= isl_union_set_combined_lineality_space(lineality
);
6808 if (isl_union_set_foreach_set(lineality
, &add_lineality
, data
) < 0)
6809 data
->any_non_trivial
= isl_bool_error
;
6810 isl_union_set_free(lineality
);
6812 if (data
->any_non_trivial
< 0)
6813 return isl_union_set_free(intra
);
6814 if (!data
->any_non_trivial
)
6817 uset
= isl_union_set_copy(intra
);
6818 intra
= isl_union_set_subtract(intra
, isl_union_set_copy(data
->mask
));
6819 uset
= isl_union_set_apply(uset
, isl_union_map_copy(data
->equivalent
));
6820 intra
= isl_union_set_union(intra
, uset
);
6822 intra
= isl_union_set_remove_divs(intra
);
6827 /* If the difference set on intra-node schedule constraints was found to have
6828 * any non-trivial lineality space by exploit_intra_lineality,
6829 * as recorded in "data", then extend the inter-node
6830 * schedule constraints "inter" to schedule constraints on equivalent elements.
6831 * That is, if "inter" is V and
6832 * elements are equivalent if they have the same image under f, then return
6834 * { (x', y') : (x,y) \in V and f(x) = f(x') and f(y) = f(y') }
6836 static __isl_give isl_union_map
*exploit_inter_lineality(
6837 __isl_take isl_union_map
*inter
,
6838 struct isl_exploit_lineality_data
*data
)
6840 isl_union_map
*umap
;
6842 if (data
->any_non_trivial
< 0)
6843 return isl_union_map_free(inter
);
6844 if (!data
->any_non_trivial
)
6847 umap
= isl_union_map_copy(inter
);
6848 inter
= isl_union_map_subtract_range(inter
,
6849 isl_union_set_copy(data
->mask
));
6850 umap
= isl_union_map_apply_range(umap
,
6851 isl_union_map_copy(data
->equivalent
));
6852 inter
= isl_union_map_union(inter
, umap
);
6853 umap
= isl_union_map_copy(inter
);
6854 inter
= isl_union_map_subtract_domain(inter
,
6855 isl_union_set_copy(data
->mask
));
6856 umap
= isl_union_map_apply_range(isl_union_map_copy(data
->equivalent
),
6858 inter
= isl_union_map_union(inter
, umap
);
6860 inter
= isl_union_map_remove_divs(inter
);
6865 /* For each (conditional) validity edge in "graph",
6866 * add the corresponding dependence relation using "add"
6867 * to a collection of dependence relations and return the result.
6868 * If "coincidence" is set, then coincidence edges are considered as well.
6870 static __isl_give isl_union_map
*collect_validity(struct isl_sched_graph
*graph
,
6871 __isl_give isl_union_map
*(*add
)(__isl_take isl_union_map
*umap
,
6872 struct isl_sched_edge
*edge
), int coincidence
)
6876 isl_union_map
*umap
;
6878 space
= isl_space_copy(graph
->node
[0].space
);
6879 umap
= isl_union_map_empty(space
);
6881 for (i
= 0; i
< graph
->n_edge
; ++i
) {
6882 struct isl_sched_edge
*edge
= &graph
->edge
[i
];
6884 if (!is_any_validity(edge
) &&
6885 (!coincidence
|| !is_coincidence(edge
)))
6888 umap
= add(umap
, edge
);
6894 /* Project out all parameters from "uset" and return the result.
6896 static __isl_give isl_union_set
*union_set_drop_parameters(
6897 __isl_take isl_union_set
*uset
)
6901 nparam
= isl_union_set_dim(uset
, isl_dim_param
);
6902 return isl_union_set_project_out(uset
, isl_dim_param
, 0, nparam
);
6905 /* For each dependence relation on a (conditional) validity edge
6906 * from a node to itself,
6907 * construct the set of coefficients of valid constraints for elements
6908 * in that dependence relation and collect the results.
6909 * If "coincidence" is set, then coincidence edges are considered as well.
6911 * In particular, for each dependence relation R, constraints
6912 * on coefficients (c_0, c_x) are constructed such that
6914 * c_0 + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
6916 * If the schedule_treat_coalescing option is set, then some constraints
6917 * that could be exploited to construct coalescing schedules
6918 * are removed before the dual is computed, but after the parameters
6919 * have been projected out.
6920 * The entire computation is essentially the same as that performed
6921 * by intra_coefficients, except that it operates on multiple
6922 * edges together and that the parameters are always projected out.
6924 * Additionally, exploit any non-trivial lineality space
6925 * in the difference set after removing coalescing constraints and
6926 * store the results of the non-trivial lineality space detection in "data".
6927 * The procedure is currently run unconditionally, but it is unlikely
6928 * to find any non-trivial lineality spaces if no coalescing constraints
6929 * have been removed.
6931 * Note that if a dependence relation is a union of basic maps,
6932 * then each basic map needs to be treated individually as it may only
6933 * be possible to carry the dependences expressed by some of those
6934 * basic maps and not all of them.
6935 * The collected validity constraints are therefore not coalesced and
6936 * it is assumed that they are not coalesced automatically.
6937 * Duplicate basic maps can be removed, however.
6938 * In particular, if the same basic map appears as a disjunct
6939 * in multiple edges, then it only needs to be carried once.
6941 static __isl_give isl_basic_set_list
*collect_intra_validity(isl_ctx
*ctx
,
6942 struct isl_sched_graph
*graph
, int coincidence
,
6943 struct isl_exploit_lineality_data
*data
)
6945 isl_union_map
*intra
;
6946 isl_union_set
*delta
;
6947 isl_basic_set_list
*list
;
6949 intra
= collect_validity(graph
, &add_intra
, coincidence
);
6950 delta
= isl_union_map_deltas(intra
);
6951 delta
= union_set_drop_parameters(delta
);
6952 delta
= isl_union_set_remove_divs(delta
);
6953 if (isl_options_get_schedule_treat_coalescing(ctx
))
6954 delta
= union_drop_coalescing_constraints(ctx
, graph
, delta
);
6955 delta
= exploit_intra_lineality(delta
, data
);
6956 list
= isl_union_set_get_basic_set_list(delta
);
6957 isl_union_set_free(delta
);
6959 return isl_basic_set_list_coefficients(list
);
6962 /* For each dependence relation on a (conditional) validity edge
6963 * from a node to some other node,
6964 * construct the set of coefficients of valid constraints for elements
6965 * in that dependence relation and collect the results.
6966 * If "coincidence" is set, then coincidence edges are considered as well.
6968 * In particular, for each dependence relation R, constraints
6969 * on coefficients (c_0, c_n, c_x, c_y) are constructed such that
6971 * c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
6973 * This computation is essentially the same as that performed
6974 * by inter_coefficients, except that it operates on multiple
6977 * Additionally, exploit any non-trivial lineality space
6978 * that may have been discovered by collect_intra_validity
6979 * (as stored in "data").
6981 * Note that if a dependence relation is a union of basic maps,
6982 * then each basic map needs to be treated individually as it may only
6983 * be possible to carry the dependences expressed by some of those
6984 * basic maps and not all of them.
6985 * The collected validity constraints are therefore not coalesced and
6986 * it is assumed that they are not coalesced automatically.
6987 * Duplicate basic maps can be removed, however.
6988 * In particular, if the same basic map appears as a disjunct
6989 * in multiple edges, then it only needs to be carried once.
6991 static __isl_give isl_basic_set_list
*collect_inter_validity(
6992 struct isl_sched_graph
*graph
, int coincidence
,
6993 struct isl_exploit_lineality_data
*data
)
6995 isl_union_map
*inter
;
6996 isl_union_set
*wrap
;
6997 isl_basic_set_list
*list
;
6999 inter
= collect_validity(graph
, &add_inter
, coincidence
);
7000 inter
= exploit_inter_lineality(inter
, data
);
7001 inter
= isl_union_map_remove_divs(inter
);
7002 wrap
= isl_union_map_wrap(inter
);
7003 list
= isl_union_set_get_basic_set_list(wrap
);
7004 isl_union_set_free(wrap
);
7005 return isl_basic_set_list_coefficients(list
);
7008 /* Construct an LP problem for finding schedule coefficients
7009 * such that the schedule carries as many of the "n_edge" groups of
7010 * dependences as possible based on the corresponding coefficient
7011 * constraints and return the lexicographically smallest non-trivial solution.
7012 * "intra" is the sequence of coefficient constraints for intra-node edges.
7013 * "inter" is the sequence of coefficient constraints for inter-node edges.
7014 * If "want_integral" is set, then compute an integral solution
7015 * for the coefficients rather than using the numerators
7016 * of a rational solution.
7017 * "carry_inter" indicates whether inter-node edges should be carried or
7020 * If none of the "n_edge" groups can be carried
7021 * then return an empty vector.
7023 static __isl_give isl_vec
*compute_carrying_sol_coef(isl_ctx
*ctx
,
7024 struct isl_sched_graph
*graph
, int n_edge
,
7025 __isl_keep isl_basic_set_list
*intra
,
7026 __isl_keep isl_basic_set_list
*inter
, int want_integral
,
7031 if (setup_carry_lp(ctx
, graph
, n_edge
, intra
, inter
, carry_inter
) < 0)
7034 lp
= isl_basic_set_copy(graph
->lp
);
7035 return non_neg_lexmin(graph
, lp
, n_edge
, want_integral
);
7038 /* Construct an LP problem for finding schedule coefficients
7039 * such that the schedule carries as many of the validity dependences
7041 * return the lexicographically smallest non-trivial solution.
7042 * If "fallback" is set, then the carrying is performed as a fallback
7043 * for the Pluto-like scheduler.
7044 * If "coincidence" is set, then try and carry coincidence edges as well.
7046 * The variable "n_edge" stores the number of groups that should be carried.
7047 * If none of the "n_edge" groups can be carried
7048 * then return an empty vector.
7049 * If, moreover, "n_edge" is zero, then the LP problem does not even
7050 * need to be constructed.
7052 * If a fallback solution is being computed, then compute an integral solution
7053 * for the coefficients rather than using the numerators
7054 * of a rational solution.
7056 * If a fallback solution is being computed, if there are any intra-node
7057 * dependences, and if requested by the user, then first try
7058 * to only carry those intra-node dependences.
7059 * If this fails to carry any dependences, then try again
7060 * with the inter-node dependences included.
7062 static __isl_give isl_vec
*compute_carrying_sol(isl_ctx
*ctx
,
7063 struct isl_sched_graph
*graph
, int fallback
, int coincidence
)
7065 int n_intra
, n_inter
;
7067 struct isl_carry carry
= { 0 };
7070 carry
.intra
= collect_intra_validity(ctx
, graph
, coincidence
,
7072 carry
.inter
= collect_inter_validity(graph
, coincidence
,
7074 if (!carry
.intra
|| !carry
.inter
)
7076 n_intra
= isl_basic_set_list_n_basic_set(carry
.intra
);
7077 n_inter
= isl_basic_set_list_n_basic_set(carry
.inter
);
7079 if (fallback
&& n_intra
> 0 &&
7080 isl_options_get_schedule_carry_self_first(ctx
)) {
7081 sol
= compute_carrying_sol_coef(ctx
, graph
, n_intra
,
7082 carry
.intra
, carry
.inter
, fallback
, 0);
7083 if (!sol
|| sol
->size
!= 0 || n_inter
== 0) {
7084 isl_carry_clear(&carry
);
7090 n_edge
= n_intra
+ n_inter
;
7092 isl_carry_clear(&carry
);
7093 return isl_vec_alloc(ctx
, 0);
7096 sol
= compute_carrying_sol_coef(ctx
, graph
, n_edge
,
7097 carry
.intra
, carry
.inter
, fallback
, 1);
7098 isl_carry_clear(&carry
);
7101 isl_carry_clear(&carry
);
7105 /* Construct a schedule row for each node such that as many validity dependences
7106 * as possible are carried and then continue with the next band.
7107 * If "fallback" is set, then the carrying is performed as a fallback
7108 * for the Pluto-like scheduler.
7109 * If "coincidence" is set, then try and carry coincidence edges as well.
7111 * If there are no validity dependences, then no dependence can be carried and
7112 * the procedure is guaranteed to fail. If there is more than one component,
7113 * then try computing a schedule on each component separately
7114 * to prevent or at least postpone this failure.
7116 * If a schedule row is computed, then check that dependences are carried
7117 * for at least one of the edges.
7119 * If the computed schedule row turns out to be trivial on one or
7120 * more nodes where it should not be trivial, then we throw it away
7121 * and try again on each component separately.
7123 * If there is only one component, then we accept the schedule row anyway,
7124 * but we do not consider it as a complete row and therefore do not
7125 * increment graph->n_row. Note that the ranks of the nodes that
7126 * do get a non-trivial schedule part will get updated regardless and
7127 * graph->maxvar is computed based on these ranks. The test for
7128 * whether more schedule rows are required in compute_schedule_wcc
7129 * is therefore not affected.
7131 * Insert a band corresponding to the schedule row at position "node"
7132 * of the schedule tree and continue with the construction of the schedule.
7133 * This insertion and the continued construction is performed by split_scaled
7134 * after optionally checking for non-trivial common divisors.
7136 static __isl_give isl_schedule_node
*carry(__isl_take isl_schedule_node
*node
,
7137 struct isl_sched_graph
*graph
, int fallback
, int coincidence
)
7146 ctx
= isl_schedule_node_get_ctx(node
);
7147 sol
= compute_carrying_sol(ctx
, graph
, fallback
, coincidence
);
7149 return isl_schedule_node_free(node
);
7150 if (sol
->size
== 0) {
7153 return compute_component_schedule(node
, graph
, 1);
7154 isl_die(ctx
, isl_error_unknown
, "unable to carry dependences",
7155 return isl_schedule_node_free(node
));
7158 trivial
= is_any_trivial(graph
, sol
);
7160 sol
= isl_vec_free(sol
);
7161 } else if (trivial
&& graph
->scc
> 1) {
7163 return compute_component_schedule(node
, graph
, 1);
7166 if (update_schedule(graph
, sol
, 0) < 0)
7167 return isl_schedule_node_free(node
);
7171 return split_scaled(node
, graph
);
7174 /* Construct a schedule row for each node such that as many validity dependences
7175 * as possible are carried and then continue with the next band.
7176 * Do so as a fallback for the Pluto-like scheduler.
7177 * If "coincidence" is set, then try and carry coincidence edges as well.
7179 static __isl_give isl_schedule_node
*carry_fallback(
7180 __isl_take isl_schedule_node
*node
, struct isl_sched_graph
*graph
,
7183 return carry(node
, graph
, 1, coincidence
);
7186 /* Construct a schedule row for each node such that as many validity dependences
7187 * as possible are carried and then continue with the next band.
7188 * Do so for the case where the Feautrier scheduler was selected
7191 static __isl_give isl_schedule_node
*carry_feautrier(
7192 __isl_take isl_schedule_node
*node
, struct isl_sched_graph
*graph
)
7194 return carry(node
, graph
, 0, 0);
7197 /* Construct a schedule row for each node such that as many validity dependences
7198 * as possible are carried and then continue with the next band.
7199 * Do so as a fallback for the Pluto-like scheduler.
7201 static __isl_give isl_schedule_node
*carry_dependences(
7202 __isl_take isl_schedule_node
*node
, struct isl_sched_graph
*graph
)
7204 return carry_fallback(node
, graph
, 0);
7207 /* Construct a schedule row for each node such that as many validity or
7208 * coincidence dependences as possible are carried and
7209 * then continue with the next band.
7210 * Do so as a fallback for the Pluto-like scheduler.
7212 static __isl_give isl_schedule_node
*carry_coincidence(
7213 __isl_take isl_schedule_node
*node
, struct isl_sched_graph
*graph
)
7215 return carry_fallback(node
, graph
, 1);
7218 /* Topologically sort statements mapped to the same schedule iteration
7219 * and add insert a sequence node in front of "node"
7220 * corresponding to this order.
7221 * If "initialized" is set, then it may be assumed that compute_maxvar
7222 * has been called on the current band. Otherwise, call
7223 * compute_maxvar if and before carry_dependences gets called.
7225 * If it turns out to be impossible to sort the statements apart,
7226 * because different dependences impose different orderings
7227 * on the statements, then we extend the schedule such that
7228 * it carries at least one more dependence.
7230 static __isl_give isl_schedule_node
*sort_statements(
7231 __isl_take isl_schedule_node
*node
, struct isl_sched_graph
*graph
,
7235 isl_union_set_list
*filters
;
7240 ctx
= isl_schedule_node_get_ctx(node
);
7242 isl_die(ctx
, isl_error_internal
,
7243 "graph should have at least one node",
7244 return isl_schedule_node_free(node
));
7249 if (update_edges(ctx
, graph
) < 0)
7250 return isl_schedule_node_free(node
);
7252 if (graph
->n_edge
== 0)
7255 if (detect_sccs(ctx
, graph
) < 0)
7256 return isl_schedule_node_free(node
);
7259 if (graph
->scc
< graph
->n
) {
7260 if (!initialized
&& compute_maxvar(graph
) < 0)
7261 return isl_schedule_node_free(node
);
7262 return carry_dependences(node
, graph
);
7265 filters
= extract_sccs(ctx
, graph
);
7266 node
= isl_schedule_node_insert_sequence(node
, filters
);
7271 /* Are there any (non-empty) (conditional) validity edges in the graph?
7273 static int has_validity_edges(struct isl_sched_graph
*graph
)
7277 for (i
= 0; i
< graph
->n_edge
; ++i
) {
7280 empty
= isl_map_plain_is_empty(graph
->edge
[i
].map
);
7285 if (is_any_validity(&graph
->edge
[i
]))
7292 /* Should we apply a Feautrier step?
7293 * That is, did the user request the Feautrier algorithm and are
7294 * there any validity dependences (left)?
7296 static int need_feautrier_step(isl_ctx
*ctx
, struct isl_sched_graph
*graph
)
7298 if (ctx
->opt
->schedule_algorithm
!= ISL_SCHEDULE_ALGORITHM_FEAUTRIER
)
7301 return has_validity_edges(graph
);
7304 /* Compute a schedule for a connected dependence graph using Feautrier's
7305 * multi-dimensional scheduling algorithm and return the updated schedule node.
7307 * The original algorithm is described in [1].
7308 * The main idea is to minimize the number of scheduling dimensions, by
7309 * trying to satisfy as many dependences as possible per scheduling dimension.
7311 * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
7312 * Problem, Part II: Multi-Dimensional Time.
7313 * In Intl. Journal of Parallel Programming, 1992.
7315 static __isl_give isl_schedule_node
*compute_schedule_wcc_feautrier(
7316 isl_schedule_node
*node
, struct isl_sched_graph
*graph
)
7318 return carry_feautrier(node
, graph
);
7321 /* Turn off the "local" bit on all (condition) edges.
7323 static void clear_local_edges(struct isl_sched_graph
*graph
)
7327 for (i
= 0; i
< graph
->n_edge
; ++i
)
7328 if (is_condition(&graph
->edge
[i
]))
7329 clear_local(&graph
->edge
[i
]);
7332 /* Does "graph" have both condition and conditional validity edges?
7334 static int need_condition_check(struct isl_sched_graph
*graph
)
7337 int any_condition
= 0;
7338 int any_conditional_validity
= 0;
7340 for (i
= 0; i
< graph
->n_edge
; ++i
) {
7341 if (is_condition(&graph
->edge
[i
]))
7343 if (is_conditional_validity(&graph
->edge
[i
]))
7344 any_conditional_validity
= 1;
7347 return any_condition
&& any_conditional_validity
;
7350 /* Does "graph" contain any coincidence edge?
7352 static int has_any_coincidence(struct isl_sched_graph
*graph
)
7356 for (i
= 0; i
< graph
->n_edge
; ++i
)
7357 if (is_coincidence(&graph
->edge
[i
]))
7363 /* Extract the final schedule row as a map with the iteration domain
7364 * of "node" as domain.
7366 static __isl_give isl_map
*final_row(struct isl_sched_node
*node
)
7371 row
= isl_mat_rows(node
->sched
) - 1;
7372 ma
= node_extract_partial_schedule_multi_aff(node
, row
, 1);
7373 return isl_map_from_multi_aff(ma
);
7376 /* Is the conditional validity dependence in the edge with index "edge_index"
7377 * violated by the latest (i.e., final) row of the schedule?
7378 * That is, is i scheduled after j
7379 * for any conditional validity dependence i -> j?
7381 static int is_violated(struct isl_sched_graph
*graph
, int edge_index
)
7383 isl_map
*src_sched
, *dst_sched
, *map
;
7384 struct isl_sched_edge
*edge
= &graph
->edge
[edge_index
];
7387 src_sched
= final_row(edge
->src
);
7388 dst_sched
= final_row(edge
->dst
);
7389 map
= isl_map_copy(edge
->map
);
7390 map
= isl_map_apply_domain(map
, src_sched
);
7391 map
= isl_map_apply_range(map
, dst_sched
);
7392 map
= isl_map_order_gt(map
, isl_dim_in
, 0, isl_dim_out
, 0);
7393 empty
= isl_map_is_empty(map
);
7402 /* Does "graph" have any satisfied condition edges that
7403 * are adjacent to the conditional validity constraint with
7404 * domain "conditional_source" and range "conditional_sink"?
7406 * A satisfied condition is one that is not local.
7407 * If a condition was forced to be local already (i.e., marked as local)
7408 * then there is no need to check if it is in fact local.
7410 * Additionally, mark all adjacent condition edges found as local.
7412 static int has_adjacent_true_conditions(struct isl_sched_graph
*graph
,
7413 __isl_keep isl_union_set
*conditional_source
,
7414 __isl_keep isl_union_set
*conditional_sink
)
7419 for (i
= 0; i
< graph
->n_edge
; ++i
) {
7420 int adjacent
, local
;
7421 isl_union_map
*condition
;
7423 if (!is_condition(&graph
->edge
[i
]))
7425 if (is_local(&graph
->edge
[i
]))
7428 condition
= graph
->edge
[i
].tagged_condition
;
7429 adjacent
= domain_intersects(condition
, conditional_sink
);
7430 if (adjacent
>= 0 && !adjacent
)
7431 adjacent
= range_intersects(condition
,
7432 conditional_source
);
7438 set_local(&graph
->edge
[i
]);
7440 local
= is_condition_false(graph
, &graph
->edge
[i
]);
7450 /* Are there any violated conditional validity dependences with
7451 * adjacent condition dependences that are not local with respect
7452 * to the current schedule?
7453 * That is, is the conditional validity constraint violated?
7455 * Additionally, mark all those adjacent condition dependences as local.
7456 * We also mark those adjacent condition dependences that were not marked
7457 * as local before, but just happened to be local already. This ensures
7458 * that they remain local if the schedule is recomputed.
7460 * We first collect domain and range of all violated conditional validity
7461 * dependences and then check if there are any adjacent non-local
7462 * condition dependences.
7464 static int has_violated_conditional_constraint(isl_ctx
*ctx
,
7465 struct isl_sched_graph
*graph
)
7469 isl_union_set
*source
, *sink
;
7471 source
= isl_union_set_empty(isl_space_params_alloc(ctx
, 0));
7472 sink
= isl_union_set_empty(isl_space_params_alloc(ctx
, 0));
7473 for (i
= 0; i
< graph
->n_edge
; ++i
) {
7474 isl_union_set
*uset
;
7475 isl_union_map
*umap
;
7478 if (!is_conditional_validity(&graph
->edge
[i
]))
7481 violated
= is_violated(graph
, i
);
7489 umap
= isl_union_map_copy(graph
->edge
[i
].tagged_validity
);
7490 uset
= isl_union_map_domain(umap
);
7491 source
= isl_union_set_union(source
, uset
);
7492 source
= isl_union_set_coalesce(source
);
7494 umap
= isl_union_map_copy(graph
->edge
[i
].tagged_validity
);
7495 uset
= isl_union_map_range(umap
);
7496 sink
= isl_union_set_union(sink
, uset
);
7497 sink
= isl_union_set_coalesce(sink
);
7501 any
= has_adjacent_true_conditions(graph
, source
, sink
);
7503 isl_union_set_free(source
);
7504 isl_union_set_free(sink
);
7507 isl_union_set_free(source
);
7508 isl_union_set_free(sink
);
7512 /* Examine the current band (the rows between graph->band_start and
7513 * graph->n_total_row), deciding whether to drop it or add it to "node"
7514 * and then continue with the computation of the next band, if any.
7515 * If "initialized" is set, then it may be assumed that compute_maxvar
7516 * has been called on the current band. Otherwise, call
7517 * compute_maxvar if and before carry_dependences gets called.
7519 * The caller keeps looking for a new row as long as
7520 * graph->n_row < graph->maxvar. If the latest attempt to find
7521 * such a row failed (i.e., we still have graph->n_row < graph->maxvar),
7523 * - split between SCCs and start over (assuming we found an interesting
7524 * pair of SCCs between which to split)
7525 * - continue with the next band (assuming the current band has at least
7527 * - if there is more than one SCC left, then split along all SCCs
7528 * - if outer coincidence needs to be enforced, then try to carry as many
7529 * validity or coincidence dependences as possible and
7530 * continue with the next band
7531 * - try to carry as many validity dependences as possible and
7532 * continue with the next band
7533 * In each case, we first insert a band node in the schedule tree
7534 * if any rows have been computed.
7536 * If the caller managed to complete the schedule and the current band
7537 * is empty, then finish off by topologically
7538 * sorting the statements based on the remaining dependences.
7539 * If, on the other hand, the current band has at least one row,
7540 * then continue with the next band. Note that this next band
7541 * will necessarily be empty, but the graph may still be split up
7542 * into weakly connected components before arriving back here.
7544 static __isl_give isl_schedule_node
*compute_schedule_finish_band(
7545 __isl_take isl_schedule_node
*node
, struct isl_sched_graph
*graph
,
7553 empty
= graph
->n_total_row
== graph
->band_start
;
7554 if (graph
->n_row
< graph
->maxvar
) {
7557 ctx
= isl_schedule_node_get_ctx(node
);
7558 if (!ctx
->opt
->schedule_maximize_band_depth
&& !empty
)
7559 return compute_next_band(node
, graph
, 1);
7560 if (graph
->src_scc
>= 0)
7561 return compute_split_schedule(node
, graph
);
7563 return compute_next_band(node
, graph
, 1);
7565 return compute_component_schedule(node
, graph
, 1);
7566 if (!initialized
&& compute_maxvar(graph
) < 0)
7567 return isl_schedule_node_free(node
);
7568 if (isl_options_get_schedule_outer_coincidence(ctx
))
7569 return carry_coincidence(node
, graph
);
7570 return carry_dependences(node
, graph
);
7574 return compute_next_band(node
, graph
, 1);
7575 return sort_statements(node
, graph
, initialized
);
7578 /* Construct a band of schedule rows for a connected dependence graph.
7579 * The caller is responsible for determining the strongly connected
7580 * components and calling compute_maxvar first.
7582 * We try to find a sequence of as many schedule rows as possible that result
7583 * in non-negative dependence distances (independent of the previous rows
7584 * in the sequence, i.e., such that the sequence is tilable), with as
7585 * many of the initial rows as possible satisfying the coincidence constraints.
7586 * The computation stops if we can't find any more rows or if we have found
7587 * all the rows we wanted to find.
7589 * If ctx->opt->schedule_outer_coincidence is set, then we force the
7590 * outermost dimension to satisfy the coincidence constraints. If this
7591 * turns out to be impossible, we fall back on the general scheme above
7592 * and try to carry as many dependences as possible.
7594 * If "graph" contains both condition and conditional validity dependences,
7595 * then we need to check that the conditional schedule constraint
7596 * is satisfied, i.e., there are no violated conditional validity dependences
7597 * that are adjacent to any non-local condition dependences.
7598 * If there are, then we mark all those adjacent condition dependences
7599 * as local and recompute the current band. Those dependences that
7600 * are marked local will then be forced to be local.
7601 * The initial computation is performed with no dependences marked as local.
7602 * If we are lucky, then there will be no violated conditional validity
7603 * dependences adjacent to any non-local condition dependences.
7604 * Otherwise, we mark some additional condition dependences as local and
7605 * recompute. We continue this process until there are no violations left or
7606 * until we are no longer able to compute a schedule.
7607 * Since there are only a finite number of dependences,
7608 * there will only be a finite number of iterations.
7610 static isl_stat
compute_schedule_wcc_band(isl_ctx
*ctx
,
7611 struct isl_sched_graph
*graph
)
7613 int has_coincidence
;
7614 int use_coincidence
;
7615 int force_coincidence
= 0;
7616 int check_conditional
;
7618 if (sort_sccs(graph
) < 0)
7619 return isl_stat_error
;
7621 clear_local_edges(graph
);
7622 check_conditional
= need_condition_check(graph
);
7623 has_coincidence
= has_any_coincidence(graph
);
7625 if (ctx
->opt
->schedule_outer_coincidence
)
7626 force_coincidence
= 1;
7628 use_coincidence
= has_coincidence
;
7629 while (graph
->n_row
< graph
->maxvar
) {
7634 graph
->src_scc
= -1;
7635 graph
->dst_scc
= -1;
7637 if (setup_lp(ctx
, graph
, use_coincidence
) < 0)
7638 return isl_stat_error
;
7639 sol
= solve_lp(ctx
, graph
);
7641 return isl_stat_error
;
7642 if (sol
->size
== 0) {
7643 int empty
= graph
->n_total_row
== graph
->band_start
;
7646 if (use_coincidence
&& (!force_coincidence
|| !empty
)) {
7647 use_coincidence
= 0;
7652 coincident
= !has_coincidence
|| use_coincidence
;
7653 if (update_schedule(graph
, sol
, coincident
) < 0)
7654 return isl_stat_error
;
7656 if (!check_conditional
)
7658 violated
= has_violated_conditional_constraint(ctx
, graph
);
7660 return isl_stat_error
;
7663 if (reset_band(graph
) < 0)
7664 return isl_stat_error
;
7665 use_coincidence
= has_coincidence
;
7671 /* Compute a schedule for a connected dependence graph by considering
7672 * the graph as a whole and return the updated schedule node.
7674 * The actual schedule rows of the current band are computed by
7675 * compute_schedule_wcc_band. compute_schedule_finish_band takes
7676 * care of integrating the band into "node" and continuing
7679 static __isl_give isl_schedule_node
*compute_schedule_wcc_whole(
7680 __isl_take isl_schedule_node
*node
, struct isl_sched_graph
*graph
)
7687 ctx
= isl_schedule_node_get_ctx(node
);
7688 if (compute_schedule_wcc_band(ctx
, graph
) < 0)
7689 return isl_schedule_node_free(node
);
7691 return compute_schedule_finish_band(node
, graph
, 1);
7694 /* Clustering information used by compute_schedule_wcc_clustering.
7696 * "n" is the number of SCCs in the original dependence graph
7697 * "scc" is an array of "n" elements, each representing an SCC
7698 * of the original dependence graph. All entries in the same cluster
7699 * have the same number of schedule rows.
7700 * "scc_cluster" maps each SCC index to the cluster to which it belongs,
7701 * where each cluster is represented by the index of the first SCC
7702 * in the cluster. Initially, each SCC belongs to a cluster containing
7705 * "scc_in_merge" is used by merge_clusters_along_edge to keep
7706 * track of which SCCs need to be merged.
7708 * "cluster" contains the merged clusters of SCCs after the clustering
7711 * "scc_node" is a temporary data structure used inside copy_partial.
7712 * For each SCC, it keeps track of the number of nodes in the SCC
7713 * that have already been copied.
7715 struct isl_clustering
{
7717 struct isl_sched_graph
*scc
;
7718 struct isl_sched_graph
*cluster
;
7724 /* Initialize the clustering data structure "c" from "graph".
7726 * In particular, allocate memory, extract the SCCs from "graph"
7727 * into c->scc, initialize scc_cluster and construct
7728 * a band of schedule rows for each SCC.
7729 * Within each SCC, there is only one SCC by definition.
7730 * Each SCC initially belongs to a cluster containing only that SCC.
7732 static isl_stat
clustering_init(isl_ctx
*ctx
, struct isl_clustering
*c
,
7733 struct isl_sched_graph
*graph
)
7738 c
->scc
= isl_calloc_array(ctx
, struct isl_sched_graph
, c
->n
);
7739 c
->cluster
= isl_calloc_array(ctx
, struct isl_sched_graph
, c
->n
);
7740 c
->scc_cluster
= isl_calloc_array(ctx
, int, c
->n
);
7741 c
->scc_node
= isl_calloc_array(ctx
, int, c
->n
);
7742 c
->scc_in_merge
= isl_calloc_array(ctx
, int, c
->n
);
7743 if (!c
->scc
|| !c
->cluster
||
7744 !c
->scc_cluster
|| !c
->scc_node
|| !c
->scc_in_merge
)
7745 return isl_stat_error
;
7747 for (i
= 0; i
< c
->n
; ++i
) {
7748 if (extract_sub_graph(ctx
, graph
, &node_scc_exactly
,
7749 &edge_scc_exactly
, i
, &c
->scc
[i
]) < 0)
7750 return isl_stat_error
;
7752 if (compute_maxvar(&c
->scc
[i
]) < 0)
7753 return isl_stat_error
;
7754 if (compute_schedule_wcc_band(ctx
, &c
->scc
[i
]) < 0)
7755 return isl_stat_error
;
7756 c
->scc_cluster
[i
] = i
;
7762 /* Free all memory allocated for "c".
7764 static void clustering_free(isl_ctx
*ctx
, struct isl_clustering
*c
)
7769 for (i
= 0; i
< c
->n
; ++i
)
7770 graph_free(ctx
, &c
->scc
[i
]);
7773 for (i
= 0; i
< c
->n
; ++i
)
7774 graph_free(ctx
, &c
->cluster
[i
]);
7776 free(c
->scc_cluster
);
7778 free(c
->scc_in_merge
);
7781 /* Should we refrain from merging the cluster in "graph" with
7782 * any other cluster?
7783 * In particular, is its current schedule band empty and incomplete?
7785 static int bad_cluster(struct isl_sched_graph
*graph
)
7787 return graph
->n_row
< graph
->maxvar
&&
7788 graph
->n_total_row
== graph
->band_start
;
7791 /* Is "edge" a proximity edge with a non-empty dependence relation?
7793 static isl_bool
is_non_empty_proximity(struct isl_sched_edge
*edge
)
7795 if (!is_proximity(edge
))
7796 return isl_bool_false
;
7797 return isl_bool_not(isl_map_plain_is_empty(edge
->map
));
7800 /* Return the index of an edge in "graph" that can be used to merge
7801 * two clusters in "c".
7802 * Return graph->n_edge if no such edge can be found.
7803 * Return -1 on error.
7805 * In particular, return a proximity edge between two clusters
7806 * that is not marked "no_merge" and such that neither of the
7807 * two clusters has an incomplete, empty band.
7809 * If there are multiple such edges, then try and find the most
7810 * appropriate edge to use for merging. In particular, pick the edge
7811 * with the greatest weight. If there are multiple of those,
7812 * then pick one with the shortest distance between
7813 * the two cluster representatives.
7815 static int find_proximity(struct isl_sched_graph
*graph
,
7816 struct isl_clustering
*c
)
7818 int i
, best
= graph
->n_edge
, best_dist
, best_weight
;
7820 for (i
= 0; i
< graph
->n_edge
; ++i
) {
7821 struct isl_sched_edge
*edge
= &graph
->edge
[i
];
7825 prox
= is_non_empty_proximity(edge
);
7832 if (bad_cluster(&c
->scc
[edge
->src
->scc
]) ||
7833 bad_cluster(&c
->scc
[edge
->dst
->scc
]))
7835 dist
= c
->scc_cluster
[edge
->dst
->scc
] -
7836 c
->scc_cluster
[edge
->src
->scc
];
7839 weight
= edge
->weight
;
7840 if (best
< graph
->n_edge
) {
7841 if (best_weight
> weight
)
7843 if (best_weight
== weight
&& best_dist
<= dist
)
7848 best_weight
= weight
;
7854 /* Internal data structure used in mark_merge_sccs.
7856 * "graph" is the dependence graph in which a strongly connected
7857 * component is constructed.
7858 * "scc_cluster" maps each SCC index to the cluster to which it belongs.
7859 * "src" and "dst" are the indices of the nodes that are being merged.
7861 struct isl_mark_merge_sccs_data
{
7862 struct isl_sched_graph
*graph
;
7868 /* Check whether the cluster containing node "i" depends on the cluster
7869 * containing node "j". If "i" and "j" belong to the same cluster,
7870 * then they are taken to depend on each other to ensure that
7871 * the resulting strongly connected component consists of complete
7872 * clusters. Furthermore, if "i" and "j" are the two nodes that
7873 * are being merged, then they are taken to depend on each other as well.
7874 * Otherwise, check if there is a (conditional) validity dependence
7875 * from node[j] to node[i], forcing node[i] to follow node[j].
7877 static isl_bool
cluster_follows(int i
, int j
, void *user
)
7879 struct isl_mark_merge_sccs_data
*data
= user
;
7880 struct isl_sched_graph
*graph
= data
->graph
;
7881 int *scc_cluster
= data
->scc_cluster
;
7883 if (data
->src
== i
&& data
->dst
== j
)
7884 return isl_bool_true
;
7885 if (data
->src
== j
&& data
->dst
== i
)
7886 return isl_bool_true
;
7887 if (scc_cluster
[graph
->node
[i
].scc
] == scc_cluster
[graph
->node
[j
].scc
])
7888 return isl_bool_true
;
7890 return graph_has_validity_edge(graph
, &graph
->node
[j
], &graph
->node
[i
]);
7893 /* Mark all SCCs that belong to either of the two clusters in "c"
7894 * connected by the edge in "graph" with index "edge", or to any
7895 * of the intermediate clusters.
7896 * The marking is recorded in c->scc_in_merge.
7898 * The given edge has been selected for merging two clusters,
7899 * meaning that there is at least a proximity edge between the two nodes.
7900 * However, there may also be (indirect) validity dependences
7901 * between the two nodes. When merging the two clusters, all clusters
7902 * containing one or more of the intermediate nodes along the
7903 * indirect validity dependences need to be merged in as well.
7905 * First collect all such nodes by computing the strongly connected
7906 * component (SCC) containing the two nodes connected by the edge, where
7907 * the two nodes are considered to depend on each other to make
7908 * sure they end up in the same SCC. Similarly, each node is considered
7909 * to depend on every other node in the same cluster to ensure
7910 * that the SCC consists of complete clusters.
7912 * Then the original SCCs that contain any of these nodes are marked
7913 * in c->scc_in_merge.
7915 static isl_stat
mark_merge_sccs(isl_ctx
*ctx
, struct isl_sched_graph
*graph
,
7916 int edge
, struct isl_clustering
*c
)
7918 struct isl_mark_merge_sccs_data data
;
7919 struct isl_tarjan_graph
*g
;
7922 for (i
= 0; i
< c
->n
; ++i
)
7923 c
->scc_in_merge
[i
] = 0;
7926 data
.scc_cluster
= c
->scc_cluster
;
7927 data
.src
= graph
->edge
[edge
].src
- graph
->node
;
7928 data
.dst
= graph
->edge
[edge
].dst
- graph
->node
;
7930 g
= isl_tarjan_graph_component(ctx
, graph
->n
, data
.dst
,
7931 &cluster_follows
, &data
);
7937 isl_die(ctx
, isl_error_internal
,
7938 "expecting at least two nodes in component",
7940 if (g
->order
[--i
] != -1)
7941 isl_die(ctx
, isl_error_internal
,
7942 "expecting end of component marker", goto error
);
7944 for (--i
; i
>= 0 && g
->order
[i
] != -1; --i
) {
7945 int scc
= graph
->node
[g
->order
[i
]].scc
;
7946 c
->scc_in_merge
[scc
] = 1;
7949 isl_tarjan_graph_free(g
);
7952 isl_tarjan_graph_free(g
);
7953 return isl_stat_error
;
7956 /* Construct the identifier "cluster_i".
7958 static __isl_give isl_id
*cluster_id(isl_ctx
*ctx
, int i
)
7962 snprintf(name
, sizeof(name
), "cluster_%d", i
);
7963 return isl_id_alloc(ctx
, name
, NULL
);
7966 /* Construct the space of the cluster with index "i" containing
7967 * the strongly connected component "scc".
7969 * In particular, construct a space called cluster_i with dimension equal
7970 * to the number of schedule rows in the current band of "scc".
7972 static __isl_give isl_space
*cluster_space(struct isl_sched_graph
*scc
, int i
)
7978 nvar
= scc
->n_total_row
- scc
->band_start
;
7979 space
= isl_space_copy(scc
->node
[0].space
);
7980 space
= isl_space_params(space
);
7981 space
= isl_space_set_from_params(space
);
7982 space
= isl_space_add_dims(space
, isl_dim_set
, nvar
);
7983 id
= cluster_id(isl_space_get_ctx(space
), i
);
7984 space
= isl_space_set_tuple_id(space
, isl_dim_set
, id
);
7989 /* Collect the domain of the graph for merging clusters.
7991 * In particular, for each cluster with first SCC "i", construct
7992 * a set in the space called cluster_i with dimension equal
7993 * to the number of schedule rows in the current band of the cluster.
7995 static __isl_give isl_union_set
*collect_domain(isl_ctx
*ctx
,
7996 struct isl_sched_graph
*graph
, struct isl_clustering
*c
)
8000 isl_union_set
*domain
;
8002 space
= isl_space_params_alloc(ctx
, 0);
8003 domain
= isl_union_set_empty(space
);
8005 for (i
= 0; i
< graph
->scc
; ++i
) {
8008 if (!c
->scc_in_merge
[i
])
8010 if (c
->scc_cluster
[i
] != i
)
8012 space
= cluster_space(&c
->scc
[i
], i
);
8013 domain
= isl_union_set_add_set(domain
, isl_set_universe(space
));
8019 /* Construct a map from the original instances to the corresponding
8020 * cluster instance in the current bands of the clusters in "c".
8022 static __isl_give isl_union_map
*collect_cluster_map(isl_ctx
*ctx
,
8023 struct isl_sched_graph
*graph
, struct isl_clustering
*c
)
8027 isl_union_map
*cluster_map
;
8029 space
= isl_space_params_alloc(ctx
, 0);
8030 cluster_map
= isl_union_map_empty(space
);
8031 for (i
= 0; i
< graph
->scc
; ++i
) {
8035 if (!c
->scc_in_merge
[i
])
8038 id
= cluster_id(ctx
, c
->scc_cluster
[i
]);
8039 start
= c
->scc
[i
].band_start
;
8040 n
= c
->scc
[i
].n_total_row
- start
;
8041 for (j
= 0; j
< c
->scc
[i
].n
; ++j
) {
8044 struct isl_sched_node
*node
= &c
->scc
[i
].node
[j
];
8046 ma
= node_extract_partial_schedule_multi_aff(node
,
8048 ma
= isl_multi_aff_set_tuple_id(ma
, isl_dim_out
,
8050 map
= isl_map_from_multi_aff(ma
);
8051 cluster_map
= isl_union_map_add_map(cluster_map
, map
);
8059 /* Add "umap" to the schedule constraints "sc" of all types of "edge"
8060 * that are not isl_edge_condition or isl_edge_conditional_validity.
8062 static __isl_give isl_schedule_constraints
*add_non_conditional_constraints(
8063 struct isl_sched_edge
*edge
, __isl_keep isl_union_map
*umap
,
8064 __isl_take isl_schedule_constraints
*sc
)
8066 enum isl_edge_type t
;
8071 for (t
= isl_edge_first
; t
<= isl_edge_last_sc
; ++t
) {
8072 if (t
== isl_edge_condition
||
8073 t
== isl_edge_conditional_validity
)
8075 if (!is_type(edge
, t
))
8077 sc
= isl_schedule_constraints_add(sc
, t
,
8078 isl_union_map_copy(umap
));
8084 /* Add schedule constraints of types isl_edge_condition and
8085 * isl_edge_conditional_validity to "sc" by applying "umap" to
8086 * the domains of the wrapped relations in domain and range
8087 * of the corresponding tagged constraints of "edge".
8089 static __isl_give isl_schedule_constraints
*add_conditional_constraints(
8090 struct isl_sched_edge
*edge
, __isl_keep isl_union_map
*umap
,
8091 __isl_take isl_schedule_constraints
*sc
)
8093 enum isl_edge_type t
;
8094 isl_union_map
*tagged
;
8096 for (t
= isl_edge_condition
; t
<= isl_edge_conditional_validity
; ++t
) {
8097 if (!is_type(edge
, t
))
8099 if (t
== isl_edge_condition
)
8100 tagged
= isl_union_map_copy(edge
->tagged_condition
);
8102 tagged
= isl_union_map_copy(edge
->tagged_validity
);
8103 tagged
= isl_union_map_zip(tagged
);
8104 tagged
= isl_union_map_apply_domain(tagged
,
8105 isl_union_map_copy(umap
));
8106 tagged
= isl_union_map_zip(tagged
);
8107 sc
= isl_schedule_constraints_add(sc
, t
, tagged
);
8115 /* Given a mapping "cluster_map" from the original instances to
8116 * the cluster instances, add schedule constraints on the clusters
8117 * to "sc" corresponding to the original constraints represented by "edge".
8119 * For non-tagged dependence constraints, the cluster constraints
8120 * are obtained by applying "cluster_map" to the edge->map.
8122 * For tagged dependence constraints, "cluster_map" needs to be applied
8123 * to the domains of the wrapped relations in domain and range
8124 * of the tagged dependence constraints. Pick out the mappings
8125 * from these domains from "cluster_map" and construct their product.
8126 * This mapping can then be applied to the pair of domains.
8128 static __isl_give isl_schedule_constraints
*collect_edge_constraints(
8129 struct isl_sched_edge
*edge
, __isl_keep isl_union_map
*cluster_map
,
8130 __isl_take isl_schedule_constraints
*sc
)
8132 isl_union_map
*umap
;
8134 isl_union_set
*uset
;
8135 isl_union_map
*umap1
, *umap2
;
8140 umap
= isl_union_map_from_map(isl_map_copy(edge
->map
));
8141 umap
= isl_union_map_apply_domain(umap
,
8142 isl_union_map_copy(cluster_map
));
8143 umap
= isl_union_map_apply_range(umap
,
8144 isl_union_map_copy(cluster_map
));
8145 sc
= add_non_conditional_constraints(edge
, umap
, sc
);
8146 isl_union_map_free(umap
);
8148 if (!sc
|| (!is_condition(edge
) && !is_conditional_validity(edge
)))
8151 space
= isl_space_domain(isl_map_get_space(edge
->map
));
8152 uset
= isl_union_set_from_set(isl_set_universe(space
));
8153 umap1
= isl_union_map_copy(cluster_map
);
8154 umap1
= isl_union_map_intersect_domain(umap1
, uset
);
8155 space
= isl_space_range(isl_map_get_space(edge
->map
));
8156 uset
= isl_union_set_from_set(isl_set_universe(space
));
8157 umap2
= isl_union_map_copy(cluster_map
);
8158 umap2
= isl_union_map_intersect_domain(umap2
, uset
);
8159 umap
= isl_union_map_product(umap1
, umap2
);
8161 sc
= add_conditional_constraints(edge
, umap
, sc
);
8163 isl_union_map_free(umap
);
8167 /* How many of the outer dimensions of the band of size "dim" starting
8168 * at "start" can be freely combined without destroying
8169 * the schedule rows that correspond to the inner part
8170 * of any of the intra-statement consecutivity constraints of "node".
8172 * If the first such row belongs to an outer band,
8173 * then all schedule rows need to be preserved and
8174 * the number of rows that can be freely combined is zero.
8175 * Otherwise, it's the minimal number of rows that lie
8176 * outside the first row corresponding to an inner part.
8177 * Only take into account intra-statement consecutivity constraints
8178 * that are still active.
8180 static int n_outer_in_band(struct isl_sched_node
*node
, int start
, int dim
)
8182 struct isl_sched_intra
*intra
;
8186 for (intra
= node
->intra
; intra
; intra
= intra
->next
) {
8187 if (intra
->n_fixed
< 0)
8189 if (intra
->n_fixed
== 0)
8191 if (intra
->first_fixed
<= start
)
8193 else if (n_outer
> intra
->first_fixed
- start
)
8194 n_outer
= intra
->first_fixed
- start
;
8200 /* Construct the space for an intra-statement consecutivity constraint
8201 * on domain "space" with "n_outer" outer expressions and
8202 * "n_inner" inner expressions.
8204 * The returned space is of the form
8206 * space -> [outer -> inner]
8208 static __isl_give isl_space
*intra_space(__isl_take isl_space
*space
,
8209 int n_outer
, int n_inner
)
8213 space
= isl_space_from_domain(space
);
8214 space2
= isl_space_copy(space
);
8215 space
= isl_space_add_dims(space
, isl_dim_out
, n_outer
);
8216 space2
= isl_space_add_dims(space2
, isl_dim_out
, n_inner
);
8217 space
= isl_space_range_product(space
, space2
);
8222 /* Add an intra-statement consecutivity constraint on the cluster
8223 * with index "cluster" to "sc" that preserves the schedule rows of "node"
8224 * that correspond to inner parts of the original intra-statement
8225 * consecutivity constraints.
8227 * The constraint is an identity affine expression with the inner part
8228 * covering all the rows in the schedule that correspond to
8229 * inner parts of the original intra-statement consecutivity constraints.
8230 * This ensures that those inner parts are not modified and remain innermost
8232 * If the inner part would be empty (this includes the case where there are no
8233 * original intra-statement consecutivity constraints), then
8234 * no constraint needs to be added.
8236 static __isl_give isl_schedule_constraints
*collect_intra_consecutivity(
8237 struct isl_sched_node
*node
, __isl_take isl_schedule_constraints
*sc
,
8238 int cluster
, int start
, int dim
)
8245 isl_multi_aff_list
*list
;
8247 n_outer
= n_outer_in_band(node
, start
, dim
);
8251 ctx
= isl_schedule_constraints_get_ctx(sc
);
8252 id
= cluster_id(ctx
, cluster
);
8253 space
= isl_space_set_alloc(ctx
, 0, dim
);
8254 space
= isl_space_set_tuple_id(space
, isl_dim_set
, id
);
8255 space
= intra_space(space
, n_outer
, dim
- n_outer
);
8256 ma
= isl_multi_aff_identity(space
);
8258 list
= isl_schedule_constraints_get_intra_consecutivity(sc
);
8259 list
= isl_multi_aff_list_add(list
, ma
);
8260 sc
= isl_schedule_constraints_set_intra_consecutivity(sc
, list
);
8265 /* Given a mapping "cluster_map" from the original instances to
8266 * the cluster instances, add schedule constraints on the clusters
8267 * to "sc" corresponding to all edges in "graph" between nodes that
8268 * belong to SCCs that are marked for merging in "c".
8269 * Also add intra-statement consecutivity constraints
8270 * that preserve the schedule rows that correspond
8271 * to the inner parts of intra-statement consecutivity constraints
8272 * on the original graph.
8273 * Any inter-statement consecutivity constraint forces the two
8274 * corresponding statements to be part of the same cluster.
8275 * There are therefore no cross-cluster inter-statement consecutivity
8278 static __isl_give isl_schedule_constraints
*collect_constraints(
8279 struct isl_sched_graph
*graph
, struct isl_clustering
*c
,
8280 __isl_keep isl_union_map
*cluster_map
,
8281 __isl_take isl_schedule_constraints
*sc
)
8285 for (i
= 0; i
< graph
->n_edge
; ++i
) {
8286 struct isl_sched_edge
*edge
= &graph
->edge
[i
];
8288 if (!c
->scc_in_merge
[edge
->src
->scc
])
8290 if (!c
->scc_in_merge
[edge
->dst
->scc
])
8292 sc
= collect_edge_constraints(edge
, cluster_map
, sc
);
8295 for (i
= 0; i
< graph
->n
; ++i
) {
8296 struct isl_sched_node
*node
= &graph
->node
[i
];
8297 struct isl_sched_graph
*scc
;
8298 int start
, dim
, cluster
;
8300 if (!c
->scc_in_merge
[node
->scc
])
8302 cluster
= c
->scc_cluster
[node
->scc
];
8303 scc
= &c
->scc
[node
->scc
];
8304 start
= scc
->band_start
;
8305 dim
= scc
->n_total_row
- start
;
8306 sc
= collect_intra_consecutivity(node
, sc
, cluster
, start
, dim
);
8312 /* Construct a dependence graph for scheduling clusters with respect
8313 * to each other and store the result in "merge_graph".
8314 * In particular, the nodes of the graph correspond to the schedule
8315 * dimensions of the current bands of those clusters that have been
8316 * marked for merging in "c".
8318 * First construct an isl_schedule_constraints object for this domain
8319 * by transforming the edges in "graph" to the domain.
8320 * Then initialize a dependence graph for scheduling from these
8323 static isl_stat
init_merge_graph(isl_ctx
*ctx
, struct isl_sched_graph
*graph
,
8324 struct isl_clustering
*c
, struct isl_sched_graph
*merge_graph
)
8326 isl_union_set
*domain
;
8327 isl_union_map
*cluster_map
;
8328 isl_schedule_constraints
*sc
;
8331 domain
= collect_domain(ctx
, graph
, c
);
8332 sc
= isl_schedule_constraints_on_domain(domain
);
8334 return isl_stat_error
;
8335 cluster_map
= collect_cluster_map(ctx
, graph
, c
);
8336 sc
= collect_constraints(graph
, c
, cluster_map
, sc
);
8337 isl_union_map_free(cluster_map
);
8339 r
= graph_init(merge_graph
, sc
);
8341 isl_schedule_constraints_free(sc
);
8346 /* Compute the maximal number of remaining schedule rows that still need
8347 * to be computed for the nodes that belong to clusters with the maximal
8348 * dimension for the current band (i.e., the band that is to be merged).
8349 * Only clusters that are about to be merged are considered.
8350 * "maxvar" is the maximal dimension for the current band.
8351 * "c" contains information about the clusters.
8353 * Return the maximal number of remaining schedule rows or -1 on error.
8355 static int compute_maxvar_max_slack(int maxvar
, struct isl_clustering
*c
)
/* Scan the c->n SCC entries recorded in "c". */
8361 for (i
= 0; i
< c
->n
; ++i
) {
8363 struct isl_sched_graph
*scc
;
/* Detect SCCs that are not flagged in c->scc_in_merge
 * (the statement executed for them is not visible in this excerpt).
 */
8365 if (!c
->scc_in_merge
[i
])
/* Dimension of the current band: the rows from band_start
 * up to n_total_row.
 */
8368 nvar
= scc
->n_total_row
- scc
->band_start
;
/* Inspect every node of this SCC. */
8371 for (j
= 0; j
< scc
->n
; ++j
) {
8372 struct isl_sched_node
*node
= &scc
->node
[j
];
/* node_update_vmap signals failure through a negative return value. */
8375 if (node_update_vmap(node
) < 0)
/* Slack of the node: its number of variables minus its rank.
 * It is compared against the running maximum "max_slack".
 */
8377 slack
= node
->nvar
- node
->rank
;
8378 if (slack
> max_slack
)
8386 /* If there are any clusters where the dimension of the current band
8387 * (i.e., the band that is to be merged) is smaller than "maxvar" and
8388 * if there are any nodes in such a cluster where the number
8389 * of remaining schedule rows that still need to be computed
8390 * is greater than "max_slack", then return the smallest current band
8391 * dimension of all these clusters. Otherwise return the original value
8392 * of "maxvar". Return -1 in case of any error.
8393 * Only clusters that are about to be merged are considered.
8394 * "c" contains information about the clusters.
8396 static int limit_maxvar_to_slack(int maxvar
, int max_slack
,
8397 struct isl_clustering
*c
)
/* Scan the c->n SCC entries recorded in "c". */
8401 for (i
= 0; i
< c
->n
; ++i
) {
8403 struct isl_sched_graph
*scc
;
/* Detect SCCs that are not flagged in c->scc_in_merge
 * (the statement executed for them is not visible in this excerpt).
 */
8405 if (!c
->scc_in_merge
[i
])
/* Dimension of the current band: the rows from band_start
 * up to n_total_row.
 */
8408 nvar
= scc
->n_total_row
- scc
->band_start
;
/* Inspect every node of this SCC. */
8411 for (j
= 0; j
< scc
->n
; ++j
) {
8412 struct isl_sched_node
*node
= &scc
->node
[j
];
/* node_update_vmap signals failure through a negative return value. */
8415 if (node_update_vmap(node
) < 0)
/* Slack of the node: its number of variables minus its rank.
 * A node whose slack exceeds "max_slack" enters the block opened below
 * (the body of that block is not visible in this excerpt).
 */
8417 slack
= node
->nvar
- node
->rank
;
8418 if (slack
> max_slack
) {
8428 /* Adjust merge_graph->maxvar based on the number of remaining schedule rows
8429 * that still need to be computed. In particular, if there is a node
8430 * in a cluster where the dimension of the current band is smaller
8431 * than merge_graph->maxvar, but the number of remaining schedule rows
8432 * is greater than that of any node in a cluster with the maximal
8433 * dimension for the current band (i.e., merge_graph->maxvar),
8434 * then adjust merge_graph->maxvar to the (smallest) current band dimension
8435 * of those clusters. Without this adjustment, the total number of
8436 * schedule dimensions would be increased, resulting in a skewed view
8437 * of the number of coincident dimensions.
8438 * "c" contains information about the clusters.
8440 * If the maximize_band_depth option is set and merge_graph->maxvar is reduced,
8441 * then there is no point in attempting any merge since it will be rejected
8442 * anyway. Set merge_graph->maxvar to zero in such cases.
8444 static isl_stat
adjust_maxvar_to_slack(isl_ctx
*ctx
,
8445 struct isl_sched_graph
*merge_graph
, struct isl_clustering
*c
)
8447 int max_slack
, maxvar
;
/* Step 1: obtain the slack bound for the current merge_graph->maxvar. */
8449 max_slack
= compute_maxvar_max_slack(merge_graph
->maxvar
, c
);
/* NOTE(review): the condition guarding this error return is not visible
 * in this excerpt.
 */
8451 return isl_stat_error
;
/* Step 2: derive a possibly smaller band dimension from that bound. */
8452 maxvar
= limit_maxvar_to_slack(merge_graph
->maxvar
, max_slack
, c
);
/* NOTE(review): the condition guarding this error return is not visible
 * in this excerpt.
 */
8454 return isl_stat_error
;
/* Only act when the computed limit is strictly smaller than
 * the current value of merge_graph->maxvar.
 */
8456 if (maxvar
< merge_graph
->maxvar
) {
/* With the maximize_band_depth option set, maxvar is set to zero. */
8457 if (isl_options_get_schedule_maximize_band_depth(ctx
))
8458 merge_graph
->maxvar
= 0;
/* Store the reduced value.
 * NOTE(review): whatever separates this assignment from the previous one
 * (presumably an "else") is not visible in this excerpt - confirm.
 */
8460 merge_graph
->maxvar
= maxvar
;
8466 /* Return the number of coincident dimensions in the current band of "graph",
8467 * where the nodes of "graph" are assumed to be scheduled by a single band.
8469 static int get_n_coincident(struct isl_sched_graph
*graph
)
/* Walk the rows of the current band, from band_start up to n_total_row. */
8473 for (i
= graph
->band_start
; i
< graph
->n_total_row
; ++i
)
/* Only the coincident flags of the first node (node[0]) are consulted.
 * A row whose flag is not set is detected here; the statement executed
 * in that case is not visible in this excerpt.
 */
8474 if (!graph
->node
[0].coincident
[i
])
/* The result is the position reached by "i" relative to the band start. */
8477 return i
- graph
->band_start
;
8480 /* Should the clusters be merged based on the cluster schedule
8481 * in the current (and only) band of "merge_graph", given that
8482 * coincidence should be maximized?
8484 * If the number of coincident schedule dimensions in the merged band
8485 * would be less than the maximal number of coincident schedule dimensions
8486 * in any of the merged clusters, then the clusters should not be merged.
8488 static isl_bool
ok_to_merge_coincident(struct isl_clustering
*c
,
8489 struct isl_sched_graph
*merge_graph
)
/* Determine the largest number of coincident dimensions
 * over the SCC entries of "c".
 */
8496 for (i
= 0; i
< c
->n
; ++i
) {
/* Detect SCCs that are not flagged in c->scc_in_merge
 * (the statement executed for them is not visible in this excerpt).
 */
8497 if (!c
->scc_in_merge
[i
])
8499 n_coincident
= get_n_coincident(&c
->scc
[i
]);
/* Keep the running maximum. */
8500 if (n_coincident
> max_coincident
)
8501 max_coincident
= n_coincident
;
/* Number of coincident dimensions in the band of the merged graph. */
8504 n_coincident
= get_n_coincident(merge_graph
);
/* The merge is acceptable if it does not fall below that maximum. */
8506 return n_coincident
>= max_coincident
;
8509 /* Return the transformation on "node" expressed by the current (and only)
8510 * band of "merge_graph" applied to the clusters in "c".
8512 * First find the representation of "node" in its SCC in "c" and
8513 * extract the transformation expressed by the current band.
8514 * Then extract the transformation applied by "merge_graph"
8515 * to the cluster to which this SCC belongs.
8516 * Combine the two to obtain the complete transformation on the node.
8518 * Note that the range of the first transformation is an anonymous space,
8519 * while the domain of the second is named "cluster_X". The range
8520 * of the former therefore needs to be adjusted before the two
8523 static __isl_give isl_map
*extract_node_transformation(isl_ctx
*ctx
,
8524 struct isl_sched_node
*node
, struct isl_clustering
*c
,
8525 struct isl_sched_graph
*merge_graph
)
8527 struct isl_sched_node
*scc_node
, *cluster_node
;
8531 isl_multi_aff
*ma
, *ma2
;
/* Look up the counterpart of "node" in the graph of its own SCC,
 * using the space of the node as the key.
 */
8533 scc_node
= graph_find_node(ctx
, &c
->scc
[node
->scc
], node
->space
);
/* A non-NULL result that is_node rejects is reported as an internal
 * error (the cleanup argument of isl_die is not visible in this excerpt).
 */
8534 if (scc_node
&& !is_node(&c
->scc
[node
->scc
], scc_node
))
8535 isl_die(ctx
, isl_error_internal
, "unable to find node",
/* Extent of the current band of that SCC: "n" rows starting at "start". */
8537 start
= c
->scc
[node
->scc
].band_start
;
8538 n
= c
->scc
[node
->scc
].n_total_row
- start
;
/* First transformation: the band rows of the node inside its SCC. */
8539 ma
= node_extract_partial_schedule_multi_aff(scc_node
, start
, n
);
/* Find the node of "merge_graph" that lives in the cluster space
 * of the cluster to which this SCC belongs.
 */
8540 space
= cluster_space(&c
->scc
[node
->scc
], c
->scc_cluster
[node
->scc
]);
8541 cluster_node
= graph_find_node(ctx
, merge_graph
, space
);
/* On a lookup mismatch, "space" is replaced by the result
 * of isl_space_free.
 */
8542 if (cluster_node
&& !is_node(merge_graph
, cluster_node
))
8543 isl_die(ctx
, isl_error_internal
, "unable to find cluster",
8544 space
= isl_space_free(space
));
/* Give the output tuple of "ma" the tuple id of the cluster space,
 * so that it can be combined with the cluster transformation below.
 */
8545 id
= isl_space_get_tuple_id(space
, isl_dim_set
);
8546 ma
= isl_multi_aff_set_tuple_id(ma
, isl_dim_out
, id
);
8547 isl_space_free(space
);
/* Second transformation: all rows of "merge_graph", starting at row 0. */
8548 n
= merge_graph
->n_total_row
;
8549 ma2
= node_extract_partial_schedule_multi_aff(cluster_node
, 0, n
);
/* Combine the two by pulling "ma2" back along "ma". */
8550 ma
= isl_multi_aff_pullback_multi_aff(ma2
, ma
);
8552 return isl_map_from_multi_aff(ma
);
8555 /* Given a set of distances "set", are they bounded by a small constant
8556 * in direction "pos"?
8557 * In practice, check if they are bounded by 2 by checking that there
8558 * are no elements with a value greater than or equal to 3 or
8559 * smaller than or equal to -3.
8561 static isl_bool
distance_is_bounded(__isl_keep isl_set
*set
, int pos
)
/* NOTE(review): the condition guarding this error return is not visible
 * in this excerpt.
 */
8567 return isl_bool_error
;
/* Upper side: on a copy of "set", impose a lower bound of 3 at "pos";
 * the distances pass this half of the test if nothing remains.
 */
8569 test
= isl_set_copy(set
);
8570 test
= isl_set_lower_bound_si(test
, isl_dim_set
, pos
, 3);
8571 bounded
= isl_set_is_empty(test
);
/* An error or a negative answer settles the outcome at this point
 * (the statement executed in that case is not visible in this excerpt).
 */
8574 if (bounded
< 0 || !bounded
)
/* Lower side: same check with an upper bound of -3 at "pos". */
8577 test
= isl_set_copy(set
);
8578 test
= isl_set_upper_bound_si(test
, isl_dim_set
, pos
, -3);
8579 bounded
= isl_set_is_empty(test
);
8585 /* Does the set "set" have a fixed (but possibly parametric) value
8586 * at dimension "pos"?
8588 static isl_bool
has_single_value(__isl_keep isl_set
*set
, int pos
)
/* NOTE(review): the condition guarding this error return is not visible
 * in this excerpt.
 */
8594 return isl_bool_error
;
/* From here on "set" refers to a copy, which is then reduced
 * to the single dimension "pos".
 */
8595 set
= isl_set_copy(set
);
8596 n
= isl_set_dim(set
, isl_dim_set
);
/* First project out the dimensions after "pos" ... */
8597 set
= isl_set_project_out(set
, isl_dim_set
, pos
+ 1, n
- (pos
+ 1));
/* ... then the "pos" dimensions in front of it. */
8598 set
= isl_set_project_out(set
, isl_dim_set
, 0, pos
);
/* The answer is whether what remains is a singleton. */
8599 single
= isl_set_is_singleton(set
);
8605 /* Does "map" have a fixed (but possibly parametric) value
8606 * at dimension "pos" of either its domain or its range?
8608 static isl_bool
has_singular_src_or_dst(__isl_keep isl_map
*map
, int pos
)
/* Check the domain of (a copy of) "map" first. */
8613 set
= isl_map_domain(isl_map_copy(map
));
8614 single
= has_single_value(set
, pos
);
/* An error or a positive answer settles the outcome at this point
 * (the statement executed in that case is not visible in this excerpt).
 */
8617 if (single
< 0 || single
)
/* Otherwise perform the same check on the range. */
8620 set
= isl_map_range(isl_map_copy(map
));
8621 single
= has_single_value(set
, pos
);
8627 /* Does the edge "edge" from "graph" have bounded dependence distances
8628 * in the merged graph "merge_graph" of a selection of clusters in "c"?
8630 * Extract the complete transformations of the source and destination
8631 * nodes of the edge, apply them to the edge constraints and
8632 * compute the differences. Finally, check if these differences are bounded
8633 * in each direction.
8635 * If the dimension of the band is greater than the number of
8636 * dimensions that can be expected to be optimized by the edge
8637 * (based on its weight), then also allow the differences to be unbounded
8638 * in the remaining dimensions, but only if either the source or
8639 * the destination has a fixed value in that direction.
8640 * This allows a statement that produces values that are used by
8641 * several instances of another statement to be merged with that
8643 * However, merging such clusters will introduce an inherently
8644 * large proximity distance inside the merged cluster, meaning
8645 * that proximity distances will no longer be optimized in
8646 * subsequent merges. These merges are therefore only allowed
8647 * after all other possible merges have been tried.
8648 * The first time such a merge is encountered, the weight of the edge
8649 * is replaced by a negative weight. The second time (i.e., after
8650 * all merges over edges with a non-negative weight have been tried),
8651 * the merge is allowed.
8653 static isl_bool
has_bounded_distances(isl_ctx
*ctx
, struct isl_sched_edge
*edge
,
8654 struct isl_sched_graph
*graph
, struct isl_clustering
*c
,
8655 struct isl_sched_graph
*merge_graph
)
/* Apply the complete transformation of the source node to the domain
 * and that of the destination node to the range of a copy of the edge map.
 */
8662 map
= isl_map_copy(edge
->map
);
8663 t
= extract_node_transformation(ctx
, edge
->src
, c
, merge_graph
);
8664 map
= isl_map_apply_domain(map
, t
);
8665 t
= extract_node_transformation(ctx
, edge
->dst
, c
, merge_graph
);
8666 map
= isl_map_apply_range(map
, t
);
/* Dependence distances in the transformed space; "map" itself is kept
 * because it is passed to has_singular_src_or_dst below.
 */
8667 dist
= isl_map_deltas(isl_map_copy(map
));
8669 bounded
= isl_bool_true
;
8670 n
= isl_set_dim(dist
, isl_dim_set
);
/* Number of dimensions of "dist" in excess of the edge weight. */
8671 n_slack
= n
- edge
->weight
;
/* A negative weight is one that had graph->max_weight + 1 subtracted
 * (see the subtraction further down); that offset is removed from
 * "n_slack" here.
 */
8672 if (edge
->weight
< 0)
8673 n_slack
-= graph
->max_weight
+ 1;
/* Examine each direction in turn. */
8674 for (i
= 0; i
< n
; ++i
) {
8675 isl_bool bounded_i
, singular_i
;
8677 bounded_i
= distance_is_bounded(dist
, i
);
/* NOTE(review): the handling of "bounded_i" between the call above and
 * the test below is not visible in this excerpt.
 */
8682 if (edge
->weight
>= 0)
8683 bounded
= isl_bool_false
;
/* Check whether the source or the destination has a fixed value
 * in this direction.
 */
8687 singular_i
= has_singular_src_or_dst(map
, i
);
/* NOTE(review): the handling of "singular_i" before the assignment below
 * is not visible in this excerpt.
 */
8692 bounded
= isl_bool_false
;
/* If the result is "not bounded" although all directions were processed
 * (i >= n) and the weight is still non-negative, make the weight negative
 * by subtracting graph->max_weight + 1.
 */
8695 if (!bounded
&& i
>= n
&& edge
->weight
>= 0)
8696 edge
->weight
-= graph
->max_weight
+ 1;
/* Error exit (the surrounding statements are not visible in this excerpt). */
8704 return isl_bool_error
;
8707 /* Should the clusters be merged based on the cluster schedule
8708 * in the current (and only) band of "merge_graph"?
8709 * "graph" is the original dependence graph, while "c" records
8710 * which SCCs are involved in the latest merge.
8712 * In particular, is there at least one proximity constraint
8713 * that is optimized by the merge?
8715 * A proximity constraint is considered to be optimized
8716 * if the dependence distances are small.
8718 static isl_bool
ok_to_merge_proximity(isl_ctx
*ctx
,
8719 struct isl_sched_graph
*graph
, struct isl_clustering
*c
,
8720 struct isl_sched_graph
*merge_graph
)
/* Look for an edge of "graph" that passes all the filters below and
 * has bounded distances.
 */
8724 for (i
= 0; i
< graph
->n_edge
; ++i
) {
8725 struct isl_sched_edge
*edge
= &graph
->edge
[i
];
/* Filters (the statement executed when a filter applies is not visible
 * in this excerpt): non-proximity edges, ...
 */
8728 if (!is_proximity(edge
))
/* ... edges whose source SCC is not flagged in c->scc_in_merge, ... */
8730 if (!c
->scc_in_merge
[edge
->src
->scc
])
/* ... edges whose destination SCC is not flagged in c->scc_in_merge, ... */
8732 if (!c
->scc_in_merge
[edge
->dst
->scc
])
/* ... and edges whose two end points already share a cluster. */
8734 if (c
->scc_cluster
[edge
->dst
->scc
] ==
8735 c
->scc_cluster
[edge
->src
->scc
])
8737 bounded
= has_bounded_distances(ctx
, edge
, graph
, c
,
/* An error or a positive answer settles the outcome at this point
 * (the statement executed in that case is not visible in this excerpt).
 */
8739 if (bounded
< 0 || bounded
)
/* Result when the loop has not produced an answer. */
8743 return isl_bool_false
;
8746 /* Should the clusters be merged based on the cluster schedule
8747 * in the current (and only) band of "merge_graph"?
8748 * "graph" is the original dependence graph, while "c" records
8749 * which SCCs are involved in the latest merge.
8751 * If the current band is empty, then the clusters should not be merged.
8753 * If the band depth should be maximized and the merge schedule
8754 * is incomplete (meaning that the dimension of some of the schedule
8755 * bands in the original schedule will be reduced), then the clusters
8756 * should not be merged.
8758 * If the schedule_maximize_coincidence option is set, then check that
8759 * the number of coincident schedule dimensions is not reduced.
8761 * Finally, only allow the merge if at least one proximity
8762 * constraint is optimized.
8764 static isl_bool
ok_to_merge(isl_ctx
*ctx
, struct isl_sched_graph
*graph
,
8765 struct isl_clustering
*c
, struct isl_sched_graph
*merge_graph
)
/* Reject when the current band is empty, i.e., when no rows were added
 * after band_start.
 */
8767 if (merge_graph
->n_total_row
== merge_graph
->band_start
)
8768 return isl_bool_false
;
/* Reject when maximize_band_depth is set and fewer than
 * merge_graph->maxvar rows are available.
 */
8770 if (isl_options_get_schedule_maximize_band_depth(ctx
) &&
8771 merge_graph
->n_total_row
< merge_graph
->maxvar
)
8772 return isl_bool_false
;
/* With maximize_coincidence set, additionally consult
 * ok_to_merge_coincident (how "ok" is acted upon is not visible
 * in this excerpt).
 */
8774 if (isl_options_get_schedule_maximize_coincidence(ctx
)) {
8777 ok
= ok_to_merge_coincident(c
, merge_graph
);
/* The final verdict comes from the proximity check. */
8782 return ok_to_merge_proximity(ctx
, graph
, c
, merge_graph
);
8785 /* Apply the schedule in "t_node" to the "n" rows starting at "first"
8786 * of the schedule in "node" and return the result.
8788 * That is, essentially compute
8790 * T * N(first:first+n-1)
8792 * taking into account the constant term and the parameter coefficients
8795 static __isl_give isl_mat
*node_transformation(isl_ctx
*ctx
,
8796 struct isl_sched_node
*t_node
, struct isl_sched_node
*node
,
/* NOTE(review): the remainder of the parameter list ("first" and "n" are
 * used below) is not visible in this excerpt.
 */
8801 int n_row
, n_col
, n_param
, n_var
;
8803 n_param
= node
->nparam
;
/* The result has one row per row of the schedule in "t_node" and
 * as many columns as the schedule of "node".
 */
8805 n_row
= isl_mat_rows(t_node
->sched
);
8806 n_col
= isl_mat_cols(node
->sched
);
8807 t
= isl_mat_alloc(ctx
, n_row
, n_col
);
8810 for (i
= 0; i
< n_row
; ++i
) {
/* Start each row from the first 1 + n_param entries of the corresponding
 * row of "t_node" and clear the n_var entries that follow.
 */
8811 isl_seq_cpy(t
->row
[i
], t_node
->sched
->row
[i
], 1 + n_param
);
8812 isl_seq_clr(t
->row
[i
] + 1 + n_param
, n_var
);
/* Add row first + j of "node", over its first 1 + n_param + n_var
 * entries, multiplied by entry 1 + n_param + j of the row of "t_node".
 */
8813 for (j
= 0; j
< n
; ++j
)
8814 isl_seq_addmul(t
->row
[i
],
8815 t_node
->sched
->row
[i
][1 + n_param
+ j
],
8816 node
->sched
->row
[first
+ j
],
8817 1 + n_param
+ n_var
);
8822 /* Apply the cluster schedule in "t_node" to the current band
8823 * schedule of the nodes in "graph".
8825 * In particular, replace the rows starting at band_start
8826 * by the result of applying the cluster schedule in "t_node"
8827 * to the original rows.
8829 * The coincidence of the schedule is determined by the coincidence
8830 * of the cluster schedule.
8832 static isl_stat
transform(isl_ctx
*ctx
, struct isl_sched_graph
*graph
,
8833 struct isl_sched_node
*t_node
)
/* The current band consists of "n" rows starting at "start". */
8839 start
= graph
->band_start
;
8840 n
= graph
->n_total_row
- start
;
/* Number of rows of the schedule in "t_node"; used as the number
 * of rows of the replacement band.
 */
8842 n_new
= isl_mat_rows(t_node
->sched
);
8843 for (i
= 0; i
< graph
->n
; ++i
) {
8844 struct isl_sched_node
*node
= &graph
->node
[i
];
/* Compute the transformed rows, drop the "n" band rows and
 * concatenate the transformed rows with what is left.
 */
8847 t
= node_transformation(ctx
, t_node
, node
, start
, n
);
8848 node
->sched
= isl_mat_drop_rows(node
->sched
, start
, n
);
8849 node
->sched
= isl_mat_concat(node
->sched
, t
);
/* The band_sched field is freed and replaced by the result
 * of isl_map_free.
 */
8850 node
->band_sched
= isl_map_free(node
->band_sched
);
/* NOTE(review): the condition guarding this error return is not visible
 * in this excerpt.
 */
8852 return isl_stat_error
;
/* The coincidence flags of the new band rows are copied from "t_node". */
8853 for (j
= 0; j
< n_new
; ++j
)
8854 node
->coincident
[start
+ j
] = t_node
->coincident
[j
];
/* Update the row counts of the graph: "n" rows were removed and
 * "n_new" rows were added.
 * NOTE(review): only one of the two subtractions is visible in this excerpt.
 */
8856 graph
->n_total_row
-= n
;
8858 graph
->n_total_row
+= n_new
;
8859 graph
->n_row
+= n_new
;
8864 /* Merge the clusters marked for merging in "c" into a single
8865 * cluster using the cluster schedule in the current band of "merge_graph".
8866 * The representative SCC for the new cluster is the SCC with
8867 * the smallest index.
8869 * The current band schedule of each SCC in the new cluster is obtained
8870 * by applying the schedule of the corresponding original cluster
8871 * to the original band schedule.
8872 * All SCCs in the new cluster have the same number of schedule rows.
8874 static isl_stat
merge(isl_ctx
*ctx
, struct isl_clustering
*c
,
8875 struct isl_sched_graph
*merge_graph
)
/* Scan the c->n SCC entries recorded in "c". */
8881 for (i
= 0; i
< c
->n
; ++i
) {
8882 struct isl_sched_node
*node
;
/* Detect SCCs that are not flagged in c->scc_in_merge
 * (the statement executed for them is not visible in this excerpt).
 */
8884 if (!c
->scc_in_merge
[i
])
/* Find the node of "merge_graph" that lives in the cluster space
 * of the cluster to which this SCC currently belongs.
 */
8888 space
= cluster_space(&c
->scc
[i
], c
->scc_cluster
[i
]);
8889 node
= graph_find_node(ctx
, merge_graph
, space
);
8890 isl_space_free(space
);
/* NOTE(review): the condition guarding this error return is not visible
 * in this excerpt.
 */
8892 return isl_stat_error
;
/* A result that is_node rejects is reported as an internal error. */
8893 if (!is_node(merge_graph
, node
))
8894 isl_die(ctx
, isl_error_internal
,
8895 "unable to find cluster",
8896 return isl_stat_error
);
/* Apply the schedule of that node to the current band of the SCC. */
8897 if (transform(ctx
, &c
->scc
[i
], node
) < 0)
8898 return isl_stat_error
;
/* Record the new cluster of the SCC (how "cluster" is determined
 * is not visible in this excerpt).
 */
8899 c
->scc_cluster
[i
] = cluster
;
8905 /* Try and merge the clusters of SCCs marked in c->scc_in_merge
8906 * by scheduling the current cluster bands with respect to each other.
8908 * Construct a dependence graph with a space for each cluster and
8909 * with the coordinates of each space corresponding to the schedule
8910 * dimensions of the current band of that cluster.
8911 * Construct a cluster schedule in this cluster dependence graph and
8912 * apply it to the current cluster bands if it is applicable
8913 * according to ok_to_merge.
8915 * If the number of remaining schedule dimensions in a cluster
8916 * with a non-maximal current schedule dimension is greater than
8917 * the number of remaining schedule dimensions in clusters
8918 * with a maximal current schedule dimension, then restrict
8919 * the number of rows to be computed in the cluster schedule
8920 * to the minimal such non-maximal current schedule dimension.
8921 * Do this by adjusting merge_graph.maxvar.
8923 * Return isl_bool_true if the clusters have effectively been merged
8924 * into a single cluster.
8926 * Note that since the standard scheduling algorithm minimizes the maximal
8927 * distance over proximity constraints, the proximity constraints between
8928 * the merged clusters may not be optimized any further than what is
8929 * sufficient to bring the distances within the limits of the internal
8930 * proximity constraints inside the individual clusters.
8931 * It may therefore make sense to perform an additional translation step
8932 * to bring the clusters closer to each other, while maintaining
8933 * the linear part of the merging schedule found using the standard
8934 * scheduling algorithm.
8936 static isl_bool
try_merge(isl_ctx
*ctx
, struct isl_sched_graph
*graph
,
8937 struct isl_clustering
*c
)
/* The merge graph is a zero-initialized local that is released
 * through graph_free below.
 */
8939 struct isl_sched_graph merge_graph
= { 0 };
/* Each of the following steps reports failure through a negative return
 * value (the statements executed on failure are not visible
 * in this excerpt).
 */
8942 if (init_merge_graph(ctx
, graph
, c
, &merge_graph
) < 0)
8945 if (compute_maxvar(&merge_graph
) < 0)
8947 if (adjust_maxvar_to_slack(ctx
, &merge_graph
,c
) < 0)
8949 if (compute_schedule_wcc_band(ctx
, &merge_graph
) < 0)
/* Decide whether the computed cluster schedule should be applied and,
 * if so, apply it.
 * NOTE(review): "merged" is an isl_bool, so a negative (error) value
 * also satisfies "merged &&" and merge is then still called - confirm
 * that this is intended.
 */
8951 merged
= ok_to_merge(ctx
, graph
, c
, &merge_graph
);
8952 if (merged
&& merge(ctx
, c
, &merge_graph
) < 0)
/* Release of the merge graph; presumably the regular exit. */
8955 graph_free(ctx
, &merge_graph
);
/* Release of the merge graph followed by an error return. */
8958 graph_free(ctx
, &merge_graph
);
8959 return isl_bool_error
;
8962 /* Is there any edge marked "no_merge" between two SCCs that are
8963 * about to be merged (i.e., that are set in "scc_in_merge")?
8964 * "merge_edge" is the proximity edge along which the clusters of SCCs
8965 * are going to be merged.
8967 * If there is any edge between two SCCs with a negative weight,
8968 * while the weight of "merge_edge" is non-negative, then this
8969 * means that the edge was postponed. "merge_edge" should then
8970 * also be postponed since merging along the edge with negative weight should
8971 * be postponed until all edges with non-negative weight have been tried.
8972 * Replace the weight of "merge_edge" by a negative weight as well and
8973 * tell the caller not to attempt a merge.
8975 static int any_no_merge(struct isl_sched_graph
*graph
, int *scc_in_merge
,
8976 struct isl_sched_edge
*merge_edge
)
/* Scan all edges of "graph". */
8980 for (i
= 0; i
< graph
->n_edge
; ++i
) {
8981 struct isl_sched_edge
*edge
= &graph
->edge
[i
];
/* Detect edges whose source or destination SCC is not flagged
 * in "scc_in_merge" (the statement executed for them is not visible
 * in this excerpt).
 */
8983 if (!scc_in_merge
[edge
->src
->scc
])
8985 if (!scc_in_merge
[edge
->dst
->scc
])
/* "edge" has a negative weight while "merge_edge" does not:
 * make the weight of "merge_edge" negative as well by subtracting
 * graph->max_weight + 1.
 */
8989 if (merge_edge
->weight
>= 0 && edge
->weight
< 0) {
8990 merge_edge
->weight
-= graph
->max_weight
+ 1;
8998 /* Merge the two clusters in "c" connected by the edge in "graph"
8999 * with index "edge" into a single cluster.
9000 * If it turns out to be impossible to merge these two clusters,
9001 * then mark the edge as "no_merge" such that it will not be
9004 * First mark all SCCs that need to be merged. This includes the SCCs
9005 * in the two clusters, but it may also include the SCCs
9006 * of intermediate clusters.
9007 * If there is already a no_merge edge between any pair of such SCCs,
9008 * then simply mark the current edge as no_merge as well.
9009 * Likewise, if any of those edges was postponed by has_bounded_distances,
9010 * then postpone the current edge as well.
9011 * Otherwise, try and merge the clusters and mark "edge" as "no_merge"
9012 * if the clusters did not end up getting merged, unless the non-merge
9013 * is due to the fact that the edge was postponed. This postponement
9014 * can be recognized by a change in weight (from non-negative to negative).
9016 static isl_stat
merge_clusters_along_edge(isl_ctx
*ctx
,
9017 struct isl_sched_graph
*graph
, int edge
, struct isl_clustering
*c
)
/* Remember the weight of the edge on entry; it is compared against
 * the weight after the merge attempt at the end of this function.
 */
9020 int edge_weight
= graph
->edge
[edge
].weight
;
/* Mark the SCCs involved in this merge. */
9022 if (mark_merge_sccs(ctx
, graph
, edge
, c
) < 0)
9023 return isl_stat_error
;
/* If any_no_merge objects, no merge is attempted. */
9025 if (any_no_merge(graph
, c
->scc_in_merge
, &graph
->edge
[edge
]))
9026 merged
= isl_bool_false
;
/* NOTE(review): whatever separates this assignment from the previous one
 * (presumably an "else") is not visible in this excerpt - confirm.
 */
9028 merged
= try_merge(ctx
, graph
, c
);
/* NOTE(review): the condition guarding this error return is not visible
 * in this excerpt.
 */
9030 return isl_stat_error
;
/* Mark the edge as no_merge only if nothing was merged and the weight
 * of the edge is still the one recorded on entry.
 */
9031 if (!merged
&& edge_weight
== graph
->edge
[edge
].weight
)
9032 graph
->edge
[edge
].no_merge
= 1;
9037 /* Does "node" belong to the cluster identified by "cluster"?
9039 static int node_cluster_exactly(struct isl_sched_node
*node
, int cluster
)
9041 return node
->cluster
== cluster
;
9044 /* Does "edge" connect two nodes belonging to the cluster
9045 * identified by "cluster"?
9047 static int edge_cluster_exactly(struct isl_sched_edge
*edge
, int cluster
)
9049 return edge
->src
->cluster
== cluster
&& edge
->dst
->cluster
== cluster
;
9052 /* Swap the schedule of "node1" and "node2".
9053 * Both nodes have been derived from the same node in a common parent graph.
9054 * Since the "coincident" field is shared with that node
9055 * in the parent graph, there is no need to also swap this field.
9057 static void swap_sched(struct isl_sched_node
*node1
,
9058 struct isl_sched_node
*node2
)
/* Temporary for exchanging the band_sched fields (the declaration
 * of the "sched" temporary is not visible in this excerpt).
 */
9061 isl_map
*band_sched
;
/* Exchange the "sched" fields of the two nodes through the temporary. */
9063 sched
= node1
->sched
;
9064 node1
->sched
= node2
->sched
;
9065 node2
->sched
= sched
;
/* Exchange the "band_sched" fields in the same way. */
9067 band_sched
= node1
->band_sched
;
9068 node1
->band_sched
= node2
->band_sched
;
9069 node2
->band_sched
= band_sched
;
9072 /* Copy the current band schedule from the SCCs that form the cluster
9073 * with index "pos" to the actual cluster at position "pos".
9074 * By construction, the index of the first SCC that belongs to the cluster
9077 * The order of the nodes inside both the SCCs and the cluster
9078 * is assumed to be the same as the order in the original "graph".
9080 * Since the SCC graphs will no longer be used after this function,
9081 * the schedules are actually swapped rather than copied.
9083 static isl_stat
copy_partial(struct isl_sched_graph
*graph
,
9084 struct isl_clustering
*c
, int pos
)
/* Initialize the row counts and maxvar of the cluster from the SCC
 * stored at the same position "pos".
 */
9088 c
->cluster
[pos
].n_total_row
= c
->scc
[pos
].n_total_row
;
9089 c
->cluster
[pos
].n_row
= c
->scc
[pos
].n_row
;
9090 c
->cluster
[pos
].maxvar
= c
->scc
[pos
].maxvar
;
/* Visit the nodes of the original graph in order. */
9092 for (i
= 0; i
< graph
->n
; ++i
) {
/* Detect nodes that do not belong to cluster "pos"
 * (the statement executed for them is not visible in this excerpt).
 */
9096 if (graph
->node
[i
].cluster
!= pos
)
/* "s" is the SCC of the node; "k" is the current value of the per-SCC
 * counter c->scc_node[s], which is incremented as a side effect.
 */
9098 s
= graph
->node
[i
].scc
;
9099 k
= c
->scc_node
[s
]++;
/* Exchange the schedules of node "j" of the cluster and node "k" of the SCC.
 * NOTE(review): the initialization and update of "j" are not visible
 * in this excerpt.
 */
9100 swap_sched(&c
->cluster
[pos
].node
[j
], &c
->scc
[s
].node
[k
]);
/* The maxvar of the cluster is the maximum over the SCCs seen so far. */
9101 if (c
->scc
[s
].maxvar
> c
->cluster
[pos
].maxvar
)
9102 c
->cluster
[pos
].maxvar
= c
->scc
[s
].maxvar
;
9109 /* Is there a (conditional) validity dependence from node[j] to node[i],
9110 * forcing node[i] to follow node[j] or do the nodes belong to the same
9113 static isl_bool
node_follows_strong_or_same_cluster(int i
, int j
, void *user
)
9115 struct isl_sched_graph
*graph
= user
;
9117 if (graph
->node
[i
].cluster
== graph
->node
[j
].cluster
)
9118 return isl_bool_true
;
9119 return graph_has_validity_edge(graph
, &graph
->node
[j
], &graph
->node
[i
]);
9122 /* Extract the merged clusters of SCCs in "graph", sort them, and
9123 * store them in c->clusters. Update c->scc_cluster accordingly.
9125 * First keep track of the cluster containing the SCC to which a node
9126 * belongs in the node itself.
9127 * Then extract the clusters into c->clusters, copying the current
9128 * band schedule from the SCCs that belong to the cluster.
9129 * Do this only once per cluster.
9131 * Finally, topologically sort the clusters and update c->scc_cluster
9132 * to match the new scc numbering. While the SCCs were originally
9133 * sorted already, some SCCs that depend on some other SCCs may
9134 * have been merged with SCCs that appear before these other SCCs.
9135 * A reordering may therefore be required.
9137 static isl_stat
extract_clusters(isl_ctx
*ctx
, struct isl_sched_graph
*graph
,
9138 struct isl_clustering
*c
)
/* Record in each node the cluster of the SCC to which it belongs. */
9142 for (i
= 0; i
< graph
->n
; ++i
)
9143 graph
->node
[i
].cluster
= c
->scc_cluster
[graph
->node
[i
].scc
];
/* Visit the SCCs; the test below detects SCCs whose cluster index differs
 * from their own index (the statement executed for them is not visible
 * in this excerpt).
 */
9145 for (i
= 0; i
< graph
->scc
; ++i
) {
9146 if (c
->scc_cluster
[i
] != i
)
/* Extract the sub-graph of cluster "i" into c->cluster[i], selecting
 * nodes and edges with the *_cluster_exactly predicates.
 */
9148 if (extract_sub_graph(ctx
, graph
, &node_cluster_exactly
,
9149 &edge_cluster_exactly
, i
, &c
->cluster
[i
]) < 0)
9150 return isl_stat_error
;
/* Reset src_scc and dst_scc of the extracted cluster to -1. */
9151 c
->cluster
[i
].src_scc
= -1;
9152 c
->cluster
[i
].dst_scc
= -1;
/* Move the current band schedule from the SCCs into the cluster. */
9153 if (copy_partial(graph
, c
, i
) < 0)
9154 return isl_stat_error
;
/* Recompute the components with the cluster-aware "follows" relation and
 * update c->scc_cluster, indexed by the scc field of each node, accordingly.
 */
9157 if (detect_ccs(ctx
, graph
, &node_follows_strong_or_same_cluster
) < 0)
9158 return isl_stat_error
;
9159 for (i
= 0; i
< graph
->n
; ++i
)
9160 c
->scc_cluster
[graph
->node
[i
].scc
] = graph
->node
[i
].cluster
;
9165 /* Compute weights on the proximity edges of "graph" that can
9166 * be used by find_proximity to find the most appropriate
9167 * proximity edge to use to merge two clusters in "c".
9168 * The weights are also used by has_bounded_distances to determine
9169 * whether the merge should be allowed.
9170 * Store the maximum of the computed weights in graph->max_weight.
9172 * The computed weight is a measure for the number of remaining schedule
9173 * dimensions that can still be completely aligned.
9174 * In particular, compute the number of equalities between
9175 * input dimensions and output dimensions in the proximity constraints.
9176 * The directions that are already handled by outer schedule bands
9177 * are projected out prior to determining this number.
9179 * Edges that will never be considered by find_proximity are ignored.
9181 static isl_stat
compute_weights(struct isl_sched_graph
*graph
,
9182 struct isl_clustering
*c
)
/* The maximum starts at zero and is raised at the end of each iteration. */
9186 graph
->max_weight
= 0;
9188 for (i
= 0; i
< graph
->n_edge
; ++i
) {
9189 struct isl_sched_edge
*edge
= &graph
->edge
[i
];
9190 struct isl_sched_node
*src
= edge
->src
;
9191 struct isl_sched_node
*dst
= edge
->dst
;
9192 isl_basic_map
*hull
;
/* Check whether the edge is a non-empty proximity edge. */
9196 prox
= is_non_empty_proximity(edge
);
/* NOTE(review): the condition guarding this error return is not visible
 * in this excerpt.
 */
9198 return isl_stat_error
;
/* Detect edges that touch a bad cluster or whose end points already
 * share a cluster (the statements executed for them are not visible
 * in this excerpt).
 */
9201 if (bad_cluster(&c
->scc
[edge
->src
->scc
]) ||
9202 bad_cluster(&c
->scc
[edge
->dst
->scc
]))
9204 if (c
->scc_cluster
[edge
->dst
->scc
] ==
9205 c
->scc_cluster
[edge
->src
->scc
])
/* Take the affine hull of the edge map and transform its input and
 * output dimensions by the vmap of the source and destination node.
 */
9208 hull
= isl_map_affine_hull(isl_map_copy(edge
->map
));
9209 hull
= isl_basic_map_transform_dims(hull
, isl_dim_in
, 0,
9210 isl_mat_copy(src
->vmap
));
9211 hull
= isl_basic_map_transform_dims(hull
, isl_dim_out
, 0,
9212 isl_mat_copy(dst
->vmap
));
/* Project out the first "rank" dimensions on either side. */
9213 hull
= isl_basic_map_project_out(hull
,
9214 isl_dim_in
, 0, src
->rank
);
9215 hull
= isl_basic_map_project_out(hull
,
9216 isl_dim_out
, 0, dst
->rank
);
9217 hull
= isl_basic_map_remove_divs(hull
);
/* Keep only the constraints that involve both some input dimension
 * and some output dimension.
 */
9218 n_in
= isl_basic_map_dim(hull
, isl_dim_in
);
9219 n_out
= isl_basic_map_dim(hull
, isl_dim_out
);
9220 hull
= isl_basic_map_drop_constraints_not_involving_dims(hull
,
9221 isl_dim_in
, 0, n_in
);
9222 hull
= isl_basic_map_drop_constraints_not_involving_dims(hull
,
9223 isl_dim_out
, 0, n_out
);
/* NOTE(review): the condition guarding this error return is not visible
 * in this excerpt.
 */
9225 return isl_stat_error
;
/* The weight is the number of equalities that remain. */
9226 edge
->weight
= isl_basic_map_n_equality(hull
);
9227 isl_basic_map_free(hull
);
/* Keep track of the largest weight seen. */
9229 if (edge
->weight
> graph
->max_weight
)
9230 graph
->max_weight
= edge
->weight
;
9236 /* Call compute_schedule_finish_band on each of the clusters in "c"
9237 * in their topological order. This order is determined by the scc
9238 * fields of the nodes in "graph".
9239 * Combine the results in a sequence expressing the topological order.
9241 * If there is only one cluster left, then there is no need to introduce
9242 * a sequence node. Also, in this case, the cluster necessarily contains
9243 * the SCC at position 0 in the original graph and is therefore also
9244 * stored in the first cluster of "c".
9246 static __isl_give isl_schedule_node
*finish_bands_clustering(
9247 __isl_take isl_schedule_node
*node
, struct isl_sched_graph
*graph
,
9248 struct isl_clustering
*c
)
9252 isl_union_set_list
*filters
;
/* With a single component, finish the band of the first cluster
 * directly, without inserting a sequence.
 */
9254 if (graph
->scc
== 1)
9255 return compute_schedule_finish_band(node
, &c
->cluster
[0], 0);
9257 ctx
= isl_schedule_node_get_ctx(node
);
/* Insert a sequence node with one filter per component. */
9259 filters
= extract_sccs(ctx
, graph
);
9260 node
= isl_schedule_node_insert_sequence(node
, filters
);
/* For each component, descend two levels (child "i" of the sequence,
 * then its child 0), finish the band of the associated cluster and
 * move back up two levels.
 */
9262 for (i
= 0; i
< graph
->scc
; ++i
) {
9263 int j
= c
->scc_cluster
[i
];
9264 node
= isl_schedule_node_child(node
, i
);
9265 node
= isl_schedule_node_child(node
, 0);
9266 node
= compute_schedule_finish_band(node
, &c
->cluster
[j
], 0);
9267 node
= isl_schedule_node_parent(node
);
9268 node
= isl_schedule_node_parent(node
);
9274 /* Compute a schedule for a connected dependence graph by first considering
9275 * each strongly connected component (SCC) in the graph separately and then
9276 * incrementally combining them into clusters.
9277 * Return the updated schedule node.
9279 * Initially, each cluster consists of a single SCC, each with its
9280 * own band schedule. The algorithm then tries to merge pairs
9281 * of clusters along a proximity edge until no more suitable
9282 * proximity edges can be found. During this merging, the schedule
9283 * is maintained in the individual SCCs.
9284 * After the merging is completed, the full resulting clusters
9285 * are extracted and in finish_bands_clustering,
9286 * compute_schedule_finish_band is called on each of them to integrate
9287 * the band into "node" and to continue the computation.
9289 * compute_weights initializes the weights that are used by find_proximity.
9291 static __isl_give isl_schedule_node
*compute_schedule_wcc_clustering(
9292 __isl_take isl_schedule_node
*node
, struct isl_sched_graph
*graph
)
/* Clustering state; it is released through clustering_free below. */
9295 struct isl_clustering c
;
9298 ctx
= isl_schedule_node_get_ctx(node
);
/* Set up the clustering and the edge weights; both steps report failure
 * through a negative return value (the statements executed on failure
 * are not visible in this excerpt).
 */
9300 if (clustering_init(ctx
, &c
, graph
) < 0)
9303 if (compute_weights(graph
, &c
) < 0)
/* Pick an edge to merge along and merge the clusters it connects.
 * NOTE(review): the loop construct around these statements is not visible
 * in this excerpt; an index at or beyond graph->n_edge appears to mean
 * that no further edge was found.
 */
9307 i
= find_proximity(graph
, &c
);
9310 if (i
>= graph
->n_edge
)
9312 if (merge_clusters_along_edge(ctx
, graph
, i
, &c
) < 0)
/* Extract the resulting clusters and finish their bands. */
9316 if (extract_clusters(ctx
, graph
, &c
) < 0)
9319 node
= finish_bands_clustering(node
, graph
, &c
);
/* Release of the clustering state; presumably the regular exit. */
9321 clustering_free(ctx
, &c
);
/* Release of the clustering state followed by freeing "node". */
9324 clustering_free(ctx
, &c
);
9325 return isl_schedule_node_free(node
);
9328 /* Compute a schedule for a connected dependence graph and return
9329 * the updated schedule node.
9331 * If Feautrier's algorithm is selected, we first recursively try to satisfy
9332 * as many validity dependences as possible. When all validity dependences
9333 * are satisfied we extend the schedule to a full-dimensional schedule.
9335 * Call compute_schedule_wcc_whole or compute_schedule_wcc_clustering
9336 * depending on whether the user has selected the option to try and
9337 * compute a schedule for the entire (weakly connected) component first.
9338 * If there is only a single strongly connected component (SCC), then
9339 * there is no point in trying to combine SCCs
9340 * in compute_schedule_wcc_clustering, so compute_schedule_wcc_whole
9341 * is called instead.
9342 * Strongly connected components that are connected through
9343 * inter-statement consecutivity constraints are treated as
9344 * a single component to ensure that those constraints can be applied.
9346 static __isl_give isl_schedule_node
*compute_schedule_wcc(
9347 __isl_take isl_schedule_node
*node
, struct isl_sched_graph
*graph
)
9354 ctx
= isl_schedule_node_get_ctx(node
);
/* Detect the SCCs (with grouping of consecutive ones) and compute maxvar;
 * on failure of either step "node" is freed and the result
 * of isl_schedule_node_free is returned.
 */
9355 if (detect_sccs_grouping_consecutive(ctx
, graph
) < 0)
9356 return isl_schedule_node_free(node
);
9358 if (compute_maxvar(graph
) < 0)
9359 return isl_schedule_node_free(node
);
/* Dispatch 1: a Feautrier step takes precedence when one is needed. */
9361 if (need_feautrier_step(ctx
, graph
))
9362 return compute_schedule_wcc_feautrier(node
, graph
);
/* Dispatch 2: schedule the component as a whole if there is at most one
 * SCC or if the whole_component option is set.
 */
9364 if (graph
->scc
<= 1 || isl_options_get_schedule_whole_component(ctx
))
9365 return compute_schedule_wcc_whole(node
, graph
);
/* Dispatch 3: otherwise use the clustering approach. */
9367 return compute_schedule_wcc_clustering(node
, graph
);
9370 /* Compute a schedule for each group of nodes identified by node->scc
9371 * separately and then combine them in a sequence node (or as set node
9372 * if graph->weak is set) inserted at position "node" of the schedule tree.
9373 * Return the updated schedule node.
9375 * If "wcc" is set then each of the groups belongs to a single
9376 * weakly connected component in the dependence graph so that
9377 * there is no need for compute_sub_schedule to look for weakly
9378 * connected components.
9380 * If a set node would be introduced and if the number of components
9381 * is equal to the number of nodes, then check if the schedule
9382 * is already complete. If so, a redundant set node would be introduced
9383 * (without any further descendants) stating that the statements
9384 * can be executed in arbitrary order, which is also expressed
9385 * by the absence of any node. Refrain from inserting any nodes
9386 * in this case and simply return.
9388 static __isl_give isl_schedule_node
*compute_component_schedule(
9389 __isl_take isl_schedule_node
*node
, struct isl_sched_graph
*graph
,
9394 isl_union_set_list
*filters
;
9399 if (graph
->weak
&& graph
->scc
== graph
->n
) {
9400 if (compute_maxvar(graph
) < 0)
9401 return isl_schedule_node_free(node
);
9402 if (graph
->n_row
>= graph
->maxvar
)
9406 ctx
= isl_schedule_node_get_ctx(node
);
9407 filters
= extract_sccs(ctx
, graph
);
9409 node
= isl_schedule_node_insert_set(node
, filters
);
9411 node
= isl_schedule_node_insert_sequence(node
, filters
);
9413 for (component
= 0; component
< graph
->scc
; ++component
) {
9414 node
= isl_schedule_node_child(node
, component
);
9415 node
= isl_schedule_node_child(node
, 0);
9416 node
= compute_sub_schedule(node
, ctx
, graph
,
9418 &edge_scc_exactly
, component
, wcc
);
9419 node
= isl_schedule_node_parent(node
);
9420 node
= isl_schedule_node_parent(node
);
9426 /* Compute a schedule for the given dependence graph and insert it at "node".
9427 * Return the updated schedule node.
9429 * We first check if the graph is connected (through validity and conditional
9430 * validity dependences) and, if not, compute a schedule
9431 * for each component separately.
9432 * If the schedule_serialize_sccs option is set, then we check for strongly
9433 * connected components instead and compute a separate schedule for
9434 * each such strongly connected component.
9436 static __isl_give isl_schedule_node
*compute_schedule(isl_schedule_node
*node
,
9437 struct isl_sched_graph
*graph
)
9444 ctx
= isl_schedule_node_get_ctx(node
);
9445 if (isl_options_get_schedule_serialize_sccs(ctx
)) {
9446 if (detect_sccs(ctx
, graph
) < 0)
9447 return isl_schedule_node_free(node
);
9449 if (detect_wccs(ctx
, graph
) < 0)
9450 return isl_schedule_node_free(node
);
9454 return compute_component_schedule(node
, graph
, 1);
9456 return compute_schedule_wcc(node
, graph
);
9459 /* Compute a schedule on sc->domain that respects the given schedule
9462 * In particular, the schedule respects all the validity dependences.
9463 * If the default isl scheduling algorithm is used, it tries to minimize
9464 * the dependence distances over the proximity dependences.
9465 * If Feautrier's scheduling algorithm is used, the proximity dependence
9466 * distances are only minimized during the extension to a full-dimensional
9469 * If there are any condition and conditional validity dependences,
9470 * then the conditional validity dependences may be violated inside
9471 * a tilable band, provided they have no adjacent non-local
9472 * condition dependences.
9474 __isl_give isl_schedule
*isl_schedule_constraints_compute_schedule(
9475 __isl_take isl_schedule_constraints
*sc
)
9477 isl_ctx
*ctx
= isl_schedule_constraints_get_ctx(sc
);
9478 struct isl_sched_graph graph
= { 0 };
9479 isl_schedule
*sched
;
9480 isl_schedule_node
*node
;
9481 isl_union_set
*domain
;
9483 sc
= isl_schedule_constraints_align_params(sc
);
9485 domain
= isl_schedule_constraints_get_domain(sc
);
9486 if (isl_union_set_n_set(domain
) == 0) {
9487 isl_schedule_constraints_free(sc
);
9488 return isl_schedule_from_domain(domain
);
9491 if (graph_init(&graph
, sc
) < 0)
9492 domain
= isl_union_set_free(domain
);
9494 node
= isl_schedule_node_from_domain(domain
);
9495 node
= isl_schedule_node_child(node
, 0);
9497 node
= compute_schedule(node
, &graph
);
9498 sched
= isl_schedule_node_get_schedule(node
);
9499 isl_schedule_node_free(node
);
9501 graph_free(ctx
, &graph
);
9502 isl_schedule_constraints_free(sc
);
9507 /* Compute a schedule for the given union of domains that respects
9508 * all the validity dependences and minimizes
9509 * the dependence distances over the proximity dependences.
9511 * This function is kept for backward compatibility.
9513 __isl_give isl_schedule
*isl_union_set_compute_schedule(
9514 __isl_take isl_union_set
*domain
,
9515 __isl_take isl_union_map
*validity
,
9516 __isl_take isl_union_map
*proximity
)
9518 isl_schedule_constraints
*sc
;
9520 sc
= isl_schedule_constraints_on_domain(domain
);
9521 sc
= isl_schedule_constraints_set_validity(sc
, validity
);
9522 sc
= isl_schedule_constraints_set_proximity(sc
, proximity
);
9524 return isl_schedule_constraints_compute_schedule(sc
);