From e09f2759b93625fd5cdd21816996adff73785edf Mon Sep 17 00:00:00 2001 From: Sven Verdoolaege Date: Sat, 20 Jul 2013 12:44:27 +0200 Subject: [PATCH] isl_ast_build_ast_from_schedule: improve handling of unroll options The core algorithm assumes that the result of compute_domains consists of disjoint domains and may end up in an infinite recursion if they overlap. As in the case of atomic domains, the unroll domais may actually be larger than those specified by the user because some constraints may be simplified away. In combination with separation classes, this may result in the unroll domains in one class overlapping with the unroll domains in another class. We therefore now compute the unroll domains earlier and we intersect them with the class domain again to ensure that the result does not overlap with other classes. We also update the class domain to reflect the atomic and unroll domains so that these would not overlap with other domains within the same class domain. Reported-by: Tobias Grosser Signed-off-by: Sven Verdoolaege --- isl_ast_codegen.c | 172 ++++++++++++++++--------------- test_inputs/codegen/separation_class4.c | 19 ++++ test_inputs/codegen/separation_class4.in | 10 ++ 3 files changed, 118 insertions(+), 83 deletions(-) create mode 100644 test_inputs/codegen/separation_class4.c create mode 100644 test_inputs/codegen/separation_class4.in diff --git a/isl_ast_codegen.c b/isl_ast_codegen.c index 6bbd5587..80b1b789 100644 --- a/isl_ast_codegen.c +++ b/isl_ast_codegen.c @@ -2251,8 +2251,41 @@ static __isl_give isl_constraint *at_offset(int depth, __isl_keep isl_aff *aff, return isl_equality_from_aff(aff); } -/* Return a list of basic sets, one for each value of the current dimension - * in "domain". +/* Data structure for storing the results and the intermediate objects + * of compute_domains. + * + * "list" is the main result of the function and contains a list + * of disjoint basic sets for which code should be generated. + * + * "executed" and "build" are inputs to compute_domains. + * "schedule_domain" is the domain of "executed". + * + * "option" constains the domains at the current depth that should by + * atomic, separated or unrolled. These domains are as specified by + * the user, except that inner dimensions have been eliminated and + * that they have been made pair-wise disjoint. + * + * "sep_class" contains the user-specified split into separation classes + * specialized to the current depth. + * "done" contains the union of the separation domains that have already + * been handled. + */ +struct isl_codegen_domains { + isl_basic_set_list *list; + + isl_union_map *executed; + isl_ast_build *build; + isl_set *schedule_domain; + + isl_set *option[3]; + + isl_map *sep_class; + isl_set *done; +}; + +/* Extend domains->list with a list of basic sets, one for each value + * of the current dimension in "domain" and remove the corresponding + * sets from the class domain. Return the updated class domain. * The divs that involve the current dimension have not been projected out * from this domain. * @@ -2283,24 +2316,28 @@ static __isl_give isl_constraint *at_offset(int depth, __isl_keep isl_aff *aff, * atomic option. * * Finally, we map i' back to i and add each basic set to the list. + * Since we may have dropped some constraints, we intersect with + * the class domain again to ensure that each element in the list + * is disjoint from the other class domains. */ -static __isl_give isl_basic_set_list *do_unroll(__isl_take isl_set *domain, - __isl_keep isl_ast_build *build) +static __isl_give isl_set *do_unroll(struct isl_codegen_domains *domains, + __isl_take isl_set *domain, __isl_take isl_set *class_domain) { int i, n; int depth; isl_ctx *ctx; isl_aff *lower; - isl_basic_set_list *list; isl_multi_aff *expansion; isl_basic_map *bmap; + isl_set *unroll_domain; + isl_ast_build *build; if (!domain) - return NULL; + return isl_set_free(class_domain); ctx = isl_set_get_ctx(domain); - depth = isl_ast_build_get_depth(build); - build = isl_ast_build_copy(build); + depth = isl_ast_build_get_depth(domains->build); + build = isl_ast_build_copy(domains->build); domain = isl_ast_build_eliminate_inner(build, domain); build = isl_ast_build_detect_strides(build, isl_set_copy(domain)); expansion = isl_ast_build_get_stride_expansion(build); @@ -2311,18 +2348,19 @@ static __isl_give isl_basic_set_list *do_unroll(__isl_take isl_set *domain, isl_ast_build_free(build); - list = isl_basic_set_list_alloc(ctx, 0); - lower = find_unroll_lower_bound(domain, depth, &n); if (!lower) - list = isl_basic_set_list_free(list); + class_domain = isl_set_free(class_domain); bmap = isl_basic_map_from_multi_aff(expansion); - for (i = 0; list && i < n; ++i) { + unroll_domain = isl_set_empty(isl_set_get_space(domain)); + + for (i = 0; class_domain && i < n; ++i) { isl_set *set; isl_basic_set *bset; isl_constraint *slice; + isl_basic_set_list *list; slice = at_offset(depth, lower, i); set = isl_set_copy(domain); @@ -2330,59 +2368,28 @@ static __isl_give isl_basic_set_list *do_unroll(__isl_take isl_set *domain, bset = isl_set_unshifted_simple_hull(set); bset = isl_basic_set_add_constraint(bset, slice); bset = isl_basic_set_apply(bset, isl_basic_map_copy(bmap)); - list = isl_basic_set_list_add(list, bset); + set = isl_set_from_basic_set(bset); + unroll_domain = isl_set_union(unroll_domain, isl_set_copy(set)); + set = isl_set_intersect(set, isl_set_copy(class_domain)); + set = isl_set_make_disjoint(set); + list = isl_basic_set_list_from_set(set); + domains->list = isl_basic_set_list_concat(domains->list, list); } + class_domain = isl_set_subtract(class_domain, unroll_domain); + isl_aff_free(lower); isl_set_free(domain); isl_basic_map_free(bmap); - return list; + return class_domain; } -/* Data structure for storing the results and the intermediate objects - * of compute_domains. - * - * "list" is the main result of the function and contains a list - * of disjoint basic sets for which code should be generated. - * - * "executed" and "build" are inputs to compute_domains. - * "schedule_domain" is the domain of "executed". - * - * "option" constains the domains at the current depth that should by - * atomic, separated or unrolled. These domains are as specified by - * the user, except that inner dimensions have been eliminated and - * that they have been made pair-wise disjoint. - * - * "sep_class" contains the user-specified split into separation classes - * specialized to the current depth. - * "done" contains the union of the separation domains that have already - * been handled. - * "atomic" contains the domain that has effectively been made atomic. - * This domain may be larger than the intersection of option[atomic] - * and the schedule domain. - */ -struct isl_codegen_domains { - isl_basic_set_list *list; - - isl_union_map *executed; - isl_ast_build *build; - isl_set *schedule_domain; - - isl_set *option[3]; - - isl_map *sep_class; - isl_set *done; - isl_set *atomic; -}; - /* Add domains to domains->list for each individual value of the current * dimension, for that part of the schedule domain that lies in the * intersection of the option domain and the class domain. - * - * "domain" is the intersection of the class domain and the schedule domain. - * The divs that involve the current dimension have not been projected out - * from this domain. + * Remove the corresponding sets from the class domain and + * return the updated class domain. * * We first break up the unroll option domain into individual pieces * and then handle each of them separately. The unroll option domain @@ -2394,8 +2401,8 @@ struct isl_codegen_domains { * intersecting with class and schedule domain, hoping that the * unroll option domain specified by the user is relatively simple. */ -static int compute_unroll_domains(struct isl_codegen_domains *domains, - __isl_keep isl_set *domain) +static __isl_give isl_set *compute_unroll_domains( + struct isl_codegen_domains *domains, __isl_take isl_set *class_domain) { isl_set *unroll_domain; isl_basic_set_list *unroll_list; @@ -2404,9 +2411,9 @@ static int compute_unroll_domains(struct isl_codegen_domains *domains, empty = isl_set_is_empty(domains->option[unroll]); if (empty < 0) - return -1; + return isl_set_free(class_domain); if (empty) - return 0; + return class_domain; unroll_domain = isl_set_copy(domains->option[unroll]); unroll_list = isl_basic_set_list_from_set(unroll_domain); @@ -2414,12 +2421,13 @@ static int compute_unroll_domains(struct isl_codegen_domains *domains, n = isl_basic_set_list_n_basic_set(unroll_list); for (i = 0; i < n; ++i) { isl_basic_set *bset; - isl_basic_set_list *list; bset = isl_basic_set_list_get_basic_set(unroll_list, i); unroll_domain = isl_set_from_basic_set(bset); unroll_domain = isl_set_intersect(unroll_domain, - isl_set_copy(domain)); + isl_set_copy(class_domain)); + unroll_domain = isl_set_intersect(unroll_domain, + isl_set_copy(domains->schedule_domain)); empty = isl_set_is_empty(unroll_domain); if (empty >= 0 && empty) { @@ -2427,19 +2435,18 @@ static int compute_unroll_domains(struct isl_codegen_domains *domains, continue; } - list = do_unroll(unroll_domain, domains->build); - domains->list = isl_basic_set_list_concat(domains->list, list); + class_domain = do_unroll(domains, unroll_domain, class_domain); } isl_basic_set_list_free(unroll_list); - return 0; + return class_domain; } /* Try and construct a single basic set that includes the intersection of * the schedule domain, the atomic option domain and the class domain. - * Add the resulting basic set(s) to domains->list and save a copy - * in domains->atomic for use in compute_partial_domains. + * Add the resulting basic set(s) to domains->list and remove them + * from class_domain. Return the updated class domain. * * We construct a single domain rather than trying to combine * the schedule domains of individual domains because we are working @@ -2453,12 +2460,12 @@ static int compute_unroll_domains(struct isl_codegen_domains *domains, * "domain" is the intersection of the schedule domain and the class domain, * with inner dimensions projected out. */ -static int compute_atomic_domain(struct isl_codegen_domains *domains, - __isl_keep isl_set *class_domain) +static __isl_give isl_set *compute_atomic_domain( + struct isl_codegen_domains *domains, __isl_take isl_set *class_domain) { isl_basic_set *bset; isl_basic_set_list *list; - isl_set *domain; + isl_set *domain, *atomic_domain; int empty; domain = isl_set_copy(domains->option[atomic]); @@ -2466,22 +2473,25 @@ static int compute_atomic_domain(struct isl_codegen_domains *domains, domain = isl_set_intersect(domain, isl_set_copy(domains->schedule_domain)); empty = isl_set_is_empty(domain); - if (empty < 0 || empty) { - domains->atomic = domain; - return empty < 0 ? -1 : 0; + if (empty < 0) + class_domain = isl_set_free(class_domain); + if (empty) { + isl_set_free(domain); + return class_domain; } domain = isl_ast_build_eliminate(domains->build, domain); domain = isl_set_coalesce(domain); bset = isl_set_unshifted_simple_hull(domain); domain = isl_set_from_basic_set(bset); - domains->atomic = isl_set_copy(domain); + atomic_domain = isl_set_copy(domain); domain = isl_set_intersect(domain, isl_set_copy(class_domain)); + class_domain = isl_set_subtract(class_domain, atomic_domain); domain = isl_set_make_disjoint(domain); list = isl_basic_set_list_from_set(domain); domains->list = isl_basic_set_list_concat(domains->list, list); - return 0; + return class_domain; } /* Split up the schedule domain into uniform basic sets, @@ -2553,9 +2563,11 @@ static int compute_separate_domain(struct isl_codegen_domains *domains, * * The domain that has been made atomic may be larger than specified * by the user since it needs to be representable as a single basic set. - * This possibly larger domain is stored in domains->atomic by + * This possibly larger domain is removed from class_domain by * compute_atomic_domain. It is computed first so that the extended domain * would not overlap with any domains computed before. + * Similary, the unrolled domains may have some constraints removed and + * may therefore also be larger than specified by the user. * * If anything is left after handling separate, unroll and atomic, * we split it up into basic sets and append the basic sets to domains->list. @@ -2571,11 +2583,10 @@ static int compute_partial_domains(struct isl_codegen_domains *domains, domains->done = isl_set_union(domains->done, isl_set_copy(class_domain)); - domain = isl_set_copy(class_domain); + class_domain = compute_atomic_domain(domains, class_domain); + class_domain = compute_unroll_domains(domains, class_domain); - if (compute_atomic_domain(domains, class_domain) < 0) - domain = isl_set_free(domain); - domain = isl_set_subtract(domain, domains->atomic); + domain = isl_set_copy(class_domain); if (compute_separate_domain(domains, domain) < 0) goto error; @@ -2585,11 +2596,6 @@ static int compute_partial_domains(struct isl_codegen_domains *domains, domain = isl_set_intersect(domain, isl_set_copy(domains->schedule_domain)); - if (compute_unroll_domains(domains, domain) < 0) - goto error; - domain = isl_set_subtract(domain, - isl_set_copy(domains->option[unroll])); - domain = isl_ast_build_eliminate(domains->build, domain); domain = isl_set_intersect(domain, isl_set_copy(class_domain)); diff --git a/test_inputs/codegen/separation_class4.c b/test_inputs/codegen/separation_class4.c new file mode 100644 index 00000000..f9b4ccf0 --- /dev/null +++ b/test_inputs/codegen/separation_class4.c @@ -0,0 +1,19 @@ +for (int c0 = 0; c0 <= 128; c0 += 1) + if (c0 <= 127) { + if (c0 == 0) { + for (int c3 = 0; c3 <= 1; c3 += 1) + for (int c5 = c3 + 58; c5 <= -c3 + 61; c5 += 1) + S_0(c3, c5); + } else + for (int c2 = 1; c2 <= 2; c2 += 1) + for (int c3 = max(4 * c0 - 2, 4 * c0 + 6 * c2 - 12); c3 <= min(4 * c0 + 6 * c2 - 7, 4 * c0 + 1); c3 += 1) + for (int c5 = max(4 * c0 - c3 + 57, -4 * c0 + c3 + 58); c5 <= min(-4 * c0 + c3 + 62, 4 * c0 - c3 + 61); c5 += 1) + S_0(c3, c5); + for (int c2 = 1; c2 <= 2; c2 += 1) + for (int c3 = max(4 * c0 + 6 * c2 - 10, 4 * c0); c3 <= min(4 * c0 + 6 * c2 - 5, 4 * c0 + 3); c3 += 1) + for (int c5 = max(4 * c0 - c3 + 62, -4 * c0 + c3 + 59); c5 <= min(4 * c0 - c3 + 66, -4 * c0 + c3 + 63); c5 += 1) + S_0(c3, c5); + } else + for (int c3 = 510; c3 <= 511; c3 += 1) + for (int c5 = -c3 + 569; c5 < c3 - 449; c5 += 1) + S_0(c3, c5); diff --git a/test_inputs/codegen/separation_class4.in b/test_inputs/codegen/separation_class4.in new file mode 100644 index 00000000..c04dd79b --- /dev/null +++ b/test_inputs/codegen/separation_class4.in @@ -0,0 +1,10 @@ +# Check that isl is not confused by the combination of separation classes +# and unroll. +{ S_0[t, i] -> [o0, 1, o9, t] : 4o0 >= -3 + t and 4o0 <= t and i >= 60 and i <= 65 and 6o9 >= 5 + t - 4o0 and 6o9 <= 10 + t - 4o0 and 4o0 <= -62 + t + i and 4o0 >= 59 + t - i and o0 >= 0 and o0 <= 127 and t <= 511 and t >= 0 and 4o0 >= -66 + t + i and 4o0 <= 63 + t - i; +S_0[t, i] -> [o0, 0, o9, t] : 4o0 >= -1 + t and 4o0 <= 2 + t and i >= 57 and i <= 62 and 6o9 >= 7 + t - 4o0 and 6o9 <= 12 + t - 4o0 and t >= 0 and t <= 511 and 4o0 <= -57 + t + i and 4o0 >= 58 + t - i and o0 >= 0 and o0 <= 128 and 4o0 >= -61 + t + i and 4o0 <= 62 + t - i } +{ : } +{ [i0, i1, i2, t] -> unroll[1]; +[i0, 1, i2, t] -> separation_class[[1] -> [0]] + : 0 <= i0 <= 127; +[i0, 0, i2, t] -> separation_class[[1] -> [0]] + : 1 <= i0 <= 127} -- 2.11.4.GIT