From 3733ac41c3a7ce80c561645cdd03c1535accc397 Mon Sep 17 00:00:00 2001 From: Sven Verdoolaege Date: Fri, 19 Apr 2013 16:57:00 +0200 Subject: [PATCH] isl_map_affine_hull: make stride information explicit before dropping divs In a2cbe44 (isl_map_affine_hull: avoid computing explicit representations for divs, Sun Jan 15 16:12:59 2012 +0100), isl_map_affine_hull was changed to drop unknown divs rather than computing an explicit representation for them. This caused some stride information that involves multiple unknown divs to get dropped. Try and recover this lost information by making stride information explicitly available before dropping the unknown divs. Signed-off-by: Sven Verdoolaege --- isl_affine_hull.c | 160 +++++++++++++++++++++++++++++++++++++++-- isl_morph.c | 1 + isl_test.c | 18 ++++- test_inputs/codegen/stride6.c | 4 ++ test_inputs/codegen/stride6.in | 3 + 5 files changed, 179 insertions(+), 7 deletions(-) create mode 100644 test_inputs/codegen/stride6.c create mode 100644 test_inputs/codegen/stride6.in diff --git a/isl_affine_hull.c b/isl_affine_hull.c index 12fbad5d..0d66081e 100644 --- a/isl_affine_hull.c +++ b/isl_affine_hull.c @@ -1,10 +1,15 @@ /* * Copyright 2008-2009 Katholieke Universiteit Leuven + * Copyright 2010 INRIA Saclay + * Copyright 2012 Ecole Normale Superieure * * Use of this software is governed by the MIT license * * Written by Sven Verdoolaege, K.U.Leuven, Departement * Computerwetenschappen, Celestijnenlaan 200A, B-3001 Leuven, Belgium + * and INRIA Saclay - Ile-de-France, Parc Club Orsay Universite, + * ZAC des vignes, 4 rue Jacques Monod, 91893 Orsay, France + * and Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France */ #include @@ -1157,8 +1162,150 @@ struct isl_basic_set *isl_basic_set_affine_hull(struct isl_basic_set *bset) isl_basic_map_affine_hull((struct isl_basic_map *)bset); } +/* Given a rational affine matrix "M", add stride constraints to "bmap" + * that ensure that + * + * M(x) + * + * is an integer vector. The variables x include all the variables + * of "bmap" except the unknown divs. + * + * If d is the common denominator of M, then we need to impose that + * + * d M(x) = 0 mod d + * + * or + * + * exists alpha : d M(x) = d alpha + * + * This function is similar to add_strides in isl_morph.c + */ +static __isl_give isl_basic_map *add_strides(__isl_take isl_basic_map *bmap, + __isl_keep isl_mat *M, int n_known) +{ + int i, div, k; + isl_int gcd; + + if (isl_int_is_one(M->row[0][0])) + return bmap; + + bmap = isl_basic_map_extend_space(bmap, isl_space_copy(bmap->dim), + M->n_row - 1, M->n_row - 1, 0); + + isl_int_init(gcd); + for (i = 1; i < M->n_row; ++i) { + isl_seq_gcd(M->row[i], M->n_col, &gcd); + if (isl_int_is_divisible_by(gcd, M->row[0][0])) + continue; + div = isl_basic_map_alloc_div(bmap); + if (div < 0) + goto error; + isl_int_set_si(bmap->div[div][0], 0); + k = isl_basic_map_alloc_equality(bmap); + if (k < 0) + goto error; + isl_seq_cpy(bmap->eq[k], M->row[i], M->n_col); + isl_seq_clr(bmap->eq[k] + M->n_col, bmap->n_div - n_known); + isl_int_set(bmap->eq[k][M->n_col - n_known + div], + M->row[0][0]); + } + isl_int_clear(gcd); + + return bmap; +error: + isl_int_clear(gcd); + isl_basic_map_free(bmap); + return NULL; +} + +/* If there are any equalities that involve (multiple) unknown divs, + * then extract the stride information encoded by those equalities + * and make it explicitly available in "bmap". + * + * We first sort the divs so that the unknown divs appear last and + * then we count how many equalities involve these divs. + * + * Let these equalities be of the form + * + * A(x) + B y = 0 + * + * where y represents the unknown divs and x the remaining variables. + * Let [H 0] be the Hermite Normal Form of B, i.e., + * + * B = [H 0] Q + * + * Then x is a solution of the equalities iff + * + * H^-1 A(x) (= - [I 0] Q y) + * + * is an integer vector. Let d be the common denominator of H^-1. + * We impose + * + * d H^-1 A(x) = d alpha + * + * in add_strides, with alpha fresh existentially quantified variables. + */ +static __isl_give isl_basic_map *isl_basic_map_make_strides_explicit( + __isl_take isl_basic_map *bmap) +{ + int known; + int n_known; + int n, n_col; + int total; + isl_ctx *ctx; + isl_mat *A, *B, *M; + + known = isl_basic_map_divs_known(bmap); + if (known < 0) + return isl_basic_map_free(bmap); + if (known) + return bmap; + bmap = isl_basic_map_sort_divs(bmap); + bmap = isl_basic_map_gauss(bmap, NULL); + if (!bmap) + return NULL; + + for (n_known = 0; n_known < bmap->n_div; ++n_known) + if (isl_int_is_zero(bmap->div[n_known][0])) + break; + ctx = isl_basic_map_get_ctx(bmap); + total = isl_space_dim(bmap->dim, isl_dim_all); + for (n = 0; n < bmap->n_eq; ++n) + if (isl_seq_first_non_zero(bmap->eq[n] + 1 + total + n_known, + bmap->n_div - n_known) == -1) + break; + if (n == 0) + return bmap; + B = isl_mat_sub_alloc6(ctx, bmap->eq, 0, n, 0, 1 + total + n_known); + n_col = bmap->n_div - n_known; + A = isl_mat_sub_alloc6(ctx, bmap->eq, 0, n, 1 + total + n_known, n_col); + A = isl_mat_left_hermite(A, 0, NULL, NULL); + A = isl_mat_drop_cols(A, n, n_col - n); + A = isl_mat_lin_to_aff(A); + A = isl_mat_right_inverse(A); + B = isl_mat_insert_zero_rows(B, 0, 1); + B = isl_mat_set_element_si(B, 0, 0, 1); + M = isl_mat_product(A, B); + if (!M) + return isl_basic_map_free(bmap); + bmap = add_strides(bmap, M, n_known); + bmap = isl_basic_map_gauss(bmap, NULL); + isl_mat_free(M); + + return bmap; +} + /* Compute the affine hull of each basic map in "map" separately - * and call isl_basic_map_gauss on the result. + * and make all stride information explicit so that we can remove + * all unknown divs without losing this information. + * The result is also guaranteed to be gaussed. + * + * In simple cases where a div is determined by an equality, + * calling isl_basic_map_gauss is enough to make the stride information + * explicit, as it will derive an explicit representation for the div + * from the equality. If, however, the stride information + * is encoded through multiple unknown divs then we need to make + * some extra effort in isl_basic_map_make_strides_explicit. */ static __isl_give isl_map *isl_map_local_affine_hull(__isl_take isl_map *map) { @@ -1171,6 +1318,7 @@ static __isl_give isl_map *isl_map_local_affine_hull(__isl_take isl_map *map) for (i = 0; i < map->n; ++i) { map->p[i] = isl_basic_map_affine_hull(map->p[i]); map->p[i] = isl_basic_map_gauss(map->p[i], NULL); + map->p[i] = isl_basic_map_make_strides_explicit(map->p[i]); if (!map->p[i]) return isl_map_free(map); } @@ -1191,11 +1339,11 @@ static __isl_give isl_set *isl_set_local_affine_hull(__isl_take isl_set *set) * In order to avoid performing parametric integer programming to * compute explicit expressions for the divs, possible leading to * an explosion in the number of basic maps, we first drop all unknown - * divs before aligning the divs. Note that the divs determined - * by an equality will be known since we call isl_basic_set_gauss - * from isl_map_local_affine_hull. Calling gauss is also needed - * because affine_hull assumes its input has been gaussed, while - * isl_map_affine_hull may be called on input that has not been gaussed, + * divs before aligning the divs. Note that isl_map_local_affine_hull tries + * to make sure that all stride information is explicitly available + * in terms of known divs. This involves calling isl_basic_set_gauss, + * which is also needed because affine_hull assumes its input has been gaussed, + * while isl_map_affine_hull may be called on input that has not been gaussed, * in particular from initial_facet_constraint. * Similarly, align_divs may reorder some divs so that we need to * gauss the result again. diff --git a/isl_morph.c b/isl_morph.c index 04991e17..3700874a 100644 --- a/isl_morph.c +++ b/isl_morph.c @@ -554,6 +554,7 @@ __isl_give isl_morph *isl_basic_set_parameter_compression( * * exists alpha in Z^m: B x = d alpha * + * This function is similar to add_strides in isl_affine_hull.c */ static __isl_give isl_basic_set *add_strides(__isl_take isl_basic_set *bset, __isl_keep isl_morph *morph) diff --git a/isl_test.c b/isl_test.c index 7986d937..ae43aed2 100644 --- a/isl_test.c +++ b/isl_test.c @@ -719,8 +719,9 @@ int test_affine_hull(struct isl_ctx *ctx) { const char *str; isl_set *set; - isl_basic_set *bset; + isl_basic_set *bset, *bset2; int n; + int subset; test_affine_hull_case(ctx, "affine2"); test_affine_hull_case(ctx, "affine"); @@ -750,6 +751,21 @@ int test_affine_hull(struct isl_ctx *ctx) if (!bset) return -1; + str = "{ [a] : exists e0, e1, e2: 32e1 = 31 + 31a + 31e0 and " + "32e2 = 31 + 31e0 }"; + set = isl_set_read_from_str(ctx, str); + bset = isl_set_affine_hull(set); + str = "{ [a] : exists e : a = 32 e }"; + bset2 = isl_basic_set_read_from_str(ctx, str); + subset = isl_basic_set_is_subset(bset, bset2); + isl_basic_set_free(bset); + isl_basic_set_free(bset2); + if (subset < 0) + return -1; + if (!subset) + isl_die(ctx, isl_error_unknown, "not as accurate as expected", + return -1); + return 0; } diff --git a/test_inputs/codegen/stride6.c b/test_inputs/codegen/stride6.c new file mode 100644 index 00000000..a7f60821 --- /dev/null +++ b/test_inputs/codegen/stride6.c @@ -0,0 +1,4 @@ +for (int c1 = -1024; c1 <= 0; c1 += 32) + for (int c2 = max(-((niter - c1) % 32) + niter - c1 - 32, -((niter - 1) % 32) + niter - 1); c2 <= min(niter + 1022, niter - c1 - 1); c2 += 32) + for (int c5 = max(max(niter - c1 - c2 - 32, -c1 - 1023), 0); c5 <= min(min(-c1, niter - c1 - c2 - 1), 31); c5 += 1) + S_4(niter - 1, -c1 - c5); diff --git a/test_inputs/codegen/stride6.in b/test_inputs/codegen/stride6.in new file mode 100644 index 00000000..86ca54a9 --- /dev/null +++ b/test_inputs/codegen/stride6.in @@ -0,0 +1,3 @@ +[niter] -> { S_4[-1 + niter, i] -> [o0, o1, o2, o3, o4, o5, o6, o7, 4] : exists (e0 = [(o0)/32], e1 = [(o1)/32], e2 = [(o2)/32], e3 = [(o3)/32], e4 = [(-31i + o5)/32], e5 = [(-i - o4 + o6)/32], e6 = [(-o4 + o7)/32], e7 = [(-1 + niter - o4)/32]: 32e0 = o0 and 32e1 = o1 and 32e2 = o2 and 32e3 = o3 and 32e4 = -31i + o5 and 32e5 = -i - o4 + o6 and 32e6 = -o4 + o7 and 32e7 = -1 + niter - o4 and o0 <= -1 + niter and o0 >= -32 + niter and o1 <= -i and o1 >= -31 - i and o2 <= -1 + niter + i and o2 >= -32 + niter + i and o3 <= 1023 + niter and o3 >= 992 + niter and o4 >= 0 and o4 <= 31 and o5 >= 0 and o5 <= 31 and o6 >= 0 and o6 <= 31 and o7 >= 0 and o7 <= 31 and i <= 1023 and i >= 0 and niter >= 1) } +[niter] -> { : niter <= 8192 and niter >= 1 } +[niter] -> { } -- 2.11.4.GIT