From cb1da73e54a95833c9f73ce8074a737e4d7a2390 Mon Sep 17 00:00:00 2001 From: Sven Verdoolaege Date: Wed, 3 Jul 2013 12:58:57 +0200 Subject: [PATCH] isl_ast_codegen.c: generate_component: handle non-obviously fixed values When all domains have an obviously fixed value, we skip the check for opportunties to expose strides by shifting. However, the domains may still have non-obviously fixed values, in which case the gcd of the strides is zero, which breaks generate_shift_component. Skip the shifting in this case as well. Signed-off-by: Sven Verdoolaege --- isl_ast_codegen.c | 4 +++- test_inputs/codegen/shift2.c | 53 +++++++++++++++++++++++++++++++++++++++++++ test_inputs/codegen/shift2.in | 5 ++++ 3 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 test_inputs/codegen/shift2.c create mode 100644 test_inputs/codegen/shift2.in diff --git a/isl_ast_codegen.c b/isl_ast_codegen.c index dfd634e4..de933916 100644 --- a/isl_ast_codegen.c +++ b/isl_ast_codegen.c @@ -3191,6 +3191,8 @@ static __isl_give isl_ast_graft_list *generate_shift_component( * obviously fixed value for the current dimension to itself and all * other domains and collect the offsets and the gcd of the strides. * If the gcd becomes one, then we failed to find shifted strides. + * If the gcd is zero, then the differences were all fixed, meaning + * that some domains had non-obviously fixed values for the current dimension. * If all the offsets are the same (for those domains that do not have * an obviously fixed value for the current dimension), then we do not * apply the transformation. @@ -3284,7 +3286,7 @@ static __isl_give isl_ast_graft_list *generate_component( if (res < 0 || !gcd) { isl_ast_build_free(build); list = NULL; - } else if (i < n || fixed) { + } else if (i < n || fixed || isl_val_is_zero(gcd)) { list = generate_shifted_component_from_list(domain, order, n, build); } else { diff --git a/test_inputs/codegen/shift2.c b/test_inputs/codegen/shift2.c new file mode 100644 index 00000000..63e2a4d5 --- /dev/null +++ b/test_inputs/codegen/shift2.c @@ -0,0 +1,53 @@ +for (int c0 = 0; c0 <= 1; c0 += 1) { + for (int c1 = 0; c1 < length - 1; c1 += 32) { + for (int c2 = c1; c2 < length; c2 += 32) { + if (c1 == 0) + for (int c3 = 0; c3 <= min(length, 2 * c2 - 32); c3 += 32) + for (int c5 = 0; c5 <= min(31, length - c2 - 1); c5 += 1) + for (int c6 = max(-c3 + 1, 0); c6 <= min(31, length - c3); c6 += 1) + S_0(c0, c2 + c5, c3 + c6 - 1); + for (int c3 = max(2 * c1, 2 * c2); c3 <= min(2 * c2 + 62, 2 * length - 2); c3 += 32) + for (int c4 = 0; c4 <= min(min(length - c1 - 2, -c1 + c3 / 2 + 14), 31); c4 += 1) { + if (c4 == 0 && c3 == 0 && c2 == 0 && c1 == 0) { + for (int c6 = 1; c6 <= min(31, length); c6 += 1) + S_0(c0, 0, c6 - 1); + } else if (c4 == 0 && c3 == 2 * c2 + 32 && c1 == 0) + for (int c5 = 0; c5 <= 15; c5 += 1) + for (int c6 = 0; c6 <= min(31, length - 2 * c2 - 32); c6 += 1) + S_0(c0, c2 + c5, 2 * c2 + c6 + 31); + for (int c5 = max((-2 * c2 + c3) / 2, c1 - c2 + c4 + 1); c5 <= min(-c2 + c3 / 2 + 15, length - c2 - 1); c5 += 1) { + if (c4 == 0 && c1 == 0) + for (int c6 = max(-c3 + 1, 0); c6 <= min(2 * c2 - c3 + 2 * c5 - 1, length - c3); c6 += 1) + S_0(c0, c2 + c5, c3 + c6 - 1); + S_3(c0, c1 + c4, c2 + c5); + if (c4 == 0 && c1 == 0 && length >= 2 * c2 + 2 * c5) + S_0(c0, c2 + c5, 2 * c2 + 2 * c5 - 1); + if (c4 == 0 && c1 == 0) + for (int c6 = 2 * c2 - c3 + 2 * c5 + 1; c6 <= min(length - c3, 31); c6 += 1) + S_0(c0, c2 + c5, c3 + c6 - 1); + } + if (c4 == 0 && c1 == 0 && c3 + 30 >= 2 * length) + S_4(c0); + if (c4 == 0 && c3 == 2 * c2 && c1 == 0) + for (int c5 = 16; c5 <= min(length - c2 - 1, 31); c5 += 1) + for (int c6 = max(0, -2 * c2 + 1); c6 <= min(31, length - 2 * c2); c6 += 1) + S_0(c0, c2 + c5, 2 * c2 + c6 - 1); + } + if (32 * floord(length - 16, 32) + 16 == length && c2 + 16 == length && c1 == 0) + S_4(c0); + if (c1 == 0) + for (int c3 = 2 * c2 + 64; c3 <= length; c3 += 32) + for (int c5 = 0; c5 <= 31; c5 += 1) + for (int c6 = 0; c6 <= min(31, length - c3); c6 += 1) + S_0(c0, c2 + c5, c3 + c6 - 1); + } + if (length % 32 == 0 && c1 == 0) + S_4(c0); + } + if (length <= 1) + for (int c5 = 0; c5 <= length; c5 += 1) + if (c5 == length) { + S_4(c0); + } else + S_0(c0, 0, 0); +} diff --git a/test_inputs/codegen/shift2.in b/test_inputs/codegen/shift2.in new file mode 100644 index 00000000..3c05fac3 --- /dev/null +++ b/test_inputs/codegen/shift2.in @@ -0,0 +1,5 @@ +# Check that the shifting code is not confused by domains that +# have a non-obviously fixed value. +[tsteps, length] -> { S_4[iter] -> [iter, 0, o2, o3, 0, o5, o6, 3] : exists (e0 = [(o2)/32], e1 = [(o3)/32], e2 = [(-length + o5)/32], e3 = [(-2length + o6)/32]: tsteps = 2 and 32e0 = o2 and 32e1 = o3 and 32e2 = -length + o5 and 32e3 = -2length + o6 and o2 <= length and o2 >= -31 + length and o3 <= 2length and o3 >= -30 + 2length and o5 >= 0 and o5 <= 31 and o6 >= 0 and o6 <= 30 and iter <= 1 and iter >= 0); S_3[iter, i, j] -> [iter, o1, o2, o3, o4, o5, o6, 2] : exists (e0 = [(o1)/32], e1 = [(o2)/32], e2 = [(o3)/32], e3 = [(-i + o4)/32], e4 = [(-j + o5)/32], e5 = [(-2j + o6)/32]: tsteps = 2 and 32e0 = o1 and 32e1 = o2 and 32e2 = o3 and 32e3 = -i + o4 and 32e4 = -j + o5 and 32e5 = -2j + o6 and o1 <= i and o1 >= -31 + i and o2 <= j and o2 >= -31 + j and o3 <= 2j and o3 >= -30 + 2j and o4 >= 0 and o4 <= 31 and o5 >= 0 and o5 <= 31 and o6 >= 0 and o6 <= 30 and j >= 1 + i and i >= 0 and iter <= 1 and iter >= 0 and j <= -1 + length); S_0[iter, i, j] -> [iter, 0, o2, o3, 0, o5, o6, 4] : exists (e0 = [(o2)/32], e1 = [(o3)/32], e2 = [(-i + o5)/32], e3 = [(-31 + j - o6)/32]: tsteps = 2 and 32e0 = o2 and 32e1 = o3 and 32e2 = -i + o5 and 32e3 = -31 + j - o6 and o2 <= i and o2 >= -31 + i and o3 <= 1 + j and o3 >= -30 + j and o5 >= 0 and o5 <= 31 and o6 >= 0 and o6 <= 31 and i <= -1 + length and i >= 0 and iter >= 0 and iter <= 1 and j <= -1 + length and j >= 0) } +[tsteps, length] -> { : length >= 0 and length <= 1024 and tsteps = 2 } +{ } -- 2.11.4.GIT