From f6467da11a2738fcc282b9aec4cf6c38dcc3f8f9 Mon Sep 17 00:00:00 2001
From: Sven Verdoolaege <skimo@kotnet.org>
Date: Fri, 26 Sep 2014 08:24:29 +0200
Subject: [PATCH] gpu_array_ref_group: replace last_shared by depth

While last_shared was the index of the last dimension that affects
the tile, depth is the number of dimensions that affect the tile.
That is, the depth is one greater than last_shared.
The depth is easier to work with, especially when we switch
to schedule trees as it reflects the depth where the copying
should be performed.

Note that it may be tempting to define the depth on the tile
rather than on the group.  However, the only place where the
shared tile is used while a private tile is also defined for
the same group is group_common_shared_memory_tile, and in that
case we need the depth of the effective tile
(i.e., the private tile).

Signed-off-by: Sven Verdoolaege <skimo@kotnet.org>
---
 gpu.c       | 16 +++++++--------
 gpu_group.c | 67 +++++++++++++++++++++++++++++--------------------------------
 gpu_group.h |  9 ++++++---
 3 files changed, 46 insertions(+), 46 deletions(-)

diff --git a/gpu.c b/gpu.c
index 3f8f373..db089e3 100644
--- a/gpu.c
+++ b/gpu.c
@@ -3197,12 +3197,11 @@ static __isl_give isl_union_map *remove_local_accesses(
 
 /* Given an access relation "access" from "group", remove those reads
  * if ("read" is 1) or writes (if "read" is 0) that are only needed to
- * communicate data within the same iteration of the last_shared dimension
- * of the group.
+ * communicate data within the same iteration of the schedule at the
+ * position where the copying of the group is inserted.
  *
- * We extract a schedule that picks out the iteration of the last_shared
- * dimension of the group (and outer dimensions) and
- * call remove_local_accesses.
+ * We extract a schedule that picks out the iterations of the outer
+ * group->depth dimensions and call remove_local_accesses.
  */
 static __isl_give isl_union_map *remove_local_accesses_group(
 	struct gpu_gen *gen, struct gpu_array_ref_group *group,
@@ -3218,7 +3217,7 @@ static __isl_give isl_union_map *remove_local_accesses_group(
 	sched = isl_union_map_copy(gen->sched);
 
 	space = isl_union_map_get_space(sched);
-	proj = projection(space, gen->untiled_len, group->last_shared + 1);
+	proj = projection(space, gen->untiled_len, group->depth);
 	sched = isl_union_map_apply_range(sched, isl_union_map_from_map(proj));
 
 	return remove_local_accesses(gen->prog, group, access, sched, read);
@@ -3250,7 +3249,8 @@ static __isl_give isl_union_map *remove_local_accesses_group(
  *
  * and remove from this access relation those reads or writes
  * that only needed to communicate data within the same iteration
- * of the last_shared dimension of the group.
+ * of the outer part of the schedule where the copying for the group
+ * is inserted.
  * We then combine what is left with shared_sched into
  *
  *	D -> [S -> A]
@@ -3319,7 +3319,7 @@ static __isl_give isl_union_map *add_group_schedule(struct gpu_gen *gen,
 	map = isl_map_domain_map(isl_map_universe(space));
 
 	space = isl_union_map_get_space(schedule);
-	pos = group->last_shared + 1 - gen->tile_first;
+	pos = group->depth - gen->tile_first;
 	assert(pos >= 0);
 	if (read)
 		val = -2 - k;
diff --git a/gpu_group.c b/gpu_group.c
index e27f3e5..58c87f6 100644
--- a/gpu_group.c
+++ b/gpu_group.c
@@ -489,12 +489,12 @@ static int access_is_bijective(struct gpu_gen *gen, __isl_keep isl_map *access)
 	return res;
 }
 
-/* Look for the last shared tile loop that affects the offset of "tile"
- * and return the result.
- * If there is no such loop, then return the index of the loop
- * before the first shared tile loop, in particular gen->tile_first - 1.
+/* Compute the number of outer schedule tile dimensions that affect
+ * the offset of "tile".
+ * If there is no such dimension, then return the index
+ * of the first shared tile loop, i.e., gen->tile_first.
  */
-static int compute_tile_last_shared(struct gpu_gen *gen,
+static int compute_tile_depth(struct gpu_gen *gen,
 	struct gpu_array_tile *tile)
 {
 	int i, j;
@@ -518,28 +518,27 @@ static int compute_tile_last_shared(struct gpu_gen *gen,
 			break;
 	}
 
-	return j;
+	return ++j;
 }
 
-/* Look for the last shared tile loop that affects the offset of the
- * shared or private tile and store the result in group->last_shared.
- * If there is no such loop, then group->last_shared is set to a value
- * before the first shared tile loop, in particular gen->tile_first - 1.
+/* Determine the number of schedule dimensions that affect the offset of the
+ * shared or private tile and store the result in group->depth, with
+ * a lower bound of gen->tile_first.
  * If there is no tile defined on the array reference group,
- * then set group->last_shared to gen->shared_len - 1.
+ * then set group->depth to gen->shared_len.
  */
-static void set_last_shared(struct gpu_gen *gen,
+static void set_depth(struct gpu_gen *gen,
 	struct gpu_array_ref_group *group)
 {
 	struct gpu_array_tile *tile;
 
-	group->last_shared = gen->shared_len - 1;
+	group->depth = gen->shared_len;
 
 	tile = gpu_array_ref_group_tile(group);
 	if (!tile)
 		return;
 
-	group->last_shared = compute_tile_last_shared(gen, tile);
+	group->depth = compute_tile_depth(gen, tile);
 }
 
 /* Fill up the groups array with singleton groups, i.e., one group
@@ -909,7 +908,7 @@ static int compute_group_bounds_core(struct gpu_gen *gen,
 }
 
 /* Compute the private and/or shared memory tiles for the array
- * reference group "group" of array "array" and set last_shared.
+ * reference group "group" of array "array" and set the tile depth.
  * Return 0 on success and -1 on error.
  */
 static int compute_group_bounds(struct gpu_gen *gen,
@@ -919,7 +918,7 @@ static int compute_group_bounds(struct gpu_gen *gen,
 		return -1;
 	if (compute_group_bounds_core(gen, group) < 0)
 		return -1;
-	set_last_shared(gen, group);
+	set_depth(gen, group);
 
 	return 0;
 }
@@ -978,26 +977,24 @@ static int group_overlapping_writes(struct gpu_gen *gen,
 }
 
 /* Check if the access relations of group1 and group2 overlap within
- * the outermost min(group1->last_shared, group2->last_shared) loops.
+ * the outermost min(group1->depth, group2->depth) loops.
  */
-static int last_shared_accesses_overlap(struct gpu_array_ref_group *group1,
+static int depth_accesses_overlap(struct gpu_array_ref_group *group1,
 	struct gpu_array_ref_group *group2)
 {
-	int last_shared;
+	int depth;
 	int dim;
 	int empty;
 	isl_map *map_i, *map_j, *map;
 
-	last_shared = group1->last_shared;
-	if (group2->last_shared < last_shared)
-		last_shared = group2->last_shared;
+	depth = group1->depth;
+	if (group2->depth < depth)
+		depth = group2->depth;
 	map_i = isl_map_copy(group1->access);
 	dim = isl_map_dim(map_i, isl_dim_in);
-	map_i = isl_map_eliminate(map_i, isl_dim_in,
-				last_shared + 1, dim - (last_shared + 1));
+	map_i = isl_map_eliminate(map_i, isl_dim_in, depth, dim - depth);
 	map_j = isl_map_copy(group2->access);
-	map_j = isl_map_eliminate(map_j, isl_dim_in,
-				last_shared + 1, dim - (last_shared + 1));
+	map_j = isl_map_eliminate(map_j, isl_dim_in, depth, dim - depth);
 	map = isl_map_intersect(map_i, map_j);
 	empty = isl_map_is_empty(map);
 	isl_map_free(map);
@@ -1006,15 +1003,15 @@ static int last_shared_accesses_overlap(struct gpu_array_ref_group *group1,
 }
 
 /* If two groups have overlapping access relations (within the outer
- * last_shared loops) and if one of them involves a write,
+ * depth loops) and if one of them involves a write,
  * then merge the two groups into one.
  *
  * Return the updated number of groups.
  */
-static int group_last_shared_overlapping_writes(struct gpu_gen *gen, int n,
+static int group_depth_overlapping_writes(struct gpu_gen *gen, int n,
 	struct gpu_array_ref_group **groups)
 {
-	return group_writes(gen, n, groups, &last_shared_accesses_overlap, 1);
+	return group_writes(gen, n, groups, &depth_accesses_overlap, 1);
 }
 
 /* Is the size of the tile specified by "tile" smaller than the sum of
@@ -1045,7 +1042,7 @@ static int smaller_tile(struct gpu_array_tile *tile,
  * a shared memory tile and the size of the tile for the merge group
  * is smaller than the sum of the tile sizes of the individual groups.
  *
- * If merging two groups decreases the "last_shared" dimension of
+ * If merging two groups decreases the depth of the tile of
  * one or both of the two groups, then we need to check for overlapping
  * writes again.
  *
@@ -1092,8 +1089,8 @@ static int group_common_shared_memory_tile(struct gpu_gen *gen,
 				continue;
 			}
 
-			if (group->last_shared < groups[i]->last_shared ||
-			    group->last_shared < groups[j]->last_shared)
+			if (group->depth < groups[i]->depth ||
+			    group->depth < groups[j]->depth)
 				recompute_overlap = 1;
 			gpu_array_ref_group_free(groups[i]);
 			gpu_array_ref_group_free(groups[j]);
@@ -1105,7 +1102,7 @@ static int group_common_shared_memory_tile(struct gpu_gen *gen,
 	}
 
 	if (recompute_overlap)
-		n = group_last_shared_overlapping_writes(gen, n, groups);
+		n = group_depth_overlapping_writes(gen, n, groups);
 	return n;
 }
 
@@ -1134,7 +1131,7 @@ static void set_array_groups(struct gpu_local_array_info *array,
  * We first perform an initial grouping based only on the access relation.
  * After computing shared and private memory tiles, we check for
  * overlapping writes again, but this time taking into account
- * the "last_shared" property.
+ * the depth of the effective tile.
  *
  * Furthermore, if two groups admit a shared memory tile and if the
  * combination of the two also admits a shared memory tile, we merge
@@ -1168,7 +1165,7 @@ static int group_array_references(struct gpu_gen *gen,
 		if (compute_group_bounds(gen, groups[i]) < 0)
 			n = -1;
 
-	n = group_last_shared_overlapping_writes(gen, n, groups);
+	n = group_depth_overlapping_writes(gen, n, groups);
 
 	n = group_common_shared_memory_tile(gen, local->array, n, groups);
 
diff --git a/gpu_group.h b/gpu_group.h
index e50be51..0e85334 100644
--- a/gpu_group.h
+++ b/gpu_group.h
@@ -9,6 +9,10 @@
  * Otherwise, it is accessed from global memory.
  * Note that if both private_tile and shared_tile are set, then shared_tile
  * is only used inside group_common_shared_memory_tile.
+ * "depth" reflects the number of schedule dimensions that affect the tile
+ * (private_tile if set; shared_tile if shared_tile is set and private_tile
+ * is not).  The copying into and/or out of the tile is performed at that
+ * depth.
  */
 struct gpu_array_ref_group {
 	/* The references in this group access this local array. */
@@ -38,12 +42,11 @@ struct gpu_array_ref_group {
 	/* The private memory tile, NULL if none. */
 	struct gpu_array_tile *private_tile;
 
+	int depth;
+
 	/* References in this group; point to elements of a linked list. */
 	int n_ref;
 	struct gpu_stmt_access **refs;
-
-	/* Last shared memory tile dimension that affects tile of this group. */
-	int last_shared;
 };
 
 int gpu_group_references(struct gpu_gen *gen);
-- 
2.11.4.GIT