From 6269d1ae3d24d81b0dc789db4cf198983351ba60 Mon Sep 17 00:00:00 2001
From: Sven Verdoolaege <skimo@kotnet.org>
Date: Wed, 14 Apr 2010 12:25:19 +0200
Subject: [PATCH] isl_map_transitive_closure: use Floyd-Warshall on disjoint
 domains and ranges

---
 doc/implementation.tex   | 156 ++++++++++++++++++++++++++++++++
 doc/isl.bib              |  15 ++++
 doc/manual.tex           |   3 +
 isl_test.c               |   7 +-
 isl_transitive_closure.c | 226 ++++++++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 404 insertions(+), 3 deletions(-)

diff --git a/doc/implementation.tex b/doc/implementation.tex
index 95166ef9..71414941 100644
--- a/doc/implementation.tex
+++ b/doc/implementation.tex
@@ -725,6 +725,162 @@ to $R$ directly, without a decomposition, would result in
 an overapproximation of the power.
 \end{example}
 
+\subsection{Partitioning the domains and ranges of $R$}
+
+The algorithm of \autoref{s:power} assumes that the input relation $R$
+can be treated as a union of translations.
+This is a reasonable assumption if $R$ maps elements of a given
+abstract domain to the same domain.
+However, if $R$ is a union of relations that map between different
+domains, then this assumption no longer holds.
+In particular, when an entire dependence graph is encoded
+in a single relation, as is done by, e.g.,
+\shortciteN[Section~6.1]{Barthou2000MSE}, then it does not make
+sense to look at differences between iterations of different domains.
+Now, arguably, a modified Floyd-Warshall algorithm should
+be applied to the dependence graph, as advocated by
+\shortciteN{Kelly1996closure}, with the transitive closure operation
+only being applied to relations from a given domain to itself.
+However, it is also possible to detect disjoint domains and ranges
+and to apply Floyd-Warshall internally.
+
+\linesnumbered
+\begin{algorithm}
+\caption{The modified Floyd-Warshall algorithm of
+\protect\shortciteN{Kelly1996closure}}
+\label{a:Floyd}
+\SetKwInput{Input}{Input}
+\SetKwInput{Output}{Output}
+\Input{Relations $R_{pq}$, $0 \le p, q < n$}
+\Output{Updated relations $R_{pq}$ such that each relation
+$R_{pq}$ contains all indirect paths from $p$ to $q$ in the input graph}
+%
+\BlankLine
+\SetVline
+\dontprintsemicolon
+%
+\For{$r \in [0, n-1]$}{
+    $R_{rr} \coloneqq R_{rr}^+$ \nllabel{l:Floyd:closure}\;
+    \For{$p \in [0, n-1]$}{
+	\For{$q \in [0, n-1]$}{
+	    \If{$p \ne r$ or $q \ne r$}{
+		$R_{pq} \coloneqq R_{pq} \cup \left(R_{rq} \circ R_{pr}\right)
+			     \cup \left(R_{rq} \circ R_{rr} \circ R_{pr}\right)$
+	     \nllabel{l:Floyd:update}
+	     }
+	}
+    }
+}
+\end{algorithm}
+
+Let the input relation $R$ be a union of $m$ basic relations $R_i$.
+Let $D_{2i}$ be the domains of $R_i$ and $D_{2i+1}$ the ranges of $R_i$.
+The first step is to group overlapping $D_j$ until a partition is
+obtained.  If the resulting partition consists of a single part,
+then we continue with the algorithm of \autoref{s:power}.
+Otherwise, we apply Floyd-Warshall on the graph with as vertices
+the parts of the partition and as edges the $R_i$ attached to
+the appropriate pairs of vertices.
+In particular, let there be $n$ parts $P_k$ in the partition.
+We construct $n^2$ relations
+$$
+R_{pq} \coloneqq \bigcup_{i \text{ s.t. } \domain R_i \subseteq P_p \wedge
+				 \range R_i \subseteq P_q} R_i
+,
+$$
+apply \autoref{a:Floyd} and return the union of all resulting
+$R_{pq}$ as the transitive closure of $R$.
+Each iteration of the $r$-loop in \autoref{a:Floyd} updates
+all relations $R_{pq}$ to include paths that go from $p$ to $r$,
+possibly stay there for a while, and then go from $r$ to $q$.
+Note that paths that ``stay in $r$'' include all paths that
+pass through earlier vertices since $R_{rr}$ itself has been updated
+accordingly in previous iterations of the outer loop.
+In principle, it would be sufficient to use the $R_{pr}$
+and $R_{rq}$ computed in the previous iteration of the
+$r$-loop in Line~\ref{l:Floyd:update}.
+However, from an implementation perspective, it is easier
+to allow either or both of these to have been updated
+in the same iteration of the $r$-loop.
+This may result in duplicate paths, but these can usually
+be removed by coalescing (\autoref{s:coalescing}) the result of the union
+in Line~\ref{l:Floyd:update}, which should be done in any case.
+The transitive closure in Line~\ref{l:Floyd:closure}
+is performed using a recursive call.  This recursive call
+includes the partitioning step, but the resulting partition will
+usually be a singleton.
+The result of the recursive call will either be exact or an
+overapproximation.  The final result of Floyd-Warshall is therefore
+also exact or an overapproximation.
+
+\begin{figure}
+\begin{center}
+\begin{tikzpicture}[x=1cm,y=1cm,>=stealth,shorten >=3pt]
+\foreach \x/\y in {0/0,1/1,3/2} {
+    \fill (\x,\y) circle (2pt);
+}
+\foreach \x/\y in {0/1,2/2,3/3} {
+    \draw (\x,\y) circle (2pt);
+}
+\draw[->] (0,0) -- (0,1);
+\draw[->] (0,1) -- (1,1);
+\draw[->] (2,2) -- (3,2);
+\draw[->] (3,2) -- (3,3);
+\draw[->,dashed] (2,2) -- (3,3);
+\draw[->,dotted] (0,0) -- (1,1);
+\end{tikzpicture}
+\end{center}
+\caption{The relation (solid arrows) on the right of Figure~1 of
+\protect\shortciteN{Beletska2009} and its transitive closure}
+\label{f:COCOA:1}
+\end{figure}
+\begin{example}
+Consider the relation on the right of Figure~1 of
+\shortciteN{Beletska2009},
+reproduced in \autoref{f:COCOA:1}.
+This relation can be described as
+$$
+\begin{aligned}
+\{\, (x, y) \to (x_2, y_2) \mid {} & (3y = 2x \wedge x_2 = x \wedge 3y_2 = 3 + 2x \wedge x \ge 0 \wedge x \le 3) \vee {} \\
+& (x_2 = 1 + x \wedge y_2 = y \wedge x \ge 0 \wedge 3y \ge 2 + 2x \wedge x \le 2 \wedge 3y \le 3 + 2x) \,\}
+.
+\end{aligned}
+$$
+Note that the domain of the upward relation overlaps with the range
+of the rightward relation and vice versa, but that the domain
+of neither relation overlaps with its own range or the domain of
+the other relation.
+The domains and ranges can therefore be partitioned into two parts,
+$P_0$ and $P_1$, shown as the white and black dots in \autoref{f:COCOA:1},
+respectively.
+Initially, we have
+$$
+\begin{aligned}
+R_{00} & = \emptyset
+\\
+R_{01} & = 
+\{\, (x, y) \to (x+1, y) \mid 
+(x \ge 0 \wedge 3y \ge 2 + 2x \wedge x \le 2 \wedge 3y \le 3 + 2x) \,\}
+\\
+R_{10} & =
+\{\, (x, y) \to (x_2, y_2) \mid (3y = 2x \wedge x_2 = x \wedge 3y_2 = 3 + 2x \wedge x \ge 0 \wedge x \le 3) \,\}
+\\
+R_{11} & = \emptyset
+.
+\end{aligned}
+$$
+In the first iteration, $R_{00}$ remains the same ($\emptyset^+ = \emptyset$).
+$R_{01}$ and $R_{10}$ are therefore also unaffected, but
+$R_{11}$ is updated to include $R_{01} \circ R_{10}$, i.e.,
+the dashed arrow in the figure.
+This new $R_{11}$ is obviously transitively closed, so it is not
+changed in the second iteration and it does not have an effect
+on $R_{01}$ and $R_{10}$.  However, $R_{00}$ is updated to
+include $R_{10} \circ R_{01}$, i.e., the dotted arrow in the figure.
+The transitive closure of the original relation is then equal to
+$R_{00} \cup R_{01} \cup R_{10} \cup R_{11}$.
+\end{example}
+
 \subsection{An {\tt Omega}-like implementation}
 
 While the main algorithm of \shortciteN{Kelly1996closure} is
diff --git a/doc/isl.bib b/doc/isl.bib
index 50836c76..425eeed2 100644
--- a/doc/isl.bib
+++ b/doc/isl.bib
@@ -77,3 +77,18 @@
   year = "2009",
   url = "https://lirias.kuleuven.be/handle/123456789/228373",
 }
+
+@article{Barthou2000MSE,
+  author = {Barthou, Denis and Cohen, Albert and Collard, Jean-Fran\c{c}ois},
+  title = {Maximal Static Expansion},
+  journal = {Int. J. Parallel Program.},
+  volume = {28},
+  number = {3},
+  year = {2000},
+  issn = {0885-7458},
+  pages = {213--243},
+  doi = {10.1023/A:1007500431910},
+  publisher = {Kluwer Academic Publishers},
+  address = {Norwell, MA, USA},
+}
+
diff --git a/doc/manual.tex b/doc/manual.tex
index e9c594f1..b80f2eaf 100644
--- a/doc/manual.tex
+++ b/doc/manual.tex
@@ -7,6 +7,7 @@
 \usepackage{aliascnt}
 \usepackage{tikz}
 \usepackage{calc}
+\usepackage[ruled]{algorithm2e}
 
 \def\vec#1{\mathchoice{\mbox{\boldmath$\displaystyle\bf#1$}}
 {\mbox{\boldmath$\textstyle\bf#1$}}
@@ -24,7 +25,9 @@
 \numberwithin{def}{section}
 \numberwithin{example}{section}
 
+\newcommand{\algocflineautorefname}{Algorithm}
 \newcommand{\exampleautorefname}{Example}
+\newcommand{\lstnumberautorefname}{Line}
 \renewcommand{\subsectionautorefname}{Section}
 
 \def\Z{\mathbb{Z}}
diff --git a/isl_test.c b/isl_test.c
index 187eacc2..044b9251 100644
--- a/isl_test.c
+++ b/isl_test.c
@@ -862,7 +862,12 @@ void test_closure(struct isl_ctx *ctx)
 	up = isl_map_intersect_range(up, dom);
 	map = isl_map_union(up, right);
 	map = isl_map_transitive_closure(map, &exact);
-	assert(!exact);
+	assert(exact);
+	map2 = isl_map_read_from_str(ctx,
+		"{ [0,0] -> [0,1]; [0,0] -> [1,1]; [0,1] -> [1,1]; "
+		"  [2,2] -> [3,2]; [2,2] -> [3,3]; [3,2] -> [3,3] }", -1);
+	assert(isl_map_is_equal(map, map2));
+	isl_map_free(map2);
 	isl_map_free(map);
 
 	/* COCOA Theorem 1 counter example */
diff --git a/isl_transitive_closure.c b/isl_transitive_closure.c
index 66dbaa01..e332458b 100644
--- a/isl_transitive_closure.c
+++ b/isl_transitive_closure.c
@@ -815,6 +815,228 @@ static __isl_give isl_map *construct_projected_component(
 	return app;
 }
 
+/* Given an array of sets "set", add "dom" at position "pos"
+ * and search for elements at earlier positions that overlap with "dom".
+ * If any can be found, then merge all of them, together with "dom", into
+ * a single set and assign the union to the first in the array,
+ * which becomes the new group leader for all groups involved in the merge.
+ * During the search, we only consider group leaders, i.e., those with
+ * group[i] = i, as the other sets have already been combined
+ * with one of the group leaders.
+ */
+static int merge(isl_set **set, int *group, __isl_take isl_set *dom, int pos)
+{
+	int i;
+
+	group[pos] = pos;
+	set[pos] = isl_set_copy(dom);
+
+	for (i = pos - 1; i >= 0; --i) {
+		int o;
+
+		if (group[i] != i)
+			continue;
+
+		o = isl_set_overlaps(set[i], dom);
+		if (o < 0)
+			goto error;
+		if (!o)
+			continue;
+
+		set[i] = isl_set_union(set[i], set[group[pos]]);
+		if (!set[i])
+			goto error;
+		set[group[pos]] = NULL;
+		group[group[pos]] = i;
+	}
+
+	isl_set_free(dom);
+	return 0;
+error:
+	isl_set_free(dom);
+	return -1;
+}
+
+/* Given a partition of the domains and ranges of the basic maps in "map",
+ * apply the Floyd-Warshall algorithm with the elements in the partition
+ * as vertices.
+ *
+ * In particular, there are "n" elements in the partition and "group" is
+ * an array of length 2 * map->n with entries in [0,n-1].
+ *
+ * We first construct a matrix of relations based on the partition information,
+ * apply Floyd-Warshall on this matrix of relations and then take the
+ * union of all entries in the matrix as the final result.
+ *
+ * The algorithm iterates over all vertices.  In each step, the whole
+ * matrix is updated to include all paths that go to the current vertex,
+ * possibly stay there a while (including passing through earlier vertices)
+ * and then come back.  At the start of each iteration, the diagonal
+ * element corresponding to the current vertex is replaced by its
+ * transitive closure to account for all indirect paths that stay
+ * in the current vertex.
+ */
+static __isl_give isl_map *floyd_warshall_with_groups(__isl_take isl_dim *dim,
+	__isl_keep isl_map *map, int *exact, int project, int *group, int n)
+{
+	int i, j, k;
+	int r, p, q;
+	isl_map ***grid = NULL;
+	isl_map *app;
+
+	if (!map)
+		goto error;
+
+	if (n == 1) {
+		free(group);
+		return construct_projected_component(dim, map, exact, project);
+	}
+
+	grid = isl_calloc_array(map->ctx, isl_map **, n);
+	if (!grid)
+		goto error;
+	for (i = 0; i < n; ++i) {
+		grid[i] = isl_calloc_array(map->ctx, isl_map *, n);
+		if (!grid[i])
+			goto error;
+		for (j = 0; j < n; ++j)
+			grid[i][j] = isl_map_empty(isl_map_get_dim(map));
+	}
+
+	for (k = 0; k < map->n; ++k) {
+		i = group[2 * k];
+		j = group[2 * k + 1];
+		grid[i][j] = isl_map_union(grid[i][j],
+				isl_map_from_basic_map(
+					isl_basic_map_copy(map->p[k])));
+	}
+
+	for (r = 0; r < n; ++r) {
+		int r_exact;
+		grid[r][r] = isl_map_transitive_closure(grid[r][r],
+				(exact && *exact) ? &r_exact : NULL);
+		if (exact && *exact && !r_exact)
+			*exact = 0;
+
+		for (p = 0; p < n; ++p)
+			for (q = 0; q < n; ++q) {
+				isl_map *loop;
+				if (p == r && q == r)
+					continue;
+				loop = isl_map_apply_range(
+						isl_map_copy(grid[p][r]),
+						isl_map_copy(grid[r][q]));
+				grid[p][q] = isl_map_union(grid[p][q], loop);
+				loop = isl_map_apply_range(
+						isl_map_copy(grid[p][r]),
+					isl_map_apply_range(
+						isl_map_copy(grid[r][r]),
+						isl_map_copy(grid[r][q])));
+				grid[p][q] = isl_map_union(grid[p][q], loop);
+				grid[p][q] = isl_map_coalesce(grid[p][q]);
+			}
+	}
+
+	app = isl_map_empty(isl_map_get_dim(map));
+
+	for (i = 0; i < n; ++i) {
+		for (j = 0; j < n; ++j)
+			app = isl_map_union(app, grid[i][j]);
+		free(grid[i]);
+	}
+	free(grid);
+
+	free(group);
+	isl_dim_free(dim);
+
+	return app;
+error:
+	if (grid)
+		for (i = 0; i < n; ++i) {
+			if (!grid[i])
+				continue;
+			for (j = 0; j < n; ++j)
+				isl_map_free(grid[i][j]);
+			free(grid[i]);
+		}
+	free(grid);
+	free(group);
+	isl_dim_free(dim);
+	return NULL;
+}
+
+/* Check if the domains and ranges of the basic maps in "map" can
+ * be partitioned, and if so, apply Floyd-Warshall on the elements
+ * of the partition.  Note that we can only apply this algorithm
+ * if we want to compute the transitive closure, i.e., when "project"
+ * is set.  If we want to compute the power, we need to keep track
+ * of the lengths and the recursive calls inside the Floyd-Warshall
+ * would result in non-linear lengths.
+ *
+ * To find the partition, we simply consider all of the domains
+ * and ranges in turn and combine those that overlap.
+ * "set" contains the partition elements and "group" indicates
+ * to which partition element a given domain or range belongs.
+ * The domain of basic map i corresponds to element 2 * i in these arrays,
+ * while the domain corresponds to element 2 * i + 1.
+ * During the construction group[k] is either equal to k,
+ * in which case set[k] contains the union of all the domains and
+ * ranges in the corresponding group, or is equal to some l < k,
+ * with l another domain or range in the same group.
+ */
+static __isl_give isl_map *floyd_warshall(__isl_take isl_dim *dim,
+	__isl_keep isl_map *map, int *exact, int project)
+{
+	int i;
+	isl_set **set = NULL;
+	int *group = NULL;
+	int n;
+
+	if (!map)
+		goto error;
+	if (!project || map->n <= 1)
+		return construct_projected_component(dim, map, exact, project);
+
+	set = isl_calloc_array(map->ctx, isl_set *, 2 * map->n);
+	group = isl_alloc_array(map->ctx, int, 2 * map->n);
+
+	if (!set || !group)
+		goto error;
+
+	for (i = 0; i < map->n; ++i) {
+		isl_set *dom;
+		dom = isl_set_from_basic_set(isl_basic_map_domain(
+				isl_basic_map_copy(map->p[i])));
+		if (merge(set, group, dom, 2 * i) < 0)
+			goto error;
+		dom = isl_set_from_basic_set(isl_basic_map_range(
+				isl_basic_map_copy(map->p[i])));
+		if (merge(set, group, dom, 2 * i + 1) < 0)
+			goto error;
+	}
+
+	n = 0;
+	for (i = 0; i < 2 * map->n; ++i)
+		if (group[i] == i)
+			group[i] = n++;
+		else
+			group[i] = group[group[i]];
+
+	for (i = 0; i < 2 * map->n; ++i)
+		isl_set_free(set[i]);
+
+	free(set);
+
+	return floyd_warshall_with_groups(dim, map, exact, project, group, n);
+error:
+	for (i = 0; i < 2 * map->n; ++i)
+		isl_set_free(set[i]);
+	free(set);
+	free(group);
+	isl_dim_free(dim);
+	return NULL;
+}
+
 /* Structure for representing the nodes in the graph being traversed
  * using Tarjan's algorithm.
  * index represents the order in which nodes are visited.
@@ -1033,7 +1255,7 @@ static __isl_give isl_map *construct_power_components(__isl_take isl_dim *dim,
 	if (!map)
 		goto error;
 	if (map->n <= 1)
-		return construct_projected_component(dim, map, exact, project);
+		return floyd_warshall(dim, map, exact, project);
 
 	s = basic_map_sort_alloc(map->ctx, map->n);
 	if (!s)
@@ -1061,7 +1283,7 @@ static __isl_give isl_map *construct_power_components(__isl_take isl_dim *dim,
 			--n;
 			++i;
 		}
-		path_comp = construct_projected_component(isl_dim_copy(dim),
+		path_comp = floyd_warshall(isl_dim_copy(dim),
 						comp, exact, project);
 		path_comb = isl_map_apply_range(isl_map_copy(path),
 						isl_map_copy(path_comp));
-- 
2.11.4.GIT