2 * lcs.c : routines for creating an lcs
4 * ====================================================================
5 * Copyright (c) 2000-2004 CollabNet. All rights reserved.
7 * This software is licensed as described in the file COPYING, which
8 * you should have received as part of this distribution. The terms
9 * are also available at http://subversion.tigris.org/license-1.html.
10 * If newer versions of this license are posted there, you may use a
11 * newer version instead, at your option.
13 * This software consists of voluntary contributions made by many
14 * individuals. For exact contribution history, see the revision
15 * history and logs, available at http://subversion.tigris.org/.
16 * ====================================================================
21 #include <apr_pools.h>
22 #include <apr_general.h>
24 #include <apr_general.h>
25 #include "svn_error.h"
26 #include "svn_version.h"
33 * Calculate the Longest Common Subsequence between two datasources.
34 * This function is what makes the diff code tick.
36 * The LCS algorithm implemented here is described by Sun Wu,
37 * Udi Manber and Gene Meyers in "An O(NP) Sequence Comparison Algorithm"
41 typedef struct svn_diff__snake_t svn_diff__snake_t
;
43 struct svn_diff__snake_t
47 svn_diff__position_t
*position
[2];
50 static APR_INLINE
void
51 svn_diff__snake(apr_off_t k
,
52 svn_diff__snake_t
*fp
,
54 svn_diff__lcs_t
**freelist
,
57 svn_diff__position_t
*start_position
[2];
58 svn_diff__position_t
*position
[2];
60 svn_diff__lcs_t
*previous_lcs
;
62 /* The previous entry at fp[k] is going to be replaced. See if we
63 * can mark that lcs node for reuse, because the sequence up to this
64 * point was a dead end.
73 previous_lcs
= lcs
->next
;
74 lcs
->next
= *freelist
;
79 if (fp
[k
- 1].y
+ 1 > fp
[k
+ 1].y
)
81 start_position
[0] = fp
[k
- 1].position
[0];
82 start_position
[1] = fp
[k
- 1].position
[1]->next
;
84 previous_lcs
= fp
[k
- 1].lcs
;
88 start_position
[0] = fp
[k
+ 1].position
[0]->next
;
89 start_position
[1] = fp
[k
+ 1].position
[1];
91 previous_lcs
= fp
[k
+ 1].lcs
;
95 /* ### Optimization, skip all positions that don't have matchpoints
96 * ### anyway. Beware of the sentinel, don't skip it!
99 position
[0] = start_position
[0];
100 position
[1] = start_position
[1];
102 while (position
[0]->node
== position
[1]->node
)
104 position
[0] = position
[0]->next
;
105 position
[1] = position
[1]->next
;
108 if (position
[1] != start_position
[1])
113 *freelist
= lcs
->next
;
117 lcs
= apr_palloc(pool
, sizeof(*lcs
));
120 lcs
->position
[idx
] = start_position
[0];
121 lcs
->position
[abs(1 - idx
)] = start_position
[1];
122 lcs
->length
= position
[1]->offset
- start_position
[1]->offset
;
123 lcs
->next
= previous_lcs
;
129 fp
[k
].lcs
= previous_lcs
;
134 previous_lcs
->refcount
++;
137 fp
[k
].position
[0] = position
[0];
138 fp
[k
].position
[1] = position
[1];
140 fp
[k
].y
= position
[1]->offset
;
144 static svn_diff__lcs_t
*
145 svn_diff__lcs_reverse(svn_diff__lcs_t
*lcs
)
147 svn_diff__lcs_t
*next
;
148 svn_diff__lcs_t
*prev
;
164 svn_diff__lcs(svn_diff__position_t
*position_list1
, /* pointer to tail (ring) */
165 svn_diff__position_t
*position_list2
, /* pointer to tail (ring) */
170 svn_diff__snake_t
*fp
;
174 svn_diff__lcs_t
*lcs
, *lcs_freelist
= NULL
;
176 svn_diff__position_t sentinel_position
[2];
178 /* Since EOF is always a sync point we tack on an EOF link
179 * with sentinel positions
181 lcs
= apr_palloc(pool
, sizeof(*lcs
));
182 lcs
->position
[0] = apr_pcalloc(pool
, sizeof(*lcs
->position
[0]));
183 lcs
->position
[0]->offset
= position_list1
? position_list1
->offset
+ 1 : 1;
184 lcs
->position
[1] = apr_pcalloc(pool
, sizeof(*lcs
->position
[1]));
185 lcs
->position
[1]->offset
= position_list2
? position_list2
->offset
+ 1 : 1;
190 if (position_list1
== NULL
|| position_list2
== NULL
)
193 /* Calculate length of both sequences to be compared */
194 length
[0] = position_list1
->offset
- position_list1
->next
->offset
+ 1;
195 length
[1] = position_list2
->offset
- position_list2
->next
->offset
+ 1;
196 idx
= length
[0] > length
[1] ? 1 : 0;
198 /* strikerXXX: here we allocate the furthest point array, which is
199 * strikerXXX: sized M + N + 3 (!)
201 fp
= apr_pcalloc(pool
,
202 sizeof(*fp
) * (apr_size_t
)(length
[0] + length
[1] + 3));
203 fp
+= length
[idx
] + 1;
205 sentinel_position
[idx
].next
= position_list1
->next
;
206 position_list1
->next
= &sentinel_position
[idx
];
207 sentinel_position
[idx
].offset
= position_list1
->offset
+ 1;
209 sentinel_position
[abs(1 - idx
)].next
= position_list2
->next
;
210 position_list2
->next
= &sentinel_position
[abs(1 - idx
)];
211 sentinel_position
[abs(1 - idx
)].offset
= position_list2
->offset
+ 1;
213 /* These are never dereferenced, only compared by value, so we
214 * can safely fake these up and the void* cast is OK.
216 sentinel_position
[0].node
= (void*)&sentinel_position
[0];
217 sentinel_position
[1].node
= (void*)&sentinel_position
[1];
219 d
= length
[abs(1 - idx
)] - length
[idx
];
221 /* k = -1 will be the first to be used to get previous
222 * position information from, make sure it holds sane
225 fp
[-1].position
[0] = sentinel_position
[0].next
;
226 fp
[-1].position
[1] = &sentinel_position
[1];
232 for (k
= -p
; k
< d
; k
++)
234 svn_diff__snake(k
, fp
, idx
, &lcs_freelist
, pool
);
237 for (k
= d
+ p
; k
>= d
; k
--)
239 svn_diff__snake(k
, fp
, idx
, &lcs_freelist
, pool
);
244 while (fp
[d
].position
[1] != &sentinel_position
[1]);
246 lcs
->next
= fp
[d
].lcs
;
247 lcs
= svn_diff__lcs_reverse(lcs
);
249 position_list1
->next
= sentinel_position
[idx
].next
;
250 position_list2
->next
= sentinel_position
[abs(1 - idx
)].next
;