unistr/u{8,16,32}-uctomb: Avoid possible trouble with huge strings.
[gnulib.git] / lib / git-merge-changelog.c
blob7b74a490e0f33ca53062759a526b077fd0e75a13
1 /* git-merge-changelog - git "merge" driver for GNU style ChangeLog files.
2 Copyright (C) 2008-2020 Bruno Haible <bruno@clisp.org>
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* README:
18 The default merge driver of 'git' *always* produces conflicts when
19 pulling public modifications into a privately modified ChangeLog file.
20 This is because ChangeLog files are always modified at the top; the
21 default merge driver has no clue how to deal with this. Furthermore
22 the conflicts are presented with more <<<< ==== >>>> markers than
23 necessary; this is because the default merge driver makes pointless
24 efforts to look at the individual line changes inside a ChangeLog entry.
26 This program serves as a 'git' merge driver that avoids these problems.
27 1. It produces no conflict when ChangeLog entries have been inserted
28 at the top both in the public and in the private modification. It
29 puts the privately added entries above the publicly added entries.
30 2. It respects the structure of ChangeLog files: entries are not split
31 into lines but kept together.
32 3. It also handles the case of small modifications of past ChangeLog
33 entries, or of removed ChangeLog entries: they are merged as one
34 would expect it.
35 4. Conflicts are presented at the top of the file, rather than where
36 they occurred, so that the user will see them immediately. (Unlike
37 for source code written in some programming language, conflict markers
38 that are located several hundreds lines from the top will not cause
39 any syntax error and therefore would be likely to remain unnoticed.)
42 /* Installation:
44 $ gnulib-tool --create-testdir --dir=/tmp/testdir123 git-merge-changelog
45 $ cd /tmp/testdir123
46 $ ./configure
47 $ make
48 $ make install
50 Additionally, for git users:
51 - Add to .git/config of the checkout (or to your $HOME/.gitconfig) the
52 lines
54 [merge "merge-changelog"]
55 name = GNU-style ChangeLog merge driver
56 driver = /usr/local/bin/git-merge-changelog %O %A %B
58 - In every directory that contains a ChangeLog file, add a file
59 '.gitattributes' with this line:
61 ChangeLog merge=merge-changelog
63 (See "man 5 gitattributes" for more info.)
65 Additionally, for bzr users:
66 - Install the 'extmerge' bzr plug-in listed at
67 <http://doc.bazaar.canonical.com/plugins/en/index.html>
68 <http://wiki.bazaar.canonical.com/BzrPlugins>
69 - Add to your $HOME/.bazaar/bazaar.conf the line
71 external_merge = git-merge-changelog %b %T %o
73 - Then, to merge a conflict in a ChangeLog file, use
75 $ bzr extmerge ChangeLog
77 Additionally, for hg users:
78 - Add to your $HOME/.hgrc the lines
80 [merge-patterns]
81 ChangeLog = git-merge-changelog
83 [merge-tools]
84 git-merge-changelog.executable = /usr/local/bin/git-merge-changelog
85 git-merge-changelog.args = $base $local $other
87 See <https://www.selenic.com/mercurial/hgrc.5.html> section merge-tools
88 for reference.
91 /* Use as an alternative to 'diff3':
92 git-merge-changelog performs the same role as "diff3 -m", just with
93 reordered arguments:
94 $ git-merge-changelog %O %A %B
95 is comparable to
96 $ diff3 -m %A %O %B
99 /* Calling convention:
100 A merge driver is called with three filename arguments:
101 1. %O = The common ancestor of %A and %B.
102 2. %A = The file's contents from the "current branch".
103 3. %B = The file's contents from the "other branch"; this is the contents
104 being merged in.
106 In case of a "git stash apply" or of an upstream pull (e.g. from a subsystem
107 maintainer to a central maintainer) or of a downstream pull with --rebase:
108 2. %A = The file's newest pulled contents; modified by other committers.
109 3. %B = The user's newest copy of the file; modified by the user.
110 In case of a downstream pull (e.g. from a central repository to the user)
111 or of an upstream pull with --rebase:
112 2. %A = The user's newest copy of the file; modified by the user.
113 3. %B = The file's newest pulled contents; modified by other committers.
115 It should write its merged output into file %A. It can also echo some
116 remarks to stdout. It should exit with return code 0 if the merge could
117 be resolved cleanly, or with non-zero return code if there were conflicts.
120 /* How it works:
121 The structure of a ChangeLog file: It consists of ChangeLog entries. A
122 ChangeLog entry starts at a line following a blank line and that starts with
123 a non-whitespace character, or at the beginning of a file.
124 The merge driver works as follows: It reads the three files into memory and
125 dissects them into ChangeLog entries. It then finds the differences between
126 %O and %B. They are classified as:
127 - removals (some consecutive entries removed),
128 - changes (some consecutive entries removed, some consecutive entries
129 added),
130 - additions (some consecutive entries added).
131 The driver then attempts to apply the changes to %A.
132 To this effect, it first computes a correspondence between the entries in %O
133 and the entries in %A, using fuzzy string matching to still identify changed
134 entries.
135 - Removals are applied one by one. If the entry is present in %A, at any
136 position, it is removed. If not, the removal is marked as a conflict.
137 - Additions at the top of %B are applied at the top of %A.
138 - Additions between entry x and entry y (y may be the file end) in %B are
139 applied between entry x and entry y in %A (if they still exist and are
140 still consecutive in %A), otherwise the additions are marked as a
141 conflict.
142 - Changes are categorized into "simple changes":
143 entry1 ... entryn
144 are mapped to
145 added_entry ... added_entry modified_entry1 ... modified_entryn,
146 where the correspondence between entry_i and modified_entry_i is still
147 clear; and "big changes": these are all the rest. Simple changes at the
148 top of %B are applied by putting the added entries at the top of %A. The
149 changes in simple changes are applied one by one; possibly leading to
150 single-entry conflicts. Big changes are applied en bloc, possibly
151 leading to conflicts spanning multiple entries.
152 - Conflicts are output at the top of the file and cause an exit status of
156 #include <config.h>
158 #include <getopt.h>
159 #include <limits.h>
160 #include <stdbool.h>
161 #include <stdio.h>
162 #include <stdlib.h>
163 #include <string.h>
164 #include <sys/types.h>
165 #include <unistd.h>
167 #include "error.h"
168 #include "read-file.h"
169 #include "gl_xlist.h"
170 #include "gl_array_list.h"
171 #include "gl_linkedhash_list.h"
172 #include "gl_rbtreehash_list.h"
173 #include "gl_linked_list.h"
174 #include "xalloc.h"
175 #include "xmalloca.h"
176 #include "fstrcmp.h"
177 #include "minmax.h"
178 #include "c-strstr.h"
179 #include "fwriteerror.h"
180 #include "getprogname.h"
182 #define ASSERT(expr) \
183 do \
185 if (!(expr)) \
186 abort (); \
188 while (0)
190 #define FSTRCMP_THRESHOLD 0.6
191 #define FSTRCMP_STRICTER_THRESHOLD 0.8
193 /* Representation of a ChangeLog entry.
194 The string may contain NUL bytes; therefore it is represented as a plain
195 opaque memory region. */
196 struct entry
198 char *string;
199 size_t length;
200 /* Cache for the hash code. */
201 bool hashcode_cached;
202 size_t hashcode;
205 /* Create an entry.
206 The memory region passed by the caller must of indefinite extent. It is
207 *not* copied here. */
208 static struct entry *
209 entry_create (char *string, size_t length)
211 struct entry *result = XMALLOC (struct entry);
212 result->string = string;
213 result->length = length;
214 result->hashcode_cached = false;
215 return result;
218 /* Compare two entries for equality. */
219 static bool
220 entry_equals (const void *elt1, const void *elt2)
222 const struct entry *entry1 = (const struct entry *) elt1;
223 const struct entry *entry2 = (const struct entry *) elt2;
224 return entry1->length == entry2->length
225 && memcmp (entry1->string, entry2->string, entry1->length) == 0;
228 /* Return a hash code of the contents of a ChangeLog entry. */
229 static size_t
230 entry_hashcode (const void *elt)
232 struct entry *entry = (struct entry *) elt;
233 if (!entry->hashcode_cached)
235 /* See https://www.haible.de/bruno/hashfunc.html. */
236 const char *s;
237 size_t n;
238 size_t h = 0;
240 for (s = entry->string, n = entry->length; n > 0; s++, n--)
241 h = (unsigned char) *s + ((h << 9) | (h >> (sizeof (size_t) * CHAR_BIT - 9)));
243 entry->hashcode = h;
244 entry->hashcode_cached = true;
246 return entry->hashcode;
249 /* Perform a fuzzy comparison of two ChangeLog entries.
250 Return a similarity measure of the two entries, a value between 0 and 1.
251 0 stands for very distinct, 1 for identical.
252 If the result is < LOWER_BOUND, an arbitrary other value < LOWER_BOUND can
253 be returned. */
254 static double
255 entry_fstrcmp (const struct entry *entry1, const struct entry *entry2,
256 double lower_bound)
258 /* fstrcmp works only on NUL terminated strings. */
259 char *memory;
260 double similarity;
262 if (memchr (entry1->string, '\0', entry1->length) != NULL)
263 return 0.0;
264 if (memchr (entry2->string, '\0', entry2->length) != NULL)
265 return 0.0;
266 memory = (char *) xmalloca (entry1->length + 1 + entry2->length + 1);
268 char *p = memory;
269 memcpy (p, entry1->string, entry1->length);
270 p += entry1->length;
271 *p++ = '\0';
272 memcpy (p, entry2->string, entry2->length);
273 p += entry2->length;
274 *p++ = '\0';
276 similarity =
277 fstrcmp_bounded (memory, memory + entry1->length + 1, lower_bound);
278 freea (memory);
279 return similarity;
282 /* This structure represents an entire ChangeLog file, after it was read
283 into memory. */
284 struct changelog_file
286 /* The entries, as a list. */
287 gl_list_t /* <struct entry *> */ entries_list;
288 /* The entries, as a list in opposite direction. */
289 gl_list_t /* <struct entry *> */ entries_reversed;
290 /* The entries, as an array. */
291 size_t num_entries;
292 struct entry **entries;
295 /* Read a ChangeLog file into memory.
296 Return the contents in *RESULT. */
297 static void
298 read_changelog_file (const char *filename, struct changelog_file *result)
300 /* Read the file in text mode, otherwise it's hard to recognize empty
301 lines. */
302 size_t length;
303 char *contents = read_file (filename, 0, &length);
304 if (contents == NULL)
306 fprintf (stderr, "could not read file '%s'\n", filename);
307 exit (EXIT_FAILURE);
310 result->entries_list =
311 gl_list_create_empty (GL_LINKEDHASH_LIST, entry_equals, entry_hashcode,
312 NULL, true);
313 result->entries_reversed =
314 gl_list_create_empty (GL_RBTREEHASH_LIST, entry_equals, entry_hashcode,
315 NULL, true);
316 /* A ChangeLog file consists of ChangeLog entries. A ChangeLog entry starts
317 at a line following a blank line and that starts with a non-whitespace
318 character, or at the beginning of a file.
319 Split the file contents into entries. */
321 char *contents_end = contents + length;
322 char *start = contents;
323 while (start < contents_end)
325 /* Search the end of the current entry. */
326 char *ptr = start;
327 struct entry *curr;
329 while (ptr < contents_end)
331 ptr = memchr (ptr, '\n', contents_end - ptr);
332 if (ptr == NULL)
334 ptr = contents_end;
335 break;
337 ptr++;
338 if (contents_end - ptr >= 2
339 && ptr[0] == '\n'
340 && !(ptr[1] == '\n' || ptr[1] == '\t' || ptr[1] == ' '))
342 ptr++;
343 break;
347 curr = entry_create (start, ptr - start);
348 gl_list_add_last (result->entries_list, curr);
349 gl_list_add_first (result->entries_reversed, curr);
351 start = ptr;
355 result->num_entries = gl_list_size (result->entries_list);
356 result->entries = XNMALLOC (result->num_entries, struct entry *);
358 size_t index = 0;
359 gl_list_iterator_t iter = gl_list_iterator (result->entries_list);
360 const void *elt;
361 gl_list_node_t node;
362 while (gl_list_iterator_next (&iter, &elt, &node))
363 result->entries[index++] = (struct entry *) elt;
364 gl_list_iterator_free (&iter);
365 ASSERT (index == result->num_entries);
369 /* A mapping (correspondence) between entries of FILE1 and of FILE2. */
370 struct entries_mapping
372 struct changelog_file *file1;
373 struct changelog_file *file2;
374 /* Mapping from indices in FILE1 to indices in FILE2.
375 A value -1 means that the entry from FILE1 is not found in FILE2.
376 A value -2 means that it has not yet been computed. */
377 ssize_t *index_mapping;
378 /* Mapping from indices in FILE2 to indices in FILE1.
379 A value -1 means that the entry from FILE2 is not found in FILE1.
380 A value -2 means that it has not yet been computed. */
381 ssize_t *index_mapping_reverse;
384 /* Look up (or lazily compute) the mapping of an entry in FILE1.
385 i is the index in FILE1.
386 Return the index in FILE2, or -1 when the entry is not found in FILE2. */
387 static ssize_t
388 entries_mapping_get (struct entries_mapping *mapping, ssize_t i)
390 if (mapping->index_mapping[i] < -1)
392 struct changelog_file *file1 = mapping->file1;
393 struct changelog_file *file2 = mapping->file2;
394 size_t n1 = file1->num_entries;
395 size_t n2 = file2->num_entries;
396 struct entry *entry_i = file1->entries[i];
397 ssize_t j;
399 /* Search whether it approximately occurs in file2. */
400 ssize_t best_j = -1;
401 double best_j_similarity = 0.0;
402 for (j = n2 - 1; j >= 0; j--)
403 if (mapping->index_mapping_reverse[j] < 0)
405 double similarity =
406 entry_fstrcmp (entry_i, file2->entries[j], best_j_similarity);
407 if (similarity > best_j_similarity)
409 best_j = j;
410 best_j_similarity = similarity;
413 if (best_j_similarity >= FSTRCMP_THRESHOLD)
415 /* Found a similar entry in file2. */
416 struct entry *entry_j = file2->entries[best_j];
417 /* Search whether it approximately occurs in file1 at index i. */
418 ssize_t best_i = -1;
419 double best_i_similarity = 0.0;
420 ssize_t ii;
421 for (ii = n1 - 1; ii >= 0; ii--)
422 if (mapping->index_mapping[ii] < 0)
424 double similarity =
425 entry_fstrcmp (file1->entries[ii], entry_j,
426 best_i_similarity);
427 if (similarity > best_i_similarity)
429 best_i = ii;
430 best_i_similarity = similarity;
433 if (best_i_similarity >= FSTRCMP_THRESHOLD && best_i == i)
435 mapping->index_mapping[i] = best_j;
436 mapping->index_mapping_reverse[best_j] = i;
439 if (mapping->index_mapping[i] < -1)
440 /* It does not approximately occur in FILE2.
441 Remember it, for next time. */
442 mapping->index_mapping[i] = -1;
444 return mapping->index_mapping[i];
447 /* Look up (or lazily compute) the mapping of an entry in FILE2.
448 j is the index in FILE2.
449 Return the index in FILE1, or -1 when the entry is not found in FILE1. */
450 static ssize_t
451 entries_mapping_reverse_get (struct entries_mapping *mapping, ssize_t j)
453 if (mapping->index_mapping_reverse[j] < -1)
455 struct changelog_file *file1 = mapping->file1;
456 struct changelog_file *file2 = mapping->file2;
457 size_t n1 = file1->num_entries;
458 size_t n2 = file2->num_entries;
459 struct entry *entry_j = file2->entries[j];
460 ssize_t i;
462 /* Search whether it approximately occurs in file1. */
463 ssize_t best_i = -1;
464 double best_i_similarity = 0.0;
465 for (i = n1 - 1; i >= 0; i--)
466 if (mapping->index_mapping[i] < 0)
468 double similarity =
469 entry_fstrcmp (file1->entries[i], entry_j, best_i_similarity);
470 if (similarity > best_i_similarity)
472 best_i = i;
473 best_i_similarity = similarity;
476 if (best_i_similarity >= FSTRCMP_THRESHOLD)
478 /* Found a similar entry in file1. */
479 struct entry *entry_i = file1->entries[best_i];
480 /* Search whether it approximately occurs in file2 at index j. */
481 ssize_t best_j = -1;
482 double best_j_similarity = 0.0;
483 ssize_t jj;
484 for (jj = n2 - 1; jj >= 0; jj--)
485 if (mapping->index_mapping_reverse[jj] < 0)
487 double similarity =
488 entry_fstrcmp (entry_i, file2->entries[jj],
489 best_j_similarity);
490 if (similarity > best_j_similarity)
492 best_j = jj;
493 best_j_similarity = similarity;
496 if (best_j_similarity >= FSTRCMP_THRESHOLD && best_j == j)
498 mapping->index_mapping_reverse[j] = best_i;
499 mapping->index_mapping[best_i] = j;
502 if (mapping->index_mapping_reverse[j] < -1)
503 /* It does not approximately occur in FILE1.
504 Remember it, for next time. */
505 mapping->index_mapping_reverse[j] = -1;
507 return mapping->index_mapping_reverse[j];
510 /* Compute a mapping (correspondence) between entries of FILE1 and of FILE2.
511 The correspondence also takes into account small modifications; i.e. the
512 indicated relation is not equality of entries but best-match similarity
513 of entries.
514 If FULL is true, the maximum of matching is done up-front. If it is false,
515 it is done in a lazy way through the functions entries_mapping_get and
516 entries_mapping_reverse_get.
517 Return the result in *RESULT. */
518 static void
519 compute_mapping (struct changelog_file *file1, struct changelog_file *file2,
520 bool full,
521 struct entries_mapping *result)
523 /* Mapping from indices in file1 to indices in file2. */
524 ssize_t *index_mapping;
525 /* Mapping from indices in file2 to indices in file1. */
526 ssize_t *index_mapping_reverse;
527 size_t n1 = file1->num_entries;
528 size_t n2 = file2->num_entries;
529 ssize_t i, j;
531 index_mapping = XNMALLOC (n1, ssize_t);
532 for (i = 0; i < n1; i++)
533 index_mapping[i] = -2;
535 index_mapping_reverse = XNMALLOC (n2, ssize_t);
536 for (j = 0; j < n2; j++)
537 index_mapping_reverse[j] = -2;
539 for (i = n1 - 1; i >= 0; i--)
540 /* Take an entry from file1. */
541 if (index_mapping[i] < -1)
543 struct entry *entry = file1->entries[i];
544 /* Search whether it occurs in file2. */
545 j = gl_list_indexof (file2->entries_reversed, entry);
546 if (j >= 0)
548 j = n2 - 1 - j;
549 /* Found an exact correspondence. */
550 /* If index_mapping_reverse[j] >= 0, we have already seen other
551 copies of this entry, and there were more occurrences of it in
552 file1 than in file2. In this case, do nothing. */
553 if (index_mapping_reverse[j] < 0)
555 index_mapping[i] = j;
556 index_mapping_reverse[j] = i;
557 /* Look for more occurrences of the same entry. Match them
558 as long as they pair up. Unpaired occurrences of the same
559 entry are left without mapping. */
561 ssize_t curr_i = i;
562 ssize_t curr_j = j;
564 for (;;)
566 ssize_t next_i;
567 ssize_t next_j;
569 next_i =
570 gl_list_indexof_from (file1->entries_reversed,
571 n1 - curr_i, entry);
572 if (next_i < 0)
573 break;
574 next_j =
575 gl_list_indexof_from (file2->entries_reversed,
576 n2 - curr_j, entry);
577 if (next_j < 0)
578 break;
579 curr_i = n1 - 1 - next_i;
580 curr_j = n2 - 1 - next_j;
581 ASSERT (index_mapping[curr_i] < 0);
582 ASSERT (index_mapping_reverse[curr_j] < 0);
583 index_mapping[curr_i] = curr_j;
584 index_mapping_reverse[curr_j] = curr_i;
591 result->file1 = file1;
592 result->file2 = file2;
593 result->index_mapping = index_mapping;
594 result->index_mapping_reverse = index_mapping_reverse;
596 if (full)
597 for (i = n1 - 1; i >= 0; i--)
598 entries_mapping_get (result, i);
601 /* An "edit" is a textual modification performed by the user, that needs to
602 be applied to the other file. */
603 enum edit_type
605 /* Some consecutive entries were added. */
606 ADDITION,
607 /* Some consecutive entries were removed; some other consecutive entries
608 were added at the same position. (Not necessarily the same number of
609 entries.) */
610 CHANGE,
611 /* Some consecutive entries were removed. */
612 REMOVAL
615 /* This structure represents an edit. */
616 struct edit
618 enum edit_type type;
619 /* Range of indices into the entries of FILE1. */
620 ssize_t i1, i2; /* first, last index; only used for CHANGE, REMOVAL */
621 /* Range of indices into the entries of FILE2. */
622 ssize_t j1, j2; /* first, last index; only used for ADDITION, CHANGE */
625 /* This structure represents the differences from one file, FILE1, to another
626 file, FILE2. */
627 struct differences
629 /* An array mapping FILE1 indices to FILE2 indices (or -1 when the entry
630 from FILE1 is not found in FILE2). */
631 ssize_t *index_mapping;
632 /* An array mapping FILE2 indices to FILE1 indices (or -1 when the entry
633 from FILE2 is not found in FILE1). */
634 ssize_t *index_mapping_reverse;
635 /* The edits that transform FILE1 into FILE2. */
636 size_t num_edits;
637 struct edit **edits;
640 /* Import the difference detection algorithm from GNU diff. */
641 #define ELEMENT struct entry *
642 #define EQUAL entry_equals
643 #define OFFSET ssize_t
644 #define EXTRA_CONTEXT_FIELDS \
645 ssize_t *index_mapping; \
646 ssize_t *index_mapping_reverse;
647 #define NOTE_DELETE(ctxt, xoff) \
648 ctxt->index_mapping[xoff] = -1
649 #define NOTE_INSERT(ctxt, yoff) \
650 ctxt->index_mapping_reverse[yoff] = -1
651 #include "diffseq.h"
653 /* Compute the differences between the entries of FILE1 and the entries of
654 FILE2. */
655 static void
656 compute_differences (struct changelog_file *file1, struct changelog_file *file2,
657 struct differences *result)
659 /* Unlike compute_mapping, which mostly ignores the order of the entries and
660 therefore works well when some entries are permuted, here we use the order.
661 I think this is needed in order to distinguish changes from
662 additions+removals; I don't know how to say what is a "change" if the
663 files are considered as unordered sets of entries. */
664 struct context ctxt;
665 size_t n1 = file1->num_entries;
666 size_t n2 = file2->num_entries;
667 ssize_t i;
668 ssize_t j;
669 gl_list_t /* <struct edit *> */ edits;
671 ctxt.xvec = file1->entries;
672 ctxt.yvec = file2->entries;
673 ctxt.index_mapping = XNMALLOC (n1, ssize_t);
674 for (i = 0; i < n1; i++)
675 ctxt.index_mapping[i] = 0;
676 ctxt.index_mapping_reverse = XNMALLOC (n2, ssize_t);
677 for (j = 0; j < n2; j++)
678 ctxt.index_mapping_reverse[j] = 0;
679 ctxt.fdiag = XNMALLOC (2 * (n1 + n2 + 3), ssize_t) + n2 + 1;
680 ctxt.bdiag = ctxt.fdiag + n1 + n2 + 3;
681 ctxt.too_expensive = n1 + n2;
683 /* Store in ctxt.index_mapping and ctxt.index_mapping_reverse a -1 for
684 each removed or added entry. */
685 compareseq (0, n1, 0, n2, 0, &ctxt);
687 /* Complete the index_mapping and index_mapping_reverse arrays. */
688 i = 0;
689 j = 0;
690 while (i < n1 || j < n2)
692 while (i < n1 && ctxt.index_mapping[i] < 0)
693 i++;
694 while (j < n2 && ctxt.index_mapping_reverse[j] < 0)
695 j++;
696 ASSERT ((i < n1) == (j < n2));
697 if (i == n1 && j == n2)
698 break;
699 ctxt.index_mapping[i] = j;
700 ctxt.index_mapping_reverse[j] = i;
701 i++;
702 j++;
705 /* Create the edits. */
706 edits = gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
707 i = 0;
708 j = 0;
709 while (i < n1 || j < n2)
711 if (i == n1)
713 struct edit *e;
714 ASSERT (j < n2);
715 e = XMALLOC (struct edit);
716 e->type = ADDITION;
717 e->j1 = j;
718 e->j2 = n2 - 1;
719 gl_list_add_last (edits, e);
720 break;
722 if (j == n2)
724 struct edit *e;
725 ASSERT (i < n1);
726 e = XMALLOC (struct edit);
727 e->type = REMOVAL;
728 e->i1 = i;
729 e->i2 = n1 - 1;
730 gl_list_add_last (edits, e);
731 break;
733 if (ctxt.index_mapping[i] >= 0)
735 if (ctxt.index_mapping_reverse[j] >= 0)
737 ASSERT (ctxt.index_mapping[i] == j);
738 ASSERT (ctxt.index_mapping_reverse[j] == i);
739 i++;
740 j++;
742 else
744 struct edit *e;
745 ASSERT (ctxt.index_mapping_reverse[j] < 0);
746 e = XMALLOC (struct edit);
747 e->type = ADDITION;
748 e->j1 = j;
750 j++;
751 while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
752 e->j2 = j - 1;
753 gl_list_add_last (edits, e);
756 else
758 if (ctxt.index_mapping_reverse[j] >= 0)
760 struct edit *e;
761 ASSERT (ctxt.index_mapping[i] < 0);
762 e = XMALLOC (struct edit);
763 e->type = REMOVAL;
764 e->i1 = i;
766 i++;
767 while (i < n1 && ctxt.index_mapping[i] < 0);
768 e->i2 = i - 1;
769 gl_list_add_last (edits, e);
771 else
773 struct edit *e;
774 ASSERT (ctxt.index_mapping[i] < 0);
775 ASSERT (ctxt.index_mapping_reverse[j] < 0);
776 e = XMALLOC (struct edit);
777 e->type = CHANGE;
778 e->i1 = i;
780 i++;
781 while (i < n1 && ctxt.index_mapping[i] < 0);
782 e->i2 = i - 1;
783 e->j1 = j;
785 j++;
786 while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
787 e->j2 = j - 1;
788 gl_list_add_last (edits, e);
793 result->index_mapping = ctxt.index_mapping;
794 result->index_mapping_reverse = ctxt.index_mapping_reverse;
795 result->num_edits = gl_list_size (edits);
796 result->edits = XNMALLOC (result->num_edits, struct edit *);
798 size_t index = 0;
799 gl_list_iterator_t iter = gl_list_iterator (edits);
800 const void *elt;
801 gl_list_node_t node;
802 while (gl_list_iterator_next (&iter, &elt, &node))
803 result->edits[index++] = (struct edit *) elt;
804 gl_list_iterator_free (&iter);
805 ASSERT (index == result->num_edits);
809 /* An empty entry. */
810 static struct entry empty_entry = { NULL, 0 };
812 /* Return the end a paragraph.
813 ENTRY is an entry.
814 OFFSET is an offset into the entry, OFFSET <= ENTRY->length.
815 Return the offset of the end of paragraph, as an offset <= ENTRY->length;
816 it is the start of a blank line or the end of the entry. */
817 static size_t
818 find_paragraph_end (const struct entry *entry, size_t offset)
820 const char *string = entry->string;
821 size_t length = entry->length;
823 for (;;)
825 const char *nl = memchr (string + offset, '\n', length - offset);
826 if (nl == NULL)
827 return length;
828 offset = (nl - string) + 1;
829 if (offset < length && string[offset] == '\n')
830 return offset;
834 /* Split a merged entry.
835 Given an old entry of the form
836 TITLE
837 BODY
838 and a new entry of the form
839 TITLE
840 BODY1
841 BODY'
842 where the two titles are the same and BODY and BODY' are very similar,
843 this computes two new entries
844 TITLE
845 BODY1
847 TITLE
848 BODY'
849 and returns true.
850 If the entries don't have this form, it returns false. */
851 static bool
852 try_split_merged_entry (const struct entry *old_entry,
853 const struct entry *new_entry,
854 struct entry *new_split[2])
856 size_t old_title_len = find_paragraph_end (old_entry, 0);
857 size_t new_title_len = find_paragraph_end (new_entry, 0);
858 struct entry old_body;
859 struct entry new_body;
860 size_t best_split_offset;
861 double best_similarity;
862 size_t split_offset;
864 /* Same title? */
865 if (!(old_title_len == new_title_len
866 && memcmp (old_entry->string, new_entry->string, old_title_len) == 0))
867 return false;
869 old_body.string = old_entry->string + old_title_len;
870 old_body.length = old_entry->length - old_title_len;
872 /* Determine where to split the new entry.
873 This is done by maximizing the similarity between BODY and BODY'. */
874 best_split_offset = split_offset = new_title_len;
875 best_similarity = 0.0;
876 for (;;)
878 double similarity;
880 new_body.string = new_entry->string + split_offset;
881 new_body.length = new_entry->length - split_offset;
882 similarity =
883 entry_fstrcmp (&old_body, &new_body, best_similarity);
884 if (similarity > best_similarity)
886 best_split_offset = split_offset;
887 best_similarity = similarity;
889 if (best_similarity == 1.0)
890 /* It cannot get better. */
891 break;
893 if (split_offset < new_entry->length)
894 split_offset = find_paragraph_end (new_entry, split_offset + 1);
895 else
896 break;
899 /* BODY' should not be empty. */
900 if (best_split_offset == new_entry->length)
901 return false;
902 ASSERT (new_entry->string[best_split_offset] == '\n');
904 /* A certain similarity between BODY and BODY' is required. */
905 if (best_similarity < FSTRCMP_STRICTER_THRESHOLD)
906 return false;
908 new_split[0] = entry_create (new_entry->string, best_split_offset + 1);
911 size_t len1 = new_title_len;
912 size_t len2 = new_entry->length - best_split_offset;
913 char *combined = XNMALLOC (len1 + len2, char);
914 memcpy (combined, new_entry->string, len1);
915 memcpy (combined + len1, new_entry->string + best_split_offset, len2);
916 new_split[1] = entry_create (combined, len1 + len2);
919 return true;
922 /* Write the contents of an entry to the output stream FP. */
923 static void
924 entry_write (FILE *fp, struct entry *entry)
926 if (entry->length > 0)
927 fwrite (entry->string, 1, entry->length, fp);
930 /* This structure represents a conflict.
931 A conflict can occur for various reasons. */
932 struct conflict
934 /* Parts from the ancestor file. */
935 size_t num_old_entries;
936 struct entry **old_entries;
937 /* Parts of the modified file. */
938 size_t num_modified_entries;
939 struct entry **modified_entries;
942 /* Write a conflict to the output stream FP, including markers. */
943 static void
944 conflict_write (FILE *fp, struct conflict *c)
946 size_t i;
948 /* Use the same syntax as git's default merge driver.
949 Don't indent the contents of the entries (with things like ">" or "-"),
950 otherwise the user needs more textual editing to resolve the conflict. */
951 fputs ("<<<<<<<\n", fp);
952 for (i = 0; i < c->num_old_entries; i++)
953 entry_write (fp, c->old_entries[i]);
954 fputs ("=======\n", fp);
955 for (i = 0; i < c->num_modified_entries; i++)
956 entry_write (fp, c->modified_entries[i]);
957 fputs (">>>>>>>\n", fp);
960 /* Long options. */
961 static const struct option long_options[] =
963 { "help", no_argument, NULL, 'h' },
964 { "split-merged-entry", no_argument, NULL, CHAR_MAX + 1 },
965 { "version", no_argument, NULL, 'V' },
966 { NULL, 0, NULL, 0 }
969 /* Print a usage message and exit. */
970 static void
971 usage (int status)
973 if (status != EXIT_SUCCESS)
974 fprintf (stderr, "Try '%s --help' for more information.\n",
975 getprogname ());
976 else
978 printf ("Usage: %s [OPTION] O-FILE-NAME A-FILE-NAME B-FILE-NAME\n",
979 getprogname ());
980 printf ("\n");
981 printf ("Merges independent modifications of a ChangeLog style file.\n");
982 printf ("O-FILE-NAME names the original file, the ancestor of the two others.\n");
983 printf ("A-FILE-NAME names the publicly modified file.\n");
984 printf ("B-FILE-NAME names the user-modified file.\n");
985 printf ("Writes the merged file into A-FILE-NAME.\n");
986 printf ("\n");
987 #if 0 /* --split-merged-entry is now on by default. */
988 printf ("Operation modifiers:\n");
989 printf ("\
990 --split-merged-entry Possibly split a merged entry between paragraphs.\n\
991 Use this if you have the habit to merge unrelated\n\
992 entries into a single one, separated only by a\n\
993 newline, just because they happened on the same\n\
994 date.\n");
995 printf ("\n");
996 #endif
997 printf ("Informative output:\n");
998 printf (" -h, --help display this help and exit\n");
999 printf (" -V, --version output version information and exit\n");
1000 printf ("\n");
1001 fputs ("Report bugs to <bug-gnulib@gnu.org>.\n",
1002 stdout);
1005 exit (status);
1009 main (int argc, char *argv[])
1011 int optchar;
1012 bool do_help;
1013 bool do_version;
1014 bool split_merged_entry;
1016 /* Set default values for variables. */
1017 do_help = false;
1018 do_version = false;
1019 split_merged_entry = true;
1021 /* Parse command line options. */
1022 while ((optchar = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF)
1023 switch (optchar)
1025 case '\0': /* Long option. */
1026 break;
1027 case 'h':
1028 do_help = true;
1029 break;
1030 case 'V':
1031 do_version = true;
1032 break;
1033 case CHAR_MAX + 1: /* --split-merged-entry */
1034 break;
1035 default:
1036 usage (EXIT_FAILURE);
1039 if (do_version)
1041 /* Version information is requested. */
1042 printf ("%s\n", getprogname ());
1043 printf ("Copyright (C) %s Free Software Foundation, Inc.\n\
1044 License GPLv2+: GNU GPL version 2 or later <https://gnu.org/licenses/gpl.html>\n\
1045 This is free software: you are free to change and redistribute it.\n\
1046 There is NO WARRANTY, to the extent permitted by law.\n\
1048 "2008");
1049 printf ("Written by %s.\n", "Bruno Haible");
1050 exit (EXIT_SUCCESS);
1053 if (do_help)
1055 /* Help is requested. */
1056 usage (EXIT_SUCCESS);
1059 /* Test argument count. */
1060 if (optind + 3 != argc)
1061 error (EXIT_FAILURE, 0, "expected three arguments");
1064 const char *ancestor_file_name; /* O-FILE-NAME */
1065 const char *destination_file_name; /* A-FILE-NAME */
1066 bool downstream;
1067 const char *other_file_name; /* B-FILE-NAME */
1068 const char *mainstream_file_name;
1069 const char *modified_file_name;
1070 struct changelog_file ancestor_file;
1071 struct changelog_file mainstream_file;
1072 struct changelog_file modified_file;
1073 /* Mapping from indices in ancestor_file to indices in mainstream_file. */
1074 struct entries_mapping mapping;
1075 struct differences diffs;
1076 gl_list_node_t *result_entries_pointers; /* array of pointers into result_entries */
1077 gl_list_t /* <struct entry *> */ result_entries;
1078 gl_list_t /* <struct conflict *> */ result_conflicts;
1080 ancestor_file_name = argv[optind];
1081 destination_file_name = argv[optind + 1];
1082 other_file_name = argv[optind + 2];
1084 /* Heuristic to determine whether it's a pull in downstream direction
1085 (e.g. pull from a centralized server) or a pull in upstream direction
1086 (e.g. "git stash apply").
1088 For ChangeLog this distinction is important. The difference between
1089 an "upstream" and a "downstream" repository is that more people are
1090 looking at the "upstream" repository. They want to be informed about
1091 changes and expect them to be shown at the top of the ChangeLog.
1092 When a user pulls downstream, on the other hand, he has two options:
1093 a) He gets the change entries from the central repository also at the
1094 top of his ChangeLog, and his own changes come after them.
1095 b) He gets the change entries from the central repository after those
1096 he has collected for his branch. His own change entries stay at
1097 the top of the ChangeLog file.
1098 In the case a) he has to reorder the ChangeLog before he can commit.
1099 No one does that. So most people want b).
1100 In other words, the order of entries in a ChangeLog should represent
1101 the order in which they have flown (or will flow) into the *central*
1102 repository.
1104 But in git this is fundamentally indistinguishable, because when Linus
1105 pulls patches from akpm and akpm pulls patches from Linus, it's not
1106 clear which of the two is more "upstream". Also, when you have many
1107 branches in a repository and pull from one to another, "git" has no way
1108 to know which branch is more "upstream" than the other. The git-tag(1)
1109 manual page also says:
1110 "One important aspect of git is it is distributed, and being
1111 distributed largely means there is no inherent "upstream" or
1112 "downstream" in the system."
1113 Therefore anyone who attempts to produce a ChangeLog from the merge
1114 history will fail.
1116 Here we allow the user to specify the pull direction through an
1117 environment variable (GIT_UPSTREAM or GIT_DOWNSTREAM). If these two
1118 environment variables are not set, we assume a "simple single user"
1119 usage pattern: He manages local changes through stashes and uses
1120 "git pull" only to pull downstream.
1122 How to distinguish these situation? There are several hints:
1123 - During a "git stash apply", GIT_REFLOG_ACTION is not set. During
1124 a "git pull", it is set to 'pull '. During a "git pull --rebase",
1125 it is set to 'pull --rebase'. During a "git cherry-pick", it is
1126 set to 'cherry-pick'.
1127 - During a "git stash apply", there is an environment variable of
1128 the form GITHEAD_<40_hex_digits>='Stashed changes'. */
1130 const char *var;
1132 var = getenv ("GIT_DOWNSTREAM");
1133 if (var != NULL && var[0] != '\0')
1134 downstream = true;
1135 else
1137 var = getenv ("GIT_UPSTREAM");
1138 if (var != NULL && var[0] != '\0')
1139 downstream = false;
1140 else
1142 var = getenv ("GIT_REFLOG_ACTION");
1143 #if 0 /* Debugging code */
1144 printf ("GIT_REFLOG_ACTION=|%s|\n", var);
1145 #endif
1146 if (var != NULL
1147 && ((strncmp (var, "pull", 4) == 0
1148 && c_strstr (var, " --rebase") == NULL)
1149 || strncmp (var, "merge origin", 12) == 0))
1150 downstream = true;
1151 else
1153 /* "git stash apply", "git rebase", "git cherry-pick" and
1154 similar. */
1155 downstream = false;
1161 #if 0 /* Debugging code */
1163 char buf[1000];
1164 printf ("First line of %%A:\n");
1165 sprintf (buf, "head -1 %s", destination_file_name); system (buf);
1166 printf ("First line of %%B:\n");
1167 sprintf (buf, "head -1 %s", other_file_name); system (buf);
1168 printf ("Guessing calling convention: %s\n",
1169 downstream
1170 ? "%A = modified by user, %B = upstream"
1171 : "%A = upstream, %B = modified by user");
1173 #endif
1175 if (downstream)
1177 mainstream_file_name = other_file_name;
1178 modified_file_name = destination_file_name;
1180 else
1182 mainstream_file_name = destination_file_name;
1183 modified_file_name = other_file_name;
1186 /* Read the three files into memory. */
1187 read_changelog_file (ancestor_file_name, &ancestor_file);
1188 read_changelog_file (mainstream_file_name, &mainstream_file);
1189 read_changelog_file (modified_file_name, &modified_file);
1191 /* Compute correspondence between the entries of ancestor_file and of
1192 mainstream_file. */
1193 compute_mapping (&ancestor_file, &mainstream_file, false, &mapping);
1194 (void) entries_mapping_reverse_get; /* avoid gcc "defined but not" warning */
1196 /* Compute differences between the entries of ancestor_file and of
1197 modified_file. */
1198 compute_differences (&ancestor_file, &modified_file, &diffs);
1200 /* Compute the result. */
1201 result_entries_pointers =
1202 XNMALLOC (mainstream_file.num_entries, gl_list_node_t);
1203 result_entries =
1204 gl_list_create_empty (GL_LINKED_LIST, entry_equals, entry_hashcode,
1205 NULL, true);
1207 size_t k;
1208 for (k = 0; k < mainstream_file.num_entries; k++)
1209 result_entries_pointers[k] =
1210 gl_list_add_last (result_entries, mainstream_file.entries[k]);
1212 result_conflicts =
1213 gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
1215 size_t e;
1216 for (e = 0; e < diffs.num_edits; e++)
1218 struct edit *edit = diffs.edits[e];
1219 switch (edit->type)
1221 case ADDITION:
1222 if (edit->j1 == 0)
1224 /* An addition to the top of modified_file.
1225 Apply it to the top of mainstream_file. */
1226 ssize_t j;
1227 for (j = edit->j2; j >= edit->j1; j--)
1229 struct entry *added_entry = modified_file.entries[j];
1230 gl_list_add_first (result_entries, added_entry);
1233 else
1235 ssize_t i_before;
1236 ssize_t i_after;
1237 ssize_t k_before;
1238 ssize_t k_after;
1239 i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1240 ASSERT (i_before >= 0);
1241 i_after = (edit->j2 + 1 == modified_file.num_entries
1242 ? ancestor_file.num_entries
1243 : diffs.index_mapping_reverse[edit->j2 + 1]);
1244 ASSERT (i_after >= 0);
1245 ASSERT (i_after == i_before + 1);
1246 /* An addition between ancestor_file.entries[i_before] and
1247 ancestor_file.entries[i_after]. See whether these two
1248 entries still exist in mainstream_file and are still
1249 consecutive. */
1250 k_before = entries_mapping_get (&mapping, i_before);
1251 k_after = (i_after == ancestor_file.num_entries
1252 ? mainstream_file.num_entries
1253 : entries_mapping_get (&mapping, i_after));
1254 if (k_before >= 0 && k_after >= 0 && k_after == k_before + 1)
1256 /* Yes, the entry before and after are still neighbours
1257 in mainstream_file. Apply the addition between
1258 them. */
1259 if (k_after == mainstream_file.num_entries)
1261 size_t j;
1262 for (j = edit->j1; j <= edit->j2; j++)
1264 struct entry *added_entry = modified_file.entries[j];
1265 gl_list_add_last (result_entries, added_entry);
1268 else
1270 gl_list_node_t node_k_after = result_entries_pointers[k_after];
1271 size_t j;
1272 for (j = edit->j1; j <= edit->j2; j++)
1274 struct entry *added_entry = modified_file.entries[j];
1275 gl_list_add_before (result_entries, node_k_after, added_entry);
1279 else
1281 /* It's not clear where the additions should be applied.
1282 Let the user decide. */
1283 struct conflict *c = XMALLOC (struct conflict);
1284 size_t j;
1285 c->num_old_entries = 0;
1286 c->old_entries = NULL;
1287 c->num_modified_entries = edit->j2 - edit->j1 + 1;
1288 c->modified_entries =
1289 XNMALLOC (c->num_modified_entries, struct entry *);
1290 for (j = edit->j1; j <= edit->j2; j++)
1291 c->modified_entries[j - edit->j1] = modified_file.entries[j];
1292 gl_list_add_last (result_conflicts, c);
1295 break;
1296 case REMOVAL:
1298 /* Apply the removals one by one. */
1299 size_t i;
1300 for (i = edit->i1; i <= edit->i2; i++)
1302 struct entry *removed_entry = ancestor_file.entries[i];
1303 ssize_t k = entries_mapping_get (&mapping, i);
1304 if (k >= 0
1305 && entry_equals (removed_entry,
1306 mainstream_file.entries[k]))
1308 /* The entry to be removed still exists in
1309 mainstream_file. Remove it. */
1310 gl_list_node_set_value (result_entries,
1311 result_entries_pointers[k],
1312 &empty_entry);
1314 else
1316 /* The entry to be removed was already removed or was
1317 modified. This is a conflict. */
1318 struct conflict *c = XMALLOC (struct conflict);
1319 c->num_old_entries = 1;
1320 c->old_entries =
1321 XNMALLOC (c->num_old_entries, struct entry *);
1322 c->old_entries[0] = removed_entry;
1323 c->num_modified_entries = 0;
1324 c->modified_entries = NULL;
1325 gl_list_add_last (result_conflicts, c);
1329 break;
1330 case CHANGE:
1332 bool done = false;
1333 /* When the user usually merges entries from the same day,
1334 and this edit is at the top of the file: */
1335 if (split_merged_entry && edit->j1 == 0)
1337 /* Test whether the change is "simple merged", i.e. whether
1338 it consists of additions, followed by an augmentation of
1339 the first changed entry, followed by small changes of the
1340 remaining entries:
1341 entry_1
1342 entry_2
1344 entry_n
1345 are mapped to
1346 added_entry
1348 added_entry
1349 augmented_entry_1
1350 modified_entry_2
1352 modified_entry_n. */
1353 if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1355 struct entry *split[2];
1356 bool simple_merged =
1357 try_split_merged_entry (ancestor_file.entries[edit->i1],
1358 modified_file.entries[edit->i1 + edit->j2 - edit->i2],
1359 split);
1360 if (simple_merged)
1362 size_t i;
1363 for (i = edit->i1 + 1; i <= edit->i2; i++)
1364 if (entry_fstrcmp (ancestor_file.entries[i],
1365 modified_file.entries[i + edit->j2 - edit->i2],
1366 FSTRCMP_THRESHOLD)
1367 < FSTRCMP_THRESHOLD)
1369 simple_merged = false;
1370 break;
1373 if (simple_merged)
1375 /* Apply the additions at the top of modified_file.
1376 Apply each of the single-entry changes
1377 separately. */
1378 size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1379 size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1380 ssize_t j;
1381 /* First part of the split modified_file.entries[edit->j2 - edit->i2 + edit->i1]: */
1382 gl_list_add_first (result_entries, split[0]);
1383 /* The additions. */
1384 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1386 struct entry *added_entry = modified_file.entries[j];
1387 gl_list_add_first (result_entries, added_entry);
1389 /* Now the single-entry changes. */
1390 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1392 struct entry *changed_entry =
1393 (j == edit->j1 + num_added
1394 ? split[1]
1395 : modified_file.entries[j]);
1396 size_t i = j + edit->i2 - edit->j2;
1397 ssize_t k = entries_mapping_get (&mapping, i);
1398 if (k >= 0
1399 && entry_equals (ancestor_file.entries[i],
1400 mainstream_file.entries[k]))
1402 gl_list_node_set_value (result_entries,
1403 result_entries_pointers[k],
1404 changed_entry);
1406 else if (!entry_equals (ancestor_file.entries[i],
1407 changed_entry))
1409 struct conflict *c = XMALLOC (struct conflict);
1410 c->num_old_entries = 1;
1411 c->old_entries =
1412 XNMALLOC (c->num_old_entries, struct entry *);
1413 c->old_entries[0] = ancestor_file.entries[i];
1414 c->num_modified_entries = 1;
1415 c->modified_entries =
1416 XNMALLOC (c->num_modified_entries, struct entry *);
1417 c->modified_entries[0] = changed_entry;
1418 gl_list_add_last (result_conflicts, c);
1421 done = true;
1425 if (!done)
1427 bool simple;
1428 /* Test whether the change is "simple", i.e. whether it
1429 consists of small changes to the old ChangeLog entries
1430 and additions before them:
1431 entry_1
1433 entry_n
1434 are mapped to
1435 added_entry
1437 added_entry
1438 modified_entry_1
1440 modified_entry_n. */
1441 if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1443 size_t i;
1444 simple = true;
1445 for (i = edit->i1; i <= edit->i2; i++)
1446 if (entry_fstrcmp (ancestor_file.entries[i],
1447 modified_file.entries[i + edit->j2 - edit->i2],
1448 FSTRCMP_THRESHOLD)
1449 < FSTRCMP_THRESHOLD)
1451 simple = false;
1452 break;
1455 else
1456 simple = false;
1457 if (simple)
1459 /* Apply the additions and each of the single-entry
1460 changes separately. */
1461 size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1462 size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1463 if (edit->j1 == 0)
1465 /* A simple change at the top of modified_file.
1466 Apply it to the top of mainstream_file. */
1467 ssize_t j;
1468 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1470 struct entry *added_entry = modified_file.entries[j];
1471 gl_list_add_first (result_entries, added_entry);
1473 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1475 struct entry *changed_entry = modified_file.entries[j];
1476 size_t i = j + edit->i2 - edit->j2;
1477 ssize_t k = entries_mapping_get (&mapping, i);
1478 if (k >= 0
1479 && entry_equals (ancestor_file.entries[i],
1480 mainstream_file.entries[k]))
1482 gl_list_node_set_value (result_entries,
1483 result_entries_pointers[k],
1484 changed_entry);
1486 else
1488 struct conflict *c;
1489 ASSERT (!entry_equals (ancestor_file.entries[i],
1490 changed_entry));
1491 c = XMALLOC (struct conflict);
1492 c->num_old_entries = 1;
1493 c->old_entries =
1494 XNMALLOC (c->num_old_entries, struct entry *);
1495 c->old_entries[0] = ancestor_file.entries[i];
1496 c->num_modified_entries = 1;
1497 c->modified_entries =
1498 XNMALLOC (c->num_modified_entries, struct entry *);
1499 c->modified_entries[0] = changed_entry;
1500 gl_list_add_last (result_conflicts, c);
1503 done = true;
1505 else
1507 ssize_t i_before;
1508 ssize_t k_before;
1509 bool linear;
1510 i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1511 ASSERT (i_before >= 0);
1512 /* A simple change after ancestor_file.entries[i_before].
1513 See whether this entry and the following num_changed
1514 entries still exist in mainstream_file and are still
1515 consecutive. */
1516 k_before = entries_mapping_get (&mapping, i_before);
1517 linear = (k_before >= 0);
1518 if (linear)
1520 size_t i;
1521 for (i = i_before + 1; i <= i_before + num_changed; i++)
1522 if (entries_mapping_get (&mapping, i) != k_before + (i - i_before))
1524 linear = false;
1525 break;
1528 if (linear)
1530 gl_list_node_t node_for_insert =
1531 result_entries_pointers[k_before + 1];
1532 ssize_t j;
1533 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1535 struct entry *added_entry = modified_file.entries[j];
1536 gl_list_add_before (result_entries, node_for_insert, added_entry);
1538 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1540 struct entry *changed_entry = modified_file.entries[j];
1541 size_t i = j + edit->i2 - edit->j2;
1542 ssize_t k = entries_mapping_get (&mapping, i);
1543 ASSERT (k >= 0);
1544 if (entry_equals (ancestor_file.entries[i],
1545 mainstream_file.entries[k]))
1547 gl_list_node_set_value (result_entries,
1548 result_entries_pointers[k],
1549 changed_entry);
1551 else
1553 struct conflict *c;
1554 ASSERT (!entry_equals (ancestor_file.entries[i],
1555 changed_entry));
1556 c = XMALLOC (struct conflict);
1557 c->num_old_entries = 1;
1558 c->old_entries =
1559 XNMALLOC (c->num_old_entries, struct entry *);
1560 c->old_entries[0] = ancestor_file.entries[i];
1561 c->num_modified_entries = 1;
1562 c->modified_entries =
1563 XNMALLOC (c->num_modified_entries, struct entry *);
1564 c->modified_entries[0] = changed_entry;
1565 gl_list_add_last (result_conflicts, c);
1568 done = true;
1572 else
1574 /* A big change.
1575 See whether the num_changed entries still exist
1576 unchanged in mainstream_file and are still
1577 consecutive. */
1578 ssize_t i_first;
1579 ssize_t k_first;
1580 bool linear_unchanged;
1581 i_first = edit->i1;
1582 k_first = entries_mapping_get (&mapping, i_first);
1583 linear_unchanged =
1584 (k_first >= 0
1585 && entry_equals (ancestor_file.entries[i_first],
1586 mainstream_file.entries[k_first]));
1587 if (linear_unchanged)
1589 size_t i;
1590 for (i = i_first + 1; i <= edit->i2; i++)
1591 if (!(entries_mapping_get (&mapping, i) == k_first + (i - i_first)
1592 && entry_equals (ancestor_file.entries[i],
1593 mainstream_file.entries[entries_mapping_get (&mapping, i)])))
1595 linear_unchanged = false;
1596 break;
1599 if (linear_unchanged)
1601 gl_list_node_t node_for_insert =
1602 result_entries_pointers[k_first];
1603 ssize_t j;
1604 size_t i;
1605 for (j = edit->j2; j >= edit->j1; j--)
1607 struct entry *new_entry = modified_file.entries[j];
1608 gl_list_add_before (result_entries, node_for_insert, new_entry);
1610 for (i = edit->i1; i <= edit->i2; i++)
1612 ssize_t k = entries_mapping_get (&mapping, i);
1613 ASSERT (k >= 0);
1614 ASSERT (entry_equals (ancestor_file.entries[i],
1615 mainstream_file.entries[k]));
1616 gl_list_node_set_value (result_entries,
1617 result_entries_pointers[k],
1618 &empty_entry);
1620 done = true;
1624 if (!done)
1626 struct conflict *c = XMALLOC (struct conflict);
1627 size_t i, j;
1628 c->num_old_entries = edit->i2 - edit->i1 + 1;
1629 c->old_entries =
1630 XNMALLOC (c->num_old_entries, struct entry *);
1631 for (i = edit->i1; i <= edit->i2; i++)
1632 c->old_entries[i - edit->i1] = ancestor_file.entries[i];
1633 c->num_modified_entries = edit->j2 - edit->j1 + 1;
1634 c->modified_entries =
1635 XNMALLOC (c->num_modified_entries, struct entry *);
1636 for (j = edit->j1; j <= edit->j2; j++)
1637 c->modified_entries[j - edit->j1] = modified_file.entries[j];
1638 gl_list_add_last (result_conflicts, c);
1641 break;
1646 /* Output the result. */
1648 FILE *fp = fopen (destination_file_name, "w");
1649 if (fp == NULL)
1651 fprintf (stderr, "could not write file '%s'\n", destination_file_name);
1652 exit (EXIT_FAILURE);
1655 /* Output the conflicts at the top. */
1657 size_t n = gl_list_size (result_conflicts);
1658 size_t i;
1659 for (i = 0; i < n; i++)
1660 conflict_write (fp, (struct conflict *) gl_list_get_at (result_conflicts, i));
1662 /* Output the modified and unmodified entries, in order. */
1664 gl_list_iterator_t iter = gl_list_iterator (result_entries);
1665 const void *elt;
1666 gl_list_node_t node;
1667 while (gl_list_iterator_next (&iter, &elt, &node))
1668 entry_write (fp, (struct entry *) elt);
1669 gl_list_iterator_free (&iter);
1672 if (fwriteerror (fp))
1674 fprintf (stderr, "error writing to file '%s'\n", destination_file_name);
1675 exit (EXIT_FAILURE);
1679 exit (gl_list_size (result_conflicts) > 0 ? EXIT_FAILURE : EXIT_SUCCESS);