Fix #338: re.sub() flag argument at wrong position.
[docutils.git] / prest / doc / tools / htmldiff.prl
blob4c355d17fdc636ea354c10d09b0d6ec72f8262e4
1 #!/usr/local/bin/perl
3 =pod
4 =begin reST
5 =begin Id
6 $Id: htmldiff.prl 767 2006-01-28 03:29:15Z marknodine $
7 Copyright (C) 2002-2005 Freescale Semiconductor, Inc.
8 Distributed under terms of the GNU General Public License (GPL).
9 =end Id
11 =begin Description
12 Description: This tool takes two HTML files and creates an HTML file
13 that has markings of text insertions and deletions.
15 =end Description
16 =begin Usage
17 Usage: htmldiff [options] file1 file2
19 Options:
20 -b string String to insert for backward link with -l.
21 Default: "${opt_b}"
22 -d string Deletion marking string, where "$_" specifies the
23 deleted words.
24 Default: "${opt_d}"
25 -f string String to insert for forward link with -l.
26 Default: "${opt_f}"
27 -h Prints help
28 -i string Insertion marking string, where "$_" specifies the
29 inserted words.
30 Default: "${opt_i}"
31 -k Create key describing difference format
32 -l Create links between difference regions
33 -p string Program to run for diff (program must support -D option).
34 Default: ${opt_p}
35 -V Prints version information
36 =end Usage
37 =end reST
38 =cut
# Default values for the string-valued options.  They are assigned at
# compile time, so they are already set when getopts() runs and when
# Usage() interpolates "${opt_X}" into the help text.
BEGIN {
    # -b / -f: markers shown for the backward and forward links (-l).
    ($opt_b, $opt_f) = (
        '<font color=orange>&lt;</font>',
        '<font color=orange>&gt;</font>',
    );
    # -i / -d: templates wrapped around inserted and deleted text; the
    # literal "$_" marks where the text itself goes.
    ($opt_i, $opt_d) = (
        '<U><FONT COLOR=#800080>$_</FONT></U>',
        '<S><FONT COLOR=#800000>$_</FONT></S>',
    );
    # -p: program used to compute the differences.
    $opt_p = 'diff';
}
# Get options and process them
use Getopt::Std;
Usage() unless getopts("b:d:f:hi:klp:V");

Usage('Id') if $opt_V;
Usage('Description') if $opt_h;

die "Wrong number of arguments" unless @ARGV == 2;

# Split the insertion and deletion templates around the literal "$_"
# placeholder.  Each match is checked: an unchecked failed match would
# leave $1/$2 holding the captures of the previous successful match.
($INSERT_PFX, $INSERT_SFX) = $opt_i =~ /(.*)\$_(.*)/
    or die "The -i string must contain \$_\n";
($DELETE_PFX, $DELETE_SFX) = $opt_d =~ /(.*)\$_(.*)/
    or die "The -d string must contain \$_\n";

#sub INSERTION { "$INSERT_PFX$_[0]$INSERT_SFX" }
#sub DELETION { OptLString() . "$DELETE_PFX$_[0]$DELETE_SFX" }

# Which input the line currently being read belongs to.
($ctx_both, $ctx_1, $ctx_2) = (0 .. 2);

# The diff command goes through the shell (so -p may carry extra options);
# single-quote each file name so that spaces and shell metacharacters in
# the names are passed through literally.
my @quoted_files;
for my $file (@ARGV) {
    (my $quoted = $file) =~ s/'/'\\''/g;
    push @quoted_files, "'$quoted'";
}
$args = join(' ', @quoted_files);

# Symbol handed to "diff -D"; the main loop looks for the #ifdef/#ifndef
# lines that mention it.
$sym = "A__";
open(DIFF, '-|', "$opt_p -D $sym $args") || die "Cannot run diff";
$context = $ctx_both;
$DIFF = 0;    # number of difference regions linked so far

if ($opt_k) {
    # Key describing how deletions and insertions are rendered.
    print "Key: ${DELETE_PFX}Deleted text.$DELETE_SFX\n";
    print "${INSERT_PFX}Inserted text.$INSERT_SFX\n";
    print "Use <A HREF='#'>$opt_b</A> and <A HREF='#__diff${\scalar($DIFF+1)}'>$opt_f</A> to jump to previous/next difference region.\n"
        if $opt_l;
    print "<hr />\n";
}

# Anchor number 0: a forward link to the first difference region.
print "<A HREF='#__diff${\scalar($DIFF+1)}' NAME='__diff$DIFF'>$opt_f</A> "
    if $opt_l;
use File::Temp qw(tempfile);

# Main loop.  Reads the merged output of "diff -D $sym": lines outside any
# #if section are common to both files and are printed unchanged; lines in
# an "#ifndef $sym" section are collected in @diff1 (marked as deleted) and
# lines in an "#ifdef $sym" section in @diff2 (marked as inserted).  Each
# "#endif" triggers processing of the region collected so far.
while (<DIFF>) {
    if (/^#ifndef $sym\b/o) {
        $context = $ctx_1; next;
    }
    if (/^#ifdef $sym\b/o) {
        $context = $ctx_2; next;
    }
    if (/^#else /o) {
        # Flip between the two one-sided contexts (1 <-> 2).
        $context = 3 - $context; next;
    }
    if (/^#endif /o) {
        $context = $ctx_both;
        # Process the diff
        # Get the word lists for each section
        $diff1 = join('', @diff1);
        $diff2 = join('', @diff2);
        if ($diff1 eq '') {
            # Pure insertion: mark all the new text and put the
            # navigation links in front of the first marked piece.
            my $s = MarkNonHTML($diff2, $INSERT_PFX, $INSERT_SFX);
            $s =~ s/(\Q$INSERT_PFX\E)/OptLString() . $1/eo;
            print $s;
        }
        elsif ($diff2 eq '') {
            # Pure deletion: same treatment with the deletion markers.
            my $s = MarkNonHTML($diff1, $DELETE_PFX, $DELETE_SFX);
            $s =~ s/(\Q$DELETE_PFX\E)/OptLString() . $1/eo;
            print $s;
        }
        else {
            @words1 = GetWordList($diff1);
            @words2 = GetWordList($diff2);
            # Use diff to find out which words were added/deleted/changed.
            # The word lists go one word per line into private temporary
            # files (File::Temp picks safe, unpredictable names).
            my ($out1, $file1) = tempfile(UNLINK => 1);
            print {$out1} join("\n", @words1), "\n";
            close($out1) || die "Cannot write to $file1";
            my ($out2, $file2) = tempfile(UNLINK => 1);
            print {$out2} join("\n", @words2), "\n";
            close($out2) || die "Cannot write to $file2";
            open(my $word_diff, '-|', 'diff', $file1, $file2)
                || die "Cannot run diff";
            # Record for each difference section the type of difference
            # (a, c, or d), the beginning and ending words in diff2, and
            # any non-HTML words deleted from diff1.
            my (@deletes, @diffs);
            while (my $line = <$word_diff>) {
                if ($line =~ /^[0-9]+(?:,[0-9]+)?([acd][0-9]+(?:,[0-9]+)?)/) {
                    # A new difference section
                    push(@diffs, $1);
                    push(@deletes, []);
                }
                elsif (@deletes && $line =~ /^< ([^<].*)/) {
                    # A word removed from diff1 that is not an HTML tag
                    push(@{$deletes[$#deletes]}, $1);
                }
            }
            # diff exits non-zero when it finds differences, so the
            # status of this close is deliberately not checked.
            close $word_diff;
            unlink $file1, $file2;
            # Create the merged version.  We copy words from diff2 to
            # $merged until we hit a difference section; for each
            # difference section, we put the struck out section in and
            # then mark as added any non-HTML words from diff2 for c
            # and a type difference sections.
            my $merged;
            my $word = 1;    # 1-based index of the next word in $diff2
            for my $i (0 .. $#diffs) {
                my $diff    = $diffs[$i];
                my $deletes = $deletes[$i];
                my ($type, $start, $end) =
                    $diff =~ /([acd])([0-9]+)(?:,([0-9]+))?/;
                # For a deletion diff reports the word *after which* the
                # text was removed, so the section starts one word later.
                $start++ if $type eq 'd';
                $end = $start unless $end;
                # Copy over stuff
                while ($word < $start) {
                    if ($diff2 =~ s/^(\s+)//) {
                        # whitespace is copied but is not a word
                    }
                    elsif ($diff2 =~ s/^(<.*?>|[^<\s]+)//) {
                        $word++;
                    }
                    else {
                        # Nothing left that can be tokenised; stop
                        # instead of retrying the same text forever.
                        last;
                    }
                    $merged .= $1;
                }
                # Mark any deletions
                my $new;
                $new = ' ' .
                    MarkNonHTML(join(' ', @$deletes), $DELETE_PFX,
                                $DELETE_SFX) . ' '
                    if @$deletes;
                if ($type =~ /[ac]/) {
                    # Mark any additions.  Copy the whole string to $add.
                    my $add;
                    while ($word <= $end) {
                        if ($diff2 =~ s/^(\s+)//) {
                            # whitespace is copied but is not a word
                        }
                        elsif ($diff2 =~ s/^(<.*?>|[^<\s]+)//) {
                            $word++;
                        }
                        else {
                            last;
                        }
                        $add .= $1;
                    }
                    # Now mark the non-HTML words.
                    $new .= MarkNonHTML($add, $INSERT_PFX, $INSERT_SFX);
                }
                # Navigation links are emitted only when something was
                # actually marked in this section.
                $merged .= (($new =~ /\Q$DELETE_PFX\E|\Q$INSERT_PFX\E/o) ?
                            OptLString() : "") . $new;
            }
            # Copy the rest of the diff2 text
            $merged .= $diff2;
            print $merged;
        }
        undef @diff1;
        undef @diff2;
        next;
    }
    if ($context == $ctx_both) {
        print;
    }
    elsif ($context == $ctx_1) {
        push(@diff1, $_);
    }
    else {
        push(@diff2, $_);
    }
}
# diff exits non-zero when the files differ; only release the handle.
close DIFF;

# Final anchor: a backward link to the last difference region.
print "<A HREF='#__diff${\scalar($DIFF++)}' NAME='__diff$DIFF'>$opt_b</A>\n"
    if $opt_l;
print "($DIFF differences)\n" if $opt_k;
# Splits a string into words.  A word is either an HTML tag (a string
# enclosed between '<' and '>' on a single line) or a run of characters
# containing neither whitespace nor '<'.  Whitespace separates words and
# is discarded.  Scanning stops at the first position that is neither
# whitespace nor the start of a word (for example a '<' with no closing
# '>' on the same line).
# Inputs: string
# Outputs: array of words
sub GetWordList {
    # Work on a private copy: the scan must not touch the caller's $_
    # (or whatever variable $_ happens to be aliased to).
    my ($text) = @_;
    my @words;
    return @words unless defined $text;
    while (1) {
        # Skip any whitespace in front of the next word.
        $text =~ /\G\s+/gc;
        # Take one tag or one plain word; stop when neither is found.
        last unless $text =~ /\G(<.*?>|[^<\s]+)/gc;
        push @words, $1;
    }
    return @words;
}
# Wraps every stretch of non-HTML text in the given prefix and suffix.
# A stretch starts at the beginning of the string or right after a '>'
# and runs up to the next '<'; whitespace-only stretches that sit
# directly before a tag or at the end of the string are left unmarked.
# Inputs: string, prefix, suffix
# Returns: string
sub MarkNonHTML {
    my ($text, $open, $close) = @_;
    $text =~ s{(>|\A)(?!\s+(?:<|\Z))([^<]+)}{$1 . $open . $2 . $close}ge;
    return $text;
}
# Builds the pair of navigation links placed in front of a difference
# region: a backward link that also names this region's anchor, followed
# by a forward link to the next region.  Returns the empty string when
# link generation (-l) is off.
# Inputs: None
# Returns: string
# Uses globals: $opt_l, $opt_b, $opt_f, $DIFF ($DIFF is incremented when
# links are generated)
sub OptLString {
    return "" unless $opt_l;
    my $previous = $DIFF++;     # anchor number the backward link targets
    my $following = $DIFF + 1;  # anchor number the forward link targets
    return "<A HREF='#__diff$previous' NAME='__diff$DIFF'>$opt_b</A> <A HREF='#__diff$following'>$opt_f</A> ";
}
# Extracts and prints usage information from this script's own embedded
# documentation, then exits with status 1.
# Inputs: name of the "=begin NAME" ... "=end NAME" section to print
#         (default "Usage"; "Description" prints both the Description
#         and the Usage sections)
# Returns: never returns
sub Usage {
    my ($what) = @_;
    $what = "Usage" if ! $what;
    my $mark = $what eq 'Description' ? "($what|Usage)" : $what;
    # Three-argument open with a lexical handle: the script path is always
    # treated as a plain file name, whatever characters it contains.
    if (open(my $script, '<', $0)) {
        while (my $line = <$script>) {
            # Print the lines between the section markers, but not the
            # marker lines themselves.
            if (($line =~ /^=begin $mark/ .. $line =~ /^=end $mark/) &&
                $line !~ /^=(begin|end) $mark/) {
                # Expand "${name}" references to the current value of the
                # corresponding variable (e.g. the option defaults).
                $line =~ s/(\$\{[^\}]+\})/eval($1)/ge;
                print $line;
            }
        }
        close($script);
    }
    else {
        print STDERR "Usage not available.\n";
    }
    exit (1);
}