6 $Id: htmldiff.prl 767 2006-01-28 03:29:15Z marknodine $
7 Copyright (C) 2002-2005 Freescale Semiconductor, Inc.
8 Distributed under terms of the GNU General Public License (GPL).
12 Description: This tool takes two HTML files and creates an HTML file
13 that has markings of text insertions and deletions.
17 Usage: htmldiff [options] file1 file2
20 -b string String to insert for backward link with -l.
22 -d string Deletion marking string, where "$_" specifies the deleted text.
25 -f string String to insert for forward link with -l.
28 -i string Insertion marking string, where "$_" specifies the inserted text.
31 -k Create key describing difference format
32 -l Create links between difference regions
33 -p string Program to run for diff (program must support -D option).
35 -V Prints version information
41 $opt_b = '<font color=orange><</font>';
42 $opt_f = '<font color=orange>></font>';
43 $opt_i = '<U><FONT COLOR=#800080>$_</FONT></U>';
44 $opt_d = '<S><FONT COLOR=#800000>$_</FONT></S>';
48 # Get options and process them
50 Usage
() unless getopts
("b:d:f:hi:klp:V");
52 Usage
('Id') if $opt_V;
53 Usage
('Description') if $opt_h;
55 die "Wrong number of arguments" unless @ARGV == 2;
57 $opt_i =~ /(.*)\$_(.*)/;
58 ($INSERT_PFX,$INSERT_SFX) = ($1, $2);
59 $opt_d =~ /(.*)\$_(.*)/;
60 ($DELETE_PFX,$DELETE_SFX) = ($1, $2);
62 #sub INSERTION { "$INSERT_PFX$_[0]$INSERT_SFX" }
63 #sub DELETION { OptLString() . "$DELETE_PFX$_[0]$DELETE_SFX" }
64 ($ctx_both, $ctx_1, $ctx_2) = (0 .. 2);
66 $args = join(' ',@ARGV);
68 open (DIFF
, "$opt_p -D $sym $args|") || die "Cannot run diff";
72 print "Key: ${DELETE_PFX}Deleted text.$DELETE_SFX\n";
73 print "${INSERT_PFX}Inserted text.$INSERT_SFX\n";
74 print "Use <A HREF='#'>$opt_b</A> and <A HREF='#__diff${\scalar($DIFF+1)}'>$opt_f</A> to jump to previous/next difference region.\n"
78 print "<A HREF='#__diff${\scalar($DIFF+1)}' NAME='__diff$DIFF'>$opt_f</A> "
82 if (/^#ifndef $sym\b/o) {
83 $context = $ctx_1; next;
85 if (/^#ifdef $sym\b/o) {
86 $context = $ctx_2; next;
89 $context = 3 - $context; next;
94 # Get the word lists for each section
95 $diff1 = join('', @diff1);
96 $diff2 = join('', @diff2);
98 my $s = MarkNonHTML
($diff2, $INSERT_PFX, $INSERT_SFX);
99 $s =~ s/($INSERT_PFX)/OptLString() . $1/eo;
102 elsif ($diff2 eq '') {
103 my $s = MarkNonHTML
($diff1, $DELETE_PFX, $DELETE_SFX);
104 $s =~ s/($DELETE_PFX)/OptLString() . $1/eo;
108 @words1 = GetWordList
($diff1);
109 @words2 = GetWordList
($diff2);
110 # Use diff to find out which words were added/deleted/changed
111 open (O1
, "> /tmp/1.$$") || die "Cannot write to /tmp/1.$$";
112 print O1
join("\n",@words1),"\n";
114 open (O2
, "> /tmp/2.$$") || die "Cannot write to /tmp/2.$$";
115 print O2
join("\n",@words2),"\n";
117 open (DIFF2
, "diff /tmp/1.$$ /tmp/2.$$|");
118 # Record for each difference section the type of difference
119 # (a, c, or d), the beginning and ending words in diff2, and
120 # any non-HTML words deleted from diff1.
121 my(@deletes, @diffs);
123 if (/^[0-9]+(?:,[0-9]+)?([acd][0-9]+(?:,[0-9]+)?)/) {
124 # A new difference section
128 elsif (/^< ([^<].*)/) {
129 push(@
{$deletes[$#deletes]}, $1);
135 # Create the merged version. We copy words from diff2 to
136 # $merged until we hit a difference section; for each
137 # difference section, we put the struck out section in and
138 # then mark as added any non-HTML words from diff2 for c
139 # and a type difference sections.
143 for ($i = 0; $i < @diffs; $i++) {
144 my($diff) = $diffs[$i];
145 my($deletes) = $deletes[$i];
146 my($type,$start,$end) =
147 $diff =~ /([acd])([0-9]+)(?:,([0-9]+))?/;
148 $start++ if $type eq 'd';
149 $end = $start unless $end;
151 while ($word < $start) {
152 if ($diff2 =~ /^(\s+)/) {
154 elsif ($diff2 =~ /^(<.*?>|[^<\s]+)/) {
164 MarkNonHTML
((join(' ',@
$deletes)), $DELETE_PFX,
167 if ($type =~ /[ac]/) {
168 # Mark any additions. Copy the whole string to $add.
170 while ($word <= $end) {
171 if ($diff2 =~ /^(\s+)/) {
173 elsif ($diff2 =~ /^(<.*?>|[^<\s]+)/) {
180 # Now mark the non-HTML words.
181 $new .= MarkNonHTML
($add, $INSERT_PFX, $INSERT_SFX);
183 $merged .= (($new =~ /$DELETE_PFX|$INSERT_PFX/o) ?
184 OptLString
() : "") . $new;
186 # Copy the rest of the diff2 text
194 if ($context == $ctx_both) {
197 elsif ($context == $ctx_1) {
204 print "<A HREF='#__diff${\scalar($DIFF++)}' NAME='__diff$DIFF'>$opt_b</A>\n"
206 print "($DIFF differences)\n" if $opt_k;
208 # Returns a list of words. A word is either a run of characters containing
209 # neither whitespace nor '<', or a string enclosed between '<' and '>'.
211 # Outputs: array of words
216 while ($last_pos != pos) {
218 next if (/\G(\s+)/gc);
219 if (/\G(<.*?>)/gc || /\G([^<\s]+)/gc) {
226 # Marks all non-HTML words by putting the prefix and suffix around them.
228 # Inputs: string, prefix, suffix
231 my($s, $pfx, $sfx) = @_;
232 $s =~ s/(>|\A)(?!\s+(?:<|\Z))([^<]+)/$1$pfx$2$sfx/g;
236 # Returns a string that links from one difference section to the next
239 # Uses globals: $opt_l, $opt_b, $opt_f, $DIFF
242 "<A HREF='#__diff${\scalar($DIFF++)}' NAME='__diff$DIFF'>$opt_b</A> <A HREF='#__diff${\scalar($DIFF+1)}'>$opt_f</A> " :
246 # This subroutine extracts and prints usage information
249 $what = "Usage" if ! $what;
250 my $mark = $what eq 'Description' ?
"($what|Usage)" : $what;
251 if (open(ME
,$0) == 1) {
253 if ((/^=begin $mark/ .. /^=end $mark/) &&
254 ! /^=(begin|end) $mark/) {
255 s/(\$\{[^\}]+\})/eval($1)/ge;
262 print STDERR
"Usage not available.\n";