Revert 171679
[chromium-blink-merge.git] / third_party / bsdiff / mbsdiff.cc
blob1dcd35e9d4cef7b1356711a7d57cc640a6542e92
1 /*
2 bsdiff.c -- Binary patch generator.
4 Copyright 2003 Colin Percival
6 For the terms under which this work may be distributed, please see
7 the adjoining file "LICENSE".
9 ChangeLog:
10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit
11 values throughout.
12 --Benjamin Smedberg <benjamin@smedbergs.us>
13 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table
14 provided by libbz2.
15 --Darin Fisher <darin@meer.net>
16 2007-11-14 - Changed to use Crc from Lzma library instead of Bzip library
17 --Rahul Kuchhal
20 #include "mbspatch.h"
22 #include <stdlib.h>
23 #include <stdio.h>
24 #include <string.h>
25 #include <fcntl.h>
26 #include <stdarg.h>
27 #ifdef _WIN32
28 #include <io.h>
29 #include <winsock2.h>
30 #else
31 #include <unistd.h>
32 #include <arpa/inet.h>
33 #define _O_BINARY 0
34 #endif
36 #undef MIN
37 #define MIN(x,y) (((x)<(y)) ? (x) : (y))
39 static void
40 reporterr(int e, const char *fmt, ...)
42 if (fmt) {
43 va_list args;
44 va_start(args, fmt);
45 vfprintf(stderr, fmt, args);
46 va_end(args);
49 exit(e);
52 static void
53 split(int *I,int *V,int start,int len,int h)
55 int i,j,k,x,tmp,jj,kk;
57 if(len<16) {
58 for(k=start;k<start+len;k+=j) {
59 j=1;x=V[I[k]+h];
60 for(i=1;k+i<start+len;i++) {
61 if(V[I[k+i]+h]<x) {
62 x=V[I[k+i]+h];
63 j=0;
65 if(V[I[k+i]+h]==x) {
66 tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp;
67 j++;
70 for(i=0;i<j;i++) V[I[k+i]]=k+j-1;
71 if(j==1) I[k]=-1;
73 return;
76 x=V[I[start+len/2]+h];
77 jj=0;kk=0;
78 for(i=start;i<start+len;i++) {
79 if(V[I[i]+h]<x) jj++;
80 if(V[I[i]+h]==x) kk++;
82 jj+=start;kk+=jj;
84 i=start;j=0;k=0;
85 while(i<jj) {
86 if(V[I[i]+h]<x) {
87 i++;
88 } else if(V[I[i]+h]==x) {
89 tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp;
90 j++;
91 } else {
92 tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp;
93 k++;
97 while(jj+j<kk) {
98 if(V[I[jj+j]+h]==x) {
99 j++;
100 } else {
101 tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp;
102 k++;
106 if(jj>start) split(I,V,start,jj-start,h);
108 for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1;
109 if(jj==kk-1) I[jj]=-1;
111 if(start+len>kk) split(I,V,kk,start+len-kk,h);
114 static void
115 qsufsort(int *I,int *V,unsigned char *old,int oldsize)
117 int buckets[256];
118 int i,h,len;
120 for(i=0;i<256;i++) buckets[i]=0;
121 for(i=0;i<oldsize;i++) buckets[old[i]]++;
122 for(i=1;i<256;i++) buckets[i]+=buckets[i-1];
123 for(i=255;i>0;i--) buckets[i]=buckets[i-1];
124 buckets[0]=0;
126 for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i;
127 I[0]=oldsize;
128 for(i=0;i<oldsize;i++) V[i]=buckets[old[i]];
129 V[oldsize]=0;
130 for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1;
131 I[0]=-1;
133 for(h=1;I[0]!=-(oldsize+1);h+=h) {
134 len=0;
135 for(i=0;i<oldsize+1;) {
136 if(I[i]<0) {
137 len-=I[i];
138 i-=I[i];
139 } else {
140 if(len) I[i-len]=-len;
141 len=V[I[i]]+1-i;
142 split(I,V,i,len,h);
143 i+=len;
144 len=0;
147 if(len) I[i-len]=-len;
150 for(i=0;i<oldsize+1;i++) I[V[i]]=i;
153 static int
154 matchlen(unsigned char *old,int oldsize,unsigned char *newbuf,int newsize)
156 int i;
158 for(i=0;(i<oldsize)&&(i<newsize);i++)
159 if(old[i]!=newbuf[i]) break;
161 return i;
164 static int
165 search(int *I,unsigned char *old,int oldsize,
166 unsigned char *newbuf,int newsize,int st,int en,int *pos)
168 int x,y;
170 if(en-st<2) {
171 x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize);
172 y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize);
174 if(x>y) {
175 *pos=I[st];
176 return x;
177 } else {
178 *pos=I[en];
179 return y;
183 x=st+(en-st)/2;
184 if(memcmp(old+I[x],newbuf,MIN(oldsize-I[x],newsize))<0) {
185 return search(I,old,oldsize,newbuf,newsize,x,en,pos);
186 } else {
187 return search(I,old,oldsize,newbuf,newsize,st,x,pos);
191 int main(int argc,char *argv[])
193 int fd;
194 unsigned char *old,*newbuf;
195 int oldsize,newsize;
196 int *I,*V;
198 int scan,pos,len;
199 int lastscan,lastpos,lastoffset;
200 int oldscore,scsc;
202 int s,Sf,lenf,Sb,lenb;
203 int overlap,Ss,lens;
204 int i;
206 int dblen,eblen;
207 unsigned char *db,*eb;
209 unsigned int scrc;
211 MBSPatchHeader header = {
212 {'M','B','D','I','F','F','1','0'},
213 0, 0, 0, 0, 0, 0
216 int numtriples;
218 if(argc!=4)
219 reporterr(1,"usage: %s <oldfile> <newfile> <patchfile>\n",argv[0]);
221 /* Allocate oldsize+1 bytes instead of oldsize bytes to ensure
222 that we never try to malloc(0) and get a NULL pointer */
223 if(((fd=open(argv[1],O_RDONLY|_O_BINARY,0))<0) ||
224 ((oldsize=lseek(fd,0,SEEK_END))==-1) ||
225 ((old=(unsigned char*) malloc(oldsize+1))==NULL) ||
226 (lseek(fd,0,SEEK_SET)!=0) ||
227 (read(fd,old,oldsize)!=oldsize) ||
228 (close(fd)==-1))
229 reporterr(1,"%s\n",argv[1]);
231 scrc = CalculateCrc(old, oldsize);
233 if(((I=(int*) malloc((oldsize+1)*sizeof(int)))==NULL) ||
234 ((V=(int*) malloc((oldsize+1)*sizeof(int)))==NULL))
235 reporterr(1,NULL);
237 qsufsort(I,V,old,oldsize);
239 free(V);
241 /* Allocate newsize+1 bytes instead of newsize bytes to ensure
242 that we never try to malloc(0) and get a NULL pointer */
243 if(((fd=open(argv[2],O_RDONLY|_O_BINARY,0))<0) ||
244 ((newsize=lseek(fd,0,SEEK_END))==-1) ||
245 ((newbuf=(unsigned char*) malloc(newsize+1))==NULL) ||
246 (lseek(fd,0,SEEK_SET)!=0) ||
247 (read(fd,newbuf,newsize)!=newsize) ||
248 (close(fd)==-1)) reporterr(1,"%s\n",argv[2]);
250 if(((db=(unsigned char*) malloc(newsize+1))==NULL) ||
251 ((eb=(unsigned char*) malloc(newsize+1))==NULL))
252 reporterr(1,NULL);
254 dblen=0;
255 eblen=0;
257 if((fd=open(argv[3],O_CREAT|O_TRUNC|O_WRONLY|_O_BINARY,0666))<0)
258 reporterr(1,"%s\n",argv[3]);
260 /* start writing here */
262 /* We don't know the lengths yet, so we will write the header again
263 at the end */
265 if(write(fd,&header,sizeof(MBSPatchHeader))!=sizeof(MBSPatchHeader))
266 reporterr(1,"%s\n",argv[3]);
268 scan=0;len=0;
269 lastscan=0;lastpos=0;lastoffset=0;
270 numtriples = 0;
271 while(scan<newsize) {
272 oldscore=0;
274 for(scsc=scan+=len;scan<newsize;scan++) {
275 len=search(I,old,oldsize,newbuf+scan,newsize-scan,
276 0,oldsize,&pos);
278 for(;scsc<scan+len;scsc++)
279 if((scsc+lastoffset<oldsize) &&
280 (old[scsc+lastoffset] == newbuf[scsc]))
281 oldscore++;
283 if(((len==oldscore) && (len!=0)) ||
284 (len>oldscore+8)) break;
286 if((scan+lastoffset<oldsize) &&
287 (old[scan+lastoffset] == newbuf[scan]))
288 oldscore--;
291 if((len!=oldscore) || (scan==newsize)) {
292 MBSPatchTriple triple;
294 s=0;Sf=0;lenf=0;
295 for(i=0;(lastscan+i<scan)&&(lastpos+i<oldsize);) {
296 if(old[lastpos+i]==newbuf[lastscan+i]) s++;
297 i++;
298 if(s*2-i>Sf*2-lenf) { Sf=s; lenf=i; };
301 lenb=0;
302 if(scan<newsize) {
303 s=0;Sb=0;
304 for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) {
305 if(old[pos-i]==newbuf[scan-i]) s++;
306 if(s*2-i>Sb*2-lenb) { Sb=s; lenb=i; };
310 if(lastscan+lenf>scan-lenb) {
311 overlap=(lastscan+lenf)-(scan-lenb);
312 s=0;Ss=0;lens=0;
313 for(i=0;i<overlap;i++) {
314 if(newbuf[lastscan+lenf-overlap+i]==
315 old[lastpos+lenf-overlap+i]) s++;
316 if(newbuf[scan-lenb+i]==
317 old[pos-lenb+i]) s--;
318 if(s>Ss) { Ss=s; lens=i+1; };
321 lenf+=lens-overlap;
322 lenb-=lens;
325 for(i=0;i<lenf;i++)
326 db[dblen+i]=newbuf[lastscan+i]-old[lastpos+i];
327 for(i=0;i<(scan-lenb)-(lastscan+lenf);i++)
328 eb[eblen+i]=newbuf[lastscan+lenf+i];
330 dblen+=lenf;
331 eblen+=(scan-lenb)-(lastscan+lenf);
333 triple.x = htonl(lenf);
334 triple.y = htonl((scan-lenb)-(lastscan+lenf));
335 triple.z = htonl((pos-lenb)-(lastpos+lenf));
336 if (write(fd,&triple,sizeof(triple)) != sizeof(triple))
337 reporterr(1,NULL);
339 #ifdef DEBUG_bsmedberg
340 printf("Writing a block:\n"
341 " X: %u\n"
342 " Y: %u\n"
343 " Z: %i\n",
344 (int) lenf,
345 (int) ((scan-lenb)-(lastscan+lenf)),
346 (int) ((pos-lenb)-(lastpos+lenf)));
347 #endif
349 ++numtriples;
351 lastscan=scan-lenb;
352 lastpos=pos-lenb;
353 lastoffset=pos-scan;
357 if(write(fd,db,dblen)!=dblen)
358 reporterr(1,NULL);
360 if(write(fd,eb,eblen)!=eblen)
361 reporterr(1,NULL);
363 header.slen = htonl(oldsize);
364 header.scrc32 = htonl(scrc);
365 header.dlen = htonl(newsize);
366 header.cblen = htonl(numtriples * sizeof(MBSPatchTriple));
367 header.difflen = htonl(dblen);
368 header.extralen = htonl(eblen);
370 if (lseek(fd,0,SEEK_SET) == -1 ||
371 write(fd,&header,sizeof(header)) != sizeof(header) ||
372 close(fd) == -1)
373 reporterr(1,NULL);
375 free(db);
376 free(eb);
377 free(I);
378 free(old);
379 free(newbuf);
381 return 0;