Update configs. IGNORE BROKEN CHANGESETS CLOSED TREE NO BUG a=release ba=release
[gecko.git] / other-licenses / bsdiff / bsdiff.c
blobeaf9027365015edbc72bcd8142c58cd3f98865b8
1 /* vim:set ts=8 sw=8 sts=8 noet: */
2 /*
3 bsdiff.c -- Binary patch generator.
5 Copyright 2003 Colin Percival
7 For the terms under which this work may be distributed, please see
8 the adjoining file "LICENSE".
10 ChangeLog:
11 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit
12 values throughout.
13 --Benjamin Smedberg <benjamin@smedbergs.us>
14 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table
15 provided by libbz2.
16 --Darin Fisher <darin@meer.net>
19 #include "bspatch.h"
21 #include <stdlib.h>
22 #include <stdio.h>
23 #include <string.h>
24 #include <fcntl.h>
25 #include <stdarg.h>
26 #ifdef XP_WIN
27 #include <io.h>
28 #include <winsock2.h>
29 #define open _open
30 #define close _close
31 #define read _read
32 #define lseek _lseek
33 #define write _write
34 #else
35 #include <unistd.h>
36 #include <arpa/inet.h>
37 #define _O_BINARY 0
38 #endif
40 #include "crctable.h"
42 #undef MIN
43 #define MIN(x,y) (((x)<(y)) ? (x) : (y))
45 /*---------------------------------------------------------------------------*/
47 /* This variable lives in libbz2. It's declared in bzlib_private.h, so we just
48 * declare it here to avoid including that entire header file.
50 extern unsigned int BZ2_crc32Table[256];
52 static unsigned int
53 crc32(const unsigned char *buf, unsigned int len)
55 unsigned int crc = 0xffffffffL;
57 const unsigned char *end = buf + len;
58 for (; buf != end; ++buf)
59 crc = (crc << 8) ^ BZ2_crc32Table[(crc >> 24) ^ *buf];
61 crc = ~crc;
62 return crc;
65 /*---------------------------------------------------------------------------*/
67 static void
68 reporterr(int e, const char *fmt, ...)
70 if (fmt) {
71 va_list args;
72 va_start(args, fmt);
73 vfprintf(stderr, fmt, args);
74 va_end(args);
77 exit(e);
80 static void
81 split(int32_t *I,int32_t *V,int32_t start,int32_t len,int32_t h)
83 int32_t i,j,k,x,tmp,jj,kk;
85 if(len<16) {
86 for(k=start;k<start+len;k+=j) {
87 j=1;x=V[I[k]+h];
88 for(i=1;k+i<start+len;i++) {
89 if(V[I[k+i]+h]<x) {
90 x=V[I[k+i]+h];
91 j=0;
93 if(V[I[k+i]+h]==x) {
94 tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp;
95 j++;
98 for(i=0;i<j;i++) V[I[k+i]]=k+j-1;
99 if(j==1) I[k]=-1;
101 return;
104 x=V[I[start+len/2]+h];
105 jj=0;kk=0;
106 for(i=start;i<start+len;i++) {
107 if(V[I[i]+h]<x) jj++;
108 if(V[I[i]+h]==x) kk++;
110 jj+=start;kk+=jj;
112 i=start;j=0;k=0;
113 while(i<jj) {
114 if(V[I[i]+h]<x) {
115 i++;
116 } else if(V[I[i]+h]==x) {
117 tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp;
118 j++;
119 } else {
120 tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp;
121 k++;
125 while(jj+j<kk) {
126 if(V[I[jj+j]+h]==x) {
127 j++;
128 } else {
129 tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp;
130 k++;
134 if(jj>start) split(I,V,start,jj-start,h);
136 for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1;
137 if(jj==kk-1) I[jj]=-1;
139 if(start+len>kk) split(I,V,kk,start+len-kk,h);
142 static void
143 qsufsort(int32_t *I,int32_t *V,unsigned char *old,int32_t oldsize)
145 int32_t buckets[256];
146 int32_t i,h,len;
148 for(i=0;i<256;i++) buckets[i]=0;
149 for(i=0;i<oldsize;i++) buckets[old[i]]++;
150 for(i=1;i<256;i++) buckets[i]+=buckets[i-1];
151 for(i=255;i>0;i--) buckets[i]=buckets[i-1];
152 buckets[0]=0;
154 for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i;
155 I[0]=oldsize;
156 for(i=0;i<oldsize;i++) V[i]=buckets[old[i]];
157 V[oldsize]=0;
158 for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1;
159 I[0]=-1;
161 for(h=1;I[0]!=-(oldsize+1);h+=h) {
162 len=0;
163 for(i=0;i<oldsize+1;) {
164 if(I[i]<0) {
165 len-=I[i];
166 i-=I[i];
167 } else {
168 if(len) I[i-len]=-len;
169 len=V[I[i]]+1-i;
170 split(I,V,i,len,h);
171 i+=len;
172 len=0;
175 if(len) I[i-len]=-len;
178 for(i=0;i<oldsize+1;i++) I[V[i]]=i;
181 static int32_t
182 matchlen(unsigned char *old,int32_t oldsize,unsigned char *newbuf,int32_t newsize)
184 int32_t i;
186 for(i=0;(i<oldsize)&&(i<newsize);i++)
187 if(old[i]!=newbuf[i]) break;
189 return i;
192 static int32_t
193 search(int32_t *I,unsigned char *old,int32_t oldsize,
194 unsigned char *newbuf,int32_t newsize,int32_t st,int32_t en,int32_t *pos)
196 int32_t x,y;
198 if(en-st<2) {
199 x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize);
200 y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize);
202 if(x>y) {
203 *pos=I[st];
204 return x;
205 } else {
206 *pos=I[en];
207 return y;
211 x=st+(en-st)/2;
212 if(memcmp(old+I[x],newbuf,MIN(oldsize-I[x],newsize))<0) {
213 return search(I,old,oldsize,newbuf,newsize,x,en,pos);
214 } else {
215 return search(I,old,oldsize,newbuf,newsize,st,x,pos);
219 int main(int argc,char *argv[])
221 int fd;
222 unsigned char *old,*newbuf;
223 int32_t oldsize,newsize;
224 int32_t *I,*V;
226 int32_t scan,pos,len;
227 int32_t lastscan,lastpos,lastoffset;
228 int32_t oldscore,scsc;
230 int32_t s,Sf,lenf,Sb,lenb;
231 int32_t overlap,Ss,lens;
232 int32_t i;
234 int32_t dblen,eblen;
235 unsigned char *db,*eb;
237 unsigned int scrc;
239 MBSPatchHeader header = {
240 {'M','B','D','I','F','F','1','0'},
241 0, 0, 0, 0, 0, 0
244 uint32_t numtriples;
246 if(argc!=4)
247 reporterr(1,"usage: %s <oldfile> <newfile> <patchfile>\n",argv[0]);
249 /* Allocate oldsize+1 bytes instead of oldsize bytes to ensure
250 that we never try to malloc(0) and get a NULL pointer */
251 if(((fd=open(argv[1],O_RDONLY|_O_BINARY,0))<0) ||
252 ((oldsize=lseek(fd,0,SEEK_END))==-1) ||
253 ((old=(unsigned char*) malloc(oldsize+1))==NULL) ||
254 (lseek(fd,0,SEEK_SET)!=0) ||
255 (read(fd,old,oldsize)!=oldsize) ||
256 (close(fd)==-1))
257 reporterr(1,"%s\n",argv[1]);
259 scrc = crc32(old, oldsize);
261 if(((I=(int32_t*) malloc((oldsize+1)*sizeof(int32_t)))==NULL) ||
262 ((V=(int32_t*) malloc((oldsize+1)*sizeof(int32_t)))==NULL))
263 reporterr(1,NULL);
265 qsufsort(I,V,old,oldsize);
267 free(V);
269 /* Allocate newsize+1 bytes instead of newsize bytes to ensure
270 that we never try to malloc(0) and get a NULL pointer */
271 if(((fd=open(argv[2],O_RDONLY|_O_BINARY,0))<0) ||
272 ((newsize=lseek(fd,0,SEEK_END))==-1) ||
273 ((newbuf=(unsigned char*) malloc(newsize+1))==NULL) ||
274 (lseek(fd,0,SEEK_SET)!=0) ||
275 (read(fd,newbuf,newsize)!=newsize) ||
276 (close(fd)==-1)) reporterr(1,"%s\n",argv[2]);
278 if(((db=(unsigned char*) malloc(newsize+1))==NULL) ||
279 ((eb=(unsigned char*) malloc(newsize+1))==NULL))
280 reporterr(1,NULL);
282 dblen=0;
283 eblen=0;
285 if((fd=open(argv[3],O_CREAT|O_TRUNC|O_WRONLY|_O_BINARY,0666))<0)
286 reporterr(1,"%s\n",argv[3]);
288 /* start writing here */
290 /* We don't know the lengths yet, so we will write the header again
291 at the end */
293 if(write(fd,&header,sizeof(MBSPatchHeader))!=sizeof(MBSPatchHeader))
294 reporterr(1,"%s\n",argv[3]);
296 scan=0;len=0;
297 lastscan=0;lastpos=0;lastoffset=0;
298 numtriples = 0;
299 while(scan<newsize) {
300 oldscore=0;
302 for(scsc=scan+=len;scan<newsize;scan++) {
303 len=search(I,old,oldsize,newbuf+scan,newsize-scan,
304 0,oldsize,&pos);
306 for(;scsc<scan+len;scsc++)
307 if((scsc+lastoffset<oldsize) &&
308 (old[scsc+lastoffset] == newbuf[scsc]))
309 oldscore++;
311 if(((len==oldscore) && (len!=0)) ||
312 (len>oldscore+10)) break;
314 if((scan+lastoffset<oldsize) &&
315 (old[scan+lastoffset] == newbuf[scan]))
316 oldscore--;
319 if((len!=oldscore) || (scan==newsize)) {
320 MBSPatchTriple triple;
322 s=0;Sf=0;lenf=0;
323 for(i=0;(lastscan+i<scan)&&(lastpos+i<oldsize);) {
324 if(old[lastpos+i]==newbuf[lastscan+i]) s++;
325 i++;
326 if(s*3-i*2>Sf*3-lenf*2) { Sf=s; lenf=i; };
329 lenb=0;
330 if(scan<newsize) {
331 s=0;Sb=0;
332 for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) {
333 if(old[pos-i]==newbuf[scan-i]) s++;
334 if(s*3-i*2>Sb*3-lenb*2) { Sb=s; lenb=i; };
338 if(lastscan+lenf>scan-lenb) {
339 overlap=(lastscan+lenf)-(scan-lenb);
340 s=0;Ss=0;lens=0;
341 for(i=0;i<overlap;i++) {
342 if(newbuf[lastscan+lenf-overlap+i]==
343 old[lastpos+lenf-overlap+i]) s++;
344 if(newbuf[scan-lenb+i]==
345 old[pos-lenb+i]) s--;
346 if(s>Ss) { Ss=s; lens=i+1; };
349 lenf+=lens-overlap;
350 lenb-=lens;
353 for(i=0;i<lenf;i++)
354 db[dblen+i]=newbuf[lastscan+i]-old[lastpos+i];
355 for(i=0;i<(scan-lenb)-(lastscan+lenf);i++)
356 eb[eblen+i]=newbuf[lastscan+lenf+i];
358 dblen+=lenf;
359 eblen+=(scan-lenb)-(lastscan+lenf);
361 triple.x = htonl(lenf);
362 triple.y = htonl((scan-lenb)-(lastscan+lenf));
363 triple.z = htonl((pos-lenb)-(lastpos+lenf));
364 if (write(fd,&triple,sizeof(triple)) != sizeof(triple))
365 reporterr(1,NULL);
367 #ifdef DEBUG_bsmedberg
368 printf("Writing a block:\n"
369 " X: %u\n"
370 " Y: %u\n"
371 " Z: %i\n",
372 (uint32_t) lenf,
373 (uint32_t) ((scan-lenb)-(lastscan+lenf)),
374 (uint32_t) ((pos-lenb)-(lastpos+lenf)));
375 #endif
377 ++numtriples;
379 lastscan=scan-lenb;
380 lastpos=pos-lenb;
381 lastoffset=pos-scan;
385 if(write(fd,db,dblen)!=dblen)
386 reporterr(1,NULL);
388 if(write(fd,eb,eblen)!=eblen)
389 reporterr(1,NULL);
391 header.slen = htonl(oldsize);
392 header.scrc32 = htonl(scrc);
393 header.dlen = htonl(newsize);
394 header.cblen = htonl(numtriples * sizeof(MBSPatchTriple));
395 header.difflen = htonl(dblen);
396 header.extralen = htonl(eblen);
398 if (lseek(fd,0,SEEK_SET) == -1 ||
399 write(fd,&header,sizeof(header)) != sizeof(header) ||
400 close(fd) == -1)
401 reporterr(1,NULL);
403 free(db);
404 free(eb);
405 free(I);
406 free(old);
407 free(newbuf);
409 return 0;