1 /* vim:set ts=8 sw=8 sts=8 noet: */
3 bsdiff.c -- Binary patch generator.
5 Copyright 2003 Colin Percival
7 For the terms under which this work may be distributed, please see
8 the adjoining file "LICENSE".
11 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit
13 --Benjamin Smedberg <benjamin@smedbergs.us>
14 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table
16 --Darin Fisher <darin@meer.net>
36 #include <arpa/inet.h>
43 #define MIN(x,y) (((x)<(y)) ? (x) : (y))
45 /*---------------------------------------------------------------------------*/
47 /* This variable lives in libbz2. It's declared in bzlib_private.h, so we just
48 * declare it here to avoid including that entire header file.
50 extern unsigned int BZ2_crc32Table
[256];
53 crc32(const unsigned char *buf
, unsigned int len
)
55 unsigned int crc
= 0xffffffffL
;
57 const unsigned char *end
= buf
+ len
;
58 for (; buf
!= end
; ++buf
)
59 crc
= (crc
<< 8) ^ BZ2_crc32Table
[(crc
>> 24) ^ *buf
];
65 /*---------------------------------------------------------------------------*/
68 reporterr(int e
, const char *fmt
, ...)
73 vfprintf(stderr
, fmt
, args
);
81 split(int32_t *I
,int32_t *V
,int32_t start
,int32_t len
,int32_t h
)
83 int32_t i
,j
,k
,x
,tmp
,jj
,kk
;
86 for(k
=start
;k
<start
+len
;k
+=j
) {
88 for(i
=1;k
+i
<start
+len
;i
++) {
94 tmp
=I
[k
+j
];I
[k
+j
]=I
[k
+i
];I
[k
+i
]=tmp
;
98 for(i
=0;i
<j
;i
++) V
[I
[k
+i
]]=k
+j
-1;
104 x
=V
[I
[start
+len
/2]+h
];
106 for(i
=start
;i
<start
+len
;i
++) {
107 if(V
[I
[i
]+h
]<x
) jj
++;
108 if(V
[I
[i
]+h
]==x
) kk
++;
116 } else if(V
[I
[i
]+h
]==x
) {
117 tmp
=I
[i
];I
[i
]=I
[jj
+j
];I
[jj
+j
]=tmp
;
120 tmp
=I
[i
];I
[i
]=I
[kk
+k
];I
[kk
+k
]=tmp
;
126 if(V
[I
[jj
+j
]+h
]==x
) {
129 tmp
=I
[jj
+j
];I
[jj
+j
]=I
[kk
+k
];I
[kk
+k
]=tmp
;
134 if(jj
>start
) split(I
,V
,start
,jj
-start
,h
);
136 for(i
=0;i
<kk
-jj
;i
++) V
[I
[jj
+i
]]=kk
-1;
137 if(jj
==kk
-1) I
[jj
]=-1;
139 if(start
+len
>kk
) split(I
,V
,kk
,start
+len
-kk
,h
);
143 qsufsort(int32_t *I
,int32_t *V
,unsigned char *old
,int32_t oldsize
)
145 int32_t buckets
[256];
148 for(i
=0;i
<256;i
++) buckets
[i
]=0;
149 for(i
=0;i
<oldsize
;i
++) buckets
[old
[i
]]++;
150 for(i
=1;i
<256;i
++) buckets
[i
]+=buckets
[i
-1];
151 for(i
=255;i
>0;i
--) buckets
[i
]=buckets
[i
-1];
154 for(i
=0;i
<oldsize
;i
++) I
[++buckets
[old
[i
]]]=i
;
156 for(i
=0;i
<oldsize
;i
++) V
[i
]=buckets
[old
[i
]];
158 for(i
=1;i
<256;i
++) if(buckets
[i
]==buckets
[i
-1]+1) I
[buckets
[i
]]=-1;
161 for(h
=1;I
[0]!=-(oldsize
+1);h
+=h
) {
163 for(i
=0;i
<oldsize
+1;) {
168 if(len
) I
[i
-len
]=-len
;
175 if(len
) I
[i
-len
]=-len
;
178 for(i
=0;i
<oldsize
+1;i
++) I
[V
[i
]]=i
;
182 matchlen(unsigned char *old
,int32_t oldsize
,unsigned char *newbuf
,int32_t newsize
)
186 for(i
=0;(i
<oldsize
)&&(i
<newsize
);i
++)
187 if(old
[i
]!=newbuf
[i
]) break;
193 search(int32_t *I
,unsigned char *old
,int32_t oldsize
,
194 unsigned char *newbuf
,int32_t newsize
,int32_t st
,int32_t en
,int32_t *pos
)
199 x
=matchlen(old
+I
[st
],oldsize
-I
[st
],newbuf
,newsize
);
200 y
=matchlen(old
+I
[en
],oldsize
-I
[en
],newbuf
,newsize
);
212 if(memcmp(old
+I
[x
],newbuf
,MIN(oldsize
-I
[x
],newsize
))<0) {
213 return search(I
,old
,oldsize
,newbuf
,newsize
,x
,en
,pos
);
215 return search(I
,old
,oldsize
,newbuf
,newsize
,st
,x
,pos
);
219 int main(int argc
,char *argv
[])
222 unsigned char *old
,*newbuf
;
223 int32_t oldsize
,newsize
;
226 int32_t scan
,pos
,len
;
227 int32_t lastscan
,lastpos
,lastoffset
;
228 int32_t oldscore
,scsc
;
230 int32_t s
,Sf
,lenf
,Sb
,lenb
;
231 int32_t overlap
,Ss
,lens
;
235 unsigned char *db
,*eb
;
239 MBSPatchHeader header
= {
240 {'M','B','D','I','F','F','1','0'},
247 reporterr(1,"usage: %s <oldfile> <newfile> <patchfile>\n",argv
[0]);
249 /* Allocate oldsize+1 bytes instead of oldsize bytes to ensure
250 that we never try to malloc(0) and get a NULL pointer */
251 if(((fd
=open(argv
[1],O_RDONLY
|_O_BINARY
,0))<0) ||
252 ((oldsize
=lseek(fd
,0,SEEK_END
))==-1) ||
253 ((old
=(unsigned char*) malloc(oldsize
+1))==NULL
) ||
254 (lseek(fd
,0,SEEK_SET
)!=0) ||
255 (read(fd
,old
,oldsize
)!=oldsize
) ||
257 reporterr(1,"%s\n",argv
[1]);
259 scrc
= crc32(old
, oldsize
);
261 if(((I
=(int32_t*) malloc((oldsize
+1)*sizeof(int32_t)))==NULL
) ||
262 ((V
=(int32_t*) malloc((oldsize
+1)*sizeof(int32_t)))==NULL
))
265 qsufsort(I
,V
,old
,oldsize
);
269 /* Allocate newsize+1 bytes instead of newsize bytes to ensure
270 that we never try to malloc(0) and get a NULL pointer */
271 if(((fd
=open(argv
[2],O_RDONLY
|_O_BINARY
,0))<0) ||
272 ((newsize
=lseek(fd
,0,SEEK_END
))==-1) ||
273 ((newbuf
=(unsigned char*) malloc(newsize
+1))==NULL
) ||
274 (lseek(fd
,0,SEEK_SET
)!=0) ||
275 (read(fd
,newbuf
,newsize
)!=newsize
) ||
276 (close(fd
)==-1)) reporterr(1,"%s\n",argv
[2]);
278 if(((db
=(unsigned char*) malloc(newsize
+1))==NULL
) ||
279 ((eb
=(unsigned char*) malloc(newsize
+1))==NULL
))
285 if((fd
=open(argv
[3],O_CREAT
|O_TRUNC
|O_WRONLY
|_O_BINARY
,0666))<0)
286 reporterr(1,"%s\n",argv
[3]);
288 /* start writing here */
290 /* We don't know the lengths yet, so we will write the header again
293 if(write(fd
,&header
,sizeof(MBSPatchHeader
))!=sizeof(MBSPatchHeader
))
294 reporterr(1,"%s\n",argv
[3]);
297 lastscan
=0;lastpos
=0;lastoffset
=0;
299 while(scan
<newsize
) {
302 for(scsc
=scan
+=len
;scan
<newsize
;scan
++) {
303 len
=search(I
,old
,oldsize
,newbuf
+scan
,newsize
-scan
,
306 for(;scsc
<scan
+len
;scsc
++)
307 if((scsc
+lastoffset
<oldsize
) &&
308 (old
[scsc
+lastoffset
] == newbuf
[scsc
]))
311 if(((len
==oldscore
) && (len
!=0)) ||
312 (len
>oldscore
+10)) break;
314 if((scan
+lastoffset
<oldsize
) &&
315 (old
[scan
+lastoffset
] == newbuf
[scan
]))
319 if((len
!=oldscore
) || (scan
==newsize
)) {
320 MBSPatchTriple triple
;
323 for(i
=0;(lastscan
+i
<scan
)&&(lastpos
+i
<oldsize
);) {
324 if(old
[lastpos
+i
]==newbuf
[lastscan
+i
]) s
++;
326 if(s
*3-i
*2>Sf
*3-lenf
*2) { Sf
=s
; lenf
=i
; };
332 for(i
=1;(scan
>=lastscan
+i
)&&(pos
>=i
);i
++) {
333 if(old
[pos
-i
]==newbuf
[scan
-i
]) s
++;
334 if(s
*3-i
*2>Sb
*3-lenb
*2) { Sb
=s
; lenb
=i
; };
338 if(lastscan
+lenf
>scan
-lenb
) {
339 overlap
=(lastscan
+lenf
)-(scan
-lenb
);
341 for(i
=0;i
<overlap
;i
++) {
342 if(newbuf
[lastscan
+lenf
-overlap
+i
]==
343 old
[lastpos
+lenf
-overlap
+i
]) s
++;
344 if(newbuf
[scan
-lenb
+i
]==
345 old
[pos
-lenb
+i
]) s
--;
346 if(s
>Ss
) { Ss
=s
; lens
=i
+1; };
354 db
[dblen
+i
]=newbuf
[lastscan
+i
]-old
[lastpos
+i
];
355 for(i
=0;i
<(scan
-lenb
)-(lastscan
+lenf
);i
++)
356 eb
[eblen
+i
]=newbuf
[lastscan
+lenf
+i
];
359 eblen
+=(scan
-lenb
)-(lastscan
+lenf
);
361 triple
.x
= htonl(lenf
);
362 triple
.y
= htonl((scan
-lenb
)-(lastscan
+lenf
));
363 triple
.z
= htonl((pos
-lenb
)-(lastpos
+lenf
));
364 if (write(fd
,&triple
,sizeof(triple
)) != sizeof(triple
))
367 #ifdef DEBUG_bsmedberg
368 printf("Writing a block:\n"
373 (uint32_t) ((scan
-lenb
)-(lastscan
+lenf
)),
374 (uint32_t) ((pos
-lenb
)-(lastpos
+lenf
)));
385 if(write(fd
,db
,dblen
)!=dblen
)
388 if(write(fd
,eb
,eblen
)!=eblen
)
391 header
.slen
= htonl(oldsize
);
392 header
.scrc32
= htonl(scrc
);
393 header
.dlen
= htonl(newsize
);
394 header
.cblen
= htonl(numtriples
* sizeof(MBSPatchTriple
));
395 header
.difflen
= htonl(dblen
);
396 header
.extralen
= htonl(eblen
);
398 if (lseek(fd
,0,SEEK_SET
) == -1 ||
399 write(fd
,&header
,sizeof(header
)) != sizeof(header
) ||