2 bsdiff.c -- Binary patch generator.
4 Copyright 2003 Colin Percival
6 For the terms under which this work may be distributed, please see
7 the adjoining file "LICENSE".
10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit
12 --Benjamin Smedberg <benjamin@smedbergs.us>
13 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table
15 --Darin Fisher <darin@meer.net>
16 2007-11-14 - Changed to use Crc from Lzma library instead of Bzip library
32 #include <arpa/inet.h>
37 #define MIN(x,y) (((x)<(y)) ? (x) : (y))
40 reporterr(int e
, const char *fmt
, ...)
45 vfprintf(stderr
, fmt
, args
);
53 split(int *I
,int *V
,int start
,int len
,int h
)
55 int i
,j
,k
,x
,tmp
,jj
,kk
;
58 for(k
=start
;k
<start
+len
;k
+=j
) {
60 for(i
=1;k
+i
<start
+len
;i
++) {
66 tmp
=I
[k
+j
];I
[k
+j
]=I
[k
+i
];I
[k
+i
]=tmp
;
70 for(i
=0;i
<j
;i
++) V
[I
[k
+i
]]=k
+j
-1;
76 x
=V
[I
[start
+len
/2]+h
];
78 for(i
=start
;i
<start
+len
;i
++) {
80 if(V
[I
[i
]+h
]==x
) kk
++;
88 } else if(V
[I
[i
]+h
]==x
) {
89 tmp
=I
[i
];I
[i
]=I
[jj
+j
];I
[jj
+j
]=tmp
;
92 tmp
=I
[i
];I
[i
]=I
[kk
+k
];I
[kk
+k
]=tmp
;
101 tmp
=I
[jj
+j
];I
[jj
+j
]=I
[kk
+k
];I
[kk
+k
]=tmp
;
106 if(jj
>start
) split(I
,V
,start
,jj
-start
,h
);
108 for(i
=0;i
<kk
-jj
;i
++) V
[I
[jj
+i
]]=kk
-1;
109 if(jj
==kk
-1) I
[jj
]=-1;
111 if(start
+len
>kk
) split(I
,V
,kk
,start
+len
-kk
,h
);
115 qsufsort(int *I
,int *V
,unsigned char *old
,int oldsize
)
120 for(i
=0;i
<256;i
++) buckets
[i
]=0;
121 for(i
=0;i
<oldsize
;i
++) buckets
[old
[i
]]++;
122 for(i
=1;i
<256;i
++) buckets
[i
]+=buckets
[i
-1];
123 for(i
=255;i
>0;i
--) buckets
[i
]=buckets
[i
-1];
126 for(i
=0;i
<oldsize
;i
++) I
[++buckets
[old
[i
]]]=i
;
128 for(i
=0;i
<oldsize
;i
++) V
[i
]=buckets
[old
[i
]];
130 for(i
=1;i
<256;i
++) if(buckets
[i
]==buckets
[i
-1]+1) I
[buckets
[i
]]=-1;
133 for(h
=1;I
[0]!=-(oldsize
+1);h
+=h
) {
135 for(i
=0;i
<oldsize
+1;) {
140 if(len
) I
[i
-len
]=-len
;
147 if(len
) I
[i
-len
]=-len
;
150 for(i
=0;i
<oldsize
+1;i
++) I
[V
[i
]]=i
;
154 matchlen(unsigned char *old
,int oldsize
,unsigned char *newbuf
,int newsize
)
158 for(i
=0;(i
<oldsize
)&&(i
<newsize
);i
++)
159 if(old
[i
]!=newbuf
[i
]) break;
165 search(int *I
,unsigned char *old
,int oldsize
,
166 unsigned char *newbuf
,int newsize
,int st
,int en
,int *pos
)
171 x
=matchlen(old
+I
[st
],oldsize
-I
[st
],newbuf
,newsize
);
172 y
=matchlen(old
+I
[en
],oldsize
-I
[en
],newbuf
,newsize
);
184 if(memcmp(old
+I
[x
],newbuf
,MIN(oldsize
-I
[x
],newsize
))<0) {
185 return search(I
,old
,oldsize
,newbuf
,newsize
,x
,en
,pos
);
187 return search(I
,old
,oldsize
,newbuf
,newsize
,st
,x
,pos
);
191 int main(int argc
,char *argv
[])
194 unsigned char *old
,*newbuf
;
199 int lastscan
,lastpos
,lastoffset
;
202 int s
,Sf
,lenf
,Sb
,lenb
;
207 unsigned char *db
,*eb
;
211 MBSPatchHeader header
= {
212 {'M','B','D','I','F','F','1','0'},
219 reporterr(1,"usage: %s <oldfile> <newfile> <patchfile>\n",argv
[0]);
221 /* Allocate oldsize+1 bytes instead of oldsize bytes to ensure
222 that we never try to malloc(0) and get a NULL pointer */
223 if(((fd
=open(argv
[1],O_RDONLY
|_O_BINARY
,0))<0) ||
224 ((oldsize
=lseek(fd
,0,SEEK_END
))==-1) ||
225 ((old
=(unsigned char*) malloc(oldsize
+1))==NULL
) ||
226 (lseek(fd
,0,SEEK_SET
)!=0) ||
227 (read(fd
,old
,oldsize
)!=oldsize
) ||
229 reporterr(1,"%s\n",argv
[1]);
231 scrc
= CalculateCrc(old
, oldsize
);
233 if(((I
=(int*) malloc((oldsize
+1)*sizeof(int)))==NULL
) ||
234 ((V
=(int*) malloc((oldsize
+1)*sizeof(int)))==NULL
))
237 qsufsort(I
,V
,old
,oldsize
);
241 /* Allocate newsize+1 bytes instead of newsize bytes to ensure
242 that we never try to malloc(0) and get a NULL pointer */
243 if(((fd
=open(argv
[2],O_RDONLY
|_O_BINARY
,0))<0) ||
244 ((newsize
=lseek(fd
,0,SEEK_END
))==-1) ||
245 ((newbuf
=(unsigned char*) malloc(newsize
+1))==NULL
) ||
246 (lseek(fd
,0,SEEK_SET
)!=0) ||
247 (read(fd
,newbuf
,newsize
)!=newsize
) ||
248 (close(fd
)==-1)) reporterr(1,"%s\n",argv
[2]);
250 if(((db
=(unsigned char*) malloc(newsize
+1))==NULL
) ||
251 ((eb
=(unsigned char*) malloc(newsize
+1))==NULL
))
257 if((fd
=open(argv
[3],O_CREAT
|O_TRUNC
|O_WRONLY
|_O_BINARY
,0666))<0)
258 reporterr(1,"%s\n",argv
[3]);
260 /* start writing here */
262 /* We don't know the lengths yet, so we will write the header again
265 if(write(fd
,&header
,sizeof(MBSPatchHeader
))!=sizeof(MBSPatchHeader
))
266 reporterr(1,"%s\n",argv
[3]);
269 lastscan
=0;lastpos
=0;lastoffset
=0;
271 while(scan
<newsize
) {
274 for(scsc
=scan
+=len
;scan
<newsize
;scan
++) {
275 len
=search(I
,old
,oldsize
,newbuf
+scan
,newsize
-scan
,
278 for(;scsc
<scan
+len
;scsc
++)
279 if((scsc
+lastoffset
<oldsize
) &&
280 (old
[scsc
+lastoffset
] == newbuf
[scsc
]))
283 if(((len
==oldscore
) && (len
!=0)) ||
284 (len
>oldscore
+8)) break;
286 if((scan
+lastoffset
<oldsize
) &&
287 (old
[scan
+lastoffset
] == newbuf
[scan
]))
291 if((len
!=oldscore
) || (scan
==newsize
)) {
292 MBSPatchTriple triple
;
295 for(i
=0;(lastscan
+i
<scan
)&&(lastpos
+i
<oldsize
);) {
296 if(old
[lastpos
+i
]==newbuf
[lastscan
+i
]) s
++;
298 if(s
*2-i
>Sf
*2-lenf
) { Sf
=s
; lenf
=i
; };
304 for(i
=1;(scan
>=lastscan
+i
)&&(pos
>=i
);i
++) {
305 if(old
[pos
-i
]==newbuf
[scan
-i
]) s
++;
306 if(s
*2-i
>Sb
*2-lenb
) { Sb
=s
; lenb
=i
; };
310 if(lastscan
+lenf
>scan
-lenb
) {
311 overlap
=(lastscan
+lenf
)-(scan
-lenb
);
313 for(i
=0;i
<overlap
;i
++) {
314 if(newbuf
[lastscan
+lenf
-overlap
+i
]==
315 old
[lastpos
+lenf
-overlap
+i
]) s
++;
316 if(newbuf
[scan
-lenb
+i
]==
317 old
[pos
-lenb
+i
]) s
--;
318 if(s
>Ss
) { Ss
=s
; lens
=i
+1; };
326 db
[dblen
+i
]=newbuf
[lastscan
+i
]-old
[lastpos
+i
];
327 for(i
=0;i
<(scan
-lenb
)-(lastscan
+lenf
);i
++)
328 eb
[eblen
+i
]=newbuf
[lastscan
+lenf
+i
];
331 eblen
+=(scan
-lenb
)-(lastscan
+lenf
);
333 triple
.x
= htonl(lenf
);
334 triple
.y
= htonl((scan
-lenb
)-(lastscan
+lenf
));
335 triple
.z
= htonl((pos
-lenb
)-(lastpos
+lenf
));
336 if (write(fd
,&triple
,sizeof(triple
)) != sizeof(triple
))
339 #ifdef DEBUG_bsmedberg
340 printf("Writing a block:\n"
345 (int) ((scan
-lenb
)-(lastscan
+lenf
)),
346 (int) ((pos
-lenb
)-(lastpos
+lenf
)));
357 if(write(fd
,db
,dblen
)!=dblen
)
360 if(write(fd
,eb
,eblen
)!=eblen
)
363 header
.slen
= htonl(oldsize
);
364 header
.scrc32
= htonl(scrc
);
365 header
.dlen
= htonl(newsize
);
366 header
.cblen
= htonl(numtriples
* sizeof(MBSPatchTriple
));
367 header
.difflen
= htonl(dblen
);
368 header
.extralen
= htonl(eblen
);
370 if (lseek(fd
,0,SEEK_SET
) == -1 ||
371 write(fd
,&header
,sizeof(header
)) != sizeof(header
) ||