1 /***************************************************************************
2 * chm_lib.c - CHM archive manipulation routines *
3 * ------------------- *
5 * author: Jed Wing <jedwin@ugcs.caltech.edu> *
7 * notes: These routines are meant for the manipulation of Microsoft *
8 * .chm (compiled html help) files, but may likely be used *
9 * for the manipulation of any ITSS archive, if ever ITSS *
10 * archives are used for any other purpose. *
12 * Note also that the section names are statically handled. *
13 * To be entirely correct, the section names should be read *
14 * from the section names meta-file, and then the various *
15 * content sections and the "transforms" to apply to the data *
16 * they contain should be inferred from the section name and *
17 * the meta-files referenced using that name; however, all of *
18 * the files I've been able to get my hands on appear to have *
19 * only two sections: Uncompressed and MSCompressed. *
20 * Additionally, the ITSS.DLL file included with Windows does *
21 * not appear to handle any different transforms than the *
22 * simple LZX-transform. Furthermore, the list of transforms *
23 * to apply is broken, in that only half the required space *
24 * is allocated for the list. (It appears as though the *
25 * space is allocated for ASCII strings, but the strings are *
26 * written as unicode. As a result, only the first half of *
27 * the string appears.) So this is probably not too big of *
28 * a deal, at least until CHM v4 (MS .lit files), which also *
29 * incorporate encryption, of some description. *
31 ***************************************************************************/
33 /***************************************************************************
35 * This library is free software; you can redistribute it and/or modify *
36 * it under the terms of the GNU Lesser General Public License as *
37 * published by the Free Software Foundation; either version 2.1 of the *
38 * License, or (at your option) any later version. *
40 ***************************************************************************/
42 /***************************************************************************
44 * Adapted for Wine by Mike McCormack *
46 ***************************************************************************/
57 #include "wine/unicode.h"
62 #define CHM_ACQUIRE_LOCK(a) do { \
63 EnterCriticalSection(&(a)); \
65 #define CHM_RELEASE_LOCK(a) do { \
66 LeaveCriticalSection(&(a)); \
69 #define CHM_NULL_FD (INVALID_HANDLE_VALUE)
70 #define CHM_CLOSE_FILE(fd) CloseHandle((fd))
73 * defines related to tuning
75 #ifndef CHM_MAX_BLOCKS_CACHED
76 #define CHM_MAX_BLOCKS_CACHED 5
80 * architecture specific defines
82 * Note: as soon as C99 is more widespread, the below defines should
83 * probably just use the C99 sized-int types.
85 * The following settings will probably work for many platforms. The sizes
86 * don't have to be exactly correct, but the types must accommodate at least as
87 * many bits as they specify.
90 /* i386, 32-bit, Windows */
93 typedef USHORT UInt16
;
96 typedef LONGLONG Int64
;
97 typedef ULONGLONG UInt64
;
99 /* utilities for unmarshalling data */
/*
 * Copy count raw bytes from the read cursor *pData into dest.
 * NOTE(review): the tail of the parameter list, the cursor advance and the
 * return statements are not visible in this listing; presumably the function
 * returns 0 when the guard fails and 1 on success -- confirm.
 */
100 static int _unmarshal_char_array(unsigned char **pData
,
101 unsigned long *pLenRemain
,
/* guard: refuse non-positive counts and counts larger than the bytes left */
105 if (count
<= 0 || (unsigned int)count
> *pLenRemain
)
107 memcpy(dest
, (*pData
), count
);
/* account for the bytes that were just consumed */
109 *pLenRemain
-= count
;
/*
 * Copy count raw bytes from the read cursor *pData into dest; the visible
 * statements are the same as in _unmarshal_char_array.
 * NOTE(review): the tail of the parameter list, the cursor advance and the
 * return statements are not visible in this listing; presumably the function
 * returns 0 when the guard fails and 1 on success -- confirm.
 */
113 static int _unmarshal_uchar_array(unsigned char **pData
,
114 unsigned long *pLenRemain
,
/* guard: refuse non-positive counts and counts larger than the bytes left */
118 if (count
<= 0 || (unsigned int)count
> *pLenRemain
)
120 memcpy(dest
, (*pData
), count
);
/* account for the bytes that were just consumed */
122 *pLenRemain
-= count
;
/*
 * Decode a 32-bit value from four consecutive bytes at *pData, least
 * significant byte first (byte 0 unshifted, byte 3 shifted left by 24),
 * and store it through dest.
 * NOTE(review): each operand is an unsigned char promoted to int before the
 * shift, so a byte 3 of 0x80 or more shifts a one into the sign bit of int;
 * casting the bytes to an unsigned 32-bit type first would avoid that.
 * The length check, cursor advance and return value are not visible here.
 */
126 static int _unmarshal_int32(unsigned char **pData
,
127 unsigned long *pLenRemain
,
132 *dest
= (*pData
)[0] | (*pData
)[1]<<8 | (*pData
)[2]<<16 | (*pData
)[3]<<24;
/*
 * Decode a 32-bit value from four consecutive bytes at *pData, least
 * significant byte first (byte 0 unshifted, byte 3 shifted left by 24),
 * and store it through dest.
 * NOTE(review): each operand is an unsigned char promoted to int before the
 * shift, so a byte 3 of 0x80 or more shifts a one into the sign bit of int;
 * casting the bytes to an unsigned 32-bit type first would avoid that.
 * The length check, cursor advance and return value are not visible here.
 */
138 static int _unmarshal_uint32(unsigned char **pData
,
139 unsigned long *pLenRemain
,
144 *dest
= (*pData
)[0] | (*pData
)[1]<<8 | (*pData
)[2]<<16 | (*pData
)[3]<<24;
/*
 * Decode a 64-bit integer from the bytes at *pData.
 * Only the accumulation step is visible in this listing: input byte (i - 1)
 * is OR-ed into temp.
 * NOTE(review): presumably a loop counts i down from 8 and shifts temp left
 * by 8 each round, which would make this a little-endian decode -- confirm.
 * The length check, cursor advance and return value are not visible here.
 */
150 static int _unmarshal_int64(unsigned char **pData
,
151 unsigned long *pLenRemain
,
162 temp
|= (*pData
)[i
-1];
/*
 * Decode a 64-bit integer from the bytes at *pData.
 * Only the accumulation step is visible in this listing: input byte (i - 1)
 * is OR-ed into temp.
 * NOTE(review): presumably a loop counts i down from 8 and shifts temp left
 * by 8 each round, which would make this a little-endian decode -- confirm.
 * The length check, cursor advance and return value are not visible here.
 */
170 static int _unmarshal_uint64(unsigned char **pData
,
171 unsigned long *pLenRemain
,
182 temp
|= (*pData
)[i
-1];
/*
 * Read a 16-byte UUID: forwards to _unmarshal_uchar_array with a fixed
 * count of 16 and returns that call's result unchanged.
 */
190 static int _unmarshal_uuid(unsigned char **pData
,
191 unsigned long *pDataLen
,
194 return _unmarshal_uchar_array(pData
, pDataLen
, dest
, 16);
197 /* names of sections essential to decompression */
198 static const WCHAR _CHMU_RESET_TABLE
[] = {
199 ':',':','D','a','t','a','S','p','a','c','e','/',
200 'S','t','o','r','a','g','e','/',
201 'M','S','C','o','m','p','r','e','s','s','e','d','/',
202 'T','r','a','n','s','f','o','r','m','/',
203 '{','7','F','C','2','8','9','4','0','-','9','D','3','1',
204 '-','1','1','D','0','-','9','B','2','7','-',
205 '0','0','A','0','C','9','1','E','9','C','7','C','}','/',
206 'I','n','s','t','a','n','c','e','D','a','t','a','/',
207 'R','e','s','e','t','T','a','b','l','e',0
209 static const WCHAR _CHMU_LZXC_CONTROLDATA
[] = {
210 ':',':','D','a','t','a','S','p','a','c','e','/',
211 'S','t','o','r','a','g','e','/',
212 'M','S','C','o','m','p','r','e','s','s','e','d','/',
213 'C','o','n','t','r','o','l','D','a','t','a',0
215 static const WCHAR _CHMU_CONTENT
[] = {
216 ':',':','D','a','t','a','S','p','a','c','e','/',
217 'S','t','o','r','a','g','e','/',
218 'M','S','C','o','m','p','r','e','s','s','e','d','/',
219 'C','o','n','t','e','n','t',0
221 static const WCHAR _CHMU_SPANINFO
[] = {
222 ':',':','D','a','t','a','S','p','a','c','e','/',
223 'S','t','o','r','a','g','e','/',
224 'M','S','C','o','m','p','r','e','s','s','e','d','/',
225 'S','p','a','n','I','n','f','o',
229 * structures local to this module
232 /* structure of ITSF headers */
233 #define _CHM_ITSF_V2_LEN (0x58)
234 #define _CHM_ITSF_V3_LEN (0x60)
237 char signature
[4]; /* 0 (ITSF) */
238 Int32 version
; /* 4 */
239 Int32 header_len
; /* 8 */
240 Int32 unknown_000c
; /* c */
241 UInt32 last_modified
; /* 10 */
242 UInt32 lang_id
; /* 14 */
243 UChar dir_uuid
[16]; /* 18 */
244 UChar stream_uuid
[16]; /* 28 */
245 UInt64 unknown_offset
; /* 38 */
246 UInt64 unknown_len
; /* 40 */
247 UInt64 dir_offset
; /* 48 */
248 UInt64 dir_len
; /* 50 */
249 UInt64 data_offset
; /* 58 (Not present before V3) */
250 }; /* __attribute__ ((aligned (1))); */
/*
 * Decode an ITSF file header from the bytes at *pData into dest.
 *
 * The input length *pDataLen must be exactly _CHM_ITSF_V2_LEN or
 * _CHM_ITSF_V3_LEN.  Fields are read in the order listed below, then the
 * signature is compared with "ITSF" and header_len is compared against the
 * minimum length for version 2 or version 3.  For a version 3 header the
 * data_offset field is read from the input; otherwise it is computed as
 * dir_offset + dir_len.
 *
 * NOTE(review): the results of the individual _unmarshal_* calls are not
 * examined in the visible code.  The return statements are not visible in
 * this listing; callers test the result with '!', so presumably nonzero
 * means success -- confirm.
 */
252 static int _unmarshal_itsf_header(unsigned char **pData
,
253 unsigned long *pDataLen
,
254 struct chmItsfHeader
*dest
)
256 /* we only know how to deal with the 0x58 and 0x60 byte structures */
257 if (*pDataLen
!= _CHM_ITSF_V2_LEN
&& *pDataLen
!= _CHM_ITSF_V3_LEN
)
260 /* unmarshal common fields */
261 _unmarshal_char_array(pData
, pDataLen
, dest
->signature
, 4);
262 _unmarshal_int32 (pData
, pDataLen
, &dest
->version
);
263 _unmarshal_int32 (pData
, pDataLen
, &dest
->header_len
);
264 _unmarshal_int32 (pData
, pDataLen
, &dest
->unknown_000c
);
265 _unmarshal_uint32 (pData
, pDataLen
, &dest
->last_modified
);
266 _unmarshal_uint32 (pData
, pDataLen
, &dest
->lang_id
);
267 _unmarshal_uuid (pData
, pDataLen
, dest
->dir_uuid
);
268 _unmarshal_uuid (pData
, pDataLen
, dest
->stream_uuid
);
269 _unmarshal_uint64 (pData
, pDataLen
, &dest
->unknown_offset
);
270 _unmarshal_uint64 (pData
, pDataLen
, &dest
->unknown_len
);
271 _unmarshal_uint64 (pData
, pDataLen
, &dest
->dir_offset
);
272 _unmarshal_uint64 (pData
, pDataLen
, &dest
->dir_len
);
274 /* error check the data */
275 /* XXX: should also check UUIDs, probably, though with a version 3 file,
276 * current MS tools do not seem to use them.
278 if (memcmp(dest
->signature
, "ITSF", 4) != 0)
280 if (dest
->version
== 2)
282 if (dest
->header_len
< _CHM_ITSF_V2_LEN
)
285 else if (dest
->version
== 3)
287 if (dest
->header_len
< _CHM_ITSF_V3_LEN
)
293 /* now, if we have a V3 structure, unmarshal the rest.
294 * otherwise, compute it
296 if (dest
->version
== 3)
299 _unmarshal_uint64(pData
, pDataLen
, &dest
->data_offset
);
304 dest
->data_offset
= dest
->dir_offset
+ dest
->dir_len
;
309 /* structure of ITSP headers */
310 #define _CHM_ITSP_V1_LEN (0x54)
313 char signature
[4]; /* 0 (ITSP) */
314 Int32 version
; /* 4 */
315 Int32 header_len
; /* 8 */
316 Int32 unknown_000c
; /* c */
317 UInt32 block_len
; /* 10 */
318 Int32 blockidx_intvl
; /* 14 */
319 Int32 index_depth
; /* 18 */
320 Int32 index_root
; /* 1c */
321 Int32 index_head
; /* 20 */
322 Int32 unknown_0024
; /* 24 */
323 UInt32 num_blocks
; /* 28 */
324 Int32 unknown_002c
; /* 2c */
325 UInt32 lang_id
; /* 30 */
326 UChar system_uuid
[16]; /* 34 */
327 UChar unknown_0044
[16]; /* 44 */
328 }; /* __attribute__ ((aligned (1))); */
/*
 * Decode an ITSP directory header from the bytes at *pData into dest.
 *
 * The input length *pDataLen must be exactly _CHM_ITSP_V1_LEN.  After the
 * fields are read in the order listed below, the signature must equal
 * "ITSP", version must be 1 and header_len must equal _CHM_ITSP_V1_LEN.
 *
 * NOTE(review): the results of the individual _unmarshal_* calls are not
 * examined in the visible code.  The return statements are not visible in
 * this listing; callers test the result with '!', so presumably nonzero
 * means success -- confirm.
 */
330 static int _unmarshal_itsp_header(unsigned char **pData
,
331 unsigned long *pDataLen
,
332 struct chmItspHeader
*dest
)
334 /* we only know how to deal with a 0x54 byte structure */
335 if (*pDataLen
!= _CHM_ITSP_V1_LEN
)
338 /* unmarshal fields */
339 _unmarshal_char_array(pData
, pDataLen
, dest
->signature
, 4);
340 _unmarshal_int32 (pData
, pDataLen
, &dest
->version
);
341 _unmarshal_int32 (pData
, pDataLen
, &dest
->header_len
);
342 _unmarshal_int32 (pData
, pDataLen
, &dest
->unknown_000c
);
343 _unmarshal_uint32 (pData
, pDataLen
, &dest
->block_len
);
344 _unmarshal_int32 (pData
, pDataLen
, &dest
->blockidx_intvl
);
345 _unmarshal_int32 (pData
, pDataLen
, &dest
->index_depth
);
346 _unmarshal_int32 (pData
, pDataLen
, &dest
->index_root
);
347 _unmarshal_int32 (pData
, pDataLen
, &dest
->index_head
);
348 _unmarshal_int32 (pData
, pDataLen
, &dest
->unknown_0024
);
349 _unmarshal_uint32 (pData
, pDataLen
, &dest
->num_blocks
);
350 _unmarshal_int32 (pData
, pDataLen
, &dest
->unknown_002c
);
351 _unmarshal_uint32 (pData
, pDataLen
, &dest
->lang_id
);
352 _unmarshal_uuid (pData
, pDataLen
, dest
->system_uuid
);
353 _unmarshal_uchar_array(pData
, pDataLen
, dest
->unknown_0044
, 16);
355 /* error check the data */
356 if (memcmp(dest
->signature
, "ITSP", 4) != 0)
358 if (dest
->version
!= 1)
360 if (dest
->header_len
!= _CHM_ITSP_V1_LEN
)
366 /* structure of PMGL headers */
367 static const char _chm_pmgl_marker
[4] = "PMGL";
368 #define _CHM_PMGL_LEN (0x14)
371 char signature
[4]; /* 0 (PMGL) */
372 UInt32 free_space
; /* 4 */
373 UInt32 unknown_0008
; /* 8 */
374 Int32 block_prev
; /* c */
375 Int32 block_next
; /* 10 */
376 }; /* __attribute__ ((aligned (1))); */
/*
 * Decode the header of a PMGL directory page from *pData into dest.
 *
 * The input length *pDataLen must be exactly _CHM_PMGL_LEN.  After the five
 * fields are read, the signature is compared with _chm_pmgl_marker.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * callers test the result with '!', so presumably nonzero means success.
 */
378 static int _unmarshal_pmgl_header(unsigned char **pData
,
379 unsigned long *pDataLen
,
380 struct chmPmglHeader
*dest
)
382 /* we only know how to deal with a 0x14 byte structure */
383 if (*pDataLen
!= _CHM_PMGL_LEN
)
386 /* unmarshal fields */
387 _unmarshal_char_array(pData
, pDataLen
, dest
->signature
, 4);
388 _unmarshal_uint32 (pData
, pDataLen
, &dest
->free_space
);
389 _unmarshal_uint32 (pData
, pDataLen
, &dest
->unknown_0008
);
390 _unmarshal_int32 (pData
, pDataLen
, &dest
->block_prev
);
391 _unmarshal_int32 (pData
, pDataLen
, &dest
->block_next
);
393 /* check structure */
394 if (memcmp(dest
->signature
, _chm_pmgl_marker
, 4) != 0)
400 /* structure of PMGI headers */
401 static const char _chm_pmgi_marker
[4] = "PMGI";
402 #define _CHM_PMGI_LEN (0x08)
405 char signature
[4]; /* 0 (PMGI) */
406 UInt32 free_space
; /* 4 */
407 }; /* __attribute__ ((aligned (1))); */
/*
 * Decode the header of a PMGI index page from *pData into dest.
 *
 * The input length *pDataLen must be exactly _CHM_PMGI_LEN.  After the
 * signature and free_space fields are read, the signature is compared with
 * _chm_pmgi_marker.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * callers test the result with '!', so presumably nonzero means success.
 */
409 static int _unmarshal_pmgi_header(unsigned char **pData
,
410 unsigned long *pDataLen
,
411 struct chmPmgiHeader
*dest
)
413 /* we only know how to deal with a 0x8 byte structure */
414 if (*pDataLen
!= _CHM_PMGI_LEN
)
417 /* unmarshal fields */
418 _unmarshal_char_array(pData
, pDataLen
, dest
->signature
, 4);
419 _unmarshal_uint32 (pData
, pDataLen
, &dest
->free_space
);
421 /* check structure */
422 if (memcmp(dest
->signature
, _chm_pmgi_marker
, 4) != 0)
428 /* structure of LZXC reset table */
429 #define _CHM_LZXC_RESETTABLE_V1_LEN (0x28)
430 struct chmLzxcResetTable
436 UInt64 uncompressed_len
;
437 UInt64 compressed_len
;
439 }; /* __attribute__ ((aligned (1))); */
/*
 * Decode the fixed part of the LZXC reset table from *pData into dest.
 *
 * The input length *pDataLen must be exactly _CHM_LZXC_RESETTABLE_V1_LEN.
 * Four 32-bit fields and three 64-bit fields are read in the order listed
 * below; afterwards version is required to be 2.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * callers test the result with '!', so presumably nonzero means success.
 */
441 static int _unmarshal_lzxc_reset_table(unsigned char **pData
,
442 unsigned long *pDataLen
,
443 struct chmLzxcResetTable
*dest
)
445 /* we only know how to deal with a 0x28 byte structure */
446 if (*pDataLen
!= _CHM_LZXC_RESETTABLE_V1_LEN
)
449 /* unmarshal fields */
450 _unmarshal_uint32 (pData
, pDataLen
, &dest
->version
);
451 _unmarshal_uint32 (pData
, pDataLen
, &dest
->block_count
);
452 _unmarshal_uint32 (pData
, pDataLen
, &dest
->unknown
);
453 _unmarshal_uint32 (pData
, pDataLen
, &dest
->table_offset
);
454 _unmarshal_uint64 (pData
, pDataLen
, &dest
->uncompressed_len
);
455 _unmarshal_uint64 (pData
, pDataLen
, &dest
->compressed_len
);
456 _unmarshal_uint64 (pData
, pDataLen
, &dest
->block_len
);
458 /* check structure */
459 if (dest
->version
!= 2)
465 /* structure of LZXC control data block */
466 #define _CHM_LZXC_MIN_LEN (0x18)
467 #define _CHM_LZXC_V2_LEN (0x1c)
468 struct chmLzxcControlData
471 char signature
[4]; /* 4 (LZXC) */
472 UInt32 version
; /* 8 */
473 UInt32 resetInterval
; /* c */
474 UInt32 windowSize
; /* 10 */
475 UInt32 windowsPerReset
; /* 14 */
476 UInt32 unknown_18
; /* 18 */
/*
 * Decode the LZXC control data block from *pData into dest and validate it.
 *
 * At least _CHM_LZXC_MIN_LEN bytes are required.  After the six leading
 * fields are read, unknown_18 is either read from the input or set to 0.
 * For version 2 data, resetInterval and windowSize are scaled by 0x8000.
 * The block is then checked: windowSize and resetInterval must be nonzero,
 * windowSize is tested against 1 before windowSize/2 is used as a divisor,
 * resetInterval must be a multiple of windowSize/2, and the signature must
 * equal "LZXC".
 *
 * NOTE(review): the test against _CHM_LZXC_V2_LEN runs after the six fields
 * above it have been read.  If the helpers reduce *pDataLen as they consume
 * bytes (as _unmarshal_char_array visibly does), the value compared is the
 * remainder rather than the total block length -- confirm which is intended.
 * NOTE(review): the return statements are not visible in this listing.
 */
479 static int _unmarshal_lzxc_control_data(unsigned char **pData
,
480 unsigned long *pDataLen
,
481 struct chmLzxcControlData
*dest
)
483 /* we want at least 0x18 bytes */
484 if (*pDataLen
< _CHM_LZXC_MIN_LEN
)
487 /* unmarshal fields */
488 _unmarshal_uint32 (pData
, pDataLen
, &dest
->size
);
489 _unmarshal_char_array(pData
, pDataLen
, dest
->signature
, 4);
490 _unmarshal_uint32 (pData
, pDataLen
, &dest
->version
);
491 _unmarshal_uint32 (pData
, pDataLen
, &dest
->resetInterval
);
492 _unmarshal_uint32 (pData
, pDataLen
, &dest
->windowSize
);
493 _unmarshal_uint32 (pData
, pDataLen
, &dest
->windowsPerReset
);
495 if (*pDataLen
>= _CHM_LZXC_V2_LEN
)
496 _unmarshal_uint32 (pData
, pDataLen
, &dest
->unknown_18
);
498 dest
->unknown_18
= 0;
500 if (dest
->version
== 2)
502 dest
->resetInterval
*= 0x8000;
503 dest
->windowSize
*= 0x8000;
505 if (dest
->windowSize
== 0 || dest
->resetInterval
== 0)
508 /* for now, only support resetInterval a multiple of windowSize/2 */
509 if (dest
->windowSize
== 1)
511 if ((dest
->resetInterval
% (dest
->windowSize
/2)) != 0)
514 /* check structure */
515 if (memcmp(dest
->signature
, "LZXC", 4) != 0)
521 /* the structure used for chm file handles */
526 CRITICAL_SECTION mutex
;
527 CRITICAL_SECTION lzx_mutex
;
528 CRITICAL_SECTION cache_mutex
;
538 struct chmUnitInfo rt_unit
;
539 struct chmUnitInfo cn_unit
;
540 struct chmLzxcResetTable reset_table
;
542 /* LZX control data */
543 int compression_enabled
;
545 UInt32 reset_interval
;
546 UInt32 reset_blkcount
;
548 /* decompressor state */
549 struct LZXstate
*lzx_state
;
552 /* cache for decompressed blocks */
553 UChar
**cache_blocks
;
554 Int64
*cache_block_indices
;
555 Int32 cache_num_blocks
;
559 * utility functions local to this module
562 /* utility function to handle differences between {pread,read}(64)?
 *
 * Positioned read on the archive's file handle.  The visible code bails out
 * early when h->fd is CHM_NULL_FD, then, while holding h->mutex, remembers
 * the current file position, seeks to the 64-bit offset os (split into low
 * and high 32-bit halves for SetFilePointer), and finally seeks back to the
 * remembered position before releasing the mutex.
 *
 * NOTE(review): the read itself, the remaining parameters and the return
 * value are not visible in this listing.  No check of the SetFilePointer
 * results is visible either -- confirm whether seek failures are handled.
 */
563 static Int64
_chm_fetch_bytes(struct chmFile
*h
,
569 if (h
->fd
== CHM_NULL_FD
)
572 CHM_ACQUIRE_LOCK(h
->mutex
);
573 /* NOTE: this might be better done with CreateFileMapping, et cetera... */
575 DWORD origOffsetLo
=0, origOffsetHi
=0;
576 DWORD offsetLo
, offsetHi
;
579 /* awkward Win32 Seek/Tell */
580 offsetLo
= (unsigned long)(os
& 0xffffffffL
);
581 offsetHi
= (unsigned long)((os
>> 32) & 0xffffffffL
);
582 origOffsetLo
= SetFilePointer(h
->fd
, 0, &origOffsetHi
, FILE_CURRENT
);
583 offsetLo
= SetFilePointer(h
->fd
, offsetLo
, &offsetHi
, FILE_BEGIN
);
595 /* restore original position */
596 SetFilePointer(h
->fd
, origOffsetLo
, &origOffsetHi
, FILE_BEGIN
);
598 CHM_RELEASE_LOCK(h
->mutex
);
602 /* open an ITS archive
 *
 * Builds a chmFile handle for the archive named by filename.  Visible steps:
 *   - allocate the handle and clear fd, lzx_state and the cache fields;
 *   - open the file with CreateFileW and initialize the three critical
 *     sections;
 *   - read and decode the ITSF header at offset 0, keeping dir_offset,
 *     dir_len and data_offset;
 *   - read and decode the ITSP header at dir_offset, then move dir_offset
 *     and dir_len past that header and keep index_root, index_head and
 *     block_len (index_root falls back to index_head when it is -1);
 *   - resolve and read the span information (it must not be stored in the
 *     compressed section);
 *   - resolve the reset table, content and control data units, then read
 *     the reset table and the control data; a failure here only switches
 *     compression_enabled off;
 *   - derive window_size, reset_interval and reset_blkcount from the
 *     control data and size the block cache with chm_set_param.
 * The visible failure paths for the headers and the span call
 * chm_close(newHandle).
 *
 * NOTE(review): the malloc result is written through on the very next
 * statement; no NULL check is visible.
 * NOTE(review): the control data read uses sremain = uiLzxc.length, a value
 * taken from the archive, with the 256-byte sbuffer as destination; no bound
 * against the buffer size is visible -- confirm.
 * NOTE(review): return statements are not visible in this listing.
 */
603 struct chmFile
*chm_openW(const WCHAR
*filename
)
605 unsigned char sbuffer
[256];
606 unsigned long sremain
;
607 unsigned char *sbufpos
;
608 struct chmFile
*newHandle
=NULL
;
609 struct chmItsfHeader itsfHeader
;
610 struct chmItspHeader itspHeader
;
612 struct chmUnitInfo uiSpan
;
614 struct chmUnitInfo uiLzxc
;
615 struct chmLzxcControlData ctlData
;
617 /* allocate handle */
618 newHandle
= (struct chmFile
*)malloc(sizeof(struct chmFile
));
619 newHandle
->fd
= CHM_NULL_FD
;
620 newHandle
->lzx_state
= NULL
;
621 newHandle
->cache_blocks
= NULL
;
622 newHandle
->cache_block_indices
= NULL
;
623 newHandle
->cache_num_blocks
= 0;
626 if ((newHandle
->fd
=CreateFileW(filename
,
631 FILE_ATTRIBUTE_NORMAL
,
632 NULL
)) == CHM_NULL_FD
)
638 /* initialize mutexes, if needed */
639 InitializeCriticalSection(&newHandle
->mutex
);
640 InitializeCriticalSection(&newHandle
->lzx_mutex
);
641 InitializeCriticalSection(&newHandle
->cache_mutex
);
643 /* read and verify header */
644 sremain
= _CHM_ITSF_V3_LEN
;
646 if (_chm_fetch_bytes(newHandle
, sbuffer
, (UInt64
)0, sremain
) != sremain
||
647 !_unmarshal_itsf_header(&sbufpos
, &sremain
, &itsfHeader
))
649 chm_close(newHandle
);
653 /* stash important values from header */
654 newHandle
->dir_offset
= itsfHeader
.dir_offset
;
655 newHandle
->dir_len
= itsfHeader
.dir_len
;
656 newHandle
->data_offset
= itsfHeader
.data_offset
;
658 /* now, read and verify the directory header chunk */
659 sremain
= _CHM_ITSP_V1_LEN
;
661 if (_chm_fetch_bytes(newHandle
, sbuffer
,
662 (UInt64
)itsfHeader
.dir_offset
, sremain
) != sremain
||
663 !_unmarshal_itsp_header(&sbufpos
, &sremain
, &itspHeader
))
665 chm_close(newHandle
);
669 /* grab essential information from ITSP header */
670 newHandle
->dir_offset
+= itspHeader
.header_len
;
671 newHandle
->dir_len
-= itspHeader
.header_len
;
672 newHandle
->index_root
= itspHeader
.index_root
;
673 newHandle
->index_head
= itspHeader
.index_head
;
674 newHandle
->block_len
= itspHeader
.block_len
;
676 /* if the index root is -1, this means we don't have any PMGI blocks.
677 * as a result, we must use the sole PMGL block as the index root
679 if (newHandle
->index_root
== -1)
680 newHandle
->index_root
= newHandle
->index_head
;
682 /* By default, compression is enabled. */
683 newHandle
->compression_enabled
= 1;
685 /* Jed, Sun Jun 27: 'span' doesn't seem to be used anywhere?! */
688 if (CHM_RESOLVE_SUCCESS
!= chm_resolve_object(newHandle
,
691 uiSpan
.space
== CHM_COMPRESSED
)
693 chm_close(newHandle
);
697 /* N.B.: we've already checked that uiSpan is in the uncompressed section,
698 * so this should not require attempting to decompress, which may
699 * rely on having a valid "span"
703 if (chm_retrieve_object(newHandle
, &uiSpan
, sbuffer
,
704 0, sremain
) != sremain
||
705 !_unmarshal_uint64(&sbufpos
, &sremain
, &newHandle
->span
))
707 chm_close(newHandle
);
712 /* prefetch most commonly needed unit infos */
713 if (CHM_RESOLVE_SUCCESS
!= chm_resolve_object(newHandle
,
715 &newHandle
->rt_unit
) ||
716 newHandle
->rt_unit
.space
== CHM_COMPRESSED
||
717 CHM_RESOLVE_SUCCESS
!= chm_resolve_object(newHandle
,
719 &newHandle
->cn_unit
) ||
720 newHandle
->cn_unit
.space
== CHM_COMPRESSED
||
721 CHM_RESOLVE_SUCCESS
!= chm_resolve_object(newHandle
,
722 _CHMU_LZXC_CONTROLDATA
,
724 uiLzxc
.space
== CHM_COMPRESSED
)
726 newHandle
->compression_enabled
= 0;
729 /* read reset table info */
730 if (newHandle
->compression_enabled
)
732 sremain
= _CHM_LZXC_RESETTABLE_V1_LEN
;
734 if (chm_retrieve_object(newHandle
, &newHandle
->rt_unit
, sbuffer
,
735 0, sremain
) != sremain
||
736 !_unmarshal_lzxc_reset_table(&sbufpos
, &sremain
,
737 &newHandle
->reset_table
))
739 newHandle
->compression_enabled
= 0;
743 /* read control data */
744 if (newHandle
->compression_enabled
)
746 sremain
= (unsigned long)uiLzxc
.length
;
748 if (chm_retrieve_object(newHandle
, &uiLzxc
, sbuffer
,
749 0, sremain
) != sremain
||
750 !_unmarshal_lzxc_control_data(&sbufpos
, &sremain
,
753 newHandle
->compression_enabled
= 0;
756 newHandle
->window_size
= ctlData
.windowSize
;
757 newHandle
->reset_interval
= ctlData
.resetInterval
;
759 /* Jed, Mon Jun 28: Experimentally, it appears that the reset block count */
760 /* must be multiplied by this formerly unknown ctrl data field in */
761 /* order to decompress some files. */
763 newHandle
->reset_blkcount
= newHandle
->reset_interval
/
764 (newHandle
->window_size
/ 2);
766 newHandle
->reset_blkcount
= newHandle
->reset_interval
/
767 (newHandle
->window_size
/ 2) *
768 ctlData
.windowsPerReset
;
772 /* initialize cache */
773 chm_set_param(newHandle
, CHM_PARAM_MAX_BLOCKS_CACHED
,
774 CHM_MAX_BLOCKS_CACHED
);
779 /* close an ITS archive
 *
 * Releases what the handle owns, in this visible order: the file handle
 * (when it is not CHM_NULL_FD), the three critical sections, the LZX
 * decompressor state, every cached block followed by the cache_blocks
 * array, and finally the cache_block_indices array.  Both cache pointers
 * are reset to NULL after being freed.
 *
 * NOTE(review): any guards around these steps (for example a NULL check on
 * h or on h->lzx_state) and the release of the handle structure itself are
 * not visible in this listing -- confirm.
 */
780 void chm_close(struct chmFile
*h
)
784 if (h
->fd
!= CHM_NULL_FD
)
785 CHM_CLOSE_FILE(h
->fd
);
788 DeleteCriticalSection(&h
->mutex
);
789 DeleteCriticalSection(&h
->lzx_mutex
);
790 DeleteCriticalSection(&h
->cache_mutex
);
793 LZXteardown(h
->lzx_state
);
799 for (i
=0; i
<h
->cache_num_blocks
; i
++)
801 if (h
->cache_blocks
[i
])
802 free(h
->cache_blocks
[i
]);
804 free(h
->cache_blocks
);
805 h
->cache_blocks
= NULL
;
808 if (h
->cache_block_indices
)
809 free(h
->cache_block_indices
);
810 h
->cache_block_indices
= NULL
;
817 * set a parameter on the file handle.
818 * valid parameter types:
819 * CHM_PARAM_MAX_BLOCKS_CACHED:
820 * how many decompressed blocks should be cached? A simple
821 * caching scheme is used, wherein the index of the block is
822 * used as a hash value, and hash collision results in the
823 * invalidation of the previously cached block.
/*
 * Set a tuning parameter on the handle.  The only case visible here is
 * CHM_PARAM_MAX_BLOCKS_CACHED, which resizes the decompressed-block cache
 * while holding h->cache_mutex: when paramVal differs from the current
 * cache_num_blocks, new block and index arrays of paramVal entries are
 * allocated, every old cached block is rehashed to slot (index % paramVal),
 * a block whose new slot is already taken is freed, the old arrays are
 * freed and the handle is pointed at the new ones.
 *
 * NOTE(review): neither malloc result is checked before the loop that
 * follows.  The new index array is allocated with a UInt64 element type;
 * compare with the declared type of cache_block_indices.  The modulo by
 * paramVal would divide by zero for paramVal == 0 if any old entries are
 * visited -- confirm whether a guard exists in the lines not shown here.
 */
825 void chm_set_param(struct chmFile
*h
,
831 case CHM_PARAM_MAX_BLOCKS_CACHED
:
832 CHM_ACQUIRE_LOCK(h
->cache_mutex
);
833 if (paramVal
!= h
->cache_num_blocks
)
839 /* allocate new cached blocks */
840 newBlocks
= (UChar
**)malloc(paramVal
* sizeof (UChar
*));
841 newIndices
= (UInt64
*)malloc(paramVal
* sizeof (UInt64
));
842 for (i
=0; i
<paramVal
; i
++)
848 /* re-distribute old cached blocks */
851 for (i
=0; i
<h
->cache_num_blocks
; i
++)
853 int newSlot
= (int)(h
->cache_block_indices
[i
] % paramVal
);
855 if (h
->cache_blocks
[i
])
857 /* in case of collision, destroy newcomer */
858 if (newBlocks
[newSlot
])
860 free(h
->cache_blocks
[i
]);
861 h
->cache_blocks
[i
] = NULL
;
865 newBlocks
[newSlot
] = h
->cache_blocks
[i
];
866 newIndices
[newSlot
] =
867 h
->cache_block_indices
[i
];
872 free(h
->cache_blocks
);
873 free(h
->cache_block_indices
);
876 /* now, set new values */
877 h
->cache_blocks
= newBlocks
;
878 h
->cache_block_indices
= newIndices
;
879 h
->cache_num_blocks
= paramVal
;
881 CHM_RELEASE_LOCK(h
->cache_mutex
);
890 * helper methods for chm_resolve_object
893 /* skip a compressed dword
 *
 * Advances *pEntry one byte at a time for as long as the byte just read is
 * 0x80 or greater; because of the post-increment the cursor ends up one
 * past the first byte below 0x80.
 * NOTE(review): the loop body is not visible in this listing (presumably
 * an empty statement).
 */
894 static void _chm_skip_cword(UChar
**pEntry
)
896 while (*(*pEntry
)++ >= 0x80)
900 /* skip the data from a PMGL entry
 *
 * Steps over three consecutive compressed words.  _chm_parse_PMGL_entry
 * reads three such words after the name (space, start, length), so these
 * are presumably the same three fields.
 */
901 static void _chm_skip_PMGL_entry_data(UChar
**pEntry
)
903 _chm_skip_cword(pEntry
);
904 _chm_skip_cword(pEntry
);
905 _chm_skip_cword(pEntry
);
908 /* parse a compressed dword
 *
 * Variable-length integer decode.  Bytes of 0x80 or more are continuation
 * bytes whose low seven bits are added into accum; the first byte below
 * 0x80 ends the value and becomes its lowest seven bits through
 * (accum << 7) + temp.  *pEntry is left one past that final byte.
 * NOTE(review): the declarations and the rest of the loop body (presumably
 * a shift of accum by 7 per continuation byte) are not visible here.
 */
909 static UInt64
_chm_parse_cword(UChar
**pEntry
)
913 while ((temp
=*(*pEntry
)++) >= 0x80)
916 accum
+= temp
& 0x7f;
919 return (accum
<< 7) + temp
;
922 /* parse a name string from a directory entry into a WCHAR buffer
 *
 * The visible copy step stores one input byte into one WCHAR of path and
 * advances both cursors; no multi-byte UTF-8 sequence is decoded in the
 * visible code.
 * NOTE(review): the loop over count, any terminator written to path and
 * the return value are not visible in this listing.
 */
923 static int _chm_parse_UTF8(UChar
**pEntry
, UInt64 count
, WCHAR
*path
)
925 /* MJM - Modified to return real Unicode strings */
928 *path
++ = (*(*pEntry
)++);
936 /* parse a PMGL entry into a chmUnitInfo struct; return 1 on success.
 *
 * Entry layout as read here: a compressed-word name length, the name
 * itself (copied into ui->path), then three compressed words stored into
 * ui->space, ui->start and ui->length.  A name length above
 * CHM_MAX_PATHLEN and a failed name parse are both tested for before the
 * remaining fields are read.
 * NOTE(review): the statements taken on those two failure tests are not
 * visible in this listing (presumably an early return of 0).
 */
937 static int _chm_parse_PMGL_entry(UChar
**pEntry
, struct chmUnitInfo
*ui
)
942 strLen
= _chm_parse_cword(pEntry
);
943 if (strLen
> CHM_MAX_PATHLEN
)
947 if (! _chm_parse_UTF8(pEntry
, strLen
, ui
->path
))
951 ui
->space
= (int)_chm_parse_cword(pEntry
);
952 ui
->start
= _chm_parse_cword(pEntry
);
953 ui
->length
= _chm_parse_cword(pEntry
);
957 /* find an exact entry in PMGL; return NULL if we fail
 *
 * Decodes the PMGL header at the start of page_buf, takes the end of the
 * used area as page_buf + block_len - header.free_space, and then walks the
 * entries: each name is decoded into buffer and compared with objPath
 * using the case-insensitive strcmpiW; on a mismatch the entry's three data
 * words are skipped.
 *
 * NOTE(review): strLen comes from the page and is handed to
 * _chm_parse_UTF8 on the next statement without a visible comparison to
 * CHM_MAX_PATHLEN (buffer holds CHM_MAX_PATHLEN+1 WCHARs), unlike
 * _chm_parse_PMGL_entry -- confirm the bound is enforced somewhere.
 * header.free_space is likewise used without a visible range check.
 * The loop construct and return statements are not visible in this listing.
 */
958 static UChar
*_chm_find_in_PMGL(UChar
*page_buf
,
960 const WCHAR
*objPath
)
962 /* XXX: modify this to do a binary search using the nice index structure
963 * that is provided for us.
965 struct chmPmglHeader header
;
971 WCHAR buffer
[CHM_MAX_PATHLEN
+1];
973 /* figure out where to start and end */
975 hremain
= _CHM_PMGL_LEN
;
976 if (! _unmarshal_pmgl_header(&cur
, &hremain
, &header
))
978 end
= page_buf
+ block_len
- (header
.free_space
);
980 /* now, scan progressively */
985 strLen
= _chm_parse_cword(&cur
);
986 if (! _chm_parse_UTF8(&cur
, strLen
, buffer
))
989 /* check if it is the right name */
990 if (! strcmpiW(buffer
, objPath
))
993 _chm_skip_PMGL_entry_data(&cur
);
999 /* find which block should be searched next for the entry; -1 if no block
 *
 * Decodes the PMGI header at the start of page_buf, takes the end of the
 * used area as page_buf + block_len - header.free_space, and then walks the
 * index entries: each name is decoded into buffer and compared with
 * objPath using the case-insensitive strcmpiW.  The scan reacts to the
 * first name that compares greater than objPath; until then the page
 * number following each name is loaded into page.
 *
 * NOTE(review): strLen comes from the page and is handed to
 * _chm_parse_UTF8 on the next statement without a visible comparison to
 * CHM_MAX_PATHLEN (buffer holds CHM_MAX_PATHLEN+1 WCHARs) -- confirm the
 * bound is enforced somewhere.  The loop construct and return statements
 * are not visible in this listing.
 */
1000 static Int32
_chm_find_in_PMGI(UChar
*page_buf
,
1002 const WCHAR
*objPath
)
1004 /* XXX: modify this to do a binary search using the nice index structure
1005 * that is provided for us
1007 struct chmPmgiHeader header
;
1013 WCHAR buffer
[CHM_MAX_PATHLEN
+1];
1015 /* figure out where to start and end */
1017 hremain
= _CHM_PMGI_LEN
;
1018 if (! _unmarshal_pmgi_header(&cur
, &hremain
, &header
))
1020 end
= page_buf
+ block_len
- (header
.free_space
);
1022 /* now, scan progressively */
1026 strLen
= _chm_parse_cword(&cur
);
1027 if (! _chm_parse_UTF8(&cur
, strLen
, buffer
))
1030 /* check if it is the right name */
1031 if (strcmpiW(buffer
, objPath
) > 0)
1034 /* load next value for path */
1035 page
= (int)_chm_parse_cword(&cur
);
1041 /* resolve a particular object from the archive
 *
 * Looks objPath up in the directory and fills in *ui.  Starting at
 * h->index_root, each page is fetched from dir_offset + page * block_len
 * into a heap buffer of block_len bytes.  A page starting with the PMGL
 * marker is a leaf: the entry is searched for there and the search ends,
 * with CHM_RESOLVE_SUCCESS after the entry is parsed into ui.  A page
 * starting with the PMGI marker is a branch: _chm_find_in_PMGI picks the
 * next page.  Any other page, a short read, or a page number of -1 ends the
 * search with CHM_RESOLVE_FAILURE.  The page buffer is freed on every
 * visible exit path.
 *
 * NOTE(review): the result of _chm_parse_PMGL_entry is not examined before
 * CHM_RESOLVE_SUCCESS is returned.
 */
1042 int chm_resolve_object(struct chmFile
*h
,
1043 const WCHAR
*objPath
,
1044 struct chmUnitInfo
*ui
)
1047 * XXX: implement caching scheme for dir pages
1052 /* buffer to hold whatever page we're looking at */
1053 UChar
*page_buf
= HeapAlloc(GetProcessHeap(), 0, h
->block_len
);
1056 curPage
= h
->index_root
;
1058 /* until we have either returned or given up */
1059 while (curPage
!= -1)
1062 /* try to fetch the index page */
1063 if (_chm_fetch_bytes(h
, page_buf
,
1064 (UInt64
)h
->dir_offset
+ (UInt64
)curPage
*h
->block_len
,
1065 h
->block_len
) != h
->block_len
)
1067 HeapFree(GetProcessHeap(), 0, page_buf
);
1068 return CHM_RESOLVE_FAILURE
;
1071 /* now, if it is a leaf node: */
1072 if (memcmp(page_buf
, _chm_pmgl_marker
, 4) == 0)
1075 UChar
*pEntry
= _chm_find_in_PMGL(page_buf
,
1080 HeapFree(GetProcessHeap(), 0, page_buf
);
1081 return CHM_RESOLVE_FAILURE
;
1084 /* parse entry and return */
1085 _chm_parse_PMGL_entry(&pEntry
, ui
);
1086 HeapFree(GetProcessHeap(), 0, page_buf
);
1087 return CHM_RESOLVE_SUCCESS
;
1090 /* else, if it is a branch node: */
1091 else if (memcmp(page_buf
, _chm_pmgi_marker
, 4) == 0)
1092 curPage
= _chm_find_in_PMGI(page_buf
, h
->block_len
, objPath
);
1094 /* else, we are confused. give up. */
1097 HeapFree(GetProcessHeap(), 0, page_buf
);
1098 return CHM_RESOLVE_FAILURE
;
1102 /* didn't find anything. fail. */
1103 HeapFree(GetProcessHeap(), 0, page_buf
);
1104 return CHM_RESOLVE_FAILURE
;
1108 * utility methods for dealing with compressed data
1111 /* get the bounds of a compressed block. return 0 on failure
 *
 * For every block except the last one listed in the reset table, two
 * 8-byte reset table entries are read from the file (located relative to
 * data_offset + rt_unit.start + reset_table.table_offset): the first is
 * decoded into *start, the one 8 bytes further on into *len.  For the last
 * block only the start entry is read and *len is taken from
 * reset_table.compressed_len.  Finally *start is turned into an absolute
 * file offset by adding data_offset and cn_unit.start.
 *
 * NOTE(review): the per-block offset term of the start reads, the step that
 * turns the end address held in *len into a length, and the return
 * statements are not visible in this listing.  The offsets read from the
 * reset table are not range-checked in the visible code.
 */
1112 static int _chm_get_cmpblock_bounds(struct chmFile
*h
,
1117 UChar buffer
[8], *dummy
;
1120 /* for all but the last block, use the reset table */
1121 if (block
< h
->reset_table
.block_count
-1)
1123 /* unpack the start address */
1126 if (_chm_fetch_bytes(h
, buffer
,
1127 (UInt64
)h
->data_offset
1128 + (UInt64
)h
->rt_unit
.start
1129 + (UInt64
)h
->reset_table
.table_offset
1131 remain
) != remain
||
1132 !_unmarshal_uint64(&dummy
, &remain
, start
))
1135 /* unpack the end address */
1138 if (_chm_fetch_bytes(h
, buffer
,
1139 (UInt64
)h
->data_offset
1140 + (UInt64
)h
->rt_unit
.start
1141 + (UInt64
)h
->reset_table
.table_offset
1142 + (UInt64
)block
*8 + 8,
1143 remain
) != remain
||
1144 !_unmarshal_int64(&dummy
, &remain
, len
))
1148 /* for the last block, use the span in addition to the reset table */
1151 /* unpack the start address */
1154 if (_chm_fetch_bytes(h
, buffer
,
1155 (UInt64
)h
->data_offset
1156 + (UInt64
)h
->rt_unit
.start
1157 + (UInt64
)h
->reset_table
.table_offset
1159 remain
) != remain
||
1160 !_unmarshal_uint64(&dummy
, &remain
, start
))
1163 *len
= h
->reset_table
.compressed_len
;
1166 /* compute the length and absolute start address */
1168 *start
+= h
->data_offset
+ h
->cn_unit
.start
;
1173 /* decompress the block. must have lzx_mutex.
 *
 * Decompresses block number `block` into its cache slot and returns
 * reset_table.block_len on the visible success path.  LZX state carries
 * over between blocks, so the blocks between the last reset point (every
 * reset_blkcount blocks) and the wanted block are decompressed first; that
 * run is shortened when lzx_last_block shows that the decompressor already
 * sits inside it.  Each decompressed block lands in cache slot
 * (index % cache_num_blocks), allocated on first use with block_len bytes.
 * The compressed bytes are staged in a heap buffer of block_len + 6144
 * bytes that is freed on every visible exit path.
 *
 * NOTE(review): cmpLen comes from the reset table in the file and is used
 * as the read size into that fixed-size staging buffer within the same
 * condition, with no visible comparison to the buffer size -- confirm a
 * bound exists.  Neither the HeapAlloc result nor the malloc results for
 * cache blocks are checked in the visible code.  reset_blkcount and
 * cache_num_blocks are used as divisors and are presumably nonzero here.
 * The fprintf diagnostics are presumably inside debug-only preprocessor
 * guards that this listing omits.
 */
1174 static Int64
_chm_decompress_block(struct chmFile
*h
,
1178 UChar
*cbuffer
= HeapAlloc( GetProcessHeap(), 0,
1179 ((unsigned int)h
->reset_table
.block_len
+ 6144));
1180 UInt64 cmpStart
; /* compressed start */
1181 Int64 cmpLen
; /* compressed len */
1182 int indexSlot
; /* cache index slot */
1183 UChar
*lbuffer
; /* local buffer ptr */
1184 UInt32 blockAlign
= (UInt32
)(block
% h
->reset_blkcount
); /* reset intvl. aln. */
1185 UInt32 i
; /* local loop index */
1187 /* let the caching system pull its weight! */
1188 if (block
- blockAlign
<= h
->lzx_last_block
&&
1189 block
>= h
->lzx_last_block
)
1190 blockAlign
= (block
- h
->lzx_last_block
);
1192 /* check if we need previous blocks */
1193 if (blockAlign
!= 0)
1195 /* fetch all required previous blocks since last reset */
1196 for (i
= blockAlign
; i
> 0; i
--)
1198 UInt32 curBlockIdx
= block
- i
;
1200 /* check if we most recently decompressed the previous block */
1201 if (h
->lzx_last_block
!= curBlockIdx
)
1203 if ((curBlockIdx
% h
->reset_blkcount
) == 0)
1206 fprintf(stderr
, "***RESET (1)***\n");
1208 LZXreset(h
->lzx_state
);
1211 indexSlot
= (int)((curBlockIdx
) % h
->cache_num_blocks
);
1212 h
->cache_block_indices
[indexSlot
] = curBlockIdx
;
1213 if (! h
->cache_blocks
[indexSlot
])
1214 h
->cache_blocks
[indexSlot
] = (UChar
*)malloc(
1215 (unsigned int)(h
->reset_table
.block_len
));
1216 lbuffer
= h
->cache_blocks
[indexSlot
];
1218 /* decompress the previous block */
1220 fprintf(stderr
, "Decompressing block #%4d (EXTRA)\n", curBlockIdx
);
1222 if (!_chm_get_cmpblock_bounds(h
, curBlockIdx
, &cmpStart
, &cmpLen
) ||
1223 _chm_fetch_bytes(h
, cbuffer
, cmpStart
, cmpLen
) != cmpLen
||
1224 LZXdecompress(h
->lzx_state
, cbuffer
, lbuffer
, (int)cmpLen
,
1225 (int)h
->reset_table
.block_len
) != DECR_OK
)
1228 fprintf(stderr
, " (DECOMPRESS FAILED!)\n");
1230 HeapFree(GetProcessHeap(), 0, cbuffer
);
1234 h
->lzx_last_block
= (int)curBlockIdx
;
1240 if ((block
% h
->reset_blkcount
) == 0)
1243 fprintf(stderr
, "***RESET (2)***\n");
1245 LZXreset(h
->lzx_state
);
1249 /* allocate slot in cache */
1250 indexSlot
= (int)(block
% h
->cache_num_blocks
);
1251 h
->cache_block_indices
[indexSlot
] = block
;
1252 if (! h
->cache_blocks
[indexSlot
])
1253 h
->cache_blocks
[indexSlot
] = (UChar
*)malloc(
1254 ((unsigned int)h
->reset_table
.block_len
));
1255 lbuffer
= h
->cache_blocks
[indexSlot
];
1258 /* decompress the block we actually want */
1260 fprintf(stderr
, "Decompressing block #%4d (REAL )\n", block
);
1262 if (! _chm_get_cmpblock_bounds(h
, block
, &cmpStart
, &cmpLen
) ||
1263 _chm_fetch_bytes(h
, cbuffer
, cmpStart
, cmpLen
) != cmpLen
||
1264 LZXdecompress(h
->lzx_state
, cbuffer
, lbuffer
, (int)cmpLen
,
1265 (int)h
->reset_table
.block_len
) != DECR_OK
)
1268 fprintf(stderr
, " (DECOMPRESS FAILED!)\n");
1270 HeapFree(GetProcessHeap(), 0, cbuffer
);
1273 h
->lzx_last_block
= (int)block
;
1275 /* XXX: modify LZX routines to return the length of the data they
1276 * decompressed and return that instead, for an extra sanity check.
1278 HeapFree(GetProcessHeap(), 0, cbuffer
);
1279 return h
->reset_table
.block_len
;
1282 /* grab a region from a compressed block
 *
 * Copies at most one block's worth of decompressed data into buf.  The
 * request is mapped to a block number (start / block_len) and an offset
 * inside it (start % block_len), and nLen is clipped so the copy never
 * crosses the end of that block.  With lzx_mutex and then cache_mutex
 * held, a cache slot that already holds the block is copied from directly
 * and both locks are released.  Otherwise cache_mutex is dropped, an LZX
 * state is created from the window size, the block is decompressed via
 * _chm_decompress_block, the requested bytes are copied out and lzx_mutex
 * is released.
 *
 * NOTE(review): the condition under which the LZX state is created (it is
 * presumably created only once per handle), the handling of a failed
 * decompression and the return values are not visible in this listing.
 * ffs(window_size) - 1 yields the base-2 logarithm only when window_size
 * is a power of two.
 */
1283 static Int64
_chm_decompress_region(struct chmFile
*h
,
1288 UInt64 nBlock
, nOffset
;
1296 /* figure out what we need to read */
1297 nBlock
= start
/ h
->reset_table
.block_len
;
1298 nOffset
= start
% h
->reset_table
.block_len
;
1300 if (nLen
> (h
->reset_table
.block_len
- nOffset
))
1301 nLen
= h
->reset_table
.block_len
- nOffset
;
1303 /* if block is cached, return data from it. */
1304 CHM_ACQUIRE_LOCK(h
->lzx_mutex
);
1305 CHM_ACQUIRE_LOCK(h
->cache_mutex
);
1306 if (h
->cache_block_indices
[nBlock
% h
->cache_num_blocks
] == nBlock
&&
1307 h
->cache_blocks
[nBlock
% h
->cache_num_blocks
] != NULL
)
1310 h
->cache_blocks
[nBlock
% h
->cache_num_blocks
] + nOffset
,
1311 (unsigned int)nLen
);
1312 CHM_RELEASE_LOCK(h
->cache_mutex
);
1313 CHM_RELEASE_LOCK(h
->lzx_mutex
);
1316 CHM_RELEASE_LOCK(h
->cache_mutex
);
1318 /* data request not satisfied, so... start up the decompressor machine */
1321 int window_size
= ffs(h
->window_size
) - 1;
1322 h
->lzx_last_block
= -1;
1323 h
->lzx_state
= LZXinit(window_size
);
1326 /* decompress some data */
1327 gotLen
= _chm_decompress_block(h
, nBlock
, &ubuffer
);
1330 memcpy(buf
, ubuffer
+nOffset
, (unsigned int)nLen
);
1331 CHM_RELEASE_LOCK(h
->lzx_mutex
);
1335 /* retrieve (part of) an object */
/*
 * chm_retrieve_object: read bytes of the object described by `ui`,
 * beginning at offset `addr` within that object.
 *
 * NOTE(review): this listing is lossy.  The remaining parameters, the
 * braces, several return statements and the loop around the decompression
 * call are not visible; comments below stick to what the visible lines show.
 *
 * Visible behaviour:
 *   - `addr` must be smaller than ui->length;
 *   - `len` is clipped so that addr + len does not exceed ui->length;
 *   - objects in CHM_UNCOMPRESSED space are read directly through
 *     _chm_fetch_bytes() at data_offset + ui->start + addr;
 *   - any other space is treated as compressed and served through
 *     _chm_decompress_region(), provided h->compression_enabled is set.
 */
1336 LONGINT64
chm_retrieve_object(struct chmFile
*h
,
1337 struct chmUnitInfo
*ui
,
1342 /* must be valid file handle */
1346 /* starting address must be in correct range */
/* NOTE(review): if addr is declared with an unsigned type (its declaration
 * is not visible here), the `addr < 0` half can never be true -- confirm. */
1347 if (addr
< 0 || addr
>= ui
->length
)
/* clip the request to the end of the object
 * NOTE(review): addr + len could wrap for very large len -- confirm the
 * operand types in the full source. */
1351 if (addr
+ len
> ui
->length
)
1352 len
= ui
->length
- addr
;
1354 /* if the file is uncompressed, it's simple */
1355 if (ui
->space
== CHM_UNCOMPRESSED
)
/* absolute file position = start of content data + object start + addr */
1358 return _chm_fetch_bytes(h
,
1360 (UInt64
)h
->data_offset
+ (UInt64
)ui
->start
+ (UInt64
)addr
,
1364 /* else if the file is compressed, it's a little trickier */
1365 else /* ui->space == CHM_COMPRESSED */
/* swath: bytes delivered by the latest decompression call;
 * total: presumably the running sum returned to the caller -- the lines
 * updating it are not visible in this listing. */
1367 Int64 swath
=0, total
=0;
1369 /* if compression is not enabled for this file... */
1370 if (! h
->compression_enabled
)
1375 /* swill another mouthful */
/* position passed is relative to the compressed section (ui->start + addr);
 * _chm_decompress_region clamps each request to a single block, so one call
 * may deliver fewer than len bytes. */
1376 swath
= _chm_decompress_region(h
, buf
, ui
->start
+ addr
, len
);
1378 /* if we didn't get any... */
1394 /* enumerate the objects in the .chm archive */
/*
 * chm_enumerate: walk the chain of directory pages starting at
 * h->index_head and invoke the callback `e` for each entry that matches
 * the `what` filter mask.
 *
 * NOTE(review): this listing is lossy.  The remaining parameters, braces,
 * return/continue/break statements and some locals are not visible; the
 * comments below describe only the visible lines.
 *
 * Visible behaviour:
 *   - a scratch buffer of h->block_len bytes is allocated from the process
 *     heap and released with HeapFree on every visible exit path;
 *   - each page is read from dir_offset + curPage * block_len and its
 *     header is decoded with _unmarshal_pmgl_header();
 *   - entries are decoded with _chm_parse_PMGL_entry() and classified as
 *     directory/file (trailing '/') and NORMAL/SPECIAL/META (leading '/',
 *     then '#' or '$' as second character);
 *   - the callback result selects between stopping and continuing;
 *   - the next page comes from header.block_next; -1 ends the walk.
 */
1395 int chm_enumerate(struct chmFile
*h
,
1402 /* buffer to hold whatever page we're looking at */
/* NOTE(review): HeapAlloc returns NULL on failure and no NULL check is
 * visible in this listing -- confirm one exists before the buffer is used. */
1403 UChar
*page_buf
= HeapAlloc(GetProcessHeap(), 0, (unsigned int)h
->block_len
);
1404 struct chmPmglHeader header
;
1407 unsigned long lenRemain
;
1410 /* the current ui */
1411 struct chmUnitInfo ui
;
/* begin with the first listing page recorded in the file handle */
1415 curPage
= h
->index_head
;
1417 /* until we have either returned or given up */
1418 while (curPage
!= -1)
1421 /* try to fetch the index page */
/* a short read (result differs from block_len) is treated as failure;
 * the scratch buffer is released on that path */
1422 if (_chm_fetch_bytes(h
,
1424 (UInt64
)h
->dir_offset
+ (UInt64
)curPage
*h
->block_len
,
1425 h
->block_len
) != h
->block_len
)
1427 HeapFree(GetProcessHeap(), 0, page_buf
);
1431 /* figure out start and end for this page */
/* lenRemain tells the header decoder how many bytes it may consume */
1433 lenRemain
= _CHM_PMGL_LEN
;
1434 if (! _unmarshal_pmgl_header(&cur
, &lenRemain
, &header
))
1436 HeapFree(GetProcessHeap(), 0, page_buf
);
/* entries end where the free space at the tail of the page begins
 * NOTE(review): free_space comes from the file; no visible check that it
 * is <= block_len, so `end` could point before page_buf -- confirm. */
1439 end
= page_buf
+ h
->block_len
- (header
.free_space
);
1441 /* loop over this page */
1444 if (! _chm_parse_PMGL_entry(&cur
, &ui
))
1446 HeapFree(GetProcessHeap(), 0, page_buf
);
1450 /* get the length of the path */
/* actually the index of the LAST character, not the length
 * NOTE(review): for an empty path this is (0 - 1) and the indexing below
 * would be out of range -- confirm empty paths cannot occur. */
1451 ui_path_len
= strlenW(ui
.path
)-1;
1453 /* check for DIRS */
/* a trailing '/' marks a directory entry */
1454 if (ui
.path
[ui_path_len
] == '/' && !(what
& CHM_ENUMERATE_DIRS
))
1457 /* check for FILES */
1458 if (ui
.path
[ui_path_len
] != '/' && !(what
& CHM_ENUMERATE_FILES
))
1461 /* check for NORMAL vs. META */
/* paths that begin with '/' are NORMAL or SPECIAL; all others are META */
1462 if (ui
.path
[0] == '/')
1465 /* check for NORMAL vs. SPECIAL */
/* second character '#' or '$' marks a SPECIAL entry */
1466 if (ui
.path
[1] == '#' || ui
.path
[1] == '$')
1467 flag
= CHM_ENUMERATE_SPECIAL
;
1469 flag
= CHM_ENUMERATE_NORMAL
;
1472 flag
= CHM_ENUMERATE_META
;
/* entry class not requested by the caller */
1473 if (! (what
& flag
))
1476 /* call the enumerator */
/* the callback receives the file handle, the decoded entry and the
 * caller-supplied context pointer */
1478 int status
= (*e
)(h
, &ui
, context
);
/* FAILURE and SUCCESS both release the page buffer (the statements that
 * follow each HeapFree are not visible in this listing) */
1481 case CHM_ENUMERATOR_FAILURE
:
1482 HeapFree(GetProcessHeap(), 0, page_buf
);
1484 case CHM_ENUMERATOR_CONTINUE
:
1486 case CHM_ENUMERATOR_SUCCESS
:
1487 HeapFree(GetProcessHeap(), 0, page_buf
);
1495 /* advance to next page */
1496 curPage
= header
.block_next
;
/* all pages visited: release the scratch buffer */
1499 HeapFree(GetProcessHeap(), 0, page_buf
);
/*
 * chm_enumerate_dir: like chm_enumerate, but restricted to entries whose
 * path starts with `prefix` (compared case-insensitively via strncmpiW).
 *
 * NOTE(review): this listing is lossy.  The remaining parameters, braces,
 * return/continue/break statements and some locals are not visible; the
 * comments below describe only the visible lines.
 *
 * Visible behaviour:
 *   - `prefix` is copied into prefixRectified and a '/' is appended when
 *     the copy does not already end in one;
 *   - directory pages are walked exactly as in chm_enumerate (fetch page,
 *     decode PMGL header, decode entries, follow header.block_next);
 *   - an entry with length 0 whose path matches the prefix appears to mark
 *     the start of the region of interest; the first entry that no longer
 *     matches the prefix releases the page buffer (apparently ending the
 *     walk);
 *   - lastPath/lastPathLen remember the previously seen path and are
 *     compared against the next entry (consequence of a match not visible);
 *   - the same DIRS/FILES and NORMAL/SPECIAL/META classification as in
 *     chm_enumerate is applied before the callback `e` is invoked.
 */
1503 int chm_enumerate_dir(struct chmFile
*h
,
1504 const WCHAR
*prefix
,
1510 * XXX: do this efficiently (i.e. using the tree index)
1515 /* buffer to hold whatever page we're looking at */
/* NOTE(review): HeapAlloc returns NULL on failure and no NULL check is
 * visible in this listing -- confirm one exists before the buffer is used. */
1516 UChar
*page_buf
= HeapAlloc(GetProcessHeap(), 0, (unsigned int)h
->block_len
);
1517 struct chmPmglHeader header
;
1520 unsigned long lenRemain
;
1522 /* set to 1 once we've started */
1525 /* the current ui */
1526 struct chmUnitInfo ui
;
1530 /* the length of the prefix */
/* CHM_MAX_PATHLEN characters plus one extra element */
1531 WCHAR prefixRectified
[CHM_MAX_PATHLEN
+1];
/* NOTE(review): one element smaller than prefixRectified, yet filled by an
 * unbounded strcpyW from ui.path further down -- check against the size of
 * chmUnitInfo.path. */
1533 WCHAR lastPath
[CHM_MAX_PATHLEN
];
/* begin with the first listing page recorded in the file handle */
1537 curPage
= h
->index_head
;
1539 /* initialize pathname state */
/* NOTE(review): assuming strncpyW follows strncpy semantics, a prefix of
 * CHM_MAX_PATHLEN or more characters leaves prefixRectified without a
 * terminator, and the strlenW below would read past the copied data --
 * confirm. */
1540 strncpyW(prefixRectified
, prefix
, CHM_MAX_PATHLEN
);
1541 prefixLen
= strlenW(prefixRectified
);
/* make sure the prefix ends with exactly one '/'
 * NOTE(review): prefixLen - 1 is out of range for an empty prefix unless a
 * guard exists on a line not visible here; and with prefixLen equal to
 * CHM_MAX_PATHLEN the write at prefixLen + 1 lands one element past the
 * end of prefixRectified. */
1544 if (prefixRectified
[prefixLen
-1] != '/')
1546 prefixRectified
[prefixLen
] = '/';
1547 prefixRectified
[prefixLen
+1] = '\0';
1554 /* until we have either returned or given up */
1555 while (curPage
!= -1)
1558 /* try to fetch the index page */
/* a short read (result differs from block_len) is treated as failure;
 * the scratch buffer is released on that path */
1559 if (_chm_fetch_bytes(h
,
1561 (UInt64
)h
->dir_offset
+ (UInt64
)curPage
*h
->block_len
,
1562 h
->block_len
) != h
->block_len
)
1564 HeapFree(GetProcessHeap(), 0, page_buf
);
1568 /* figure out start and end for this page */
/* lenRemain tells the header decoder how many bytes it may consume */
1570 lenRemain
= _CHM_PMGL_LEN
;
1571 if (! _unmarshal_pmgl_header(&cur
, &lenRemain
, &header
))
1573 HeapFree(GetProcessHeap(), 0, page_buf
);
/* entries end where the free space at the tail of the page begins
 * NOTE(review): free_space comes from the file; no visible check that it
 * is <= block_len -- confirm. */
1576 end
= page_buf
+ h
->block_len
- (header
.free_space
);
1578 /* loop over this page */
1581 if (! _chm_parse_PMGL_entry(&cur
, &ui
))
1583 HeapFree(GetProcessHeap(), 0, page_buf
);
1587 /* check if we should start */
/* a zero-length entry whose path begins with the prefix: presumably the
 * directory entry itself -- the action taken is not visible here */
1590 if (ui
.length
== 0 && strncmpiW(ui
.path
, prefixRectified
, prefixLen
) == 0)
/* path equals the prefix exactly (nothing after it) */
1595 if (ui
.path
[prefixLen
] == '\0')
1599 /* check if we should stop */
/* first entry that no longer shares the prefix: the buffer is released */
1602 if (strncmpiW(ui
.path
, prefixRectified
, prefixLen
) != 0)
1604 HeapFree(GetProcessHeap(), 0, page_buf
);
1609 /* check if we should include this path */
/* -1 appears to mean that no previous path has been recorded yet */
1610 if (lastPathLen
!= -1)
/* entry starts with the previously recorded path */
1612 if (strncmpiW(ui
.path
, lastPath
, lastPathLen
) == 0)
/* remember this path for the comparison on the next entry */
1615 strcpyW(lastPath
, ui
.path
);
1616 lastPathLen
= strlenW(lastPath
);
1618 /* get the length of the path */
/* actually the index of the LAST character, not the length */
1619 ui_path_len
= strlenW(ui
.path
)-1;
1621 /* check for DIRS */
/* a trailing '/' marks a directory entry */
1622 if (ui
.path
[ui_path_len
] == '/' && !(what
& CHM_ENUMERATE_DIRS
))
1625 /* check for FILES */
1626 if (ui
.path
[ui_path_len
] != '/' && !(what
& CHM_ENUMERATE_FILES
))
1629 /* check for NORMAL vs. META */
/* paths that begin with '/' are NORMAL or SPECIAL; all others are META */
1630 if (ui
.path
[0] == '/')
1633 /* check for NORMAL vs. SPECIAL */
/* second character '#' or '$' marks a SPECIAL entry */
1634 if (ui
.path
[1] == '#' || ui
.path
[1] == '$')
1635 flag
= CHM_ENUMERATE_SPECIAL
;
1637 flag
= CHM_ENUMERATE_NORMAL
;
1640 flag
= CHM_ENUMERATE_META
;
/* entry class not requested by the caller */
1641 if (! (what
& flag
))
1644 /* call the enumerator */
/* the callback receives the file handle, the decoded entry and the
 * caller-supplied context pointer */
1646 int status
= (*e
)(h
, &ui
, context
);
/* FAILURE and SUCCESS both release the page buffer (the statements that
 * follow each HeapFree are not visible in this listing) */
1649 case CHM_ENUMERATOR_FAILURE
:
1650 HeapFree(GetProcessHeap(), 0, page_buf
);
1652 case CHM_ENUMERATOR_CONTINUE
:
1654 case CHM_ENUMERATOR_SUCCESS
:
1655 HeapFree(GetProcessHeap(), 0, page_buf
);
1663 /* advance to next page */
1664 curPage
= header
.block_next
;
/* all pages visited: release the scratch buffer */
1667 HeapFree(GetProcessHeap(), 0, page_buf
);