1 /***************************************************************************
2 * chm_lib.c - CHM archive manipulation routines *
3 * ------------------- *
5 * author: Jed Wing <jedwin@ugcs.caltech.edu> *
7 * notes: These routines are meant for the manipulation of microsoft *
8 * .chm (compiled html help) files, but may likely be used *
9 * for the manipulation of any ITSS archive, if ever ITSS *
10 * archives are used for any other purpose. *
12 * Note also that the section names are statically handled. *
13 * To be entirely correct, the section names should be read *
14 * from the section names meta-file, and then the various *
15 * content sections and the "transforms" to apply to the data *
16 * they contain should be inferred from the section name and *
17 * the meta-files referenced using that name; however, all of *
18 * the files I've been able to get my hands on appear to have *
19 * only two sections: Uncompressed and MSCompressed. *
20 * Additionally, the ITSS.DLL file included with Windows does *
21 * not appear to handle any different transforms than the *
22 * simple LZX-transform. Furthermore, the list of transforms *
23 * to apply is broken, in that only half the required space *
24 * is allocated for the list. (It appears as though the *
25 * space is allocated for ASCII strings, but the strings are *
26 * written as unicode. As a result, only the first half of *
27 * the string appears.) So this is probably not too big of *
28 * a deal, at least until CHM v4 (MS .lit files), which also *
29 * incorporate encryption, of some description. *
31 ***************************************************************************/
33 /***************************************************************************
35 * This library is free software; you can redistribute it and/or
36 * modify it under the terms of the GNU Lesser General Public
37 * License as published by the Free Software Foundation; either
38 * version 2.1 of the License, or (at your option) any later version.
40 * This library is distributed in the hope that it will be useful,
41 * but WITHOUT ANY WARRANTY; without even the implied warranty of
42 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
43 * Lesser General Public License for more details.
45 * You should have received a copy of the GNU Lesser General Public
46 * License along with this library; if not, write to the Free Software
47 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
49 ***************************************************************************/
51 /***************************************************************************
53 * Adapted for Wine by Mike McCormack *
55 ***************************************************************************/
58 #include "wine/port.h"
67 #include "wine/unicode.h"
72 #define CHM_ACQUIRE_LOCK(a) do { \
73 EnterCriticalSection(&(a)); \
75 #define CHM_RELEASE_LOCK(a) do { \
76 LeaveCriticalSection(&(a)); \
79 #define CHM_NULL_FD (INVALID_HANDLE_VALUE)
80 #define CHM_CLOSE_FILE(fd) CloseHandle((fd))
83 * defines related to tuning
85 #ifndef CHM_MAX_BLOCKS_CACHED
86 #define CHM_MAX_BLOCKS_CACHED 5
88 #define CHM_PARAM_MAX_BLOCKS_CACHED 0
91 * architecture specific defines
93 * Note: as soon as C99 is more widespread, the below defines should
94 * probably just use the C99 sized-int types.
96 * The following settings will probably work for many platforms. The sizes
97 * don't have to be exactly correct, but the types must accommodate at least as
98 * many bits as they specify.
101 /* i386, 32-bit, Windows */
104 typedef USHORT UInt16
;
106 typedef DWORD UInt32
;
107 typedef LONGLONG Int64
;
108 typedef ULONGLONG UInt64
;
110 /* utilities for unmarshalling data */
111 static int _unmarshal_char_array(unsigned char **pData
,
112 unsigned int *pLenRemain
,
116 if (count
<= 0 || (unsigned int)count
> *pLenRemain
)
118 memcpy(dest
, (*pData
), count
);
120 *pLenRemain
-= count
;
124 static int _unmarshal_uchar_array(unsigned char **pData
,
125 unsigned int *pLenRemain
,
129 if (count
<= 0 || (unsigned int)count
> *pLenRemain
)
131 memcpy(dest
, (*pData
), count
);
133 *pLenRemain
-= count
;
137 static int _unmarshal_int32(unsigned char **pData
,
138 unsigned int *pLenRemain
,
143 *dest
= (*pData
)[0] | (*pData
)[1]<<8 | (*pData
)[2]<<16 | (*pData
)[3]<<24;
149 static int _unmarshal_uint32(unsigned char **pData
,
150 unsigned int *pLenRemain
,
155 *dest
= (*pData
)[0] | (*pData
)[1]<<8 | (*pData
)[2]<<16 | (*pData
)[3]<<24;
161 static int _unmarshal_int64(unsigned char **pData
,
162 unsigned int *pLenRemain
,
173 temp
|= (*pData
)[i
-1];
181 static int _unmarshal_uint64(unsigned char **pData
,
182 unsigned int *pLenRemain
,
193 temp
|= (*pData
)[i
-1];
201 static int _unmarshal_uuid(unsigned char **pData
,
202 unsigned int *pDataLen
,
205 return _unmarshal_uchar_array(pData
, pDataLen
, dest
, 16);
208 /* names of sections essential to decompression */
209 static const WCHAR _CHMU_RESET_TABLE
[] = {
210 ':',':','D','a','t','a','S','p','a','c','e','/',
211 'S','t','o','r','a','g','e','/',
212 'M','S','C','o','m','p','r','e','s','s','e','d','/',
213 'T','r','a','n','s','f','o','r','m','/',
214 '{','7','F','C','2','8','9','4','0','-','9','D','3','1',
215 '-','1','1','D','0','-','9','B','2','7','-',
216 '0','0','A','0','C','9','1','E','9','C','7','C','}','/',
217 'I','n','s','t','a','n','c','e','D','a','t','a','/',
218 'R','e','s','e','t','T','a','b','l','e',0
220 static const WCHAR _CHMU_LZXC_CONTROLDATA
[] = {
221 ':',':','D','a','t','a','S','p','a','c','e','/',
222 'S','t','o','r','a','g','e','/',
223 'M','S','C','o','m','p','r','e','s','s','e','d','/',
224 'C','o','n','t','r','o','l','D','a','t','a',0
226 static const WCHAR _CHMU_CONTENT
[] = {
227 ':',':','D','a','t','a','S','p','a','c','e','/',
228 'S','t','o','r','a','g','e','/',
229 'M','S','C','o','m','p','r','e','s','s','e','d','/',
230 'C','o','n','t','e','n','t',0
234 * structures local to this module
237 /* structure of ITSF headers */
238 #define _CHM_ITSF_V2_LEN (0x58)
239 #define _CHM_ITSF_V3_LEN (0x60)
242 char signature
[4]; /* 0 (ITSF) */
243 Int32 version
; /* 4 */
244 Int32 header_len
; /* 8 */
245 Int32 unknown_000c
; /* c */
246 UInt32 last_modified
; /* 10 */
247 UInt32 lang_id
; /* 14 */
248 UChar dir_uuid
[16]; /* 18 */
249 UChar stream_uuid
[16]; /* 28 */
250 UInt64 unknown_offset
; /* 38 */
251 UInt64 unknown_len
; /* 40 */
252 UInt64 dir_offset
; /* 48 */
253 UInt64 dir_len
; /* 50 */
254 UInt64 data_offset
; /* 58 (Not present before V3) */
255 }; /* __attribute__ ((aligned (1))); */
257 static int _unmarshal_itsf_header(unsigned char **pData
,
258 unsigned int *pDataLen
,
259 struct chmItsfHeader
*dest
)
261 /* we only know how to deal with the 0x58 and 0x60 byte structures */
262 if (*pDataLen
!= _CHM_ITSF_V2_LEN
&& *pDataLen
!= _CHM_ITSF_V3_LEN
)
265 /* unmarshal common fields */
266 _unmarshal_char_array(pData
, pDataLen
, dest
->signature
, 4);
267 _unmarshal_int32 (pData
, pDataLen
, &dest
->version
);
268 _unmarshal_int32 (pData
, pDataLen
, &dest
->header_len
);
269 _unmarshal_int32 (pData
, pDataLen
, &dest
->unknown_000c
);
270 _unmarshal_uint32 (pData
, pDataLen
, &dest
->last_modified
);
271 _unmarshal_uint32 (pData
, pDataLen
, &dest
->lang_id
);
272 _unmarshal_uuid (pData
, pDataLen
, dest
->dir_uuid
);
273 _unmarshal_uuid (pData
, pDataLen
, dest
->stream_uuid
);
274 _unmarshal_uint64 (pData
, pDataLen
, &dest
->unknown_offset
);
275 _unmarshal_uint64 (pData
, pDataLen
, &dest
->unknown_len
);
276 _unmarshal_uint64 (pData
, pDataLen
, &dest
->dir_offset
);
277 _unmarshal_uint64 (pData
, pDataLen
, &dest
->dir_len
);
279 /* error check the data */
280 /* XXX: should also check UUIDs, probably, though with a version 3 file,
281 * current MS tools do not seem to use them.
283 if (memcmp(dest
->signature
, "ITSF", 4) != 0)
285 if (dest
->version
== 2)
287 if (dest
->header_len
< _CHM_ITSF_V2_LEN
)
290 else if (dest
->version
== 3)
292 if (dest
->header_len
< _CHM_ITSF_V3_LEN
)
298 /* now, if we have a V3 structure, unmarshal the rest.
299 * otherwise, compute it
301 if (dest
->version
== 3)
304 _unmarshal_uint64(pData
, pDataLen
, &dest
->data_offset
);
309 dest
->data_offset
= dest
->dir_offset
+ dest
->dir_len
;
314 /* structure of ITSP headers */
315 #define _CHM_ITSP_V1_LEN (0x54)
318 char signature
[4]; /* 0 (ITSP) */
319 Int32 version
; /* 4 */
320 Int32 header_len
; /* 8 */
321 Int32 unknown_000c
; /* c */
322 UInt32 block_len
; /* 10 */
323 Int32 blockidx_intvl
; /* 14 */
324 Int32 index_depth
; /* 18 */
325 Int32 index_root
; /* 1c */
326 Int32 index_head
; /* 20 */
327 Int32 unknown_0024
; /* 24 */
328 UInt32 num_blocks
; /* 28 */
329 Int32 unknown_002c
; /* 2c */
330 UInt32 lang_id
; /* 30 */
331 UChar system_uuid
[16]; /* 34 */
332 UChar unknown_0044
[16]; /* 44 */
333 }; /* __attribute__ ((aligned (1))); */
335 static int _unmarshal_itsp_header(unsigned char **pData
,
336 unsigned int *pDataLen
,
337 struct chmItspHeader
*dest
)
339 /* we only know how to deal with 0x54 byte structures */
340 if (*pDataLen
!= _CHM_ITSP_V1_LEN
)
343 /* unmarshal fields */
344 _unmarshal_char_array(pData
, pDataLen
, dest
->signature
, 4);
345 _unmarshal_int32 (pData
, pDataLen
, &dest
->version
);
346 _unmarshal_int32 (pData
, pDataLen
, &dest
->header_len
);
347 _unmarshal_int32 (pData
, pDataLen
, &dest
->unknown_000c
);
348 _unmarshal_uint32 (pData
, pDataLen
, &dest
->block_len
);
349 _unmarshal_int32 (pData
, pDataLen
, &dest
->blockidx_intvl
);
350 _unmarshal_int32 (pData
, pDataLen
, &dest
->index_depth
);
351 _unmarshal_int32 (pData
, pDataLen
, &dest
->index_root
);
352 _unmarshal_int32 (pData
, pDataLen
, &dest
->index_head
);
353 _unmarshal_int32 (pData
, pDataLen
, &dest
->unknown_0024
);
354 _unmarshal_uint32 (pData
, pDataLen
, &dest
->num_blocks
);
355 _unmarshal_int32 (pData
, pDataLen
, &dest
->unknown_002c
);
356 _unmarshal_uint32 (pData
, pDataLen
, &dest
->lang_id
);
357 _unmarshal_uuid (pData
, pDataLen
, dest
->system_uuid
);
358 _unmarshal_uchar_array(pData
, pDataLen
, dest
->unknown_0044
, 16);
360 /* error check the data */
361 if (memcmp(dest
->signature
, "ITSP", 4) != 0)
363 if (dest
->version
!= 1)
365 if (dest
->header_len
!= _CHM_ITSP_V1_LEN
)
371 /* structure of PMGL headers */
372 static const char _chm_pmgl_marker
[4] = "PMGL";
373 #define _CHM_PMGL_LEN (0x14)
376 char signature
[4]; /* 0 (PMGL) */
377 UInt32 free_space
; /* 4 */
378 UInt32 unknown_0008
; /* 8 */
379 Int32 block_prev
; /* c */
380 Int32 block_next
; /* 10 */
381 }; /* __attribute__ ((aligned (1))); */
383 static int _unmarshal_pmgl_header(unsigned char **pData
,
384 unsigned int *pDataLen
,
385 struct chmPmglHeader
*dest
)
387 /* we only know how to deal with 0x14 byte structures */
388 if (*pDataLen
!= _CHM_PMGL_LEN
)
391 /* unmarshal fields */
392 _unmarshal_char_array(pData
, pDataLen
, dest
->signature
, 4);
393 _unmarshal_uint32 (pData
, pDataLen
, &dest
->free_space
);
394 _unmarshal_uint32 (pData
, pDataLen
, &dest
->unknown_0008
);
395 _unmarshal_int32 (pData
, pDataLen
, &dest
->block_prev
);
396 _unmarshal_int32 (pData
, pDataLen
, &dest
->block_next
);
398 /* check structure */
399 if (memcmp(dest
->signature
, _chm_pmgl_marker
, 4) != 0)
405 /* structure of PMGI headers */
406 static const char _chm_pmgi_marker
[4] = "PMGI";
407 #define _CHM_PMGI_LEN (0x08)
410 char signature
[4]; /* 0 (PMGI) */
411 UInt32 free_space
; /* 4 */
412 }; /* __attribute__ ((aligned (1))); */
414 static int _unmarshal_pmgi_header(unsigned char **pData
,
415 unsigned int *pDataLen
,
416 struct chmPmgiHeader
*dest
)
418 /* we only know how to deal with 0x8 byte structures */
419 if (*pDataLen
!= _CHM_PMGI_LEN
)
422 /* unmarshal fields */
423 _unmarshal_char_array(pData
, pDataLen
, dest
->signature
, 4);
424 _unmarshal_uint32 (pData
, pDataLen
, &dest
->free_space
);
426 /* check structure */
427 if (memcmp(dest
->signature
, _chm_pmgi_marker
, 4) != 0)
433 /* structure of LZXC reset table */
434 #define _CHM_LZXC_RESETTABLE_V1_LEN (0x28)
435 struct chmLzxcResetTable
441 UInt64 uncompressed_len
;
442 UInt64 compressed_len
;
444 }; /* __attribute__ ((aligned (1))); */
446 static int _unmarshal_lzxc_reset_table(unsigned char **pData
,
447 unsigned int *pDataLen
,
448 struct chmLzxcResetTable
*dest
)
450 /* we only know how to deal with 0x28 byte structures */
451 if (*pDataLen
!= _CHM_LZXC_RESETTABLE_V1_LEN
)
454 /* unmarshal fields */
455 _unmarshal_uint32 (pData
, pDataLen
, &dest
->version
);
456 _unmarshal_uint32 (pData
, pDataLen
, &dest
->block_count
);
457 _unmarshal_uint32 (pData
, pDataLen
, &dest
->unknown
);
458 _unmarshal_uint32 (pData
, pDataLen
, &dest
->table_offset
);
459 _unmarshal_uint64 (pData
, pDataLen
, &dest
->uncompressed_len
);
460 _unmarshal_uint64 (pData
, pDataLen
, &dest
->compressed_len
);
461 _unmarshal_uint64 (pData
, pDataLen
, &dest
->block_len
);
463 /* check structure */
464 if (dest
->version
!= 2)
470 /* structure of LZXC control data block */
471 #define _CHM_LZXC_MIN_LEN (0x18)
472 #define _CHM_LZXC_V2_LEN (0x1c)
473 struct chmLzxcControlData
476 char signature
[4]; /* 4 (LZXC) */
477 UInt32 version
; /* 8 */
478 UInt32 resetInterval
; /* c */
479 UInt32 windowSize
; /* 10 */
480 UInt32 windowsPerReset
; /* 14 */
481 UInt32 unknown_18
; /* 18 */
484 static int _unmarshal_lzxc_control_data(unsigned char **pData
,
485 unsigned int *pDataLen
,
486 struct chmLzxcControlData
*dest
)
488 /* we want at least 0x18 bytes */
489 if (*pDataLen
< _CHM_LZXC_MIN_LEN
)
492 /* unmarshal fields */
493 _unmarshal_uint32 (pData
, pDataLen
, &dest
->size
);
494 _unmarshal_char_array(pData
, pDataLen
, dest
->signature
, 4);
495 _unmarshal_uint32 (pData
, pDataLen
, &dest
->version
);
496 _unmarshal_uint32 (pData
, pDataLen
, &dest
->resetInterval
);
497 _unmarshal_uint32 (pData
, pDataLen
, &dest
->windowSize
);
498 _unmarshal_uint32 (pData
, pDataLen
, &dest
->windowsPerReset
);
500 if (*pDataLen
>= _CHM_LZXC_V2_LEN
)
501 _unmarshal_uint32 (pData
, pDataLen
, &dest
->unknown_18
);
503 dest
->unknown_18
= 0;
505 if (dest
->version
== 2)
507 dest
->resetInterval
*= 0x8000;
508 dest
->windowSize
*= 0x8000;
510 if (dest
->windowSize
== 0 || dest
->resetInterval
== 0)
513 /* for now, only support resetInterval a multiple of windowSize/2 */
514 if (dest
->windowSize
== 1)
516 if ((dest
->resetInterval
% (dest
->windowSize
/2)) != 0)
519 /* check structure */
520 if (memcmp(dest
->signature
, "LZXC", 4) != 0)
526 /* the structure used for chm file handles */
531 CRITICAL_SECTION mutex
;
532 CRITICAL_SECTION lzx_mutex
;
533 CRITICAL_SECTION cache_mutex
;
543 struct chmUnitInfo rt_unit
;
544 struct chmUnitInfo cn_unit
;
545 struct chmLzxcResetTable reset_table
;
547 /* LZX control data */
548 int compression_enabled
;
550 UInt32 reset_interval
;
551 UInt32 reset_blkcount
;
553 /* decompressor state */
554 struct LZXstate
*lzx_state
;
557 /* cache for decompressed blocks */
558 UChar
**cache_blocks
;
559 Int64
*cache_block_indices
;
560 Int32 cache_num_blocks
;
564 * utility functions local to this module
567 /* utility function to handle differences between {pread,read}(64)? */
568 static Int64
_chm_fetch_bytes(struct chmFile
*h
,
574 if (h
->fd
== CHM_NULL_FD
)
577 CHM_ACQUIRE_LOCK(h
->mutex
);
578 /* NOTE: this might be better done with CreateFileMapping, et cetera... */
580 LARGE_INTEGER old_pos
, new_pos
;
583 /* awkward Win32 Seek/Tell */
584 new_pos
.QuadPart
= 0;
585 SetFilePointerEx( h
->fd
, new_pos
, &old_pos
, FILE_CURRENT
);
586 new_pos
.QuadPart
= os
;
587 SetFilePointerEx( h
->fd
, new_pos
, NULL
, FILE_BEGIN
);
599 /* restore original position */
600 SetFilePointerEx( h
->fd
, old_pos
, NULL
, FILE_BEGIN
);
602 CHM_RELEASE_LOCK(h
->mutex
);
607 * set a parameter on the file handle.
608 * valid parameter types:
609 * CHM_PARAM_MAX_BLOCKS_CACHED:
610 * how many decompressed blocks should be cached? A simple
611 * caching scheme is used, wherein the index of the block is
612 * used as a hash value, and hash collision results in the
613 * invalidation of the previously cached block.
615 static void chm_set_param(struct chmFile
*h
,
621 case CHM_PARAM_MAX_BLOCKS_CACHED
:
622 CHM_ACQUIRE_LOCK(h
->cache_mutex
);
623 if (paramVal
!= h
->cache_num_blocks
)
629 /* allocate new cached blocks */
630 newBlocks
= HeapAlloc(GetProcessHeap(), 0, paramVal
* sizeof (UChar
*));
631 newIndices
= HeapAlloc(GetProcessHeap(), 0, paramVal
* sizeof (UInt64
));
632 for (i
=0; i
<paramVal
; i
++)
638 /* re-distribute old cached blocks */
641 for (i
=0; i
<h
->cache_num_blocks
; i
++)
643 int newSlot
= (int)(h
->cache_block_indices
[i
] % paramVal
);
645 if (h
->cache_blocks
[i
])
647 /* in case of collision, destroy newcomer */
648 if (newBlocks
[newSlot
])
650 HeapFree(GetProcessHeap(), 0, h
->cache_blocks
[i
]);
651 h
->cache_blocks
[i
] = NULL
;
655 newBlocks
[newSlot
] = h
->cache_blocks
[i
];
656 newIndices
[newSlot
] =
657 h
->cache_block_indices
[i
];
662 HeapFree(GetProcessHeap(), 0, h
->cache_blocks
);
663 HeapFree(GetProcessHeap(), 0, h
->cache_block_indices
);
666 /* now, set new values */
667 h
->cache_blocks
= newBlocks
;
668 h
->cache_block_indices
= newIndices
;
669 h
->cache_num_blocks
= paramVal
;
671 CHM_RELEASE_LOCK(h
->cache_mutex
);
679 /* open an ITS archive */
680 struct chmFile
*chm_openW(const WCHAR
*filename
)
682 unsigned char sbuffer
[256];
683 unsigned int sremain
;
684 unsigned char *sbufpos
;
685 struct chmFile
*newHandle
=NULL
;
686 struct chmItsfHeader itsfHeader
;
687 struct chmItspHeader itspHeader
;
689 struct chmUnitInfo uiSpan
;
691 struct chmUnitInfo uiLzxc
;
692 struct chmLzxcControlData ctlData
;
694 /* allocate handle */
695 newHandle
= HeapAlloc(GetProcessHeap(), 0, sizeof(struct chmFile
));
696 newHandle
->fd
= CHM_NULL_FD
;
697 newHandle
->lzx_state
= NULL
;
698 newHandle
->cache_blocks
= NULL
;
699 newHandle
->cache_block_indices
= NULL
;
700 newHandle
->cache_num_blocks
= 0;
703 if ((newHandle
->fd
=CreateFileW(filename
,
708 FILE_ATTRIBUTE_NORMAL
,
709 NULL
)) == CHM_NULL_FD
)
711 HeapFree(GetProcessHeap(), 0, newHandle
);
715 /* initialize mutexes, if needed */
716 InitializeCriticalSection(&newHandle
->mutex
);
717 newHandle
->mutex
.DebugInfo
->Spare
[0] = (DWORD_PTR
)(__FILE__
": chmFile.mutex");
718 InitializeCriticalSection(&newHandle
->lzx_mutex
);
719 newHandle
->lzx_mutex
.DebugInfo
->Spare
[0] = (DWORD_PTR
)(__FILE__
": chmFile.lzx_mutex");
720 InitializeCriticalSection(&newHandle
->cache_mutex
);
721 newHandle
->cache_mutex
.DebugInfo
->Spare
[0] = (DWORD_PTR
)(__FILE__
": chmFile.cache_mutex");
723 /* read and verify header */
724 sremain
= _CHM_ITSF_V3_LEN
;
726 if (_chm_fetch_bytes(newHandle
, sbuffer
, (UInt64
)0, sremain
) != sremain
||
727 !_unmarshal_itsf_header(&sbufpos
, &sremain
, &itsfHeader
))
729 chm_close(newHandle
);
733 /* stash important values from header */
734 newHandle
->dir_offset
= itsfHeader
.dir_offset
;
735 newHandle
->dir_len
= itsfHeader
.dir_len
;
736 newHandle
->data_offset
= itsfHeader
.data_offset
;
738 /* now, read and verify the directory header chunk */
739 sremain
= _CHM_ITSP_V1_LEN
;
741 if (_chm_fetch_bytes(newHandle
, sbuffer
,
742 itsfHeader
.dir_offset
, sremain
) != sremain
||
743 !_unmarshal_itsp_header(&sbufpos
, &sremain
, &itspHeader
))
745 chm_close(newHandle
);
749 /* grab essential information from ITSP header */
750 newHandle
->dir_offset
+= itspHeader
.header_len
;
751 newHandle
->dir_len
-= itspHeader
.header_len
;
752 newHandle
->index_root
= itspHeader
.index_root
;
753 newHandle
->index_head
= itspHeader
.index_head
;
754 newHandle
->block_len
= itspHeader
.block_len
;
756 /* if the index root is -1, this means we don't have any PMGI blocks.
757 * as a result, we must use the sole PMGL block as the index root
759 if (newHandle
->index_root
== -1)
760 newHandle
->index_root
= newHandle
->index_head
;
762 /* By default, compression is enabled. */
763 newHandle
->compression_enabled
= 1;
765 /* prefetch most commonly needed unit infos */
766 if (CHM_RESOLVE_SUCCESS
!= chm_resolve_object(newHandle
,
768 &newHandle
->rt_unit
) ||
769 newHandle
->rt_unit
.space
== CHM_COMPRESSED
||
770 CHM_RESOLVE_SUCCESS
!= chm_resolve_object(newHandle
,
772 &newHandle
->cn_unit
) ||
773 newHandle
->cn_unit
.space
== CHM_COMPRESSED
||
774 CHM_RESOLVE_SUCCESS
!= chm_resolve_object(newHandle
,
775 _CHMU_LZXC_CONTROLDATA
,
777 uiLzxc
.space
== CHM_COMPRESSED
)
779 newHandle
->compression_enabled
= 0;
782 /* read reset table info */
783 if (newHandle
->compression_enabled
)
785 sremain
= _CHM_LZXC_RESETTABLE_V1_LEN
;
787 if (chm_retrieve_object(newHandle
, &newHandle
->rt_unit
, sbuffer
,
788 0, sremain
) != sremain
||
789 !_unmarshal_lzxc_reset_table(&sbufpos
, &sremain
,
790 &newHandle
->reset_table
))
792 newHandle
->compression_enabled
= 0;
796 /* read control data */
797 if (newHandle
->compression_enabled
)
799 sremain
= (unsigned long)uiLzxc
.length
;
801 if (chm_retrieve_object(newHandle
, &uiLzxc
, sbuffer
,
802 0, sremain
) != sremain
||
803 !_unmarshal_lzxc_control_data(&sbufpos
, &sremain
,
806 newHandle
->compression_enabled
= 0;
809 newHandle
->window_size
= ctlData
.windowSize
;
810 newHandle
->reset_interval
= ctlData
.resetInterval
;
812 /* Jed, Mon Jun 28: Experimentally, it appears that the reset block count */
813 /* must be multiplied by this formerly unknown ctrl data field in */
814 /* order to decompress some files. */
816 newHandle
->reset_blkcount
= newHandle
->reset_interval
/
817 (newHandle
->window_size
/ 2);
819 newHandle
->reset_blkcount
= newHandle
->reset_interval
/
820 (newHandle
->window_size
/ 2) *
821 ctlData
.windowsPerReset
;
825 /* initialize cache */
826 chm_set_param(newHandle
, CHM_PARAM_MAX_BLOCKS_CACHED
,
827 CHM_MAX_BLOCKS_CACHED
);
832 /* close an ITS archive */
833 void chm_close(struct chmFile
*h
)
837 if (h
->fd
!= CHM_NULL_FD
)
838 CHM_CLOSE_FILE(h
->fd
);
841 h
->mutex
.DebugInfo
->Spare
[0] = 0;
842 DeleteCriticalSection(&h
->mutex
);
843 h
->lzx_mutex
.DebugInfo
->Spare
[0] = 0;
844 DeleteCriticalSection(&h
->lzx_mutex
);
845 h
->cache_mutex
.DebugInfo
->Spare
[0] = 0;
846 DeleteCriticalSection(&h
->cache_mutex
);
849 LZXteardown(h
->lzx_state
);
855 for (i
=0; i
<h
->cache_num_blocks
; i
++)
857 HeapFree(GetProcessHeap(), 0, h
->cache_blocks
[i
]);
859 HeapFree(GetProcessHeap(), 0, h
->cache_blocks
);
860 h
->cache_blocks
= NULL
;
863 HeapFree(GetProcessHeap(), 0, h
->cache_block_indices
);
864 h
->cache_block_indices
= NULL
;
866 HeapFree(GetProcessHeap(), 0, h
);
871 * helper methods for chm_resolve_object
874 /* skip a compressed dword */
875 static void _chm_skip_cword(UChar
**pEntry
)
877 while (*(*pEntry
)++ >= 0x80)
881 /* skip the data from a PMGL entry */
882 static void _chm_skip_PMGL_entry_data(UChar
**pEntry
)
884 _chm_skip_cword(pEntry
);
885 _chm_skip_cword(pEntry
);
886 _chm_skip_cword(pEntry
);
889 /* parse a compressed dword */
890 static UInt64
_chm_parse_cword(UChar
**pEntry
)
894 while ((temp
=*(*pEntry
)++) >= 0x80)
897 accum
+= temp
& 0x7f;
900 return (accum
<< 7) + temp
;
903 /* parse a utf-8 string into a WCHAR buffer */
904 static int _chm_parse_UTF8(UChar
**pEntry
, UInt64 count
, WCHAR
*path
)
906 /* MJM - Modified to return real Unicode strings */
909 *path
++ = (*(*pEntry
)++);
917 /* parse a PMGL entry into a chmUnitInfo struct; return 1 on success. */
918 static int _chm_parse_PMGL_entry(UChar
**pEntry
, struct chmUnitInfo
*ui
)
923 strLen
= _chm_parse_cword(pEntry
);
924 if (strLen
> CHM_MAX_PATHLEN
)
928 if (! _chm_parse_UTF8(pEntry
, strLen
, ui
->path
))
932 ui
->space
= (int)_chm_parse_cword(pEntry
);
933 ui
->start
= _chm_parse_cword(pEntry
);
934 ui
->length
= _chm_parse_cword(pEntry
);
938 /* find an exact entry in PMGL; return NULL if we fail */
939 static UChar
*_chm_find_in_PMGL(UChar
*page_buf
,
941 const WCHAR
*objPath
)
943 /* XXX: modify this to do a binary search using the nice index structure
944 * that is provided for us.
946 struct chmPmglHeader header
;
952 WCHAR buffer
[CHM_MAX_PATHLEN
+1];
954 /* figure out where to start and end */
956 hremain
= _CHM_PMGL_LEN
;
957 if (! _unmarshal_pmgl_header(&cur
, &hremain
, &header
))
959 end
= page_buf
+ block_len
- (header
.free_space
);
961 /* now, scan progressively */
966 strLen
= _chm_parse_cword(&cur
);
967 if (! _chm_parse_UTF8(&cur
, strLen
, buffer
))
970 /* check if it is the right name */
971 if (! strcmpiW(buffer
, objPath
))
974 _chm_skip_PMGL_entry_data(&cur
);
980 /* find which block should be searched next for the entry; -1 if no block */
981 static Int32
_chm_find_in_PMGI(UChar
*page_buf
,
983 const WCHAR
*objPath
)
985 /* XXX: modify this to do a binary search using the nice index structure
986 * that is provided for us
988 struct chmPmgiHeader header
;
994 WCHAR buffer
[CHM_MAX_PATHLEN
+1];
996 /* figure out where to start and end */
998 hremain
= _CHM_PMGI_LEN
;
999 if (! _unmarshal_pmgi_header(&cur
, &hremain
, &header
))
1001 end
= page_buf
+ block_len
- (header
.free_space
);
1003 /* now, scan progressively */
1007 strLen
= _chm_parse_cword(&cur
);
1008 if (! _chm_parse_UTF8(&cur
, strLen
, buffer
))
1011 /* check if it is the right name */
1012 if (strcmpiW(buffer
, objPath
) > 0)
1015 /* load next value for path */
1016 page
= (int)_chm_parse_cword(&cur
);
1022 /* resolve a particular object from the archive */
1023 int chm_resolve_object(struct chmFile
*h
,
1024 const WCHAR
*objPath
,
1025 struct chmUnitInfo
*ui
)
1028 * XXX: implement caching scheme for dir pages
1033 /* buffer to hold whatever page we're looking at */
1034 UChar
*page_buf
= HeapAlloc(GetProcessHeap(), 0, h
->block_len
);
1037 curPage
= h
->index_root
;
1039 /* until we have either returned or given up */
1040 while (curPage
!= -1)
1043 /* try to fetch the index page */
1044 if (_chm_fetch_bytes(h
, page_buf
,
1045 h
->dir_offset
+ (UInt64
)curPage
*h
->block_len
,
1046 h
->block_len
) != h
->block_len
)
1048 HeapFree(GetProcessHeap(), 0, page_buf
);
1049 return CHM_RESOLVE_FAILURE
;
1052 /* now, if it is a leaf node: */
1053 if (memcmp(page_buf
, _chm_pmgl_marker
, 4) == 0)
1056 UChar
*pEntry
= _chm_find_in_PMGL(page_buf
,
1061 HeapFree(GetProcessHeap(), 0, page_buf
);
1062 return CHM_RESOLVE_FAILURE
;
1065 /* parse entry and return */
1066 _chm_parse_PMGL_entry(&pEntry
, ui
);
1067 HeapFree(GetProcessHeap(), 0, page_buf
);
1068 return CHM_RESOLVE_SUCCESS
;
1071 /* else, if it is a branch node: */
1072 else if (memcmp(page_buf
, _chm_pmgi_marker
, 4) == 0)
1073 curPage
= _chm_find_in_PMGI(page_buf
, h
->block_len
, objPath
);
1075 /* else, we are confused. give up. */
1078 HeapFree(GetProcessHeap(), 0, page_buf
);
1079 return CHM_RESOLVE_FAILURE
;
1083 /* didn't find anything. fail. */
1084 HeapFree(GetProcessHeap(), 0, page_buf
);
1085 return CHM_RESOLVE_FAILURE
;
1089 * utility methods for dealing with compressed data
1092 /* get the bounds of a compressed block. return 0 on failure */
1093 static int _chm_get_cmpblock_bounds(struct chmFile
*h
,
1098 UChar buffer
[8], *dummy
;
1101 /* for all but the last block, use the reset table */
1102 if (block
< h
->reset_table
.block_count
-1)
1104 /* unpack the start address */
1107 if (_chm_fetch_bytes(h
, buffer
,
1110 + h
->reset_table
.table_offset
1112 remain
) != remain
||
1113 !_unmarshal_uint64(&dummy
, &remain
, start
))
1116 /* unpack the end address */
1119 if (_chm_fetch_bytes(h
, buffer
,
1122 + h
->reset_table
.table_offset
1124 remain
) != remain
||
1125 !_unmarshal_int64(&dummy
, &remain
, len
))
1129 /* for the last block, use the span in addition to the reset table */
1132 /* unpack the start address */
1135 if (_chm_fetch_bytes(h
, buffer
,
1138 + h
->reset_table
.table_offset
1140 remain
) != remain
||
1141 !_unmarshal_uint64(&dummy
, &remain
, start
))
1144 *len
= h
->reset_table
.compressed_len
;
1147 /* compute the length and absolute start address */
1149 *start
+= h
->data_offset
+ h
->cn_unit
.start
;
1154 /* decompress the block. must have lzx_mutex. */
1155 static Int64
_chm_decompress_block(struct chmFile
*h
,
1159 UChar
*cbuffer
= HeapAlloc( GetProcessHeap(), 0,
1160 ((unsigned int)h
->reset_table
.block_len
+ 6144));
1161 UInt64 cmpStart
; /* compressed start */
1162 Int64 cmpLen
; /* compressed len */
1163 int indexSlot
; /* cache index slot */
1164 UChar
*lbuffer
; /* local buffer ptr */
1165 UInt32 blockAlign
= (UInt32
)(block
% h
->reset_blkcount
); /* reset interval align */
1166 UInt32 i
; /* local loop index */
1168 /* let the caching system pull its weight! */
1169 if (block
- blockAlign
<= h
->lzx_last_block
&&
1170 block
>= h
->lzx_last_block
)
1171 blockAlign
= (block
- h
->lzx_last_block
);
1173 /* check if we need previous blocks */
1174 if (blockAlign
!= 0)
1176 /* fetch all required previous blocks since last reset */
1177 for (i
= blockAlign
; i
> 0; i
--)
1179 UInt32 curBlockIdx
= block
- i
;
1181 /* check if we most recently decompressed the previous block */
1182 if (h
->lzx_last_block
!= curBlockIdx
)
1184 if ((curBlockIdx
% h
->reset_blkcount
) == 0)
1187 fprintf(stderr
, "***RESET (1)***\n");
1189 LZXreset(h
->lzx_state
);
1192 indexSlot
= (int)((curBlockIdx
) % h
->cache_num_blocks
);
1193 h
->cache_block_indices
[indexSlot
] = curBlockIdx
;
1194 if (! h
->cache_blocks
[indexSlot
])
1195 h
->cache_blocks
[indexSlot
] =
1196 HeapAlloc(GetProcessHeap(), 0,
1197 (unsigned int)(h
->reset_table
.block_len
));
1198 lbuffer
= h
->cache_blocks
[indexSlot
];
1200 /* decompress the previous block */
1202 fprintf(stderr
, "Decompressing block #%4d (EXTRA)\n", curBlockIdx
);
1204 if (!_chm_get_cmpblock_bounds(h
, curBlockIdx
, &cmpStart
, &cmpLen
) ||
1205 _chm_fetch_bytes(h
, cbuffer
, cmpStart
, cmpLen
) != cmpLen
||
1206 LZXdecompress(h
->lzx_state
, cbuffer
, lbuffer
, (int)cmpLen
,
1207 (int)h
->reset_table
.block_len
) != DECR_OK
)
1210 fprintf(stderr
, " (DECOMPRESS FAILED!)\n");
1212 HeapFree(GetProcessHeap(), 0, cbuffer
);
1216 h
->lzx_last_block
= (int)curBlockIdx
;
1222 if ((block
% h
->reset_blkcount
) == 0)
1225 fprintf(stderr
, "***RESET (2)***\n");
1227 LZXreset(h
->lzx_state
);
1231 /* allocate slot in cache */
1232 indexSlot
= (int)(block
% h
->cache_num_blocks
);
1233 h
->cache_block_indices
[indexSlot
] = block
;
1234 if (! h
->cache_blocks
[indexSlot
])
1235 h
->cache_blocks
[indexSlot
] =
1236 HeapAlloc(GetProcessHeap(), 0, ((unsigned int)h
->reset_table
.block_len
));
1237 lbuffer
= h
->cache_blocks
[indexSlot
];
1240 /* decompress the block we actually want */
1242 fprintf(stderr
, "Decompressing block #%4d (REAL )\n", block
);
1244 if (! _chm_get_cmpblock_bounds(h
, block
, &cmpStart
, &cmpLen
) ||
1245 _chm_fetch_bytes(h
, cbuffer
, cmpStart
, cmpLen
) != cmpLen
||
1246 LZXdecompress(h
->lzx_state
, cbuffer
, lbuffer
, (int)cmpLen
,
1247 (int)h
->reset_table
.block_len
) != DECR_OK
)
1250 fprintf(stderr
, " (DECOMPRESS FAILED!)\n");
1252 HeapFree(GetProcessHeap(), 0, cbuffer
);
1255 h
->lzx_last_block
= (int)block
;
1257 /* XXX: modify LZX routines to return the length of the data they
1258 * decompressed and return that instead, for an extra sanity check.
1260 HeapFree(GetProcessHeap(), 0, cbuffer
);
1261 return h
->reset_table
.block_len
;
/*
 * _chm_decompress_region: copy part of one decompressed block into buf.
 *
 * NOTE(review): this listing is an extraction with lines elided (the embedded
 * numbering skips), so the full parameter list, the braces and the return
 * statements are not visible; the comments below describe only the statements
 * that are shown.
 *
 * Visible behaviour: `start` is split into a block number and an offset using
 * h->reset_table.block_len, nLen is clamped so the copy stops at the end of
 * that block, and the data comes either from the block cache or from a call
 * to _chm_decompress_block.
 */
1264 /* grab a region from a compressed block */
1265 static Int64
_chm_decompress_region(struct chmFile
*h
,
1270 UInt64 nBlock
, nOffset
;
1273 UChar
*ubuffer
= NULL
;
1278 /* figure out what we need to read */
/* nBlock: index of the block containing `start` */
1279 nBlock
= start
/ h
->reset_table
.block_len
;
/* nOffset: byte offset of `start` inside that block */
1280 nOffset
= start
% h
->reset_table
.block_len
;
/* clamp nLen to the bytes remaining in this block after nOffset */
1282 if (nLen
> (h
->reset_table
.block_len
- nOffset
))
1283 nLen
= h
->reset_table
.block_len
- nOffset
;
1285 /* if block is cached, return data from it. */
/* lock order: lzx_mutex first, then cache_mutex */
1286 CHM_ACQUIRE_LOCK(h
->lzx_mutex
);
1287 CHM_ACQUIRE_LOCK(h
->cache_mutex
);
/* cache slot is nBlock % h->cache_num_blocks; it is a hit only when the slot
 * records this block number and holds a non-NULL buffer */
1288 if (h
->cache_block_indices
[nBlock
% h
->cache_num_blocks
] == nBlock
&&
1289 h
->cache_blocks
[nBlock
% h
->cache_num_blocks
] != NULL
)
/* arguments of the cached copy: source is the cached block at nOffset, length
 * is nLen (the call name and destination are on an elided line) */
1292 h
->cache_blocks
[nBlock
% h
->cache_num_blocks
] + nOffset
,
1293 (unsigned int)nLen
);
/* both locks are dropped here, in reverse order of acquisition */
1294 CHM_RELEASE_LOCK(h
->cache_mutex
);
1295 CHM_RELEASE_LOCK(h
->lzx_mutex
);
/* only cache_mutex is released here; lzx_mutex is released after the memcpy
 * further down */
1298 CHM_RELEASE_LOCK(h
->cache_mutex
);
1300 /* data request not satisfied, so... start up the decompressor machine */
/* NOTE(review): the condition guarding this initialisation is not visible in
 * this listing.  ffs() yields the 1-based position of the lowest set bit, so
 * the value passed to LZXinit is log2(h->window_size) if window_size is a
 * power of two -- confirm that assumption. */
1303 int window_size
= ffs(h
->window_size
) - 1;
1304 h
->lzx_last_block
= -1;
1305 h
->lzx_state
= LZXinit(window_size
);
1308 /* decompress some data */
/* ubuffer is passed by address and read below, so it is presumably pointed at
 * the decompressed block by _chm_decompress_block -- confirm against callee */
1309 gotLen
= _chm_decompress_block(h
, nBlock
, &ubuffer
);
/* copy nLen bytes starting nOffset bytes into the decompressed block */
1312 memcpy(buf
, ubuffer
+nOffset
, (unsigned int)nLen
);
1313 CHM_RELEASE_LOCK(h
->lzx_mutex
);
/*
 * chm_retrieve_object: read (part of) the object described by ui, starting at
 * offset addr within the object, for at most len bytes.
 *
 * NOTE(review): this listing has lines elided (part of the parameter list,
 * braces, returns and the body that follows the last comment), so only the
 * visible statements are described.
 *
 * Visible behaviour: addr is range-checked against ui->length, len is clipped
 * to the end of the object, uncompressed objects are read with
 * _chm_fetch_bytes and other objects go through _chm_decompress_region.
 */
1317 /* retrieve (part of) an object */
1318 LONGINT64
chm_retrieve_object(struct chmFile
*h
,
1319 struct chmUnitInfo
*ui
,
1324 /* must be valid file handle */
1328 /* starting address must be in correct range */
/* addr at or beyond ui->length is out of range (the action taken is on an
 * elided line) */
1329 if (addr
>= ui
->length
)
/* clip len so that addr + len does not run past ui->length */
1333 if (addr
+ len
> ui
->length
)
1334 len
= ui
->length
- addr
;
1336 /* if the file is uncompressed, it's simple */
1337 if (ui
->space
== CHM_UNCOMPRESSED
)
/* file position of the data is h->data_offset + ui->start + addr */
1340 return _chm_fetch_bytes(h
,
1342 h
->data_offset
+ ui
->start
+ addr
,
1346 /* else if the file is compressed, it's a little trickier */
1347 else /* ui->space == CHM_COMPRESSED */
/* swath receives the result of each _chm_decompress_region call; total starts
 * at 0 (its updates are not visible in this listing) */
1349 Int64 swath
=0, total
=0;
1351 /* if compression is not enabled for this file... */
1352 if (! h
->compression_enabled
)
1357 /* swill another mouthful */
/* the position passed is ui->start + addr, without h->data_offset */
1358 swath
= _chm_decompress_region(h
, buf
, ui
->start
+ addr
, len
);
1360 /* if we didn't get any... */
/*
 * chm_enumerate: walk the chain of directory index pages starting at
 * h->index_head, parse the entries of each page into a chmUnitInfo and call
 * the enumerator e for entries selected by the `what` bit mask.
 *
 * NOTE(review): this listing has lines elided (part of the parameter list,
 * several declarations, braces, the inner loop header, the switch header and
 * all continue/return statements), so only the visible statements are
 * described.
 */
1376 /* enumerate the objects in the .chm archive */
1377 int chm_enumerate(struct chmFile
*h
,
1384 /* buffer to hold whatever page we're looking at */
/* one page of h->block_len bytes; freed with HeapFree at each visible exit.
 * NOTE(review): no NULL check on this allocation is visible -- confirm */
1385 UChar
*page_buf
= HeapAlloc(GetProcessHeap(), 0, h
->block_len
);
1386 struct chmPmglHeader header
;
1389 unsigned int lenRemain
;
1392 /* the current ui */
1393 struct chmUnitInfo ui
;
/* start from the first index page */
1397 curPage
= h
->index_head
;
1399 /* until we have either returned or given up */
/* a page number of -1 ends the chain */
1400 while (curPage
!= -1)
1403 /* try to fetch the index page */
/* page curPage is read from h->dir_offset + curPage * h->block_len; anything
 * other than a full h->block_len read frees the buffer */
1404 if (_chm_fetch_bytes(h
,
1406 h
->dir_offset
+ (UInt64
)curPage
*h
->block_len
,
1407 h
->block_len
) != h
->block_len
)
1409 HeapFree(GetProcessHeap(), 0, page_buf
);
1413 /* figure out start and end for this page */
/* lenRemain is passed by address together with the cursor; presumably the
 * number of header bytes available to _unmarshal_pmgl_header -- confirm */
1415 lenRemain
= _CHM_PMGL_LEN
;
1416 if (! _unmarshal_pmgl_header(&cur
, &lenRemain
, &header
))
1418 HeapFree(GetProcessHeap(), 0, page_buf
);
/* end excludes the last header.free_space bytes of the page */
1421 end
= page_buf
+ h
->block_len
- (header
.free_space
);
1423 /* loop over this page */
/* a parse failure frees the page buffer */
1426 if (! _chm_parse_PMGL_entry(&cur
, &ui
))
1428 HeapFree(GetProcessHeap(), 0, page_buf
);
1432 /* get the length of the path */
/* despite the comment above, the stored value is length - 1, i.e. the index
 * of the last character.  NOTE(review): if ui.path could be empty this index
 * would be out of range -- confirm entries always have a non-empty path */
1433 ui_path_len
= strlenW(ui
.path
)-1;
1435 /* check for DIRS */
/* a trailing '/' marks a directory entry */
1436 if (ui
.path
[ui_path_len
] == '/' && !(what
& CHM_ENUMERATE_DIRS
))
1439 /* check for FILES */
1440 if (ui
.path
[ui_path_len
] != '/' && !(what
& CHM_ENUMERATE_FILES
))
1443 /* check for NORMAL vs. META */
/* classification by leading characters: a path starting with '/' is SPECIAL
 * when its second character is '#' or '$' and NORMAL otherwise; the META
 * assignment presumably belongs to the elided else branch */
1444 if (ui
.path
[0] == '/')
1447 /* check for NORMAL vs. SPECIAL */
1448 if (ui
.path
[1] == '#' || ui
.path
[1] == '$')
1449 flag
= CHM_ENUMERATE_SPECIAL
;
1451 flag
= CHM_ENUMERATE_NORMAL
;
1454 flag
= CHM_ENUMERATE_META
;
/* entries whose class bit is not set in `what` are filtered here */
1455 if (! (what
& flag
))
1458 /* call the enumerator */
/* the callback receives the file handle, the parsed entry and the caller's
 * context; its status selects one of the cases below */
1460 int status
= (*e
)(h
, &ui
, context
);
/* the page buffer is freed in the FAILURE and SUCCESS cases only */
1463 case CHM_ENUMERATOR_FAILURE
:
1464 HeapFree(GetProcessHeap(), 0, page_buf
);
1466 case CHM_ENUMERATOR_CONTINUE
:
1468 case CHM_ENUMERATOR_SUCCESS
:
1469 HeapFree(GetProcessHeap(), 0, page_buf
);
1477 /* advance to next page */
1478 curPage
= header
.block_next
;
/* release the page buffer once the page chain has been exhausted */
1481 HeapFree(GetProcessHeap(), 0, page_buf
);
/*
 * chm_enumerate_dir: walk the directory index pages starting at
 * h->index_head and call the enumerator e for entries whose path begins with
 * `prefix` (compared with strncmpiW) and whose class is selected by `what`.
 *
 * NOTE(review): this listing has lines elided (part of the parameter list,
 * several declarations and initialisations, braces, loop and switch headers,
 * and all continue/return statements), so only the visible statements are
 * described.
 */
1485 int chm_enumerate_dir(struct chmFile
*h
,
1486 const WCHAR
*prefix
,
1492 * XXX: do this efficiently (i.e. using the tree index)
1497 /* buffer to hold whatever page we're looking at */
/* one page of h->block_len bytes; freed with HeapFree at each visible exit.
 * NOTE(review): no NULL check on this allocation is visible -- confirm */
1498 UChar
*page_buf
= HeapAlloc(GetProcessHeap(), 0, h
->block_len
);
1499 struct chmPmglHeader header
;
1502 unsigned int lenRemain
;
1504 /* set to 1 once we've started */
1507 /* the current ui */
1508 struct chmUnitInfo ui
;
1512 /* the length of the prefix */
/* working copy of prefix, which gets a trailing '/' appended below */
1513 WCHAR prefixRectified
[CHM_MAX_PATHLEN
+1];
/* NOTE(review): lastPath has CHM_MAX_PATHLEN elements, one fewer than
 * prefixRectified, and is filled by an unbounded strcpyW(lastPath, ui.path)
 * below -- confirm ui.path never needs more than CHM_MAX_PATHLEN WCHARs
 * including its terminator */
1515 WCHAR lastPath
[CHM_MAX_PATHLEN
];
/* start from the first index page */
1519 curPage
= h
->index_head
;
1521 /* initialize pathname state */
/* lstrcpynW's count includes the terminator, so at most CHM_MAX_PATHLEN-1
 * characters of prefix are kept */
1522 lstrcpynW(prefixRectified
, prefix
, CHM_MAX_PATHLEN
);
1523 prefixLen
= strlenW(prefixRectified
);
/* NOTE(review): prefixLen-1 is a valid index only when prefixLen != 0; any
 * guard for an empty prefix is not visible in this listing */
1526 if (prefixRectified
[prefixLen
-1] != '/')
/* append '/' and re-terminate the string */
1528 prefixRectified
[prefixLen
] = '/';
1529 prefixRectified
[prefixLen
+1] = '\0';
1536 /* until we have either returned or given up */
/* a page number of -1 ends the chain */
1537 while (curPage
!= -1)
1540 /* try to fetch the index page */
/* page curPage is read from h->dir_offset + curPage * h->block_len; anything
 * other than a full h->block_len read frees the buffer */
1541 if (_chm_fetch_bytes(h
,
1543 h
->dir_offset
+ (UInt64
)curPage
*h
->block_len
,
1544 h
->block_len
) != h
->block_len
)
1546 HeapFree(GetProcessHeap(), 0, page_buf
);
1550 /* figure out start and end for this page */
/* lenRemain is passed by address together with the cursor; presumably the
 * number of header bytes available to _unmarshal_pmgl_header -- confirm */
1552 lenRemain
= _CHM_PMGL_LEN
;
1553 if (! _unmarshal_pmgl_header(&cur
, &lenRemain
, &header
))
1555 HeapFree(GetProcessHeap(), 0, page_buf
);
/* end excludes the last header.free_space bytes of the page */
1558 end
= page_buf
+ h
->block_len
- (header
.free_space
);
1560 /* loop over this page */
/* a parse failure frees the page buffer */
1563 if (! _chm_parse_PMGL_entry(&cur
, &ui
))
1565 HeapFree(GetProcessHeap(), 0, page_buf
);
1569 /* check if we should start */
/* start test: a zero-length entry whose first prefixLen characters match the
 * rectified prefix */
1572 if (ui
.length
== 0 && strncmpiW(ui
.path
, prefixRectified
, prefixLen
) == 0)
/* true when nothing follows the prefix, i.e. the path is the prefix itself */
1577 if (ui
.path
[prefixLen
] == '\0')
1581 /* check if we should stop */
/* an entry that no longer matches the prefix frees the page buffer */
1584 if (strncmpiW(ui
.path
, prefixRectified
, prefixLen
) != 0)
1586 HeapFree(GetProcessHeap(), 0, page_buf
);
1591 /* check if we should include this path */
/* lastPathLen == -1 presumably means no previous path has been recorded yet
 * (its initialisation is on an elided line); otherwise the first lastPathLen
 * characters are compared with the previously recorded path */
1592 if (lastPathLen
!= -1)
1594 if (strncmpiW(ui
.path
, lastPath
, lastPathLen
) == 0)
/* record this path and its length for the comparison on a later entry */
1597 strcpyW(lastPath
, ui
.path
);
1598 lastPathLen
= strlenW(lastPath
);
1600 /* get the length of the path */
/* despite the comment above, the stored value is length - 1, i.e. the index
 * of the last character */
1601 ui_path_len
= strlenW(ui
.path
)-1;
1603 /* check for DIRS */
/* a trailing '/' marks a directory entry */
1604 if (ui
.path
[ui_path_len
] == '/' && !(what
& CHM_ENUMERATE_DIRS
))
1607 /* check for FILES */
1608 if (ui
.path
[ui_path_len
] != '/' && !(what
& CHM_ENUMERATE_FILES
))
1611 /* check for NORMAL vs. META */
/* classification by leading characters: a path starting with '/' is SPECIAL
 * when its second character is '#' or '$' and NORMAL otherwise; the META
 * assignment presumably belongs to the elided else branch */
1612 if (ui
.path
[0] == '/')
1615 /* check for NORMAL vs. SPECIAL */
1616 if (ui
.path
[1] == '#' || ui
.path
[1] == '$')
1617 flag
= CHM_ENUMERATE_SPECIAL
;
1619 flag
= CHM_ENUMERATE_NORMAL
;
1622 flag
= CHM_ENUMERATE_META
;
/* entries whose class bit is not set in `what` are filtered here */
1623 if (! (what
& flag
))
1626 /* call the enumerator */
/* the callback receives the file handle, the parsed entry and the caller's
 * context; its status selects one of the cases below */
1628 int status
= (*e
)(h
, &ui
, context
);
/* the page buffer is freed in the FAILURE and SUCCESS cases only */
1631 case CHM_ENUMERATOR_FAILURE
:
1632 HeapFree(GetProcessHeap(), 0, page_buf
);
1634 case CHM_ENUMERATOR_CONTINUE
:
1636 case CHM_ENUMERATOR_SUCCESS
:
1637 HeapFree(GetProcessHeap(), 0, page_buf
);
1645 /* advance to next page */
1646 curPage
= header
.block_next
;
/* release the page buffer once the page chain has been exhausted */
1649 HeapFree(GetProcessHeap(), 0, page_buf
);