2 * Copyright (c) 2008 Christos Zoulas
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
27 * Parse composite document files, the format used in Microsoft Office
28 * document files before they switched to zipped xml.
29 * Info from: http://sc.openoffice.org/compdocfileformat.pdf
35 FILE_RCSID("@(#)$File: cdf.c,v 1.30 2009/05/06 14:29:47 christos Exp $")
55 #define __arraycount(a) (sizeof(a) / sizeof(a[0]))
59 #define DPRINTF(a) printf a, fflush(stdout)
69 #define NEED_SWAP (cdf_bo.u == (uint32_t)0x01020304)
71 #define CDF_TOLE8(x) (NEED_SWAP ? cdf_tole8(x) : (uint64_t)(x))
72 #define CDF_TOLE4(x) (NEED_SWAP ? cdf_tole4(x) : (uint32_t)(x))
73 #define CDF_TOLE2(x) (NEED_SWAP ? cdf_tole2(x) : (uint16_t)(x))
79 cdf_tole2(uint16_t sv
)
82 uint8_t *s
= (uint8_t *)(void *)&sv
;
83 uint8_t *d
= (uint8_t *)(void *)&rv
;
93 cdf_tole4(uint32_t sv
)
96 uint8_t *s
= (uint8_t *)(void *)&sv
;
97 uint8_t *d
= (uint8_t *)(void *)&rv
;
109 cdf_tole8(uint64_t sv
)
112 uint8_t *s
= (uint8_t *)(void *)&sv
;
113 uint8_t *d
= (uint8_t *)(void *)&rv
;
125 #define CDF_UNPACK(a) \
126 (void)memcpy(&(a), &buf[len], sizeof(a)), len += sizeof(a)
127 #define CDF_UNPACKA(a) \
128 (void)memcpy((a), &buf[len], sizeof(a)), len += sizeof(a)
131 cdf_swap_header(cdf_header_t
*h
)
135 h
->h_magic
= CDF_TOLE8(h
->h_magic
);
136 h
->h_uuid
[0] = CDF_TOLE8(h
->h_uuid
[0]);
137 h
->h_uuid
[1] = CDF_TOLE8(h
->h_uuid
[1]);
138 h
->h_revision
= CDF_TOLE2(h
->h_revision
);
139 h
->h_version
= CDF_TOLE2(h
->h_version
);
140 h
->h_byte_order
= CDF_TOLE2(h
->h_byte_order
);
141 h
->h_sec_size_p2
= CDF_TOLE2(h
->h_sec_size_p2
);
142 h
->h_short_sec_size_p2
= CDF_TOLE2(h
->h_short_sec_size_p2
);
143 h
->h_num_sectors_in_sat
= CDF_TOLE4(h
->h_num_sectors_in_sat
);
144 h
->h_secid_first_directory
= CDF_TOLE4(h
->h_secid_first_directory
);
145 h
->h_min_size_standard_stream
=
146 CDF_TOLE4(h
->h_min_size_standard_stream
);
147 h
->h_secid_first_sector_in_short_sat
=
148 CDF_TOLE4(h
->h_secid_first_sector_in_short_sat
);
149 h
->h_num_sectors_in_short_sat
=
150 CDF_TOLE4(h
->h_num_sectors_in_short_sat
);
151 h
->h_secid_first_sector_in_master_sat
=
152 CDF_TOLE4(h
->h_secid_first_sector_in_master_sat
);
153 h
->h_num_sectors_in_master_sat
=
154 CDF_TOLE4(h
->h_num_sectors_in_master_sat
);
155 for (i
= 0; i
< __arraycount(h
->h_master_sat
); i
++)
156 h
->h_master_sat
[i
] = CDF_TOLE4(h
->h_master_sat
[i
]);
160 cdf_unpack_header(cdf_header_t
*h
, char *buf
)
165 CDF_UNPACK(h
->h_magic
);
166 CDF_UNPACKA(h
->h_uuid
);
167 CDF_UNPACK(h
->h_revision
);
168 CDF_UNPACK(h
->h_version
);
169 CDF_UNPACK(h
->h_byte_order
);
170 CDF_UNPACK(h
->h_sec_size_p2
);
171 CDF_UNPACK(h
->h_short_sec_size_p2
);
172 CDF_UNPACKA(h
->h_unused0
);
173 CDF_UNPACK(h
->h_num_sectors_in_sat
);
174 CDF_UNPACK(h
->h_secid_first_directory
);
175 CDF_UNPACKA(h
->h_unused1
);
176 CDF_UNPACK(h
->h_min_size_standard_stream
);
177 CDF_UNPACK(h
->h_secid_first_sector_in_short_sat
);
178 CDF_UNPACK(h
->h_num_sectors_in_short_sat
);
179 CDF_UNPACK(h
->h_secid_first_sector_in_master_sat
);
180 CDF_UNPACK(h
->h_num_sectors_in_master_sat
);
181 for (i
= 0; i
< __arraycount(h
->h_master_sat
); i
++)
182 CDF_UNPACK(h
->h_master_sat
[i
]);
186 cdf_swap_dir(cdf_directory_t
*d
)
188 d
->d_namelen
= CDF_TOLE2(d
->d_namelen
);
189 d
->d_left_child
= CDF_TOLE4(d
->d_left_child
);
190 d
->d_right_child
= CDF_TOLE4(d
->d_right_child
);
191 d
->d_storage
= CDF_TOLE4(d
->d_storage
);
192 d
->d_storage_uuid
[0] = CDF_TOLE8(d
->d_storage_uuid
[0]);
193 d
->d_storage_uuid
[1] = CDF_TOLE8(d
->d_storage_uuid
[1]);
194 d
->d_flags
= CDF_TOLE4(d
->d_flags
);
195 d
->d_created
= CDF_TOLE8(d
->d_created
);
196 d
->d_modified
= CDF_TOLE8(d
->d_modified
);
197 d
->d_stream_first_sector
= CDF_TOLE4(d
->d_stream_first_sector
);
198 d
->d_size
= CDF_TOLE4(d
->d_size
);
202 cdf_swap_class(cdf_classid_t
*d
)
204 d
->cl_dword
= CDF_TOLE4(d
->cl_dword
);
205 d
->cl_word
[0] = CDF_TOLE2(d
->cl_word
[0]);
206 d
->cl_word
[1] = CDF_TOLE2(d
->cl_word
[1]);
210 cdf_unpack_dir(cdf_directory_t
*d
, char *buf
)
214 CDF_UNPACKA(d
->d_name
);
215 CDF_UNPACK(d
->d_namelen
);
216 CDF_UNPACK(d
->d_type
);
217 CDF_UNPACK(d
->d_color
);
218 CDF_UNPACK(d
->d_left_child
);
219 CDF_UNPACK(d
->d_right_child
);
220 CDF_UNPACK(d
->d_storage
);
221 CDF_UNPACKA(d
->d_storage_uuid
);
222 CDF_UNPACK(d
->d_flags
);
223 CDF_UNPACK(d
->d_created
);
224 CDF_UNPACK(d
->d_modified
);
225 CDF_UNPACK(d
->d_stream_first_sector
);
226 CDF_UNPACK(d
->d_size
);
227 CDF_UNPACK(d
->d_unused0
);
231 cdf_check_stream_offset(const cdf_stream_t
*sst
, const void *p
, size_t tail
)
233 const char *b
= (const char *)sst
->sst_tab
;
234 const char *e
= ((const char *)p
) + tail
;
235 if (e
>= b
&& (size_t)(e
- b
) < sst
->sst_dirlen
* sst
->sst_len
)
237 DPRINTF((stderr
, "offset begin %p end %p %zu >= %zu\n", b
, e
,
238 (size_t)(e
- b
), sst
->sst_dirlen
* sst
->sst_len
));
244 cdf_read(const cdf_info_t
*info
, off_t off
, void *buf
, size_t len
)
246 size_t siz
= (size_t)off
+ len
;
248 if ((off_t
)(off
+ len
) != (off_t
)siz
) {
253 if (info
->i_buf
!= NULL
&& info
->i_len
>= siz
) {
254 (void)memcpy(buf
, &info
->i_buf
[off
], len
);
258 if (info
->i_fd
== -1)
261 if (lseek(info
->i_fd
, off
, SEEK_SET
) == (off_t
)-1)
264 if (read(info
->i_fd
, buf
, len
) != (ssize_t
)len
)
271 cdf_read_header(const cdf_info_t
*info
, cdf_header_t
*h
)
275 (void)memcpy(cdf_bo
.s
, "\01\02\03\04", 4);
276 if (cdf_read(info
, (off_t
)0, buf
, sizeof(buf
)) == -1)
278 cdf_unpack_header(h
, buf
);
280 if (h
->h_magic
!= CDF_MAGIC
) {
281 DPRINTF(("Bad magic 0x%llx != 0x%llx\n",
282 (unsigned long long)h
->h_magic
,
283 (unsigned long long)CDF_MAGIC
));
286 if (h
->h_sec_size_p2
> 20) {
287 DPRINTF(("Bad sector size 0x%u\n", h
->h_sec_size_p2
));
290 if (h
->h_short_sec_size_p2
> 20) {
291 DPRINTF(("Bad short sector size 0x%u\n",
292 h
->h_short_sec_size_p2
));
303 cdf_read_sector(const cdf_info_t
*info
, void *buf
, size_t offs
, size_t len
,
304 const cdf_header_t
*h
, cdf_secid_t id
)
306 assert((size_t)CDF_SEC_SIZE(h
) == len
);
307 return cdf_read(info
, (off_t
)CDF_SEC_POS(h
, id
),
308 ((char *)buf
) + offs
, len
);
312 cdf_read_short_sector(const cdf_stream_t
*sst
, void *buf
, size_t offs
,
313 size_t len
, const cdf_header_t
*h
, cdf_secid_t id
)
315 assert((size_t)CDF_SHORT_SEC_SIZE(h
) == len
);
316 (void)memcpy(((char *)buf
) + offs
,
317 ((const char *)sst
->sst_tab
) + CDF_SHORT_SEC_POS(h
, id
), len
);
322 * Read the sector allocation table.
325 cdf_read_sat(const cdf_info_t
*info
, cdf_header_t
*h
, cdf_sat_t
*sat
)
328 size_t ss
= CDF_SEC_SIZE(h
);
329 cdf_secid_t
*msa
, mid
, sec
;
330 size_t nsatpersec
= (ss
/ sizeof(mid
)) - 1;
332 for (i
= 0; i
< __arraycount(h
->h_master_sat
); i
++)
333 if (h
->h_master_sat
[i
] == CDF_SECID_FREE
)
336 #define CDF_SEC_LIMIT (UINT32_MAX / (4 * ss))
337 if (h
->h_num_sectors_in_master_sat
> CDF_SEC_LIMIT
/ nsatpersec
||
339 DPRINTF(("Number of sectors in master SAT too big %u %zu\n",
340 h
->h_num_sectors_in_master_sat
, i
));
345 sat
->sat_len
= h
->h_num_sectors_in_master_sat
* nsatpersec
+ i
;
346 DPRINTF(("sat_len = %zu ss = %zu\n", sat
->sat_len
, ss
));
347 if ((sat
->sat_tab
= calloc(sat
->sat_len
, ss
)) == NULL
)
350 for (i
= 0; i
< __arraycount(h
->h_master_sat
); i
++) {
351 if (h
->h_master_sat
[i
] < 0)
353 if (cdf_read_sector(info
, sat
->sat_tab
, ss
* i
, ss
, h
,
354 h
->h_master_sat
[i
]) != (ssize_t
)ss
) {
355 DPRINTF(("Reading sector %d", h
->h_master_sat
[i
]));
360 if ((msa
= calloc(1, ss
)) == NULL
)
363 mid
= h
->h_secid_first_sector_in_master_sat
;
364 for (j
= 0; j
< h
->h_num_sectors_in_master_sat
; j
++) {
367 if (j
>= CDF_LOOP_LIMIT
) {
368 DPRINTF(("Reading master sector loop limit"));
372 if (cdf_read_sector(info
, msa
, 0, ss
, h
, mid
) != (ssize_t
)ss
) {
373 DPRINTF(("Reading master sector %d", mid
));
376 for (k
= 0; k
< nsatpersec
; k
++, i
++) {
377 sec
= CDF_TOLE4(msa
[k
]);
380 if (i
>= sat
->sat_len
) {
381 DPRINTF(("Out of bounds reading MSA %u >= %u",
386 if (cdf_read_sector(info
, sat
->sat_tab
, ss
* i
, ss
, h
,
387 sec
) != (ssize_t
)ss
) {
388 DPRINTF(("Reading sector %d",
393 mid
= CDF_TOLE4(msa
[nsatpersec
]);
407 cdf_count_chain(const cdf_sat_t
*sat
, cdf_secid_t sid
, size_t size
)
410 cdf_secid_t maxsector
= (cdf_secid_t
)(sat
->sat_len
* size
);
413 for (j
= i
= 0; sid
>= 0; i
++, j
++) {
414 DPRINTF((" %d", sid
));
415 if (j
>= CDF_LOOP_LIMIT
) {
416 DPRINTF(("Counting chain loop limit"));
420 if (sid
> maxsector
) {
421 DPRINTF(("Sector %d > %d\n", sid
, maxsector
));
425 sid
= CDF_TOLE4(sat
->sat_tab
[sid
]);
432 cdf_read_long_sector_chain(const cdf_info_t
*info
, const cdf_header_t
*h
,
433 const cdf_sat_t
*sat
, cdf_secid_t sid
, size_t len
, cdf_stream_t
*scn
)
435 size_t ss
= CDF_SEC_SIZE(h
), i
, j
;
437 scn
->sst_len
= cdf_count_chain(sat
, sid
, ss
);
438 scn
->sst_dirlen
= len
;
440 if (scn
->sst_len
== (size_t)-1)
443 scn
->sst_tab
= calloc(scn
->sst_len
, ss
);
444 if (scn
->sst_tab
== NULL
)
447 for (j
= i
= 0; sid
>= 0; i
++, j
++) {
448 if (j
>= CDF_LOOP_LIMIT
) {
449 DPRINTF(("Read long sector chain loop limit"));
453 if (i
>= scn
->sst_len
) {
454 DPRINTF(("Out of bounds reading long sector chain "
455 "%u > %u\n", i
, scn
->sst_len
));
459 if ((nr
= cdf_read_sector(info
, scn
->sst_tab
, i
* ss
, ss
, h
,
460 sid
)) != (ssize_t
)ss
) {
461 if (i
== scn
->sst_len
- 1 && nr
> 0) {
462 /* Last sector might be truncated */
465 DPRINTF(("Reading long sector chain %d", sid
));
468 sid
= CDF_TOLE4(sat
->sat_tab
[sid
]);
477 cdf_read_short_sector_chain(const cdf_header_t
*h
,
478 const cdf_sat_t
*ssat
, const cdf_stream_t
*sst
,
479 cdf_secid_t sid
, size_t len
, cdf_stream_t
*scn
)
481 size_t ss
= CDF_SHORT_SEC_SIZE(h
), i
, j
;
482 scn
->sst_len
= cdf_count_chain(ssat
, sid
, CDF_SEC_SIZE(h
));
483 scn
->sst_dirlen
= len
;
485 if (sst
->sst_tab
== NULL
|| scn
->sst_len
== (size_t)-1)
488 scn
->sst_tab
= calloc(scn
->sst_len
, ss
);
489 if (scn
->sst_tab
== NULL
)
492 for (j
= i
= 0; sid
>= 0; i
++, j
++) {
493 if (j
>= CDF_LOOP_LIMIT
) {
494 DPRINTF(("Read short sector chain loop limit"));
498 if (i
>= scn
->sst_len
) {
499 DPRINTF(("Out of bounds reading short sector chain "
500 "%u > %u\n", i
, scn
->sst_len
));
504 if (cdf_read_short_sector(sst
, scn
->sst_tab
, i
* ss
, ss
, h
,
505 sid
) != (ssize_t
)ss
) {
506 DPRINTF(("Reading short sector chain %d", sid
));
509 sid
= CDF_TOLE4(ssat
->sat_tab
[sid
]);
518 cdf_read_sector_chain(const cdf_info_t
*info
, const cdf_header_t
*h
,
519 const cdf_sat_t
*sat
, const cdf_sat_t
*ssat
, const cdf_stream_t
*sst
,
520 cdf_secid_t sid
, size_t len
, cdf_stream_t
*scn
)
523 if (len
< h
->h_min_size_standard_stream
)
524 return cdf_read_short_sector_chain(h
, ssat
, sst
, sid
, len
,
527 return cdf_read_long_sector_chain(info
, h
, sat
, sid
, len
, scn
);
531 cdf_read_dir(const cdf_info_t
*info
, const cdf_header_t
*h
,
532 const cdf_sat_t
*sat
, cdf_dir_t
*dir
)
535 size_t ss
= CDF_SEC_SIZE(h
), ns
, nd
;
537 cdf_secid_t sid
= h
->h_secid_first_directory
;
539 ns
= cdf_count_chain(sat
, sid
, ss
);
540 if (ns
== (size_t)-1)
543 nd
= ss
/ CDF_DIRECTORY_SIZE
;
545 dir
->dir_len
= ns
* nd
;
546 dir
->dir_tab
= calloc(dir
->dir_len
, sizeof(dir
->dir_tab
[0]));
547 if (dir
->dir_tab
== NULL
)
550 if ((buf
= malloc(ss
)) == NULL
) {
555 for (j
= i
= 0; i
< ns
; i
++, j
++) {
556 if (j
>= CDF_LOOP_LIMIT
) {
557 DPRINTF(("Read dir loop limit"));
561 if (cdf_read_sector(info
, buf
, 0, ss
, h
, sid
) != (ssize_t
)ss
) {
562 DPRINTF(("Reading directory sector %d", sid
));
565 for (j
= 0; j
< nd
; j
++) {
566 cdf_unpack_dir(&dir
->dir_tab
[i
* nd
+ j
],
567 &buf
[j
* CDF_DIRECTORY_SIZE
]);
569 sid
= CDF_TOLE4(sat
->sat_tab
[sid
]);
572 for (i
= 0; i
< dir
->dir_len
; i
++)
573 cdf_swap_dir(&dir
->dir_tab
[i
]);
584 cdf_read_ssat(const cdf_info_t
*info
, const cdf_header_t
*h
,
585 const cdf_sat_t
*sat
, cdf_sat_t
*ssat
)
588 size_t ss
= CDF_SEC_SIZE(h
);
589 cdf_secid_t sid
= h
->h_secid_first_sector_in_short_sat
;
591 ssat
->sat_len
= cdf_count_chain(sat
, sid
, CDF_SEC_SIZE(h
));
592 if (ssat
->sat_len
== (size_t)-1)
595 ssat
->sat_tab
= calloc(ssat
->sat_len
, ss
);
596 if (ssat
->sat_tab
== NULL
)
599 for (j
= i
= 0; sid
>= 0; i
++, j
++) {
600 if (j
>= CDF_LOOP_LIMIT
) {
601 DPRINTF(("Read short sat sector loop limit"));
605 if (i
>= ssat
->sat_len
) {
606 DPRINTF(("Out of bounds reading short sector chain "
607 "%u > %u\n", i
, ssat
->sat_len
));
611 if (cdf_read_sector(info
, ssat
->sat_tab
, i
* ss
, ss
, h
, sid
) !=
613 DPRINTF(("Reading short sat sector %d", sid
));
616 sid
= CDF_TOLE4(sat
->sat_tab
[sid
]);
625 cdf_read_short_stream(const cdf_info_t
*info
, const cdf_header_t
*h
,
626 const cdf_sat_t
*sat
, const cdf_dir_t
*dir
, cdf_stream_t
*scn
)
629 const cdf_directory_t
*d
;
631 for (i
= 0; i
< dir
->dir_len
; i
++)
632 if (dir
->dir_tab
[i
].d_type
== CDF_DIR_TYPE_ROOT_STORAGE
)
635 /* If it is not there, just fake it; some docs don't have it */
636 if (i
== dir
->dir_len
)
638 d
= &dir
->dir_tab
[i
];
640 /* If it is not there, just fake it; some docs don't have it */
641 if (d
->d_stream_first_sector
< 0)
644 return cdf_read_long_sector_chain(info
, h
, sat
,
645 d
->d_stream_first_sector
, d
->d_size
, scn
);
654 cdf_namecmp(const char *d
, const uint16_t *s
, size_t l
)
656 for (; l
--; d
++, s
++)
657 if (*d
!= CDF_TOLE2(*s
))
658 return (unsigned char)*d
- CDF_TOLE2(*s
);
663 cdf_read_summary_info(const cdf_info_t
*info
, const cdf_header_t
*h
,
664 const cdf_sat_t
*sat
, const cdf_sat_t
*ssat
, const cdf_stream_t
*sst
,
665 const cdf_dir_t
*dir
, cdf_stream_t
*scn
)
668 const cdf_directory_t
*d
;
669 static const char name
[] = "\05SummaryInformation";
671 for (i
= 0; i
< dir
->dir_len
; i
++)
672 if (dir
->dir_tab
[i
].d_type
== CDF_DIR_TYPE_USER_STREAM
&&
673 cdf_namecmp(name
, dir
->dir_tab
[i
].d_name
, sizeof(name
))
677 if (i
== dir
->dir_len
) {
678 DPRINTF(("Cannot find summary information section\n"));
682 d
= &dir
->dir_tab
[i
];
683 return cdf_read_sector_chain(info
, h
, sat
, ssat
, sst
,
684 d
->d_stream_first_sector
, d
->d_size
, scn
);
688 cdf_read_property_info(const cdf_stream_t
*sst
, uint32_t offs
,
689 cdf_property_info_t
**info
, size_t *count
, size_t *maxcount
)
691 const cdf_section_header_t
*shp
;
692 cdf_section_header_t sh
;
693 const uint32_t *p
, *q
, *e
;
700 size_t i
, o
, nelements
, j
;
701 cdf_property_info_t
*inp
;
703 if (offs
> UINT32_MAX
/ 4) {
707 shp
= (const void *)((const char *)sst
->sst_tab
+ offs
);
708 if (cdf_check_stream_offset(sst
, shp
, sizeof(*shp
)) == -1)
710 sh
.sh_len
= CDF_TOLE4(shp
->sh_len
);
711 #define CDF_SHLEN_LIMIT (UINT32_MAX / 8)
712 if (sh
.sh_len
> CDF_SHLEN_LIMIT
) {
716 sh
.sh_properties
= CDF_TOLE4(shp
->sh_properties
);
717 #define CDF_PROP_LIMIT (UINT32_MAX / (4 * sizeof(*inp)))
718 if (sh
.sh_properties
> CDF_PROP_LIMIT
)
720 DPRINTF(("section len: %u properties %u\n", sh
.sh_len
,
723 if (*maxcount
> CDF_PROP_LIMIT
)
725 *maxcount
+= sh
.sh_properties
;
726 inp
= realloc(*info
, *maxcount
* sizeof(*inp
));
728 *maxcount
= sh
.sh_properties
;
729 inp
= malloc(*maxcount
* sizeof(*inp
));
735 *count
+= sh
.sh_properties
;
736 p
= (const void *)((const char *)sst
->sst_tab
+ offs
+ sizeof(sh
));
737 e
= (const void *)(((const char *)shp
) + sh
.sh_len
);
738 if (cdf_check_stream_offset(sst
, e
, 0) == -1)
740 for (i
= 0; i
< sh
.sh_properties
; i
++) {
741 q
= (const uint32_t *)((const char *)p
+
742 CDF_TOLE4(p
[(i
<< 1) + 1])) - 2;
744 DPRINTF(("Ran of the end %p > %p\n", q
, e
));
747 inp
[i
].pi_id
= CDF_TOLE4(p
[i
<< 1]);
748 inp
[i
].pi_type
= CDF_TOLE4(q
[0]);
749 DPRINTF(("%d) id=%x type=%x offs=%x\n", i
, inp
[i
].pi_id
,
750 inp
[i
].pi_type
, (const char *)q
- (const char *)p
));
751 if (inp
[i
].pi_type
& CDF_VECTOR
) {
752 nelements
= CDF_TOLE4(q
[1]);
758 if (inp
[i
].pi_type
& (CDF_ARRAY
|CDF_BYREF
|CDF_RESERVED
))
760 switch (inp
[i
].pi_type
& CDF_TYPEMASK
) {
764 if (inp
[i
].pi_type
& CDF_VECTOR
)
766 (void)memcpy(&s16
, &q
[o
], sizeof(s16
));
767 inp
[i
].pi_s16
= CDF_TOLE2(s16
);
770 if (inp
[i
].pi_type
& CDF_VECTOR
)
772 (void)memcpy(&s32
, &q
[o
], sizeof(s32
));
773 inp
[i
].pi_s32
= CDF_TOLE4(s32
);
777 if (inp
[i
].pi_type
& CDF_VECTOR
)
779 (void)memcpy(&u32
, &q
[o
], sizeof(u32
));
780 inp
[i
].pi_u32
= CDF_TOLE4(u32
);
783 if (inp
[i
].pi_type
& CDF_VECTOR
)
785 (void)memcpy(&s64
, &q
[o
], sizeof(s64
));
786 inp
[i
].pi_s64
= CDF_TOLE4(s64
);
789 if (inp
[i
].pi_type
& CDF_VECTOR
)
791 (void)memcpy(&u64
, &q
[o
], sizeof(u64
));
792 inp
[i
].pi_u64
= CDF_TOLE4(u64
);
794 case CDF_LENGTH32_STRING
:
796 size_t nelem
= inp
- *info
;
797 if (*maxcount
> CDF_PROP_LIMIT
798 || nelements
> CDF_PROP_LIMIT
)
800 *maxcount
+= nelements
;
801 inp
= realloc(*info
, *maxcount
* sizeof(*inp
));
807 DPRINTF(("nelements = %d\n", nelements
));
808 for (j
= 0; j
< nelements
; j
++, i
++) {
809 uint32_t l
= CDF_TOLE4(q
[o
]);
810 inp
[i
].pi_str
.s_len
= l
;
811 inp
[i
].pi_str
.s_buf
= (const char *)(&q
[o
+1]);
812 DPRINTF(("l = %d, r = %d, s = %s\n", l
,
813 CDF_ROUND(l
, sizeof(l
)),
814 inp
[i
].pi_str
.s_buf
));
815 l
= 4 + CDF_ROUND(l
, sizeof(l
));
821 if (inp
[i
].pi_type
& CDF_VECTOR
)
823 (void)memcpy(&tp
, &q
[o
], sizeof(tp
));
824 inp
[i
].pi_tp
= CDF_TOLE8(tp
);
827 if (inp
[i
].pi_type
& CDF_VECTOR
)
832 DPRINTF(("Don't know how to deal with %x\n",
844 cdf_unpack_summary_info(const cdf_stream_t
*sst
, cdf_summary_info_header_t
*ssi
,
845 cdf_property_info_t
**info
, size_t *count
)
848 const cdf_summary_info_header_t
*si
= sst
->sst_tab
;
849 const cdf_section_declaration_t
*sd
= (const void *)
850 ((const char *)sst
->sst_tab
+ CDF_SECTION_DECLARATION_OFFSET
);
852 if (cdf_check_stream_offset(sst
, si
, sizeof(*si
)) == -1 ||
853 cdf_check_stream_offset(sst
, sd
, sizeof(*sd
)) == -1)
855 ssi
->si_byte_order
= CDF_TOLE2(si
->si_byte_order
);
856 ssi
->si_os_version
= CDF_TOLE2(si
->si_os_version
);
857 ssi
->si_os
= CDF_TOLE2(si
->si_os
);
858 ssi
->si_class
= si
->si_class
;
859 cdf_swap_class(&ssi
->si_class
);
860 ssi
->si_count
= CDF_TOLE2(si
->si_count
);
864 for (i
= 0; i
< CDF_TOLE4(si
->si_count
); i
++) {
865 if (i
>= CDF_LOOP_LIMIT
) {
866 DPRINTF(("Unpack summary info loop limit"));
870 if (cdf_read_property_info(sst
, CDF_TOLE4(sd
->sd_offset
),
871 info
, count
, &maxcount
) == -1)
880 cdf_print_classid(char *buf
, size_t buflen
, const cdf_classid_t
*id
)
882 return snprintf(buf
, buflen
, "%.8x-%.4x-%.4x-%.2x%.2x-"
883 "%.2x%.2x%.2x%.2x%.2x%.2x", id
->cl_dword
, id
->cl_word
[0],
884 id
->cl_word
[1], id
->cl_two
[0], id
->cl_two
[1], id
->cl_six
[0],
885 id
->cl_six
[1], id
->cl_six
[2], id
->cl_six
[3], id
->cl_six
[4],
889 static const struct {
893 { CDF_PROPERTY_CODE_PAGE
, "Code page" },
894 { CDF_PROPERTY_TITLE
, "Title" },
895 { CDF_PROPERTY_SUBJECT
, "Subject" },
896 { CDF_PROPERTY_AUTHOR
, "Author" },
897 { CDF_PROPERTY_KEYWORDS
, "Keywords" },
898 { CDF_PROPERTY_COMMENTS
, "Comments" },
899 { CDF_PROPERTY_TEMPLATE
, "Template" },
900 { CDF_PROPERTY_LAST_SAVED_BY
, "Last Saved By" },
901 { CDF_PROPERTY_REVISION_NUMBER
, "Revision Number" },
902 { CDF_PROPERTY_TOTAL_EDITING_TIME
, "Total Editing Time" },
903 { CDF_PROPERTY_LAST_PRINTED
, "Last Printed" },
904 { CDF_PROPERTY_CREATE_TIME
, "Create Time/Date" },
905 { CDF_PROPERTY_LAST_SAVED_TIME
, "Last Saved Time/Date" },
906 { CDF_PROPERTY_NUMBER_OF_PAGES
, "Number of Pages" },
907 { CDF_PROPERTY_NUMBER_OF_WORDS
, "Number of Words" },
908 { CDF_PROPERTY_NUMBER_OF_CHARACTERS
, "Number of Characters" },
909 { CDF_PROPERTY_THUMBNAIL
, "Thumbnail" },
910 { CDF_PROPERTY_NAME_OF_APPLICATION
, "Name of Creating Application" },
911 { CDF_PROPERTY_SECURITY
, "Security" },
912 { CDF_PROPERTY_LOCALE_ID
, "Locale ID" },
916 cdf_print_property_name(char *buf
, size_t bufsiz
, uint32_t p
)
920 for (i
= 0; i
< __arraycount(vn
); i
++)
922 return snprintf(buf
, bufsiz
, "%s", vn
[i
].n
);
923 return snprintf(buf
, bufsiz
, "0x%x", p
);
927 cdf_print_elapsed_time(char *buf
, size_t bufsiz
, cdf_timestamp_t ts
)
930 int days
, hours
, mins
, secs
;
942 len
+= snprintf(buf
+ len
, bufsiz
- len
, "%dd+", days
);
948 len
+= snprintf(buf
+ len
, bufsiz
- len
, "%.2d:", hours
);
953 len
+= snprintf(buf
+ len
, bufsiz
- len
, "%.2d:", mins
);
957 len
+= snprintf(buf
+ len
, bufsiz
- len
, "%.2d", secs
);
964 cdf_dump_header(const cdf_header_t
*h
)
968 #define DUMP(a, b) (void)fprintf(stderr, "%40.40s = " a "\n", # b, h->h_ ## b)
969 #define DUMP2(a, b) (void)fprintf(stderr, "%40.40s = " a " (" a ")\n", # b, \
970 h->h_ ## b, 1 << h->h_ ## b)
971 DUMP("%d", revision
);
973 DUMP("0x%x", byte_order
);
974 DUMP2("%d", sec_size_p2
);
975 DUMP2("%d", short_sec_size_p2
);
976 DUMP("%d", num_sectors_in_sat
);
977 DUMP("%d", secid_first_directory
);
978 DUMP("%d", min_size_standard_stream
);
979 DUMP("%d", secid_first_sector_in_short_sat
);
980 DUMP("%d", num_sectors_in_short_sat
);
981 DUMP("%d", secid_first_sector_in_master_sat
);
982 DUMP("%d", num_sectors_in_master_sat
);
983 for (i
= 0; i
< __arraycount(h
->h_master_sat
); i
++) {
984 if (h
->h_master_sat
[i
] == CDF_SECID_FREE
)
986 (void)fprintf(stderr
, "%35.35s[%.3zu] = %d\n",
987 "master_sat", i
, h
->h_master_sat
[i
]);
992 cdf_dump_sat(const char *prefix
, const cdf_sat_t
*sat
, size_t size
)
994 size_t i
, j
, s
= size
/ sizeof(cdf_secid_t
);
996 for (i
= 0; i
< sat
->sat_len
; i
++) {
997 (void)fprintf(stderr
, "%s[%zu]:\n%.6d: ", prefix
, i
, i
* s
);
998 for (j
= 0; j
< s
; j
++) {
999 (void)fprintf(stderr
, "%5d, ",
1000 CDF_TOLE4(sat
->sat_tab
[s
* i
+ j
]));
1001 if ((j
+ 1) % 10 == 0)
1002 (void)fprintf(stderr
, "\n%.6d: ",
1005 (void)fprintf(stderr
, "\n");
1010 cdf_dump(void *v
, size_t len
)
1013 unsigned char *p
= v
;
1015 (void)fprintf(stderr
, "%.4x: ", 0);
1016 for (i
= 0, j
= 0; i
< len
; i
++, p
++) {
1017 (void)fprintf(stderr
, "%.2x ", *p
);
1018 abuf
[j
++] = isprint(*p
) ? *p
: '.';
1022 (void)fprintf(stderr
, "%s\n%.4x: ", abuf
, i
+ 1);
1025 (void)fprintf(stderr
, "\n");
1029 cdf_dump_stream(const cdf_header_t
*h
, const cdf_stream_t
*sst
)
1031 size_t ss
= sst
->sst_dirlen
< h
->h_min_size_standard_stream
?
1032 CDF_SHORT_SEC_SIZE(h
) : CDF_SEC_SIZE(h
);
1033 cdf_dump(sst
->sst_tab
, ss
* sst
->sst_len
);
1037 cdf_dump_dir(const cdf_info_t
*info
, const cdf_header_t
*h
,
1038 const cdf_sat_t
*sat
, const cdf_sat_t
*ssat
, const cdf_stream_t
*sst
,
1039 const cdf_dir_t
*dir
)
1043 char name
[__arraycount(d
->d_name
)];
1047 static const char *types
[] = { "empty", "user storage",
1048 "user stream", "lockbytes", "property", "root storage" };
1050 for (i
= 0; i
< dir
->dir_len
; i
++) {
1051 d
= &dir
->dir_tab
[i
];
1052 for (j
= 0; j
< sizeof(name
); j
++)
1053 name
[j
] = (char)CDF_TOLE2(d
->d_name
[j
]);
1054 (void)fprintf(stderr
, "Directory %zu: %s\n", i
, name
);
1055 if (d
->d_type
< __arraycount(types
))
1056 (void)fprintf(stderr
, "Type: %s\n", types
[d
->d_type
]);
1058 (void)fprintf(stderr
, "Type: %d\n", d
->d_type
);
1059 (void)fprintf(stderr
, "Color: %s\n",
1060 d
->d_color
? "black" : "red");
1061 (void)fprintf(stderr
, "Left child: %d\n", d
->d_left_child
);
1062 (void)fprintf(stderr
, "Right child: %d\n", d
->d_right_child
);
1063 (void)fprintf(stderr
, "Flags: 0x%x\n", d
->d_flags
);
1064 cdf_timestamp_to_timespec(&ts
, d
->d_created
);
1065 (void)fprintf(stderr
, "Created %s", ctime(&ts
.tv_sec
));
1066 cdf_timestamp_to_timespec(&ts
, d
->d_modified
);
1067 (void)fprintf(stderr
, "Modified %s", ctime(&ts
.tv_sec
));
1068 (void)fprintf(stderr
, "Stream %d\n", d
->d_stream_first_sector
);
1069 (void)fprintf(stderr
, "Size %d\n", d
->d_size
);
1070 switch (d
->d_type
) {
1071 case CDF_DIR_TYPE_USER_STORAGE
:
1072 (void)fprintf(stderr
, "Storage: %d\n", d
->d_storage
);
1074 case CDF_DIR_TYPE_USER_STREAM
:
1077 if (cdf_read_sector_chain(info
, h
, sat
, ssat
, sst
,
1078 d
->d_stream_first_sector
, d
->d_size
, &scn
) == -1) {
1079 warn("Can't read stream for %s at %d len %d",
1080 name
, d
->d_stream_first_sector
, d
->d_size
);
1083 cdf_dump_stream(h
, &scn
);
1094 cdf_dump_property_info(const cdf_property_info_t
*info
, size_t count
)
1101 for (i
= 0; i
< count
; i
++) {
1102 cdf_print_property_name(buf
, sizeof(buf
), info
[i
].pi_id
);
1103 (void)fprintf(stderr
, "%zu) %s: ", i
, buf
);
1104 switch (info
[i
].pi_type
) {
1106 (void)fprintf(stderr
, "signed 16 [%hd]\n",
1110 (void)fprintf(stderr
, "signed 32 [%d]\n",
1113 case CDF_UNSIGNED32
:
1114 (void)fprintf(stderr
, "unsigned 32 [%u]\n",
1117 case CDF_LENGTH32_STRING
:
1118 (void)fprintf(stderr
, "string %u [%.*s]\n",
1119 info
[i
].pi_str
.s_len
,
1120 info
[i
].pi_str
.s_len
, info
[i
].pi_str
.s_buf
);
1124 if (tp
< 1000000000000000LL) {
1125 cdf_print_elapsed_time(buf
, sizeof(buf
), tp
);
1126 (void)fprintf(stderr
, "timestamp %s\n", buf
);
1128 cdf_timestamp_to_timespec(&ts
, tp
);
1129 (void)fprintf(stderr
, "timestamp %s",
1134 (void)fprintf(stderr
, "CLIPBOARD %u\n", info
[i
].pi_u32
);
1137 DPRINTF(("Don't know how to deal with %x\n",
1146 cdf_dump_summary_info(const cdf_header_t
*h
, const cdf_stream_t
*sst
)
1149 cdf_summary_info_header_t ssi
;
1150 cdf_property_info_t
*info
;
1154 if (cdf_unpack_summary_info(sst
, &ssi
, &info
, &count
) == -1)
1156 (void)fprintf(stderr
, "Endian: %x\n", ssi
.si_byte_order
);
1157 (void)fprintf(stderr
, "Os Version %d.%d\n", ssi
.si_os_version
& 0xff,
1158 ssi
.si_os_version
>> 8);
1159 (void)fprintf(stderr
, "Os %d\n", ssi
.si_os
);
1160 cdf_print_classid(buf
, sizeof(buf
), &ssi
.si_class
);
1161 (void)fprintf(stderr
, "Class %s\n", buf
);
1162 (void)fprintf(stderr
, "Count %d\n", ssi
.si_count
);
1163 cdf_dump_property_info(info
, count
);
1171 main(int argc
, char *argv
[])
1175 cdf_sat_t sat
, ssat
;
1176 cdf_stream_t sst
, scn
;
1181 (void)fprintf(stderr
, "Usage: %s <filename>\n", getprogname());
1187 for (i
= 1; i
< argc
; i
++) {
1188 if ((info
.i_fd
= open(argv
[1], O_RDONLY
)) == -1)
1189 err(1, "Cannot open `%s'", argv
[1]);
1191 if (cdf_read_header(&info
, &h
) == -1)
1192 err(1, "Cannot read header");
1194 cdf_dump_header(&h
);
1197 if (cdf_read_sat(&info
, &h
, &sat
) == -1)
1198 err(1, "Cannot read sat");
1200 cdf_dump_sat("SAT", &sat
, CDF_SEC_SIZE(&h
));
1203 if (cdf_read_ssat(&info
, &h
, &sat
, &ssat
) == -1)
1204 err(1, "Cannot read ssat");
1206 cdf_dump_sat("SSAT", &h
, &ssat
, CDF_SHORT_SEC_SIZE(&h
));
1209 if (cdf_read_dir(&info
, &h
, &sat
, &dir
) == -1)
1210 err(1, "Cannot read dir");
1212 if (cdf_read_short_stream(&info
, &h
, &sat
, &dir
, &sst
) == -1)
1213 err(1, "Cannot read short stream");
1215 cdf_dump_stream(&h
, &sst
);
1219 cdf_dump_dir(&info
, &h
, &sat
, &ssat
, &sst
, &dir
);
1223 if (cdf_read_summary_info(&info
, &h
, &sat
, &ssat
, &sst
, &dir
,
1225 err(1, "Cannot read summary info");
1227 cdf_dump_summary_info(&h
, &scn
);
1230 (void)close(info
.i_fd
);