2 * ms-biff.c: MS Excel Biff support...
5 * Jody Goldberg (jody@gnome.org)
6 * Michael Meeks (michael@ximian.com)
8 * (C) 1998-2001 Michael Meeks
9 * (C) 2002-2005 Jody Goldberg
10 * (C) 2016 Morten Welinder
13 #include <gnumeric-config.h>
18 #include "biff-types.h"
19 #include "ms-excel-util.h"
22 #include <gsf/gsf-input.h>
23 #include <gsf/gsf-output.h>
24 #include <gsf/gsf-utils.h>
25 #include <gsf/gsf-msole-utils.h>
26 #include <goffice/goffice.h>
29 #define sizeof_BIFF_8_FILEPASS (6 + 3*16)
30 #define sizeof_BIFF_2_7_FILEPASS 4
33 * The only complicated bits in this code are:
34 * a) Speed optimisation of passing a raw pointer to the mapped stream back
35 * b) Handling decryption
38 /*******************************************************************************/
39 /* Helper Functions */
40 /*******************************************************************************/
43 destroy_sensitive (void *p
, size_t len
)
47 memset (p
, 0xaa, len
- 1);
48 go_destroy_password (p
);
53 ms_biff_query_dump (BiffQuery
*q
)
55 const char *opname
= biff_opcode_name (q
->opcode
);
56 g_print ("Opcode 0x%x (%s) length %d malloced? %d\nData:\n",
58 opname
? opname
: "?",
62 gsf_mem_dump (q
->data
, q
->length
);
63 /* dump_stream (q->output); */
67 ms_biff_query_bound_check (BiffQuery
*q
, guint32 offset
, unsigned len
)
69 if (offset
>= q
->length
) {
73 if (!ms_biff_query_peek_next (q
, &opcode
) ||
74 opcode
!= BIFF_CONTINUE
||
75 !ms_biff_query_next (q
)) {
76 g_warning ("missing CONTINUE");
81 if ((offset
+ len
) > q
->length
) {
82 g_warning ("supposedly atomic item of len %u sst spans CONTINUEs, we are screwed", len
);
88 /*****************************************************************************/
90 /*****************************************************************************/
93 * ms_biff_password_hash and ms_biff_crypt_seq
94 * based on pseudo-code in the OpenOffice.org XL documentation
97 ms_biff_password_hash (guint8
const *password
)
99 int tmp
, index
= 0, len
= strlen ((char const *)password
);
100 guint16 chr
, hash
= 0;
103 chr
= password
[index
];
105 tmp
= (chr
<< index
);
106 hash
^= (tmp
& 0x7fff) | (tmp
>> 15);
107 } while (index
< len
);
108 hash
= hash
^ len
^ 0xce4b;
114 ms_biff_crypt_seq (BiffQuery
*q
, guint16 key
, guint8
const *password
)
116 static guint8
const preset
[] = {
117 0xbb, 0xff, 0xff, 0xba, 0xff, 0xff, 0xb9, 0x80,
118 0x00, 0xbe, 0x0f, 0x00, 0xbf, 0x0f, 0x00, 0x00
120 guint8
const low
= key
& 0xff;
121 guint8
const high
= (key
>> 8) & 0xff;
122 unsigned i
, len
= strlen ((char const*)password
);
123 guint8
*seq
= q
->xor_key
;
125 strncpy (seq
, password
, 16);
126 for (i
= 0; (len
+ i
) < 16; i
++)
127 seq
[len
+ i
] = preset
[i
];
129 for (i
= 0; i
< 16; i
+= 2) {
134 for (i
= 0; i
< 16; i
++)
135 seq
[i
] = (seq
[i
] << 2) | (seq
[i
] >> 6);
139 ms_biff_pre_biff8_query_set_decrypt (BiffQuery
*q
, guint8
const *password
)
142 guint16 pw_hash
= ms_biff_password_hash (password
);
145 if (q
->length
== 4) {
146 key
= GSF_LE_GET_GUINT16(q
->data
+ 0);
147 hash
= GSF_LE_GET_GUINT16(q
->data
+ 2);
148 } else if (q
->length
== 6) {
149 /* BIFF8 record with pre-biff8 crypto, these do exist */
150 key
= GSF_LE_GET_GUINT16(q
->data
+ 2);
151 hash
= GSF_LE_GET_GUINT16(q
->data
+ 4);
159 ms_biff_crypt_seq (q
, key
, password
);
161 q
->encryption
= MS_BIFF_CRYPTO_XOR
;
166 makekey (guint32 block
, RC4_KEY
*key
, const unsigned char *valDigest
)
169 unsigned char digest
[16];
172 memset (pwarray
, 0, 64);
174 /* 40 bit of hashed password, set by verify_password () */
175 memcpy (pwarray
, valDigest
, 5);
177 GSF_LE_SET_GUINT32 (pwarray
+ 5, block
);
182 md5_process_block (pwarray
, 64, &ctx
);
183 md5_read_ctx (&ctx
, digest
);
184 prepare_key (digest
, 16, key
);
186 destroy_sensitive (&ctx
, sizeof (ctx
));
187 destroy_sensitive (digest
, sizeof (digest
));
188 destroy_sensitive (pwarray
, sizeof (pwarray
));
194 * convert UTF-8-password into UTF-16
197 verify_password (guint8
const *password
, guint8
const *docid
,
198 guint8
const *salt_data
, guint8
const *hashedsalt_data
,
199 unsigned char *valDigest
)
201 guint8 pwarray
[64], salt
[64], hashedsalt
[16];
202 struct md5_ctx mdContext
;
203 unsigned char digest
[16];
205 int offset
, keyoffset
, i
;
208 gunichar2
*utf16
= g_utf8_to_utf16 (password
, -1, NULL
, NULL
, NULL
);
210 /* we had better receive valid UTF-8 */
211 g_return_val_if_fail (utf16
!= NULL
, FALSE
);
213 /* Be careful about endianness */
214 memset (pwarray
, 0, sizeof (pwarray
));
215 for (i
= 0 ; utf16
[i
] ; i
++) {
216 pwarray
[(2 * i
) + 0] = ((utf16
[i
] >> 0) & 0xff);
217 pwarray
[(2 * i
) + 1] = ((utf16
[i
] >> 8) & 0xff);
221 pwarray
[2 * i
] = 0x80;
222 pwarray
[56] = (i
<< 4);
224 md5_init_ctx (&mdContext
);
225 md5_process_block (pwarray
, 64, &mdContext
);
226 md5_read_ctx (&mdContext
, digest
);
232 md5_init_ctx (&mdContext
);
233 while (offset
!= 16) {
234 if ((64 - offset
) < 5)
235 tocopy
= 64 - offset
;
237 memcpy (pwarray
+ offset
, digest
+ keyoffset
, tocopy
);
241 md5_process_block (pwarray
, 64, &mdContext
);
250 memcpy (pwarray
+ offset
, docid
, 16);
254 /* Fix (zero) all but first 16 bytes */
256 memset (pwarray
+ 17, 0, 47);
260 md5_process_block (pwarray
, 64, &mdContext
);
261 md5_read_ctx (&mdContext
, valDigest
);
263 /* Generate 40-bit RC4 key from 128-bit hashed password */
264 makekey (0, &key
, valDigest
);
266 memcpy (salt
, salt_data
, 16);
267 rc4 (salt
, 16, &key
);
268 memcpy (hashedsalt
, hashedsalt_data
, 16);
269 rc4 (hashedsalt
, 16, &key
);
272 memset (salt
+ 17, 0, 47);
275 md5_init_ctx (&mdContext
);
276 md5_process_block (salt
, 64, &mdContext
);
277 md5_read_ctx (&mdContext
, digest
);
279 res
= memcmp (digest
, hashedsalt
, 16) == 0;
281 destroy_sensitive (pwarray
, sizeof (pwarray
));
282 destroy_sensitive (salt
, sizeof (salt
));
283 destroy_sensitive (hashedsalt
, sizeof (hashedsalt
));
284 destroy_sensitive (&mdContext
, sizeof (mdContext
));
285 destroy_sensitive (digest
, sizeof (digest
));
286 destroy_sensitive (&key
, sizeof (key
));
291 #define REKEY_BLOCK 0x400
293 skip_bytes (BiffQuery
*q
, int start
, int count
)
295 static guint8 scratch
[REKEY_BLOCK
];
298 block
= (start
+ count
) / REKEY_BLOCK
;
300 if (block
!= q
->block
) {
301 makekey (q
->block
= block
, &q
->rc4_key
, q
->md5_digest
);
302 count
= (start
+ count
) % REKEY_BLOCK
;
305 g_assert (count
<= REKEY_BLOCK
);
306 rc4 (scratch
, count
, &q
->rc4_key
);
310 * ms_biff_query_set_decrypt :
312 * @password: password in UTF-8 encoding.
315 ms_biff_query_set_decrypt (BiffQuery
*q
, MsBiffVersion version
,
316 guint8
const *password
)
318 g_return_val_if_fail (q
->opcode
== BIFF_FILEPASS
, FALSE
);
320 if (password
== NULL
)
323 if (version
< MS_BIFF_V8
|| q
->length
== 0 || q
->data
[0] == 0)
324 return ms_biff_pre_biff8_query_set_decrypt (q
, password
);
326 XL_CHECK_CONDITION_VAL (q
->length
== sizeof_BIFF_8_FILEPASS
, FALSE
);
328 if (!verify_password (password
, q
->data
+ 6,
329 q
->data
+ 22, q
->data
+ 38, q
->md5_digest
))
332 q
->encryption
= MS_BIFF_CRYPTO_RC4
;
335 /* For some reaons the 1st record after FILEPASS seems to be unencrypted */
336 q
->dont_decrypt_next_record
= TRUE
;
338 /* pretend to decrypt the entire stream up till this point, it was not
339 * encrypted, but do it anyway to keep the rc4 state in sync
341 skip_bytes (q
, 0, gsf_input_tell (q
->input
));
346 ms_biff_query_copy_decrypt (BiffQuery
*dst
, BiffQuery
const *src
)
348 g_return_if_fail (dst
!= NULL
);
349 g_return_if_fail (src
!= NULL
);
352 switch (src
->encryption
) {
354 case MS_BIFF_CRYPTO_NONE
:
355 XL_CHECK_CONDITION (dst
->encryption
== MS_BIFF_CRYPTO_NONE
);
357 case MS_BIFF_CRYPTO_XOR
:
359 case MS_BIFF_CRYPTO_RC4
:
365 ms_biff_query_new (GsfInput
*input
)
369 g_return_val_if_fail (input
!= NULL
, NULL
);
371 q
= g_new0 (BiffQuery
, 1);
374 q
->data_malloced
= q
->non_decrypted_data_malloced
= FALSE
;
375 q
->data
= q
->non_decrypted_data
= NULL
;
377 q
->encryption
= MS_BIFF_CRYPTO_NONE
;
380 ms_biff_query_dump (q
);
386 ms_biff_query_peek_next (BiffQuery
*q
, guint16
*opcode
)
391 g_return_val_if_fail (opcode
!= NULL
, FALSE
);
392 g_return_val_if_fail (q
!= NULL
, FALSE
);
394 data
= gsf_input_read (q
->input
, 4, NULL
);
397 *opcode
= GSF_LE_GET_GUINT16 (data
);
398 len
= GSF_LE_GET_GUINT16 (data
+ 2);
399 gsf_input_seek (q
->input
, -4, G_SEEK_CUR
);
401 return gsf_input_remaining (q
->input
) >= 4 + len
;
405 * Returns FALSE if has hit end
408 ms_biff_query_next (BiffQuery
*q
)
412 gboolean auto_continue
;
414 g_return_val_if_fail (q
!= NULL
, FALSE
);
416 if (gsf_input_eof (q
->input
))
419 if (q
->data_malloced
) {
422 q
->data_malloced
= FALSE
;
424 if (q
->non_decrypted_data_malloced
) {
425 g_free (q
->non_decrypted_data
);
426 q
->non_decrypted_data
= NULL
;
427 q
->non_decrypted_data_malloced
= FALSE
;
430 q
->streamPos
= gsf_input_tell (q
->input
);
431 data
= gsf_input_read (q
->input
, 4, NULL
);
434 q
->opcode
= GSF_LE_GET_GUINT16 (data
);
435 len
= GSF_LE_GET_GUINT16 (data
+ 2);
440 /* no biff record should be larger than around 20,000 */
441 XL_CHECK_CONDITION_VAL (len
< 20000, FALSE
);
444 q
->data
= (guint8
*)gsf_input_read (q
->input
, len
, NULL
);
450 if (q
->encryption
== MS_BIFF_CRYPTO_RC4
) {
451 q
->non_decrypted_data_malloced
= q
->data_malloced
;
452 q
->non_decrypted_data
= q
->data
;
454 q
->data_malloced
= TRUE
;
455 q
->data
= g_new (guint8
, q
->length
);
456 memcpy (q
->data
, q
->non_decrypted_data
, q
->length
);
458 if (q
->dont_decrypt_next_record
) {
459 skip_bytes (q
, q
->streamPos
, 4 + q
->length
);
460 q
->dont_decrypt_next_record
= FALSE
;
462 int pos
= q
->streamPos
;
463 guint8
*data
= q
->data
;
466 /* pretend to decrypt header */
467 skip_bytes (q
, pos
, 4);
470 while (q
->block
!= (pos
+ len
) / REKEY_BLOCK
) {
471 int step
= REKEY_BLOCK
- (pos
% REKEY_BLOCK
);
472 rc4 (data
, step
, &q
->rc4_key
);
476 makekey (++q
->block
, &q
->rc4_key
, q
->md5_digest
);
479 rc4 (data
, len
, &q
->rc4_key
);
481 } else if (q
->encryption
== MS_BIFF_CRYPTO_XOR
) {
482 unsigned int offset
, k
;
484 q
->non_decrypted_data_malloced
= q
->data_malloced
;
485 q
->non_decrypted_data
= q
->data
;
486 q
->data_malloced
= TRUE
;
487 q
->data
= g_new (guint8
, q
->length
);
488 memcpy (q
->data
, q
->non_decrypted_data
, q
->length
);
490 offset
= (q
->streamPos
+ q
->length
+ 4) % 16;
491 for (k
= 0; k
< q
->length
; ++k
) {
492 guint8 tmp
= (q
->data
[k
] << 3) | (q
->data
[k
] >> 5);
493 q
->data
[k
] = tmp
^ q
->xor_key
[offset
];
494 offset
= (offset
+ 1) % 16;
497 q
->non_decrypted_data
= q
->data
;
499 // Turn this on to debug memory access beyond record
501 q
->non_decrypted_data_malloced
= q
->data_malloced
;
502 q
->data
= g_memdup (q
->data
, q
->length
);
503 q
->data_malloced
= TRUE
;
508 ms_biff_query_dump (q
);
512 * I am guessing that we should always handle BIFF_CONTINUE here,
513 * except for the few record types exempt from encryption. For
514 * now, however, do the bare minimum.
523 case BIFF_EXTERNNAME_v0
:
524 case BIFF_EXTERNNAME_v2
:
525 case BIFF_EXTERNSHEET
:
531 case BIFF_FORMULA_v0
:
532 case BIFF_FORMULA_v2
:
533 case BIFF_FORMULA_v4
:
539 case BIFF_MERGECELLS
:
546 auto_continue
= TRUE
;
549 case BIFF_BOUNDSHEET
:
550 // Needs access to non_decrypted_data
555 * The continuation of the string part seems to repeat the
556 * unicode marker so we clearly cannot just concatenate.
559 auto_continue
= FALSE
;
564 while (ms_biff_query_peek_next (q
, &opcode
) &&
565 opcode
== BIFF_CONTINUE
) {
566 GString
*data
= g_string_new_len (q
->data
, q
->length
);
568 if (!ms_biff_query_next (q
)) {
569 g_string_free (data
, TRUE
);
570 return FALSE
; /* Probably shouldn't happen */
573 g_string_append_len (data
, q
->data
, q
->length
);
574 if (q
->data_malloced
)
576 q
->length
= data
->len
;
577 q
->data
= g_string_free (data
, FALSE
);
578 q
->data_malloced
= TRUE
;
586 ms_biff_query_destroy (BiffQuery
*q
)
589 if (q
->data_malloced
) {
592 q
->data_malloced
= FALSE
;
594 if (q
->non_decrypted_data_malloced
) {
595 g_free (q
->non_decrypted_data
);
596 q
->non_decrypted_data
= NULL
;
597 q
->non_decrypted_data_malloced
= FALSE
;
601 destroy_sensitive (q
, sizeof (*q
));
607 /*****************************************************************************/
609 /*****************************************************************************/
611 #define MAX_BIFF7_RECORD_SIZE 0x820
612 #define MAX_BIFF8_RECORD_SIZE 0x2020
616 * @output: (transfer full): the output storage
617 * @version: file format version
618 * @codepage: Codepage to use for strings. Only used pre-BIff8 and ignored
619 * unless positive and for
623 ms_biff_put_new (GsfOutput
*output
, MsBiffVersion version
, int codepage
)
627 g_return_val_if_fail (output
!= NULL
, NULL
);
629 bp
= g_new (BiffPut
, 1);
632 bp
->streamPos
= gsf_output_tell (output
);
635 bp
->version
= version
;
637 bp
->record
= g_string_new (NULL
);
639 if (version
>= MS_BIFF_V8
) {
640 bp
->convert
= g_iconv_open ("UTF-16LE", "UTF-8");
643 bp
->codepage
= (codepage
> 0)
645 : gsf_msole_iconv_win_codepage ();
646 bp
->convert
= gsf_msole_iconv_open_codepage_for_export (bp
->codepage
);
653 ms_biff_put_destroy (BiffPut
*bp
)
655 g_return_if_fail (bp
!= NULL
);
656 g_return_if_fail (bp
->output
!= NULL
);
658 if (bp
->output
!= NULL
) {
659 gsf_output_close (bp
->output
);
660 g_object_unref (bp
->output
);
663 g_string_free (bp
->record
, TRUE
);
664 gsf_iconv_close (bp
->convert
);
670 ms_biff_put_len_next (BiffPut
*bp
, guint16 opcode
, guint32 len
)
672 g_return_val_if_fail (bp
, NULL
);
673 g_return_val_if_fail (bp
->output
, NULL
);
674 g_return_val_if_fail (bp
->len_fixed
== -1, NULL
);
678 const char *opname
= biff_opcode_name (opcode
);
679 g_printerr ("Biff put len 0x%x (%s)\n",
680 opcode
, opname
? opname
: "?");
686 bp
->streamPos
= gsf_output_tell (bp
->output
);
688 g_string_set_size (bp
->record
, len
);
690 return bp
->record
->str
;
694 ms_biff_put_var_next (BiffPut
*bp
, guint16 opcode
)
696 g_return_if_fail (bp
!= NULL
);
697 g_return_if_fail (bp
->output
!= NULL
);
698 g_return_if_fail (bp
->len_fixed
== -1);
702 const char *opname
= biff_opcode_name (opcode
);
703 g_printerr ("Biff put var 0x%x (%s)\n",
704 opcode
, opname
? opname
: "?");
711 bp
->streamPos
= gsf_output_tell (bp
->output
);
713 g_string_set_size (bp
->record
, 0);
717 ms_biff_max_record_len (BiffPut
const *bp
)
719 return (bp
->version
>= MS_BIFF_V8
) ? MAX_BIFF8_RECORD_SIZE
: MAX_BIFF7_RECORD_SIZE
;
723 ms_biff_put_var_write (BiffPut
*bp
, guint8
const *data
, guint32 len
)
725 g_return_if_fail (bp
!= NULL
);
726 g_return_if_fail (data
!= NULL
);
727 g_return_if_fail (bp
->output
!= NULL
);
728 g_return_if_fail ((gint32
)len
>= 0);
730 g_return_if_fail (bp
->len_fixed
== 0);
733 if (bp
->curpos
+ len
> bp
->record
->len
)
734 g_string_set_size (bp
->record
, bp
->curpos
+ len
);
737 memcpy (bp
->record
->str
+ bp
->curpos
, data
, len
);
743 ms_biff_put_var_seekto (BiffPut
*bp
, int pos
)
745 g_return_if_fail (bp
!= NULL
);
746 g_return_if_fail (bp
->output
!= NULL
);
747 g_return_if_fail (bp
->len_fixed
== 0);
748 g_return_if_fail (pos
>= 0);
754 ms_biff_put_commit (BiffPut
*bp
)
760 g_return_if_fail (bp
!= NULL
);
761 g_return_if_fail (bp
->output
!= NULL
);
763 maxlen
= ms_biff_max_record_len (bp
);
766 len
= bp
->record
->len
;
767 data
= bp
->record
->str
;
770 size_t thislen
= MIN (len
, maxlen
);
772 GSF_LE_SET_GUINT16 (tmp
, opcode
);
773 GSF_LE_SET_GUINT16 (tmp
+ 2, thislen
);
774 gsf_output_write (bp
->output
, 4, tmp
);
775 gsf_output_write (bp
->output
, thislen
, data
);
777 opcode
= BIFF_CONTINUE
;
782 bp
->streamPos
= gsf_output_tell (bp
->output
);
788 * This is useful to figure out who writes uninitialized
789 * memory to files. It causes layers below to flush.
791 gsf_output_seek (bp
->output
, -1, G_SEEK_CUR
);
792 gsf_output_seek (bp
->output
, +1, G_SEEK_CUR
);
797 ms_biff_put_empty (BiffPut
*bp
, guint16 opcode
)
799 ms_biff_put_len_next (bp
, opcode
, 0);
800 ms_biff_put_commit (bp
);
804 ms_biff_put_2byte (BiffPut
*bp
, guint16 opcode
, guint16 content
)
806 guint8
*data
= ms_biff_put_len_next (bp
, opcode
, 2);
807 GSF_LE_SET_GUINT16 (data
, content
);
808 ms_biff_put_commit (bp
);
812 ms_biff_put_abs_write (BiffPut
*bp
, gsf_off_t pos
, gconstpointer buf
, gsize size
)
814 gsf_off_t old
= gsf_output_tell (bp
->output
);
815 gsf_output_seek (bp
->output
, pos
, G_SEEK_SET
);
816 gsf_output_write (bp
->output
, size
, buf
);
817 gsf_output_seek (bp
->output
, old
, G_SEEK_SET
);