1 ;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
3 ; * inffas32.asm is derived from inffas86.c, with translation of assembly code
5 ; * Copyright (C) 1995-2003 Mark Adler
6 ; * For conditions of distribution and use, see copyright notice in zlib.h
8 ; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
9 ; * Please use the copyright conditions above.
11 ; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
12 ; * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at
13 ; * the moment. I have successfully compiled and tested this code with gcc2.96,
14 ; * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S
15 ; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
16 ; * enabled. I will attempt to merge the MMX code into this version. Newer
17 ; * versions of this and inffast.S can be found at
18 ; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
20 ; * 2005 : modification by Gilles Vollant
22 ; For Visual C++ 4.x and higher and ML 6.x and higher
23 ; ml.exe is in directory \MASM611C of Win95 DDK
24 ; ml.exe is also distributed in http://www.masm32.com/masmdl.htm
25 ; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
28 ; compile with command line option
29 ; ml /coff /Zi /c /Flinffas32.lst inffas32.asm
31 ; if you define NO_GZIP (see inflate.h), compile with
32 ; ml /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm
35 ; zlib1222sup is 0 for zlib 1.2.2.1 and lower
36 ; zlib1222sup is 8 for zlib 1.2.2.2 and later (with the addition of dmax and
37 ; head in inflate_state in inflate.h)
; Mode constants that the exit paths load into edx before jumping to
; L_update_stream_state: INFLATE_MODE_TYPE on the end-of-block path and
; INFLATE_MODE_BAD on the three error paths.  edx is then written to the
; mode_state field (state->mode).
; Three alternative pairs are listed (11/26, 11/26 and 3/17); only one pair
; can be in effect for a given build.
; NOTE(review): the conditional-assembly directives that choose between the
; pairs are not visible in this excerpt -- presumably they test the
; GUNZIP / NO_GUNZIP defines mentioned in the header; confirm against the
; full file and against the mode enumeration in inflate.h.
42 INFLATE_MODE_TYPE
equ 11
43 INFLATE_MODE_BAD
equ 26
46 INFLATE_MODE_TYPE
equ 11
47 INFLATE_MODE_BAD
equ 26
49 INFLATE_MODE_TYPE
equ 3
50 INFLATE_MODE_BAD
equ 17
58 ;;;GLOBAL _inflate_fast
80 db 'Fast decoding Code from Chris Anderson'
; Error message texts.  The address of each label is loaded into ecx on the
; matching error path (L_invalid_literal_length_code, L_invalid_distance_code,
; L_invalid_distance_too_far) before the jump to L_update_stream_state.
; NOTE(review): any terminator byte following each string is not visible in
; this excerpt -- confirm the strings are NUL-terminated in the full file.
84 invalid_literal_length_code_msg:
85 db 'invalid literal/length code'
89 invalid_distance_code_msg:
90 db 'invalid distance code'
94 invalid_distance_too_far_msg:
95 db 'invalid distance too far back'
; Byte offsets of the inflate state fields accessed by this routine; they are
; used as displacements in [reg+xxx_state] operands.  The trailing comment on
; each equate names the corresponding C member.
; Every offset except mode_state includes zlib1222sup, so the single
; zlib1222sup value shifts all later fields together to follow the structure
; layout of the targeted zlib version.
; NOTE(review): the extra "+4" present in every offset from write_state
; onward (but not in wsize_state) presumably accounts for a member inserted
; after wsize -- verify against struct inflate_state in inflate.h.
136 mode_state
equ 0 ;/* state->mode */
137 wsize_state
equ (32+zlib1222sup
) ;/* state->wsize */
138 write_state
equ (36+4+zlib1222sup
) ;/* state->write */
139 window_state
equ (40+4+zlib1222sup
) ;/* state->window */
140 hold_state
equ (44+4+zlib1222sup
) ;/* state->hold */
141 bits_state
equ (48+4+zlib1222sup
) ;/* state->bits */
142 lencode_state
equ (64+4+zlib1222sup
) ;/* state->lencode */
143 distcode_state
equ (68+4+zlib1222sup
) ;/* state->distcode */
144 lenbits_state
equ (72+4+zlib1222sup
) ;/* state->lenbits */
145 distbits_state
equ (76+4+zlib1222sup
) ;/* state->distbits */
150 ;GLOBAL inflate_fast_use_mmx
155 ; GLOBAL inflate_fast_use_mmx:object
156 ;.size inflate_fast_use_mmx, 4
161 _inflate_fast
proc near
162 .FPO
(16, 4, 0, 0, 1, 0)
207 mov eax, [edi+lencode_state
]
208 mov ecx, [edi+distcode_state
]
214 mov ecx, [edi+lenbits_state
]
220 mov ecx, [edi+distbits_state
]
225 mov eax, [edi+wsize_state
]
226 mov ecx, [edi+write_state
]
227 mov edx, [edi+window_state
]
233 mov ebp, [edi+hold_state
]
234 mov ebx, [edi+bits_state
]
271 cmp dword ptr [inflate_fast_use_mmx
],2
281 xor dword ptr [esp],0200000h
309 mov dword ptr [inflate_fast_use_mmx
],2
312 mov dword ptr [inflate_fast_use_mmx
],3
356 jnz L_test_for_length_base
371 L_test_for_length_base:
378 jz L_test_for_second_level_length
382 jae L_add_bits_to_len
409 ja L_get_distance_code
435 jz L_test_for_second_level_dist
439 jae L_add_bits_to_dist
508 L_test_for_second_level_length:
514 jnz L_test_for_end_of_block
526 L_test_for_second_level_dist:
532 jnz L_invalid_distance_code
552 jb L_invalid_distance_too_far
555 cmp dword ptr [esp+48],0
556 jne L_wrap_around_window
580 L_wrap_around_window:
584 jbe L_contiguous_in_window
609 L_contiguous_in_window:
643 movd mm4
,dword ptr [esp+0]
645 movd mm5
,dword ptr [esp+4]
656 ja L_get_length_code_mmx
659 movd mm7
,dword ptr [esi]
665 L_get_length_code_mmx:
677 jnz L_test_for_length_base_mmx
692 L_test_for_length_base_mmx:
698 jz L_test_for_second_level_length_mmx
700 jz L_decode_distance_mmx
706 and ecx, [inflate_fast_mask
+eax*4]
709 L_decode_distance_mmx:
713 ja L_get_dist_code_mmx
716 movd mm7
,dword ptr [esi]
738 jz L_test_for_second_level_dist_mmx
740 jz L_check_dist_one_mmx
742 L_add_bits_to_dist_mmx:
747 and ecx, [inflate_fast_mask
+eax*4]
778 L_check_dist_one_mmx:
780 jne L_check_window_mmx
782 je L_check_window_mmx
799 L_test_for_second_level_length_mmx:
801 jnz L_test_for_end_of_block
806 and ecx, [inflate_fast_mask
+eax*4]
812 L_test_for_second_level_dist_mmx:
814 jnz L_invalid_distance_code
819 and ecx, [inflate_fast_mask
+eax*4]
834 jb L_invalid_distance_too_far
837 cmp dword ptr [esp+48],0
838 jne L_wrap_around_window_mmx
861 L_wrap_around_window_mmx:
865 jbe L_contiguous_in_window_mmx
889 L_contiguous_in_window_mmx:
913 L_invalid_distance_code:
919 mov ecx, invalid_distance_code_msg
920 mov edx,INFLATE_MODE_BAD
921 jmp L_update_stream_state
923 L_test_for_end_of_block:
930 jz L_invalid_literal_length_code
933 mov edx,INFLATE_MODE_TYPE
934 jmp L_update_stream_state
936 L_invalid_literal_length_code:
942 mov ecx, invalid_literal_length_code_msg
943 mov edx,INFLATE_MODE_BAD
944 jmp L_update_stream_state
946 L_invalid_distance_too_far:
951 mov ecx, invalid_distance_too_far_msg
952 mov edx,INFLATE_MODE_BAD
953 jmp L_update_stream_state
955 L_update_stream_state:
963 mov [eax+mode_state
],edx
969 cmp dword ptr [inflate_fast_use_mmx
],2
986 mov [edx+bits_state
],ebx
1012 cmp dword ptr [inflate_fast_use_mmx
],2
1027 mov [edx+hold_state
],ebp
1034 jbe L_last_is_smaller
1053 jbe L_end_is_smaller