Commit | Line | Data |
---|---|---|
5ca28f79 L |
1 | ;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding\r |
2 | ; *\r | |
3 | ; * inffas32.asm is derived from inffas86.c, with translation of assembly code\r | |
4 | ; *\r | |
5 | ; * Copyright (C) 1995-2003 Mark Adler\r | |
6 | ; * For conditions of distribution and use, see copyright notice in zlib.h\r | |
7 | ; *\r | |
8 | ; * Copyright (C) 2003 Chris Anderson <christop@charm.net>\r | |
9 | ; * Please use the copyright conditions above.\r | |
10 | ; *\r | |
11 | ; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from\r | |
12 | ; * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at\r | |
13 | ; * the moment. I have successfully compiled and tested this code with gcc2.96,\r | |
14 | ; * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S\r | |
15 | ; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX\r | |
16 | ; * enabled. I will attempt to merge the MMX code into this version. Newer\r | |
17 | ; * versions of this and inffast.S can be found at\r | |
18 | ; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/\r | |
19 | ; *\r | |
20 | ; * 2005 : modification by Gilles Vollant\r | |
21 | ; */\r | |
22 | ; For Visual C++ 4.x and higher and ML 6.x and higher\r | |
23 | ; ml.exe is in directory \MASM611C of Win95 DDK\r | |
24 | ; ml.exe is also distributed in http://www.masm32.com/masmdl.htm\r | |
25 | ; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/\r | |
26 | ;\r | |
27 | ;\r | |
28 | ; compile with command line option\r | |
29 | ; ml /coff /Zi /c /Flinffas32.lst inffas32.asm\r | |
30 | \r | |
31 | ; if you define NO_GUNZIP (see inflate.h), compile with\r | |
32 | ; ml /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm\r | |
33 | \r | |
34 | \r | |
35 | ; zlib1222sup is 0 for zlib 1.2.2.1 and lower\r | |
36 | ; zlib1222sup is 8 for zlib 1.2.2.2 and later (with the addition of dmax and head\r | |
37 | ; in inflate_state in inflate.h); it is added to every *_state offset below except mode_state\r | |
38 | zlib1222sup equ 8\r | |
39 | \r | |
40 | \r | |
41 | IFDEF GUNZIP\r | | ; GUNZIP defined: same values as the "NO_GUNZIP not defined" branch below
42 | INFLATE_MODE_TYPE equ 11\r | | ; stored to state->mode when an end-of-block code is decoded
43 | INFLATE_MODE_BAD equ 26\r | | ; stored to state->mode when an invalid code or distance is detected
44 | ELSE\r | |
45 | IFNDEF NO_GUNZIP\r | |
46 | INFLATE_MODE_TYPE equ 11\r | |
47 | INFLATE_MODE_BAD equ 26\r | |
48 | ELSE\r | | ; NO_GUNZIP build: presumably mirrors a shorter inflate_mode enum in inflate.h -- confirm against that header
49 | INFLATE_MODE_TYPE equ 3\r | |
50 | INFLATE_MODE_BAD equ 17\r | |
51 | ENDIF\r | |
52 | ENDIF\r | |
53 | \r | |
54 | \r | |
55 | ; 75 "inffast.S"\r | |
56 | ;FILE "inffast.S"\r | |
57 | \r | |
58 | ;;;GLOBAL _inflate_fast\r | |
59 | \r | |
60 | ;;;SECTION .text\r | |
61 | \r | |
62 | \r | |
63 | \r | |
64 | .586p\r | |
65 | .mmx\r | | ; MMX mnemonics are needed by the L_*_mmx paths of _inflate_fast
66 | \r | |
67 | name inflate_fast_x86\r | |
68 | .MODEL FLAT\r | |
69 | \r | |
70 | _DATA segment\r | | ; NOTE(review): no matching "_DATA ends" appears before "_TEXT segment" is opened -- confirm the assembler accepts this nesting
71 | inflate_fast_use_mmx:\r | | ; MMX dispatch flag read by L_check_mmx: 2 = use the MMX loop, above 2 = use the plain loop, below 2 = run the CPUID probe first
72 | dd 1\r | | ; initial value 1, so the first call runs the probe, which stores 2 or 3
73 | \r | |
74 | \r | |
75 | _TEXT segment\r | |
76 | \r | |
77 | \r | |
78 | \r | |
79 | ALIGN 4\r | |
80 | db 'Fast decoding Code from Chris Anderson'\r | | ; unlabelled, NUL-terminated credit string embedded in the code segment
81 | db 0\r | |
82 | \r | |
83 | ALIGN 4\r | |
84 | invalid_literal_length_code_msg:\r | | ; NUL-terminated messages; their addresses are stored at offset 24 of the stream structure by L_update_stream_state
85 | db 'invalid literal/length code'\r | |
86 | db 0\r | |
87 | \r | |
88 | ALIGN 4\r | |
89 | invalid_distance_code_msg:\r | |
90 | db 'invalid distance code'\r | |
91 | db 0\r | |
92 | \r | |
93 | ALIGN 4\r | |
94 | invalid_distance_too_far_msg:\r | |
95 | db 'invalid distance too far back'\r | |
96 | db 0\r | |
97 | \r | |
98 | \r | |
99 | ALIGN 4\r | |
100 | inflate_fast_mask:\r | | ; 33 dwords: entry n holds (1 shl n) - 1 for n = 0..32; the MMX paths index it with a bit count to isolate that many low bits
101 | dd 0\r | |
102 | dd 1\r | |
103 | dd 3\r | |
104 | dd 7\r | |
105 | dd 15\r | |
106 | dd 31\r | |
107 | dd 63\r | |
108 | dd 127\r | |
109 | dd 255\r | |
110 | dd 511\r | |
111 | dd 1023\r | |
112 | dd 2047\r | |
113 | dd 4095\r | |
114 | dd 8191\r | |
115 | dd 16383\r | |
116 | dd 32767\r | |
117 | dd 65535\r | |
118 | dd 131071\r | |
119 | dd 262143\r | |
120 | dd 524287\r | |
121 | dd 1048575\r | |
122 | dd 2097151\r | |
123 | dd 4194303\r | |
124 | dd 8388607\r | |
125 | dd 16777215\r | |
126 | dd 33554431\r | |
127 | dd 67108863\r | |
128 | dd 134217727\r | |
129 | dd 268435455\r | |
130 | dd 536870911\r | |
131 | dd 1073741823\r | |
132 | dd 2147483647\r | |
133 | dd 4294967295\r | |
134 | \r | |
135 | \r | |
136 | mode_state equ 0 ;/* state->mode -- this and the following equates are byte offsets into the inflate state structure used by _inflate_fast */\r | |
137 | wsize_state equ (32+zlib1222sup) ;/* state->wsize */\r | |
138 | write_state equ (36+4+zlib1222sup) ;/* state->write (the extra +4 from here on skips one dword after wsize) */\r | |
139 | window_state equ (40+4+zlib1222sup) ;/* state->window */\r | |
140 | hold_state equ (44+4+zlib1222sup) ;/* state->hold (bit buffer) */\r | |
141 | bits_state equ (48+4+zlib1222sup) ;/* state->bits (number of bits in hold) */\r | |
142 | lencode_state equ (64+4+zlib1222sup) ;/* state->lencode */\r | |
143 | distcode_state equ (68+4+zlib1222sup) ;/* state->distcode */\r | |
144 | lenbits_state equ (72+4+zlib1222sup) ;/* state->lenbits */\r | |
145 | distbits_state equ (76+4+zlib1222sup) ;/* state->distbits */\r | |
146 | \r | |
147 | \r | |
148 | ;;SECTION .text\r | |
149 | ; 205 "inffast.S"\r | |
150 | ;GLOBAL inflate_fast_use_mmx\r | |
151 | \r | |
152 | ;SECTION .data\r | |
153 | \r | |
154 | \r | |
155 | ; GLOBAL inflate_fast_use_mmx:object\r | |
156 | ;.size inflate_fast_use_mmx, 4\r | |
157 | ; 226 "inffast.S" -- _inflate_fast: takes two dword stack arguments, read below as [esp+88] (pointer to the stream structure) and [esp+92] (combined with the dword at stream+16 to locate the start of the output). Stream fields used: +0 input pointer, +4 input byte count, +12 output pointer, +16 output byte count, +24 message pointer, +28 state pointer (presumably z_stream next_in/avail_in/next_out/avail_out/msg/state -- confirm against zlib.h)\r | |
158 | ;SECTION .text -- local frame after "sub esp,64": [esp+0] length index mask, [esp+4] distance index mask, [esp+8] lencode table, [esp+12] distcode table, [esp+16] output limit (out + count - 257), [esp+20] input limit (in + count - 11), [esp+24] match length, [esp+28..39] 12-byte input staging buffer, [esp+40] start of output, [esp+44] saved input pointer, [esp+48] state->write, [esp+52] state->wsize, [esp+56] state->window, [esp+60] initial output pointer\r | |
159 | \r | |
160 | ALIGN 4\r | |
161 | _inflate_fast proc near\r | |
162 | .FPO (16, 4, 0, 0, 1, 0)\r | |
163 | push edi\r | |
164 | push esi\r | |
165 | push ebp\r | |
166 | push ebx\r | |
167 | pushfd\r | | ; flags are saved because cld below changes the direction flag; popfd at L_done restores them
168 | sub esp,64\r | |
169 | cld\r | |
170 | \r | |
171 | \r | |
172 | \r | |
173 | \r | |
174 | mov esi, [esp+88]\r | | ; esi = first argument (stream pointer): 64 locals + 5 pushed dwords + return address = 88
175 | mov edi, [esi+28]\r | | ; edi = state pointer
176 | \r | |
177 | \r | |
178 | \r | |
179 | \r | |
180 | \r | |
181 | \r | |
182 | \r | |
183 | mov edx, [esi+4]\r | |
184 | mov eax, [esi+0]\r | |
185 | \r | |
186 | add edx,eax\r | |
187 | sub edx,11\r | | ; edx = input pointer + input count - 11
188 | \r | |
189 | mov [esp+44],eax\r | |
190 | mov [esp+20],edx\r | |
191 | \r | |
192 | mov ebp, [esp+92]\r | |
193 | mov ecx, [esi+16]\r | |
194 | mov ebx, [esi+12]\r | |
195 | \r | |
196 | sub ebp,ecx\r | |
197 | neg ebp\r | |
198 | add ebp,ebx\r | | ; ebp = output pointer - (second argument - output count)
199 | \r | |
200 | sub ecx,257\r | |
201 | add ecx,ebx\r | | ; ecx = output pointer + output count - 257
202 | \r | |
203 | mov [esp+60],ebx\r | |
204 | mov [esp+40],ebp\r | |
205 | mov [esp+16],ecx\r | |
206 | ; 285 "inffast.S" -- cache the two code-table pointers and build the (1 shl bits) - 1 index masks from state->lenbits and state->distbits\r | |
207 | mov eax, [edi+lencode_state]\r | |
208 | mov ecx, [edi+distcode_state]\r | |
209 | \r | |
210 | mov [esp+8],eax\r | |
211 | mov [esp+12],ecx\r | |
212 | \r | |
213 | mov eax,1\r | |
214 | mov ecx, [edi+lenbits_state]\r | |
215 | shl eax,cl\r | |
216 | dec eax\r | |
217 | mov [esp+0],eax\r | |
218 | \r | |
219 | mov eax,1\r | |
220 | mov ecx, [edi+distbits_state]\r | |
221 | shl eax,cl\r | |
222 | dec eax\r | |
223 | mov [esp+4],eax\r | |
224 | \r | |
225 | mov eax, [edi+wsize_state]\r | |
226 | mov ecx, [edi+write_state]\r | |
227 | mov edx, [edi+window_state]\r | |
228 | \r | |
229 | mov [esp+52],eax\r | |
230 | mov [esp+48],ecx\r | |
231 | mov [esp+56],edx\r | |
232 | \r | |
233 | mov ebp, [edi+hold_state]\r | |
234 | mov ebx, [edi+bits_state]\r | |
235 | ; 321 "inffast.S" -- ebp = bit buffer, ebx = bit count. If the input limit is not above the input pointer (fewer than 12 input bytes), the bytes are copied into the staging buffer at [esp+28], zero-padded to 12 bytes, and decoding reads from that buffer with the input limit set to its start; otherwise single bytes are shifted into the bit buffer until esi is 4-byte aligned\r | |
236 | mov esi, [esp+44]\r | |
237 | mov ecx, [esp+20]\r | |
238 | cmp ecx,esi\r | |
239 | ja L_align_long\r | |
240 | \r | |
241 | add ecx,11\r | |
242 | sub ecx,esi\r | | ; ecx = number of input bytes available
243 | mov eax,12\r | |
244 | sub eax,ecx\r | | ; eax = number of padding bytes
245 | lea edi, [esp+28]\r | |
246 | rep movsb\r | |
247 | mov ecx,eax\r | |
248 | xor eax,eax\r | |
249 | rep stosb\r | |
250 | lea esi, [esp+28]\r | |
251 | mov [esp+20],esi\r | |
252 | jmp L_is_aligned\r | |
253 | \r | |
254 | \r | |
255 | L_align_long:\r | |
256 | test esi,3\r | |
257 | jz L_is_aligned\r | |
258 | xor eax,eax\r | |
259 | mov al, [esi]\r | |
260 | inc esi\r | |
261 | mov ecx,ebx\r | |
262 | add ebx,8\r | |
263 | shl eax,cl\r | |
264 | or ebp,eax\r | |
265 | jmp L_align_long\r | |
266 | \r | |
267 | L_is_aligned:\r | |
268 | mov edi, [esp+60]\r | | ; edi = output pointer for the decode loops
269 | ; 366 "inffast.S" -- dispatch on inflate_fast_use_mmx: 2 -> MMX loop, above 2 -> plain loop, otherwise probe the CPU once. The probe selects MMX only when the EFLAGS ID bit can be toggled, the CPUID vendor string is "GenuineIntel", the family field is 6 and feature bit 23 of edx is set; it stores 2 or 3 in the flag and re-runs the dispatch\r | |
270 | L_check_mmx:\r | |
271 | cmp dword ptr [inflate_fast_use_mmx],2\r | |
272 | je L_init_mmx\r | |
273 | ja L_do_loop\r | |
274 | \r | |
275 | push eax\r | | ; cpuid overwrites eax, ebx, ecx and edx, all of which hold live decoder state here
276 | push ebx\r | |
277 | push ecx\r | |
278 | push edx\r | |
279 | pushfd\r | |
280 | mov eax, [esp]\r | |
281 | xor dword ptr [esp],0200000h\r | | ; flip bit 21 (ID flag) in the saved flags image
282 | \r | |
283 | \r | |
284 | \r | |
285 | \r | |
286 | popfd\r | |
287 | pushfd\r | |
288 | pop edx\r | |
289 | xor edx,eax\r | |
290 | jz L_dont_use_mmx\r | | ; the bit did not change, so cpuid is not available
291 | xor eax,eax\r | |
292 | cpuid\r | |
293 | cmp ebx,0756e6547h\r | | ; "Genu"
294 | jne L_dont_use_mmx\r | |
295 | cmp ecx,06c65746eh\r | | ; "ntel"
296 | jne L_dont_use_mmx\r | |
297 | cmp edx,049656e69h\r | | ; "ineI"
298 | jne L_dont_use_mmx\r | |
299 | mov eax,1\r | |
300 | cpuid\r | |
301 | shr eax,8\r | |
302 | and eax,15\r | | ; eax = bits 8..11 of the signature (family)
303 | cmp eax,6\r | |
304 | jne L_dont_use_mmx\r | |
305 | test edx,0800000h\r | | ; feature flags bit 23 (MMX)
306 | jnz L_use_mmx\r | |
307 | jmp L_dont_use_mmx\r | |
308 | L_use_mmx:\r | |
309 | mov dword ptr [inflate_fast_use_mmx],2\r | |
310 | jmp L_check_mmx_pop\r | |
311 | L_dont_use_mmx:\r | |
312 | mov dword ptr [inflate_fast_use_mmx],3\r | |
313 | L_check_mmx_pop:\r | |
314 | pop edx\r | |
315 | pop ecx\r | |
316 | pop ebx\r | |
317 | pop eax\r | |
318 | jmp L_check_mmx\r | |
319 | ; 426 "inffast.S" -- plain (non-MMX) decode loop. Registers: esi = input pointer, edi = output pointer, ebp = bit buffer, bl = number of valid bits in ebp\r | |
320 | ALIGN 4\r | |
321 | L_do_loop:\r | |
322 | ; 437 "inffast.S" -- refill: with 15 or fewer bits buffered, lodsw appends 16 more input bits. The table entry then loaded into eax is used as: al = op, ah = number of bits to drop, upper 16 bits = value (literal byte or length base)\r | |
323 | cmp bl,15\r | |
324 | ja L_get_length_code\r | |
325 | \r | |
326 | xor eax,eax\r | |
327 | lodsw\r | |
328 | mov cl,bl\r | |
329 | add bl,16\r | |
330 | shl eax,cl\r | |
331 | or ebp,eax\r | |
332 | \r | |
333 | L_get_length_code:\r | |
334 | mov edx, [esp+0]\r | |
335 | mov ecx, [esp+8]\r | |
336 | and edx,ebp\r | |
337 | mov eax, [ecx+edx*4]\r | | ; eax = lencode[bit buffer AND length index mask]
338 | \r | |
339 | L_dolen:\r | |
340 | \r | |
341 | \r | |
342 | \r | |
343 | \r | |
344 | \r | |
345 | \r | |
346 | mov cl,ah\r | |
347 | sub bl,ah\r | |
348 | shr ebp,cl\r | | ; drop the bits consumed by this code
349 | \r | |
350 | \r | |
351 | \r | |
352 | \r | |
353 | \r | |
354 | \r | |
355 | test al,al\r | |
356 | jnz L_test_for_length_base\r | |
357 | \r | |
358 | shr eax,16\r | |
359 | stosb\r | | ; op == 0: emit the literal byte
360 | \r | |
361 | L_while_test:\r | | ; continue only while edi is below the output limit and esi is below the input limit
362 | \r | |
363 | \r | |
364 | cmp [esp+16],edi\r | |
365 | jbe L_break_loop\r | |
366 | \r | |
367 | cmp [esp+20],esi\r | |
368 | ja L_do_loop\r | |
369 | jmp L_break_loop\r | |
370 | \r | |
371 | L_test_for_length_base:\r | |
372 | ; 502 "inffast.S" -- edx = value field (length base). If op bit 4 is set, the low 4 bits of op give the number of extra bits to read and add to the base (16 more input bits are fetched first when too few are buffered); the resulting length is saved at [esp+24]\r | |
373 | mov edx,eax\r | |
374 | shr edx,16\r | |
375 | mov cl,al\r | |
376 | \r | |
377 | test al,16\r | |
378 | jz L_test_for_second_level_length\r | |
379 | and cl,15\r | |
380 | jz L_save_len\r | |
381 | cmp bl,cl\r | |
382 | jae L_add_bits_to_len\r | |
383 | \r | |
384 | mov ch,cl\r | | ; ch keeps the extra-bit count while cl is used as the refill shift
385 | xor eax,eax\r | |
386 | lodsw\r | |
387 | mov cl,bl\r | |
388 | add bl,16\r | |
389 | shl eax,cl\r | |
390 | or ebp,eax\r | |
391 | mov cl,ch\r | |
392 | \r | |
393 | L_add_bits_to_len:\r | |
394 | mov eax,1\r | |
395 | shl eax,cl\r | |
396 | dec eax\r | |
397 | sub bl,cl\r | |
398 | and eax,ebp\r | |
399 | shr ebp,cl\r | |
400 | add edx,eax\r | |
401 | \r | |
402 | L_save_len:\r | |
403 | mov [esp+24],edx\r | |
404 | \r | |
405 | \r | |
406 | L_decode_distance:\r | |
407 | ; 549 "inffast.S" -- decode the distance code that follows a length code; same refill rule as the length path (16 more bits when 15 or fewer are buffered)\r | |
408 | cmp bl,15\r | |
409 | ja L_get_distance_code\r | |
410 | \r | |
411 | xor eax,eax\r | |
412 | lodsw\r | |
413 | mov cl,bl\r | |
414 | add bl,16\r | |
415 | shl eax,cl\r | |
416 | or ebp,eax\r | |
417 | \r | |
418 | L_get_distance_code:\r | |
419 | mov edx, [esp+4] ; distance index mask\r | |
420 | mov ecx, [esp+12] ; distcode table\r | |
421 | and edx,ebp\r | |
422 | mov eax, [ecx+edx*4] ; eax = entry: al = op, ah = bits, upper 16 = distance base\r | |
423 | \r | |
424 | \r | |
425 | L_dodist:\r | |
426 | mov edx,eax\r | |
427 | shr edx,16 ; edx = distance base\r | |
428 | mov cl,ah\r | |
429 | sub bl,ah\r | |
430 | shr ebp,cl ; drop the bits consumed by this code\r | |
431 | ; 584 "inffast.S"\r | |
432 | mov cl,al\r | |
433 | \r | |
434 | test al,16\r | |
435 | jz L_test_for_second_level_dist\r | |
436 | and cl,15 ; cl = number of extra distance bits\r | |
437 | jz L_check_dist_one\r | |
438 | cmp bl,cl\r | |
439 | jae L_add_bits_to_dist\r | |
440 | \r | |
441 | mov ch,cl ; ch keeps the extra-bit count across the refill\r | |
442 | xor eax,eax\r | |
443 | lodsw\r | |
444 | mov cl,bl\r | |
445 | add bl,16\r | |
446 | shl eax,cl\r | |
447 | or ebp,eax\r | |
448 | mov cl,ch\r | |
449 | \r | |
450 | L_add_bits_to_dist:\r | |
451 | mov eax,1\r | |
452 | shl eax,cl\r | |
453 | dec eax\r | |
454 | sub bl,cl\r | |
455 | and eax,ebp\r | |
456 | shr ebp,cl\r | |
457 | add edx,eax ; edx = distance base + extra bits\r | |
458 | ; execution falls through into L_check_window, which immediately follows\r | |
459 | \r | |
460 | L_check_window:\r | |
461 | ; 625 "inffast.S" -- edx = distance, [esp+24] = length. eax = bytes already written to the output (edi - [esp+40]); if the distance reaches further back than that, part of the source lies in the sliding window (L_clip_window). Otherwise copy from edi - distance: three bytes unrolled, then length - 3 more with rep movsb (assumes length >= 3 -- confirm against the table builder)\r | |
462 | mov [esp+44],esi\r | |
463 | mov eax,edi\r | |
464 | sub eax, [esp+40]\r | |
465 | \r | |
466 | cmp eax,edx\r | |
467 | jb L_clip_window\r | |
468 | \r | |
469 | mov ecx, [esp+24]\r | |
470 | mov esi,edi\r | |
471 | sub esi,edx\r | |
472 | \r | |
473 | sub ecx,3\r | |
474 | mov al, [esi]\r | |
475 | mov [edi],al\r | |
476 | mov al, [esi+1]\r | |
477 | mov dl, [esi+2]\r | |
478 | add esi,3\r | |
479 | mov [edi+1],al\r | |
480 | mov [edi+2],dl\r | |
481 | add edi,3\r | |
482 | rep movsb\r | |
483 | \r | |
484 | mov esi, [esp+44]\r | | ; esi was used as the copy source; restore the input pointer
485 | jmp L_while_test\r | |
486 | \r | |
487 | ALIGN 4\r | |
488 | L_check_dist_one:\r | | ; reached when the distance entry has no extra bits: distance 1 with at least one byte already in the output is handled as a byte fill
489 | cmp edx,1\r | |
490 | jne L_check_window\r | |
491 | cmp [esp+40],edi\r | |
492 | je L_check_window\r | |
493 | \r | |
494 | dec edi\r | | ; edi -> previous output byte
495 | mov ecx, [esp+24]\r | |
496 | mov al, [edi]\r | |
497 | sub ecx,3\r | |
498 | \r | |
499 | mov [edi+1],al\r | |
500 | mov [edi+2],al\r | |
501 | mov [edi+3],al\r | |
502 | add edi,4\r | |
503 | rep stosb\r | | ; remaining length - 3 copies of the same byte
504 | \r | |
505 | jmp L_while_test\r | |
506 | \r | |
507 | ALIGN 4\r | |
508 | L_test_for_second_level_length:\r | | ; length entry whose op has bit 4 clear; cl still holds op, edx the value field
509 | \r | |
510 | \r | |
511 | \r | |
512 | \r | |
513 | test al,64\r | |
514 | jnz L_test_for_end_of_block\r | | ; op bit 6 set: end-of-block or invalid code
515 | \r | |
516 | mov eax,1\r | | ; otherwise index = edx + (low cl bits of the bit buffer) selects another lencode entry, decoded again at L_dolen
517 | shl eax,cl\r | |
518 | dec eax\r | |
519 | and eax,ebp\r | |
520 | add eax,edx\r | |
521 | mov edx, [esp+8]\r | |
522 | mov eax, [edx+eax*4]\r | |
523 | jmp L_dolen\r | |
524 | \r | |
525 | ALIGN 4\r | |
526 | L_test_for_second_level_dist:\r | | ; distance entry whose op has bit 4 clear; cl still holds op, edx the value field
527 | \r | |
528 | \r | |
529 | \r | |
530 | \r | |
531 | test al,64\r | |
532 | jnz L_invalid_distance_code\r | | ; op bit 6 set: invalid distance code
533 | \r | |
534 | mov eax,1\r | | ; otherwise index = edx + (low cl bits of the bit buffer) selects another distcode entry, decoded again at L_dodist
535 | shl eax,cl\r | |
536 | dec eax\r | |
537 | and eax,ebp\r | |
538 | add eax,edx\r | |
539 | mov edx, [esp+12]\r | |
540 | mov eax, [edx+eax*4]\r | |
541 | jmp L_dodist\r | |
542 | \r | |
543 | ALIGN 4\r | |
544 | L_clip_window:\r | |
545 | ; 721 "inffast.S" -- the match starts before the data written so far, so its first part comes from the sliding window. On entry eax = bytes written to the output, edx = distance\r | |
546 | mov ecx,eax\r | |
547 | mov eax, [esp+52] ; eax = wsize\r | |
548 | neg ecx\r | |
549 | mov esi, [esp+56] ; esi = window base\r | |
550 | \r | |
551 | cmp eax,edx\r | |
552 | jb L_invalid_distance_too_far ; distance is larger than the window size\r | |
553 | \r | |
554 | add ecx,edx ; ecx = distance - bytes written = bytes to take from the window\r | |
555 | cmp dword ptr [esp+48],0\r | |
556 | jne L_wrap_around_window ; window write index is non-zero\r | |
557 | \r | |
558 | sub eax,ecx\r | |
559 | add esi,eax ; esi = window + wsize - ecx\r | |
560 | ; 749 "inffast.S"\r | |
561 | mov eax, [esp+24] ; eax = match length\r | |
562 | cmp eax,ecx\r | |
563 | jbe L_do_copy1 ; the whole match lies inside the window\r | |
564 | \r | |
565 | sub eax,ecx ; eax = bytes left after the window part\r | |
566 | rep movsb ; copy the window part\r | |
567 | mov esi,edi\r | |
568 | sub esi,edx ; the rest comes from output - distance\r | |
569 | jmp L_do_copy1\r | |
579 | \r | |
580 | L_wrap_around_window:\r | |
581 | ; 793 "inffast.S" -- the window write index (loaded into eax) is non-zero; ecx = bytes needed from the window, edx = distance. If the needed bytes all lie below the write index the source is contiguous. Otherwise it starts in the tail of the window (window + wsize + write - ecx, with ecx reduced to the tail byte count), may continue at the window base for up to write bytes, and finally continues in the output at edi - distance\r | |
582 | mov eax, [esp+48]\r | |
583 | cmp ecx,eax\r | |
584 | jbe L_contiguous_in_window\r | |
585 | \r | |
586 | add esi, [esp+52]\r | |
587 | add esi,eax\r | |
588 | sub esi,ecx\r | |
589 | sub ecx,eax\r | |
590 | \r | |
591 | \r | |
592 | mov eax, [esp+24]\r | |
593 | cmp eax,ecx\r | |
594 | jbe L_do_copy1\r | |
595 | \r | |
596 | sub eax,ecx\r | |
597 | rep movsb\r | |
598 | mov esi, [esp+56]\r | |
599 | mov ecx, [esp+48]\r | |
600 | cmp eax,ecx\r | |
601 | jbe L_do_copy1\r | |
602 | \r | |
603 | sub eax,ecx\r | |
604 | rep movsb\r | |
605 | mov esi,edi\r | |
606 | sub esi,edx\r | |
607 | jmp L_do_copy1\r | |
608 | \r | |
609 | L_contiguous_in_window:\r | |
610 | ; 836 "inffast.S" -- source = window + write - ecx; if the match is longer than ecx, those ecx bytes are copied here and the remainder comes from edi - distance\r | |
611 | add esi,eax\r | |
612 | sub esi,ecx\r | |
613 | \r | |
614 | \r | |
615 | mov eax, [esp+24]\r | |
616 | cmp eax,ecx\r | |
617 | jbe L_do_copy1\r | |
618 | \r | |
619 | sub eax,ecx\r | |
620 | rep movsb\r | |
621 | mov esi,edi\r | |
622 | sub esi,edx\r | |
623 | \r | |
624 | L_do_copy1:\r | |
625 | ; 862 "inffast.S" -- copy the remaining eax bytes from esi, then restore the input pointer saved at [esp+44]\r | |
626 | mov ecx,eax\r | |
627 | rep movsb\r | |
628 | \r | |
629 | mov esi, [esp+44]\r | |
630 | jmp L_while_test\r | |
631 | ; 878 "inffast.S" -- MMX setup: the bit buffer moves from ebp into mm0 and ebp takes over the bit count from ebx\r | |
632 | ALIGN 4\r | |
633 | L_init_mmx:\r | |
634 | emms\r | |
635 | \r | |
636 | \r | |
637 | \r | |
638 | \r | |
639 | \r | |
640 | movd mm0,ebp\r | |
641 | mov ebp,ebx\r | |
642 | ; 896 "inffast.S" -- mm4 and mm3 = length index mask, mm5 and mm2 = distance index mask, mm1 = 0 (no shift pending), ebx = lencode table pointer\r | |
643 | movd mm4,dword ptr [esp+0]\r | |
644 | movq mm3,mm4\r | |
645 | movd mm5,dword ptr [esp+4]\r | |
646 | movq mm2,mm5\r | |
647 | pxor mm1,mm1\r | |
648 | mov ebx, [esp+8]\r | |
649 | jmp L_do_loop_mmx\r | |
650 | \r | |
651 | ALIGN 4\r | |
652 | L_do_loop_mmx:\r | | ; MMX loop. mm0 = bit buffer, mm1 = shift count still to be applied to mm0, ebp = bit count, ebx = lencode table, esi = input, edi = output
653 | psrlq mm0,mm1\r | | ; apply the shift left pending by the previous code
654 | \r | |
655 | cmp ebp,32\r | |
656 | ja L_get_length_code_mmx\r | |
657 | \r | |
658 | movd mm6,ebp\r | | ; 32 or fewer bits buffered: append the next input dword above them
659 | movd mm7,dword ptr [esi]\r | |
660 | add esi,4\r | |
661 | psllq mm7,mm6\r | |
662 | add ebp,32\r | |
663 | por mm0,mm7\r | |
664 | \r | |
665 | L_get_length_code_mmx:\r | |
666 | pand mm4,mm0\r | |
667 | movd eax,mm4\r | |
668 | movq mm4,mm3\r | | ; restore the working mask from its copy in mm3
669 | mov eax, [ebx+eax*4]\r | | ; eax = lencode entry: al = op, ah = bits, upper 16 = value
670 | \r | |
671 | L_dolen_mmx:\r | |
672 | movzx ecx,ah\r | |
673 | movd mm1,ecx\r | | ; the shift of mm0 is deferred through mm1
674 | sub ebp,ecx\r | |
675 | \r | |
676 | test al,al\r | |
677 | jnz L_test_for_length_base_mmx\r | |
678 | \r | |
679 | shr eax,16\r | |
680 | stosb\r | | ; op == 0: emit the literal byte
681 | \r | |
682 | L_while_test_mmx:\r | | ; continue only while edi is below the output limit and esi is below the input limit
683 | \r | |
684 | \r | |
685 | cmp [esp+16],edi\r | |
686 | jbe L_break_loop\r | |
687 | \r | |
688 | cmp [esp+20],esi\r | |
689 | ja L_do_loop_mmx\r | |
690 | jmp L_break_loop\r | |
691 | \r | |
692 | L_test_for_length_base_mmx:\r | |
693 | \r | |
694 | mov edx,eax\r | |
695 | shr edx,16\r | | ; edx = length base
696 | \r | |
697 | test al,16\r | |
698 | jz L_test_for_second_level_length_mmx\r | |
699 | and eax,15\r | | ; eax = number of extra length bits
700 | jz L_decode_distance_mmx\r | |
701 | \r | |
702 | psrlq mm0,mm1\r | |
703 | movd mm1,eax\r | |
704 | movd ecx,mm0\r | |
705 | sub ebp,eax\r | |
706 | and ecx, [inflate_fast_mask+eax*4]\r | | ; keep only the extra bits
707 | add edx,ecx\r | | ; edx = match length
708 | \r | |
709 | L_decode_distance_mmx:\r | | ; edx = match length; decode the distance code next
710 | psrlq mm0,mm1\r | |
711 | \r | |
712 | cmp ebp,32\r | |
713 | ja L_get_dist_code_mmx\r | |
714 | \r | |
715 | movd mm6,ebp\r | | ; 32 or fewer bits buffered: append the next input dword above them
716 | movd mm7,dword ptr [esi]\r | |
717 | add esi,4\r | |
718 | psllq mm7,mm6\r | |
719 | add ebp,32\r | |
720 | por mm0,mm7\r | |
721 | \r | |
722 | L_get_dist_code_mmx:\r | |
723 | mov ebx, [esp+12]\r | | ; ebx is reused here: first the distcode table, then the distance value
724 | pand mm5,mm0\r | |
725 | movd eax,mm5\r | |
726 | movq mm5,mm2\r | | ; restore the working mask from its copy in mm2
727 | mov eax, [ebx+eax*4]\r | |
728 | \r | |
729 | L_dodist_mmx:\r | |
730 | \r | |
731 | movzx ecx,ah\r | |
732 | mov ebx,eax\r | |
733 | shr ebx,16\r | | ; ebx = distance base
734 | sub ebp,ecx\r | |
735 | movd mm1,ecx\r | |
736 | \r | |
737 | test al,16\r | |
738 | jz L_test_for_second_level_dist_mmx\r | |
739 | and eax,15\r | | ; eax = number of extra distance bits
740 | jz L_check_dist_one_mmx\r | |
741 | \r | |
742 | L_add_bits_to_dist_mmx:\r | |
743 | psrlq mm0,mm1\r | |
744 | movd mm1,eax\r | |
745 | movd ecx,mm0\r | |
746 | sub ebp,eax\r | |
747 | and ecx, [inflate_fast_mask+eax*4]\r | |
748 | add ebx,ecx\r | | ; ebx = distance
749 | \r | |
750 | L_check_window_mmx:\r | | ; ebx = distance, edx = length; eax becomes the number of bytes already written to the output
751 | mov [esp+44],esi\r | |
752 | mov eax,edi\r | |
753 | sub eax, [esp+40]\r | |
754 | \r | |
755 | cmp eax,ebx\r | |
756 | jb L_clip_window_mmx\r | | ; distance reaches back beyond the output written so far
757 | \r | |
758 | mov ecx,edx\r | |
759 | mov esi,edi\r | |
760 | sub esi,ebx\r | |
761 | \r | |
762 | sub ecx,3\r | | ; three bytes are copied unrolled, the other length - 3 by rep movsb
763 | mov al, [esi]\r | |
764 | mov [edi],al\r | |
765 | mov al, [esi+1]\r | |
766 | mov dl, [esi+2]\r | |
767 | add esi,3\r | |
768 | mov [edi+1],al\r | |
769 | mov [edi+2],dl\r | |
770 | add edi,3\r | |
771 | rep movsb\r | |
772 | \r | |
773 | mov esi, [esp+44]\r | |
774 | mov ebx, [esp+8]\r | | ; reload ebx = lencode table for the next symbol
775 | jmp L_while_test_mmx\r | |
776 | \r | |
777 | ALIGN 4\r | |
778 | L_check_dist_one_mmx:\r | | ; distance entry had no extra bits: distance 1 with at least one byte already in the output is handled as a byte fill
779 | cmp ebx,1\r | |
780 | jne L_check_window_mmx\r | |
781 | cmp [esp+40],edi\r | |
782 | je L_check_window_mmx\r | |
783 | \r | |
784 | dec edi\r | |
785 | mov ecx,edx\r | |
786 | mov al, [edi]\r | |
787 | sub ecx,3\r | |
788 | \r | |
789 | mov [edi+1],al\r | |
790 | mov [edi+2],al\r | |
791 | mov [edi+3],al\r | |
792 | add edi,4\r | |
793 | rep stosb\r | |
794 | \r | |
795 | mov ebx, [esp+8]\r | |
796 | jmp L_while_test_mmx\r | |
797 | \r | |
798 | ALIGN 4\r | |
799 | L_test_for_second_level_length_mmx:\r | | ; length entry whose op has bit 4 clear; edx = value field, ebx = lencode table
800 | test al,64\r | |
801 | jnz L_test_for_end_of_block\r | | ; op bit 6 set: end-of-block or invalid code
802 | \r | |
803 | and eax,15\r | |
804 | psrlq mm0,mm1\r | |
805 | movd ecx,mm0\r | |
806 | and ecx, [inflate_fast_mask+eax*4]\r | |
807 | add ecx,edx\r | | ; index = value + low (op AND 15) bits of the bit buffer
808 | mov eax, [ebx+ecx*4]\r | |
809 | jmp L_dolen_mmx\r | |
810 | \r | |
811 | ALIGN 4\r | |
812 | L_test_for_second_level_dist_mmx:\r | | ; distance entry whose op has bit 4 clear; ebx = value field
813 | test al,64\r | |
814 | jnz L_invalid_distance_code\r | | ; op bit 6 set: invalid distance code
815 | \r | |
816 | and eax,15\r | |
817 | psrlq mm0,mm1\r | |
818 | movd ecx,mm0\r | |
819 | and ecx, [inflate_fast_mask+eax*4]\r | |
820 | mov eax, [esp+12]\r | |
821 | add ecx,ebx\r | | ; index = value + low (op AND 15) bits of the bit buffer
822 | mov eax, [eax+ecx*4]\r | |
823 | jmp L_dodist_mmx\r | |
824 | \r | |
825 | ALIGN 4\r | |
826 | L_clip_window_mmx:\r | |
827 | ; the match starts before the data written so far, so its first part comes from the sliding window. On entry eax = bytes written to the output, ebx = distance, edx = length\r | |
828 | mov ecx,eax\r | |
829 | mov eax, [esp+52] ; eax = wsize\r | |
830 | neg ecx\r | |
831 | mov esi, [esp+56] ; esi = window base\r | |
832 | \r | |
833 | cmp eax,ebx\r | |
834 | jb L_invalid_distance_too_far ; distance is larger than the window size\r | |
835 | \r | |
836 | add ecx,ebx ; ecx = distance - bytes written = bytes to take from the window\r | |
837 | cmp dword ptr [esp+48],0\r | |
838 | jne L_wrap_around_window_mmx ; window write index is non-zero\r | |
839 | \r | |
840 | sub eax,ecx\r | |
841 | add esi,eax ; esi = window + wsize - ecx\r | |
842 | \r | |
843 | cmp edx,ecx\r | |
844 | jbe L_do_copy1_mmx ; the whole match lies inside the window\r | |
845 | \r | |
846 | sub edx,ecx ; edx = bytes left after the window part\r | |
847 | rep movsb ; copy the window part\r | |
848 | mov esi,edi\r | |
849 | sub esi,ebx ; the rest comes from output - distance\r | |
850 | jmp L_do_copy1_mmx\r | |
860 | \r | |
861 | L_wrap_around_window_mmx:\r | | ; window write index is non-zero; ecx = bytes needed from the window, edx = length, ebx = distance
862 | \r | |
863 | mov eax, [esp+48]\r | |
864 | cmp ecx,eax\r | |
865 | jbe L_contiguous_in_window_mmx\r | | ; the needed bytes all lie below the write index
866 | \r | |
867 | add esi, [esp+52]\r | | ; source = window + wsize + write - ecx, i.e. in the tail of the window
868 | add esi,eax\r | |
869 | sub esi,ecx\r | |
870 | sub ecx,eax\r | | ; ecx = number of bytes in that tail
871 | \r | |
872 | \r | |
873 | cmp edx,ecx\r | |
874 | jbe L_do_copy1_mmx\r | |
875 | \r | |
876 | sub edx,ecx\r | |
877 | rep movsb\r | |
878 | mov esi, [esp+56]\r | | ; tail consumed: continue at the window base for up to write bytes
879 | mov ecx, [esp+48]\r | |
880 | cmp edx,ecx\r | |
881 | jbe L_do_copy1_mmx\r | |
882 | \r | |
883 | sub edx,ecx\r | |
884 | rep movsb\r | |
885 | mov esi,edi\r | | ; finally continue from output - distance
886 | sub esi,ebx\r | |
887 | jmp L_do_copy1_mmx\r | |
888 | \r | |
889 | L_contiguous_in_window_mmx:\r | | ; source = window + write - ecx
890 | \r | |
891 | add esi,eax\r | |
892 | sub esi,ecx\r | |
893 | \r | |
894 | \r | |
895 | cmp edx,ecx\r | |
896 | jbe L_do_copy1_mmx\r | |
897 | \r | |
898 | sub edx,ecx\r | |
899 | rep movsb\r | |
900 | mov esi,edi\r | |
901 | sub esi,ebx\r | |
902 | \r | |
903 | L_do_copy1_mmx:\r | | ; copy the remaining edx bytes, then restore esi (input pointer) and ebx (lencode table)
904 | \r | |
905 | \r | |
906 | mov ecx,edx\r | |
907 | rep movsb\r | |
908 | \r | |
909 | mov esi, [esp+44]\r | |
910 | mov ebx, [esp+8]\r | |
911 | jmp L_while_test_mmx\r | |
912 | ; 1174 "inffast.S" -- exits taken by both decode loops. Each path loads ecx = message address (0 for none) and edx = new state->mode, then joins L_update_stream_state\r | |
913 | L_invalid_distance_code:\r | |
914 | \r | |
915 | \r | |
916 | \r | |
917 | \r | |
918 | \r | |
919 | mov ecx, invalid_distance_code_msg\r | |
920 | mov edx,INFLATE_MODE_BAD\r | |
921 | jmp L_update_stream_state\r | |
922 | \r | |
923 | L_test_for_end_of_block:\r | |
924 | ; reached with op bit 6 set: bit 5 set means end-of-block, otherwise the code is invalid\r | |
925 | \r | |
926 | \r | |
927 | \r | |
928 | \r | |
929 | test al,32\r | |
930 | jz L_invalid_literal_length_code\r | |
931 | \r | |
932 | xor ecx,ecx ; no message; flags are not used before the test ecx,ecx below\r | |
933 | mov edx,INFLATE_MODE_TYPE\r | |
934 | jmp L_update_stream_state\r | |
935 | \r | |
936 | L_invalid_literal_length_code:\r | |
937 | \r | |
938 | \r | |
939 | \r | |
940 | \r | |
941 | \r | |
942 | mov ecx, invalid_literal_length_code_msg\r | |
943 | mov edx,INFLATE_MODE_BAD\r | |
944 | jmp L_update_stream_state\r | |
945 | \r | |
946 | L_invalid_distance_too_far:\r | |
947 | ; entered from the clip-window code, where esi was already replaced by the window pointer\r | |
948 | \r | |
949 | \r | |
950 | mov esi, [esp+44] ; restore the input pointer\r | |
951 | mov ecx, invalid_distance_too_far_msg\r | |
952 | mov edx,INFLATE_MODE_BAD\r | |
953 | ; falls through into L_update_stream_state\r | |
954 | \r | |
955 | L_update_stream_state:\r | |
956 | \r | |
957 | mov eax, [esp+88] ; eax = stream pointer (first argument)\r | |
958 | test ecx,ecx\r | |
959 | jz L_skip_msg\r | |
960 | mov [eax+24],ecx ; store the message address in the stream\r | |
961 | L_skip_msg:\r | |
962 | mov eax, [eax+28] ; eax = state pointer\r | |
963 | mov [eax+mode_state],edx\r | |
964 | jmp L_break_loop\r | |
965 | \r | |
966 | ALIGN 4\r | |
967 | L_break_loop:\r | |
968 | ; 1243 "inffast.S" -- common exit for both loops. In MMX mode the bit count lives in ebp, so it is copied to ebx to share the code below\r | |
969 | cmp dword ptr [inflate_fast_use_mmx],2\r | |
970 | jne L_update_next_in\r | |
971 | \r | |
972 | \r | |
973 | \r | |
974 | mov ebx,ebp\r | |
975 | \r | |
976 | L_update_next_in:\r | |
977 | ; 1266 "inffast.S" -- give back whole unused bytes: esi -= bits / 8 and bits becomes bits mod 8; store the output pointer at stream+12 and the bit count in state->bits. If the 12-byte staging buffer at [esp+28] was in use (the input limit equals its address), esi is translated back into the caller's input buffer and the input limit is recomputed from stream+0 and stream+4\r | |
978 | mov eax, [esp+88]\r | |
979 | mov ecx,ebx\r | |
980 | mov edx, [eax+28]\r | |
981 | shr ecx,3\r | |
982 | sub esi,ecx\r | |
983 | shl ecx,3\r | |
984 | sub ebx,ecx\r | |
985 | mov [eax+12],edi\r | |
986 | mov [edx+bits_state],ebx\r | |
987 | mov ecx,ebx\r | |
988 | \r | |
989 | lea ebx, [esp+28]\r | |
990 | cmp [esp+20],ebx\r | |
991 | jne L_buf_not_used\r | |
992 | \r | |
993 | sub esi,ebx\r | | ; esi = number of bytes consumed from the staging buffer
994 | mov ebx, [eax+0]\r | |
995 | mov [esp+20],ebx\r | |
996 | add esi,ebx\r | |
997 | mov ebx, [eax+4]\r | |
998 | sub ebx,11\r | |
999 | add [esp+20],ebx\r | |
1000 | \r | |
1001 | L_buf_not_used:\r | |
1002 | mov [eax+0],esi\r | | ; stream+0 = updated input pointer
1003 | \r | |
1004 | mov ebx,1\r | | ; ebx = (1 shl bits) - 1, the mask for the bits that remain valid
1005 | shl ebx,cl\r | |
1006 | dec ebx\r | |
1007 | \r | |
1008 | \r | |
1009 | \r | |
1010 | \r | |
1011 | \r | |
1012 | cmp dword ptr [inflate_fast_use_mmx],2\r | |
1013 | jne L_update_hold\r | |
1014 | \r | |
1015 | \r | |
1016 | \r | |
1017 | psrlq mm0,mm1\r | | ; MMX mode: apply the pending shift and move the bit buffer back to ebp
1018 | movd ebp,mm0\r | |
1019 | \r | |
1020 | emms\r | |
1021 | \r | |
1022 | L_update_hold:\r | |
1023 | \r | |
1024 | \r | |
1025 | \r | |
1026 | and ebp,ebx\r | |
1027 | mov [edx+hold_state],ebp\r | |
1028 | \r | |
1029 | \r | |
1030 | \r | |
1031 | \r | |
1032 | mov ebx, [esp+20]\r | | ; stream+4 = input limit - esi + 11; both branches below store this value
1033 | cmp ebx,esi\r | |
1034 | jbe L_last_is_smaller\r | |
1035 | \r | |
1036 | sub ebx,esi\r | |
1037 | add ebx,11\r | |
1038 | mov [eax+4],ebx\r | |
1039 | jmp L_fixup_out\r | |
1040 | L_last_is_smaller:\r | |
1041 | sub esi,ebx\r | |
1042 | neg esi\r | |
1043 | add esi,11\r | |
1044 | mov [eax+4],esi\r | |
1045 | \r | |
1046 | \r | |
1047 | \r | |
1048 | \r | |
1049 | L_fixup_out:\r | |
1050 | \r | |
1051 | mov ebx, [esp+16]\r | | ; stream+16 = output limit - edi + 257; both branches below store this value
1052 | cmp ebx,edi\r | |
1053 | jbe L_end_is_smaller\r | |
1054 | \r | |
1055 | sub ebx,edi\r | |
1056 | add ebx,257\r | |
1057 | mov [eax+16],ebx\r | |
1058 | jmp L_done\r | |
1059 | L_end_is_smaller:\r | |
1060 | sub edi,ebx\r | |
1061 | neg edi\r | |
1062 | add edi,257\r | |
1063 | mov [eax+16],edi\r | |
1064 | \r | |
1065 | \r | |
1066 | \r | |
1067 | \r | |
1068 | \r | |
1069 | L_done:\r | |
1070 | add esp,64\r | |
1071 | popfd\r | | ; restores the flags saved by pushfd in the prologue, including the direction flag
1072 | pop ebx\r | |
1073 | pop ebp\r | |
1074 | pop esi\r | |
1075 | pop edi\r | |
1076 | ret\r | |
1077 | _inflate_fast endp\r | |
1078 | \r | |
1079 | _TEXT ends\r | |
1080 | end\r |