Fixed all remaining warnings in 64-bit build
[Glide64.git] / TexLoad4b.h
blobc0fbc6f4a8ebe02fc0062bead52f8734bf902c95
1 /*
2 * Glide64 - Glide video plugin for Nintendo 64 emulators.
3 * Copyright (c) 2002 Dave2001
4 * Copyright (c) 2008 Günther <guenther.emu@freenet.de>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 //****************************************************************
23 // Glide64 - Glide Plugin for Nintendo 64 emulators (tested mostly with Project64)
24 // Project started on December 29th, 2001
26 // To modify Glide64:
27 // * Write your name and (optionally) your email in a comment next to your work, so I know who did it, and so that you can find the parts you modified when it comes time to send them to me.
28 // * Do NOT send me the whole project or file that you modified. Take out your modified code sections, and tell me where to put them. If people sent the whole thing, I would have many different versions, but no idea how to combine them all.
30 // Official Glide64 development channel: #Glide64 on EFnet
32 // Original author: Dave2001 (Dave2999@hotmail.com)
33 // Other authors: Gonetz, Gugaman
35 //****************************************************************
37 //****************************************************************
38 // Size: 0, Format: 2
40 DWORD Load4bCI (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile)
42 if (wid_64 < 1) wid_64 = 1;
43 if (height < 1) height = 1;
44 int ext = (real_width - (wid_64 << 4)) << 1;
45 unsigned short * pal = (rdp.pal_8 + (rdp.tiles[tile].palette << 4));
46 if (rdp.tlut_mode == 2)
48 #ifndef GCC
49 __asm {
50 mov ebx,dword ptr [pal]
52 mov esi,dword ptr [src]
53 mov edi,dword ptr [dst]
55 mov ecx,dword ptr [height]
56 y_loop:
57 push ecx
59 mov ecx,dword ptr [wid_64]
60 x_loop:
61 push ecx
63 mov eax,dword ptr [esi] // read all 8 pixels
64 bswap eax
65 add esi,4
66 mov edx,eax
68 // 1st dword output {
69 shr eax,23
70 and eax,0x1E
71 mov cx,word ptr [ebx+eax]
72 ror cx,1
73 shl ecx,16
75 mov eax,edx
76 shr eax,27
77 and eax,0x1E
78 mov cx,word ptr [ebx+eax]
79 ror cx,1
81 mov dword ptr [edi],ecx
82 add edi,4
83 // }
85 // 2nd dword output {
86 mov eax,edx
87 shr eax,15
88 and eax,0x1E
89 mov cx,word ptr [ebx+eax]
90 ror cx,1
91 shl ecx,16
93 mov eax,edx
94 shr eax,19
95 and eax,0x1E
96 mov cx,word ptr [ebx+eax]
97 ror cx,1
99 mov dword ptr [edi],ecx
100 add edi,4
101 // }
103 // 3rd dword output {
104 mov eax,edx
105 shr eax,7
106 and eax,0x1E
107 mov cx,word ptr [ebx+eax]
108 ror cx,1
109 shl ecx,16
111 mov eax,edx
112 shr eax,11
113 and eax,0x1E
114 mov cx,word ptr [ebx+eax]
115 ror cx,1
117 mov dword ptr [edi],ecx
118 add edi,4
119 // }
121 // 4th dword output {
122 mov eax,edx
123 shl eax,1
124 and eax,0x1E
125 mov cx,word ptr [ebx+eax]
126 ror cx,1
127 shl ecx,16
129 shr edx,3
130 and edx,0x1E
131 mov cx,word ptr [ebx+edx]
132 ror cx,1
134 mov dword ptr [edi],ecx
135 add edi,4
136 // }
138 // * copy
139 mov eax,dword ptr [esi] // read all 8 pixels
140 bswap eax
141 add esi,4
142 mov edx,eax
144 // 1st dword output {
145 shr eax,23
146 and eax,0x1E
147 mov cx,word ptr [ebx+eax]
148 ror cx,1
149 shl ecx,16
151 mov eax,edx
152 shr eax,27
153 and eax,0x1E
154 mov cx,word ptr [ebx+eax]
155 ror cx,1
157 mov dword ptr [edi],ecx
158 add edi,4
159 // }
161 // 2nd dword output {
162 mov eax,edx
163 shr eax,15
164 and eax,0x1E
165 mov cx,word ptr [ebx+eax]
166 ror cx,1
167 shl ecx,16
169 mov eax,edx
170 shr eax,19
171 and eax,0x1E
172 mov cx,word ptr [ebx+eax]
173 ror cx,1
175 mov dword ptr [edi],ecx
176 add edi,4
177 // }
179 // 3rd dword output {
180 mov eax,edx
181 shr eax,7
182 and eax,0x1E
183 mov cx,word ptr [ebx+eax]
184 ror cx,1
185 shl ecx,16
187 mov eax,edx
188 shr eax,11
189 and eax,0x1E
190 mov cx,word ptr [ebx+eax]
191 ror cx,1
193 mov dword ptr [edi],ecx
194 add edi,4
195 // }
197 // 4th dword output {
198 mov eax,edx
199 shl eax,1
200 and eax,0x1E
201 mov cx,word ptr [ebx+eax]
202 ror cx,1
203 shl ecx,16
205 shr edx,3
206 and edx,0x1E
207 mov cx,word ptr [ebx+edx]
208 ror cx,1
210 mov dword ptr [edi],ecx
211 add edi,4
212 // }
213 // *
215 pop ecx
217 dec ecx
218 jnz x_loop
220 pop ecx
221 dec ecx
222 jz end_y_loop
223 push ecx
225 add esi,dword ptr [line]
226 add edi,dword ptr [ext]
228 mov ecx,dword ptr [wid_64]
229 x_loop_2:
230 push ecx
232 mov eax,dword ptr [esi+4] // read all 8 pixels
233 bswap eax
234 mov edx,eax
236 // 1st dword output {
237 shr eax,23
238 and eax,0x1E
239 mov cx,word ptr [ebx+eax]
240 ror cx,1
241 shl ecx,16
243 mov eax,edx
244 shr eax,27
245 and eax,0x1E
246 mov cx,word ptr [ebx+eax]
247 ror cx,1
249 mov dword ptr [edi],ecx
250 add edi,4
251 // }
253 // 2nd dword output {
254 mov eax,edx
255 shr eax,15
256 and eax,0x1E
257 mov cx,word ptr [ebx+eax]
258 ror cx,1
259 shl ecx,16
261 mov eax,edx
262 shr eax,19
263 and eax,0x1E
264 mov cx,word ptr [ebx+eax]
265 ror cx,1
267 mov dword ptr [edi],ecx
268 add edi,4
269 // }
271 // 3rd dword output {
272 mov eax,edx
273 shr eax,7
274 and eax,0x1E
275 mov cx,word ptr [ebx+eax]
276 ror cx,1
277 shl ecx,16
279 mov eax,edx
280 shr eax,11
281 and eax,0x1E
282 mov cx,word ptr [ebx+eax]
283 ror cx,1
285 mov dword ptr [edi],ecx
286 add edi,4
287 // }
289 // 4th dword output {
290 mov eax,edx
291 shl eax,1
292 and eax,0x1E
293 mov cx,word ptr [ebx+eax]
294 ror cx,1
295 shl ecx,16
297 shr edx,3
298 and edx,0x1E
299 mov cx,word ptr [ebx+edx]
300 ror cx,1
302 mov dword ptr [edi],ecx
303 add edi,4
304 // }
306 // * copy
307 mov eax,dword ptr [esi] // read all 8 pixels
308 bswap eax
309 add esi,8
310 mov edx,eax
312 // 1st dword output {
313 shr eax,23
314 and eax,0x1E
315 mov cx,word ptr [ebx+eax]
316 ror cx,1
317 shl ecx,16
319 mov eax,edx
320 shr eax,27
321 and eax,0x1E
322 mov cx,word ptr [ebx+eax]
323 ror cx,1
325 mov dword ptr [edi],ecx
326 add edi,4
327 // }
329 // 2nd dword output {
330 mov eax,edx
331 shr eax,15
332 and eax,0x1E
333 mov cx,word ptr [ebx+eax]
334 ror cx,1
335 shl ecx,16
337 mov eax,edx
338 shr eax,19
339 and eax,0x1E
340 mov cx,word ptr [ebx+eax]
341 ror cx,1
343 mov dword ptr [edi],ecx
344 add edi,4
345 // }
347 // 3rd dword output {
348 mov eax,edx
349 shr eax,7
350 and eax,0x1E
351 mov cx,word ptr [ebx+eax]
352 ror cx,1
353 shl ecx,16
355 mov eax,edx
356 shr eax,11
357 and eax,0x1E
358 mov cx,word ptr [ebx+eax]
359 ror cx,1
361 mov dword ptr [edi],ecx
362 add edi,4
363 // }
365 // 4th dword output {
366 mov eax,edx
367 shl eax,1
368 and eax,0x1E
369 mov cx,word ptr [ebx+eax]
370 ror cx,1
371 shl ecx,16
373 shr edx,3
374 and edx,0x1E
375 mov cx,word ptr [ebx+edx]
376 ror cx,1
378 mov dword ptr [edi],ecx
379 add edi,4
380 // }
381 // *
383 pop ecx
385 dec ecx
386 jnz x_loop_2
388 add esi,dword ptr [line]
389 add edi,dword ptr [ext]
391 pop ecx
392 dec ecx
393 jnz y_loop
395 end_y_loop:
397 #else // _WIN32
398 //printf("Load4bCI1\n");
399 // This way, gcc generates either a 32 bit or a 64 bit register
400 intptr_t fake_ecx, fake_eax, fake_edx;
401 asm volatile (
402 "y_loop: \n"
403 "push %[c] \n"
405 "mov %[wid_64], %%ecx \n"
406 "x_loop: \n"
407 "push %[c] \n"
409 "mov (%[src]), %%eax \n" // read all 8 pixels
410 "bswap %%eax \n"
411 "add $4, %[src] \n"
412 "mov %%eax, %%edx \n"
414 // 1st dword output {
415 "shr $23, %%eax \n"
416 "and $0x1E, %%eax \n"
417 "mov (%[pal],%[a]), %%cx \n"
418 "ror $1, %%cx \n"
419 "shl $16, %%ecx \n"
421 "mov %%edx, %%eax \n"
422 "shr $27, %%eax \n"
423 "and $0x1E, %%eax \n"
424 "mov (%[pal],%[a]), %%cx \n"
425 "ror $1, %%cx \n"
427 "mov %%ecx, (%[dst]) \n"
428 "add $4, %[dst] \n"
429 // }
431 // 2nd dword output {
432 "mov %%edx, %%eax \n"
433 "shr $15, %%eax \n"
434 "and $0x1E, %%eax \n"
435 "mov (%[pal],%[a]), %%cx \n"
436 "ror $1, %%cx \n"
437 "shl $16, %%ecx \n"
439 "mov %%edx, %%eax \n"
440 "shr $19, %%eax \n"
441 "and $0x1E, %%eax \n"
442 "mov (%[pal],%[a]), %%cx \n"
443 "ror $1, %%cx \n"
445 "mov %%ecx, (%[dst]) \n"
446 "add $4, %[dst] \n"
447 // }
449 // 3rd dword output {
450 "mov %%edx, %%eax \n"
451 "shr $7,%%eax \n"
452 "and $0x1E, %%eax \n"
453 "mov (%[pal],%[a]),%%cx \n"
454 "ror $1,%%cx \n"
455 "shl $16,%%ecx \n"
457 "mov %%edx, %%eax \n"
458 "shr $11, %%eax \n"
459 "and $0x1E, %%eax \n"
460 "mov (%[pal],%[a]), %%cx \n"
461 "ror $1, %%cx \n"
463 "mov %%ecx, (%[dst]) \n"
464 "add $4, %[dst] \n"
465 // }
467 // 4th dword output {
468 "mov %%edx, %%eax \n"
469 "shl $1, %%eax \n"
470 "and $0x1E, %%eax \n"
471 "mov (%[pal],%[a]), %%cx \n"
472 "ror $1, %%cx \n"
473 "shl $16, %%ecx \n"
475 "shr $3, %%edx \n"
476 "and $0x1E, %%edx \n"
477 "mov (%[pal],%[d]), %%cx \n"
478 "ror $1, %%cx \n"
480 "mov %%ecx, (%[dst]) \n"
481 "add $4, %[dst] \n"
482 // }
484 // * copy
485 "mov (%[src]), %%eax \n" // read all 8 pixels
486 "bswap %%eax \n"
487 "add $4, %[src] \n"
488 "mov %%eax, %%edx \n"
490 // 1st dword output {
491 "shr $23, %%eax \n"
492 "and $0x1E, %%eax \n"
493 "mov (%[pal],%[a]), %%cx \n"
494 "ror $1, %%cx \n"
495 "shl $16, %%ecx \n"
497 "mov %%edx, %%eax \n"
498 "shr $27, %%eax \n"
499 "and $0x1E, %%eax \n"
500 "mov (%[pal],%[a]), %%cx \n"
501 "ror $1, %%cx \n"
503 "mov %%ecx, (%[dst]) \n"
504 "add $4, %[dst] \n"
505 // }
507 // 2nd dword output {
508 "mov %%edx, %%eax \n"
509 "shr $15, %%eax \n"
510 "and $0x1E, %%eax \n"
511 "mov (%[pal],%[a]), %%cx \n"
512 "ror $1, %%cx \n"
513 "shl $16, %%ecx \n"
515 "mov %%edx, %%eax \n"
516 "shr $19, %%eax \n"
517 "and $0x1E, %%eax \n"
518 "mov (%[pal],%[a]), %%cx \n"
519 "ror $1, %%cx \n"
521 "mov %%ecx, (%[dst]) \n"
522 "add $4, %[dst] \n"
523 // }
525 // 3rd dword output {
526 "mov %%edx, %%eax \n"
527 "shr $7, %%eax \n"
528 "and $0x1E, %%eax \n"
529 "mov (%[pal],%[a]), %%cx \n"
530 "ror $1, %%cx \n"
531 "shl $16, %%ecx \n"
533 "mov %%edx, %%eax \n"
534 "shr $11, %%eax \n"
535 "and $0x1E, %%eax \n"
536 "mov (%[pal],%[a]), %%cx \n"
537 "ror $1, %%cx \n"
539 "mov %%ecx, (%[dst]) \n"
540 "add $4, %[dst] \n"
541 // }
543 // 4th dword output {
544 "mov %%edx, %%eax \n"
545 "shl $1, %%eax \n"
546 "and $0x1E, %%eax \n"
547 "mov (%[pal],%[a]), %%cx \n"
548 "ror $1, %%cx \n"
549 "shl $16, %%ecx \n"
551 "shr $3, %%edx \n"
552 "and $0x1E, %%edx \n"
553 "mov (%[pal],%[d]), %%cx \n"
554 "ror $1, %%cx \n"
556 "mov %%ecx, (%[dst]) \n"
557 "add $4, %[dst] \n"
558 // }
559 // *
561 "pop %[c] \n"
563 "dec %%ecx \n"
564 "jnz x_loop \n"
566 "pop %[c] \n"
567 "dec %%ecx \n"
568 "jz end_y_loop \n"
569 "push %[c] \n"
571 "add %[line], %[src] \n"
572 "add %[ext], %[dst] \n"
574 "mov %[wid_64], %%ecx \n"
575 "x_loop_2: \n"
576 "push %[c] \n"
578 "mov 4(%[src]), %%eax \n" // read all 8 pixels
579 "bswap %%eax \n"
580 "mov %%eax, %%edx \n"
582 // 1st dword output {
583 "shr $23, %%eax \n"
584 "and $0x1E, %%eax \n"
585 "mov (%[pal],%[a]), %%cx \n"
586 "ror $1, %%cx \n"
587 "shl $16, %%ecx \n"
589 "mov %%edx, %%eax \n"
590 "shr $27, %%eax \n"
591 "and $0x1E, %%eax \n"
592 "mov (%[pal],%[a]), %%cx \n"
593 "ror $1, %%cx \n"
595 "mov %%ecx, (%[dst]) \n"
596 "add $4, %[dst] \n"
597 // }
599 // 2nd dword output {
600 "mov %%edx, %%eax \n"
601 "shr $15, %%eax \n"
602 "and $0x1E, %%eax \n"
603 "mov (%[pal],%[a]), %%cx \n"
604 "ror $1, %%cx \n"
605 "shl $16, %%ecx \n"
607 "mov %%edx, %%eax \n"
608 "shr $19, %%eax \n"
609 "and $0x1E, %%eax \n"
610 "mov (%[pal],%[a]), %%cx \n"
611 "ror $1, %%cx \n"
613 "mov %%ecx, (%[dst]) \n"
614 "add $4, %[dst] \n"
615 // }
617 // 3rd dword output {
618 "mov %%edx, %%eax \n"
619 "shr $7, %%eax \n"
620 "and $0x1E, %%eax \n"
621 "mov (%[pal],%[a]), %%cx \n"
622 "ror $1, %%cx \n"
623 "shl $16, %%ecx \n"
625 "mov %%edx, %%eax \n"
626 "shr $11, %%eax \n"
627 "and $0x1E, %%eax \n"
628 "mov (%[pal],%[a]), %%cx \n"
629 "ror $1, %%cx \n"
631 "mov %%ecx, (%[dst]) \n"
632 "add $4, %[dst] \n"
633 // }
635 // 4th dword output {
636 "mov %%edx, %%eax \n"
637 "shl $1, %%eax \n"
638 "and $0x1E, %%eax \n"
639 "mov (%[pal],%[a]), %%cx \n"
640 "ror $1, %%cx \n"
641 "shl $16, %%ecx \n"
643 "shr $3, %%edx \n"
644 "and $0x1E, %%edx \n"
645 "mov (%[pal],%[d]), %%cx \n"
646 "ror $1, %%cx \n"
648 "mov %%ecx, (%[dst]) \n"
649 "add $4, %[dst] \n"
650 // }
652 // * copy
653 "mov (%[src]), %%eax \n" // read all 8 pixels
654 "bswap %%eax \n"
655 "add $8, %[src] \n"
656 "mov %%eax, %%edx \n"
658 // 1st dword output {
659 "shr $23, %%eax \n"
660 "and $0x1E, %%eax \n"
661 "mov (%[pal],%[a]), %%cx \n"
662 "ror $1, %%cx \n"
663 "shl $16, %%ecx \n"
665 "mov %%edx, %%eax \n"
666 "shr $27, %%eax \n"
667 "and $0x1E, %%eax \n"
668 "mov (%[pal],%[a]), %%cx \n"
669 "ror $1, %%cx \n"
671 "mov %%ecx, (%[dst]) \n"
672 "add $4, %[dst] \n"
673 // }
675 // 2nd dword output {
676 "mov %%edx, %%eax \n"
677 "shr $15, %%eax \n"
678 "and $0x1E, %%eax \n"
679 "mov (%[pal],%[a]), %%cx \n"
680 "ror $1, %%cx \n"
681 "shl $16, %%ecx \n"
683 "mov %%edx, %%eax \n"
684 "shr $19, %%eax \n"
685 "and $0x1E, %%eax \n"
686 "mov (%[pal],%[a]), %%cx \n"
687 "ror $1, %%cx \n"
689 "mov %%ecx, (%[dst]) \n"
690 "add $4, %[dst] \n"
691 // }
693 // 3rd dword output {
694 "mov %%edx, %%eax \n"
695 "shr $7, %%eax \n"
696 "and $0x1E, %%eax \n"
697 "mov (%[pal],%[a]), %%cx \n"
698 "ror $1, %%cx \n"
699 "shl $16, %%ecx \n"
701 "mov %%edx, %%eax \n"
702 "shr $11, %%eax \n"
703 "and $0x1E, %%eax \n"
704 "mov (%[pal],%[a]), %%cx \n"
705 "ror $1, %%cx \n"
707 "mov %%ecx, (%[dst]) \n"
708 "add $4, %[dst] \n"
709 // }
711 // 4th dword output {
712 "mov %%edx, %%eax \n"
713 "shl $1, %%eax \n"
714 "and $0x1E, %%eax \n"
715 "mov (%[pal],%[a]), %%cx \n"
716 "ror $1, %%cx \n"
717 "shl $16, %%ecx \n"
719 "shr $3, %%edx \n"
720 "and $0x1E, %%edx \n"
721 "mov (%[pal],%[d]), %%cx \n"
722 "ror $1, %%cx \n"
724 "mov %%ecx, (%[dst]) \n"
725 "add $4, %[dst] \n"
726 // }
727 // *
729 "pop %[c] \n"
731 "dec %%ecx \n"
732 "jnz x_loop_2 \n"
734 "add %[line], %[src] \n"
735 "add %[ext], %[dst] \n"
737 "pop %[c] \n"
738 "dec %%ecx \n"
739 "jnz y_loop \n"
741 "end_y_loop: \n"
742 : [c] "=&c" (fake_ecx), [a] "=&a" (fake_eax), [d] "=&d" (fake_edx)
743 : [src] "S"(src), [dst] "D"(dst), "[c]"(height),
744 // pal needs to be in a register because its used in mov (%[pal],...), ...
745 [pal] "r" (pal), [wid_64] "g" (wid_64), [line] "g" ((uintptr_t)line), [ext] "g" ((uintptr_t)ext)
746 : "memory", "cc"
748 #endif // _WIN32
750 else
752 #ifndef GCC
753 __asm {
754 mov ebx,dword ptr [pal]
756 mov esi,dword ptr [src]
757 mov edi,dword ptr [dst]
759 mov ecx,dword ptr [height]
760 ia_y_loop:
761 push ecx
763 mov ecx,dword ptr [wid_64]
764 ia_x_loop:
765 push ecx
767 mov eax,dword ptr [esi] // read all 8 pixels
768 bswap eax
769 add esi,4
770 mov edx,eax
772 // 1st dword output {
773 shr eax,23
774 and eax,0x1E
775 mov cx,word ptr [ebx+eax]
776 ror cx,8
777 shl ecx,16
779 mov eax,edx
780 shr eax,27
781 and eax,0x1E
782 mov cx,word ptr [ebx+eax]
783 ror cx,8
785 mov dword ptr [edi],ecx
786 add edi,4
787 // }
789 // 2nd dword output {
790 mov eax,edx
791 shr eax,15
792 and eax,0x1E
793 mov cx,word ptr [ebx+eax]
794 ror cx,8
795 shl ecx,16
797 mov eax,edx
798 shr eax,19
799 and eax,0x1E
800 mov cx,word ptr [ebx+eax]
801 ror cx,8
803 mov dword ptr [edi],ecx
804 add edi,4
805 // }
807 // 3rd dword output {
808 mov eax,edx
809 shr eax,7
810 and eax,0x1E
811 mov cx,word ptr [ebx+eax]
812 ror cx,8
813 shl ecx,16
815 mov eax,edx
816 shr eax,11
817 and eax,0x1E
818 mov cx,word ptr [ebx+eax]
819 ror cx,8
821 mov dword ptr [edi],ecx
822 add edi,4
823 // }
825 // 4th dword output {
826 mov eax,edx
827 shl eax,1
828 and eax,0x1E
829 mov cx,word ptr [ebx+eax]
830 ror cx,8
831 shl ecx,16
833 shr edx,3
834 and edx,0x1E
835 mov cx,word ptr [ebx+edx]
836 ror cx,8
838 mov dword ptr [edi],ecx
839 add edi,4
840 // }
842 // * copy
843 mov eax,dword ptr [esi] // read all 8 pixels
844 bswap eax
845 add esi,4
846 mov edx,eax
848 // 1st dword output {
849 shr eax,23
850 and eax,0x1E
851 mov cx,word ptr [ebx+eax]
852 ror cx,8
853 shl ecx,16
855 mov eax,edx
856 shr eax,27
857 and eax,0x1E
858 mov cx,word ptr [ebx+eax]
859 ror cx,8
861 mov dword ptr [edi],ecx
862 add edi,4
863 // }
865 // 2nd dword output {
866 mov eax,edx
867 shr eax,15
868 and eax,0x1E
869 mov cx,word ptr [ebx+eax]
870 ror cx,8
871 shl ecx,16
873 mov eax,edx
874 shr eax,19
875 and eax,0x1E
876 mov cx,word ptr [ebx+eax]
877 ror cx,8
879 mov dword ptr [edi],ecx
880 add edi,4
881 // }
883 // 3rd dword output {
884 mov eax,edx
885 shr eax,7
886 and eax,0x1E
887 mov cx,word ptr [ebx+eax]
888 ror cx,8
889 shl ecx,16
891 mov eax,edx
892 shr eax,11
893 and eax,0x1E
894 mov cx,word ptr [ebx+eax]
895 ror cx,8
897 mov dword ptr [edi],ecx
898 add edi,4
899 // }
901 // 4th dword output {
902 mov eax,edx
903 shl eax,1
904 and eax,0x1E
905 mov cx,word ptr [ebx+eax]
906 ror cx,8
907 shl ecx,16
909 shr edx,3
910 and edx,0x1E
911 mov cx,word ptr [ebx+edx]
912 ror cx,8
914 mov dword ptr [edi],ecx
915 add edi,4
916 // }
917 // *
919 pop ecx
921 dec ecx
922 jnz ia_x_loop
924 pop ecx
925 dec ecx
926 jz ia_end_y_loop
927 push ecx
929 add esi,dword ptr [line]
930 add edi,dword ptr [ext]
932 mov ecx,dword ptr [wid_64]
933 ia_x_loop_2:
934 push ecx
936 mov eax,dword ptr [esi+4] // read all 8 pixels
937 bswap eax
938 mov edx,eax
940 // 1st dword output {
941 shr eax,23
942 and eax,0x1E
943 mov cx,word ptr [ebx+eax]
944 ror cx,8
945 shl ecx,16
947 mov eax,edx
948 shr eax,27
949 and eax,0x1E
950 mov cx,word ptr [ebx+eax]
951 ror cx,8
953 mov dword ptr [edi],ecx
954 add edi,4
955 // }
957 // 2nd dword output {
958 mov eax,edx
959 shr eax,15
960 and eax,0x1E
961 mov cx,word ptr [ebx+eax]
962 ror cx,8
963 shl ecx,16
965 mov eax,edx
966 shr eax,19
967 and eax,0x1E
968 mov cx,word ptr [ebx+eax]
969 ror cx,8
971 mov dword ptr [edi],ecx
972 add edi,4
973 // }
975 // 3rd dword output {
976 mov eax,edx
977 shr eax,7
978 and eax,0x1E
979 mov cx,word ptr [ebx+eax]
980 ror cx,8
981 shl ecx,16
983 mov eax,edx
984 shr eax,11
985 and eax,0x1E
986 mov cx,word ptr [ebx+eax]
987 ror cx,8
989 mov dword ptr [edi],ecx
990 add edi,4
991 // }
993 // 4th dword output {
994 mov eax,edx
995 shl eax,1
996 and eax,0x1E
997 mov cx,word ptr [ebx+eax]
998 ror cx,8
999 shl ecx,16
1001 shr edx,3
1002 and edx,0x1E
1003 mov cx,word ptr [ebx+edx]
1004 ror cx,8
1006 mov dword ptr [edi],ecx
1007 add edi,4
1008 // }
1010 // * copy
1011 mov eax,dword ptr [esi] // read all 8 pixels
1012 bswap eax
1013 add esi,8
1014 mov edx,eax
1016 // 1st dword output {
1017 shr eax,23
1018 and eax,0x1E
1019 mov cx,word ptr [ebx+eax]
1020 ror cx,8
1021 shl ecx,16
1023 mov eax,edx
1024 shr eax,27
1025 and eax,0x1E
1026 mov cx,word ptr [ebx+eax]
1027 ror cx,8
1029 mov dword ptr [edi],ecx
1030 add edi,4
1031 // }
1033 // 2nd dword output {
1034 mov eax,edx
1035 shr eax,15
1036 and eax,0x1E
1037 mov cx,word ptr [ebx+eax]
1038 ror cx,8
1039 shl ecx,16
1041 mov eax,edx
1042 shr eax,19
1043 and eax,0x1E
1044 mov cx,word ptr [ebx+eax]
1045 ror cx,8
1047 mov dword ptr [edi],ecx
1048 add edi,4
1049 // }
1051 // 3rd dword output {
1052 mov eax,edx
1053 shr eax,7
1054 and eax,0x1E
1055 mov cx,word ptr [ebx+eax]
1056 ror cx,8
1057 shl ecx,16
1059 mov eax,edx
1060 shr eax,11
1061 and eax,0x1E
1062 mov cx,word ptr [ebx+eax]
1063 ror cx,8
1065 mov dword ptr [edi],ecx
1066 add edi,4
1067 // }
1069 // 4th dword output {
1070 mov eax,edx
1071 shl eax,1
1072 and eax,0x1E
1073 mov cx,word ptr [ebx+eax]
1074 ror cx,8
1075 shl ecx,16
1077 shr edx,3
1078 and edx,0x1E
1079 mov cx,word ptr [ebx+edx]
1080 ror cx,8
1082 mov dword ptr [edi],ecx
1083 add edi,4
1084 // }
1085 // *
1087 pop ecx
1089 dec ecx
1090 jnz ia_x_loop_2
1092 add esi,dword ptr [line]
1093 add edi,dword ptr [ext]
1095 pop ecx
1096 dec ecx
1097 jnz ia_y_loop
1099 ia_end_y_loop:
1101 #else // _WIN32
1102 //printf("Load4bCI2\n");
1103 intptr_t fake_ecx, fake_eax, fake_edx;
1104 asm volatile (
1105 "ia_y_loop: \n"
1106 "push %[c] \n"
1108 "mov %[wid_64], %%ecx \n"
1109 "ia_x_loop: \n"
1110 "push %[c] \n"
1112 "mov (%[src]), %%eax \n" // read all 8 pixels
1113 "bswap %%eax \n"
1114 "add $4, %[src] \n"
1115 "mov %%eax, %%edx \n"
1117 // 1st dword output {
1118 "shr $23, %%eax \n"
1119 "and $0x1E, %%eax \n"
1120 "mov (%[pal],%[a]), %%cx \n"
1121 "ror $8, %%cx \n"
1122 "shl $16, %%ecx \n"
1124 "mov %%edx, %%eax \n"
1125 "shr $27, %%eax \n"
1126 "and $0x1E, %%eax \n"
1127 "mov (%[pal],%[a]), %%cx \n"
1128 "ror $8, %%cx \n"
1130 "mov %%ecx, (%[dst]) \n"
1131 "add $4, %[dst] \n"
1132 // }
1134 // 2nd dword output {
1135 "mov %%edx, %%eax \n"
1136 "shr $15, %%eax \n"
1137 "and $0x1E, %%eax \n"
1138 "mov (%[pal],%[a]), %%cx \n"
1139 "ror $8, %%cx \n"
1140 "shl $16, %%ecx \n"
1142 "mov %%edx, %%eax \n"
1143 "shr $19, %%eax \n"
1144 "and $0x1E, %%eax \n"
1145 "mov (%[pal],%[a]), %%cx \n"
1146 "ror $8, %%cx \n"
1148 "mov %%ecx, (%[dst]) \n"
1149 "add $4, %[dst] \n"
1150 // }
1152 // 3rd dword output {
1153 "mov %%edx, %%eax \n"
1154 "shr $7, %%eax \n"
1155 "and $0x1E, %%eax \n"
1156 "mov (%[pal],%[a]), %%cx \n"
1157 "ror $8, %%cx \n"
1158 "shl $16, %%ecx \n"
1160 "mov %%edx, %%eax \n"
1161 "shr $11, %%eax \n"
1162 "and $0x1E, %%eax \n"
1163 "mov (%[pal],%[a]), %%cx \n"
1164 "ror $8, %%cx \n"
1166 "mov %%ecx, (%[dst]) \n"
1167 "add $4, %[dst] \n"
1168 // }
1170 // 4th dword output {
1171 "mov %%edx, %%eax \n"
1172 "shl $1, %%eax \n"
1173 "and $0x1E, %%eax \n"
1174 "mov (%[pal],%[a]), %%cx \n"
1175 "ror $8, %%cx \n"
1176 "shl $16, %%ecx \n"
1178 "shr $3, %%edx \n"
1179 "and $0x1E, %%edx \n"
1180 "mov (%[pal],%[d]), %%cx \n"
1181 "ror $8, %%cx \n"
1183 "mov %%ecx, (%[dst]) \n"
1184 "add $4, %[dst] \n"
1185 // }
1187 // * copy
1188 "mov (%[src]), %%eax \n" // read all 8 pixels
1189 "bswap %%eax \n"
1190 "add $4, %[src] \n"
1191 "mov %%eax, %%edx \n"
1193 // 1st dword output {
1194 "shr $23, %%eax \n"
1195 "and $0x1E, %%eax \n"
1196 "mov (%[pal],%[a]), %%cx \n"
1197 "ror $8, %%cx \n"
1198 "shl $16, %%ecx \n"
1200 "mov %%edx, %%eax \n"
1201 "shr $27, %%eax \n"
1202 "and $0x1E, %%eax \n"
1203 "mov (%[pal],%[a]), %%cx \n"
1204 "ror $8, %%cx \n"
1206 "mov %%ecx, (%[dst]) \n"
1207 "add $4, %[dst] \n"
1208 // }
1210 // 2nd dword output {
1211 "mov %%edx, %%eax \n"
1212 "shr $15, %%eax \n"
1213 "and $0x1E, %%eax \n"
1214 "mov (%[pal],%[a]), %%cx \n"
1215 "ror $8, %%cx \n"
1216 "shl $16, %%ecx \n"
1218 "mov %%edx, %%eax \n"
1219 "shr $19, %%eax \n"
1220 "and $0x1E, %%eax \n"
1221 "mov (%[pal],%[a]), %%cx \n"
1222 "ror $8, %%cx \n"
1224 "mov %%ecx, (%[dst]) \n"
1225 "add $4, %[dst] \n"
1226 // }
1228 // 3rd dword output {
1229 "mov %%edx, %%eax \n"
1230 "shr $7, %%eax \n"
1231 "and $0x1E, %%eax \n"
1232 "mov (%[pal],%[a]), %%cx \n"
1233 "ror $8,%%cx \n"
1234 "shl $16, %%ecx \n"
1236 "mov %%edx, %%eax \n"
1237 "shr $11, %%eax \n"
1238 "and $0x1E, %%eax \n"
1239 "mov (%[pal],%[a]), %%cx \n"
1240 "ror $8, %%cx \n"
1242 "mov %%ecx, (%[dst]) \n"
1243 "add $4, %[dst] \n"
1244 // }
1246 // 4th dword output {
1247 "mov %%edx, %%eax \n"
1248 "shl $1, %%eax \n"
1249 "and $0x1E, %%eax \n"
1250 "mov (%[pal],%[a]), %%cx \n"
1251 "ror $8, %%cx \n"
1252 "shl $16, %%ecx \n"
1254 "shr $3, %%edx \n"
1255 "and $0x1E, %%edx \n"
1256 "mov (%[pal],%[d]), %%cx \n"
1257 "ror $8, %%cx \n"
1259 "mov %%ecx, (%[dst]) \n"
1260 "add $4, %[dst] \n"
1261 // }
1262 // *
1264 "pop %[c] \n"
1266 "dec %%ecx \n"
1267 "jnz ia_x_loop \n"
1269 "pop %[c] \n"
1270 "dec %%ecx \n"
1271 "jz ia_end_y_loop \n"
1272 "push %[c] \n"
1274 "add %[line], %[src] \n"
1275 "add %[ext], %[dst] \n"
1277 "mov %[wid_64], %%ecx \n"
1278 "ia_x_loop_2: \n"
1279 "push %[c] \n"
1281 "mov 4(%[src]), %%eax \n" // read all 8 pixels
1282 "bswap %%eax \n"
1283 "mov %%eax, %%edx \n"
1285 // 1st dword output {
1286 "shr $23, %%eax \n"
1287 "and $0x1E, %%eax \n"
1288 "mov (%[pal],%[a]), %%cx \n"
1289 "ror $8, %%cx \n"
1290 "shl $16, %%ecx \n"
1292 "mov %%edx, %%eax \n"
1293 "shr $27, %%eax \n"
1294 "and $0x1E, %%eax \n"
1295 "mov (%[pal],%[a]), %%cx \n"
1296 "ror $8, %%cx \n"
1298 "mov %%ecx, (%[dst]) \n"
1299 "add $4, %[dst] \n"
1300 // }
1302 // 2nd dword output {
1303 "mov %%edx, %%eax \n"
1304 "shr $15, %%eax \n"
1305 "and $0x1E, %%eax \n"
1306 "mov (%[pal],%[a]), %%cx \n"
1307 "ror $8, %%cx \n"
1308 "shl $16, %%ecx \n"
1310 "mov %%edx, %%eax \n"
1311 "shr $19, %%eax \n"
1312 "and $0x1E, %%eax \n"
1313 "mov (%[pal],%[a]), %%cx \n"
1314 "ror $8, %%cx \n"
1316 "mov %%ecx, (%[dst]) \n"
1317 "add $4, %[dst] \n"
1318 // }
1320 // 3rd dword output {
1321 "mov %%edx, %%eax \n"
1322 "shr $7, %%eax \n"
1323 "and $0x1E, %%eax \n"
1324 "mov (%[pal],%[a]), %%cx \n"
1325 "ror $8, %%cx \n"
1326 "shl $16, %%ecx \n"
1328 "mov %%edx, %%eax \n"
1329 "shr $11, %%eax \n"
1330 "and $0x1E, %%eax \n"
1331 "mov (%[pal],%[a]), %%cx \n"
1332 "ror $8, %%cx \n"
1334 "mov %%ecx, (%[dst]) \n"
1335 "add $4, %[dst] \n"
1336 // }
1338 // 4th dword output {
1339 "mov %%edx, %%eax \n"
1340 "shl $1, %%eax \n"
1341 "and $0x1E, %%eax \n"
1342 "mov (%[pal],%[a]), %%cx \n"
1343 "ror $8, %%cx \n"
1344 "shl $16, %%ecx \n"
1346 "shr $3, %%edx \n"
1347 "and $0x1E, %%edx \n"
1348 "mov (%[pal],%[d]), %%cx \n"
1349 "ror $8, %%cx \n"
1351 "mov %%ecx, (%[dst]) \n"
1352 "add $4, %[dst] \n"
1353 // }
1355 // * copy
1356 "mov (%[src]), %%eax \n" // read all 8 pixels
1357 "bswap %%eax \n"
1358 "add $8, %[src] \n"
1359 "mov %%eax, %%edx \n"
1361 // 1st dword output {
1362 "shr $23, %%eax \n"
1363 "and $0x1E, %%eax \n"
1364 "mov (%[pal],%[a]), %%cx \n"
1365 "ror $8, %%cx \n"
1366 "shl $16, %%ecx \n"
1368 "mov %%edx, %%eax \n"
1369 "shr $27, %%eax \n"
1370 "and $0x1E, %%eax \n"
1371 "mov (%[pal],%[a]), %%cx \n"
1372 "ror $8, %%cx \n"
1374 "mov %%ecx, (%[dst]) \n"
1375 "add $4, %[dst] \n"
1376 // }
1378 // 2nd dword output {
1379 "mov %%edx, %%eax \n"
1380 "shr $15, %%eax \n"
1381 "and $0x1E, %%eax \n"
1382 "mov (%[pal],%[a]), %%cx \n"
1383 "ror $8, %%cx \n"
1384 "shl $16, %%ecx \n"
1386 "mov %%edx, %%eax \n"
1387 "shr $19, %%eax \n"
1388 "and $0x1E, %%eax \n"
1389 "mov (%[pal],%[a]), %%cx \n"
1390 "ror $8, %%cx \n"
1392 "mov %%ecx, (%[dst]) \n"
1393 "add $4, %[dst] \n"
1394 // }
1396 // 3rd dword output {
1397 "mov %%edx, %%eax \n"
1398 "shr $7, %%eax \n"
1399 "and $0x1E, %%eax \n"
1400 "mov (%[pal],%[a]), %%cx \n"
1401 "ror $8, %%cx \n"
1402 "shl $16, %%ecx \n"
1404 "mov %%edx, %%eax \n"
1405 "shr $11, %%eax \n"
1406 "and $0x1E, %%eax \n"
1407 "mov (%[pal],%[a]), %%cx \n"
1408 "ror $8, %%cx \n"
1410 "mov %%ecx, (%[dst]) \n"
1411 "add $4, %[dst] \n"
1412 // }
1414 // 4th dword output {
1415 "mov %%edx, %%eax \n"
1416 "shl $1, %%eax \n"
1417 "and $0x1E, %%eax \n"
1418 "mov (%[pal],%[a]), %%cx \n"
1419 "ror $8, %%cx \n"
1420 "shl $16, %%ecx \n"
1422 "shr $3, %%edx \n"
1423 "and $0x1E, %%edx \n"
1424 "mov (%[pal],%[d]), %%cx \n"
1425 "ror $8, %%cx \n"
1427 "mov %%ecx, (%[dst]) \n"
1428 "add $4, %[dst] \n"
1429 // }
1430 // *
1432 "pop %[c] \n"
1434 "dec %%ecx \n"
1435 "jnz ia_x_loop_2 \n"
1437 "add %[line], %[src] \n"
1438 "add %[ext], %[dst] \n"
1440 "pop %[c] \n"
1441 "dec %%ecx \n"
1442 "jnz ia_y_loop \n"
1444 "ia_end_y_loop: \n"
1445 : [c] "=&c" (fake_ecx), [a] "=&a" (fake_eax), [d] "=&d" (fake_edx)
1446 : [src]"S"(src), [dst]"D"(dst), "[c]"(height),
1447 // pal needs to be in a register because its used in mov (%[pal],...), ...
1448 [pal] "r" (pal), [wid_64] "g" (wid_64), [line] "g" ((uintptr_t)line), [ext] "g" ((uintptr_t)ext)
1449 : "memory", "cc"
1451 #endif // _WIN32
1452 return (1 << 16) | GR_TEXFMT_ALPHA_INTENSITY_88;
1455 return (1 << 16) | GR_TEXFMT_ARGB_1555;
1458 //****************************************************************
1459 // Size: 0, Format: 3
1461 // ** BY GUGAMAN **
1463 DWORD Load4bIA (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile)
1465 if (rdp.tlut_mode != 0)
1466 return Load4bCI (dst, src, wid_64, height, line, real_width, tile);
1468 if (wid_64 < 1) wid_64 = 1;
1469 if (height < 1) height = 1;
1470 int ext = (real_width - (wid_64 << 4));
1471 #ifndef GCC
1472 __asm {
1473 mov esi,dword ptr [src]
1474 mov edi,dword ptr [dst]
1476 mov ecx,dword ptr [height]
1477 y_loop:
1478 push ecx
1480 mov ecx,dword ptr [wid_64]
1481 x_loop:
1482 push ecx
1484 mov eax,dword ptr [esi] // read all 8 pixels
1485 bswap eax
1486 add esi,4
1487 mov edx,eax
1489 // 1st dword {
1490 xor ecx,ecx
1492 // pixel #1
1493 // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx
1494 // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
1495 mov eax,edx
1496 shr eax,24 //Alpha
1497 and eax,0x00000010
1498 or ecx,eax
1499 shl eax,1
1500 or ecx,eax
1501 shl eax,1
1502 or ecx,eax
1503 shl eax,1
1504 or ecx,eax
1505 mov eax,edx
1506 shr eax,28 // Intensity
1507 and eax,0x0000000E
1508 or ecx,eax
1509 shr eax,3
1510 or ecx,eax
1512 // pixel #2
1513 // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx
1514 // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
1515 mov eax,edx
1516 shr eax,12 //Alpha
1517 and eax,0x00001000
1518 or ecx,eax
1519 shl eax,1
1520 or ecx,eax
1521 shl eax,1
1522 or ecx,eax
1523 shl eax,1
1524 or ecx,eax
1525 mov eax,edx
1526 shr eax,16 // Intensity
1527 and eax,0x00000E00
1528 or ecx,eax
1529 shr eax,3
1530 and eax,0x00000100
1531 or ecx,eax
1533 // pixel #3
1534 // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx
1535 // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
1536 //Alpha
1537 mov eax,edx
1538 and eax,0x00100000
1539 or ecx,eax
1540 shl eax,1
1541 or ecx,eax
1542 shl eax,1
1543 or ecx,eax
1544 shl eax,1
1545 or ecx,eax
1546 mov eax,edx
1547 shr eax,4 // Intensity
1548 and eax,0x000E0000
1549 or ecx,eax
1550 shr eax,3
1551 and eax,0x00010000
1552 or ecx,eax
1554 // pixel #4
1555 // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx
1556 // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
1557 mov eax,edx
1558 shl eax,12 //Alpha
1559 and eax,0x10000000
1560 or ecx,eax
1561 shl eax,1
1562 or ecx,eax
1563 shl eax,1
1564 or ecx,eax
1565 shl eax,1
1566 or ecx,eax
1567 mov eax,edx
1568 shl eax,8 // Intensity
1569 and eax,0x0E000000
1570 or ecx,eax
1571 shr eax,3
1572 and eax,0x01000000
1573 or ecx,eax
1576 mov dword ptr [edi],ecx
1577 add edi,4
1578 // }
1580 // 2nd dword {
1581 xor ecx,ecx
1583 // pixel #5
1584 // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx
1585 // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
1586 mov eax,edx
1587 shr eax,8 //Alpha
1588 and eax,0x00000010
1589 or ecx,eax
1590 shl eax,1
1591 or ecx,eax
1592 shl eax,1
1593 or ecx,eax
1594 shl eax,1
1595 or ecx,eax
1596 mov eax,edx
1597 shr eax,12 // Intensity
1598 and eax,0x0000000E
1599 or ecx,eax
1600 shr eax,3
1601 or ecx,eax
1603 // pixel #6
1604 // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx
1605 // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
1606 //Alpha
1607 mov eax,edx
1608 shl eax,4
1609 and eax,0x00001000
1610 or ecx,eax
1611 shl eax,1
1612 or ecx,eax
1613 shl eax,1
1614 or ecx,eax
1615 shl eax,1
1616 or ecx,eax
1617 mov eax,edx // Intensity
1618 and eax,0x00000E00
1619 or ecx,eax
1620 shr eax,3
1621 and eax,0x00000100
1622 or ecx,eax
1624 // pixel #7
1625 // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx
1626 // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
1627 //Alpha
1628 mov eax,edx
1629 shl eax,16
1630 and eax,0x00100000
1631 or ecx,eax
1632 shl eax,1
1633 or ecx,eax
1634 shl eax,1
1635 or ecx,eax
1636 shl eax,1
1637 or ecx,eax
1638 mov eax,edx
1639 shl eax,12 // Intensity
1640 and eax,0x000E0000
1641 or ecx,eax
1642 shr eax,3
1643 and eax,0x00010000
1644 or ecx,eax
1646 // pixel #8
1647 // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA
1648 // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
1649 mov eax,edx
1650 shl eax,28 //Alpha
1651 and eax,0x10000000
1652 or ecx,eax
1653 shl eax,1
1654 or ecx,eax
1655 shl eax,1
1656 or ecx,eax
1657 shl eax,1
1658 or ecx,eax
1659 mov eax,edx
1660 shl eax,24 // Intensity
1661 and eax,0x0E000000
1662 or ecx,eax
1663 shr eax,3
1664 and eax,0x01000000
1665 or ecx,eax
1667 mov dword ptr [edi],ecx
1668 add edi,4
1669 // }
1671 // * copy
1672 mov eax,dword ptr [esi] // read all 8 pixels
1673 bswap eax
1674 add esi,4
1675 mov edx,eax
1677 // 1st dword {
1678 xor ecx,ecx
1680 // pixel #1
1681 // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx
1682 // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
1683 mov eax,edx
1684 shr eax,24 //Alpha
1685 and eax,0x00000010
1686 or ecx,eax
1687 shl eax,1
1688 or ecx,eax
1689 shl eax,1
1690 or ecx,eax
1691 shl eax,1
1692 or ecx,eax
1693 mov eax,edx
1694 shr eax,28 // Intensity
1695 and eax,0x0000000E
1696 or ecx,eax
1697 shr eax,3
1698 or ecx,eax
1700 // pixel #2
1701 // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx
1702 // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
1703 mov eax,edx
1704 shr eax,12 //Alpha
1705 and eax,0x00001000
1706 or ecx,eax
1707 shl eax,1
1708 or ecx,eax
1709 shl eax,1
1710 or ecx,eax
1711 shl eax,1
1712 or ecx,eax
1713 mov eax,edx
1714 shr eax,16 // Intensity
1715 and eax,0x00000E00
1716 or ecx,eax
1717 shr eax,3
1718 and eax,0x00000100
1719 or ecx,eax
1721 // pixel #3
1722 // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx
1723 // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
1724 //Alpha
1725 mov eax,edx
1726 and eax,0x00100000
1727 or ecx,eax
1728 shl eax,1
1729 or ecx,eax
1730 shl eax,1
1731 or ecx,eax
1732 shl eax,1
1733 or ecx,eax
1734 mov eax,edx
1735 shr eax,4 // Intensity
1736 and eax,0x000E0000
1737 or ecx,eax
1738 shr eax,3
1739 and eax,0x00010000
1740 or ecx,eax
1742 // pixel #4
1743 // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx
1744 // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
1745 mov eax,edx
1746 shl eax,12 //Alpha
1747 and eax,0x10000000
1748 or ecx,eax
1749 shl eax,1
1750 or ecx,eax
1751 shl eax,1
1752 or ecx,eax
1753 shl eax,1
1754 or ecx,eax
1755 mov eax,edx
1756 shl eax,8 // Intensity
1757 and eax,0x0E000000
1758 or ecx,eax
1759 shr eax,3
1760 and eax,0x01000000
1761 or ecx,eax
1764 mov dword ptr [edi],ecx
1765 add edi,4
1766 // }
1768 // 2nd dword {
1769 xor ecx,ecx
1771 // pixel #5
1772 // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx
1773 // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
1774 mov eax,edx
1775 shr eax,8 //Alpha
1776 and eax,0x00000010
1777 or ecx,eax
1778 shl eax,1
1779 or ecx,eax
1780 shl eax,1
1781 or ecx,eax
1782 shl eax,1
1783 or ecx,eax
1784 mov eax,edx
1785 shr eax,12 // Intensity
1786 and eax,0x0000000E
1787 or ecx,eax
1788 shr eax,3
1789 or ecx,eax
1791 // pixel #6
1792 // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx
1793 // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
1794 //Alpha
1795 mov eax,edx
1796 shl eax,4
1797 and eax,0x00001000
1798 or ecx,eax
1799 shl eax,1
1800 or ecx,eax
1801 shl eax,1
1802 or ecx,eax
1803 shl eax,1
1804 or ecx,eax
1805 mov eax,edx // Intensity
1806 and eax,0x00000E00
1807 or ecx,eax
1808 shr eax,3
1809 and eax,0x00000100
1810 or ecx,eax
1812 // pixel #7
1813 // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx
1814 // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
1815 //Alpha
1816 mov eax,edx
1817 shl eax,16
1818 and eax,0x00100000
1819 or ecx,eax
1820 shl eax,1
1821 or ecx,eax
1822 shl eax,1
1823 or ecx,eax
1824 shl eax,1
1825 or ecx,eax
1826 mov eax,edx
1827 shl eax,12 // Intensity
1828 and eax,0x000E0000
1829 or ecx,eax
1830 shr eax,3
1831 and eax,0x00010000
1832 or ecx,eax
1834 // pixel #8
1835 // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA
1836 // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
1837 mov eax,edx
1838 shl eax,28 //Alpha
1839 and eax,0x10000000
1840 or ecx,eax
1841 shl eax,1
1842 or ecx,eax
1843 shl eax,1
1844 or ecx,eax
1845 shl eax,1
1846 or ecx,eax
1847 mov eax,edx
1848 shl eax,24 // Intensity
1849 and eax,0x0E000000
1850 or ecx,eax
1851 shr eax,3
1852 and eax,0x01000000
1853 or ecx,eax
1855 mov dword ptr [edi],ecx
1856 add edi,4
1857 // }
1859 // *
1861 pop ecx
1862 dec ecx
1863 jnz x_loop
1865 pop ecx
1866 dec ecx
1867 jz end_y_loop
1868 push ecx
1870 add esi,dword ptr [line]
1871 add edi,dword ptr [ext]
1873 mov ecx,dword ptr [wid_64]
1874 x_loop_2:
1875 push ecx
1877 mov eax,dword ptr [esi+4] // read all 8 pixels
1878 bswap eax
1879 mov edx,eax
1881 // 1st dword {
1882 xor ecx,ecx
1884 // pixel #1
1885 // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx
1886 // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
1887 mov eax,edx
1888 shr eax,24 //Alpha
1889 and eax,0x00000010
1890 or ecx,eax
1891 shl eax,1
1892 or ecx,eax
1893 shl eax,1
1894 or ecx,eax
1895 shl eax,1
1896 or ecx,eax
1897 mov eax,edx
1898 shr eax,28 // Intensity
1899 and eax,0x0000000E
1900 or ecx,eax
1901 shr eax,3
1902 or ecx,eax
1904 // pixel #2
1905 // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx
1906 // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
1907 mov eax,edx
1908 shr eax,12 //Alpha
1909 and eax,0x00001000
1910 or ecx,eax
1911 shl eax,1
1912 or ecx,eax
1913 shl eax,1
1914 or ecx,eax
1915 shl eax,1
1916 or ecx,eax
1917 mov eax,edx
1918 shr eax,16 // Intensity
1919 and eax,0x00000E00
1920 or ecx,eax
1921 shr eax,3
1922 and eax,0x00000100
1923 or ecx,eax
1925 // pixel #3
1926 // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx
1927 // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
1928 //Alpha
1929 mov eax,edx
1930 and eax,0x00100000
1931 or ecx,eax
1932 shl eax,1
1933 or ecx,eax
1934 shl eax,1
1935 or ecx,eax
1936 shl eax,1
1937 or ecx,eax
1938 mov eax,edx
1939 shr eax,4 // Intensity
1940 and eax,0x000E0000
1941 or ecx,eax
1942 shr eax,3
1943 and eax,0x00010000
1944 or ecx,eax
1946 // pixel #4
1947 // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx
1948 // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
1949 mov eax,edx
1950 shl eax,12 //Alpha
1951 and eax,0x10000000
1952 or ecx,eax
1953 shl eax,1
1954 or ecx,eax
1955 shl eax,1
1956 or ecx,eax
1957 shl eax,1
1958 or ecx,eax
1959 mov eax,edx
1960 shl eax,8 // Intensity
1961 and eax,0x0E000000
1962 or ecx,eax
1963 shr eax,3
1964 and eax,0x01000000
1965 or ecx,eax
1968 mov dword ptr [edi],ecx
1969 add edi,4
1970 // }
1972 // 2nd dword {
1973 xor ecx,ecx
1975 // pixel #5
1976 // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx
1977 // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
1978 mov eax,edx
1979 shr eax,8 //Alpha
1980 and eax,0x00000010
1981 or ecx,eax
1982 shl eax,1
1983 or ecx,eax
1984 shl eax,1
1985 or ecx,eax
1986 shl eax,1
1987 or ecx,eax
1988 mov eax,edx
1989 shr eax,12 // Intensity
1990 and eax,0x0000000E
1991 or ecx,eax
1992 shr eax,3
1993 or ecx,eax
1995 // pixel #6
1996 // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx
1997 // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
1998 //Alpha
1999 mov eax,edx
2000 shl eax,4
2001 and eax,0x00001000
2002 or ecx,eax
2003 shl eax,1
2004 or ecx,eax
2005 shl eax,1
2006 or ecx,eax
2007 shl eax,1
2008 or ecx,eax
2009 mov eax,edx // Intensity
2010 and eax,0x00000E00
2011 or ecx,eax
2012 shr eax,3
2013 and eax,0x00000100
2014 or ecx,eax
2016 // pixel #7
2017 // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx
2018 // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
2019 //Alpha
2020 mov eax,edx
2021 shl eax,16
2022 and eax,0x00100000
2023 or ecx,eax
2024 shl eax,1
2025 or ecx,eax
2026 shl eax,1
2027 or ecx,eax
2028 shl eax,1
2029 or ecx,eax
2030 mov eax,edx
2031 shl eax,12 // Intensity
2032 and eax,0x000E0000
2033 or ecx,eax
2034 shr eax,3
2035 and eax,0x00010000
2036 or ecx,eax
2038 // pixel #8
2039 // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA
2040 // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
2041 mov eax,edx
2042 shl eax,28 //Alpha
2043 and eax,0x10000000
2044 or ecx,eax
2045 shl eax,1
2046 or ecx,eax
2047 shl eax,1
2048 or ecx,eax
2049 shl eax,1
2050 or ecx,eax
2051 mov eax,edx
2052 shl eax,24 // Intensity
2053 and eax,0x0E000000
2054 or ecx,eax
2055 shr eax,3
2056 and eax,0x01000000
2057 or ecx,eax
2059 mov dword ptr [edi],ecx
2060 add edi,4
2061 // }
2063 // * copy
2064 mov eax,dword ptr [esi] // read all 8 pixels
2065 bswap eax
2066 add esi,8
2067 mov edx,eax
2069 // 1st dword {
2070 xor ecx,ecx
2072 // pixel #1
2073 // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx
2074 // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
2075 mov eax,edx
2076 shr eax,24 //Alpha
2077 and eax,0x00000010
2078 or ecx,eax
2079 shl eax,1
2080 or ecx,eax
2081 shl eax,1
2082 or ecx,eax
2083 shl eax,1
2084 or ecx,eax
2085 mov eax,edx
2086 shr eax,28 // Intensity
2087 and eax,0x0000000E
2088 or ecx,eax
2089 shr eax,3
2090 or ecx,eax
2092 // pixel #2
2093 // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx
2094 // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
2095 mov eax,edx
2096 shr eax,12 //Alpha
2097 and eax,0x00001000
2098 or ecx,eax
2099 shl eax,1
2100 or ecx,eax
2101 shl eax,1
2102 or ecx,eax
2103 shl eax,1
2104 or ecx,eax
2105 mov eax,edx
2106 shr eax,16 // Intensity
2107 and eax,0x00000E00
2108 or ecx,eax
2109 shr eax,3
2110 and eax,0x00000100
2111 or ecx,eax
2113 // pixel #3
2114 // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx
2115 // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
2116 //Alpha
2117 mov eax,edx
2118 and eax,0x00100000
2119 or ecx,eax
2120 shl eax,1
2121 or ecx,eax
2122 shl eax,1
2123 or ecx,eax
2124 shl eax,1
2125 or ecx,eax
2126 mov eax,edx
2127 shr eax,4 // Intensity
2128 and eax,0x000E0000
2129 or ecx,eax
2130 shr eax,3
2131 and eax,0x00010000
2132 or ecx,eax
2134 // pixel #4
2135 // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx
2136 // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
2137 mov eax,edx
2138 shl eax,12 //Alpha
2139 and eax,0x10000000
2140 or ecx,eax
2141 shl eax,1
2142 or ecx,eax
2143 shl eax,1
2144 or ecx,eax
2145 shl eax,1
2146 or ecx,eax
2147 mov eax,edx
2148 shl eax,8 // Intensity
2149 and eax,0x0E000000
2150 or ecx,eax
2151 shr eax,3
2152 and eax,0x01000000
2153 or ecx,eax
2156 mov dword ptr [edi],ecx
2157 add edi,4
2158 // }
2160 // 2nd dword {
2161 xor ecx,ecx
2163 // pixel #5
2164 // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx
2165 // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
2166 mov eax,edx
2167 shr eax,8 //Alpha
2168 and eax,0x00000010
2169 or ecx,eax
2170 shl eax,1
2171 or ecx,eax
2172 shl eax,1
2173 or ecx,eax
2174 shl eax,1
2175 or ecx,eax
2176 mov eax,edx
2177 shr eax,12 // Intensity
2178 and eax,0x0000000E
2179 or ecx,eax
2180 shr eax,3
2181 or ecx,eax
2183 // pixel #6
2184 // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx
2185 // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
2186 //Alpha
2187 mov eax,edx
2188 shl eax,4
2189 and eax,0x00001000
2190 or ecx,eax
2191 shl eax,1
2192 or ecx,eax
2193 shl eax,1
2194 or ecx,eax
2195 shl eax,1
2196 or ecx,eax
2197 mov eax,edx // Intensity
2198 and eax,0x00000E00
2199 or ecx,eax
2200 shr eax,3
2201 and eax,0x00000100
2202 or ecx,eax
2204 // pixel #7
2205 // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx
2206 // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
2207 //Alpha
2208 mov eax,edx
2209 shl eax,16
2210 and eax,0x00100000
2211 or ecx,eax
2212 shl eax,1
2213 or ecx,eax
2214 shl eax,1
2215 or ecx,eax
2216 shl eax,1
2217 or ecx,eax
2218 mov eax,edx
2219 shl eax,12 // Intensity
2220 and eax,0x000E0000
2221 or ecx,eax
2222 shr eax,3
2223 and eax,0x00010000
2224 or ecx,eax
2226 // pixel #8
2227 // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA
2228 // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
2229 mov eax,edx
2230 shl eax,28 //Alpha
2231 and eax,0x10000000
2232 or ecx,eax
2233 shl eax,1
2234 or ecx,eax
2235 shl eax,1
2236 or ecx,eax
2237 shl eax,1
2238 or ecx,eax
2239 mov eax,edx
2240 shl eax,24 // Intensity
2241 and eax,0x0E000000
2242 or ecx,eax
2243 shr eax,3
2244 and eax,0x01000000
2245 or ecx,eax
2247 mov dword ptr [edi],ecx
2248 add edi,4
2249 // }
2250 // *
2252 pop ecx
2253 dec ecx
2254 jnz x_loop_2
2256 add esi,dword ptr [line]
2257 add edi,dword ptr [ext]
2259 pop ecx
2260 dec ecx
2261 jnz y_loop
2263 end_y_loop:
2265 #else // _WIN32
2266 //printf("Load4bIA\n");
2267 intptr_t fake_ecx;
2268 asm volatile (
2269 "y_loop2: \n"
2270 "push %[c] \n"
2272 "mov %[wid_64], %%ecx \n"
2273 "x_loop2: \n"
2274 "push %[c] \n"
2276 "mov (%[src]), %%eax \n" // read all 8 pixels
2277 "bswap %%eax \n"
2278 "add $4, %[src] \n"
2279 "mov %%eax, %%edx \n"
2281 // 1st dword {
2282 "xor %%ecx, %%ecx \n"
2284 // pixel #1
2285 // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx
2286 // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
2287 "mov %%edx, %%eax \n"
2288 "shr $24, %%eax \n" //Alpha
2289 "and $0x00000010, %%eax \n"
2290 "or %%eax, %%ecx \n"
2291 "shl $1, %%eax \n"
2292 "or %%eax, %%ecx \n"
2293 "shl $1, %%eax \n"
2294 "or %%eax, %%ecx \n"
2295 "shl $1, %%eax \n"
2296 "or %%eax, %%ecx \n"
2297 "mov %%edx, %%eax \n"
2298 "shr $28, %%eax \n" // Intensity
2299 "and $0x0000000E, %%eax \n"
2300 "or %%eax, %%ecx \n"
2301 "shr $3, %%eax \n"
2302 "or %%eax, %%ecx \n"
2304 // pixel #2
2305 // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx
2306 // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
2307 "mov %%edx, %%eax \n"
2308 "shr $12, %%eax \n" //Alpha
2309 "and $0x00001000, %%eax \n"
2310 "or %%eax, %%ecx \n"
2311 "shl $1, %%eax \n"
2312 "or %%eax, %%ecx \n"
2313 "shl $1, %%eax \n"
2314 "or %%eax, %%ecx \n"
2315 "shl $1, %%eax \n"
2316 "or %%eax, %%ecx \n"
2317 "mov %%edx, %%eax \n"
2318 "shr $16, %%eax \n" // Intensity
2319 "and $0x00000E00, %%eax \n"
2320 "or %%eax, %%ecx \n"
2321 "shr $3, %%eax \n"
2322 "and $0x00000100, %%eax \n"
2323 "or %%eax, %%ecx \n"
2325 // pixel #3
2326 // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx
2327 // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
2328 //Alpha
2329 "mov %%edx, %%eax \n"
2330 "and $0x00100000, %%eax \n"
2331 "or %%eax, %%ecx \n"
2332 "shl $1, %%eax \n"
2333 "or %%eax, %%ecx \n"
2334 "shl $1, %%eax \n"
2335 "or %%eax, %%ecx \n"
2336 "shl $1, %%eax \n"
2337 "or %%eax, %%ecx \n"
2338 "mov %%edx, %%eax \n"
2339 "shr $4, %%eax \n" // Intensity
2340 "and $0x000E0000, %%eax \n"
2341 "or %%eax, %%ecx \n"
2342 "shr $3, %%eax \n"
2343 "and $0x00010000, %%eax \n"
2344 "or %%eax, %%ecx \n"
2346 // pixel #4
2347 // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx
2348 // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
2349 "mov %%edx, %%eax \n"
2350 "shl $12, %%eax \n" //Alpha
2351 "and $0x10000000, %%eax \n"
2352 "or %%eax, %%ecx \n"
2353 "shl $1, %%eax \n"
2354 "or %%eax, %%ecx \n"
2355 "shl $1, %%eax \n"
2356 "or %%eax, %%ecx \n"
2357 "shl $1, %%eax \n"
2358 "or %%eax, %%ecx \n"
2359 "mov %%edx, %%eax \n"
2360 "shl $8, %%eax \n" // Intensity
2361 "and $0x0E000000, %%eax \n"
2362 "or %%eax, %%ecx \n"
2363 "shr $3, %%eax \n"
2364 "and $0x01000000, %%eax \n"
2365 "or %%eax, %%ecx \n"
2368 "mov %%ecx, (%[dst]) \n"
2369 "add $4, %[dst] \n"
2370 // }
2372 // 2nd dword {
2373 "xor %%ecx, %%ecx \n"
2375 // pixel #5
2376 // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx
2377 // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
2378 "mov %%edx, %%eax \n"
2379 "shr $8, %%eax \n" //Alpha
2380 "and $0x00000010, %%eax \n"
2381 "or %%eax, %%ecx \n"
2382 "shl $1, %%eax \n"
2383 "or %%eax, %%ecx \n"
2384 "shl $1, %%eax \n"
2385 "or %%eax, %%ecx \n"
2386 "shl $1, %%eax \n"
2387 "or %%eax, %%ecx \n"
2388 "mov %%edx, %%eax \n"
2389 "shr $12, %%eax \n" // Intensity
2390 "and $0x0000000E, %%eax \n"
2391 "or %%eax, %%ecx \n"
2392 "shr $3, %%eax \n"
2393 "or %%eax, %%ecx \n"
2395 // pixel #6
2396 // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx
2397 // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
2398 //Alpha
2399 "mov %%edx, %%eax \n"
2400 "shl $4, %%eax \n"
2401 "and $0x00001000, %%eax \n"
2402 "or %%eax, %%ecx \n"
2403 "shl $1, %%eax \n"
2404 "or %%eax, %%ecx \n"
2405 "shl $1, %%eax \n"
2406 "or %%eax, %%ecx \n"
2407 "shl $1, %%eax \n"
2408 "or %%eax, %%ecx \n"
2409 "mov %%edx, %%eax \n" // Intensity
2410 "and $0x00000E00, %%eax \n"
2411 "or %%eax, %%ecx \n"
2412 "shr $3, %%eax \n"
2413 "and $0x00000100, %%eax \n"
2414 "or %%eax, %%ecx \n"
2416 // pixel #7
2417 // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx
2418 // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
2419 //Alpha
2420 "mov %%edx, %%eax \n"
2421 "shl $16, %%eax \n"
2422 "and $0x00100000, %%eax \n"
2423 "or %%eax, %%ecx \n"
2424 "shl $1, %%eax \n"
2425 "or %%eax, %%ecx \n"
2426 "shl $1, %%eax \n"
2427 "or %%eax, %%ecx \n"
2428 "shl $1, %%eax \n"
2429 "or %%eax, %%ecx \n"
2430 "mov %%edx, %%eax \n"
2431 "shl $12, %%eax \n" // Intensity
2432 "and $0x000E0000, %%eax \n"
2433 "or %%eax, %%ecx \n"
2434 "shr $3, %%eax \n"
2435 "and $0x00010000, %%eax \n"
2436 "or %%eax, %%ecx \n"
2438 // pixel #8
2439 // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA
2440 // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
2441 "mov %%edx, %%eax \n"
2442 "shl $28, %%eax \n" //Alpha
2443 "and $0x10000000, %%eax \n"
2444 "or %%eax, %%ecx \n"
2445 "shl $1, %%eax \n"
2446 "or %%eax, %%ecx \n"
2447 "shl $1, %%eax \n"
2448 "or %%eax, %%ecx \n"
2449 "shl $1, %%eax \n"
2450 "or %%eax, %%ecx \n"
2451 "mov %%edx, %%eax \n"
2452 "shl $24, %%eax \n" // Intensity
2453 "and $0x0E000000, %%eax \n"
2454 "or %%eax, %%ecx \n"
2455 "shr $3, %%eax \n"
2456 "and $0x01000000, %%eax \n"
2457 "or %%eax, %%ecx \n"
2459 "mov %%ecx, (%[dst]) \n"
2460 "add $4, %[dst] \n"
2461 // }
2463 // * copy
2464 "mov (%[src]), %%eax \n" // read all 8 pixels
2465 "bswap %%eax \n"
2466 "add $4, %[src] \n"
2467 "mov %%eax, %%edx \n"
2469 // 1st dword {
2470 "xor %%ecx, %%ecx \n"
2472 // pixel #1
2473 // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx
2474 // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
2475 "mov %%edx, %%eax \n"
2476 "shr $24, %%eax \n" //Alpha
2477 "and $0x00000010, %%eax \n"
2478 "or %%eax, %%ecx \n"
2479 "shl $1, %%eax \n"
2480 "or %%eax, %%ecx \n"
2481 "shl $1, %%eax \n"
2482 "or %%eax, %%ecx \n"
2483 "shl $1, %%eax \n"
2484 "or %%eax, %%ecx \n"
2485 "mov %%edx, %%eax \n"
2486 "shr $28, %%eax \n" // Intensity
2487 "and $0x0000000E, %%eax \n"
2488 "or %%eax, %%ecx \n"
2489 "shr $3, %%eax \n"
2490 "or %%eax, %%ecx \n"
2492 // pixel #2
2493 // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx
2494 // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
2495 "mov %%edx, %%eax \n"
2496 "shr $12, %%eax \n" //Alpha
2497 "and $0x00001000, %%eax \n"
2498 "or %%eax, %%ecx \n"
2499 "shl $1, %%eax \n"
2500 "or %%eax, %%ecx \n"
2501 "shl $1, %%eax \n"
2502 "or %%eax, %%ecx \n"
2503 "shl $1, %%eax \n"
2504 "or %%eax, %%ecx \n"
2505 "mov %%edx, %%eax \n"
2506 "shr $16, %%eax \n" // Intensity
2507 "and $0x00000E00, %%eax \n"
2508 "or %%eax, %%ecx \n"
2509 "shr $3, %%eax \n"
2510 "and $0x00000100, %%eax \n"
2511 "or %%eax, %%ecx \n"
2513 // pixel #3
2514 // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx
2515 // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
2516 //Alpha
2517 "mov %%edx, %%eax \n"
2518 "and $0x00100000, %%eax \n"
2519 "or %%eax, %%ecx \n"
2520 "shl $1, %%eax \n"
2521 "or %%eax, %%ecx \n"
2522 "shl $1, %%eax \n"
2523 "or %%eax, %%ecx \n"
2524 "shl $1, %%eax \n"
2525 "or %%eax, %%ecx \n"
2526 "mov %%edx, %%eax \n"
2527 "shr $4, %%eax \n" // Intensity
2528 "and $0x000E0000, %%eax \n"
2529 "or %%eax, %%ecx \n"
2530 "shr $3, %%eax \n"
2531 "and $0x00010000, %%eax \n"
2532 "or %%eax, %%ecx \n"
2534 // pixel #4
2535 // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx
2536 // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
2537 "mov %%edx, %%eax \n"
2538 "shl $12, %%eax \n" //Alpha
2539 "and $0x10000000, %%eax \n"
2540 "or %%eax, %%ecx \n"
2541 "shl $1, %%eax \n"
2542 "or %%eax, %%ecx \n"
2543 "shl $1, %%eax \n"
2544 "or %%eax, %%ecx \n"
2545 "shl $1, %%eax \n"
2546 "or %%eax, %%ecx \n"
2547 "mov %%edx, %%eax \n"
2548 "shl $8, %%eax \n" // Intensity
2549 "and $0x0E000000, %%eax \n"
2550 "or %%eax, %%ecx \n"
2551 "shr $3, %%eax \n"
2552 "and $0x01000000, %%eax \n"
2553 "or %%eax, %%ecx \n"
2556 "mov %%ecx, (%[dst]) \n"
2557 "add $4, %[dst] \n"
2558 // }
2560 // 2nd dword {
2561 "xor %%ecx, %%ecx \n"
2563 // pixel #5
2564 // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx
2565 // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
2566 "mov %%edx, %%eax \n"
2567 "shr $8, %%eax \n" //Alpha
2568 "and $0x00000010, %%eax \n"
2569 "or %%eax, %%ecx \n"
2570 "shl $1, %%eax \n"
2571 "or %%eax, %%ecx \n"
2572 "shl $1, %%eax \n"
2573 "or %%eax, %%ecx \n"
2574 "shl $1, %%eax \n"
2575 "or %%eax, %%ecx \n"
2576 "mov %%edx, %%eax \n"
2577 "shr $12, %%eax \n" // Intensity
2578 "and $0x0000000E, %%eax \n"
2579 "or %%eax, %%ecx \n"
2580 "shr $3, %%eax \n"
2581 "or %%eax, %%ecx \n"
2583 // pixel #6
2584 // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx
2585 // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
2586 //Alpha
2587 "mov %%edx, %%eax \n"
2588 "shl $4, %%eax \n"
2589 "and $0x00001000, %%eax \n"
2590 "or %%eax, %%ecx \n"
2591 "shl $1, %%eax \n"
2592 "or %%eax, %%ecx \n"
2593 "shl $1, %%eax \n"
2594 "or %%eax, %%ecx \n"
2595 "shl $1, %%eax \n"
2596 "or %%eax, %%ecx \n"
2597 "mov %%edx, %%eax \n" // Intensity
2598 "and $0x00000E00, %%eax \n"
2599 "or %%eax, %%ecx \n"
2600 "shr $3, %%eax \n"
2601 "and $0x00000100, %%eax \n"
2602 "or %%eax, %%ecx \n"
2604 // pixel #7
2605 // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx
2606 // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
2607 //Alpha
2608 "mov %%edx, %%eax \n"
2609 "shl $16, %%eax \n"
2610 "and $0x00100000, %%eax \n"
2611 "or %%eax, %%ecx \n"
2612 "shl $1, %%eax \n"
2613 "or %%eax, %%ecx \n"
2614 "shl $1, %%eax \n"
2615 "or %%eax, %%ecx \n"
2616 "shl $1, %%eax \n"
2617 "or %%eax, %%ecx \n"
2618 "mov %%edx, %%eax \n"
2619 "shl $12, %%eax \n" // Intensity
2620 "and $0x000E0000, %%eax \n"
2621 "or %%eax, %%ecx \n"
2622 "shr $3, %%eax \n"
2623 "and $0x00010000, %%eax \n"
2624 "or %%eax, %%ecx \n"
2626 // pixel #8
2627 // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA
2628 // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
2629 "mov %%edx, %%eax \n"
2630 "shl $28, %%eax \n" //Alpha
2631 "and $0x10000000, %%eax \n"
2632 "or %%eax, %%ecx \n"
2633 "shl $1, %%eax \n"
2634 "or %%eax, %%ecx \n"
2635 "shl $1, %%eax \n"
2636 "or %%eax, %%ecx \n"
2637 "shl $1, %%eax \n"
2638 "or %%eax, %%ecx \n"
2639 "mov %%edx, %%eax \n"
2640 "shl $24, %%eax \n" // Intensity
2641 "and $0x0E000000, %%eax \n"
2642 "or %%eax, %%ecx \n"
2643 "shr $3, %%eax \n"
2644 "and $0x01000000, %%eax \n"
2645 "or %%eax, %%ecx \n"
2647 "mov %%ecx, (%[dst]) \n"
2648 "add $4, %[dst] \n"
2649 // }
2651 // *
2653 "pop %[c] \n"
2654 "dec %%ecx \n"
2655 "jnz x_loop2 \n"
2657 "pop %[c] \n"
2658 "dec %%ecx \n"
2659 "jz end_y_loop2 \n"
2660 "push %[c] \n"
2662 "add %[line], %[src] \n"
2663 "add %[ext], %[dst] \n"
2665 "mov %[wid_64], %%ecx \n"
2666 "x_loop_22: \n"
2667 "push %[c] \n"
2669 "mov 4(%[src]), %%eax \n" // read all 8 pixels
2670 "bswap %%eax \n"
2671 "mov %%eax, %%edx \n"
2673 // 1st dword {
2674 "xor %%ecx, %%ecx \n"
2676 // pixel #1
2677 // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx
2678 // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
2679 "mov %%edx, %%eax \n"
2680 "shr $24, %%eax \n" //Alpha
2681 "and $0x00000010, %%eax \n"
2682 "or %%eax, %%ecx \n"
2683 "shl $1, %%eax \n"
2684 "or %%eax, %%ecx \n"
2685 "shl $1, %%eax \n"
2686 "or %%eax, %%ecx \n"
2687 "shl $1, %%eax \n"
2688 "or %%eax, %%ecx \n"
2689 "mov %%edx, %%eax \n"
2690 "shr $28, %%eax \n" // Intensity
2691 "and $0x0000000E, %%eax \n"
2692 "or %%eax, %%ecx \n"
2693 "shr $3, %%eax \n"
2694 "or %%eax, %%ecx \n"
2696 // pixel #2
2697 // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx
2698 // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
2699 "mov %%edx, %%eax \n"
2700 "shr $12, %%eax \n" //Alpha
2701 "and $0x00001000, %%eax \n"
2702 "or %%eax, %%ecx \n"
2703 "shl $1, %%eax \n"
2704 "or %%eax, %%ecx \n"
2705 "shl $1, %%eax \n"
2706 "or %%eax, %%ecx \n"
2707 "shl $1, %%eax \n"
2708 "or %%eax, %%ecx \n"
2709 "mov %%edx, %%eax \n"
2710 "shr $16, %%eax \n" // Intensity
2711 "and $0x00000E00, %%eax \n"
2712 "or %%eax, %%ecx \n"
2713 "shr $3, %%eax \n"
2714 "and $0x00000100, %%eax \n"
2715 "or %%eax, %%ecx \n"
2717 // pixel #3
2718 // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx
2719 // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
2720 //Alpha
2721 "mov %%edx, %%eax \n"
2722 "and $0x00100000, %%eax \n"
2723 "or %%eax, %%ecx \n"
2724 "shl $1, %%eax \n"
2725 "or %%eax, %%ecx \n"
2726 "shl $1, %%eax \n"
2727 "or %%eax, %%ecx \n"
2728 "shl $1, %%eax \n"
2729 "or %%eax, %%ecx \n"
2730 "mov %%edx, %%eax \n"
2731 "shr $4, %%eax \n" // Intensity
2732 "and $0x000E0000, %%eax \n"
2733 "or %%eax, %%ecx \n"
2734 "shr $3, %%eax \n"
2735 "and $0x00010000, %%eax \n"
2736 "or %%eax, %%ecx \n"
2738 // pixel #4
2739 // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx
2740 // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
2741 "mov %%edx, %%eax \n"
2742 "shl $12, %%eax \n" //Alpha
2743 "and $0x10000000, %%eax \n"
2744 "or %%eax, %%ecx \n"
2745 "shl $1, %%eax \n"
2746 "or %%eax, %%ecx \n"
2747 "shl $1, %%eax \n"
2748 "or %%eax, %%ecx \n"
2749 "shl $1, %%eax \n"
2750 "or %%eax, %%ecx \n"
2751 "mov %%edx, %%eax \n"
2752 "shl $8, %%eax \n" // Intensity
2753 "and $0x0E000000, %%eax \n"
2754 "or %%eax, %%ecx \n"
2755 "shr $3, %%eax \n"
2756 "and $0x01000000, %%eax \n"
2757 "or %%eax, %%ecx \n"
2760 "mov %%ecx, (%[dst]) \n"
2761 "add $4, %[dst] \n"
2762 // }
2764 // 2nd dword {
2765 "xor %%ecx, %%ecx \n"
2767 // pixel #5
2768 // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx
2769 // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
2770 "mov %%edx, %%eax \n"
2771 "shr $8, %%eax \n" //Alpha
2772 "and $0x00000010, %%eax \n"
2773 "or %%eax, %%ecx \n"
2774 "shl $1, %%eax \n"
2775 "or %%eax, %%ecx \n"
2776 "shl $1, %%eax \n"
2777 "or %%eax, %%ecx \n"
2778 "shl $1, %%eax \n"
2779 "or %%eax, %%ecx \n"
2780 "mov %%edx, %%eax \n"
2781 "shr $12, %%eax \n" // Intensity
2782 "and $0x0000000E, %%eax \n"
2783 "or %%eax, %%ecx \n"
2784 "shr $3, %%eax \n"
2785 "or %%eax, %%ecx \n"
2787 // pixel #6
2788 // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx
2789 // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
2790 //Alpha
2791 "mov %%edx, %%eax \n"
2792 "shl $4, %%eax \n"
2793 "and $0x00001000, %%eax \n"
2794 "or %%eax, %%ecx \n"
2795 "shl $1, %%eax \n"
2796 "or %%eax, %%ecx \n"
2797 "shl $1, %%eax \n"
2798 "or %%eax, %%ecx \n"
2799 "shl $1, %%eax \n"
2800 "or %%eax, %%ecx \n"
2801 "mov %%edx, %%eax \n" // Intensity
2802 "and $0x00000E00, %%eax \n"
2803 "or %%eax, %%ecx \n"
2804 "shr $3, %%eax \n"
2805 "and $0x00000100, %%eax \n"
2806 "or %%eax, %%ecx \n"
2808 // pixel #7
2809 // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx
2810 // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
2811 //Alpha
2812 "mov %%edx, %%eax \n"
2813 "shl $16, %%eax \n"
2814 "and $0x00100000, %%eax \n"
2815 "or %%eax, %%ecx \n"
2816 "shl $1, %%eax \n"
2817 "or %%eax, %%ecx \n"
2818 "shl $1, %%eax \n"
2819 "or %%eax, %%ecx \n"
2820 "shl $1, %%eax \n"
2821 "or %%eax, %%ecx \n"
2822 "mov %%edx, %%eax \n"
2823 "shl $12, %%eax \n" // Intensity
2824 "and $0x000E0000, %%eax \n"
2825 "or %%eax, %%ecx \n"
2826 "shr $3, %%eax \n"
2827 "and $0x00010000, %%eax \n"
2828 "or %%eax, %%ecx \n"
2830 // pixel #8
2831 // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA
2832 // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
2833 "mov %%edx, %%eax \n"
2834 "shl $28, %%eax \n" //Alpha
2835 "and $0x10000000, %%eax \n"
2836 "or %%eax, %%ecx \n"
2837 "shl $1, %%eax \n"
2838 "or %%eax, %%ecx \n"
2839 "shl $1, %%eax \n"
2840 "or %%eax, %%ecx \n"
2841 "shl $1, %%eax \n"
2842 "or %%eax, %%ecx \n"
2843 "mov %%edx, %%eax \n"
2844 "shl $24, %%eax \n" // Intensity
2845 "and $0x0E000000, %%eax \n"
2846 "or %%eax, %%ecx \n"
2847 "shr $3, %%eax \n"
2848 "and $0x01000000, %%eax \n"
2849 "or %%eax, %%ecx \n"
2851 "mov %%ecx, (%[dst]) \n"
2852 "add $4, %[dst] \n"
2853 // }
2855 // * copy
2856 "mov (%[src]), %%eax \n" // read all 8 pixels
2857 "bswap %%eax \n"
2858 "add $8, %[src] \n"
2859 "mov %%eax, %%edx \n"
2861 // 1st dword {
2862 "xor %%ecx, %%ecx \n"
2864 // pixel #1
2865 // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx
2866 // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
2867 "mov %%edx, %%eax \n"
2868 "shr $24, %%eax \n" //Alpha
2869 "and $0x00000010, %%eax \n"
2870 "or %%eax, %%ecx \n"
2871 "shl $1, %%eax \n"
2872 "or %%eax, %%ecx \n"
2873 "shl $1, %%eax \n"
2874 "or %%eax, %%ecx \n"
2875 "shl $1, %%eax \n"
2876 "or %%eax, %%ecx \n"
2877 "mov %%edx, %%eax \n"
2878 "shr $28, %%eax \n" // Intensity
2879 "and $0x0000000E, %%eax \n"
2880 "or %%eax, %%ecx \n"
2881 "shr $3, %%eax \n"
2882 "or %%eax, %%ecx \n"
2884 // pixel #2
2885 // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx
2886 // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
2887 "mov %%edx, %%eax \n"
2888 "shr $12, %%eax \n" //Alpha
2889 "and $0x00001000, %%eax \n"
2890 "or %%eax, %%ecx \n"
2891 "shl $1, %%eax \n"
2892 "or %%eax, %%ecx \n"
2893 "shl $1, %%eax \n"
2894 "or %%eax, %%ecx \n"
2895 "shl $1, %%eax \n"
2896 "or %%eax, %%ecx \n"
2897 "mov %%edx, %%eax \n"
2898 "shr $16, %%eax \n" // Intensity
2899 "and $0x00000E00, %%eax \n"
2900 "or %%eax, %%ecx \n"
2901 "shr $3, %%eax \n"
2902 "and $0x00000100, %%eax \n"
2903 "or %%eax, %%ecx \n"
2905 // pixel #3
2906 // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx
2907 // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
2908 //Alpha
2909 "mov %%edx, %%eax \n"
2910 "and $0x00100000, %%eax \n"
2911 "or %%eax, %%ecx \n"
2912 "shl $1, %%eax \n"
2913 "or %%eax, %%ecx \n"
2914 "shl $1, %%eax \n"
2915 "or %%eax, %%ecx \n"
2916 "shl $1, %%eax \n"
2917 "or %%eax, %%ecx \n"
2918 "mov %%edx, %%eax \n"
2919 "shr $4, %%eax \n" // Intensity
2920 "and $0x000E0000, %%eax \n"
2921 "or %%eax, %%ecx \n"
2922 "shr $3, %%eax \n"
2923 "and $0x00010000, %%eax \n"
2924 "or %%eax, %%ecx \n"
2926 // pixel #4
2927 // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx
2928 // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
2929 "mov %%edx, %%eax \n"
2930 "shl $12, %%eax \n" //Alpha
2931 "and $0x10000000, %%eax \n"
2932 "or %%eax, %%ecx \n"
2933 "shl $1, %%eax \n"
2934 "or %%eax, %%ecx \n"
2935 "shl $1, %%eax \n"
2936 "or %%eax, %%ecx \n"
2937 "shl $1, %%eax \n"
2938 "or %%eax, %%ecx \n"
2939 "mov %%edx, %%eax \n"
2940 "shl $8, %%eax \n" // Intensity
2941 "and $0x0E000000, %%eax \n"
2942 "or %%eax, %%ecx \n"
2943 "shr $3, %%eax \n"
2944 "and $0x01000000, %%eax \n"
2945 "or %%eax, %%ecx \n"
2948 "mov %%ecx, (%[dst]) \n"
2949 "add $4, %[dst] \n"
2950 // }
2952 // 2nd dword {
2953 "xor %%ecx, %%ecx \n"
2955 // pixel #5
2956 // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx
2957 // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
2958 "mov %%edx, %%eax \n"
2959 "shr $8, %%eax \n" //Alpha
2960 "and $0x00000010, %%eax \n"
2961 "or %%eax, %%ecx \n"
2962 "shl $1, %%eax \n"
2963 "or %%eax, %%ecx \n"
2964 "shl $1, %%eax \n"
2965 "or %%eax, %%ecx \n"
2966 "shl $1, %%eax \n"
2967 "or %%eax, %%ecx \n"
2968 "mov %%edx, %%eax \n"
2969 "shr $12, %%eax \n" // Intensity
2970 "and $0x0000000E, %%eax \n"
2971 "or %%eax, %%ecx \n"
2972 "shr $3, %%eax \n"
2973 "or %%eax, %%ecx \n"
2975 // pixel #6
2976 // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx
2977 // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
2978 //Alpha
2979 "mov %%edx, %%eax \n"
2980 "shl $4, %%eax \n"
2981 "and $0x00001000, %%eax \n"
2982 "or %%eax, %%ecx \n"
2983 "shl $1, %%eax \n"
2984 "or %%eax, %%ecx \n"
2985 "shl $1, %%eax \n"
2986 "or %%eax, %%ecx \n"
2987 "shl $1, %%eax \n"
2988 "or %%eax, %%ecx \n"
2989 "mov %%edx, %%eax \n" // Intensity
2990 "and $0x00000E00, %%eax \n"
2991 "or %%eax, %%ecx \n"
2992 "shr $3, %%eax \n"
2993 "and $0x00000100, %%eax \n"
2994 "or %%eax, %%ecx \n"
2996 // pixel #7
2997 // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx
2998 // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
2999 //Alpha
3000 "mov %%edx, %%eax \n"
3001 "shl $16, %%eax \n"
3002 "and $0x00100000, %%eax \n"
3003 "or %%eax, %%ecx \n"
3004 "shl $1, %%eax \n"
3005 "or %%eax, %%ecx \n"
3006 "shl $1, %%eax \n"
3007 "or %%eax, %%ecx \n"
3008 "shl $1, %%eax \n"
3009 "or %%eax, %%ecx \n"
3010 "mov %%edx, %%eax \n"
3011 "shl $12, %%eax \n" // Intensity
3012 "and $0x000E0000, %%eax \n"
3013 "or %%eax, %%ecx \n"
3014 "shr $3, %%eax \n"
3015 "and $0x00010000, %%eax \n"
3016 "or %%eax, %%ecx \n"
3018 // pixel #8
3019 // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA
3020 // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
3021 "mov %%edx, %%eax \n"
3022 "shl $28, %%eax \n" //Alpha
3023 "and $0x10000000, %%eax \n"
3024 "or %%eax, %%ecx \n"
3025 "shl $1, %%eax \n"
3026 "or %%eax, %%ecx \n"
3027 "shl $1, %%eax \n"
3028 "or %%eax, %%ecx \n"
3029 "shl $1, %%eax \n"
3030 "or %%eax, %%ecx \n"
3031 "mov %%edx, %%eax \n"
3032 "shl $24, %%eax \n" // Intensity
3033 "and $0x0E000000, %%eax \n"
3034 "or %%eax, %%ecx \n"
3035 "shr $3, %%eax \n"
3036 "and $0x01000000, %%eax \n"
3037 "or %%eax, %%ecx \n"
3039 "mov %%ecx, (%[dst]) \n"
3040 "add $4, %[dst] \n"
3041 // }
3042 // *
3044 "pop %[c] \n"
3045 "dec %%ecx \n"
3046 "jnz x_loop_22 \n"
3048 "add %[line], %[src] \n"
3049 "add %[ext], %[dst] \n"
3051 "pop %[c] \n"
3052 "dec %%ecx \n"
3053 "jnz y_loop2 \n"
3055 "end_y_loop2: \n"
3056 : [c] "=&c" (fake_ecx)
3057 : [src]"S"(src), [dst]"D"(dst), "[c]"(height),
3058 [wid_64] "g" (wid_64), [line] "g" ((uintptr_t)line), [ext] "g" ((uintptr_t)ext)
3059 : "memory", "cc", "eax", "edx", "ebx"
3061 #endif // _WIN32
3063 return /*(0 << 16) | */GR_TEXFMT_ALPHA_INTENSITY_44;
3066 //****************************************************************
3067 // Size: 0, Format: 4
3069 DWORD Load4bI (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile)
3071 if (rdp.tlut_mode != 0)
3072 return Load4bCI (dst, src, wid_64, height, line, real_width, tile);
3074 if (wid_64 < 1) wid_64 = 1;
3075 if (height < 1) height = 1;
3076 int ext = (real_width - (wid_64 << 4));
3077 #ifndef GCC
3078 __asm {
3079 mov esi,dword ptr [src]
3080 mov edi,dword ptr [dst]
3082 mov ecx,dword ptr [height]
3083 y_loop:
3084 push ecx
3086 mov ecx,dword ptr [wid_64]
3087 x_loop:
3088 push ecx
3090 mov eax,dword ptr [esi] // read all 8 pixels
3091 bswap eax
3092 add esi,4
3093 mov edx,eax
3095 // 1st dword {
3096 xor ecx,ecx
3097 shr eax,28 // 0xF0000000 -> 0x0000000F
3098 or ecx,eax
3099 shl eax,4
3100 or ecx,eax
3102 mov eax,edx // 0x0F000000 -> 0x00000F00
3103 shr eax,16
3104 and eax,0x00000F00
3105 or ecx,eax
3106 shl eax,4
3107 or ecx,eax
3109 mov eax,edx
3110 shr eax,4 // 0x00F00000 -> 0x000F0000
3111 and eax,0x000F0000
3112 or ecx,eax
3113 shl eax,4
3114 or ecx,eax
3116 mov eax,edx
3117 shl eax,8 // 0x000F0000 -> 0x0F000000
3118 and eax,0x0F000000
3119 or ecx,eax
3120 shl eax,4
3121 or ecx,eax
3123 mov dword ptr [edi],ecx
3124 add edi,4
3125 // }
3127 // 2nd dword {
3128 xor ecx,ecx
3129 mov eax,edx
3130 shr eax,12 // 0x0000F000 -> 0x0000000F
3131 and eax,0x0000000F
3132 or ecx,eax
3133 shl eax,4
3134 or ecx,eax
3136 mov eax,edx // 0x00000F00 -> 0x00000F00
3137 and eax,0x00000F00
3138 or ecx,eax
3139 shl eax,4
3140 or ecx,eax
3142 mov eax,edx
3143 shl eax,12 // 0x000000F0 -> 0x000F0000
3144 and eax,0x000F0000
3145 or ecx,eax
3146 shl eax,4
3147 or ecx,eax
3149 shl edx,24 // 0x0000000F -> 0x0F000000
3150 and edx,0x0F000000
3151 or ecx,edx
3152 shl edx,4
3153 or ecx,edx
3155 mov dword ptr [edi],ecx
3156 add edi,4
3157 // }
3159 // * copy
3160 mov eax,dword ptr [esi] // read all 8 pixels
3161 bswap eax
3162 add esi,4
3163 mov edx,eax
3165 // 1st dword {
3166 xor ecx,ecx
3167 shr eax,28 // 0xF0000000 -> 0x0000000F
3168 or ecx,eax
3169 shl eax,4
3170 or ecx,eax
3172 mov eax,edx // 0x0F000000 -> 0x00000F00
3173 shr eax,16
3174 and eax,0x00000F00
3175 or ecx,eax
3176 shl eax,4
3177 or ecx,eax
3179 mov eax,edx
3180 shr eax,4 // 0x00F00000 -> 0x000F0000
3181 and eax,0x000F0000
3182 or ecx,eax
3183 shl eax,4
3184 or ecx,eax
3186 mov eax,edx
3187 shl eax,8 // 0x000F0000 -> 0x0F000000
3188 and eax,0x0F000000
3189 or ecx,eax
3190 shl eax,4
3191 or ecx,eax
3193 mov dword ptr [edi],ecx
3194 add edi,4
3195 // }
3197 // 2nd dword {
3198 xor ecx,ecx
3199 mov eax,edx
3200 shr eax,12 // 0x0000F000 -> 0x0000000F
3201 and eax,0x0000000F
3202 or ecx,eax
3203 shl eax,4
3204 or ecx,eax
3206 mov eax,edx // 0x00000F00 -> 0x00000F00
3207 and eax,0x00000F00
3208 or ecx,eax
3209 shl eax,4
3210 or ecx,eax
3212 mov eax,edx
3213 shl eax,12 // 0x000000F0 -> 0x000F0000
3214 and eax,0x000F0000
3215 or ecx,eax
3216 shl eax,4
3217 or ecx,eax
3219 shl edx,24 // 0x0000000F -> 0x0F000000
3220 and edx,0x0F000000
3221 or ecx,edx
3222 shl edx,4
3223 or ecx,edx
3225 mov dword ptr [edi],ecx
3226 add edi,4
3227 // }
3228 // *
3230 pop ecx
3231 dec ecx
3232 jnz x_loop
3234 pop ecx
3235 dec ecx
3236 jz end_y_loop
3237 push ecx
3239 add esi,dword ptr [line]
3240 add edi,dword ptr [ext]
3242 mov ecx,dword ptr [wid_64]
3243 x_loop_2:
3244 push ecx
3246 mov eax,dword ptr [esi+4] // read all 8 pixels
3247 bswap eax
3248 mov edx,eax
3250 // 1st dword {
3251 xor ecx,ecx
3252 shr eax,28 // 0xF0000000 -> 0x0000000F
3253 or ecx,eax
3254 shl eax,4
3255 or ecx,eax
3257 mov eax,edx // 0x0F000000 -> 0x00000F00
3258 shr eax,16
3259 and eax,0x00000F00
3260 or ecx,eax
3261 shl eax,4
3262 or ecx,eax
3264 mov eax,edx
3265 shr eax,4 // 0x00F00000 -> 0x000F0000
3266 and eax,0x000F0000
3267 or ecx,eax
3268 shl eax,4
3269 or ecx,eax
3271 mov eax,edx
3272 shl eax,8 // 0x000F0000 -> 0x0F000000
3273 and eax,0x0F000000
3274 or ecx,eax
3275 shl eax,4
3276 or ecx,eax
3278 mov dword ptr [edi],ecx
3279 add edi,4
3280 // }
3282 // 2nd dword {
3283 xor ecx,ecx
3284 mov eax,edx
3285 shr eax,12 // 0x0000F000 -> 0x0000000F
3286 and eax,0x0000000F
3287 or ecx,eax
3288 shl eax,4
3289 or ecx,eax
3291 mov eax,edx // 0x00000F00 -> 0x00000F00
3292 and eax,0x00000F00
3293 or ecx,eax
3294 shl eax,4
3295 or ecx,eax
3297 mov eax,edx
3298 shl eax,12 // 0x000000F0 -> 0x000F0000
3299 and eax,0x000F0000
3300 or ecx,eax
3301 shl eax,4
3302 or ecx,eax
3304 shl edx,24 // 0x0000000F -> 0x0F000000
3305 and edx,0x0F000000
3306 or ecx,edx
3307 shl edx,4
3308 or ecx,edx
3310 mov dword ptr [edi],ecx
3311 add edi,4
3312 // }
3314 // * copy
3315 mov eax,dword ptr [esi] // read all 8 pixels
3316 bswap eax
3317 add esi,8
3318 mov edx,eax
3320 // 1st dword {
3321 xor ecx,ecx
3322 shr eax,28 // 0xF0000000 -> 0x0000000F
3323 or ecx,eax
3324 shl eax,4
3325 or ecx,eax
3327 mov eax,edx // 0x0F000000 -> 0x00000F00
3328 shr eax,16
3329 and eax,0x00000F00
3330 or ecx,eax
3331 shl eax,4
3332 or ecx,eax
3334 mov eax,edx
3335 shr eax,4 // 0x00F00000 -> 0x000F0000
3336 and eax,0x000F0000
3337 or ecx,eax
3338 shl eax,4
3339 or ecx,eax
3341 mov eax,edx
3342 shl eax,8 // 0x000F0000 -> 0x0F000000
3343 and eax,0x0F000000
3344 or ecx,eax
3345 shl eax,4
3346 or ecx,eax
3348 mov dword ptr [edi],ecx
3349 add edi,4
3350 // }
3352 // 2nd dword {
3353 xor ecx,ecx
3354 mov eax,edx
3355 shr eax,12 // 0x0000F000 -> 0x0000000F
3356 and eax,0x0000000F
3357 or ecx,eax
3358 shl eax,4
3359 or ecx,eax
3361 mov eax,edx // 0x00000F00 -> 0x00000F00
3362 and eax,0x00000F00
3363 or ecx,eax
3364 shl eax,4
3365 or ecx,eax
3367 mov eax,edx
3368 shl eax,12 // 0x000000F0 -> 0x000F0000
3369 and eax,0x000F0000
3370 or ecx,eax
3371 shl eax,4
3372 or ecx,eax
3374 shl edx,24 // 0x0000000F -> 0x0F000000
3375 and edx,0x0F000000
3376 or ecx,edx
3377 shl edx,4
3378 or ecx,edx
3380 mov dword ptr [edi],ecx
3381 add edi,4
3382 // }
3383 // *
3385 pop ecx
3386 dec ecx
3387 jnz x_loop_2
3389 add esi,dword ptr [line]
3390 add edi,dword ptr [ext]
3392 pop ecx
3393 dec ecx
3394 jnz y_loop
3396 end_y_loop:
3398 #else // _WIN32
3399 //printf("Load4bI\n");
3400 intptr_t fake_ecx;
3401 asm volatile (
3402 "y_loop3: \n"
3403 "push %[c] \n"
3405 "mov %[wid_64], %%ecx \n"
3406 "x_loop3: \n"
3407 "push %[c] \n"
3409 "mov (%[src]), %%eax \n" // read all 8 pixels
3410 "bswap %%eax \n"
3411 "add $4, %[src] \n"
3412 "mov %%eax, %%edx \n"
3414 // 1st dword {
3415 "xor %%ecx, %%ecx \n"
3416 "shr $28, %%eax \n" // 0xF0000000 -> 0x0000000F
3417 "or %%eax, %%ecx \n"
3418 "shl $4, %%eax \n"
3419 "or %%eax, %%ecx \n"
3421 "mov %%edx, %%eax \n" // 0x0F000000 -> 0x00000F00
3422 "shr $16, %%eax \n"
3423 "and $0x00000F00, %%eax \n"
3424 "or %%eax, %%ecx \n"
3425 "shl $4, %%eax \n"
3426 "or %%eax, %%ecx \n"
3428 "mov %%edx, %%eax \n"
3429 "shr $4, %%eax \n" // 0x00F00000 -> 0x000F0000
3430 "and $0x000F0000, %%eax \n"
3431 "or %%eax, %%ecx \n"
3432 "shl $4, %%eax \n"
3433 "or %%eax, %%ecx \n"
3435 "mov %%edx, %%eax \n"
3436 "shl $8, %%eax \n" // 0x000F0000 -> 0x0F000000
3437 "and $0x0F000000, %%eax \n"
3438 "or %%eax, %%ecx \n"
3439 "shl $4, %%eax \n"
3440 "or %%eax, %%ecx \n"
3442 "mov %%ecx, (%[dst]) \n"
3443 "add $4, %[dst] \n"
3444 // }
3446 // 2nd dword {
3447 "xor %%ecx, %%ecx \n"
3448 "mov %%edx, %%eax \n"
3449 "shr $12, %%eax \n" // 0x0000F000 -> 0x0000000F
3450 "and $0x0000000F, %%eax \n"
3451 "or %%eax, %%ecx \n"
3452 "shl $4, %%eax \n"
3453 "or %%eax, %%ecx \n"
3455 "mov %%edx, %%eax \n" // 0x00000F00 -> 0x00000F00
3456 "and $0x00000F00, %%eax \n"
3457 "or %%eax, %%ecx \n"
3458 "shl $4, %%eax \n"
3459 "or %%eax, %%ecx \n"
3461 "mov %%edx, %%eax \n"
3462 "shl $12, %%eax \n" // 0x000000F0 -> 0x000F0000
3463 "and $0x000F0000, %%eax \n"
3464 "or %%eax, %%ecx \n"
3465 "shl $4, %%eax \n"
3466 "or %%eax, %%ecx \n"
3468 "shl $24, %%edx \n" // 0x0000000F -> 0x0F000000
3469 "and $0x0F000000, %%edx \n"
3470 "or %%edx, %%ecx \n"
3471 "shl $4, %%edx \n"
3472 "or %%edx, %%ecx \n"
3474 "mov %%ecx, (%[dst]) \n"
3475 "add $4, %[dst] \n"
3476 // }
3478 // * copy
3479 "mov (%[src]), %%eax \n" // read all 8 pixels
3480 "bswap %%eax \n"
3481 "add $4, %[src] \n"
3482 "mov %%eax, %%edx \n"
3484 // 1st dword {
3485 "xor %%ecx, %%ecx \n"
3486 "shr $28, %%eax \n" // 0xF0000000 -> 0x0000000F
3487 "or %%eax, %%ecx \n"
3488 "shl $4, %%eax \n"
3489 "or %%eax, %%ecx \n"
3491 "mov %%edx, %%eax \n" // 0x0F000000 -> 0x00000F00
3492 "shr $16, %%eax \n"
3493 "and $0x00000F00, %%eax \n"
3494 "or %%eax, %%ecx \n"
3495 "shl $4, %%eax \n"
3496 "or %%eax, %%ecx \n"
3498 "mov %%edx, %%eax \n"
3499 "shr $4, %%eax \n" // 0x00F00000 -> 0x000F0000
3500 "and $0x000F0000, %%eax \n"
3501 "or %%eax, %%ecx \n"
3502 "shl $4, %%eax \n"
3503 "or %%eax, %%ecx \n"
3505 "mov %%edx, %%eax \n"
3506 "shl $8, %%eax \n" // 0x000F0000 -> 0x0F000000
3507 "and $0x0F000000, %%eax \n"
3508 "or %%eax, %%ecx \n"
3509 "shl $4, %%eax \n"
3510 "or %%eax, %%ecx \n"
3512 "mov %%ecx, (%[dst]) \n"
3513 "add $4, %[dst] \n"
3514 // }
3516 // 2nd dword {
3517 "xor %%ecx, %%ecx \n"
3518 "mov %%edx, %%eax \n"
3519 "shr $12, %%eax \n" // 0x0000F000 -> 0x0000000F
3520 "and $0x0000000F, %%eax \n"
3521 "or %%eax, %%ecx \n"
3522 "shl $4, %%eax \n"
3523 "or %%eax, %%ecx \n"
3525 "mov %%edx, %%eax \n" // 0x00000F00 -> 0x00000F00
3526 "and $0x00000F00, %%eax \n"
3527 "or %%eax, %%ecx \n"
3528 "shl $4, %%eax \n"
3529 "or %%eax, %%ecx \n"
3531 "mov %%edx, %%eax \n"
3532 "shl $12, %%eax \n" // 0x000000F0 -> 0x000F0000
3533 "and $0x000F0000, %%eax \n"
3534 "or %%eax, %%ecx \n"
3535 "shl $4, %%eax \n"
3536 "or %%eax, %%ecx \n"
3538 "shl $24, %%edx \n" // 0x0000000F -> 0x0F000000
3539 "and $0x0F000000, %%edx \n"
3540 "or %%edx, %%ecx \n"
3541 "shl $4, %%edx \n"
3542 "or %%edx, %%ecx \n"
3544 "mov %%ecx, (%[dst]) \n"
3545 "add $4, %[dst] \n"
3546 // }
3547 // *
3549 "pop %[c] \n"
3550 "dec %%ecx \n"
3551 "jnz x_loop3 \n"
3553 "pop %[c] \n"
3554 "dec %%ecx \n"
3555 "jz end_y_loop3 \n"
3556 "push %[c] \n"
3558 "add %[line], %[src] \n"
3559 "add %[ext], %[dst] \n"
3561 "mov %[wid_64], %%ecx \n"
3562 "x_loop_23: \n"
3563 "push %[c] \n"
3565 "mov 4(%[src]), %%eax \n" // read all 8 pixels
3566 "bswap %%eax \n"
3567 "mov %%eax, %%edx \n"
3569 // 1st dword {
3570 "xor %%ecx, %%ecx \n"
3571 "shr $28, %%eax \n" // 0xF0000000 -> 0x0000000F
3572 "or %%eax, %%ecx \n"
3573 "shl $4, %%eax \n"
3574 "or %%eax, %%ecx \n"
3576 "mov %%edx, %%eax \n" // 0x0F000000 -> 0x00000F00
3577 "shr $16, %%eax \n"
3578 "and $0x00000F00, %%eax \n"
3579 "or %%eax, %%ecx \n"
3580 "shl $4, %%eax \n"
3581 "or %%eax, %%ecx \n"
3583 "mov %%edx, %%eax \n"
3584 "shr $4, %%eax \n" // 0x00F00000 -> 0x000F0000
3585 "and $0x000F0000, %%eax \n"
3586 "or %%eax, %%ecx \n"
3587 "shl $4, %%eax \n"
3588 "or %%eax, %%ecx \n"
3590 "mov %%edx, %%eax \n"
3591 "shl $8, %%eax \n" // 0x000F0000 -> 0x0F000000
3592 "and $0x0F000000, %%eax \n"
3593 "or %%eax, %%ecx \n"
3594 "shl $4, %%eax \n"
3595 "or %%eax, %%ecx \n"
3597 "mov %%ecx, (%[dst]) \n"
3598 "add $4, %[dst] \n"
3599 // }
3601 // 2nd dword {
3602 "xor %%ecx, %%ecx \n"
3603 "mov %%edx, %%eax \n"
3604 "shr $12, %%eax \n" // 0x0000F000 -> 0x0000000F
3605 "and $0x0000000F, %%eax \n"
3606 "or %%eax, %%ecx \n"
3607 "shl $4, %%eax \n"
3608 "or %%eax, %%ecx \n"
3610 "mov %%edx, %%eax \n" // 0x00000F00 -> 0x00000F00
3611 "and $0x00000F00, %%eax \n"
3612 "or %%eax, %%ecx \n"
3613 "shl $4, %%eax \n"
3614 "or %%eax, %%ecx \n"
3616 "mov %%edx, %%eax \n"
3617 "shl $12, %%eax \n" // 0x000000F0 -> 0x000F0000
3618 "and $0x000F0000, %%eax \n"
3619 "or %%eax, %%ecx \n"
3620 "shl $4, %%eax \n"
3621 "or %%eax, %%ecx \n"
3623 "shl $24, %%edx \n" // 0x0000000F -> 0x0F000000
3624 "and $0x0F000000, %%edx \n"
3625 "or %%edx, %%ecx \n"
3626 "shl $4, %%edx \n"
3627 "or %%edx, %%ecx \n"
3629 "mov %%ecx, (%[dst]) \n"
3630 "add $4, %[dst] \n"
3631 // }
3633 // * copy
3634 "mov (%[src]), %%eax \n" // read all 8 pixels
3635 "bswap %%eax \n"
3636 "add $8, %[src] \n"
3637 "mov %%eax, %%edx \n"
3639 // 1st dword {
3640 "xor %%ecx, %%ecx \n"
3641 "shr $28, %%eax \n" // 0xF0000000 -> 0x0000000F
3642 "or %%eax, %%ecx \n"
3643 "shl $4, %%eax \n"
3644 "or %%eax, %%ecx \n"
3646 "mov %%edx, %%eax \n" // 0x0F000000 -> 0x00000F00
3647 "shr $16, %%eax \n"
3648 "and $0x00000F00, %%eax \n"
3649 "or %%eax, %%ecx \n"
3650 "shl $4, %%eax \n"
3651 "or %%eax, %%ecx \n"
3653 "mov %%edx, %%eax \n"
3654 "shr $4, %%eax \n" // 0x00F00000 -> 0x000F0000
3655 "and $0x000F0000, %%eax \n"
3656 "or %%eax, %%ecx \n"
3657 "shl $4, %%eax \n"
3658 "or %%eax, %%ecx \n"
3660 "mov %%edx, %%eax \n"
3661 "shl $8, %%eax \n" // 0x000F0000 -> 0x0F000000
3662 "and $0x0F000000, %%eax \n"
3663 "or %%eax, %%ecx \n"
3664 "shl $4, %%eax \n"
3665 "or %%eax, %%ecx \n"
3667 "mov %%ecx, (%[dst]) \n"
3668 "add $4, %[dst] \n"
3669 // }
3671 // 2nd dword {
3672 "xor %%ecx, %%ecx \n"
3673 "mov %%edx, %%eax \n"
3674 "shr $12, %%eax \n" // 0x0000F000 -> 0x0000000F
3675 "and $0x0000000F, %%eax \n"
3676 "or %%eax, %%ecx \n"
3677 "shl $4, %%eax \n"
3678 "or %%eax, %%ecx \n"
3680 "mov %%edx, %%eax \n" // 0x00000F00 -> 0x00000F00
3681 "and $0x00000F00, %%eax \n"
3682 "or %%eax, %%ecx \n"
3683 "shl $4, %%eax \n"
3684 "or %%eax, %%ecx \n"
3686 "mov %%edx, %%eax \n"
3687 "shl $12, %%eax \n" // 0x000000F0 -> 0x000F0000
3688 "and $0x000F0000, %%eax \n"
3689 "or %%eax, %%ecx \n"
3690 "shl $4, %%eax \n"
3691 "or %%eax, %%ecx \n"
3693 "shl $24, %%edx \n" // 0x0000000F -> 0x0F000000
3694 "and $0x0F000000, %%edx \n"
3695 "or %%edx, %%ecx \n"
3696 "shl $4, %%edx \n"
3697 "or %%edx, %%ecx \n"
3699 "mov %%ecx, (%[dst]) \n"
3700 "add $4, %[dst] \n"
3701 // }
3702 // *
3704 "pop %[c] \n"
3705 "dec %%ecx \n"
3706 "jnz x_loop_23 \n"
3708 "add %[line], %[src] \n"
3709 "add %[ext], %[dst] \n"
3711 "pop %[c] \n"
3712 "dec %%ecx \n"
3713 "jnz y_loop3 \n"
3715 "end_y_loop3: \n"
3716 : [c] "=&c" (fake_ecx)
3717 : [src] "S"(src), [dst] "D"(dst), "[c]"(height),
3718 [wid_64] "g" (wid_64), [line] "g" ((uintptr_t)line), [ext] "g" ((uintptr_t)ext)
3719 : "memory", "cc", "eax", "edx", "ebx"
3721 #endif // _WIN
3723 return /*(0 << 16) | */GR_TEXFMT_ALPHA_INTENSITY_44;
3726 //****************************************************************
3727 // Size: 0, Format: 0
3729 DWORD Load4bSelect (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile)
3731 if (rdp.tlut_mode == 0)
3732 return Load4bI (dst, src, wid_64, height, line, real_width, tile);
3734 return Load4bCI (dst, src, wid_64, height, line, real_width, tile);