*** empty log message ***
[wmaker-crm.git] / wrlib / x86_specific.c
blobe4c08c439f0ebf65922b1cb45bc383aa1c8bfd9b
1 /* x86_convert.c - convert RImage to XImage with x86 optimizations
2 *
3 * Raster graphics library
5 * Copyright (c) 2000 Alfredo K. Kojima
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
17 * You should have received a copy of the GNU Library General Public
18 * License along with this library; if not, write to the Free
19 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 #include <config.h>
24 #ifdef ASM_X86
27 #ifdef ASM_X86_MMX
// x86_check_mmx - report whether the CPU supports MMX.
//
// Returns 1 when MMX is available and 0 otherwise.  The probe runs only on
// the first call; the answer is cached in a function-local static and
// returned directly afterwards.
//
// NOTE(review): .NotPentium/.NotMMX/.noPop are plain assembler labels, so
// the asm can only be emitted once per object file - confirm the function is
// never inlined or duplicated by the compiler.
// NOTE(review): no clobber list is given; the code relies on pushal/popal to
// preserve the general registers, while the EFLAGS change is not declared.
29 int
30 x86_check_mmx()
// -1 means "not probed yet"; any value >= 0 is a cached answer.
32 static int result = -1;
34 if (result >= 0)
35 return result;
// Default to "no MMX"; the asm below stores 1 only on the success path.
37 result = 0;
39 asm volatile
40 ("pushal \n" // please dont forget this in any asm
// Step 1: try to flip bit 21 of EFLAGS.  eax = EFLAGS, ebx keeps the
// original value, the toggled value is written back and read again.
41 "pushfl \n" // check whether cpuid supported
42 "pop %%eax \n"
43 "movl %%eax, %%ebx \n"
44 "xorl $(1<<21), %%eax \n"
45 "pushl %%eax \n"
46 "popfl \n"
47 "pushfl \n"
48 "popl %%eax \n"
// eax = (new EFLAGS ^ old EFLAGS) & bit 21; zero means the bit could not be
// changed, which is treated as "cpuid not available".
49 "xorl %%ebx, %%eax \n"
50 "andl $(1<<21), %%eax \n"
51 "jz .NotPentium \n"
// Step 2: cpuid with eax = 1, then test bit 23 of edx for MMX.
52 "xorl %%eax, %%eax \n" // no eax effect because of the movl below
53 // except reseting flags. is it needed?
54 "movl $1, %%eax \n"
55 "cpuid \n"
56 "test $(1<<23), %%edx \n"
57 "jz .NotMMX \n"
// Success path: restore the registers first, then store 1 into result.
59 "popal \n" // popal needed because the address of
60 "movl $1, %0 \n" // variable %0 may be kept in a register
61 "jmp .noPop \n"
// Failure paths: only restore the registers; result keeps the 0 set above.
63 ".NotMMX: \n"
64 ".NotPentium: \n"
65 "popal \n"
66 ".noPop: \n"
// result is the only operand: a write-only memory output (%0).
68 : "=m" (result));
70 return result;
75 * TODO:
76 * 32/8 24/8 32/16 24/16 32/24 24/24
77 * PPlain YES YES
78 * MMX DONE
81 * - try to align stack (local variable space) into quadword boundary
// x86_mmx_TrueColor_32_to_16 - MMX conversion of 4-byte pixels to 16-bit
// pixels with error-diffusion dithering.
//
// For each of `height` rows and `width` pixels the code reads three channel
// bytes from `image`, adds the stored error, looks the channels up in
// rtable/gtable/btable, writes (rr << roffs) | (gg << goffs) | (bb << boffs)
// as a 16-bit value to `ximage`, and spreads the quantisation error into the
// `err`/`nerr` buffers (8 bytes = four 16-bit words per pixel).  After each
// row `line_offset` is added to the ximage pointer.  The two error buffers
// swap roles depending on the parity of the remaining row count.
//
// The number after each parameter and local is the %ebp-relative offset the
// asm uses.  The asm therefore assumes a frame pointer, stack-passed
// arguments and that the compiler places the locals exactly at the listed
// offsets - TODO confirm for the compilers in use.
// Several parameters (ximage, height) are updated in place in their stack
// slots while the loop runs.
// width must be > 0: the X loop decrements before it tests.
86 void
87 x86_mmx_TrueColor_32_to_16(unsigned char *image, // 8
88 unsigned short *ximage, // 12
89 short *err, // 16
90 short *nerr, // 20
91 short *rtable, // 24
92 short *gtable, // 28
93 short *btable, // 32
94 int dr, // 36
95 int dg, // 40
96 int db, // 44
97 unsigned int roffs, // 48
98 unsigned int goffs, // 52
99 unsigned int boffs, // 56
100 int width, // 60
101 int height, // 64
102 int line_offset) // 68
// The locals below are never touched from C; they only reserve the stack
// slots that the asm addresses by offset.
105 int x; //-4
106 long long rrggbbaa;// -16
107 long long pixel; //-24
108 short *tmp_err; //-32
109 short *tmp_nerr; //-36
112 asm volatile
// NOTE(review): no matching "addl $128, %esp" is visible in this listing;
// presumably the function epilogue restores %esp from %ebp - confirm.
114 "subl $128, %esp \n" // alloc some more stack
116 "pushal \n"
// The rrggbbaa slot at -16(%ebp) is used as scratch to build 64-bit
// constants word by word before loading them into MMX registers.
118 // pack dr, dg and db into mm6
119 "movl 36(%ebp), %eax \n"
120 "movl 40(%ebp), %ebx \n"
121 "movw %ax, -16(%ebp) \n"
123 "movw %bx, -14(%ebp) \n"
124 "movl 44(%ebp), %eax \n"
125 "movw $0, -10(%ebp) \n"
126 "movw %ax, -12(%ebp) \n"
128 "movq -16(%ebp), %mm6 \n" // dr dg db 0
// NOTE(review): mm7 is later used as the *count operand* of psrlw.  Per the
// Intel instruction reference the count is taken from the whole 64-bit
// register, so 0x0004000400040004 would be treated as a count > 15 and the
// result would be all zeros - verify; a count of 4 in the low quadword looks
// intended.
130 // pack 4|4|4|4 into mm7, for shifting (/16)
131 "movl $0x00040004, -16(%ebp) \n"
132 "movl $0x00040004, -12(%ebp) \n"
133 "movq -16(%ebp), %mm7 \n"
// mm5 = 7|7|7|7, mm4 = 5|5|5|5, mm3 = 3|3|3|3 (error weights, as words).
135 // store constant values for using with mmx when dithering
136 "movl $0x00070007, -16(%ebp) \n"
137 "movl $0x00070007, -12(%ebp) \n"
138 "movq -16(%ebp), %mm5 \n"
140 "movl $0x00050005, -16(%ebp) \n"
141 "movl $0x00050005, -12(%ebp) \n"
142 "movq -16(%ebp), %mm4 \n"
144 "movl $0x00030003, -16(%ebp) \n"
145 "movl $0x00030003, -12(%ebp) \n"
146 "movq -16(%ebp), %mm3 \n"
148 // process 1 pixel / cycle, each component treated as 16bit
149 "movl 8(%ebp), %esi \n" // esi = image->data
// ---- row loop: the height slot at 64(%ebp) is the row counter ----
151 ".LoopYa: \n"
152 "movl 60(%ebp), %eax \n"
153 "movl %eax, -4(%ebp) \n" // x = width
155 "movl 64(%ebp), %eax \n"
156 "decl %eax \n" // y--
157 "movl %eax, 64(%ebp) \n"
158 "js .Enda \n" // if y < 0, goto end
159 "andl $1, %eax \n"
// NOTE: jz branches when (y & 1) == 0, i.e. LoopY_1a handles even y and
// LoopY_0a odd y; the trailing comment states the opposite condition.
160 "jz .LoopY_1a \n" // if (y&1) goto LoopY_1
// Odd y: current-row errors come from `err`, next-row errors go to `nerr`.
162 ".LoopY_0a: \n"
164 "movl 16(%ebp), %ebx \n" // ebx = err
165 "movl %ebx, -36(%ebp) \n" // [-36] = err
166 "movl 20(%ebp), %eax \n" //
167 "movl %eax, -32(%ebp) \n" // [-32] = nerr
169 "jmp .LoopXa \n"
// Even y: the two buffers swap roles.
171 ".LoopY_1a: \n"
173 "movl 20(%ebp), %ebx \n" // ebx = nerr
174 "movl %ebx, -36(%ebp) \n" // [-36] = nerr
175 "movl 16(%ebp), %eax \n" //
176 "movl %eax, -32(%ebp) \n" // [-32] = eerr
// ---- pixel loop: ebx = current-row error pointer, esi = source pixel ----
178 ".align 16 \n"
179 ".LoopXa: \n"
181 // calculate errors and pixel components
183 // depend on ebx, esi, mm6
184 "movq (%ebx), %mm1 \n" // mm1 = error[0..3]
// Widen the source bytes to words: unpack them into the high byte of each
// word of mm0, then shift right by 8 so each word holds one channel value.
185 "punpcklbw (%esi), %mm0 \n" // mm0 = image->data[0..3]
186 "psrlw $8, %mm0 \n" // fixup mm0
// NOTE(review): paddusb is a byte-wise unsigned saturating add, but both
// operands hold 16-bit words; a negative error word would put 0xFF into the
// high byte and produce a table index far above 255 below - verify.
187 "paddusb %mm1, %mm0 \n" // mm0 = mm0 + mm1 (sat. to 255)
188 "movq %mm0, -24(%ebp) \n" // save the pixel
// Table lookups: the three channel words saved at -24/-22/-20(%ebp) index
// rtable/gtable/btable (16-bit entries); results go to -16/-14/-12(%ebp).
190 "movzwl -24(%ebp), %ecx \n" // ecx = pixel.red
191 "movl 24(%ebp), %edi \n" // edi = rtable
192 //agi
193 "leal (%edi, %ecx, 2), %eax \n" // eax = &rtable[pixel.red]
194 // agi
195 "movw (%eax), %dx \n" // dx = rtable[pixel.red]
196 "movw %dx, -16(%ebp) \n" // save rr
198 "movzwl -22(%ebp), %ecx \n" // ecx = pixel.green
199 "movl 28(%ebp), %edi \n" // edi = gtable
200 //agi
201 "leal (%edi, %ecx, 2), %eax \n" // eax = &gtable[pixel.green]
202 //agi
203 "movw (%eax), %dx \n" // dx = gtable[pixel.green]
204 "movw %dx, -14(%ebp) \n" // save gg
206 "movzwl -20(%ebp), %ecx \n" // ecx = pixel.blue
207 "movl 32(%ebp), %edi \n" // ebx = btable
208 //agi
209 "leal (%edi, %ecx, 2), %eax \n" // eax = &btable[pixel.blue]
210 //agi
211 "movw (%eax), %dx \n" // dx = btable[pixel.blue]
212 "movw %dx, -12(%ebp) \n" // save bb
214 "movw $0, -10(%ebp) \n" // save dummy aa
// mm0 = pixel - (rr*dr | gg*dg | bb*db | 0): the per-channel error.
216 "movq -16(%ebp), %mm1 \n" // load mm1 with rrggbbaa
217 "pmullw %mm6, %mm1 \n" // mm1 = rr*dr|...
218 "psubsw %mm1, %mm0 \n" // error = pixel - mm1
221 // distribute the error
223 // depend on mm0, mm7, mm3, mm4, mm5
// NOTE(review): psrlw is a logical shift, so a negative error does not keep
// its sign when divided by 16 this way - verify whether psraw was intended.
225 "movl -36(%ebp), %ebx \n"
// Current row, next pixel: the 7/16 share is accumulated (paddw + store),
// although the trailing comment writes "=".
227 "movq %mm0, %mm1 \n"
228 "pmullw %mm5, %mm1 \n" // mm1 = mm1*7
229 "psrlw %mm7, %mm1 \n" // mm1 = mm1/16
230 "paddw 8(%ebx), %mm1 \n"
231 "movq %mm1, 8(%ebx) \n" // err[x+1,y] = rer*7/16
// From here ebx = next-row error pointer.
234 "movl -32(%ebp), %ebx \n"
// NOTE(review): this share uses the *5 weight (mm4) for the x-1 slot while
// the trailing comment says 3/16; the next share uses *3 (mm3) for slot x
// while its comment says 5/16 - the weights look swapped, verify.
236 "movq %mm0, %mm1 \n"
237 "pmullw %mm4, %mm1 \n" // mm1 = mm1*5
238 "psrlw %mm7, %mm1 \n" // mm1 = mm1/16
239 "paddw -8(%ebx), %mm1 \n"
240 "movq %mm1, -8(%ebx) \n" // err[x-1,y+1] += rer*3/16
// NOTE(review): the sum is formed with the x+1 slot (8(%ebx)) but stored to
// slot x ((%ebx)); "paddw (%ebx)" looks intended - verify.
242 "movq %mm0, %mm1 \n"
243 "pmullw %mm3, %mm1 \n" // mm1 = mm1*3
244 "psrlw %mm7, %mm1 \n" // mm1 = mm1/16
245 "paddw 8(%ebx), %mm1 \n"
246 "movq %mm1, (%ebx) \n" // err[x,y+1] += rer*5/16
// Next row, next pixel: overwritten (not accumulated) with error/16.
248 "psrlw %mm7, %mm0 \n" // mm0 = mm0/16
249 "movq %mm0, 8(%ebx) \n" // err[x+1,y+1] = rer/16
// Compose the output: ax = (rr << roffs) | (gg << goffs) | (bb << boffs).
252 // calculate final pixel value and store
253 "movl 48(%ebp), %ecx \n"
254 "movw -16(%ebp), %ax \n"
255 "shlw %cl, %ax \n" //NP* ax = r<<roffs
257 "movl 52(%ebp), %ecx \n"
258 "movw -14(%ebp), %bx \n"
259 "shlw %cl, %bx \n" //NP*
260 "orw %bx, %ax \n"
262 "movl 56(%ebp), %ecx \n"
263 "movw -12(%ebp), %bx \n"
264 "shlw %cl, %bx \n" //NP*
265 "orw %bx, %ax \n"
// Store the 16-bit pixel and advance the ximage slot by one pixel.
267 "movl 12(%ebp), %edx \n"
268 "movw %ax, (%edx) \n"
269 "addl $2, %edx \n" // increment ximage
270 "movl %edx, 12(%ebp) \n"
272 // prepare for next iteration on X
// Both error pointers advance by one pixel (4 words = 8 bytes).
274 "addl $8, -32(%ebp) \n" // nerr += 8
276 "movl -36(%ebp), %ebx \n"
277 "addl $8, %ebx \n"
278 "movl %ebx, -36(%ebp) \n" // ebx = err += 8
281 // Note: in the last pixel, this would cause an invalid memory access
282 // because, punpcklbw is used (which reads 8 bytes) and the last
283 // pixel is only 4 bytes. This is no problem because the image data
284 // was allocated with extra 4 bytes when created.
285 "addl $4, %esi \n" // image->data += 4
288 "decl -4(%ebp) \n" // x--
289 "jnz .LoopXa \n" // if x>0, goto .LoopX
// End of row: edx still holds the ximage pointer written by the last pixel.
292 // depend on edx
293 "addl 68(%ebp), %edx \n" // add extra offset to ximage
294 "movl %edx, 12(%ebp) \n"
297 "jmp .LoopYa \n"
299 ".Enda: \n" // THE END
// Leave MMX state and restore the registers saved by pushal.
301 "emms \n"
303 "popal \n"
// x86_mmx_TrueColor_24_to_16 - skeleton of an MMX 24-bit to 16-bit
// conversion.
//
// As written here the routine only contains the loop scaffolding: it walks
// the row counter and two inner counters (width / 4 and width % 4) but the
// "do conversion" bodies are empty, so nothing is read from `image` or
// written to `ximage`.
//
// The number after each parameter and local is the %ebp-relative offset the
// asm uses; this assumes a frame pointer, stack-passed arguments and that
// the compiler places the locals at exactly these offsets - TODO confirm.
312 void
313 x86_mmx_TrueColor_24_to_16(unsigned char *image, // 8
314 unsigned short *ximage, // 12
315 short *err, // 16
316 short *nerr, // 20
317 short *rtable, // 24
318 short *gtable, // 28
319 short *btable, // 32
320 int dr, // 36
321 int dg, // 40
322 int db, // 44
323 unsigned int roffs, // 48
324 unsigned int goffs, // 52
325 unsigned int boffs, // 56
326 int width, // 60
327 int height, // 64
328 int line_offset) // 68
// The locals are never touched from C; they only reserve the stack slots
// that the asm addresses by offset.
331 int x; //-4
332 long long rrggbbaa;// -16
333 long long pixel; //-24
334 short *tmp_err; //-32
335 short *tmp_nerr; //-36
337 int w1; // -64
338 int w2; // -68
341 asm volatile
// NOTE(review): no matching "addl $128, %esp" is visible in this listing;
// presumably the function epilogue restores %esp from %ebp - confirm.
343 "subl $128, %esp \n" // alloc some more stack
345 "pushal \n"
// Split the row into groups of four pixels (w1) plus a remainder (w2).
347 "movl 60(%ebp), %eax \n" // eax = width
348 "movl %eax, %ebx \n"
349 "shrl $2, %eax \n"
350 "movl %eax, -64(%ebp) \n" // w1 = width / 4
351 "andl $3, %ebx \n"
352 "movl %ebx, -68(%ebp) \n" // w2 = width % 4
// ---- row loop ----
355 ".LoopYc: \n"
356 "movl 60(%ebp), %eax \n"
357 "movl %eax, -4(%ebp) \n" // x = width
// NOTE(review): the height slot is decremented here and again three
// instructions later, i.e. twice per pass through the row loop - verify;
// one of the two decrements looks like a leftover.
359 "decl 64(%ebp) \n" // height--
360 "js .Endc \n" // if height < 0 then end
362 "movl 64(%ebp), %eax \n"
363 "decl %eax \n" // y--
364 "movl %eax, 64(%ebp) \n"
365 "js .Endc \n" // if y < 0, goto end
366 "andl $1, %eax \n"
// NOTE: jz branches when (y & 1) == 0; the trailing comment states the
// opposite condition.
367 "jz .LoopY_1c \n" // if (y&1) goto LoopY_1
// Odd y: [-36] = err, [-32] = nerr.
369 ".LoopY_0c: \n"
371 "movl 16(%ebp), %ebx \n" // ebx = err
372 "movl %ebx, -36(%ebp) \n" // [-36] = err
373 "movl 20(%ebp), %eax \n" //
374 "movl %eax, -32(%ebp) \n" // [-32] = nerr
// This jump skips the "x = w1" store below, so on this path x still holds
// `width` when the first inner loop starts.
376 "jmp .LoopX_1c \n"
// Even y: the two buffers swap roles.
378 ".LoopY_1c: \n"
380 "movl 20(%ebp), %ebx \n" // ebx = nerr
381 "movl %ebx, -36(%ebp) \n" // [-36] = nerr
382 "movl 16(%ebp), %eax \n" //
383 "movl %eax, -32(%ebp) \n" // [-32] = eerr
385 ".align 16 \n"
// NOTE(review): at this point eax holds the `err` pointer loaded above, not
// w1; no load of -64(%ebp) is visible in this listing (original line 386 is
// absent from it) - verify.
387 "movl %eax, -4(%ebp) \n" // x = w1
// ---- first inner loop: intended for groups of 4 pixels ----
388 ".LoopX_1c: \n"
389 "decl -4(%ebp) \n" // x--
390 "js .Xend1_c \n" // if x < 0 then end
392 // do conversion of 4 pixels
// NOTE(review): this loads 8 bytes from the argument slots at 16(%ebp)
// (the `err` and `nerr` pointer values themselves), not from the memory
// that `err` points to - verify the intent.
393 "movq 16(%ebp), %mm0 \n" // mm0 = err
398 "jmp .LoopX_1c \n"
399 ".Xend1_c: \n"
// ---- second inner loop: intended for the width % 4 remainder; empty ----
401 "movl -68(%ebp), %eax \n"
402 "movl %eax, -4(%ebp) \n" // x = w2
403 ".LoopX_2c: \n"
404 "decl -4(%ebp) \n" // x--
405 "js .Xend2_c \n" //
406 // do conversion
407 "jmp .LoopX_2c \n"
408 ".Xend2_c: \n"
// eax = w1 is loaded here but overwritten by the first instruction at
// .LoopYc.
410 "movl -64(%ebp), %eax \n"
411 "jmp .LoopYc \n"
413 ".Endc: \n" // THE END
// Leave MMX state and restore the registers saved by pushal.
415 "emms \n"
417 "popal \n"
423 #endif /* ASM_X86_MMX */
427 void
428 x86_PseudoColor_32_to_8(unsigned char *image, // 8
429 unsigned char *ximage, // 12
430 char *err, // 16
431 char *nerr, // 20
432 short *ctable, // 24
433 int dr, // 28
434 int dg, // 32
435 int db, // 36
436 unsigned long *pixels, // 40
437 int cpc, // 44
438 int width, // 48
439 int height, // 52
440 int bytesPerPixel, // 56
441 int line_offset) // 60
444 * int x; -4
445 * int cpcpc; -8
447 * int rr; -12
448 * int gg; -16
449 * int bb; -20
451 * char ndr; -21
452 * char ndg; -22
453 * char ndb; -23
455 * char *err; -32
456 * char *nerr; -36
459 asm volatile
461 "subl $128, %esp \n" // alloc some stack space
462 "pushal \n"
464 "movl 44(%ebp), %eax \n"
465 "mulb 44(%ebp) \n"
466 "movl %eax, -8(%ebp) \n" // cpcpc = cpc*cpc
468 // eax will always be <= 0xffff
470 // process 1 pixel / cycle, each component treated as 16bit
471 "movl 8(%ebp), %esi \n" // esi = image->data
473 ".LoopYb: \n"
474 "movl 48(%ebp), %ecx \n"
475 "movl %ecx, -4(%ebp) \n" // x = width
477 "movl 52(%ebp), %ecx \n"
478 "decl %ecx \n" // y--
479 "movl %ecx, 52(%ebp) \n"
480 "js .Endb \n" // if y < 0, goto end
481 "andl $1, %ecx \n"
482 "jz .LoopY_1b \n" // if (y&1) goto LoopY_1
484 ".LoopY_0b: \n"
486 "movl 16(%ebp), %ebx \n" // ebx = err
487 //useless "movl %ebx, -36(%ebp) \n" // [-36] = err
488 "movl 20(%ebp), %ecx \n" //
489 "movl %ecx, -32(%ebp) \n" // [-32] = nerr
491 "movl $0, (%ecx) \n" // init error of nerr[0] to 0
493 "jmp .LoopXb \n"
495 ".LoopY_1b: \n"
497 "movl 20(%ebp), %ebx \n" // ebx = nerr
498 //useless "movl %ebx, -36(%ebp) \n" // [-36] = nerr
499 "movl 16(%ebp), %ecx \n" //
500 "movl %ecx, -32(%ebp) \n" // [-32] = err
502 "movl $0, (%ecx) \n" // init error of nerr[0] to 0
505 ".align 16 \n"
506 ".LoopXb: \n"
509 "movl 24(%ebp), %edi \n" // edi = ctable
510 "xorl %edx, %edx \n" // zero the upper word on edx
512 // RED
514 // depends on ebx==err, esi==image->data, edi
515 "movzbw (%esi), %dx \n" // dx = image->data[0]
516 "movsbw (%ebx), %ax \n" // ax = error[0]
517 "addw %ax, %dx \n" // pixel.red = data[0] + error[0]
519 "testb %dh, %dh \n" // test if pixel.red < 0 or > 255
520 "jz .OKRb \n" // 0 <= pixel.red <= 255
521 "js .NEGRb \n" // pixel.red < 0
522 "movw $0xff, %dx \n" // pixel.red > 255
523 "jmp .OKRb \n"
524 ".NEGRb: \n"
525 "xorw %dx, %dx \n"
526 ".OKRb: \n"
527 //partial reg
528 "leal (%edi, %edx, 2), %ecx \n" // ecx = &ctable[pixel.red]
529 //agi
530 "movl (%ecx), %eax \n" // ax = ctable[pixel.red]
531 "movw %ax, -12(%ebp) \n" // save rr
533 "mulb 28(%ebp) \n" // ax = rr*dr
534 "subw %ax, %dx \n" // rer = dx = dx - rr*dr
536 "movswl %dx, %eax \n" // save rer
538 // distribute error
539 "leal (, %eax, 8), %ecx \n"
540 "subw %dx, %cx \n" // cx = rer * 7
541 "sarw $4, %cx \n" // cx = rer * 7 / 16
542 "addb %cl, 4(%ebx) \n" // err[x+1] += rer * 7 / 16
544 "movl -32(%ebp), %ecx \n" // ecx = nerr
546 "leaw (%eax, %eax, 4), %dx \n" // dx = rer * 5
547 "sarw $4, %dx \n" // dx = rer * 5 / 16
548 "addb %dl, (%ecx) \n" // nerr[x] += rer * 5 / 16
550 "leaw (%eax, %eax, 2), %dx \n" // dx = rer * 3
551 "sarw $4, %dx \n" // dx = rer * 3 / 16
552 "addb %dl, -4(%ecx) \n" // nerr[x-1] += rer * 3 / 16
554 "sarw $4, %ax \n" // ax = rer / 16
555 "movb %al, 4(%ecx) \n" // nerr[x+1] = rer / 16
558 // GREEN
560 // depends on ebx, esi, edi
561 "movzbw 1(%esi), %dx \n" // dx = image->data[1]
562 "movsbw 1(%ebx), %ax \n" // ax = error[1]
563 "addw %ax, %dx \n" // pixel.grn = data[1] + error[1]
565 "testb %dh, %dh \n" // test if pixel.grn < 0 or > 255
566 "jz .OKGb \n" // 0 <= pixel.grn <= 255
567 "js .NEGGb \n" // pixel.grn < 0
568 "movw $0xff, %dx \n" // pixel.grn > 255
569 "jmp .OKGb \n"
570 ".NEGGb: \n"
571 "xorw %dx, %dx \n"
572 ".OKGb: \n"
573 // partial reg
574 "leal (%edi, %edx, 2), %ecx \n" // ecx = &ctable[pixel.grn]
575 //agi
576 "movw (%ecx), %ax \n" // ax = ctable[pixel.grn]
577 "movw %ax, -16(%ebp) \n" // save gg
579 "mulb 28(%ebp) \n" // ax = gg*dg
580 "subw %ax, %dx \n" // ger = dx = dx - gg*dg
582 "movswl %dx, %eax \n" // save ger
584 // distribute error
586 "leal (, %eax, 8), %ecx \n"
587 "subw %dx, %cx \n" // cx = ger * 7
588 "sarw $4, %cx \n" // cx = ger * 7 / 16
589 "addb %cl, 5(%ebx) \n" // err[x+1] += ger * 7 / 16
591 "movl -32(%ebp), %ecx \n" // ecx = nerr
593 "leaw (%eax, %eax, 4), %dx \n" // dx = ger * 5
594 "sarw $4, %dx \n" // dx = ger * 5 / 16
595 "addb %dl, 1(%ecx) \n" // nerr[x] += ger * 5 / 16
597 "leaw (%eax, %eax, 2), %dx \n" // dx = ger * 3
598 "sarw $4, %dx \n" // dx = ger * 3 / 16
599 "addb %dl, -3(%ecx) \n" // nerr[x-1] += ger * 3 / 16
601 "sarw $4, %ax \n" // ax = ger / 16
602 "movb %al, 5(%ecx) \n" // nerr[x+1] = ger / 16
605 // BLUE
607 // depends on ebx, esi
608 "movzbw 2(%esi), %dx \n" // dx = image->data[2]
609 "movsbw 2(%ebx), %ax \n" // ax = error[2]
610 "addw %ax, %dx \n" // pixel.grn = data[2] + error[2]
612 "testb %dh, %dh \n" // test if pixel.blu < 0 or > 255
613 "jz .OKBb \n" // 0 <= pixel.blu <= 255
614 "js .NEGBb \n" // pixel.blu < 0
615 "movw $0xff, %dx \n" // pixel.blu > 255
616 "jmp .OKBb \n"
617 ".NEGBb: \n"
618 "xorw %dx, %dx \n"
619 ".OKBb: \n"
620 //partial reg
621 "leal (%edi, %edx, 2), %ecx \n" // ecx = &ctable[pixel.blu]
622 //agi
623 "movw (%ecx), %ax \n" // ax = ctable[pixel.blu]
624 "movw %ax, -20(%ebp) \n" // save bb
626 "mulb 28(%ebp) \n" // ax = bb*db
627 "subw %ax, %dx \n" // ber = dx = dx - bb*db
628 "movswl %dx, %eax \n" // save ber
630 // distribute error
631 "leal (, %eax, 8), %ecx \n"
632 "subw %dx, %cx \n" // cx = ber * 7
633 "sarw $4, %cx \n" // cx = ber * 7 / 16
634 "addb %cl, 6(%ebx) \n" // err[x+1] += ber * 7 / 16
636 "movl -32(%ebp), %ecx \n" // ecx = nerr
638 "leaw (%eax, %eax, 4), %dx \n" // dx = ber * 5
639 "sarw $4, %dx \n" // dx = ber * 5 / 16
640 "addb %dl, 2(%ecx) \n" // nerr[x] += ber * 5 / 16
642 "leaw (%eax, %eax, 2), %dx \n" // dx = ber * 3
643 "sarw $4, %dx \n" // dx = ber * 3 / 16
644 "addb %dl, -4(%ecx) \n" // nerr[x-1] += ber * 3 / 16
646 "sarw $4, %ax \n" // ax = ber / 16
647 "movb %al, 6(%ecx) \n" // nerr[x+1] = ber / 16
649 "andl $0xffff, %eax \n"
650 // depends on eax & 0xffff0000 == 0
651 // calculate the index of the value of the pixel
652 "movw -12(%ebp), %ax \n" // ax = rr
653 "mulb -8(%ebp) \n" // ax = cpcpc*rr
654 "movw %ax, %cx \n"
655 "movw -16(%ebp), %ax \n" // ax = gg
656 "mulb 44(%ebp) \n" // ax = cpc*gg
657 "addw %cx, %ax \n" // ax = cpc*gg + cpcpc*rr
658 "addw -20(%ebp), %ax \n" // ax = cpcpc*rr + cpc*gg + bb
660 "movl 40(%ebp), %ecx \n"
661 //agi
662 "leal (%ecx, %eax, 4), %edx \n"
663 //agi
664 "movb (%edx), %cl \n" // cl = pixels[ax]
666 // store the pixel
667 "movl 12(%ebp), %eax \n"
668 "movb %cl, (%eax) \n" // *ximage = cl
669 "incl 12(%ebp) \n" // ximage++
671 // prepare for next iteration on X
673 "addl $4, -32(%ebp) \n" // nerr += 4
674 "addl $4, %ebx \n" // err += 4
676 "addl 56(%ebp), %esi \n" // image->data += bpp
678 "decl -4(%ebp) \n" // x--
679 "jnz .LoopXb \n" // if x>0, goto .LoopX
682 "movl 60(%ebp), %eax \n"
683 "addl %eax, 12(%ebp) \n" // add extra offset to ximage
685 "jmp .LoopYb \n"
687 ".Endb: \n"
689 "emms \n"
690 "popal \n"
696 #endif /* ASM_X86 */