added preliminary X Input Methods support
[wmaker-crm.git] / wrlib / x86_specific.c
blob54253083ec79e079b44551cfdb1feb01875bffee
1 /* x86_convert.c - convert RImage to XImage with x86 optimizations
3 * Raster graphics library
5 * Copyright (c) 2000-2003 Alfredo K. Kojima
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
17 * You should have received a copy of the GNU Library General Public
18 * License along with this library; if not, write to the Free
19 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 #include <config.h>
24 #ifdef ASM_X86
27 #ifdef ASM_X86_MMX
/*
 * x86_check_mmx - report whether the CPU advertises MMX.
 *
 * Returns 1 when the CPUID instruction is usable (EFLAGS bit 21 can be
 * toggled) and CPUID leaf 1 sets EDX bit 23, otherwise 0.  The probe runs
 * only on the first call; the answer is cached in a function-local static
 * and returned directly afterwards.
 */
int
x86_check_mmx(void)
{
    static int result = -1;

    if (result >= 0)
        return result;

    result = 0;

#if defined(__x86_64__)
    /* MMX is part of the x86-64 baseline, and the 32-bit-only probe below
     * (pushal/pushfl) cannot be assembled for that target. */
    result = 1;
#else
    /*
     * "+m": the asm stores to `result` only on the MMX path, so the
     * "result = 0" above must be seen as live by the compiler.  A plain
     * "=m" output would let it discard that store and leave -1 behind.
     *
     * Numeric local labels keep the asm valid if the compiler ever emits
     * this body more than once (inlining, cloning).
     */
    __asm__ __volatile__
        ("pushal                  \n\t" /* cpuid trashes eax..edx; save all */
         "pushfl                  \n\t" /* check whether cpuid is supported */
         "popl %%eax              \n\t"
         "movl %%eax, %%ebx       \n\t"
         "xorl $(1<<21), %%eax    \n\t" /* try to flip the ID flag */
         "pushl %%eax             \n\t"
         "popfl                   \n\t"
         "pushfl                  \n\t"
         "popl %%eax              \n\t"
         "xorl %%ebx, %%eax       \n\t"
         "andl $(1<<21), %%eax    \n\t"
         "jz 1f                   \n\t" /* flag did not change: no cpuid */
         "movl $1, %%eax          \n\t"
         "cpuid                   \n\t"
         "testl $(1<<23), %%edx   \n\t"
         "jz 1f                   \n\t" /* cpuid present, MMX bit clear */
         "popal                   \n\t" /* restore first: the address of %0 */
         "movl $1, %0             \n\t" /* may be formed from a register    */
         "jmp 2f                  \n"
         "1:                      \n\t"
         "popal                   \n"
         "2:                      \n\t"
         : "+m" (result)
         :
         : "cc");
#endif

    return result;
}
75 * TODO:
76 * 32/8 24/8 32/16 24/16 32/24 24/24
77 * PPlain YES YES
78 * MMX DONE
81 * - try to align stack (local variable space) into quadword boundary
/*
 * x86_mmx_TrueColor_32_to_16 - MMX inline-assembly loop that turns 4-byte
 * source pixels into 16-bit pixels while carrying a per-component error term.
 *
 * image        source bytes; the read pointer advances 4 bytes per pixel
 * ximage       destination; one 16-bit store per pixel
 * err, nerr    two rows of 16-bit error terms, 8 bytes (four words) per
 *              pixel.  Which of the two is read as the current row depends
 *              on the low bit of the row counter.  Slots 8 bytes before and
 *              after the current position are accessed, so each row needs
 *              one spare pixel of room on both sides.
 * rtable, gtable, btable
 *              16-bit lookup tables indexed by the error-adjusted component
 * dr, dg, db   their low 16 bits multiply the looked-up values; the product
 *              is subtracted from the adjusted pixel to form the new error
 * roffs, goffs, boffs
 *              left-shift counts applied to the looked-up values before
 *              they are OR-ed into the output word
 * width        pixels per row (the column loop runs its body before testing)
 * height       number of rows; counted down in place
 * line_offset  byte count added to the output pointer after each row
 *
 * NOTE(review): every operand uses the plain "m" constraint, i.e. is an input,
 *   yet the code stores into %1, %14, %16-%20 and %24-%26 and into memory
 *   reached through err/nerr/ximage.  Confirm the statement carries a
 *   "memory" clobber (the tail of the statement is not visible here).
 * NOTE(review): pushal lowers %esp by 32 bytes; the "m" operands are only
 *   valid afterwards if they are not addressed relative to %esp.
 * NOTE(review): psrlw with an MMX register as count uses the whole 64-bit
 *   register value as the count; mm7 is loaded with 0x0004000400040004.
 *   Check against the Intel SDM whether this really shifts by 4.
 * NOTE(review): paddusb saturates per byte although the lanes are 16-bit
 *   words, and psrlw is a logical shift applied to a signed difference.
 */
83 void
84 x86_mmx_TrueColor_32_to_16(unsigned char *image,
85 unsigned short *ximage,
86 short *err,
87 short *nerr,
88 short *rtable,
89 short *gtable,
90 short *btable,
91 int dr,
92 int dg,
93 int db,
94 unsigned int roffs,
95 unsigned int goffs,
96 unsigned int boffs,
97 int width,
98 int height,
99 int line_offset)
101 long long rrggbbaa; /* four 16-bit words: looked-up r, g, b and a zero word */
102 long long pixel; /* four 16-bit words: source pixel plus stored error */
103 short *tmp_err; /* operand %24: used as the pointer into the NEXT error row */
104 short *tmp_nerr; /* operand %25: used as the pointer into the CURRENT error row */
105 int x; /* operand %26: columns left in the current row */
107 asm volatile
109 "pushal \n\t"
111 // pack dr, dg and db into mm6
112 "movl %7, %%eax \n\t"
113 "movl %8, %%ebx \n\t"
114 "movl %9, %%ecx \n\t"
115 "movw %%ax, %16 \n\t"
116 "movw %%bx, %17 \n\t"
117 "movw %%cx, %18 \n\t"
118 "movw $0, %19 \n\t"
120 "movq %16, %%mm6 \n\t" // dr dg db 0
122 // pack 4|4|4|4 into mm7, for shifting (/16)
123 "movl $0x00040004, %16 \n\t"
124 "movl $0x00040004, %18 \n\t"
125 "movq %16, %%mm7 \n\t"
127 // store constant values for using with mmx when dithering
128 "movl $0x00070007, %16 \n\t"
129 "movl $0x00070007, %18 \n\t"
130 "movq %16, %%mm5 \n\t" // mm5 = 7|7|7|7
132 "movl $0x00050005, %16 \n\t"
133 "movl $0x00050005, %18 \n\t"
134 "movq %16, %%mm4 \n\t" // mm4 = 5|5|5|5
136 "movl $0x00030003, %16 \n\t"
137 "movl $0x00030003, %18 \n\t"
138 "movq %16, %%mm3 \n\t" // mm3 = 3|3|3|3
140 // process 1 pixel / cycle, each component treated as 16bit
141 "movl %0, %%esi \n" // esi = image (source read pointer)
143 ".LoopYa: \n\t"
144 "movl %13, %%eax \n\t"
145 "movl %%eax, %26 \n\t" // x = width
147 "movl %14, %%eax \n\t"
148 "decl %%eax \n\t" // y--
149 "movl %%eax, %14 \n\t"
150 "js .Enda \n\t" // if y < 0, goto end
151 "andl $1, %%eax \n\t"
152 "jz .LoopY_1a \n" // if (y & 1) == 0, goto .LoopY_1a
154 ".LoopY_0a: \n\t"
156 "movl %2, %%ebx \n\t" // ebx = err
157 "movl %%ebx, %25 \n\t" // current-row pointer (%25) = err
158 "movl %3, %%eax \n\t" // eax = nerr
159 "movl %%eax, %24 \n\t" // next-row pointer (%24) = nerr
161 "jmp .LoopXa \n"
163 ".LoopY_1a: \n\t"
165 "movl %3, %%ebx \n\t" // ebx = nerr
166 "movl %%ebx, %25 \n\t" // current-row pointer (%25) = nerr
167 "movl %2, %%eax \n\t" // eax = err
168 "movl %%eax, %24 \n\t" // next-row pointer (%24) = err
170 ".align 16 \n"
171 ".LoopXa: \n\t"
173 // calculate errors and pixel components
175 // depend on ebx, esi, mm6
176 "movq (%%ebx), %%mm1 \n\t" // mm1 = error[0..3]
177 "punpcklbw (%%esi), %%mm0 \n\t" // mm0 = image->data[0..3]
178 "psrlw $8, %%mm0 \n\t" // fixup mm0
179 "paddusb %%mm1, %%mm0 \n\t" // mm0 = mm0 + mm1 (sat. to 255)
180 "movq %%mm0, %20 \n\t" // save the pixel
182 "movzwl %20, %%ecx \n\t" // ecx = pixel.red
183 "movl %4, %%edi \n\t" // edi = rtable
184 //agi
185 "leal (%%edi, %%ecx, 2), %%eax \n\t" // eax = &rtable[pixel.red]
186 // agi
187 "movw (%%eax), %%dx \n\t" // dx = rtable[pixel.red]
188 "movw %%dx, %16 \n\t" // save rr
190 "movzwl %21, %%ecx \n\t" // ecx = pixel.green
191 "movl %5, %%edi \n\t" // edi = gtable
192 //agi
193 "leal (%%edi, %%ecx, 2), %%eax \n\t" // eax = &gtable[pixel.green]
194 //agi
195 "movw (%%eax), %%dx \n\t" // dx = gtable[pixel.green]
196 "movw %%dx, %17 \n\t" // save gg
198 "movzwl %22, %%ecx \n\t" // ecx = pixel.blue
199 "movl %6, %%edi \n\t" // edi = btable
200 //agi
201 "leal (%%edi, %%ecx, 2), %%eax \n\t" // eax = &btable[pixel.blue]
202 //agi
203 "movw (%%eax), %%dx \n\t" // dx = btable[pixel.blue]
204 "movw %%dx, %18 \n\t" // save bb
206 "movw $0, %19 \n\t" // save dummy aa
208 "movq %16, %%mm1 \n\t" // load mm1 with rrggbbaa
209 "pmullw %%mm6, %%mm1 \n\t" // mm1 = rr*dr|...
210 "psubsw %%mm1, %%mm0 \n\t" // error = pixel - mm1
213 // distribute the error
215 // depend on mm0, mm7, mm3, mm4, mm5
217 "movl %25, %%ebx \n\t" // ebx = current-row pointer
219 "movq %%mm0, %%mm1 \n\t"
220 "pmullw %%mm5, %%mm1 \n\t" // mm1 = mm1*7
221 "psrlw %%mm7, %%mm1 \n\t" // mm1 = mm1/16
222 "paddw 8(%%ebx), %%mm1 \n\t"
223 "movq %%mm1, 8(%%ebx) \n\t" // err[x+1,y] += rer*7/16
226 "movl %24, %%ebx \n\t" // ebx = next-row pointer
228 "movq %%mm0, %%mm1 \n\t"
229 "pmullw %%mm4, %%mm1 \n\t" // mm1 = mm1*5
230 "psrlw %%mm7, %%mm1 \n\t" // mm1 = mm1/16
231 "paddw -8(%%ebx), %%mm1 \n\t"
// NOTE(review): the multiplier used for this slot is mm4 (5), while the
// original comment (and Floyd-Steinberg weights) call for 3/16 here.
232 "movq %%mm1, -8(%%ebx) \n\t" // err[x-1,y+1] += scaled error (x5 as coded)
234 "movq %%mm0, %%mm1 \n\t"
235 "pmullw %%mm3, %%mm1 \n\t" // mm1 = mm1*3
236 "psrlw %%mm7, %%mm1 \n\t" // mm1 = mm1/16
237 "paddw 8(%%ebx), %%mm1 \n\t"
// NOTE(review): the value added above comes from 8(%ebx) but the sum is
// stored to (%ebx); the multiplier is mm3 (3) where the original comment
// says 5/16.  Confirm which slot and weight were intended.
238 "movq %%mm1, (%%ebx) \n\t" // err[x,y+1] = scaled error (x3 as coded) + 8(%ebx)
240 "psrlw %%mm7, %%mm0 \n\t" // mm0 = mm0/16
241 "movq %%mm0, 8(%%ebx) \n\t" // err[x+1,y+1] = rer/16
244 // calculate final pixel value and store
245 "movl %10, %%ecx \n\t"
246 "movw %16, %%ax \n\t"
247 "shlw %%cl, %%ax \n\t" //NP* ax = r<<roffs
249 "movl %11, %%ecx \n\t"
250 "movw %17, %%bx \n\t"
251 "shlw %%cl, %%bx \n\t" //NP* bx = g<<goffs
252 "orw %%bx, %%ax \n\t"
254 "movl %12, %%ecx \n\t"
255 "movw %18, %%bx \n\t"
256 "shlw %%cl, %%bx \n\t" //NP* bx = b<<boffs
257 "orw %%bx, %%ax \n\t"
259 "movl %1, %%edx \n\t"
260 "movw %%ax, (%%edx) \n\t" // *ximage = assembled 16-bit pixel
261 "addl $2, %%edx \n\t" // increment ximage
262 "movl %%edx, %1 \n\t"
264 // prepare for next iteration on X
266 "addl $8, %24 \n\t" // next-row pointer += 8 bytes
268 "movl %25, %%ebx \n\t"
269 "addl $8, %%ebx \n\t"
270 "movl %%ebx, %25 \n\t" // ebx = current-row pointer += 8 bytes
273 // Note: in the last pixel, this would cause an invalid memory access
274 // because, punpcklbw is used (which reads 8 bytes) and the last
275 // pixel is only 4 bytes. This is no problem because the image data
276 // was allocated with extra 4 bytes when created.
277 "addl $4, %%esi \n\t" // image->data += 4
280 "decl %26 \n\t" // x--
281 "jnz .LoopXa \n\t" // if x != 0, goto .LoopXa
284 // depend on edx (still the output pointer stored just above)
285 "addl %15, %%edx \n\t" // add extra offset to ximage
286 "movl %%edx, %1 \n\t"
289 "jmp .LoopYa \n"
291 ".Enda: \n\t" // THE END
292 "emms \n\t"
293 "popal \n\t"
296 "m" (image), // %0
297 "m" (ximage), // %1
298 "m" (err), // %2
299 "m" (nerr), // %3
300 "m" (rtable), // %4
301 "m" (gtable), // %5
302 "m" (btable), // %6
303 "m" (dr), // %7
304 "m" (dg), // %8
305 "m" (db), // %9
306 "m" (roffs), // %10
307 "m" (goffs), // %11
308 "m" (boffs), // %12
309 "m" (width), // %13
310 "m" (height), // %14
311 "m" (line_offset), // %15
312 "m" (rrggbbaa), // %16 (access to rr)
313 "m" ((*((short*)(&rrggbbaa)+1))), // %17 (access to gg)
314 "m" ((*((short*)(&rrggbbaa)+2))), // %18 (access to bb)
315 "m" ((*((short*)(&rrggbbaa)+3))), // %19 (access to aa)
316 "m" (pixel), // %20 (access to pixel.r)
317 "m" ((*((short*)(&pixel)+1))), // %21 (access to pixel.g)
318 "m" ((*((short*)(&pixel)+2))), // %22 (access to pixel.b)
319 "m" ((*((short*)(&pixel)+3))), // %23 (access to pixel.a)
320 "m" (tmp_err), // %24 (next-row pointer)
321 "m" (tmp_nerr), // %25 (current-row pointer)
322 "m" (x) // %26
/*
 * x86_mmx_TrueColor_24_to_16 - unfinished skeleton of a 24-bit to 16-bit
 * conversion loop.
 *
 * As written the assembly only maintains loop counters: it splits `width`
 * into w1 = width / 4 and w2 = width % 4, walks the rows by counting
 * `height` down, and runs two empty column loops.  Nothing is read from
 * `image` and nothing is stored through `ximage`; the places where the
 * conversion belongs are marked "do conversion".  The only MMX instructions
 * are one movq load and the final emms.
 *
 * The parameter list mirrors x86_mmx_TrueColor_32_to_16; apart from err,
 * nerr, width and height none of the parameters is referenced by the
 * assembly.
 *
 * NOTE(review): `height` is decremented twice per pass through .LoopYc
 *   (once by "decl %14", once through eax), so every other row is skipped.
 * NOTE(review): the "x = w1" store sits between .LoopY_1c and .LoopX_1c.
 *   It is reached only by falling through from .LoopY_1c, where eax holds
 *   the value just loaded from %2 (err), not w1; the .LoopY_0c path jumps
 *   past it and enters the loop with x = width.
 * NOTE(review): all operands are "m" inputs although several are written;
 *   pushal also moves %esp by 32 bytes, which matters if operands are
 *   addressed relative to %esp.
 */
327 void
328 x86_mmx_TrueColor_24_to_16(unsigned char *image,
329 unsigned short *ximage,
330 short *err,
331 short *nerr,
332 short *rtable,
333 short *gtable,
334 short *btable,
335 int dr,
336 int dg,
337 int db,
338 unsigned int roffs,
339 unsigned int goffs,
340 unsigned int boffs,
341 int width,
342 int height,
343 int line_offset)
345 long long rrggbbaa; /* operands %16-%19; not referenced by the assembly */
346 long long pixel; /* operands %20-%23; not referenced by the assembly */
348 short *tmp_err; /* operand %24 */
349 short *tmp_nerr; /* operand %25 */
351 int x; /* operand %26: loop counter */
352 int w1; /* operand %27: width / 4 */
353 int w2; /* operand %28: width % 4 */
355 asm volatile
357 "pushal \n\t"
359 "movl %13, %%eax \n\t" // eax = width
360 "movl %%eax, %%ebx \n\t"
361 "shrl $2, %%eax \n\t"
362 "movl %%eax, %27 \n\t" // w1 = width / 4
363 "andl $3, %%ebx \n\t"
364 "movl %%ebx, %28 \n" // w2 = width %% 4
367 ".LoopYc: \n\t"
368 "movl %13, %%eax \n\t"
369 "movl %%eax, %26 \n\t" // x = width
371 "decl %14 \n\t" // height--
372 "js .Endc \n\t" // if height < 0 then end
374 "movl %14, %%eax \n\t"
375 "decl %%eax \n\t" // y-- (second decrement of height in this pass)
376 "movl %%eax, %14 \n\t"
377 "js .Endc \n\t" // if y < 0, goto end
378 "andl $1, %%eax \n\t"
379 "jz .LoopY_1c \n" // if (y & 1) == 0, goto .LoopY_1c
381 ".LoopY_0c: \n\t"
383 "movl %2, %%ebx \n\t" // ebx = err
384 "movl %%ebx, %25 \n\t" // %25 (tmp_nerr) = err
385 "movl %3, %%eax \n\t" // eax = nerr
386 "movl %%eax, %24 \n\t" // %24 (tmp_err) = nerr
388 "jmp .LoopX_1c \n"
390 ".LoopY_1c: \n\t"
392 "movl %3, %%ebx \n\t" // ebx = nerr
393 "movl %%ebx, %25 \n\t" // %25 (tmp_nerr) = nerr
394 "movl %2, %%eax \n\t" // eax = err
395 "movl %%eax, %24 \n\t" // %24 (tmp_err) = err
397 ".align 16 \n\t"
399 "movl %%eax, %26 \n" // labelled "x = w1", but eax holds the value of %2 here
400 ".LoopX_1c: \n\t"
401 "decl %26 \n\t" // x--
402 "js .Xend1_c \n\t" // if x < 0 then end
404 // do conversion of 4 pixels (not implemented)
// NOTE(review): %2 is the memory operand of the pointer variable `err`, so
// this loads 8 bytes starting at the variable itself, not the error data.
405 "movq %2, %%mm0 \n\t" // mm0 = err
410 "jmp .LoopX_1c \n"
411 ".Xend1_c: \n\t"
413 "movl %28, %%eax \n\t"
414 "movl %%eax, %26 \n" // x = w2
415 ".LoopX_2c: \n\t"
416 "decl %26 \n\t" // x--
417 "js .Xend2_c \n\t" // if x < 0, leave the remainder loop
418 // do conversion (not implemented)
419 "jmp .LoopX_2c \n"
420 ".Xend2_c: \n\t"
422 "movl %27, %%eax \n\t" // eax = w1; overwritten at the top of .LoopYc
423 "jmp .LoopYc \n"
425 ".Endc: \n\t" // THE END
426 "emms \n\t"
427 "popal \n\t"
430 "m" (image), // %0
431 "m" (ximage), // %1
432 "m" (err), // %2
433 "m" (nerr), // %3
434 "m" (rtable), // %4
435 "m" (gtable), // %5
436 "m" (btable), // %6
437 "m" (dr), // %7
438 "m" (dg), // %8
439 "m" (db), // %9
440 "m" (roffs), // %10
441 "m" (goffs), // %11
442 "m" (boffs), // %12
443 "m" (width), // %13
444 "m" (height), // %14
445 "m" (line_offset), // %15
446 "m" (rrggbbaa), // %16 (access to rr)
447 "m" ((*((short*)(&rrggbbaa)+1))), // %17 (access to gg)
448 "m" ((*((short*)(&rrggbbaa)+2))), // %18 (access to bb)
449 "m" ((*((short*)(&rrggbbaa)+3))), // %19 (access to aa)
450 "m" (pixel), // %20 (access to pixel.r)
451 "m" ((*((short*)(&pixel)+1))), // %21 (access to pixel.g)
452 "m" ((*((short*)(&pixel)+2))), // %22 (access to pixel.b)
453 "m" ((*((short*)(&pixel)+3))), // %23 (access to pixel.a)
454 "m" (tmp_err), // %24
455 "m" (tmp_nerr), // %25
456 "m" (x), // %26
457 "m" (w1), // %27
458 "m" (w2) // %28
464 #endif /* ASM_X86_MMX */
468 void
469 x86_PseudoColor_32_to_8(unsigned char *image,
470 unsigned char *ximage,
471 char *err,
472 char *nerr,
473 short *ctable,
474 int dr,
475 int dg,
476 int db,
477 unsigned long *pixels,
478 int cpc,
479 int width,
480 int height,
481 int bytesPerPixel,
482 int line_offset)
484 int x;
485 int cpcpc;
487 int rr;
488 int gg;
489 int bb;
491 char *tmp_err;
492 char *tmp_nerr;
494 char ndr; // aparently not used
495 char ndg; // aparently not used
496 char ndb; // aparently not used
498 asm volatile
500 "pushal \n\t"
502 "movl %9, %%eax \n\t"
503 "mulb %9 \n\t"
504 "movl %%eax, %15 \n\t" // cpcpc = cpc*cpc
506 // eax will always be <= 0xffff
508 // process 1 pixel / cycle, each component treated as 16bit
509 "movl %0, %%esi \n" // esi = image->data
511 ".LoopYb: \n\t"
512 "movl %10, %%ecx \n\t"
513 "movl %%ecx, %14 \n\t" // x = width
515 "movl %11, %%ecx \n\t"
516 "decl %%ecx \n\t" // y--
517 "movl %%ecx, %11 \n\t"
518 "js .Endb \n\t" // if y < 0, goto end
519 "andl $1, %%ecx \n\t"
520 "jz .LoopY_1b \n" // if (y&1) goto LoopY_1
522 ".LoopY_0b: \n\t"
524 "movl %2, %%ebx \n\t" // ebx = err
525 //useless "movl %%ebx, %20 \n\t" // [-36] = err
526 "movl %3, %%ecx \n\t" //
527 "movl %%ecx, %19 \n\t" // [-32] = nerr
529 "movl $0, (%%ecx) \n\t" // init error of nerr[0] to 0
531 "jmp .LoopXb \n"
533 ".LoopY_1b: \n\t"
535 "movl %3, %%ebx \n\t" // ebx = nerr
536 //useless "movl %%ebx, %20 \n\t" // [-36] = nerr
537 "movl %2, %%ecx \n\t" //
538 "movl %%ecx, %19 \n\t" // [-32] = err
540 "movl $0, (%%ecx) \n\t" // init error of nerr[0] to 0
543 ".align 16 \n"
544 ".LoopXb: \n\t"
547 "movl %4, %%edi \n\t" // edi = ctable
548 "xorl %%edx, %%edx \n\t" // zero the upper word on edx
550 // RED
552 // depends on ebx==err, esi==image->data, edi
553 "movzbw (%%esi), %%dx \n\t" // dx = image->data[0]
554 "movsbw (%%ebx), %%ax \n\t" // ax = error[0]
555 "addw %%ax, %%dx \n\t" // pixel.red = data[0] + error[0]
557 "testb %%dh, %%dh \n\t" // test if pixel.red < 0 or > 255
558 "jz .OKRb \n\t" // 0 <= pixel.red <= 255
559 "js .NEGRb \n\t" // pixel.red < 0
560 "movw $0xff, %%dx \n\t" // pixel.red > 255
561 "jmp .OKRb \n"
562 ".NEGRb: \n\t"
563 "xorw %%dx, %%dx \n"
564 ".OKRb: \n\t"
565 //partial reg
566 "leal (%%edi, %%edx, 2), %%ecx \n\t" // ecx = &ctable[pixel.red]
567 //agi
568 "movl (%%ecx), %%eax \n\t" // ax = ctable[pixel.red]
569 "movw %%ax, %16 \n\t" // save rr
571 "mulb %5 \n\t" // ax = rr*dr
572 "subw %%ax, %%dx \n\t" // rer = dx = dx - rr*dr
574 "movswl %%dx, %%eax \n\t" // save rer
576 // distribute error
577 "leal (, %%eax, 8), %%ecx \n\t"
578 "subw %%dx, %%cx \n\t" // cx = rer * 7
579 "sarw $4, %%cx \n\t" // cx = rer * 7 / 16
580 "addb %%cl, 4(%%ebx) \n\t" // err[x+1] += rer * 7 / 16
582 "movl %19, %%ecx \n\t" // ecx = nerr
584 "leaw (%%eax, %%eax, 4), %%dx \n\t" // dx = rer * 5
585 "sarw $4, %%dx \n\t" // dx = rer * 5 / 16
586 "addb %%dl, (%%ecx) \n\t" // nerr[x] += rer * 5 / 16
588 "leaw (%%eax, %%eax, 2), %%dx \n\t" // dx = rer * 3
589 "sarw $4, %%dx \n\t" // dx = rer * 3 / 16
590 "addb %%dl, -4(%%ecx) \n\t" // nerr[x-1] += rer * 3 / 16
592 "sarw $4, %%ax \n\t" // ax = rer / 16
593 "movb %%al, 4(%%ecx) \n\t" // nerr[x+1] = rer / 16
596 // GREEN
598 // depends on ebx, esi, edi
599 "movzbw 1(%%esi), %%dx \n\t" // dx = image->data[1]
600 "movsbw 1(%%ebx), %%ax \n\t" // ax = error[1]
601 "addw %%ax, %%dx \n\t" // pixel.grn = data[1] + error[1]
603 "testb %%dh, %%dh \n\t" // test if pixel.grn < 0 or > 255
604 "jz .OKGb \n\t" // 0 <= pixel.grn <= 255
605 "js .NEGGb \n\t" // pixel.grn < 0
606 "movw $0xff, %%dx \n\t" // pixel.grn > 255
607 "jmp .OKGb \n"
608 ".NEGGb: \n\t"
609 "xorw %%dx, %%dx \n"
610 ".OKGb: \n\t"
611 // partial reg
612 "leal (%%edi, %%edx, 2), %%ecx \n\t" // ecx = &ctable[pixel.grn]
613 //agi
614 "movw (%%ecx), %%ax \n\t" // ax = ctable[pixel.grn]
615 "movw %%ax, %17 \n\t" // save gg
617 "mulb %6 \n\t" // ax = gg*dg
618 "subw %%ax, %%dx \n\t" // ger = dx = dx - gg*dg
620 "movswl %%dx, %%eax \n\t" // save ger
622 // distribute error
624 "leal (, %%eax, 8), %%ecx \n\t"
625 "subw %%dx, %%cx \n\t" // cx = ger * 7
626 "sarw $4, %%cx \n\t" // cx = ger * 7 / 16
627 "addb %%cl, 5(%%ebx) \n\t" // err[x+1] += ger * 7 / 16
629 "movl %19, %%ecx \n\t" // ecx = nerr
631 "leaw (%%eax, %%eax, 4), %%dx \n\t" // dx = ger * 5
632 "sarw $4, %%dx \n\t" // dx = ger * 5 / 16
633 "addb %%dl, 1(%%ecx) \n\t" // nerr[x] += ger * 5 / 16
635 "leaw (%%eax, %%eax, 2), %%dx \n\t" // dx = ger * 3
636 "sarw $4, %%dx \n\t" // dx = ger * 3 / 16
637 "addb %%dl, -3(%%ecx) \n\t" // nerr[x-1] += ger * 3 / 16
639 "sarw $4, %%ax \n\t" // ax = ger / 16
640 "movb %%al, 5(%%ecx) \n\t" // nerr[x+1] = ger / 16
643 // BLUE
645 // depends on ebx, esi
646 "movzbw 2(%%esi), %%dx \n\t" // dx = image->data[2]
647 "movsbw 2(%%ebx), %%ax \n\t" // ax = error[2]
648 "addw %%ax, %%dx \n\t" // pixel.grn = data[2] + error[2]
650 "testb %%dh, %%dh \n\t" // test if pixel.blu < 0 or > 255
651 "jz .OKBb \n\t" // 0 <= pixel.blu <= 255
652 "js .NEGBb \n\t" // pixel.blu < 0
653 "movw $0xff, %%dx \n\t" // pixel.blu > 255
654 "jmp .OKBb \n"
655 ".NEGBb: \n\t"
656 "xorw %%dx, %%dx \n"
657 ".OKBb: \n\t"
658 //partial reg
659 "leal (%%edi, %%edx, 2), %%ecx \n\t" // ecx = &ctable[pixel.blu]
660 //agi
661 "movw (%%ecx), %%ax \n\t" // ax = ctable[pixel.blu]
662 "movw %%ax, %18 \n\t" // save bb
664 "mulb %7 \n\t" // ax = bb*db
665 "subw %%ax, %%dx \n\t" // ber = dx = dx - bb*db
666 "movswl %%dx, %%eax \n\t" // save ber
668 // distribute error
669 "leal (, %%eax, 8), %%ecx \n\t"
670 "subw %%dx, %%cx \n\t" // cx = ber * 7
671 "sarw $4, %%cx \n\t" // cx = ber * 7 / 16
672 "addb %%cl, 6(%%ebx) \n\t" // err[x+1] += ber * 7 / 16
674 "movl %19, %%ecx \n\t" // ecx = nerr
676 "leaw (%%eax, %%eax, 4), %%dx \n\t" // dx = ber * 5
677 "sarw $4, %%dx \n\t" // dx = ber * 5 / 16
678 "addb %%dl, 2(%%ecx) \n\t" // nerr[x] += ber * 5 / 16
680 "leaw (%%eax, %%eax, 2), %%dx \n\t" // dx = ber * 3
681 "sarw $4, %%dx \n\t" // dx = ber * 3 / 16
682 "addb %%dl, -4(%%ecx) \n\t" // nerr[x-1] += ber * 3 / 16
684 "sarw $4, %%ax \n\t" // ax = ber / 16
685 "movb %%al, 6(%%ecx) \n\t" // nerr[x+1] = ber / 16
687 "andl $0xffff, %%eax \n\t"
688 // depends on eax & 0xffff0000 == 0
689 // calculate the index of the value of the pixel
690 "movw %16, %%ax \n\t" // ax = rr
691 "mulb %15 \n\t" // ax = cpcpc*rr
692 "movw %%ax, %%cx \n\t"
693 "movw %17, %%ax \n\t" // ax = gg
694 "mulb %9 \n\t" // ax = cpc*gg
695 "addw %%cx, %%ax \n\t" // ax = cpc*gg + cpcpc*rr
696 "addw %18, %%ax \n\t" // ax = cpcpc*rr + cpc*gg + bb
698 "movl %8, %%ecx \n\t"
699 //agi
700 "leal (%%ecx, %%eax, 4), %%edx \n\t"
701 //agi
702 "movb (%%edx), %%cl \n\t" // cl = pixels[ax]
704 // store the pixel
705 "movl %1, %%eax \n\t"
706 "movb %%cl, (%%eax) \n\t" // *ximage = cl
707 "incl %1 \n\t" // ximage++
709 // prepare for next iteration on X
711 "addl $4, %19 \n\t" // nerr += 4
712 "addl $4, %%ebx \n\t" // err += 4
714 "addl %12, %%esi \n\t" // image->data += bpp
716 "decl %14 \n\t" // x--
717 "jnz .LoopXb \n\t" // if x>0, goto .LoopX
720 "movl %13, %%eax \n\t"
721 "addl %%eax, %1 \n\t" // add extra offset to ximage
723 "jmp .LoopYb \n"
725 ".Endb: \n\t"
726 "emms \n\t"
727 "popal \n\t"
730 "m" (image), // %0
731 "m" (ximage), // %1
732 "m" (err), // %2
733 "m" (nerr), // %3
734 "m" (ctable), // %4
735 "m" (dr), // %5
736 "m" (dg), // %6
737 "m" (db), // %7
738 "m" (pixels), // %8
739 "m" (cpc), // %9
740 "m" (width), // %10
741 "m" (height), // %11
742 "m" (bytesPerPixel), // %12
743 "m" (line_offset), // %13
744 "m" (x), // %14
745 "m" (cpcpc), // %15
746 "m" (rr), // %16
747 "m" (gg), // %17
748 "m" (bb), // %18
749 "m" (tmp_err), // %19
750 "m" (tmp_nerr), // %20
751 "m" (ndr), // %21
752 "m" (ndg), // %22
753 "m" (ndb) // %23
757 #endif /* ASM_X86 */