Bouncing appicon effect
[wmaker-crm.git] / wrlib / x86_specific.c
blob85b4b12b8d60d63d4eeb389cb84a2a678d90d22c
/* x86_specific.c - convert RImage to XImage with x86 optimizations
 *
 * Raster graphics library
 *
 * Copyright (c) 2000-2003 Alfredo K. Kojima
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
22 #include <config.h>
24 #ifdef ASM_X86
26 #ifdef ASM_X86_MMX
28 int x86_check_mmx()
30 static int result = -1;
32 if (result >= 0)
33 return result;
35 result = 0;
37 asm volatile (
38 "pushal \n\t" /* please dont forget this in any asm */
39 "pushfl \n\t" /* check whether cpuid supported */
40 "pop %%eax \n\t"
41 "movl %%eax, %%ebx \n\t"
42 "xorl $(1<<21), %%eax \n\t"
43 "pushl %%eax \n\t"
44 "popfl \n\t"
45 "pushfl \n\t"
46 "popl %%eax \n\t"
47 "xorl %%ebx, %%eax \n\t"
48 "andl $(1<<21), %%eax \n\t"
49 "jz .NotPentium \n\t"
50 "xorl %%eax, %%eax \n\t" /* no eax effect because of the movl below */
51 /* except reseting flags. is it needed? */
52 "movl $1, %%eax \n\t"
53 "cpuid \n\t"
54 "test $(1<<23), %%edx \n\t"
55 "jz .NotMMX \n\t"
56 "popal \n\t" /* popal needed because the address of */
57 "movl $1, %0 \n\t" /* variable %0 may be kept in a register */
58 "jmp .noPop \n\t"
59 ".NotMMX: \n\t"
60 ".NotPentium: \n\t"
61 "popal \n\t"
62 ".noPop: \n\t"
63 : "=m" (result)
66 return result;
/*
 * TODO:
 *              32/8    24/8    32/16   24/16   32/24   24/24
 * PPlain       YES     YES
 * MMX                          DONE
 *
 * - try to align stack (local variable space) into quadword boundary
 */
78 void
79 x86_mmx_TrueColor_32_to_16(unsigned char *image,
80 unsigned short *ximage,
81 short *err,
82 short *nerr,
83 unsigned short *rtable,
84 unsigned short *gtable,
85 unsigned short *btable,
86 int dr,
87 int dg,
88 int db,
89 unsigned int roffs,
90 unsigned int goffs, unsigned int boffs, int width, int height, int line_offset)
92 union {
93 long long rrggbbaa;
94 struct {
95 short int rr, gg, bb, aa;
96 } words;
97 } rrggbbaa;
99 union {
100 long long pixel;
101 struct {
102 short int rr, gg, bb, aa;
103 } words;
104 } pixel;
106 short *tmp_err;
107 short *tmp_nerr;
108 int x;
110 asm volatile (
111 "pushl %%ebx \n\t"
112 /* pack dr, dg and db into mm6 */
113 "movl %7, %%eax \n\t"
114 "movl %8, %%ebx \n\t"
115 "movl %9, %%ecx \n\t"
116 "movw %%ax, %16 \n\t"
117 "movw %%bx, %17 \n\t"
118 "movw %%cx, %18 \n\t"
119 "movw $0, %19 \n\t"
120 "movq %16, %%mm6 \n\t" /* dr dg db 0 */
121 /* pack 4|4|4|4 into mm7, for shifting (/16) */
122 "movl $0x00040004, %16 \n\t"
123 "movl $0x00040004, %18 \n\t"
124 "movq %16, %%mm7 \n\t"
125 /* store constant values for using with mmx when dithering */
126 "movl $0x00070007, %16 \n\t"
127 "movl $0x00070007, %18 \n\t"
128 "movq %16, %%mm5 \n\t"
129 "movl $0x00050005, %16 \n\t"
130 "movl $0x00050005, %18 \n\t"
131 "movq %16, %%mm4 \n\t"
132 "movl $0x00030003, %16 \n\t"
133 "movl $0x00030003, %18 \n\t"
134 "movq %16, %%mm3 \n\t"
135 /* process 1 pixel / cycle, each component treated as 16bit */
136 "movl %0, %%esi \n\t" /* esi = image->data */
137 ".LoopYa: \n\t"
138 "movl %13, %%eax \n\t"
139 "movl %%eax, %26 \n\t" /* x = width */
140 "movl %14, %%eax \n\t"
141 "decl %%eax \n\t" /* y-- */
142 "movl %%eax, %14 \n\t"
143 "js .Enda \n\t" /* if y < 0, goto end */
144 "andl $1, %%eax \n\t"
145 "jz .LoopY_1a \n\t" /* if (y & 1) goto LoopY_1 */
146 ".LoopY_0a: \n\t"
147 "movl %2, %%ebx \n\t" /* ebx = err */
148 "movl %%ebx, %25 \n\t" /* [-36] = err */
149 "movl %3, %%eax \n\t"
150 "movl %%eax, %24 \n\t" /* [-32] = nerr */
151 "jmp .LoopXa \n\t"
152 ".LoopY_1a: \n\t"
153 "movl %3, %%ebx \n\t" /* ebx = nerr */
154 "movl %%ebx, %25 \n\t" /* [-36] = nerr */
155 "movl %2, %%eax \n\t"
156 "movl %%eax, %24 \n\t" /* [-32] = eerr */
157 ".align 16 \n\t"
158 ".LoopXa: \n\t"
159 /* calculate errors and pixel components; depend on ebx, esi, mm6 */
160 "movq (%%ebx), %%mm1 \n\t" /* mm1 = error[0..3] */
161 "punpcklbw (%%esi), %%mm0 \n\t" /* mm0 = image->data[0..3] */
162 "psrlw $8, %%mm0 \n\t" /* fixup mm0 */
163 "paddusb %%mm1, %%mm0 \n\t" /* mm0 = mm0 + mm1 (sat. to 255) */
164 "movq %%mm0, %20 \n\t" /* save the pixel */
165 "movzwl %20, %%ecx \n\t" /* ecx = pixel.red */
166 "movl %4, %%edi \n\t" /* edi = rtable */
167 /* agi */
168 "leal (%%edi, %%ecx, 2), %%eax \n\t" /* eax = &rtable[pixel.red] */
169 /* agi */
170 "movw (%%eax), %%dx \n\t" /* dx = rtable[pixel.red] */
171 "movw %%dx, %16 \n\t" /* save rr */
172 "movzwl %21, %%ecx \n\t" /* ecx = pixel.green */
173 "movl %5, %%edi \n\t" /* edi = gtable */
174 /* agi */
175 "leal (%%edi, %%ecx, 2), %%eax \n\t" /* eax = &gtable[pixel.green] */
176 /* agi */
177 "movw (%%eax), %%dx \n\t" /* dx = gtable[pixel.green] */
178 "movw %%dx, %17 \n\t" /* save gg */
179 "movzwl %22, %%ecx \n\t" /* ecx = pixel.blue */
180 "movl %6, %%edi \n\t" /* ebx = btable */
181 /* agi */
182 "leal (%%edi, %%ecx, 2), %%eax \n\t" /* eax = &btable[pixel.blue] */
183 /* agi */
184 "movw (%%eax), %%dx \n\t" /* dx = btable[pixel.blue] */
185 "movw %%dx, %18 \n\t" /* save bb */
186 "movw $0, %19 \n\t" /* save dummy aa */
187 "movq %16, %%mm1 \n\t" /* load mm1 with rrggbbaa */
188 "pmullw %%mm6, %%mm1 \n\t" /* mm1 = rr*dr|... */
189 "psubsw %%mm1, %%mm0 \n\t" /* error = pixel - mm1 */
190 /* distribute the error; depend on mm0, mm7, mm3, mm4, mm5 */
191 "movl %25, %%ebx \n\t"
192 "movq %%mm0, %%mm1 \n\t"
193 "pmullw %%mm5, %%mm1 \n\t" /* mm1 = mm1*7 */
194 "psrlw %%mm7, %%mm1 \n\t" /* mm1 = mm1/16 */
195 "paddw 8(%%ebx), %%mm1 \n\t"
196 "movq %%mm1, 8(%%ebx) \n\t" /* err[x+1,y] = rer*7/16 */
197 "movl %24, %%ebx \n\t"
198 "movq %%mm0, %%mm1 \n\t"
199 "pmullw %%mm4, %%mm1 \n\t" /* mm1 = mm1*5 */
200 "psrlw %%mm7, %%mm1 \n\t" /* mm1 = mm1/16 */
201 "paddw -8(%%ebx), %%mm1 \n\t"
202 "movq %%mm1, -8(%%ebx) \n\t" /* err[x-1,y+1] += rer*3/16 */
203 "movq %%mm0, %%mm1 \n\t"
204 "pmullw %%mm3, %%mm1 \n\t" /* mm1 = mm1*3 */
205 "psrlw %%mm7, %%mm1 \n\t" /* mm1 = mm1/16 */
206 "paddw 8(%%ebx), %%mm1 \n\t"
207 "movq %%mm1, (%%ebx) \n\t" /* err[x,y+1] += rer*5/16 */
208 "psrlw %%mm7, %%mm0 \n\t" /* mm0 = mm0/16 */
209 "movq %%mm0, 8(%%ebx) \n\t" /* err[x+1,y+1] = rer/16 */
210 /* calculate final pixel value and store */
211 "movl %10, %%ecx \n\t"
212 "movw %16, %%ax \n\t"
213 "shlw %%cl, %%ax \n\t" /* NP* ax = r<<roffs */
214 "movl %11, %%ecx \n\t"
215 "movw %17, %%bx \n\t"
216 "shlw %%cl, %%bx \n\t" /* NP* */
217 "orw %%bx, %%ax \n\t"
218 "movl %12, %%ecx \n\t"
219 "movw %18, %%bx \n\t"
220 "shlw %%cl, %%bx \n\t" /* NP* */
221 "orw %%bx, %%ax \n\t"
222 "movl %1, %%edx \n\t"
223 "movw %%ax, (%%edx) \n\t"
224 "addl $2, %%edx \n\t" /* increment ximage */
225 "movl %%edx, %1 \n\t"
226 /* prepare for next iteration on X */
227 "addl $8, %24 \n\t" /* nerr += 8 */
228 "movl %25, %%ebx \n\t"
229 "addl $8, %%ebx \n\t"
230 "movl %%ebx, %25 \n\t" /* ebx = err += 8 */
231 /* Note: in the last pixel, this would cause an invalid memory access
232 * because, punpcklbw is used (which reads 8 bytes) and the last
233 * pixel is only 4 bytes. This is no problem because the image data
234 * was allocated with extra 4 bytes when created. */
235 "addl $4, %%esi \n\t" /* image->data += 4 */
236 "decl %26 \n\t" /* x-- */
237 "jnz .LoopXa \n\t" /* if x>0, goto .LoopX */
238 /* depend on edx */
239 "addl %15, %%edx \n\t" /* add extra offset to ximage */
240 "movl %%edx, %1 \n\t"
241 "jmp .LoopYa \n\t"
242 ".Enda: \n\t" /* THE END */
243 "emms \n\t"
244 "popl %%ebx \n\t"
246 : "m" (image), /* %0 */
247 "m" (ximage), /* %1 */
248 "m" (err), /* %2 */
249 "m" (nerr), /* %3 */
250 "m" (rtable), /* %4 */
251 "m" (gtable), /* %5 */
252 "m" (btable), /* %6 */
253 "m" (dr), /* %7 */
254 "m" (dg), /* %8 */
255 "m" (db), /* %9 */
256 "m" (roffs), /* %10 */
257 "m" (goffs), /* %11 */
258 "m" (boffs), /* %12 */
259 "m" (width), /* %13 */
260 "m" (height), /* %14 */
261 "m" (line_offset), /* %15 */
262 "m" (rrggbbaa.words.rr), /* %16 (access to rr) */
263 "m" (rrggbbaa.words.gg), /* %17 (access to gg) */
264 "m" (rrggbbaa.words.bb), /* %18 (access to bb) */
265 "m" (rrggbbaa.words.aa), /* %19 (access to aa) */
266 "m" (pixel.words.rr), /* %20 (access to pixel.r) */
267 "m" (pixel.words.gg), /* %21 (access to pixel.g) */
268 "m" (pixel.words.bb), /* %22 (access to pixel.b) */
269 "m" (pixel.words.aa), /* %23 (access to pixel.a) */
270 "m" (tmp_err), /* %24 */
271 "m" (tmp_nerr), /* %25 */
272 "m" (x) /* %26 */
273 : "eax",
274 "ecx",
275 "edx",
276 "esi",
277 "edi"
/*
 * x86_mmx_TrueColor_24_to_16 - unfinished skeleton of an MMX converter.
 *
 * The signature mirrors x86_mmx_TrueColor_32_to_16 (the lookup tables are
 * plain "short *" here), but the asm below contains loop scaffolding only:
 * the two inner loops carry "do conversion" placeholders and no conversion
 * code.  Of all the operands it references only %2, %3, %13, %14 and
 * %24-%28; it never reads image, never stores through ximage, err or nerr,
 * and never uses the tables, dr/dg/db, the offsets or line_offset.
 *
 * What the code does as written:
 *  - w1 = width / 4 and w2 = width % 4 are computed once;
 *  - each pass of the row loop decrements height twice;
 *  - the inner loops only count x down to below zero.
 *
 * NOTE(review): presumably the row loop was meant to decrement height once
 * and the first inner loop to run w1 times - confirm the intent before
 * finishing or calling this routine.
 */
281 void
282 x86_mmx_TrueColor_24_to_16(unsigned char *image,
283 unsigned short *ximage,
284 short *err,
285 short *nerr,
286 short *rtable,
287 short *gtable,
288 short *btable,
289 int dr,
290 int dg,
291 int db,
292 unsigned int roffs,
293 unsigned int goffs, unsigned int boffs, int width, int height, int line_offset)
/* scratch quadwords; passed as operands %16-%23 but not referenced by the asm */
295 union {
296 long long rrggbbaa;
297 struct {
298 short int rr, gg, bb, aa;
299 } words;
300 } rrggbbaa;
302 union {
303 long long pixel;
304 struct {
305 short int rr, gg, bb, aa;
306 } words;
307 } pixel;
/* operands %24 and %25: receive copies of the err / nerr pointers per row */
309 short *tmp_err;
310 short *tmp_nerr;
/* operand %26: inner loop counter; %27 / %28: width / 4 and width % 4 */
312 int x;
313 int w1;
314 int w2;
316 asm volatile (
317 "pushl %%ebx \n\t" /* ebx is used as scratch; restored at .Endc */
318 "movl %13, %%eax \n\t" /* eax = width */
319 "movl %%eax, %%ebx \n\t"
320 "shrl $2, %%eax \n\t"
321 "movl %%eax, %27 \n\t" /* w1 = width / 4 */
322 "andl $3, %%ebx \n\t"
323 "movl %%ebx, %28 \n\t" /* w2 = width %% 4 */
324 ".LoopYc: \n\t"
325 "movl %13, %%eax \n\t"
326 "movl %%eax, %26 \n\t" /* x = width */
327 "decl %14 \n\t" /* height-- (first decrement of this pass) */
328 "js .Endc \n\t" /* if height < 0 then end */
329 "movl %14, %%eax \n\t"
330 "decl %%eax \n\t" /* height - 1 (second decrement of this pass) */
331 "movl %%eax, %14 \n\t" /* stored back into height */
332 "js .Endc \n\t" /* if height < 0, goto end */
333 "andl $1, %%eax \n\t"
334 "jz .LoopY_1c \n\t" /* even height value: take the LoopY_1c setup */
335 ".LoopY_0c: \n\t"
336 "movl %2, %%ebx \n\t" /* ebx = err */
337 "movl %%ebx, %25 \n\t" /* tmp_nerr = err */
338 "movl %3, %%eax \n\t"
339 "movl %%eax, %24 \n\t" /* tmp_err = nerr */
340 "jmp .LoopX_1c \n\t" /* x still holds width on this path */
341 ".LoopY_1c: \n\t"
342 "movl %3, %%ebx \n\t" /* ebx = nerr */
343 "movl %%ebx, %25 \n\t" /* tmp_nerr = nerr */
344 "movl %2, %%eax \n\t"
345 "movl %%eax, %24 \n\t" /* tmp_err = err */
346 ".align 16 \n\t"
347 "movl %%eax, %26 \n\t" /* x = eax, which holds the err pointer here, not w1 */
348 ".LoopX_1c: \n\t"
349 "decl %26 \n\t" /* x-- */
350 "js .Xend1_c \n\t" /* if x < 0 then end */
351 /* do conversion of 4 pixels (placeholder: no conversion code present) */
352 "movq %2, %%mm0 \n\t" /* loads 8 bytes at the err variable itself, not the data it points to */
353 "jmp .LoopX_1c \n\t"
354 ".Xend1_c: \n\t"
355 "movl %28, %%eax \n\t"
356 "movl %%eax, %26 \n\t" /* x = w2 */
357 ".LoopX_2c: \n\t"
358 "decl %26 \n\t" /* x-- */
359 "js .Xend2_c \n\t"
360 /* do conversion (placeholder: no conversion code present) */
361 "jmp .LoopX_2c \n\t"
362 ".Xend2_c: \n\t"
363 "movl %27, %%eax \n\t" /* eax = w1; overwritten at .LoopYc before any use */
364 "jmp .LoopYc \n\t"
365 ".Endc: \n\t" /* THE END */
366 "emms \n\t"
367 "popl %%ebx \n\t"
369 : "m" (image), /* %0 */
370 "m" (ximage), /* %1 */
371 "m" (err), /* %2 */
372 "m" (nerr), /* %3 */
373 "m" (rtable), /* %4 */
374 "m" (gtable), /* %5 */
375 "m" (btable), /* %6 */
376 "m" (dr), /* %7 */
377 "m" (dg), /* %8 */
378 "m" (db), /* %9 */
379 "m" (roffs), /* %10 */
380 "m" (goffs), /* %11 */
381 "m" (boffs), /* %12 */
382 "m" (width), /* %13 */
383 "m" (height), /* %14 */
384 "m" (line_offset), /* %15 */
385 "m" (rrggbbaa.words.rr), /* %16 (access to rr) */
386 "m" (rrggbbaa.words.gg), /* %17 (access to gg) */
387 "m" (rrggbbaa.words.bb), /* %18 (access to bb) */
388 "m" (rrggbbaa.words.aa), /* %19 (access to aa) */
389 "m" (pixel.words.rr), /* %20 (access to pixel.r) */
390 "m" (pixel.words.gg), /* %21 (access to pixel.g) */
391 "m" (pixel.words.bb), /* %22 (access to pixel.b) */
392 "m" (pixel.words.aa), /* %23 (access to pixel.a) */
393 "m" (tmp_err), /* %24 */
394 "m" (tmp_nerr), /* %25 */
395 "m" (x), /* %26 */
396 "m" (w1), /* %27 */
397 "m" (w2) /* %28 */
399 "eax",
400 "ecx",
401 "edx",
402 "esi",
403 "edi"
407 #endif /* ASM_X86_MMX */
409 void
410 x86_PseudoColor_32_to_8(unsigned char *image,
411 unsigned char *ximage,
412 char *err,
413 char *nerr,
414 short *ctable,
415 int dr,
416 int dg,
417 int db,
418 unsigned long *pixels, int cpc, int width, int height, int bytesPerPixel, int line_offset)
420 int x;
421 int cpcpc;
423 int rr;
424 int gg;
425 int bb;
427 char *tmp_err;
428 char *tmp_nerr;
430 char ndr; // aparently not used
431 char ndg; // aparently not used
432 char ndb; // aparently not used
434 asm volatile (
435 "pushal \n\t"
436 "movl %9, %%eax \n\t"
437 "mulb %9 \n\t"
438 "movl %%eax, %15 \n\t" /* cpcpc = cpc * cpc */
439 /* eax will always be <= 0xffff */
440 /* process 1 pixel / cycle, each component treated as 16bit */
441 "movl %0, %%esi \n\t" /* esi = image->data */
442 ".LoopYb: \n\t"
443 "movl %10, %%ecx \n\t"
444 "movl %%ecx, %14 \n\t" /* x = width */
445 "movl %11, %%ecx \n\t"
446 "decl %%ecx \n\t" /* y-- */
447 "movl %%ecx, %11 \n\t"
448 "js .Endb \n\t" /* if y < 0, goto end */
449 "andl $1, %%ecx \n\t"
450 "jz .LoopY_1b \n\t" /* if (y & 1) goto LoopY_1 */
451 ".LoopY_0b: \n\t"
452 "movl %2, %%ebx \n\t" /* ebx = err */
453 /* "movl %%ebx, %20 \n\t" */ /* [-36] = err */ /* useless */
454 "movl %3, %%ecx \n\t"
455 "movl %%ecx, %19 \n\t" /* [-32] = nerr */
456 "movl $0, (%%ecx) \n\t" /* init error of nerr[0] to 0 */
457 "jmp .LoopXb \n\t"
458 ".LoopY_1b: \n\t"
459 "movl %3, %%ebx \n\t" /* ebx = nerr */
460 /* "movl %%ebx, %20 \n\t" */ /* [-36] = nerr */ /* useless */
461 "movl %2, %%ecx \n\t"
462 "movl %%ecx, %19 \n\t" /* [-32] = err */
463 "movl $0, (%%ecx) \n\t" /* init error of nerr[0] to 0 */
464 ".align 16 \n\t"
465 ".LoopXb: \n\t"
466 "movl %4, %%edi \n\t" /* edi = ctable */
467 "xorl %%edx, %%edx \n\t" /* zero the upper word on edx */
468 /* RED; depends on ebx==err, esi==image->data, edi */
469 "movzbw (%%esi), %%dx \n\t" /* dx = image->data[0] */
470 "movsbw (%%ebx), %%ax \n\t" /* ax = error[0] */
471 "addw %%ax, %%dx \n\t" /* pixel.red = data[0] + error[0] */
472 "testb %%dh, %%dh \n\t" /* test if pixel.red < 0 or > 255 */
473 "jz .OKRb \n\t" /* 0 <= pixel.red <= 255 */
474 "js .NEGRb \n\t" /* pixel.red < 0 */
475 "movw $0xff, %%dx \n\t" /* pixel.red > 255 */
476 "jmp .OKRb \n\t"
477 ".NEGRb: \n\t"
478 "xorw %%dx, %%dx \n\t"
479 ".OKRb: \n\t"
480 /* partial reg */
481 "leal (%%edi, %%edx, 2), %%ecx \n\t" /* ecx = &ctable[pixel.red] */
482 /* agi */
483 "movl (%%ecx), %%eax \n\t" /* ax = ctable[pixel.red] */
484 "movw %%ax, %16 \n\t" /* save rr */
485 "mulb %5 \n\t" /* ax = rr*dr */
486 "subw %%ax, %%dx \n\t" /* rer = dx = dx - rr*dr */
487 "movswl %%dx, %%eax \n\t" /* save rer */
488 /* distribute error */
489 "leal (, %%eax, 8), %%ecx \n\t"
490 "subw %%dx, %%cx \n\t" /* cx = rer * 7 */
491 "sarw $4, %%cx \n\t" /* cx = rer * 7 / 16 */
492 "addb %%cl, 4(%%ebx) \n\t" /* err[x+1] += rer * 7 / 16 */
493 "movl %19, %%ecx \n\t" /* ecx = nerr */
494 "leaw (%%eax, %%eax, 4), %%dx \n\t" /* dx = rer * 5 */
495 "sarw $4, %%dx \n\t" /* dx = rer * 5 / 16 */
496 "addb %%dl, (%%ecx) \n\t" /* nerr[x] += rer * 5 / 16 */
497 "leaw (%%eax, %%eax, 2), %%dx \n\t" /* dx = rer * 3 */
498 "sarw $4, %%dx \n\t" /* dx = rer * 3 / 16 */
499 "addb %%dl, -4(%%ecx) \n\t" /* nerr[x-1] += rer * 3 / 16 */
500 "sarw $4, %%ax \n\t" /* ax = rer / 16 */
501 "movb %%al, 4(%%ecx) \n\t" /* nerr[x+1] = rer / 16 */
502 /* GREEN; depends on ebx, esi, edi */
503 "movzbw 1(%%esi), %%dx \n\t" /* dx = image->data[1] */
504 "movsbw 1(%%ebx), %%ax \n\t" /* ax = error[1] */
505 "addw %%ax, %%dx \n\t" /* pixel.grn = data[1] + error[1] */
506 "testb %%dh, %%dh \n\t" /* test if pixel.grn < 0 or > 255 */
507 "jz .OKGb \n\t" /* 0 <= pixel.grn <= 255 */
508 "js .NEGGb \n\t" /* pixel.grn < 0 */
509 "movw $0xff, %%dx \n\t" /* pixel.grn > 255 */
510 "jmp .OKGb \n\t"
511 ".NEGGb: \n\t"
512 "xorw %%dx, %%dx \n\t"
513 ".OKGb: \n\t"
514 /* partial reg */
515 "leal (%%edi, %%edx, 2), %%ecx \n\t" /* ecx = &ctable[pixel.grn] */
516 /* agi */
517 "movw (%%ecx), %%ax \n\t" /* ax = ctable[pixel.grn] */
518 "movw %%ax, %17 \n\t" /* save gg */
519 "mulb %6 \n\t" /* ax = gg*dg */
520 "subw %%ax, %%dx \n\t" /* ger = dx = dx - gg*dg */
521 "movswl %%dx, %%eax \n\t" /* save ger */
522 /* distribute error */
523 "leal (, %%eax, 8), %%ecx \n\t"
524 "subw %%dx, %%cx \n\t" /* cx = ger * 7 */
525 "sarw $4, %%cx \n\t" /* cx = ger * 7 / 16 */
526 "addb %%cl, 5(%%ebx) \n\t" /* err[x+1] += ger * 7 / 16 */
527 "movl %19, %%ecx \n\t" /* ecx = nerr */
528 "leaw (%%eax, %%eax, 4), %%dx \n\t" /* dx = ger * 5 */
529 "sarw $4, %%dx \n\t" /* dx = ger * 5 / 16 */
530 "addb %%dl, 1(%%ecx) \n\t" /* nerr[x] += ger * 5 / 16 */
531 "leaw (%%eax, %%eax, 2), %%dx \n\t" /* dx = ger * 3 */
532 "sarw $4, %%dx \n\t" /* dx = ger * 3 / 16 */
533 "addb %%dl, -3(%%ecx) \n\t" /* nerr[x-1] += ger * 3 / 16 */
534 "sarw $4, %%ax \n\t" /* ax = ger / 16 */
535 "movb %%al, 5(%%ecx) \n\t" /* nerr[x+1] = ger / 16 */
536 /* BLUE; depends on ebx, esi */
537 "movzbw 2(%%esi), %%dx \n\t" /* dx = image->data[2] */
538 "movsbw 2(%%ebx), %%ax \n\t" /* ax = error[2] */
539 "addw %%ax, %%dx \n\t" /* pixel.grn = data[2] + error[2] */
540 "testb %%dh, %%dh \n\t" /* test if pixel.blu < 0 or > 255 */
541 "jz .OKBb \n\t" /* 0 <= pixel.blu <= 255 */
542 "js .NEGBb \n\t" /* pixel.blu < 0 */
543 "movw $0xff, %%dx \n\t" /* pixel.blu > 255 */
544 "jmp .OKBb \n\t"
545 ".NEGBb: \n\t"
546 "xorw %%dx, %%dx \n\t"
547 ".OKBb: \n\t"
548 /* partial reg */
549 "leal (%%edi, %%edx, 2), %%ecx \n\t" /* ecx = &ctable[pixel.blu] */
550 /* agi */
551 "movw (%%ecx), %%ax \n\t" /* ax = ctable[pixel.blu] */
552 "movw %%ax, %18 \n\t" /* save bb */
553 "mulb %7 \n\t" /* ax = bb*db */
554 "subw %%ax, %%dx \n\t" /* ber = dx = dx - bb*db */
555 "movswl %%dx, %%eax \n\t" /* save ber */
556 /* distribute error */
557 "leal (, %%eax, 8), %%ecx \n\t"
558 "subw %%dx, %%cx \n\t" /* cx = ber * 7 */
559 "sarw $4, %%cx \n\t" /* cx = ber * 7 / 16 */
560 "addb %%cl, 6(%%ebx) \n\t" /* err[x+1] += ber * 7 / 16 */
561 "movl %19, %%ecx \n\t" /* ecx = nerr */
562 "leaw (%%eax, %%eax, 4), %%dx \n\t" /* dx = ber * 5 */
563 "sarw $4, %%dx \n\t" /* dx = ber * 5 / 16 */
564 "addb %%dl, 2(%%ecx) \n\t" /* nerr[x] += ber * 5 / 16 */
565 "leaw (%%eax, %%eax, 2), %%dx \n\t" /* dx = ber * 3 */
566 "sarw $4, %%dx \n\t" /* dx = ber * 3 / 16 */
567 "addb %%dl, -4(%%ecx) \n\t" /* nerr[x-1] += ber * 3 / 16 */
568 "sarw $4, %%ax \n\t" /* ax = ber / 16 */
569 "movb %%al, 6(%%ecx) \n\t" /* nerr[x+1] = ber / 16 */
570 "andl $0xffff, %%eax \n\t"
571 /* depends on eax & 0xffff0000 == 0
572 * calculate the index of the value of the pixel */
573 "movw %16, %%ax \n\t" /* ax = rr */
574 "mulb %15 \n\t" /* ax = cpcpc*rr */
575 "movw %%ax, %%cx \n\t"
576 "movw %17, %%ax \n\t" /* ax = gg */
577 "mulb %9 \n\t" /* ax = cpc*gg */
578 "addw %%cx, %%ax \n\t" /* ax = cpc*gg + cpcpc*rr */
579 "addw %18, %%ax \n\t" /* ax = cpcpc*rr + cpc*gg + bb */
580 "movl %8, %%ecx \n\t"
581 /* agi */
582 "leal (%%ecx, %%eax, 4), %%edx \n\t"
583 /* agi */
584 "movb (%%edx), %%cl \n\t" /* cl = pixels[ax] */
585 /* store the pixel */
586 "movl %1, %%eax \n\t"
587 "movb %%cl, (%%eax) \n\t" /* *ximage = cl */
588 "incl %1 \n\t" /* ximage++ */
589 /* prepare for next iteration on X */
590 "addl $4, %19 \n\t" /* nerr += 4 */
591 "addl $4, %%ebx \n\t" /* err += 4 */
592 "addl %12, %%esi \n\t" /* image->data += bpp */
593 "decl %14 \n\t" /* x-- */
594 "jnz .LoopXb \n\t" /* if x>0, goto .LoopX */
595 "movl %13, %%eax \n\t"
596 "addl %%eax, %1 \n\t" /* add extra offset to ximage */
597 "jmp .LoopYb \n\t"
598 ".Endb: \n\t"
599 "emms \n\t"
600 "popal \n\t"
602 : "m" (image), /* %0 */
603 "m" (ximage), /* %1 */
604 "m" (err), /* %2 */
605 "m" (nerr), /* %3 */
606 "m" (ctable), /* %4 */
607 "m" (dr), /* %5 */
608 "m" (dg), /* %6 */
609 "m" (db), /* %7 */
610 "m" (pixels), /* %8 */
611 "m" (cpc), /* %9 */
612 "m" (width), /* %10 */
613 "m" (height), /* %11 */
614 "m" (bytesPerPixel), /* %12 */
615 "m" (line_offset), /* %13 */
616 "m" (x), /* %14 */
617 "m" (cpcpc), /* %15 */
618 "m" (rr), /* %16 */
619 "m" (gg), /* %17 */
620 "m" (bb), /* %18 */
621 "m" (tmp_err), /* %19 */
622 "m" (tmp_nerr), /* %20 */
623 "m" (ndr), /* %21 */
624 "m" (ndg), /* %22 */
625 "m" (ndb) /* %23 */
629 #endif /* ASM_X86 */