2 * Simple Framebuffer Gfx/GUI lib
4 * coded by Ketmar // Invisible Vector <ketmar@ketmar.no-ip.org>
5 * Understanding is not required. Only obedience.
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, version 3 of the License ONLY.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 module iv
.egra
.gfx
.lowlevel
/*is aliced*/;
22 // uncomment this to disable SSE4.1 optimisations
23 //version = egfx_disable_sse41;
26 version(egfx_disable_sse41
) {
27 version(egfx_use_sse41
) {
28 static assert(false, "EGRA: SSE4.1 is both forced and disabled. wtf?!");
31 version(D_InlineAsm_X86
) {
33 version = egfx_use_sse41
;
35 version(egfx_use_sse41
) {
36 static assert(false, "EGRA: SSE4.1 is not supported on 64-bit architectures.");
40 version(egfx_use_sse41
) {
41 static assert(false, "EGRA: SSE4.1 is not supported on non-DMD compilers.");
version(egfx_use_sse41) {
  // compile-time flag: `true` when the SSE4.1 asm code paths are compiled in
  public enum EGfxUseSSE41 = true;
} else {
  public enum EGfxUseSSE41 = false;
}
53 // ////////////////////////////////////////////////////////////////////////// //
// aborts the program when an SSE4.1 build runs on a CPU without SSE4.1;
// a no-op in non-SSE builds
public void egfxCheckCPU () nothrow @trusted @nogc {
  version(egfx_use_sse41) {
    import core.cpuid : sse41;
    // NOTE(review): this guard was reconstructed -- the extraction dropped the
    // line; the selective `sse41` import implies it. verify against upstream.
    if (!sse41) {
      import core.stdc.stdio : stderr, fprintf;
      fprintf(stderr, "ERROR: EGRA requires CPU with SSE4.1 support!");
      assert(0, "ERROR: EGRA requires CPU with SSE4.1 support!");
    }
  }
}
66 // ////////////////////////////////////////////////////////////////////////// //
67 // mix `dcvar` with ARGB (or ABGR) `colvar`; dc A is ignored (set to 255)
68 // main code almost never calls this with solid or transparent `colvar`
69 // the result will be put to `destvar` (it is written only once, at the end)
70 // `colvar` and `dcvar` may be read several times
71 // see http://stereopsis.com/doubleblend.html for the inspiration
73 // this works for solid and transparent colors too
// mix `dcvar` with ARGB (or ABGR) `colvar`; dc alpha is ignored (forced to 255)
// the result is written to `destvar` exactly once, at the very end;
// `colvar` and `dcvar` may be read several times
// R+B and G channel pairs are blended in parallel (doubleblend trick);
// the 0x800080/0x008000 constants provide per-channel rounding
// this works for solid and transparent colors too
public enum GxColMixMixin(string destvar, string dcvar, string colvar) = `{
  immutable uint col_ = `~colvar~`;
  immutable uint dc_ = (`~dcvar~`)&0xffffffu;
  /*immutable uint a_ = 256-(col_>>24);*/ /* to not loose bits */
  immutable uint a_ = (col_>>24)+1; /* so it will work for both 0 and 255 correctly */
  immutable uint srb_ = (col_&0xff00ffu);
  immutable uint sg_ = (col_&0x00ff00u);
  immutable uint drb_ = (dc_&0xff00ffu);
  immutable uint dg_ = (dc_&0x00ff00u);
  immutable uint orb_ = (drb_+(((srb_-drb_)*a_+0x800080u)>>8))&0xff00ffu;
  immutable uint og_ = (dg_+(((sg_-dg_)*a_+0x008000u)>>8))&0x00ff00u;
  (`~destvar~`) = orb_|og_|0xff_00_00_00u;
}`;
88 // this works for solid and transparent colors too
89 public enum GxColMixMixin(string destvar
, string dcvar
, string colvar
) = `{
90 immutable uint a_ = ((`~colvar
~`)>>24)+1u; /* to not loose bits */
91 uint rb_ = (`~dcvar
~`)&0xff00ffu;
92 uint g_ = (`~dcvar
~`)&0x00ff00u;
93 rb_ += ((cast(uint)((`~colvar
~`)&0xff00ffu)-rb_)*a_)>>8;
94 g_ += ((cast(uint)((`~colvar
~`)&0x00ff00u)-g_)*a_)>>8;
95 /* g is mixed with solid alpha; replace "0xff_" with other alpha if you want to */
96 (`~destvar
~`) = (rb_&0xff00ffu)|(g_&0xff_00ff00u)|0xff_00_00_00u;
// interpolate between two A8R8G8B8 colors; `t` is a fraction in [0.0 .. 1.0]
// (0 yields `c0`, 1 yields `c1`); each channel is interpolated independently
public int gxInterpolateColorF (in uint c0, in uint c1, in float t) pure nothrow @safe @nogc {
  import iv.egra.gfx.base;
  if (t <= 0.0f) return c0;
  if (t >= 1.0f) return c1;

  // blend one 8-bit channel from `v0` toward `v1` by fraction `frac`
  static ubyte lerpChannel (in ubyte v0, in ubyte v1, in float frac) pure nothrow @safe @nogc {
    pragma(inline, true);
    if (v0 == v1) return v0;
    return clampToByte(v0+cast(int)((cast(int)v1-cast(int)v0)*frac));
  }

  immutable ubyte rr = lerpChannel(gxGetRed(c0), gxGetRed(c1), t);
  immutable ubyte gg = lerpChannel(gxGetGreen(c0), gxGetGreen(c1), t);
  immutable ubyte bb = lerpChannel(gxGetBlue(c0), gxGetBlue(c1), t);
  immutable ubyte aa = lerpChannel(gxGetAlpha(c0), gxGetAlpha(c1), t);
  return (aa<<24)|(rr<<16)|(gg<<8)|bb;
}
// integer color interpolation: mix `c0` toward `c1` by `t` in [0 .. 65535]
// (0 yields `c0`, 65535 yields `c1`); colors are A8R8G8B8
public int gxInterpolateColorI (in uint c0, in uint c1, in int t) pure nothrow @safe @nogc {
  if (t <= 0) return c0;
  if (t >= 65535) return c1;
  if (c0 == c1) return c0;
  // mix the R+B and G channel pairs in parallel (doubleblend trick);
  // `+1` keeps precision so weight 255 maps to a full 256/256
  immutable uint a_ = cast(uint)(t>>8)+1; // to not loose bits
  uint rb_ = c0&0xff00ffu;
  uint g_ = c0&0x00ff00u;
  rb_ += (((c1&0xff00ffu)-rb_)*a_)>>8;
  g_ += (((c1&0x00ff00u)-g_)*a_)>>8;
  immutable uint res = (rb_&0xff00ffu)|(g_&0x00ff00u);
  // alpha is interpolated separately, using the full 16-bit `t` precision
  immutable int a0 = (c0>>24);
  immutable int a1 = (c1>>24);
  if (a0 == a1) return res|(c0&0xff000000u);
  return res|(((((a1-a0)*(t+1))>>16)+a0)<<24);
  // NOTE(review): the original carried a second, per-channel implementation
  // after the unconditional return above (equivalent up to rounding); it was
  // unreachable dead code and has been removed
}
167 // ////////////////////////////////////////////////////////////////////////// //
169 version(egfx_use_sse41
) {
170 //pragma(msg," !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ");
173 align(16) immutable ubyte[16] sseSpreadOneColor = [
174 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03,
175 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03,
179 // for x86 naked functions, DMD will pass last arg in EAX
180 // sadly, with -O DMD makes some assumptions about dead registers, and nothing is working right
181 // we need to preserve ESI and EDI (and EBX in case of PIC code)
182 public uint* memFillDW (uint* mptr
, in uint value
, in int count
) nothrow @trusted @nogc {
183 asm nothrow @trusted @nogc {
185 xchg EDI
,/*SS:*/[ESP
+8]; // EDI=mptr; also, save old EDI
189 mov ECX
,EAX
; // ECX=count (because last arg is in EAX)
190 mov EAX
,/*SS:*/[ESP
+4]; // EAX=value
193 jc simplestore
; // too small
195 // load XMM0 with our color
198 // used `movdqu`, because it indicates int type
199 // this doesn't matter, it just looks nicer
200 // also, `movlps` is one byte shorter
201 movlps XMM0
,/*SS:*/[ESP
];
202 movlhps XMM0
,XMM0
; // copy low 64 bits of XMM0 to high 64 bits of XMM0
203 //movdqu XMM0,/*SS:*/[ESP];
204 //pshufb XMM0,[sseSpreadOneColor];
207 // if we cannot align at all, use "rep stosd"
208 // this should not happen, so i won't bother optimising it
212 // align EDI (we have at least 8 pixels to fill here, so it is safe)
221 // ECX is never zero here
223 jc simplestore
; // too small
225 // save last 2 bits of counter (we'll mask them later)
228 // fill by 4 pixels while we can
230 //align 16; // why not
237 // fill last 1-3 pixels
246 mov EAX
,EDI
; // return new mptr
247 mov EDI
,/*SS:*/[ESP
+8]; // restore EDI
252 // WARNING! this function is not quite right (0 and 255 alphas will still modify the colors)
253 // WARNING! do not call it with fully opaque or fully transparent `clr`!
254 public alias memBlendColor
= sseBlendColor
;
256 public uint* memBlendColor (uint* mptr, in uint clr, int count) nothrow @trusted @nogc {
257 pragma(inline, true);
259 if (count < 1) return mptr;
260 immutable int c4 = (count>>2); // it is actually unsigned
261 if (c4) { mptr = sseBlendColor4px(mptr, clr, cast(uint)c4); count -= (c4<<2); }
262 return (count ? memBlendColorSlow(mptr, clr, count) : mptr);
264 return memBlendColorSlow(mptr, clr, count);
270 align(16) immutable ubyte[16] sseSpreadAlpha
= [
271 0xff, 0x03, 0xff, 0x03, 0xff, 0x03, 0xff, 0xff,
272 0xff, 0x03, 0xff, 0x03, 0xff, 0x03, 0xff, 0xff,
275 align(16) immutable ubyte[16] sseMaxAlpha
= [
276 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0x00,
277 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0x00,
280 align(16) immutable ubyte[16] sseFullByteAlpha
= [
281 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff,
282 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff,
285 // mix foreground to background
286 // EAX is pixel count
287 // background = (alpha * foreground) + (1-alpha)*background
288 // WARNING! this function is not quite right (0 and 255 alphas will still modify the colors)
289 // WARNING! do not call it with fully opaque or fully transparent `clr`!
290 public uint* sseBlendColor (uint* dest
, uint clr
, uint count
) nothrow @trusted @nogc {
291 asm nothrow @trusted @nogc {
293 //enter 0,0; // this actually slower than the byte soup below
296 // save modified registers
299 mov EDI
,[EBP
+12]; // dest
300 // it can be negative
303 mov ECX
,EAX
; // counter
313 mov EAX
,[EBP
+8]; // clr
315 // we can premultiply clr first, and convert alpha to 255-alpha
317 // prepare SSE data -- 2 pixels
318 mov /*SS:*/[ESP
],EAX
;
319 mov /*SS:*/[ESP
+4],EAX
;
321 // used `movdqa`, because it indicates int type
322 // this doesn't matter, it just looks nicer
323 // also, `movlps` is one byte shorter
324 movlps XMM0
,/*SS:*/[ESP
];
325 //movdqa XMM0,/*SS:*/[ESP];
326 // expand 8 ubytes to 8 ushorts
328 // XMM0: xx xx xx xx ar gb ar gb
329 // XMM1: 0a 0r 0g 0b 0a 0r 0g 0b
330 pshufb XMM0
,[sseSpreadAlpha
];
331 // XMM0: 00 0a 0a 0a 00 0a 0a 0a
332 movdqa XMM7
,[sseMaxAlpha
];
333 psubw XMM7
,XMM0
; // XMM7 is 255-alpha
334 // XMM7: 00 0a 0a 0a 00 0a 0a 0a
336 // XMM0: 00 0r 0g 0b 00 0r 0g 0b
337 movdqa XMM6
,[sseFullByteAlpha
];
339 //XMM0: 2 premultiplied colors
340 //XMM7: 2 inverted alphas
341 //XMM6: destination alpha (replace value)
343 // totally unaligned?
344 // this should never happen, but meh...
346 jnz slowestpath
; // alas, the slowest path
348 // align the address (if necessary)
352 // we need to mix 1-3 pixels to make the address aligned
353 // check counter here to allow "slow, but aligned" path (see the code below)
355 jc slowestpath
; // alas
357 // process 4 pixels (we will drop unused ones)
358 movdqu XMM5
,[EDI
]; // 4 background pixels
359 pmovzxbw XMM1
,XMM5
; // expand 2 lower pixels to XMM1
360 // copy high part of XMM5 to low part of XMM5
362 pmovzxbw XMM2
,XMM5
; // expand 2 upper pixels to XMM2
363 //XMM1: 2 lower pixels
364 //XMM2: 2 upper pixels
366 pmulhuw XMM1
,XMM7
; // multiply by alpha
367 pmulhuw XMM2
,XMM7
; // multiply by alpha
369 paddusw XMM1
,XMM0
; // add premultiplied colors
370 paddusw XMM2
,XMM0
; // add premultiplied colors
374 // set destination alpha
377 // now write 1-3 pixels to align the address
378 // we are guaranteed to have at least 4 pixels to mix here
379 // i.e. 4 processed pixels, and at least 4 pixels in the counter
381 // put in temp storage (it is aligned)
382 movdqa /*SS:*/[ESP
],XMM1
;
383 mov EDX
,ESI
; // save ESI (DMD expects it unchanged)
390 mov ESI
,EDX
; // restore ESI
391 // ECX is at least 1 here, and EDI is aligned
394 // ECX is never zero here
395 // use "slow, but aligned" path if we have less than 8 pixels to process
399 // save last 3 bits in EAX
400 // we'll mask it later
403 // process by 8 pixels while we can
407 movdqa XMM5
,[EDI
]; // 4 background pixels
408 pmovzxbw XMM1
,XMM5
; // expand 2 lower pixels to XMM1
409 // copy high part of XMM5 to low part of XMM5
411 pmovzxbw XMM2
,XMM5
; // expand 2 upper pixels to XMM2
412 //XMM1: 2 lower pixels
413 //XMM2: 2 upper pixels
415 movdqa XMM5
,[EDI
+16]; // 4 background pixels
416 pmovzxbw XMM3
,XMM5
; // expand 2 lower pixels to XMM3
417 // copy high part of XMM5 to low part of XMM5
419 pmovzxbw XMM4
,XMM5
; // expand 2 upper pixels to XMM4
420 //XMM3: 2 lower pixels
421 //XMM4: 2 upper pixels
423 pmulhuw XMM1
,XMM7
; // multiply by alpha
424 pmulhuw XMM2
,XMM7
; // multiply by alpha
425 pmulhuw XMM3
,XMM7
; // multiply by alpha
426 pmulhuw XMM4
,XMM7
; // multiply by alpha
428 paddusw XMM1
,XMM0
; // add premultiplied colors
429 paddusw XMM2
,XMM0
; // add premultiplied colors
430 paddusw XMM3
,XMM0
; // add premultiplied colors
431 paddusw XMM4
,XMM0
; // add premultiplied colors
436 // set destination alpha
441 movdqa [EDI
+16],XMM3
;
447 // do last 1-7 pixels (last counter is in EAX)
448 // EDI is guaranteed to be aligned here
455 mov EDI
,[EBP
-4]; // restore EDI
456 //leave; // this actually slower than the byte soup below
462 // mix by 4 pixels, unaligned
465 movdqu XMM5
,[EDI
]; // 4 background pixels
466 pmovzxbw XMM1
,XMM5
; // expand 2 lower pixels to XMM1
467 // copy high part of XMM5 to low part of XMM5
469 pmovzxbw XMM2
,XMM5
; // expand 2 upper pixels to XMM2
470 //XMM1: 2 lower pixels
471 //XMM2: 2 upper pixels
473 pmulhuw XMM1
,XMM7
; // multiply by alpha
474 pmulhuw XMM2
,XMM7
; // multiply by alpha
476 paddusw XMM1
,XMM0
; // add premultiplied colors
477 paddusw XMM2
,XMM0
; // add premultiplied colors
481 // set destination alpha
492 // last 1-3 pixels (never 0)
494 // put in temp storage (it is aligned)
495 movdqa /*SS:*/[ESP
],XMM1
;
496 mov EDX
,ESI
; // save ESI (DMD expects it unchanged)
498 and ECX
,0x03; // left counter
500 mov ESI
,EDX
; // restore ESI
505 mov EDI
,[EBP
-4]; // restore EDI
506 //leave; // this actually slower than the byte soup below
513 // mix by 4 pixels, aligned (used for 1-7 pixels)
516 movdqa XMM5
,[EDI
]; // 4 background pixels
517 pmovzxbw XMM1
,XMM5
; // expand 2 lower pixels to XMM1
518 // copy high part of XMM5 to low part of XMM5
520 pmovzxbw XMM2
,XMM5
; // expand 2 upper pixels to XMM2
521 //XMM1: 2 lower pixels
522 //XMM2: 2 upper pixels
524 pmulhuw XMM1
,XMM7
; // multiply by alpha
525 pmulhuw XMM2
,XMM7
; // multiply by alpha
527 paddusw XMM1
,XMM0
; // add premultiplied colors
528 paddusw XMM2
,XMM0
; // add premultiplied colors
532 // set destination alpha
545 // for x86 naked functions, DMD will pass last arg in EAX
546 // sadly, with -O DMD makes some assumptions about dead registers, and nothing is working right
547 // we need to preserve ESI and EDI (and EBX in case of PIC code)
548 // this doesn't change every 2nd pixel; `count` is count of ALL pixels
549 public uint* memFillDWDash (uint* mptr
, in uint value
, in int count
) nothrow @trusted @nogc {
550 asm nothrow @trusted @nogc {
552 xchg EDI
,/*SS:*/[ESP
+8]; // EDI=mptr; also, save old EDI
553 // it can be negative
555 jl quit
; // 0 or less
556 // another jump is done later, after the setup
557 mov ECX
,EAX
; // ECX=count (because last arg is in EAX)
558 mov EAX
,/*SS:*/[ESP
+4]; // EAX=value
559 // if we only filling one pixel, just do it
562 // ECX is always >=2 here, and we are actually processing 2 pixels at a time anyway
563 mov DL
,CL
; // save the last bit for later use (we may need to set the last pixel)
579 mov EAX
,EDI
; // return new mptr
580 mov EDI
,/*SS:*/[ESP
+8]; // restore EDI
586 //TODO: rewrite this with SSE
588 // sadly, with -O DMD makes some assumptions about dead registers, and nothing is working right
589 // we need to preserve ESI and EDI (and EBX in case of PIC code)
590 // this doesn't change every 2nd pixel; `count` is count of ALL pixels
591 public uint* memBlendColorDash (uint* mptr
, in uint clr
, in int count
) nothrow @trusted @nogc {
592 asm nothrow @trusted @nogc {
595 xchg EDI
,/*SS:*/[ESP
+8]; // EDI=mptr; also, save old EDI
599 push EBP
; // EBP will contain the counter
600 push EBX
; // EBX is temporary register
601 push ESI
; // DMD expects ESI to be unmodified at exit
602 mov EBP
,EAX
; // EBP=counter
604 mov EAX
,/*SS:*/[ESP
+16]; // EAX=clr
605 mov ECX
,EAX
; // ECX will be clrA
606 // clrG=clr&0x00ff00u;
609 // clrRB=clr&0xff00ffu;
613 // ECX=clrA=(clr>>24)+1; -- `+1` to keep some precision
633 clrRB = clr&0xff00ffu;
634 clrG = clr&0x00ff00u;
636 rb = (*mptr)&0xff00ffu;
637 rb += ((clrRB-rb)*clrA)>>8;
640 g = (*mptr)&0x00ff00u;
641 g += ((clrG-g)*clrA)>>8;
644 *mptr++ = rb|g|0xff000000u;
648 // rb = (*mptr)&0xff00ffu;
649 // rb += (((clrRB-rb)*clrA)>>8)&0xff00ffu;
651 mov ESI
,EBX
; // save `*mptr`
652 and EBX
,0xff00ffu
; // EBX=rb=(*mptr)&0xff00ffu
653 mov EAX
,/*SS:*/[ESP
]; // EAX=clrRB
654 sub EAX
,EBX
; // EAX=clrRB-rb
655 mul ECX
; // EAX=(clrRB-rb)*clrA (EDX is dead)
656 shr EAX
,8; // EAX=((clrRB-rb)*clrA)>>8
657 add EBX
,EAX
; // EBX=rb+(((clrRB-rb)*clrA)>>8)
658 and EBX
,0xff00ffu
; // EAX=(rb+(((clrRB-rb)*clrA)>>8))&0xff00ffu
660 // g = (*mptr)&0x00ff00u;
661 // g += (((clrG-g)*clrA)>>8)&0x00ff00u;
662 mov EDX
,ESI
; // EDX=*mptr
663 and EDX
,0x00ff00u
; // EDX=g=(*mptr)&0x00ff00u
664 mov ESI
,EDX
; // save g, we well need it later
665 mov EAX
,/*SS:*/[ESP
+4]; // EAX=clrG
666 sub EAX
,EDX
; // EAX=clrG-g
667 mul ECX
; // EAX=(clrG-g)*clrA (EDX is dead)
668 shr EAX
,8; // EAX=((clrG-g)*clrA)>>8
669 add EAX
,ESI
; // EAX=(((clrG-g)*clrA)>>8)+g
670 and EAX
,0x00ff00u
; // EAX=((((clrG-g)*clrA)>>8)+g)&0x00ff00u
684 add ESP
,2*4; // drop temp vars
691 mov EAX
,EDI
; // result
692 mov EDI
,/*SS:*/[ESP
+8]; // restore EDI
698 // sadly, with -O DMD makes some assumptions about dead registers, and nothing is working right
699 // we need to preserve ESI and EDI (and EBX in case of PIC code)
700 // this is using a branch for empty/opaque alphas; i didn't profiled it, but i think it is faster than 3 muls
701 public void memBlendColorCoverage (uint* mptr
, const(ubyte)* coverage
, in uint clr
, in int count
) nothrow @trusted @nogc {
702 asm nothrow @trusted @nogc {
704 //enter 0,0; // this actually slower than the byte soup below
707 // save modified registers
712 mov EDI
,[EBP
+16]; // dest
718 mov AL
,byte ptr
[EBP
+11]; // c.a
719 // skip completely opaque pixels (just in case)
723 mov ESI
,[EBP
+12]; // coverage
724 // for fully opaque pixels we can skip one load and one mul
729 movzx EAX
,byte ptr
[ESI
]; // load coverage byte
733 // alpha = (*coverage)*c.a;
734 movzx EDX
,byte ptr
[EBP
+11]; // c.a
735 inc EDX
; // for better precision
737 // is the source color completely opaque?
738 cmp AX
,0xff00; // 256*255 == 0xff00
743 // cast(ubyte)((((c.b-v)*alpha)>>16)+v);
744 movzx EAX
,byte ptr
[EBP
+8]; // c.b
745 movzx EDX
,byte ptr
[EDI
]; // v
749 // EAX: (c.b-v)*alpha
752 add byte ptr
[EDI
],AL
;
755 movzx EAX
,byte ptr
[EBP
+9]; // c.r
756 movzx EDX
,byte ptr
[EDI
]; // v
760 // EAX: (c.b-v)*alpha
763 add byte ptr
[EDI
],AL
;
766 movzx EAX
,byte ptr
[EBP
+10]; // c.g
767 movzx EDX
,byte ptr
[EDI
]; // v
771 // EAX: (c.b-v)*alpha
774 add byte ptr
[EDI
],AL
;
777 mov byte ptr
[EDI
],0xff;
787 //leave; // this actually slower than the byte soup below
793 add EDI
,4; // skip destination pixel
797 mov EAX
,[EBP
+8]; // source pixel
798 or EAX
,0xff000000; // alpha
802 // the source color is completely opaque
803 // use slightly faster code to calculate coverage alpha
805 movzx EAX
,byte ptr
[ESI
]; // load coverage byte
815 // cast(ubyte)((((c.b-v)*alpha)>>16)+v);
816 movzx EAX
,byte ptr
[EBP
+8]; // c.b
817 movzx EDX
,byte ptr
[EDI
]; // v
821 // EAX: (c.b-v)*alpha
824 add byte ptr
[EDI
],AL
;
827 movzx EAX
,byte ptr
[EBP
+9]; // c.r
828 movzx EDX
,byte ptr
[EDI
]; // v
832 // EAX: (c.b-v)*alpha
835 add byte ptr
[EDI
],AL
;
838 movzx EAX
,byte ptr
[EBP
+10]; // c.g
839 movzx EDX
,byte ptr
[EDI
]; // v
843 // EAX: (c.b-v)*alpha
846 add byte ptr
[EDI
],AL
;
849 mov byte ptr
[EDI
],0xff;
857 add EDI
,4; // skip destination pixel
861 mov EAX
,[EBP
+8]; // source pixel
862 or EAX
,0xff000000; // alpha
// fill `count` dwords at `ptr` with `value`; returns the pointer advanced
// past the last written dword
// non-positive `count` is a no-op (the unguarded cast in the old code turned
// a negative count into a huge slice length and smashed memory)
public uint* memFillDW (uint* ptr, in uint value, in int count) nothrow @trusted @nogc {
  pragma(inline, true);
  if (count > 0) {
    ptr[0..cast(size_t)count] = value;
    ptr += cast(size_t)count;
  }
  return ptr;
}
// dashed fill: write `value` to every even-indexed dword, leave odd-indexed
// ones untouched; `count` counts ALL dwords, not only the written ones
// returns the pointer advanced past all `count` dwords
public uint* memFillDWDash (uint* ptr, in uint value, in int count) nothrow @trusted @nogc {
  pragma(inline, true);
  foreach (immutable c; 0..count) { if (!(c&1)) *ptr++ = value; else ++ptr; }
  return ptr;
  // NOTE(review): the extracted original also carried an unconditional
  // solid-fill tail (likely a disabled alternative); it contradicted the dash
  // semantics and was dropped
}
// alpha-blend `clr` over `count` consecutive pixels starting at `mptr`;
// returns the pointer advanced past the last processed pixel
// NOTE(review): the trailing return was reconstructed (extraction dropped it);
// it is forced by the `uint*` return type
public uint* memBlendColor (uint* mptr, in uint clr, in int count) nothrow @trusted @nogc {
  int left = count;
  while (left-- > 0) { mixin(GxColMixMixin!("*mptr++", "*mptr", "clr")); }
  return mptr;
}
// dashed blend: alpha-blend `clr` over every even-indexed pixel, skip the odd
// ones; `count` counts ALL pixels; returns the pointer advanced past them all
// NOTE(review): the trailing return was reconstructed (extraction dropped it);
// it is forced by the `uint*` return type
public uint* memBlendColorDash (uint* mptr, in uint clr, in int count) nothrow @trusted @nogc {
  foreach (immutable idx; 0..count) {
    if (idx&1) {
      ++mptr;
    } else {
      mixin(GxColMixMixin!("*mptr++", "*mptr", "clr"));
    }
  }
  return mptr;
}
896 public void memBlendColorCoverage (uint* mptr
, const(ubyte)* coverage
, in uint clr
, in int count
) nothrow @trusted @nogc {
897 immutable uint cb
= clr
&0xff;
898 immutable uint cg
= (clr
>>8)&0xff;
899 immutable uint cr
= (clr
>>16)&0xff;
900 immutable uint ca
= clr
>>24;
901 ubyte* p
= cast(ubyte*)mptr
;
902 foreach (immutable _
; 0..count
) {
903 immutable uint alpha
= (*coverage
++)*ca
;
905 uint v
= *p
; *p
++ = cast(ubyte)((((cb
-v
)*alpha
)+(v
<<16))>>16);
906 v
= *p
; *p
++ = cast(ubyte)((((cg
-v
)*alpha
)+(v
<<16))>>16);
907 v
= *p
; *p
++ = cast(ubyte)((((cr
-v
)*alpha
)+(v
<<16))>>16);
909 uint v
= *p
; *p
++ = cast(ubyte)((((cb
-v
)*alpha
)>>16)+v
);
910 v
= *p
; *p
++ = cast(ubyte)((((cg
-v
)*alpha
)>>16)+v
);
911 v
= *p
; *p
++ = cast(ubyte)((((cr
-v
)*alpha
)>>16)+v
);