2 Copyright (c) 2006 Paolo Capriotti <p.capriotti@sns.it>
3 (c) 2006 Maurizio Monge <maurizio.monge@kdemail.net>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
13 #include <xmmintrin.h>
17 namespace ImageEffects
{
// Runs one exponential-smoothing step on the 8 bytes at pixels (two
// neighbouring 4-byte pixels) and writes the filtered bytes back in place.
//
// pixels: address of the 8 bytes to filter; read and rewritten.
// state:  running filter state, one 16-bit word per byte channel, holding the
//         channel value scaled by 1<<7; updated by this call.
// alpha:  smoothing factor, one 16-bit word per channel.
//
// Per channel the asm computes
//     state += 2 * (((byte << 7) - state) * alpha >> 16)
//     byte   = state >> 7   (saturated to 8 bits)
//
// NOTE(review): the asm statement opener, the register operand list and the
// function braces are not visible in this listing.
25 static inline void blur_sse_near(void* pixels
, __m128i
& state
, __m128i alpha
)
32 "movq %[ppix], %[pixels]\n" // load the 8 bytes (two pixels) from memory
33 "punpcklbw %[pixels], %[aux1]\n" // interleave: each pixel byte becomes the
34 // high byte of a 16-bit word in aux1
35 "psrlw $1, %[aux1]\n" // shift right by 1: each channel is now scaled by 1<<7
37 "psubw %[state], %[aux1]\n" // - state
38 "pmulhw %[alpha], %[aux1]\n" // * alpha, keeping the high 16 bits of the signed product
39 "psllw $1, %[aux1]\n" // shift left (we trade 1 bit for performance, here)
40 "paddw %[aux1], %[state]\n" // add the result to state
41 "movdqa %[state], %[aux2]\n" // copy state to the aux2 register
42 "psrlw $7, %[aux2]\n" // shift right by 7: this is the new pixel value
43 "packuswb %[aux2], %[aux2]\n" // pack the words back to 8-bit channels (unsigned saturation)
44 "movq %[aux2], %[ppix]\n" // store the two filtered pixels
// The memory operand is read-write and covers exactly 8 bytes.
46 , [ppix
] "+m"(*(uint64_t*)pixels
)
// Runs one exponential-smoothing step on two pixels that are not adjacent in
// memory: 4 bytes at pixel1 and 4 bytes at pixel2. Both are rewritten in place.
//
// pixel1: address of the first 4-byte pixel; its channels use the low four
//         words of state.
// pixel2: address of the second 4-byte pixel; its channels use the high four
//         words of state.
// state:  running filter state, one 16-bit word per byte channel, holding the
//         channel value scaled by 1<<7; updated by this call.
// alpha:  smoothing factor, one 16-bit word per channel.
//
// After the two pixels are packed into one register, the arithmetic is the
// same instruction sequence as in blur_sse_near.
//
// NOTE(review): the asm statement opener, the register operand list and the
// function braces are not visible in this listing.
54 static inline void blur_sse_sep(void* pixel1
, void* pixel2
, __m128i
& state
, __m128i alpha
)
62 "movd %[ppix1], %[pixels]\n" // load the first pixel
63 "movd %[ppix2], %[tmp]\n" // load the second pixel in [tmp]
64 "pslldq $4, %[tmp]\n" // shift the second pixel left by 4 bytes
65 "paddd %[tmp], %[pixels]\n" // now both pixels are packed in [pixels]
67 "punpcklbw %[pixels], %[aux1]\n" // interleave: each pixel byte becomes the
68 // high byte of a 16-bit word in aux1
69 "psrlw $1, %[aux1]\n" // shift right by 1: each channel is now scaled by 1<<7
71 "psubw %[state], %[aux1]\n" // - state
72 "pmulhw %[alpha], %[aux1]\n" // * alpha, keeping the high 16 bits of the signed product
73 "psllw $1, %[aux1]\n" // shift left (we trade 1 bit for performance, here)
74 "paddw %[aux1], %[state]\n" // add the result to state
75 "movdqa %[state], %[aux2]\n" // copy state to the aux2 register
76 "psrlw $7, %[aux2]\n" // shift right by 7: this is the new pixel value
77 "packuswb %[aux2], %[aux2]\n" // pack the words back to 8-bit channels (unsigned saturation)
79 "movd %[aux2], %[ppix1]\n" // store the first filtered pixel
80 "psrldq $4, %[aux2]\n" // move the second filtered pixel into the low 4 bytes
81 "movd %[aux2], %[ppix2]\n" // store the second filtered pixel
// Both memory operands are read-write and cover 4 bytes each.
83 , [ppix1
] "+m"(*(uint32_t*)pixel1
)
84 , [ppix2
] "+m"(*(uint32_t*)pixel2
)
// Blurs img in place by running the exponential filter helpers over the pixel
// buffer returned by img.bits(): first along rows (forward then backward),
// then along columns (forward then backward).
//
// img:    image whose pixel buffer is filtered; pixels are addressed as 4 bytes
//         each (index*4), so this assumes a 32-bit pixel format - TODO confirm.
// radius: blur radius; only used to derive the smoothing factor alpha.
//
// NOTE(review): several declarations used below (w, z, a, dummy, the loop over
// i) and all braces are not visible in this listing. Comments that depend on
// them are marked as assumptions.
93 void expblur_sse( QImage
&img
, int radius
)
98 /* Calculate the alpha such that 90% of
99 the kernel is within the radius.
100 (Kernel extends to infinity)
// alpha is a fixed-point fraction scaled by 1<<15.
102 uint16_t alpha
= (uint16_t)((1<<15)*(1.0f
-expf(-2.3f
/(radius
+1.f
))));
// Start of the pixel buffer, viewed as QRgb elements.
105 QRgb
*ptr
= (QRgb
*)img
.bits();
106 int h
= img
.height();
// hw is the pixel index of the first pixel of the last row.
108 int hw
= (img
.height()-1)*img
.width();
// Row pass: rows are processed two at a time, one row per half of the state.
// Presumably w is img.width() - its declaration is not visible here.
112 for(int row
=0;row
<h
-1;row
+=2)
// Byte pointer to the first pixel of the upper row of the pair.
115 uint8_t *cptr
= (uint8_t*)(ptr
+row
*w
);
// Seeds the upper four state words with the first pixel of the lower row,
// scaled by 1<<7. Presumably runs in a loop over the 4 channels i that also
// seeds the lower words from the upper row - not visible here.
119 z
.i
[4+i
] = cptr
[w
*4+i
]<<7;
// Forward sweep, left to right, over both rows of the pair.
121 for(int index
=1; index
<w
; index
++)
122 blur_sse_sep(&cptr
[index
*4], &cptr
[(index
+w
)*4], z
.v
, a
.v
);
// Backward sweep, right to left, starting one pixel before the last.
124 for(int index
=w
-2; index
>=0; index
--)
125 blur_sse_sep(&cptr
[index
*4], &cptr
[(index
+w
)*4] , z
.v
, a
.v
);
// Last row handled on its own: the second pixel slot is pointed at dummy.
// Presumably guarded by a check for an odd row count - guard not visible here.
132 uint8_t *cptr
= (uint8_t*)(ptr
+(h
-1)*w
);
136 for(int index
=1; index
<w
; index
++)
137 blur_sse_sep(&cptr
[index
*4], &dummy
, z
.v
, a
.v
);
139 for(int index
=w
-2; index
>=0; index
--)
140 blur_sse_sep(&cptr
[index
*4], &dummy
, z
.v
, a
.v
);
// Column pass: columns are processed two at a time, because blur_sse_near
// filters 8 contiguous bytes, i.e. two horizontally adjacent pixels.
143 for(int col
=0;col
<w
-1;col
+=2)
// Byte pointer to the top pixel of the left column of the pair.
146 uint8_t *cptr
= (uint8_t*)(ptr
+col
);
// Downward sweep, one row (w pixels) per step, starting at row 1.
// NOTE(review): if w is the image width, index < hw stops at row height-2,
// so the last row is not visited by this sweep - confirm this is intended.
151 for(int index
=w
; index
<hw
; index
+=w
)
152 blur_sse_near(&cptr
[index
*4], z
.v
, a
.v
);
// Upward sweep, starting one row above where the downward sweep ended.
154 for(int index
=hw
-2*w
; index
>=0; index
-=w
)
155 blur_sse_near(&cptr
[index
*4], z
.v
, a
.v
);
// Last column (w-1) handled on its own, again with dummy as the second pixel.
// Presumably guarded by a check for an odd column count - guard not visible here.
162 uint8_t *cptr
= (uint8_t*)(ptr
+w
-1);
167 for(int index
=w
; index
<hw
; index
+=w
)
168 blur_sse_sep(&cptr
[index
*4], &dummy
, z
.v
, a
.v
);
// NOTE(review): this upward sweep starts at hw-w, whereas the paired-column
// upward sweep starts at hw-2*w - confirm which start row is intended.
170 for(int index
=hw
-w
; index
>=0; index
-=w
)
171 blur_sse_sep(&cptr
[index
*4], &dummy
, z
.v
, a
.v
);
177 } //end namespace ImageEffects