Bug 1785744 [wpt PR 35504] - Recalc style for elements where :toggle() pseudo-class...
[gecko.git] / gfx / cairo / pixman-8888-over-565.patch
blob0dd34c04f35c31f5609eb120fe62778b5d68e073
1 changeset: 96613:3e003f0b8026
2 tag: 2pass
3 tag: qbase
4 tag: qtip
5 tag: tip
6 user: Jeff Muizelaar <jmuizelaar@mozilla.com>
7 date: Thu May 17 19:23:53 2012 -0400
8 summary: Bug 757878. Add a fast path for 8888_over_565 with NEON. r=bgirard,joe
10 diff --git a/gfx/cairo/libpixman/src/pixman-arm-common.h b/gfx/cairo/libpixman/src/pixman-arm-common.h
11 --- a/gfx/cairo/libpixman/src/pixman-arm-common.h
12 +++ b/gfx/cairo/libpixman/src/pixman-arm-common.h
13 @@ -355,26 +355,26 @@ scaled_bilinear_scanline_##cputype##_##n
14 if ((flags & SKIP_ZERO_SRC) && zero_src) \
15 return; \
16 pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \
17 dst, src_top, src_bottom, wt, wb, vx, unit_x, w); \
18 } \
20 FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \
21 scaled_bilinear_scanline_##cputype##_##name##_##op, \
22 - src_type, uint32_t, dst_type, COVER, FLAG_NONE) \
23 + NULL, src_type, uint32_t, dst_type, COVER, FLAG_NONE) \
24 FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \
25 scaled_bilinear_scanline_##cputype##_##name##_##op, \
26 - src_type, uint32_t, dst_type, NONE, FLAG_NONE) \
27 + NULL, src_type, uint32_t, dst_type, NONE, FLAG_NONE) \
28 FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \
29 scaled_bilinear_scanline_##cputype##_##name##_##op, \
30 - src_type, uint32_t, dst_type, PAD, FLAG_NONE) \
31 + NULL, src_type, uint32_t, dst_type, PAD, FLAG_NONE) \
32 FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_normal_##op, \
33 scaled_bilinear_scanline_##cputype##_##name##_##op, \
34 - src_type, uint32_t, dst_type, NORMAL, \
35 + NULL, src_type, uint32_t, dst_type, NORMAL, \
36 FLAG_NONE)
39 #define PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST(flags, cputype, name, op, \
40 src_type, dst_type) \
41 void \
42 pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \
43 dst_type * dst, \
44 @@ -404,25 +404,25 @@ scaled_bilinear_scanline_##cputype##_##n
45 if ((flags & SKIP_ZERO_SRC) && zero_src) \
46 return; \
47 pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \
48 dst, mask, src_top, src_bottom, wt, wb, vx, unit_x, w); \
49 } \
51 FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \
52 scaled_bilinear_scanline_##cputype##_##name##_##op, \
53 - src_type, uint8_t, dst_type, COVER, \
54 + NULL, src_type, uint8_t, dst_type, COVER, \
55 FLAG_HAVE_NON_SOLID_MASK) \
56 FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \
57 scaled_bilinear_scanline_##cputype##_##name##_##op, \
58 - src_type, uint8_t, dst_type, NONE, \
59 + NULL, src_type, uint8_t, dst_type, NONE, \
60 FLAG_HAVE_NON_SOLID_MASK) \
61 FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \
62 scaled_bilinear_scanline_##cputype##_##name##_##op, \
63 - src_type, uint8_t, dst_type, PAD, \
64 + NULL, src_type, uint8_t, dst_type, PAD, \
65 FLAG_HAVE_NON_SOLID_MASK) \
66 FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_normal_##op, \
67 scaled_bilinear_scanline_##cputype##_##name##_##op, \
68 - src_type, uint8_t, dst_type, NORMAL, \
69 + NULL, src_type, uint8_t, dst_type, NORMAL, \
70 FLAG_HAVE_NON_SOLID_MASK)
73 #endif
74 diff --git a/gfx/cairo/libpixman/src/pixman-arm-neon.c b/gfx/cairo/libpixman/src/pixman-arm-neon.c
75 --- a/gfx/cairo/libpixman/src/pixman-arm-neon.c
76 +++ b/gfx/cairo/libpixman/src/pixman-arm-neon.c
77 @@ -140,16 +140,33 @@ PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST
78 PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC,
79 uint32_t, uint16_t)
80 PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC,
81 uint16_t, uint32_t)
82 PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_0565, SRC,
83 uint16_t, uint16_t)
84 PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, OVER,
85 uint32_t, uint32_t)
86 +static force_inline void
87 +pixman_scaled_bilinear_scanline_8888_8888_SRC (
88 + uint32_t * dst,
89 + const uint32_t * mask,
90 + const uint32_t * src_top,
91 + const uint32_t * src_bottom,
92 + int32_t w,
93 + int wt,
94 + int wb,
95 + pixman_fixed_t vx,
96 + pixman_fixed_t unit_x,
97 + pixman_fixed_t max_vx,
98 + pixman_bool_t zero_src)
100 + pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (dst, src_top, src_bottom, wt, wb, vx, unit_x, w);
103 PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, ADD,
104 uint32_t, uint32_t)
106 PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 8888_8_8888, SRC,
107 uint32_t, uint32_t)
108 PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 8888_8_0565, SRC,
109 uint32_t, uint16_t)
110 PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 0565_8_x888, SRC,
111 @@ -261,16 +278,38 @@ pixman_blt_neon (uint32_t *src_bits,
112 (uint32_t *)(((char *) src_bits) +
113 src_y * src_stride * 4 + src_x * 4), src_stride);
114 return TRUE;
115 default:
116 return FALSE;
120 +static inline void op_bilinear_over_8888_0565(uint16_t *dst, const uint32_t *mask, const uint32_t *src, int width)
122 + pixman_composite_over_8888_0565_asm_neon (width, 1, dst, 0, src, 0);
125 +FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_cover_OVER,
126 + pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565,
127 + uint32_t, uint32_t, uint16_t,
128 + COVER, FLAG_NONE)
129 +FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_pad_OVER,
130 + pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565,
131 + uint32_t, uint32_t, uint16_t,
132 + PAD, FLAG_NONE)
133 +FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_none_OVER,
134 + pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565,
135 + uint32_t, uint32_t, uint16_t,
136 + NONE, FLAG_NONE)
137 +FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_normal_OVER,
138 + pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565,
139 + uint32_t, uint32_t, uint16_t,
140 + NORMAL, FLAG_NONE)
142 static const pixman_fast_path_t arm_neon_fast_paths[] =
144 PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, neon_composite_src_0565_0565),
145 PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, neon_composite_src_0565_0565),
146 PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565),
147 PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565),
148 PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565),
149 PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565),
150 @@ -414,16 +453,18 @@ static const pixman_fast_path_t arm_neon
151 SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_8_0565),
153 SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8_8888),
154 SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8_8888),
156 SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, neon_8888_8_8888),
157 SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, neon_8888_8_8888),
159 + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565),
161 { PIXMAN_OP_NONE },
164 static pixman_bool_t
165 arm_neon_blt (pixman_implementation_t *imp,
166 uint32_t * src_bits,
167 uint32_t * dst_bits,
168 int src_stride,
169 diff --git a/gfx/cairo/libpixman/src/pixman-fast-path.c b/gfx/cairo/libpixman/src/pixman-fast-path.c
170 --- a/gfx/cairo/libpixman/src/pixman-fast-path.c
171 +++ b/gfx/cairo/libpixman/src/pixman-fast-path.c
172 @@ -1356,63 +1356,63 @@ scaled_bilinear_scanline_565_565_SRC (ui
173 vx += unit_x;
174 *dst++ = d;
178 #endif
180 FAST_BILINEAR_MAINLOOP_COMMON (565_565_cover_SRC,
181 - scaled_bilinear_scanline_565_565_SRC,
182 + scaled_bilinear_scanline_565_565_SRC, NULL,
183 uint16_t, uint32_t, uint16_t,
184 COVER, FLAG_NONE)
185 FAST_BILINEAR_MAINLOOP_COMMON (565_565_pad_SRC,
186 - scaled_bilinear_scanline_565_565_SRC,
187 + scaled_bilinear_scanline_565_565_SRC, NULL,
188 uint16_t, uint32_t, uint16_t,
189 PAD, FLAG_NONE)
190 FAST_BILINEAR_MAINLOOP_COMMON (565_565_none_SRC,
191 - scaled_bilinear_scanline_565_565_SRC,
192 + scaled_bilinear_scanline_565_565_SRC, NULL,
193 uint16_t, uint32_t, uint16_t,
194 NONE, FLAG_NONE)
195 FAST_BILINEAR_MAINLOOP_COMMON (565_565_normal_SRC,
196 - scaled_bilinear_scanline_565_565_SRC,
197 + scaled_bilinear_scanline_565_565_SRC, NULL,
198 uint16_t, uint32_t, uint16_t,
199 NORMAL, FLAG_NONE)
201 FAST_BILINEAR_MAINLOOP_COMMON (8888_565_cover_OVER,
202 - scaled_bilinear_scanline_8888_565_OVER,
203 + scaled_bilinear_scanline_8888_565_OVER, NULL,
204 uint32_t, uint32_t, uint16_t,
205 COVER, FLAG_NONE)
206 FAST_BILINEAR_MAINLOOP_COMMON (8888_565_pad_OVER,
207 - scaled_bilinear_scanline_8888_565_OVER,
208 + scaled_bilinear_scanline_8888_565_OVER, NULL,
209 uint32_t, uint32_t, uint16_t,
210 PAD, FLAG_NONE)
211 FAST_BILINEAR_MAINLOOP_COMMON (8888_565_none_OVER,
212 - scaled_bilinear_scanline_8888_565_OVER,
213 + scaled_bilinear_scanline_8888_565_OVER, NULL,
214 uint32_t, uint32_t, uint16_t,
215 NONE, FLAG_NONE)
216 FAST_BILINEAR_MAINLOOP_COMMON (8888_565_normal_OVER,
217 - scaled_bilinear_scanline_8888_565_OVER,
218 + scaled_bilinear_scanline_8888_565_OVER, NULL,
219 uint32_t, uint32_t, uint16_t,
220 NORMAL, FLAG_NONE)
222 FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_cover_OVER,
223 - scaled_bilinear_scanline_8888_8888_OVER,
224 + scaled_bilinear_scanline_8888_8888_OVER, NULL,
225 uint32_t, uint32_t, uint32_t,
226 COVER, FLAG_NONE)
227 FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_pad_OVER,
228 - scaled_bilinear_scanline_8888_8888_OVER,
229 + scaled_bilinear_scanline_8888_8888_OVER, NULL,
230 uint32_t, uint32_t, uint32_t,
231 PAD, FLAG_NONE)
232 FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_none_OVER,
233 - scaled_bilinear_scanline_8888_8888_OVER,
234 + scaled_bilinear_scanline_8888_8888_OVER, NULL,
235 uint32_t, uint32_t, uint32_t,
236 NONE, FLAG_NONE)
237 FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_normal_OVER,
238 - scaled_bilinear_scanline_8888_8888_OVER,
239 + scaled_bilinear_scanline_8888_8888_OVER, NULL,
240 uint32_t, uint32_t, uint32_t,
241 NORMAL, FLAG_NONE)
243 #define REPEAT_MIN_WIDTH 32
245 static void
246 fast_composite_tiled_repeat (pixman_implementation_t *imp,
247 pixman_composite_info_t *info)
248 diff --git a/gfx/cairo/libpixman/src/pixman-inlines.h b/gfx/cairo/libpixman/src/pixman-inlines.h
249 --- a/gfx/cairo/libpixman/src/pixman-inlines.h
250 +++ b/gfx/cairo/libpixman/src/pixman-inlines.h
251 @@ -21,16 +21,17 @@
252 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
254 * Author: Keith Packard, SuSE, Inc.
257 #ifndef PIXMAN_FAST_PATH_H__
258 #define PIXMAN_FAST_PATH_H__
260 +#include <stdlib.h>
261 #include "pixman-private.h"
263 #define PIXMAN_REPEAT_COVER -1
265 /* Flags describing input parameters to fast path macro template.
266 * Turning on some flag values may indicate that
267 * "some property X is available so template can use this" or
268 * "some property X should be handled by template".
269 @@ -816,18 +816,48 @@ bilinear_pad_repeat_get_scanline_bounds
271 * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to 256,
272 * but sometimes it may be less than that for NONE repeat when handling
273 * fuzzy antialiased top or bottom image edges. Also both top and
274 * bottom weight variables are guaranteed to have value in 0-255
275 * range and can fit into unsigned byte or be used with 8-bit SIMD
276 * multiplication instructions.
278 -#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
279 - dst_type_t, repeat_mode, flags) \
281 +/* Replace a single "scanline_func" with "fetch_func" & "op_func" to allow optional
282 + * two-stage processing (bilinear fetch to a temp buffer, followed by an unscaled
283 + * combine). "op_func" may be NULL, in which case we keep the old behavior.
284 + * This is ugly and gcc issues some warnings, but it works.
286 + * A word of advice: clang has much better error reporting than gcc for deeply nested macros.
287 + */
289 +#define scanline_func(dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
290 + scanline_buf, mask, src_top, src_bottom, width, \
291 + weight_top, weight_bottom, vx, unit_x, max_vx, zero_src) \
292 + do { \
293 + if (op_func != NULL) \
294 + { \
295 + fetch_func ((void *)scanline_buf, (mask), (src_top), (src_bottom), (width), \
296 + (weight_top), (weight_bottom), (vx), (unit_x), (max_vx), (zero_src)); \
297 + ((void (*)(dst_type_t *, const mask_type_t *, const src_type_t *, int)) op_func)\
298 + ((dst), (mask), (src_type_t *)scanline_buf, (width)); \
299 + } \
300 + else \
301 + { \
302 + fetch_func ((void*)(dst), (mask), (src_top), (src_bottom), (width), (weight_top), \
303 + (weight_bottom), (vx), (unit_x), (max_vx), (zero_src)); \
304 + } \
305 + } while (0)
308 +#define SCANLINE_BUFFER_LENGTH 3072
310 +#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, fetch_func, op_func, src_type_t, \
311 + mask_type_t, dst_type_t, repeat_mode, flags) \
312 static void \
313 fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \
314 pixman_composite_info_t *info) \
316 PIXMAN_COMPOSITE_ARGS (info); \
317 dst_type_t *dst_line; \
318 mask_type_t *mask_line; \
319 src_type_t *src_first_line; \
320 @@ -842,16 +872,19 @@ fast_composite_scaled_bilinear ## scale_
321 mask_type_t solid_mask; \
322 const mask_type_t *mask = &solid_mask; \
323 int src_stride, mask_stride, dst_stride; \
325 int src_width; \
326 pixman_fixed_t src_width_fixed; \
327 int max_x; \
328 pixman_bool_t need_src_extension; \
330 + uint64_t stack_scanline_buffer[SCANLINE_BUFFER_LENGTH]; \
331 + uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer; \
333 PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \
334 if (flags & FLAG_HAVE_SOLID_MASK) \
336 solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \
337 mask_stride = 0; \
339 else if (flags & FLAG_HAVE_NON_SOLID_MASK) \
340 @@ -914,16 +947,24 @@ fast_composite_scaled_bilinear ## scale_
341 else \
343 src_width = src_image->bits.width; \
344 need_src_extension = FALSE; \
347 src_width_fixed = pixman_int_to_fixed (src_width); \
350 + if (op_func != NULL && width * sizeof(src_type_t) > sizeof(stack_scanline_buffer)) \
351 + { \
352 + scanline_buffer = pixman_malloc_ab (width, sizeof(src_type_t)); \
354 + if (!scanline_buffer) \
355 + return; \
356 + } \
358 while (--height >= 0) \
360 int weight1, weight2; \
361 dst = dst_line; \
362 dst_line += dst_stride; \
363 vx = v.vector[0]; \
364 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
365 @@ -956,36 +997,39 @@ fast_composite_scaled_bilinear ## scale_
366 repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); \
367 src1 = src_first_line + src_stride * y1; \
368 src2 = src_first_line + src_stride * y2; \
370 if (left_pad > 0) \
372 buf1[0] = buf1[1] = src1[0]; \
373 buf2[0] = buf2[1] = src2[0]; \
374 - scanline_func (dst, mask, \
375 - buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE); \
376 + scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
377 + scanline_buffer, mask, buf1, buf2, left_pad, weight1, weight2, \
378 + 0, 0, 0, FALSE); \
379 dst += left_pad; \
380 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
381 mask += left_pad; \
383 if (width > 0) \
385 - scanline_func (dst, mask, \
386 - src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
387 + scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
388 + scanline_buffer, mask, src1, src2, width, weight1, weight2, \
389 + vx, unit_x, 0, FALSE); \
390 dst += width; \
391 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
392 mask += width; \
394 if (right_pad > 0) \
396 buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \
397 buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \
398 - scanline_func (dst, mask, \
399 - buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE); \
400 + scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
401 + scanline_buffer, mask, buf1, buf2, right_pad, weight1, weight2, \
402 + 0, 0, 0, FALSE); \
405 else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
407 src_type_t *src1, *src2; \
408 src_type_t buf1[2]; \
409 src_type_t buf2[2]; \
410 /* handle top/bottom zero padding by just setting weights to 0 if needed */ \
411 @@ -1011,64 +1055,67 @@ fast_composite_scaled_bilinear ## scale_
413 src1 = src_first_line + src_stride * y1; \
414 src2 = src_first_line + src_stride * y2; \
416 if (left_pad > 0) \
418 buf1[0] = buf1[1] = 0; \
419 buf2[0] = buf2[1] = 0; \
420 - scanline_func (dst, mask, \
421 - buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE); \
422 + scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
423 + scanline_buffer, mask, buf1, buf2, left_pad, weight1, weight2, \
424 + 0, 0, 0, TRUE); \
425 dst += left_pad; \
426 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
427 mask += left_pad; \
429 if (left_tz > 0) \
431 buf1[0] = 0; \
432 buf1[1] = src1[0]; \
433 buf2[0] = 0; \
434 buf2[1] = src2[0]; \
435 - scanline_func (dst, mask, \
436 - buf1, buf2, left_tz, weight1, weight2, \
437 + scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
438 + scanline_buffer, mask, buf1, buf2, left_tz, weight1, weight2, \
439 pixman_fixed_frac (vx), unit_x, 0, FALSE); \
440 dst += left_tz; \
441 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
442 mask += left_tz; \
443 vx += left_tz * unit_x; \
445 if (width > 0) \
447 - scanline_func (dst, mask, \
448 - src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
449 + scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
450 + scanline_buffer, mask, src1, src2, width, weight1, weight2, \
451 + vx, unit_x, 0, FALSE); \
452 dst += width; \
453 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
454 mask += width; \
455 vx += width * unit_x; \
457 if (right_tz > 0) \
459 buf1[0] = src1[src_image->bits.width - 1]; \
460 buf1[1] = 0; \
461 buf2[0] = src2[src_image->bits.width - 1]; \
462 buf2[1] = 0; \
463 - scanline_func (dst, mask, \
464 - buf1, buf2, right_tz, weight1, weight2, \
465 + scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
466 + scanline_buffer, mask, buf1, buf2, right_tz, weight1, weight2, \
467 pixman_fixed_frac (vx), unit_x, 0, FALSE); \
468 dst += right_tz; \
469 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
470 mask += right_tz; \
472 if (right_pad > 0) \
474 buf1[0] = buf1[1] = 0; \
475 buf2[0] = buf2[1] = 0; \
476 - scanline_func (dst, mask, \
477 - buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE); \
478 + scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
479 + scanline_buffer, mask, buf1, buf2, right_pad, weight1, weight2, \
480 + 0, 0, 0, TRUE); \
483 else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
485 int32_t num_pixels; \
486 int32_t width_remain; \
487 src_type_t * src_line_top; \
488 src_type_t * src_line_bottom; \
489 @@ -1120,17 +1167,18 @@ fast_composite_scaled_bilinear ## scale_
490 * vx is in range [0, src_width_fixed - pixman_fixed_e] \
491 * So we are safe from overflow. \
492 */ \
493 num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1; \
495 if (num_pixels > width_remain) \
496 num_pixels = width_remain; \
498 - scanline_func (dst, mask, buf1, buf2, num_pixels, \
499 + scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, \
500 + dst, scanline_buffer, mask, buf1, buf2, num_pixels, \
501 weight1, weight2, pixman_fixed_frac(vx), \
502 unit_x, src_width_fixed, FALSE); \
504 width_remain -= num_pixels; \
505 vx += num_pixels * unit_x; \
506 dst += num_pixels; \
508 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
509 @@ -1149,41 +1197,47 @@ fast_composite_scaled_bilinear ## scale_
510 * So we are safe from overflow here. \
511 */ \
512 num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e) \
513 / unit_x) + 1; \
515 if (num_pixels > width_remain) \
516 num_pixels = width_remain; \
518 - scanline_func (dst, mask, src_line_top, src_line_bottom, num_pixels, \
519 - weight1, weight2, vx, unit_x, src_width_fixed, FALSE); \
520 + scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, \
521 + dst, scanline_buffer, mask, src_line_top, src_line_bottom, \
522 + num_pixels, weight1, weight2, vx, unit_x, src_width_fixed, \
523 + FALSE); \
525 width_remain -= num_pixels; \
526 vx += num_pixels * unit_x; \
527 dst += num_pixels; \
529 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
530 mask += num_pixels; \
534 else \
536 - scanline_func (dst, mask, src_first_line + src_stride * y1, \
537 + scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
538 + scanline_buffer, mask, \
539 + src_first_line + src_stride * y1, \
540 src_first_line + src_stride * y2, width, \
541 weight1, weight2, vx, unit_x, max_vx, FALSE); \
544 + if (scanline_buffer != (uint8_t *) stack_scanline_buffer) \
545 + free (scanline_buffer); \
548 /* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
549 -#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
550 +#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, fetch_func, op_func, src_type_t, mask_type_t,\
551 dst_type_t, repeat_mode, flags) \
552 - FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\
553 + FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, fetch_func, op_func, src_type_t, mask_type_t,\
554 dst_type_t, repeat_mode, flags)
556 #define SCALED_BILINEAR_FLAGS \
557 (FAST_PATH_SCALE_TRANSFORM | \
558 FAST_PATH_NO_ALPHA_MAP | \
559 FAST_PATH_BILINEAR_FILTER | \
560 FAST_PATH_NO_ACCESSORS | \
561 FAST_PATH_NARROW_FORMAT)
562 diff --git a/gfx/cairo/libpixman/src/pixman-sse2.c b/gfx/cairo/libpixman/src/pixman-sse2.c
563 --- a/gfx/cairo/libpixman/src/pixman-sse2.c
564 +++ b/gfx/cairo/libpixman/src/pixman-sse2.c
565 @@ -5404,30 +5404,33 @@ scaled_bilinear_scanline_sse2_8888_8888_
566 if (w & 1)
568 BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
569 *dst = pix1;
574 +/* Add an extra NULL argument to the existing bilinear fast paths to indicate
575 + * that they don't need two-pass processing */
577 FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_SRC,
578 - scaled_bilinear_scanline_sse2_8888_8888_SRC,
579 + scaled_bilinear_scanline_sse2_8888_8888_SRC, NULL,
580 uint32_t, uint32_t, uint32_t,
581 COVER, FLAG_NONE)
582 FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_SRC,
583 - scaled_bilinear_scanline_sse2_8888_8888_SRC,
584 + scaled_bilinear_scanline_sse2_8888_8888_SRC, NULL,
585 uint32_t, uint32_t, uint32_t,
586 PAD, FLAG_NONE)
587 FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_SRC,
588 - scaled_bilinear_scanline_sse2_8888_8888_SRC,
589 + scaled_bilinear_scanline_sse2_8888_8888_SRC, NULL,
590 uint32_t, uint32_t, uint32_t,
591 NONE, FLAG_NONE)
592 FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_SRC,
593 - scaled_bilinear_scanline_sse2_8888_8888_SRC,
594 + scaled_bilinear_scanline_sse2_8888_8888_SRC, NULL,
595 uint32_t, uint32_t, uint32_t,
596 NORMAL, FLAG_NONE)
598 static force_inline void
599 scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst,
600 const uint32_t * mask,
601 const uint32_t * src_top,
602 const uint32_t * src_bottom,
603 @@ -5505,32 +5508,66 @@ scaled_bilinear_scanline_sse2_8888_8888_
606 w--;
607 dst++;
611 FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_OVER,
612 - scaled_bilinear_scanline_sse2_8888_8888_OVER,
613 + scaled_bilinear_scanline_sse2_8888_8888_OVER, NULL,
614 uint32_t, uint32_t, uint32_t,
615 COVER, FLAG_NONE)
616 FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_OVER,
617 - scaled_bilinear_scanline_sse2_8888_8888_OVER,
618 + scaled_bilinear_scanline_sse2_8888_8888_OVER, NULL,
619 uint32_t, uint32_t, uint32_t,
620 PAD, FLAG_NONE)
621 FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_OVER,
622 - scaled_bilinear_scanline_sse2_8888_8888_OVER,
623 + scaled_bilinear_scanline_sse2_8888_8888_OVER, NULL,
624 uint32_t, uint32_t, uint32_t,
625 NONE, FLAG_NONE)
626 FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_OVER,
627 - scaled_bilinear_scanline_sse2_8888_8888_OVER,
628 + scaled_bilinear_scanline_sse2_8888_8888_OVER, NULL,
629 uint32_t, uint32_t, uint32_t,
630 NORMAL, FLAG_NONE)
633 +/* An example of SSE2 two-stage bilinear_over_8888_0565 fast path, which is implemented
634 + as scaled_bilinear_scanline_sse2_8888_8888_SRC + op_bilinear_over_8888_0565 */
636 +void op_bilinear_over_8888_0565(uint16_t *dst, const uint32_t *mask, const uint32_t *src, int width)
638 + /* Note: this is not really fast; it should be based on the 8-pixel loop from sse2_composite_over_8888_0565 */
639 + while (--width >= 0)
641 + *dst = composite_over_8888_0565pixel (*src, *dst);
642 + src++;
643 + dst++;
647 +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_0565_cover_OVER,
648 + scaled_bilinear_scanline_sse2_8888_8888_SRC, op_bilinear_over_8888_0565,
649 + uint32_t, uint32_t, uint16_t,
650 + COVER, FLAG_NONE)
651 +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_0565_pad_OVER,
652 + scaled_bilinear_scanline_sse2_8888_8888_SRC, op_bilinear_over_8888_0565,
653 + uint32_t, uint32_t, uint16_t,
654 + PAD, FLAG_NONE)
655 +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_0565_none_OVER,
656 + scaled_bilinear_scanline_sse2_8888_8888_SRC, op_bilinear_over_8888_0565,
657 + uint32_t, uint32_t, uint16_t,
658 + NONE, FLAG_NONE)
659 +FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_0565_normal_OVER,
660 + scaled_bilinear_scanline_sse2_8888_8888_SRC, op_bilinear_over_8888_0565,
661 + uint32_t, uint32_t, uint16_t,
662 + NORMAL, FLAG_NONE)
664 +/*****************************/
666 static force_inline void
667 scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst,
668 const uint8_t * mask,
669 const uint32_t * src_top,
670 const uint32_t * src_bottom,
671 int32_t w,
672 int wt,
673 int wb,
674 @@ -5669,29 +5706,29 @@ scaled_bilinear_scanline_sse2_8888_8_888
677 w--;
678 dst++;
682 FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_cover_OVER,
683 - scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
684 + scaled_bilinear_scanline_sse2_8888_8_8888_OVER, NULL,
685 uint32_t, uint8_t, uint32_t,
686 COVER, FLAG_HAVE_NON_SOLID_MASK)
687 FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_pad_OVER,
688 - scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
689 + scaled_bilinear_scanline_sse2_8888_8_8888_OVER, NULL,
690 uint32_t, uint8_t, uint32_t,
691 PAD, FLAG_HAVE_NON_SOLID_MASK)
692 FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_none_OVER,
693 - scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
694 + scaled_bilinear_scanline_sse2_8888_8_8888_OVER, NULL,
695 uint32_t, uint8_t, uint32_t,
696 NONE, FLAG_HAVE_NON_SOLID_MASK)
697 FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_normal_OVER,
698 - scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
699 + scaled_bilinear_scanline_sse2_8888_8_8888_OVER, NULL,
700 uint32_t, uint8_t, uint32_t,
701 NORMAL, FLAG_HAVE_NON_SOLID_MASK)
703 static const pixman_fast_path_t sse2_fast_paths[] =
705 /* PIXMAN_OP_OVER */
706 PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, sse2_composite_over_n_8_0565),
707 PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, sse2_composite_over_n_8_0565),
708 @@ -5808,16 +5845,21 @@ static const pixman_fast_path_t sse2_fas
709 SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
710 SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
712 SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8_8888),
713 SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8_8888),
714 SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8_8888),
715 SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8_8888),
717 + /* and here the needed entries are added to the fast path table */
719 + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, r5g6b5, sse2_8888_0565),
720 + SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, b5g6r5, sse2_8888_0565),
722 { PIXMAN_OP_NONE },
725 static pixman_bool_t
726 sse2_blt (pixman_implementation_t *imp,
727 uint32_t * src_bits,
728 uint32_t * dst_bits,
729 int src_stride,