2 * Copyright © 2008 Mozilla Corporation
3 * Copyright © 2010 Nokia Corporation
5 * Permission to use, copy, modify, distribute, and sell this software and its
6 * documentation for any purpose is hereby granted without fee, provided that
7 * the above copyright notice appear in all copies and that both that
8 * copyright notice and this permission notice appear in supporting
9 * documentation, and that the name of Mozilla Corporation not be used in
10 * advertising or publicity pertaining to distribution of the software without
11 * specific, written prior permission. Mozilla Corporation makes no
12 * representations about the suitability of this software for any purpose. It
13 * is provided "as is" without express or implied warranty.
15 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
16 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
17 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
20 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
21 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
24 * Author: Jeff Muizelaar (jeff@infidigm.net)
28 /* Prevent the stack from becoming executable */
/*
 * On Linux ELF targets only, emit a .note.GNU-stack section with an empty
 * flags string (so no executable flag) and type %progbits. GNU toolchains
 * read this marker as "this object does not need an executable stack".
 */
29 #if defined(__linux__) && defined(__ELF__)
30 .section .note.GNU-stack,"",%progbits
40 #include "pixman-arm-asm.h"
43 * Note: This code is only using armv5te instructions (not even armv6),
44 * but is scheduled for ARM Cortex-A8 pipeline. So it might need to
45 * be split into a few variants, tuned for each microarchitecture.
47 * TODO: In order to get good performance on ARM9/ARM11 cores (which don't
48 * have efficient write combining), it needs to be changed to use 16-byte
49 * aligned writes using STM instruction.
51 * Nearest scanline scaler macro template uses the following arguments:
52 * fname - name of the function to generate
53 * bpp_shift - (1 << bpp_shift) is the size of a pixel in bytes
54 * t - type suffix for LDR/STR instructions
55 * prefetch_distance - prefetch in the source image by that many pixels ahead
57 * prefetch_braking_distance - stop prefetching when that many pixels are
58 * remaining before the end of scanline
61 .macro generate_nearest_scanline_func fname, bpp_shift, t, \
63 prefetch_braking_distance
/*
 * Template that expands to one nearest scanline scaling function whose name
 * is given by fname, for pixels of (1 << bpp_shift) bytes. The t argument is
 * pasted onto LDR/STR (ldr&t, str&t) to select the memory access width.
 * NOTE(review): this view of the file appears to be missing some original
 * lines (most register aliases, the flag-setting and branch instructions
 * that pair with the local labels, and the closing .endm) - confirm any
 * change against the complete file.
 */
65 pixman_asm_function fname
/* Alias r8 as SRC_WIDTH_FIXED until the matching .unreq below. */
75 SRC_WIDTH_FIXED .req r8
/* Save six registers (24 bytes); the same list is popped at the end. */
78 push {r4, r5, r6, r7, r8, r10}
/* VXMASK = ~((1 << bpp_shift) - 1): clears the low bpp_shift bits of an offset. */
79 mvn VXMASK, #((1 << bpp_shift) - 1)
/*
 * With 24 bytes just pushed, [sp, #28] is the word 4 bytes above the stack
 * pointer value at function entry - presumably the second stack-passed
 * argument; confirm against the C prototype of the generated function.
 */
80 ldr SRC_WIDTH_FIXED, [sp, #28]
82 /* define helper macro */
/*
 * First half: load the pixel at SRC + TMP1, then compute the next source
 * byte offset into TMP2 as (VX asr (16 - bpp_shift)) & VXMASK, which equals
 * (VX asr 16) * pixel size, i.e. VX is used as a value with 16 fractional
 * bits. The loaded pixel is stored to DST with a post-increment of one pixel.
 */
84 ldr&t TMP1, [SRC, TMP1]
85 and TMP2, VXMASK, VX, asr #(16 - bpp_shift)
87 str&t TMP1, [DST], #(1 << bpp_shift)
/*
 * Subtract SRC_WIDTH_FIXED from VX only when the N flag is clear (PL),
 * updating the flags. NOTE(review): the instruction that sets the flags
 * before this point and any branch back to label 9 are not visible here.
 */
88 9: subpls VX, VX, SRC_WIDTH_FIXED
/* Second half: same steps with the roles of TMP1 and TMP2 swapped. */
91 ldr&t TMP2, [SRC, TMP2]
92 and TMP1, VXMASK, VX, asr #(16 - bpp_shift)
94 str&t TMP2, [DST], #(1 << bpp_shift)
95 9: subpls VX, VX, SRC_WIDTH_FIXED
99 /* now do the scaling */
/* Compute the initial source byte offset into TMP1 from the current VX. */
100 and TMP1, VXMASK, VX, asr #(16 - bpp_shift)
102 9: subpls VX, VX, SRC_WIDTH_FIXED
/* Bias W down by (8 + prefetch_braking_distance); the subtraction sets the flags. */
104 subs W, W, #(8 + prefetch_braking_distance)
106 /* calculate prefetch offset */
/* PF_OFFS = UNIT_X * prefetch_distance + VX */
107 mov PF_OFFS, #prefetch_distance
108 mla PF_OFFS, UNIT_X, PF_OFFS, VX
109 1: /* main loop, process 8 pixels per iteration with prefetch */
/* Prefetch hint for the address SRC + (PF_OFFS asr (16 - bpp_shift)). */
110 pld [SRC, PF_OFFS, asr #(16 - bpp_shift)]
/* Advance the prefetch position by 8 * UNIT_X, matching 8 pixels per iteration. */
111 add PF_OFFS, UNIT_X, lsl #3
/*
 * Subtracting (4 - 8 - prefetch_braking_distance) adds
 * (4 + prefetch_braking_distance) back to W, undoing the earlier bias of
 * (8 + prefetch_braking_distance) except for 4; the flags are updated.
 */
119 subs W, W, #(4 - 8 - prefetch_braking_distance)
121 1: /* process the remaining pixels */
/*
 * Executed only when the Z flag is clear (NE): load one more pixel from
 * SRC + TMP1. NOTE(review): the flag-setting test and the matching store
 * are not visible in this view.
 */
132 ldrne&t TMP1, [SRC, TMP1]
134 /* cleanup helper macro */
/*
 * Undefine scale_2_pixels, presumably so that the next expansion of this
 * template can define it again with its own bpp_shift and t.
 */
135 .purgem scale_2_pixels
/* Drop the SRC_WIDTH_FIXED register alias. */
145 .unreq SRC_WIDTH_FIXED
/* Restore the registers saved by the push on entry. */
147 pop {r4, r5, r6, r7, r8, r10}
/*
 * Instantiate the scaler for 0565 pixels: bpp_shift = 1 (2 bytes per pixel),
 * t = h (so ldr&t / str&t become the halfword forms), prefetch_distance = 80,
 * prefetch_braking_distance = 32.
 */
152 generate_nearest_scanline_func \
153 pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32
/*
 * Instantiate the scaler for 8888 pixels: bpp_shift = 2 (4 bytes per pixel),
 * empty t (plain word ldr / str), prefetch_distance = 48,
 * prefetch_braking_distance = 32.
 */
155 generate_nearest_scanline_func \
156 pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2, , 48, 32