1 @*****************************************************************************
2 @ nv12_rgb.S : ARM NEONv1 NV12 to RGB chroma conversion
3 @*****************************************************************************
4 @ Copyright (C) 2011 Sébastien Toque
7 @ This program is free software; you can redistribute it and/or modify it
8 @ under the terms of the GNU Lesser General Public License as published by
9 @ the Free Software Foundation; either version 2.1 of the License, or
10 @ (at your option) any later version.
12 @ This program is distributed in the hope that it will be useful,
13 @ but WITHOUT ANY WARRANTY; without even the implied warranty of
14 @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 @ GNU Lesser General Public License for more details.
17 @ You should have received a copy of the GNU Lesser General Public License
18 @ along with this program; if not, write to the Free Software Foundation,
19 @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
20 @****************************************************************************/
25 #if HAVE_AS_FPU_DIRECTIVE
83 function nv12_rgb_neon
84 push {r4-r8,r10-r11,lr}
88 ldmia r0, {O1, OPITCH}
89 ldmia r1, {Y1, U, V, YPITCH}
91 /* round the width up to a multiple of 16 */
93 sub WIDTH, WIDTH, OPAD
95 addne WIDTH, WIDTH, #16
97 /* init constants (scale value by 64) */
103 adr OPAD, coefficients
104 vld1.s16 {d6[], d7[]}, [OPAD]!
105 vld1.s16 {d8[], d9[]}, [OPAD]!
106 vld1.s16 {d10[], d11[]}, [OPAD]!
111 sub OPAD, OPITCH, WIDTH, lsl #2
112 sub YPAD, YPITCH, WIDTH
119 /* exit if all rows have been processed */
122 pople {r4-r8,r10-r11,pc}
128 vld2.u8 {u,v}, [U,:128]!
130 vmull.u8 chro_r, v, coefRV
131 vmull.u8 chro_g, u, coefGU
132 vmlal.u8 chro_g, v, coefGV
133 vmull.u8 chro_b, u, coefBU
135 vadd.s16 chro_r, Rc, chro_r
136 vsub.s16 chro_g, Gc, chro_g
137 vadd.s16 chro_b, Bc, chro_b
142 vld2.u8 {y1,y2}, [Y1,:128]!
144 /* y1 : add luminance to chrominance, then divide by 64 (rounding) and clamp to unsigned 8-bit */
145 vmull.u8 lumi, y1, coefY
146 vqadd.s16 red, lumi, chro_r
147 vqadd.s16 green, lumi, chro_g
148 vqadd.s16 blue, lumi, chro_b
149 vqrshrun.s16 red1, red, #6
150 vqrshrun.s16 green1, green, #6
151 vqrshrun.s16 blue1, blue, #6
153 /* y2 : add luminance to chrominance, then divide by 64 (rounding) and clamp to unsigned 8-bit */
154 vmull.u8 lumi, y2, coefY
155 vqadd.s16 red, lumi, chro_r
156 vqadd.s16 green, lumi, chro_g
157 vqadd.s16 blue, lumi, chro_b
158 vqrshrun.s16 red2, red, #6
159 vqrshrun.s16 green2, green, #6
160 vqrshrun.s16 blue2, blue, #6
166 vzip.u8 green1, green2
169 vst4.u8 {red1,green1,blue1,alpha1}, [O1,:128]!
170 vst4.u8 {red2,green2,blue2,alpha2}, [O1,:128]!
173 vld2.u8 {y1,y2}, [Y2,:128]!
175 /* y1 : add luminance to chrominance, then divide by 64 (rounding) and clamp to unsigned 8-bit */
176 vmull.u8 lumi, y1, coefY
177 vqadd.s16 red, lumi, chro_r
178 vqadd.s16 green, lumi, chro_g
179 vqadd.s16 blue, lumi, chro_b
180 vqrshrun.s16 red1, red, #6
181 vqrshrun.s16 green1, green, #6
182 vqrshrun.s16 blue1, blue, #6
184 /* y2 : add luminance to chrominance, then divide by 64 (rounding) and clamp to unsigned 8-bit */
185 vmull.u8 lumi, y2, coefY
186 vqadd.s16 red, lumi, chro_r
187 vqadd.s16 green, lumi, chro_g
188 vqadd.s16 blue, lumi, chro_b
189 vqrshrun.s16 red2, red, #6
190 vqrshrun.s16 green2, green, #6
191 vqrshrun.s16 blue2, blue, #6
197 vzip.u8 green1, green2
200 vst4.u8 {red1,green1,blue1,alpha1}, [O2,:128]!
201 vst4.u8 {red2,green2,blue2,alpha2}, [O2,:128]!
203 /* next columns (x16) */
204 subs COUNT, COUNT, #16