1 @*****************************************************************************
2 @ simple_channel_mixer.S : ARM NEON channel mixer
3 @*****************************************************************************
4 @ Copyright (C) 2012 David Geldreich <david.geldreich@free.fr>
7 @ This program is free software; you can redistribute it and/or modify it
8 @ under the terms of the GNU Lesser General Public License as published by
9 @ the Free Software Foundation; either version 2.1 of the License, or
10 @ (at your option) any later version.
12 @ This program is distributed in the hope that it will be useful,
13 @ but WITHOUT ANY WARRANTY; without even the implied warranty of
14 @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 @ GNU Lesser General Public License for more details.
17 @ You should have received a copy of the GNU Lesser General Public License
18 @ along with this program; if not, write to the Free Software Foundation,
19 @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
20 @****************************************************************************/
@ convert_7_x_to_2_0_neon_asm
@ Mixes seven 32-bit float source values into two outputs. According to the
@ per-lane comments in the body the result is:
@   lane 0 = 0.5*src[0] + 0.25*src[2] + 0.25*src[4] + src[6]
@   lane 1 = 0.5*src[1] + 0.25*src[3] + 0.25*src[5] + src[6]
@ SRC is a symbolic register name defined outside this excerpt; the vld1
@ loads below post-increment it.
@ NOTE(review): q0 (low half d0, high half d1) is used as the coefficient
@ vector, but the instruction that loads it is not visible in this excerpt;
@ the values 0.5,0.5,0.25,0.25 are taken from the existing comments - confirm.
37 .global convert_7_x_to_2_0_neon_asm
38 .type convert_7_x_to_2_0_neon_asm, %function
39 convert_7_x_to_2_0_neon_asm:
@ q2 (= d4:d5) receives four consecutive elements; SRC advances by 16 bytes.
45 vld1.32 {q2},[SRC]! @ load 0,1,2,3
46 vmul.f32 q2,q2,q0 @ 0.5*src[0] 0.5*src[1] 0.25*src[2] 0.25*src[3]
@ d6 receives the next two elements; SRC advances by 8 bytes.
47 vld1.32 {d6},[SRC]! @ load 4,5
@ d1 is the high half of q0, i.e. the second pair of coefficients.
48 vmul.f32 d6,d6,d1 @ 0.25*src[4] 0.25*src[5]
@ Fold the high half of q2 (d5) into its low half (d4), lane by lane.
49 vadd.f32 d4,d4,d5 @ 0.5*src[0] + 0.25*src[2]
50 @ 0.5*src[1] + 0.25*src[3]
51 vadd.f32 d4,d4,d6 @ 0.5*src[0] + 0.25*src[2] + 0.25*src[4]
52 @ 0.5*src[1] + 0.25*src[3] + 0.25*src[5]
@ s14 is the low lane of d7. This load has no writeback, so SRC is unchanged.
53 flds s14,[SRC] @ load 6
@ Executed only when the Z flag is clear; the flag-setting instruction is
@ not visible in this excerpt.
@ NOTE(review): the step is 8 bytes (two 32-bit elements) while the comment
@ mentions a single lfe channel; whether that is right depends on the unseen
@ instructions between the flds above and this line - confirm.
58 addne SRC,SRC,#8 @ skip the lfe channel
@ NOTE(review): for the lane comments below to hold, both lanes of d7 must
@ contain src[6]; the instruction duplicating s14 across d7 is not visible.
59 vadd.f32 d4,d4,d7 @ 0.5*src[0] + 0.25*src[2] + 0.25*src[4] + src[6]
60 @ 0.5*src[1] + 0.25*src[3] + 0.25*src[5] + src[6]
@ convert_5_x_to_2_0_neon_asm
@ Mixes five 32-bit float source values into two outputs. According to the
@ per-lane comments in the body the result is:
@   lane 0 = 0.5*src[0] + 0.33*src[2] + src[4]
@   lane 1 = 0.5*src[1] + 0.33*src[3] + src[4]
@ SRC and COEFF are symbolic register names defined outside this excerpt.
@ NOTE(review): the instruction that points COEFF at the coefficient table is
@ not visible here; the coefficient values are taken from the comments.
73 .global convert_5_x_to_2_0_neon_asm
74 .type convert_5_x_to_2_0_neon_asm, %function
75 convert_5_x_to_2_0_neon_asm:
@ q0 = four coefficients read from [COEFF]; no writeback on COEFF.
79 vld1.32 {q0},[COEFF] @ load constants
@ q1 (= d2:d3) receives four consecutive elements; SRC advances by 16 bytes.
81 vld1.32 {q1},[SRC]! @ load 0,1,2,3
@ s8 is the low lane of d4. This load has no writeback, so SRC is unchanged.
82 flds s8,[SRC] @ load 4
@ Executed only when the Z flag is clear; the flag-setting instruction is
@ not visible in this excerpt.
@ NOTE(review): the step is 8 bytes (two 32-bit elements) while the comment
@ mentions a single lfe channel; whether that is right depends on the unseen
@ instructions between the flds above and this line - confirm.
87 addne SRC,SRC,#8 @ skip the lfe channel
88 vmul.f32 q1,q1,q0 @ 0.5*src[0] 0.5*src[1] 0.33*src[2] 0.33*src[3]
@ Fold the high half of q1 (d3) into its low half (d2), lane by lane.
89 vadd.f32 d2,d2,d3 @ 0.5*src[0] + 0.33*src[2]
90 @ 0.5*src[1] + 0.33*src[3]
@ NOTE(review): for the lane comments below to hold, both lanes of d4 must
@ contain src[4]; the instruction duplicating s8 across d4 is not visible.
91 vadd.f32 d2,d2,d4 @ 0.5*src[0] + 0.33*src[2] + src[4]
92 @ 0.5*src[1] + 0.33*src[3] + src[4]
@ convert_4_0_to_2_0_neon_asm
@ Mixes four 32-bit float source values into two outputs. Following the
@ instruction comments below, the result left in d2 is:
@   lane 0 = 0.5*src[0] + src[3] + src[2]
@   lane 1 = 0.5*src[1] + src[3] + src[2]
@ COEFF is a symbolic register name defined outside this excerpt; coeff_4to2
@ is a label defined outside this excerpt.
@ NOTE(review): the load that fills q1 (= d2:d3) with src[0..3] is not visible
@ here; the src[] indices are taken from the existing comments - confirm.
103 .global convert_4_0_to_2_0_neon_asm
104 .type convert_4_0_to_2_0_neon_asm, %function
105 convert_4_0_to_2_0_neon_asm:
@ COEFF = PC-relative address of the coeff_4to2 table.
108 adr COEFF, coeff_4to2
@ d0 = two coefficients read from [COEFF]; no writeback on COEFF.
109 vld1.32 {d0},[COEFF] @ load constants
112 vmul.f32 d2,d2,d0 @ 0.5*src[0] 0.5*src[1]
@ Broadcast each lane of d3 into a full D register. d4 must be written
@ first because the second vdup overwrites d3 in place.
113 vdup.32 d4,d3[0] @ dup src[2]
114 vdup.32 d3,d3[1] @ dup src[3]
@ The same src[3] and src[2] values are added to both lanes.
115 vadd.f32 d2,d2,d3 @ +src[3]
116 vadd.f32 d2,d2,d4 @ +src[2]
@ convert_3_x_to_2_0_neon_asm
@ Mixes three 32-bit float source values into two outputs. According to the
@ per-lane comments in the body the result left in d1 is:
@   lane 0 = 0.5*src[0] + src[2]
@   lane 1 = 0.5*src[1] + src[2]
@ SRC and COEFF are symbolic register names defined outside this excerpt;
@ coeff_3to2 is a label defined outside this excerpt.
127 .global convert_3_x_to_2_0_neon_asm
128 .type convert_3_x_to_2_0_neon_asm, %function
129 convert_3_x_to_2_0_neon_asm:
@ COEFF = PC-relative address of the coeff_3to2 table.
132 adr COEFF, coeff_3to2
@ d0 = two coefficients read from [COEFF]; no writeback on COEFF.
133 vld1.32 {d0},[COEFF] @ load constants
@ d1 receives two consecutive elements; SRC advances by 8 bytes.
135 vld1.32 {d1},[SRC]! @ load 0,1
@ s4 is the low lane of d2. This load has no writeback, so SRC is unchanged.
136 flds s4,[SRC] @ load 2
@ Executed only when the Z flag is clear; the flag-setting instruction is
@ not visible in this excerpt.
@ NOTE(review): the step is 8 bytes (two 32-bit elements) while the comment
@ mentions a single lfe channel; whether that is right depends on the unseen
@ instructions between the flds above and this line - confirm.
141 addne SRC,SRC,#8 @ skip the lfe channel
142 vmul.f32 d1,d1,d0 @ 0.5*src[0] 0.5*src[1]
@ NOTE(review): for the lane comments below to hold, both lanes of d2 must
@ contain src[2]; the instruction duplicating s4 across d2 is not visible.
143 vadd.f32 d1,d1,d2 @ 0.5*src[0] + src[2]
144 @ 0.5*src[1] + src[2]
@ convert_7_x_to_1_0_neon_asm
@ Visible part of a routine that scales seven 32-bit float source values
@ before they are combined; the combining and storing instructions are not
@ part of this excerpt, so only the scaling steps are described here.
@ SRC and COEFF are symbolic register names defined outside this excerpt;
@ coeff_7to1 is a label defined outside this excerpt.
@ NOTE(review): q0 (low half d0, high half d1) is used as the coefficient
@ vector but its load is not visible; the values 0.25,0.25,0.125,0.125 are
@ taken from the existing comments - confirm.
157 .global convert_7_x_to_1_0_neon_asm
158 .type convert_7_x_to_1_0_neon_asm, %function
159 convert_7_x_to_1_0_neon_asm:
@ COEFF = PC-relative address of the coeff_7to1 table.
162 adr COEFF, coeff_7to1
@ q1 (= d2:d3) receives four consecutive elements; SRC advances by 16 bytes.
165 vld1.32 {q1},[SRC]! @ load 0,1,2,3
166 vmul.f32 q1,q1,q0 @ 0.25*src[0] 0.25*src[1] 0.125*src[2] 0.125*src[3]
@ d4 receives the next two elements; SRC advances by 8 bytes.
167 vld1.32 {d4},[SRC]! @ load 4,5
@ d1 is the high half of q0, i.e. the second pair of coefficients.
168 vmul.f32 d4,d4,d1 @ 0.125*src[4] 0.125*src[5]
@ s10 is the low lane of d5. This load has no writeback, so SRC is unchanged.
171 flds s10,[SRC] @ load 6
@ Executed only when the Z flag is clear; the flag-setting instruction is
@ not visible in this excerpt.
@ NOTE(review): the step is 8 bytes (two 32-bit elements) while the comment
@ mentions a single lfe channel; whether that is right depends on the unseen
@ instructions between the flds above and this line - confirm.
175 addne SRC,SRC,#8 @ skip the lfe channel
@ convert_5_x_to_1_0_neon_asm
@ Visible part of a routine that scales five 32-bit float source values
@ before they are combined; the combining and storing instructions are not
@ part of this excerpt, so only the scaling steps are described here.
@ SRC and COEFF are symbolic register names defined outside this excerpt;
@ coeff_5to1 is a label defined outside this excerpt.
@ NOTE(review): q0 is used as the coefficient vector but its load is not
@ visible; the values 0.25,0.25,1/6,1/6 are taken from the existing
@ comment - confirm.
191 .global convert_5_x_to_1_0_neon_asm
192 .type convert_5_x_to_1_0_neon_asm, %function
193 convert_5_x_to_1_0_neon_asm:
@ COEFF = PC-relative address of the coeff_5to1 table.
196 adr COEFF, coeff_5to1
@ q1 (= d2:d3) receives four consecutive elements; SRC advances by 16 bytes.
199 vld1.32 {q1},[SRC]! @ load 0,1,2,3
200 vmul.f32 q1,q1,q0 @ 0.25*src[0] 0.25*src[1] src[2]/6 src[3]/6
@ s10 is the low lane of d5. This load has no writeback, so SRC is unchanged.
202 flds s10,[SRC] @ load 4
@ Executed only when the Z flag is clear; the flag-setting instruction is
@ not visible in this excerpt.
@ NOTE(review): the step is 8 bytes (two 32-bit elements) while the comment
@ mentions a single lfe channel; whether that is right depends on the unseen
@ instructions between the flds above and this line - confirm.
206 addne SRC,SRC,#8 @ skip the lfe channel
@ convert_7_x_to_4_0_neon_asm
@ Mixes seven 32-bit float source values into four outputs held in q2.
@ From the instructions and comments below:
@   lane 0 = src[6] + 0.5*src[0] + src[2]/6
@   lane 1 = src[6] + 0.5*src[1] + src[3]/6
@   lane 2 = src[4] + src[2]/6
@   lane 3 = src[5] + src[3]/6
@ SRC and COEFF are symbolic register names defined outside this excerpt;
@ coeff_7to4 is a label defined outside this excerpt.
@ NOTE(review): q0 is used as the coefficient vector but its load is not
@ visible; the values 0.5,0.5,1/6,1/6 are taken from the existing
@ comment - confirm.
222 .global convert_7_x_to_4_0_neon_asm
223 .type convert_7_x_to_4_0_neon_asm, %function
224 convert_7_x_to_4_0_neon_asm:
@ COEFF = PC-relative address of the coeff_7to4 table.
227 adr COEFF, coeff_7to4
@ q1 (= d2:d3) receives four consecutive elements; SRC advances by 16 bytes.
230 vld1.32 {q1},[SRC]! @ load 0,1,2,3
231 vmul.f32 q1,q1,q0 @ 0.5*src[0] 0.5*src[1] src[2]/6 src[3]/6
@ d5 is the high half of q2; it receives the next two elements unscaled and
@ SRC advances by 8 bytes.
232 vld1.32 {d5},[SRC]! @ load 4,5
@ s14 is the low lane of d7. This load has no writeback, so SRC is unchanged.
233 flds s14,[SRC] @ load 6
@ Only the low half of q1 is updated; d3 keeps src[2]/6 and src[3]/6, which
@ the q-wide add below applies to lanes 2 and 3.
234 vadd.f32 d2,d2,d3 @ 0.5*src[0] + src[2]/6
235 @ 0.5*src[1] + src[3]/6
@ Broadcast src[6] into d4, the low half of q2.
236 vdup.32 d4,d7[0] @ so q2 : src[6] src[6] src[4] src[5]
237 vadd.f32 q2,q2,q1 @ src[6] + 0.5*src[0] + src[2]/6
238 @ src[6] + 0.5*src[1] + src[3]/6
@ Executed only when the Z flag is clear; the flag-setting instruction is
@ not visible in this excerpt.
@ NOTE(review): the step is 8 bytes (two 32-bit elements) while the comment
@ mentions a single lfe channel; whether that is right depends on the unseen
@ instructions before this line - confirm.
244 addne SRC,SRC,#8 @ skip the lfe channel
255 .global convert_5_x_to_4_0_neon_asm
256 .type convert_5_x_to_4_0_neon_asm, %function
257 convert_5_x_to_4_0_neon_asm:
260 adr COEFF, coeff_5to4
263 vld1.32 {q1},[SRC]! @ load 0,1,2,3
264 vmul.f32 d2,d2,d0 @ 0.5*src[0] 0.5*src[1]
265 flds s8,[SRC] @ load 4
267 vadd.f32 d2,d2,d4 @ 0.5*src[0] + src[4]
268 @ 0.5*src[1] + src[4]
274 addne SRC,SRC,#8 @ skip the lfe channel