1 /* Copyright (c) 2007-2008 CSIRO
2 Copyright (c) 2007-2009 Xiph.Org Foundation
3 Written by Jean-Marc Valin */
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions
14 - Redistributions of source code must retain the above copyright
15 notice, this list of conditions and the following disclaimer.
17 - Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 #include "os_support.h"
41 #include "stack_alloc.h"
45 static void find_best_pitch(opus_val32
*xcorr
, opus_val16
*y
, int len
,
46 int max_pitch
, int *best_pitch
48 , int yshift
, opus_val32 maxcorr
54 opus_val16 best_num
[2];
55 opus_val32 best_den
[2];
59 xshift
= celt_ilog2(maxcorr
)-14;
69 Syy
= ADD32(Syy
, SHR32(MULT16_16(y
[j
],y
[j
]), yshift
));
70 for (i
=0;i
<max_pitch
;i
++)
76 xcorr16
= EXTRACT16(VSHR32(xcorr
[i
], xshift
));
78 /* Considering the range of xcorr16, this should avoid both underflows
79 and overflows (inf) when squaring xcorr16 */
82 num
= MULT16_16_Q15(xcorr16
,xcorr16
);
83 if (MULT16_32_Q15(num
,best_den
[1]) > MULT16_32_Q15(best_num
[1],Syy
))
85 if (MULT16_32_Q15(num
,best_den
[0]) > MULT16_32_Q15(best_num
[0],Syy
))
87 best_num
[1] = best_num
[0];
88 best_den
[1] = best_den
[0];
89 best_pitch
[1] = best_pitch
[0];
100 Syy
+= SHR32(MULT16_16(y
[i
+len
],y
[i
+len
]),yshift
) - SHR32(MULT16_16(y
[i
],y
[i
]),yshift
);
105 static void celt_fir5(opus_val16
*x
,
106 const opus_val16
*num
,
110 opus_val16 num0
, num1
, num2
, num3
, num4
;
111 opus_val32 mem0
, mem1
, mem2
, mem3
, mem4
;
124 opus_val32 sum
= SHL32(EXTEND32(x
[i
]), SIG_SHIFT
);
125 sum
= MAC16_16(sum
,num0
,mem0
);
126 sum
= MAC16_16(sum
,num1
,mem1
);
127 sum
= MAC16_16(sum
,num2
,mem2
);
128 sum
= MAC16_16(sum
,num3
,mem3
);
129 sum
= MAC16_16(sum
,num4
,mem4
);
135 x
[i
] = ROUND16(sum
, SIG_SHIFT
);
140 void pitch_downsample(celt_sig
* OPUS_RESTRICT x
[], opus_val16
* OPUS_RESTRICT x_lp
,
141 int len
, int C
, int arch
)
145 opus_val16 tmp
=Q15ONE
;
148 opus_val16 c1
= QCONST16(.8f
,15);
151 opus_val32 maxabs
= celt_maxabs32(x
[0], len
);
154 opus_val32 maxabs_1
= celt_maxabs32(x
[1], len
);
155 maxabs
= MAX32(maxabs
, maxabs_1
);
159 shift
= celt_ilog2(maxabs
)-10;
164 for (i
=1;i
<len
>>1;i
++)
165 x_lp
[i
] = SHR32(x
[0][(2*i
-1)], shift
+2) + SHR32(x
[0][(2*i
+1)], shift
+2) + SHR32(x
[0][2*i
], shift
+1);
166 x_lp
[0] = SHR32(x
[0][1], shift
+2) + SHR32(x
[0][0], shift
+1);
169 for (i
=1;i
<len
>>1;i
++)
170 x_lp
[i
] += SHR32(x
[1][(2*i
-1)], shift
+2) + SHR32(x
[1][(2*i
+1)], shift
+2) + SHR32(x
[1][2*i
], shift
+1);
171 x_lp
[0] += SHR32(x
[1][1], shift
+2) + SHR32(x
[1][0], shift
+1);
174 for (i
=1;i
<len
>>1;i
++)
175 x_lp
[i
] = .25f
*x
[0][(2*i
-1)] + .25f
*x
[0][(2*i
+1)] + .5f
*x
[0][2*i
];
176 x_lp
[0] = .25f
*x
[0][1] + .5f
*x
[0][0];
179 for (i
=1;i
<len
>>1;i
++)
180 x_lp
[i
] += .25f
*x
[1][(2*i
-1)] + .25f
*x
[1][(2*i
+1)] + .5f
*x
[1][2*i
];
181 x_lp
[0] += .25f
*x
[1][1] + .5f
*x
[1][0];
184 _celt_autocorr(x_lp
, ac
, NULL
, 0,
187 /* Noise floor -40 dB */
189 ac
[0] += SHR32(ac
[0],13);
196 /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
198 ac
[i
] -= MULT16_32_Q15(2*i
*i
, ac
[i
]);
200 ac
[i
] -= ac
[i
]*(.008f
*i
)*(.008f
*i
);
204 _celt_lpc(lpc
, ac
, 4);
207 tmp
= MULT16_16_Q15(QCONST16(.9f
,15), tmp
);
208 lpc
[i
] = MULT16_16_Q15(lpc
[i
], tmp
);
211 lpc2
[0] = lpc
[0] + QCONST16(.8f
,SIG_SHIFT
);
212 lpc2
[1] = lpc
[1] + MULT16_16_Q15(c1
,lpc
[0]);
213 lpc2
[2] = lpc
[2] + MULT16_16_Q15(c1
,lpc
[1]);
214 lpc2
[3] = lpc
[3] + MULT16_16_Q15(c1
,lpc
[2]);
215 lpc2
[4] = MULT16_16_Q15(c1
,lpc
[3]);
216 celt_fir5(x_lp
, lpc2
, len
>>1);
219 /* Pure C implementation. */
225 celt_pitch_xcorr_c(const opus_val16
*_x
, const opus_val16
*_y
,
226 opus_val32
*xcorr
, int len
, int max_pitch
, int arch
)
229 #if 0 /* This is a simple version of the pitch correlation that should work
230 well on DSPs like Blackfin and TI C5x/C6x */
233 opus_val32 maxcorr
=1;
235 #if !defined(OVERRIDE_PITCH_XCORR)
238 for (i
=0;i
<max_pitch
;i
++)
242 sum
= MAC16_16(sum
, _x
[j
], _y
[i
+j
]);
245 maxcorr
= MAX32(maxcorr
, sum
);
252 #else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
254 /*The EDSP version requires that max_pitch is at least 1, and that _x is
256 Since it's hard to put asserts in assembly, put them here.*/
258 opus_val32 maxcorr
=1;
260 celt_assert(max_pitch
>0);
261 celt_sig_assert(((size_t)_x
&3)==0);
262 for (i
=0;i
<max_pitch
-3;i
+=4)
264 opus_val32 sum
[4]={0,0,0,0};
265 xcorr_kernel(_x
, _y
+i
, sum
, len
, arch
);
271 sum
[0] = MAX32(sum
[0], sum
[1]);
272 sum
[2] = MAX32(sum
[2], sum
[3]);
273 sum
[0] = MAX32(sum
[0], sum
[2]);
274 maxcorr
= MAX32(maxcorr
, sum
[0]);
277 /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
278 for (;i
<max_pitch
;i
++)
281 sum
= celt_inner_prod(_x
, _y
+i
, len
, arch
);
284 maxcorr
= MAX32(maxcorr
, sum
);
293 void pitch_search(const opus_val16
* OPUS_RESTRICT x_lp
, opus_val16
* OPUS_RESTRICT y
,
294 int len
, int max_pitch
, int *pitch
, int arch
)
298 int best_pitch
[2]={0,0};
299 VARDECL(opus_val16
, x_lp4
);
300 VARDECL(opus_val16
, y_lp4
);
301 VARDECL(opus_val32
, xcorr
);
304 opus_val32 xmax
, ymax
;
312 celt_assert(max_pitch
>0);
315 ALLOC(x_lp4
, len
>>2, opus_val16
);
316 ALLOC(y_lp4
, lag
>>2, opus_val16
);
317 ALLOC(xcorr
, max_pitch
>>1, opus_val32
);
319 /* Downsample by 2 again */
320 for (j
=0;j
<len
>>2;j
++)
321 x_lp4
[j
] = x_lp
[2*j
];
322 for (j
=0;j
<lag
>>2;j
++)
326 xmax
= celt_maxabs16(x_lp4
, len
>>2);
327 ymax
= celt_maxabs16(y_lp4
, lag
>>2);
328 shift
= celt_ilog2(MAX32(1, MAX32(xmax
, ymax
)))-11;
331 for (j
=0;j
<len
>>2;j
++)
332 x_lp4
[j
] = SHR16(x_lp4
[j
], shift
);
333 for (j
=0;j
<lag
>>2;j
++)
334 y_lp4
[j
] = SHR16(y_lp4
[j
], shift
);
335 /* Use double the shift for a MAC */
342 /* Coarse search with 4x decimation */
347 celt_pitch_xcorr(x_lp4
, y_lp4
, xcorr
, len
>>2, max_pitch
>>2, arch
);
349 find_best_pitch(xcorr
, y_lp4
, len
>>2, max_pitch
>>2, best_pitch
355 /* Finer search with 2x decimation */
359 for (i
=0;i
<max_pitch
>>1;i
++)
363 if (abs(i
-2*best_pitch
[0])>2 && abs(i
-2*best_pitch
[1])>2)
367 for (j
=0;j
<len
>>1;j
++)
368 sum
+= SHR32(MULT16_16(x_lp
[j
],y
[i
+j
]), shift
);
370 sum
= celt_inner_prod(x_lp
, y
+i
, len
>>1, arch
);
372 xcorr
[i
] = MAX32(-1, sum
);
374 maxcorr
= MAX32(maxcorr
, sum
);
377 find_best_pitch(xcorr
, y
, len
>>1, max_pitch
>>1, best_pitch
383 /* Refine by pseudo-interpolation */
384 if (best_pitch
[0]>0 && best_pitch
[0]<(max_pitch
>>1)-1)
387 a
= xcorr
[best_pitch
[0]-1];
388 b
= xcorr
[best_pitch
[0]];
389 c
= xcorr
[best_pitch
[0]+1];
390 if ((c
-a
) > MULT16_32_Q15(QCONST16(.7f
,15),b
-a
))
392 else if ((a
-c
) > MULT16_32_Q15(QCONST16(.7f
,15),b
-c
))
399 *pitch
= 2*best_pitch
[0]-offset
;
405 static opus_val16
compute_pitch_gain(opus_val32 xy
, opus_val32 xx
, opus_val32 yy
)
411 if (xy
== 0 || xx
== 0 || yy
== 0)
413 sx
= celt_ilog2(xx
)-14;
414 sy
= celt_ilog2(yy
)-14;
416 x2y2
= SHR32(MULT16_16(VSHR32(xx
, sx
), VSHR32(yy
, sy
)), 14);
427 den
= celt_rsqrt_norm(x2y2
);
428 g
= MULT16_32_Q15(den
, xy
);
429 g
= VSHR32(g
, (shift
>>1)-1);
430 return EXTRACT16(MIN32(g
, Q15ONE
));
433 static opus_val16
compute_pitch_gain(opus_val32 xy
, opus_val32 xx
, opus_val32 yy
)
435 return xy
/celt_sqrt(1+xx
*yy
);
439 static const int second_check
[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2};
440 opus_val16
remove_doubling(opus_val16
*x
, int maxperiod
, int minperiod
,
441 int N
, int *T0_
, int prev_period
, opus_val16 prev_gain
, int arch
)
446 opus_val32 xy
,xx
,yy
,xy2
;
448 opus_val32 best_xy
, best_yy
;
451 VARDECL(opus_val32
, yy_lookup
);
454 minperiod0
= minperiod
;
465 ALLOC(yy_lookup
, maxperiod
+1, opus_val32
);
466 dual_inner_prod(x
, x
, x
-T0
, N
, &xx
, &xy
, arch
);
469 for (i
=1;i
<=maxperiod
;i
++)
471 yy
= yy
+MULT16_16(x
[-i
],x
[-i
])-MULT16_16(x
[N
-i
],x
[N
-i
]);
472 yy_lookup
[i
] = MAX32(0, yy
);
477 g
= g0
= compute_pitch_gain(xy
, xx
, yy
);
478 /* Look for any pitch at T/k */
485 T1
= celt_udiv(2*T0
+k
, 2*k
);
488 /* Look for another strong correlation at T1b */
497 T1b
= celt_udiv(2*second_check
[k
]*T0
+k
, 2*k
);
499 dual_inner_prod(x
, &x
[-T1
], &x
[-T1b
], N
, &xy
, &xy2
, arch
);
500 xy
= HALF32(xy
+ xy2
);
501 yy
= HALF32(yy_lookup
[T1
] + yy_lookup
[T1b
]);
502 g1
= compute_pitch_gain(xy
, xx
, yy
);
503 if (abs(T1
-prev_period
)<=1)
505 else if (abs(T1
-prev_period
)<=2 && 5*k
*k
< T0
)
506 cont
= HALF16(prev_gain
);
509 thresh
= MAX16(QCONST16(.3f
,15), MULT16_16_Q15(QCONST16(.7f
,15),g0
)-cont
);
510 /* Bias against very high pitch (very short period) to avoid false-positives
511 due to short-term correlation */
513 thresh
= MAX16(QCONST16(.4f
,15), MULT16_16_Q15(QCONST16(.85f
,15),g0
)-cont
);
514 else if (T1
<2*minperiod
)
515 thresh
= MAX16(QCONST16(.5f
,15), MULT16_16_Q15(QCONST16(.9f
,15),g0
)-cont
);
524 best_xy
= MAX32(0, best_xy
);
525 if (best_yy
<= best_xy
)
528 pg
= SHR32(frac_div32(best_xy
,best_yy
+1),16);
531 xcorr
[k
] = celt_inner_prod(x
, x
-(T
+k
-1), N
, arch
);
532 if ((xcorr
[2]-xcorr
[0]) > MULT16_32_Q15(QCONST16(.7f
,15),xcorr
[1]-xcorr
[0]))
534 else if ((xcorr
[0]-xcorr
[2]) > MULT16_32_Q15(QCONST16(.7f
,15),xcorr
[1]-xcorr
[2]))