Merge r14152 from theora-exp.
[xiph/unicode.git] / postfish / declip.c
blobecf42cac8d7c989b3695801ace78ce9be109511c
1 /*
3 * postfish
4 *
5 * Copyright (C) 2002-2005 Monty and Xiph.Org
7 * Postfish is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2, or (at your option)
10 * any later version.
12 * Postfish is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Postfish; see the file COPYING. If not, write to the
19 * Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
24 #include "postfish.h"
25 #include <math.h>
26 #include <sys/types.h>
27 #include <fftw3.h>
28 #include "reconstruct.h"
29 #include "window.h"
30 #include "feedback.h"
32 extern int input_rate;
33 extern int input_ch;
34 extern int input_size;
36 /* accessed only in playback thread/setup */
38 static fftwf_plan fftwf_weight;
40 static float *work;
41 static float *freq;
43 static int blocksize=0;
44 static int lopad=0,hipad=0;
45 static u_int32_t *widthlookup=0;
47 static float *leftwindow=0;
48 static float *rightwindow=0;
49 static int left=0;
50 static int right=0;
51 static float width=.5;
52 static float **lap;
53 static float **cache;
54 static u_int32_t cache_active;
55 static int cache_samples;
56 static int fillstate=0; /* 0: uninitialized
57 1: normal
58 2: eof processed */
59 static time_linkage out;
61 /* accessed across threads */
62 sig_atomic_t *declip_active;
63 int *declip_prev_active;
65 sig_atomic_t declip_visible=0;
67 sig_atomic_t *declip_chtrigger=0;
68 sig_atomic_t declip_pending_blocksize=0;
69 sig_atomic_t declip_convergence=0;
70 sig_atomic_t declip_iterations=0;
72 /* feedback! */
73 typedef struct declip_feedback{
74 feedback_generic parent_class;
75 float *peak;
76 int *clipcount;
77 int *total;
78 } declip_feedback;
80 static feedback_generic_pool feedpool;
82 static feedback_generic *new_declip_feedback(void){
83 declip_feedback *ret=malloc(sizeof(*ret));
84 ret->clipcount=malloc((input_ch)*sizeof(*ret->clipcount));
85 ret->peak=malloc((input_ch)*sizeof(*ret->peak));
86 ret->total=malloc((input_ch)*sizeof(*ret->total));
87 return (feedback_generic *)ret;
90 static void push_declip_feedback(int *clip,float *peak,int *total){
91 int n=input_ch;
92 declip_feedback *f=(declip_feedback *)
93 feedback_new(&feedpool,new_declip_feedback);
94 memcpy(f->clipcount,clip,n*sizeof(*clip));
95 memcpy(f->peak,peak,n*sizeof(*peak));
96 memcpy(f->total,total,n*sizeof(*total));
97 feedback_push(&feedpool,(feedback_generic *)f);
100 int pull_declip_feedback(int *clip,float *peak,int *total){
101 declip_feedback *f=(declip_feedback *)feedback_pull(&feedpool);
103 if(!f)return 0;
105 if(clip)memcpy(clip,f->clipcount,sizeof(*clip)*input_ch);
106 if(peak)memcpy(peak,f->peak,sizeof(*peak)*input_ch);
107 if(total)memcpy(total,f->total,sizeof(*total)*input_ch);
109 feedback_old(&feedpool,(feedback_generic *)f);
110 return 1;
113 static void apply_window(float *out,float *data,int sq){
114 int i,j;
115 int half=max(left,right);
117 for(i=0;i<half-left;i++)
118 out[i]=0;
120 if(sq){
121 for(j=0;i<half;i++,j++)
122 out[i]=data[i]*leftwindow[j]*leftwindow[j];
123 for(j=0;j<right;i++,j++)
124 out[i]=data[i]*rightwindow[right-j]*rightwindow[right-j];
125 }else{
126 for(j=0;i<half;i++,j++)
127 out[i]=data[i]*leftwindow[j];
128 for(j=0;j<right;i++,j++)
129 out[i]=data[i]*rightwindow[right-j];
132 for(;i<half*2;i++)
133 out[i]=0;
136 static void setup_window(int lleft,int lright){
137 left=lleft;
138 right=lright;
140 leftwindow=window_get(2,left);
141 rightwindow=window_get(2,right);
144 static void setup_blocksize(int newblocksize){
145 int i;
147 if(blocksize)fftwf_destroy_plan(fftwf_weight);
148 blocksize=newblocksize;
149 fftwf_weight=fftwf_plan_dft_r2c_1d(blocksize*2,work,
150 (fftwf_complex *)freq,
151 FFTW_MEASURE);
153 lopad=1-rint(fromBark(toBark(0.)-width)*blocksize*2/input_rate);
154 hipad=rint(fromBark(toBark(input_rate*.5)+width)*blocksize*2/input_rate)+lopad;
155 for(i=0;i<blocksize;i++){
156 float bark=toBark(input_rate*i/(blocksize*2));
157 int hi=rint(fromBark(bark-width)*(blocksize*2)/input_rate)-1+lopad;
158 int lo=rint(fromBark(bark+width)*(blocksize*2)/input_rate)+1+lopad;
159 widthlookup[i]=(hi<<16)+lo;
162 reconstruct_reinit(blocksize*2);
165 /* called only by initial setup */
166 int declip_load(void){
167 int i,j;
168 declip_active=calloc(input_ch,sizeof(*declip_active));
169 declip_prev_active=calloc(input_ch,sizeof(*declip_prev_active));
170 declip_chtrigger=malloc(input_ch*sizeof(*declip_chtrigger));
171 for(i=0;i<input_ch;i++)
172 declip_chtrigger[i]=10000;
174 out.channels=input_ch;
175 out.data=malloc(input_ch*sizeof(*out.data));
176 for(i=0;i<input_ch;i++)
177 out.data[i]=malloc(input_size*sizeof(**out.data));
179 fillstate=0;
180 cache=malloc(input_ch*sizeof(*cache));
181 for(i=0;i<input_ch;i++)
182 cache[i]=malloc(input_size*sizeof(**cache));
184 lap=malloc(input_ch*sizeof(*lap));
185 for(i=0;i<input_ch;i++)
186 lap[i]=malloc(input_size*sizeof(**lap));
189 /* alloc for largest possible blocksize */
190 int blocksize=input_size*2;
191 int loestpad=1-rint(fromBark(toBark(0.)-width)*blocksize*2/input_rate);
192 int hiestpad=rint(fromBark(toBark(input_rate*.5)+width)*blocksize*2/input_rate)+loestpad;
193 widthlookup=malloc((hiestpad+1)*sizeof(*widthlookup));
194 freq=fftwf_malloc((blocksize*2+2)*sizeof(*freq));
195 work=fftwf_malloc((blocksize*2)*sizeof(*work));
197 for(i=0,j=32;j<=blocksize*2;i++,j*=2){
198 fftwf_weight=fftwf_plan_dft_r2c_1d(j,work,
199 (fftwf_complex *)freq,
200 FFTW_MEASURE);
201 fftwf_destroy_plan(fftwf_weight);
204 reconstruct_init(32,input_size*4);
206 declip_pending_blocksize=input_size*2;
207 return(0);
210 /* called only in playback thread */
211 int declip_reset(void){
212 /* reset cached pipe state */
213 fillstate=0;
214 while(pull_declip_feedback(NULL,NULL,NULL));
215 return 0;
218 int noisy=0;
219 static void sliding_bark_average(float *f,int n,float width){
220 int i=0;
221 double acc=0.,del=0.;
222 double sec[hipad+1];
224 memset(sec,0,sizeof(sec));
226 for(i=0;i<n/2;i++){
228 int hi=widthlookup[i]>>16;
229 int lo=widthlookup[i]&(0xffff);
230 float del=hypot(f[(i<<1)+1],f[i<<1])/(lo-hi);
232 float hidel=del/((i-hi+lopad));
233 float lodel=del/((lo-i-lopad));
235 sec[hi]+=hidel;
236 sec[i+lopad]-=hidel;
237 sec[i+lopad]-=lodel;
238 sec[lo]+=lodel;
242 for(i=0;i<lopad;i++){
243 del+=sec[i];
244 acc+=del;
247 for(i=0;i<n/2;i++){
248 f[(i<<1)+1]=f[i<<1]=1./(acc*acc);
249 del+=sec[i+lopad];
250 acc+=del;
252 f[n+1]=f[n]=f[n-1];
255 /* work,freq are passed through the static buffer fftwf requires */
256 static void declip(int blocksize,float trigger,
257 float epsilon, float iteration,
258 int *runningtotal, int *runningcount){
259 float flag[blocksize*2];
260 int iterbound,i,count=0;
262 /* too many apps screw up proper output scaling, so previously
263 clipped audio ends up getting rounded to just short of the rail
264 upon export. To avoid users accidentally shooting themselves in
265 the foot, trigger clipping at -.05dB rather than 0dB even if 0dB
266 is selected. This corresponds to mis-rounding 8 bit audio by 1.5
267 steps.*/
268 if(trigger>.99426)trigger=.99426;
270 for(i=blocksize/2;i<blocksize*3/2;i++){
271 flag[i]=0.;
272 if(work[i]>=trigger || work[i]<=-trigger){
273 flag[i]=1.;
274 count++;
279 *runningtotal+=blocksize;
280 *runningcount+=count;
282 if(count){
283 for(i=0;i<blocksize/2;i++)flag[i]=0.;
284 for(i=blocksize*3/2;i<blocksize*2;i++)flag[i]=0.;
285 apply_window(work+blocksize/2,work+blocksize/2,0);
287 fftwf_execute(fftwf_weight);
288 sliding_bark_average(freq,blocksize*2,width);
289 iterbound=blocksize*iteration;
290 if(iterbound<10)iterbound=10;
292 reconstruct(work,freq,flag,epsilon,iterbound);
294 apply_window(work+blocksize/2,work+blocksize/2,0);
295 }else
296 apply_window(work+blocksize/2,work+blocksize/2,1);
300 /* called only by playback thread */
301 time_linkage *declip_read(time_linkage *in){
302 int i,j,k;
303 float local_trigger[input_ch];
304 int count[input_ch];
305 int total[input_ch];
306 float peak[input_ch];
307 u_int32_t active=0;
308 int next_blocksize=declip_pending_blocksize;
309 int orig_blocksize;
311 float local_convergence;
312 float local_iterations;
314 for(i=0;i<input_ch;i++)
315 local_trigger[i]=declip_chtrigger[i]*.0001;
316 local_iterations=declip_iterations*.0001;
317 local_convergence=fromdB(declip_convergence*.1);
319 memset(count,0,sizeof(count));
320 memset(peak,0,sizeof(peak));
321 memset(total,0,sizeof(total));
323 switch(fillstate){
324 case 0: /* prime the lapping and cache */
326 /* set up for the blocksize we're actually using for now */
328 setup_blocksize(next_blocksize);
329 setup_window(blocksize/2,blocksize/2);
332 for(i=0;i<input_ch;i++){
333 int channel_active=declip_active[i];
334 declip_prev_active[i]=channel_active;
336 /* peak feedback */
337 if(declip_visible && !mute_channel_muted(in->active,i)){
338 float *l=in->data[i];
339 for(j=0;j<in->samples;j++)
340 if(fabs(l[j])>peak[i])peak[i]=fabs(l[j]);
343 if(channel_active){
345 /* fill work with the block spanning cache/in (first 1/4, last 1/4 are zeroed) */
346 memset(work,0,sizeof(*work)*blocksize);
347 memcpy(work+blocksize,in->data[i],sizeof(*work)*blocksize/2);
348 memset(work+blocksize+blocksize/2,0,sizeof(*work)*blocksize/2);
349 declip(blocksize,local_trigger[i],local_convergence,local_iterations,
350 total+i,count+i);
352 /* second half of work goes to lap */
353 memcpy(lap[i],work+blocksize,sizeof(*work)*blocksize/2);
355 /* now iterate the pieces purely within in */
356 for(j=0;j+blocksize<=input_size;j+=blocksize/2){
357 memset(work,0,sizeof(*work)*blocksize);
358 memcpy(work+blocksize/2,in->data[i]+j,sizeof(*work)*blocksize);
359 memset(work+blocksize+blocksize/2,0,sizeof(*work)*blocksize/2);
361 declip(blocksize,local_trigger[i],local_convergence,local_iterations,
362 total+i,count+i);
364 /* second half of work goes to lap */
366 float *llap=lap[i]+j;
367 float *lwork=work+blocksize/2;
368 for(k=0;k<blocksize/2;k++)
369 llap[k]+=lwork[k];
370 memcpy(llap+k,lwork+k,sizeof(*work)*blocksize/2);
374 }//else no declipping to do, so direct cache/lap buffer rotation */
377 float *temp=cache[i];
378 cache[i]=in->data[i];
379 in->data[i]=temp;
380 memset(temp,0,sizeof(*temp)*input_size);
383 cache_samples=in->samples;
384 cache_active=in->active;
385 fillstate=1;
386 out.samples=0;
387 if(in->samples==input_size)break;
389 for(i=0;i<input_ch;i++)
390 memset(in->data[i],0,sizeof(**in->data)*input_size);
391 in->samples=0;
392 /* fall through */
394 case 1: /* nominal processing */
395 orig_blocksize=blocksize;
397 /* the 'gap' transition and finishing off the output block is done
398 first as it may need to handle a blocksize transition (and a
399 temporary transition window */
401 if(next_blocksize != orig_blocksize){
402 if(next_blocksize > orig_blocksize) setup_blocksize(next_blocksize);
403 setup_window(orig_blocksize/2,next_blocksize/2);
406 /* the gap piece is also special in that it may need to deal with
407 a transition to/from bypass */
409 for(i=0;i<input_ch;i++){
410 int channel_active=declip_active[i];
412 /* peak feedback */
413 if(declip_visible && !mute_channel_muted(in->active,i)){
414 float *l=in->data[i];
415 for(j=0;j<in->samples;j++)
416 if(fabs(l[j])>peak[i])peak[i]=fabs(l[j]);
420 if(mute_channel_muted(cache_active,i)){
422 /* we may need cache for a later transition, so keep it up to date */
423 float *temp=cache[i];
424 cache[i]=in->data[i];
425 in->data[i]=temp;
427 /* zero the lap */
428 if(!mute_channel_muted(in->active,i))
429 memset(lap[i],0,sizeof(*lap[i])*input_size);
431 }else{
433 active|=(1<<i); /* audible output in out.data[i] */
435 if(mute_channel_muted(in->active,i)){
436 if(declip_prev_active[i]){
437 /* Cache: Muted=False, Bypass=False
438 Input: Muted=True, Bypass=X */
440 /* transition to mute, so lap is finished output. Rotate all */
441 float *temp=cache[i];
442 cache[i]=in->data[i];
443 in->data[i]=temp;
445 temp=out.data[i];
446 out.data[i]=lap[i];
447 lap[i]=temp;
448 }else{
449 /* Cache: Muted=False, Bypass=True
450 Input: Muted=True, Bypass=X */
452 /* rotate in/cache/out, transition out */
453 float *temp=out.data[i];
454 out.data[i]=cache[i];
455 cache[i]=in->data[i];
456 in->data[i]=temp;
459 }else{
460 if(!declip_prev_active[i]){
461 if(!channel_active){
462 /* Cache: Muted=False, Bypass=True
463 Input: Muted=False, Bypass=True */
465 /* all bypass! rotate in/cache/out */
466 float *temp=out.data[i];
467 out.data[i]=cache[i];
468 cache[i]=in->data[i];
469 in->data[i]=temp;
470 }else{
471 /* Cache: Muted=False, Bypass=True
472 Input: Muted=False, Bypass=False */
474 /* transition the lap; right window to left of in */
475 for(j=0;j<right;j++)
476 lap[i][j]=in->data[i][j]*
477 rightwindow[right-j]*rightwindow[right-j];
479 /* all rotate in/cache/out */
480 float *temp=out.data[i];
481 out.data[i]=cache[i];
482 cache[i]=in->data[i];
483 in->data[i]=temp;
485 }else{
486 if(!channel_active){
487 /* Cache: Muted=False, Bypass=False
488 Input: Muted=False, Bypass=True */
490 /* finish off lap, then rotate all */
491 /* left window to end of cache */
492 for(j=input_size-left,k=0;j<input_size;j++,k++)
493 lap[i][j]+=cache[i][j]*leftwindow[k]*leftwindow[k];
494 float *temp=cache[i];
495 cache[i]=in->data[i];
496 in->data[i]=temp;
498 temp=out.data[i];
499 out.data[i]=lap[i];
500 lap[i]=temp;
501 }else{
502 /* Cache: Muted=False, Bypass=False
503 Input: Muted=False, Bypass=False */
505 /* nominal case; the only one involving declipping the gap */
506 memset(work,0,sizeof(*work)*blocksize/2);
507 memcpy(work+blocksize/2,cache[i]+input_size-blocksize/2,sizeof(*work)*blocksize/2);
508 memcpy(work+blocksize,in->data[i],sizeof(*work)*blocksize/2);
509 memset(work+blocksize+blocksize/2,0,sizeof(*work)*blocksize/2);
511 declip(blocksize,local_trigger[i],local_convergence,local_iterations,
512 total+i,count+i);
514 /* finish lap from last frame */
516 float *llap=lap[i]+input_size-blocksize/2;
517 float *lwork=work+blocksize/2;
518 for(j=0;j<blocksize/2;j++)
519 llap[j]+=lwork[j];
521 /* rotate buffers */
522 float *temp=out.data[i];
523 out.data[i]=lap[i];
524 lap[i]=temp;
526 temp=in->data[i];
527 in->data[i]=cache[i];
528 cache[i]=temp;
530 /* begin lap for this frame */
531 memcpy(lap[i],work+blocksize,sizeof(*work)*blocksize/2);
537 declip_prev_active[i]=channel_active;
540 /* also rotate metadata */
541 out.samples=cache_samples;
542 cache_samples=in->samples;
543 cache_active=in->active;
545 /* finish transition to new blocksize (if a change is in progress) */
546 if(next_blocksize != orig_blocksize){
547 if(next_blocksize <= orig_blocksize) setup_blocksize(next_blocksize);
548 setup_window(blocksize/2,blocksize/2);
551 /* declip the rest of the current frame */
552 for(i=0;i<input_ch;i++){
553 if(!mute_channel_muted(cache_active,i)){
554 /* declip */
555 if(declip_prev_active[i]){
557 for(j=0;j+blocksize<=input_size;j+=blocksize/2){
558 memset(work,0,sizeof(*work)*blocksize);
559 memcpy(work+blocksize/2,cache[i]+j,sizeof(*work)*blocksize);
560 memset(work+blocksize+blocksize/2,0,sizeof(*work)*blocksize/2);
561 declip(blocksize,local_trigger[i],local_convergence,local_iterations,
562 total+i,count+i);
565 float *llap=lap[i]+j;
566 float *lwork=work+blocksize/2;
567 for(k=0;k<blocksize/2;k++)
568 llap[k]+=lwork[k];
569 memcpy(llap+k,lwork+k,sizeof(*work)*blocksize/2);
575 if(out.samples<input_size)fillstate=2;
576 break;
577 case 2: /* we've pushed out EOF already */
578 out.samples=0;
581 push_declip_feedback(count,peak,total); /* we can push one (and
582 exactly one) for every
583 block that comes in *or*
584 one for every block that
585 goes out. In declip,
586 it's for every block that
587 comes in */
590 int tozero=input_size-out.samples;
591 if(tozero)
592 for(i=0;i<out.channels;i++)
593 memset(out.data[i]+out.samples,0,sizeof(**out.data)*tozero);
596 out.active=active;
597 return &out;