8 #include "psycho_1_priv.h"
11 double dbtable
[DBTAB
];
13 /**********************************************************************
15 This module implements the psychoacoustic model I for the
16 MPEG encoder layer II. It uses simplified tonal and noise masking
17 threshold analysis to generate SMR for the encoder bit allocation
20 **********************************************************************/
22 void psycho_1 (short buffer
[2][1152], double scale
[2][SBLIMIT
],
23 double ltmin
[2][SBLIMIT
], frame_info
* frame
)
25 frame_header
*header
= frame
->header
;
27 int sblimit
= frame
->sblimit
;
28 int k
, i
, tone
= 0, noise
= 0;
30 static int off
[2] = { 256, 256 };
31 double sample
[FFT_SIZE
];
32 double spike
[2][SBLIMIT
];
33 static D1408
*fft_buf
;
34 static mask_ptr power
;
36 FLOAT energy
[FFT_SIZE
];
38 /* call functions for critical boundaries, freq. */
39 if (!init
) { /* bands, bark values, and mapping */
40 fft_buf
= (D1408
*) mem_alloc ((long) sizeof (D1408
) * 2, "fft_buf");
41 power
= (mask_ptr
) mem_alloc (sizeof (mask
) * HAN_SIZE
, "power");
42 if (header
->version
== MPEG_AUDIO_ID
) {
43 psycho_1_read_cbound (header
->lay
, header
->sampling_frequency
);
44 psycho_1_read_freq_band (<g
, header
->lay
, header
->sampling_frequency
);
46 psycho_1_read_cbound (header
->lay
, header
->sampling_frequency
+ 4);
47 psycho_1_read_freq_band (<g
, header
->lay
, header
->sampling_frequency
+ 4);
49 psycho_1_make_map (power
, ltg
);
50 for (i
= 0; i
< 1408; i
++)
51 fft_buf
[0][i
] = fft_buf
[1][i
] = 0;
53 psycho_1_init_add_db (); /* create the add_db table */
57 for (k
= 0; k
< nch
; k
++) {
58 /* check pcm input for 3 blocks of 384 samples */
59 /* sami's speedup, added in 02j
60 saves about 4% overall during an encode */
61 int ok
= off
[k
] % 1408;
62 for (i
= 0; i
< 1152; i
++) {
63 fft_buf
[k
][ok
++] = (double) buffer
[k
][i
] / SCALE
;
67 ok
= (off
[k
] + 1216) % 1408;
68 for (i
= 0; i
< FFT_SIZE
; i
++) {
69 sample
[i
] = fft_buf
[k
][ok
++];
76 psycho_1_hann_fft_pickmax (sample
, power
, &spike
[k
][0], energy
);
77 psycho_1_tonal_label (power
, &tone
);
78 psycho_1_noise_label (power
, &noise
, ltg
, energy
);
79 //psycho_1_dump(power, &tone, &noise) ;
80 psycho_1_subsampling (power
, ltg
, &tone
, &noise
);
81 psycho_1_threshold (power
, ltg
, &tone
, &noise
,
82 bitrate
[header
->version
][header
->bitrate_index
] / nch
);
83 psycho_1_minimum_mask (ltg
, <min
[k
][0], sblimit
);
84 psycho_1_smr (<min
[k
][0], &spike
[k
][0], &scale
[k
][0], sblimit
);
94 void psycho_1_read_cbound (int lay
, int freq
)
95 /* this function reads in critical band boundaries */
99 //static const int FirstCriticalBand[7][27] = {...
103 if ((lay
< 1) || (lay
> 2)) {
104 printf ("Internal error (read_cbound())\n");
107 if ((freq
< 0) || (freq
> 6) || (freq
== 3)) {
108 printf ("Internal error (read_cbound())\n");
112 crit_band
= SecondCriticalBand
[freq
][0];
113 cbound
= (int *) mem_alloc (sizeof (int) * crit_band
, "cbound");
114 for (i
= 0; i
< crit_band
; i
++) {
115 k
= SecondCriticalBand
[freq
][i
+ 1];
119 printf ("Internal error (read_cbound())\n");
125 void psycho_1_read_freq_band (ltg
, lay
, freq
) /* this function reads in */
126 int lay
, freq
; /* frequency bands and bark */
127 g_ptr
*ltg
; /* values */
130 #include "freqtable.h"
134 if ((freq
< 0) || (freq
> 6) || (freq
== 3)) {
135 printf ("Internal error (read_freq_band())\n");
139 /* read input for freq. subbands */
141 sub_size
= SecondFreqEntries
[freq
] + 1;
142 *ltg
= (g_ptr
) mem_alloc (sizeof (g_thres
) * sub_size
, "ltg");
143 (*ltg
)[0].line
= 0; /* initialize global masking threshold */
144 (*ltg
)[0].bark
= 0.0;
145 (*ltg
)[0].hear
= 0.0;
146 for (i
= 1; i
< sub_size
; i
++) {
147 k
= SecondFreqSubband
[freq
][i
- 1].line
;
150 (*ltg
)[i
].bark
= SecondFreqSubband
[freq
][i
- 1].bark
;
151 (*ltg
)[i
].hear
= SecondFreqSubband
[freq
][i
- 1].hear
;
153 printf ("Internal error (read_freq_band())\n");
160 void psycho_1_make_map (mask power
[HAN_SIZE
], g_thres
* ltg
)
161 /* this function calculates the global masking threshold */
165 for (i
= 1; i
< sub_size
; i
++)
166 for (j
= ltg
[i
- 1].line
; j
<= ltg
[i
].line
; j
++)
170 void psycho_1_init_add_db (void)
174 for (i
= 0; i
< DBTAB
; i
++) {
175 x
= (double) i
/ 10.0;
176 dbtable
[i
] = 10 * log10 (1 + pow (10.0, x
/ 10.0)) - x
;
180 INLINE
double add_db (double a
, double b
)
182 /* MFC - if the difference between a and b is large (>99), then just return the
183 largest one. (about 10% of the time)
184 - For differences between 0 and 99, return the largest value, but add
185 in a pre-calculated difference value.
186 - the value 99 was chosen arbitarily.
187 - maximum (a-b) i've seen is 572 */
190 fdiff
= (10.0 * (a
- b
));
195 if (fdiff
< -990.0) {
201 return (a
+ dbtable
[idiff
]);
204 return (b
+ dbtable
[-idiff
]);
207 /****************************************************************
208 * Window the samples then,
209 * Fast Fourier transform of the input samples.
211 * ( call the FHT-based fft() in fft.c )
214 ****************************************************************/
215 void psycho_1_hann_fft_pickmax (double sample
[FFT_SIZE
], mask power
[HAN_SIZE
],
216 double spike
[SBLIMIT
], FLOAT energy
[FFT_SIZE
])
218 FLOAT x_real
[FFT_SIZE
];
220 register double sqrt_8_over_3
;
222 static double *window
;
225 if (!init
) { /* calculate window function for the Fourier transform */
226 window
= (double *) mem_alloc (sizeof (DFFT
), "window");
227 sqrt_8_over_3
= pow (8.0 / 3.0, 0.5);
228 for (i
= 0; i
< FFT_SIZE
; i
++) {
229 /* Hann window formula */
231 sqrt_8_over_3
* 0.5 * (1 -
232 cos (2.0 * PI
* i
/ (FFT_SIZE
))) / FFT_SIZE
;
236 for (i
= 0; i
< FFT_SIZE
; i
++)
237 x_real
[i
] = (FLOAT
) (sample
[i
] * window
[i
]);
239 psycho_1_fft (x_real
, energy
, FFT_SIZE
);
241 for (i
= 0; i
< HAN_SIZE
; i
++) { /* calculate power density spectrum */
242 if (energy
[i
] < 1E-20)
243 power
[i
].x
= -200.0 + POWERNORM
;
245 power
[i
].x
= 10 * log10 (energy
[i
]) + POWERNORM
;
246 power
[i
].next
= STOP
;
247 power
[i
].type
= FALSE
;
250 /* Calculate the sum of spectral component in each subband from bound 4-16 */
252 #define CF 1073741824 /* pow(10, 0.1*POWERNORM) */
253 #define DBM 1E-20 /* pow(10.0, 0.1*DBMIN */
254 for (i
= 0; i
< HAN_SIZE
; spike
[i
>> 4] = 10.0 * log10 (sum
), i
+= 16) {
255 for (j
= 0, sum
= DBM
; j
< 16; j
++)
256 sum
+= CF
* energy
[i
+ j
];
260 /****************************************************************
262 * This function labels the tonal component in the power
265 ****************************************************************/
267 void psycho_1_tonal_label (mask power
[HAN_SIZE
], int *tone
)
268 /* this function extracts (tonal) sinusoidals from the spectrum */
270 int i
, j
, last
= LAST
, first
, run
, last_but_one
= LAST
; /* dpwe */
274 for (i
= 2; i
< HAN_SIZE
- 12; i
++) {
275 if (power
[i
].x
> power
[i
- 1].x
&& power
[i
].x
>= power
[i
+ 1].x
) {
276 power
[i
].type
= TONE
;
277 power
[i
].next
= LAST
;
279 power
[last
].next
= i
;
288 while ((first
!= LAST
) && (first
!= STOP
)) { /* the conditions for the tonal */
289 if (first
< 3 || first
> 500)
290 run
= 0; /* otherwise k+/-j will be out of bounds */
292 run
= 2; /* components in layer II, which */
293 else if (first
< 127)
294 run
= 3; /* are the boundaries for calc. */
295 else if (first
< 255)
296 run
= 6; /* the tonal components */
299 max
= power
[first
].x
- 7; /* after calculation of tonal */
300 for (j
= 2; j
<= run
; j
++) /* components, set to local max */
301 if (max
< power
[first
- j
].x
|| max
< power
[first
+ j
].x
) {
302 power
[first
].type
= FALSE
;
305 if (power
[first
].type
== TONE
) { /* extract tonal components */
309 while ((power
[help
].next
!= LAST
) && (power
[help
].next
- first
) <= run
)
310 help
= power
[help
].next
;
311 help
= power
[help
].next
;
312 power
[first
].next
= help
;
313 if ((first
- last
) <= run
) {
314 if (last_but_one
!= LAST
)
315 power
[last_but_one
].next
= first
;
317 if (first
> 1 && first
< 500) { /* calculate the sum of the */
318 double tmp
; /* powers of the components */
319 tmp
= add_db (power
[first
- 1].x
, power
[first
+ 1].x
);
320 power
[first
].x
= add_db (power
[first
].x
, tmp
);
322 for (j
= 1; j
<= run
; j
++) {
323 power
[first
- j
].x
= power
[first
+ j
].x
= DBMIN
;
324 power
[first
- j
].next
= power
[first
+ j
].next
= STOP
;
325 power
[first
- j
].type
= power
[first
+ j
].type
= FALSE
;
329 first
= power
[first
].next
;
332 if (last
== LAST
); /* *tone = power[first].next; dpwe */
334 power
[last
].next
= power
[first
].next
;
336 first
= power
[first
].next
;
337 power
[ll
].next
= STOP
;
342 /****************************************************************
344 * This function groups all the remaining non-tonal
345 * spectral lines into critical band where they are replaced by
348 ****************************************************************/
350 void psycho_1_noise_label (mask
* power
, int *noise
, g_thres
* ltg
,
351 FLOAT energy
[FFT_SIZE
])
353 int i
, j
, centre
, last
= LAST
;
354 double index
, weight
, sum
;
355 /* calculate the remaining spectral */
356 for (i
= 0; i
< crit_band
- 1; i
++) { /* lines for non-tonal components */
357 for (j
= cbound
[i
], weight
= 0.0, sum
= DBMIN
; j
< cbound
[i
+ 1]; j
++) {
358 if (power
[j
].type
!= TONE
) {
359 if (power
[j
].x
!= DBMIN
) {
360 sum
= add_db (power
[j
].x
, sum
);
361 /* Weight is used in finding the geometric mean of the noise energy within a subband */
362 weight
+= CF
* energy
[j
] * (double) (j
- cbound
[i
]) / (double) (cbound
[i
+ 1] - cbound
[i
]); /* correction */
365 } /* check to see if the spectral line is low dB, and if */
366 } /* so replace the center of the critical band, which is */
367 /* the center freq. of the noise component */
370 centre
= (cbound
[i
+ 1] + cbound
[i
]) / 2;
372 /* fprintf(stderr, "%i [%f %f] -", count++,weight/pow(10.0,0.1*sum), weight*pow(10.0,-0.1*sum)); */
373 index
= weight
* pow (10.0, -0.1 * sum
);
375 cbound
[i
] + (int) (index
* (double) (cbound
[i
+ 1] - cbound
[i
]));
379 /* locate next non-tonal component until finished; */
380 /* add to list of non-tonal components */
382 /* Masahiro Iwadare's fix for infinite looping problem? */
383 if (power
[centre
].type
== TONE
) {
384 if (power
[centre
+ 1].type
== TONE
) {
393 power
[centre
].next
= LAST
;
394 power
[last
].next
= centre
;
396 power
[centre
].x
= sum
;
397 power
[centre
].type
= NOISE
;
402 /****************************************************************
404 * This function reduces the number of noise and tonal
405 * component for further threshold analysis.
407 ****************************************************************/
409 void psycho_1_subsampling (mask power
[HAN_SIZE
], g_thres
* ltg
, int *tone
, int *noise
)
414 old
= STOP
; /* calculate tonal components for */
416 while ((i
!= LAST
) && (i
!= STOP
))
417 { /* reduction of spectral lines */
418 if (power
[i
].x
< ltg
[power
[i
].map
].hear
) {
419 power
[i
].type
= FALSE
;
422 *tone
= power
[i
].next
;
424 power
[old
].next
= power
[i
].next
;
430 old
= STOP
; /* calculate non-tonal components for */
431 while ((i
!= LAST
) && (i
!= STOP
)) { /* reduction of spectral lines */
432 if (power
[i
].x
< ltg
[power
[i
].map
].hear
) {
433 power
[i
].type
= FALSE
;
436 *noise
= power
[i
].next
;
438 power
[old
].next
= power
[i
].next
;
445 while ((i
!= LAST
) && (i
!= STOP
))
446 { /* if more than one */
447 if (power
[i
].next
== LAST
)
448 break; /* tonal component */
449 if (ltg
[power
[power
[i
].next
].map
].bark
- /* is less than .5 */
450 ltg
[power
[i
].map
].bark
< 0.5) { /* bark, take the */
451 if (power
[power
[i
].next
].x
> power
[i
].x
) { /* maximum */
453 *tone
= power
[i
].next
;
455 power
[old
].next
= power
[i
].next
;
456 power
[i
].type
= FALSE
;
460 power
[power
[i
].next
].type
= FALSE
;
461 power
[power
[i
].next
].x
= DBMIN
;
462 power
[i
].next
= power
[power
[i
].next
].next
;
472 /****************************************************************
474 * This function calculates the individual threshold and
475 * sum with the quiet threshold to find the global threshold.
477 ****************************************************************/
479 /* mainly just changed the way range checking was done MFC Nov 1999 */
480 void psycho_1_threshold (mask power
[HAN_SIZE
], g_thres
* ltg
, int *tone
, int *noise
,
486 for (k
= 1; k
< sub_size
; k
++) {
488 t
= *tone
; /* calculate individual masking threshold for */
489 while ((t
!= LAST
) && (t
!= STOP
))
490 { /* components in order to find the global */
491 dz
= ltg
[k
].bark
- ltg
[power
[t
].map
].bark
; /* distance of bark value */
492 if (dz
>= -3.0 && dz
< 8.0) {
493 tmps
= -1.525 - 0.275 * ltg
[power
[t
].map
].bark
- 4.5 + power
[t
].x
;
494 /* masking function for lower & upper slopes */
496 vf
= 17 * (dz
+ 1) - (0.4 * power
[t
].x
+ 6);
498 vf
= (0.4 * power
[t
].x
+ 6) * dz
;
502 vf
= -(dz
- 1) * (17 - 0.15 * power
[t
].x
) - 17;
503 ltg
[k
].x
= add_db (ltg
[k
].x
, tmps
+ vf
);
508 t
= *noise
; /* calculate individual masking threshold */
509 while ((t
!= LAST
) && (t
!= STOP
)) { /* for non-tonal components to find LTG */
510 dz
= ltg
[k
].bark
- ltg
[power
[t
].map
].bark
; /* distance of bark value */
511 if (dz
>= -3.0 && dz
< 8.0) {
512 tmps
= -1.525 - 0.175 * ltg
[power
[t
].map
].bark
- 0.5 + power
[t
].x
;
513 /* masking function for lower & upper slopes */
515 vf
= 17 * (dz
+ 1) - (0.4 * power
[t
].x
+ 6);
517 vf
= (0.4 * power
[t
].x
+ 6) * dz
;
521 vf
= -(dz
- 1) * (17 - 0.15 * power
[t
].x
) - 17;
522 ltg
[k
].x
= add_db (ltg
[k
].x
, tmps
+ vf
);
527 ltg
[k
].x
= add_db (ltg
[k
].hear
, ltg
[k
].x
);
529 ltg
[k
].x
= add_db (ltg
[k
].hear
- 12.0, ltg
[k
].x
);
534 /****************************************************************
536 * This function finds the minimum masking threshold and
537 * return the value to the encoder.
539 ****************************************************************/
541 void psycho_1_minimum_mask (g_thres
* ltg
, double ltmin
[SBLIMIT
], int sblimit
)
547 for (i
= 0; i
< sblimit
; i
++)
548 if (j
>= sub_size
- 1) /* check subband limit, and */
549 ltmin
[i
] = ltg
[sub_size
- 1].hear
; /* calculate the minimum masking */
550 else { /* level of LTMIN for each subband */
552 while (ltg
[j
].line
>> 4 == i
&& j
< sub_size
) {
561 /*****************************************************************
563 * This procedure is called in musicin to pick out the
564 * smaller of the scalefactor or threshold.
566 *****************************************************************/
568 void psycho_1_smr (double ltmin
[SBLIMIT
], double spike
[SBLIMIT
], double scale
[SBLIMIT
],
574 for (i
= 0; i
< sblimit
; i
++) { /* determine the signal */
575 max
= 20 * log10 (scale
[i
] * 32768) - 10; /* level for each subband */
577 max
= spike
[i
]; /* for the maximum scale */
578 max
-= ltmin
[i
]; /* factors */
583 void psycho_1_dump(mask power
[HAN_SIZE
], int *tone
, int *noise
) {
586 fprintf(stdout
,"1 Ton: ");
588 while (t
!=LAST
&& t
!=STOP
) {
589 fprintf(stdout
,"[%i] %3.0f ",t
, power
[t
].x
);
592 fprintf(stdout
,"\n");
594 fprintf(stdout
,"1 Nos: ");
596 while (t
!=LAST
&& t
!=STOP
) {
597 fprintf(stdout
,"[%i] %3.0f ",t
, power
[t
].x
);
600 fprintf(stdout
,"\n");