From 2f147c3280b2a726e3112ae802c08705e3ab09db Mon Sep 17 00:00:00 2001 From: saratoga Date: Sun, 10 May 2009 23:18:04 +0000 Subject: [PATCH] Patch by Mohamed Tarek from FS #10182. Remove floating point code (FFT, MDCT, etc) from libcook. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@20902 a1c6a512-1295-4272-9138-f99709370657 --- apps/codecs/libcook/Makefile.test | 2 +- apps/codecs/libcook/cook.c | 267 ----------- apps/codecs/libcook/cook.h | 17 - apps/codecs/libcook/dsputil.h | 908 -------------------------------------- apps/codecs/libcook/fft.c | 374 ---------------- apps/codecs/libcook/main.c | 2 +- apps/codecs/libcook/mdct.c | 229 ---------- 7 files changed, 2 insertions(+), 1797 deletions(-) rewrite apps/codecs/libcook/dsputil.h (100%) rewrite apps/codecs/libcook/fft.c (100%) rewrite apps/codecs/libcook/mdct.c (100%) diff --git a/apps/codecs/libcook/Makefile.test b/apps/codecs/libcook/Makefile.test index 5b60ca3ef..d992eceeb 100644 --- a/apps/codecs/libcook/Makefile.test +++ b/apps/codecs/libcook/Makefile.test @@ -1,5 +1,5 @@ CFLAGS = -Wall -O3 -OBJS = main.o bitstream.o cook.o fft.o libavutil/log.o mdct.o libavutil/mem.o libavutil/lfg.o libavutil/md5.o rm2wav.o +OBJS = main.o bitstream.o cook.o libavutil/log.o libavutil/mem.o libavutil/lfg.o libavutil/md5.o rm2wav.o cooktest: $(OBJS) gcc -o cooktest $(OBJS) -lm diff --git a/apps/codecs/libcook/cook.c b/apps/codecs/libcook/cook.c index 8bb3b5a11..bd72179cb 100644 --- a/apps/codecs/libcook/cook.c +++ b/apps/codecs/libcook/cook.c @@ -71,23 +71,11 @@ const uint8_t ff_log2_tab[256]={ #define MAX_SUBPACKETS 5 //#define COOKDEBUG #define DEBUGF(message,args ...) av_log(NULL,AV_LOG_ERROR,message,## args) - -static float pow2tab[127]; -static float rootpow2tab[127]; #include "cook_fixpoint.h" /* debug functions */ #ifdef COOKDEBUG -static void dump_float_table(float* table, int size, int delimiter) { - int i=0; - av_log(NULL,AV_LOG_ERROR,"\n[%d]: ",i); - for (i=0 ; igain_size_factor = q->samples_per_channel/8; - for (i=0 ; i<23 ; i++) { - q->gain_table[i] = pow(pow2tab[i+52] , - (1.0/(double)q->gain_size_factor)); - } -} - - static av_cold int init_cook_vlc_tables(COOKContext *q) { int i, result; @@ -156,42 +123,6 @@ static av_cold int init_cook_vlc_tables(COOKContext *q) { av_log(NULL,AV_LOG_ERROR,"VLC tables initialized. Result = %d\n",result); return result; } - -static av_cold int init_cook_mlt(COOKContext *q) { - int j; - int mlt_size = q->samples_per_channel; - - if ((q->mlt_window = av_malloc(sizeof(float)*mlt_size)) == 0) - return -1; - - /* Initialize the MLT window: simple sine window. */ - ff_sine_window_init(q->mlt_window, mlt_size); - for(j=0 ; jmlt_window[j] *= sqrt(2.0 / q->samples_per_channel); - - /* Initialize the MDCT. */ - if (ff_mdct_init(&q->mdct_ctx, av_log2(mlt_size)+1, 1)) { - av_free(q->mlt_window); - return -1; - } - av_log(NULL,AV_LOG_ERROR,"MDCT initialized, order = %d. mlt_window = %d\n", - av_log2(mlt_size)+1,sizeof(q->mlt_window)*mlt_size); - - return 0; -} - -static const float *maybe_reformat_buffer32 (COOKContext *q, const float *ptr, int n) -{ - if (1) - return ptr; -} - -static av_cold void init_cplscales_table (COOKContext *q) { - int i; - for (i=0;i<5;i++) - q->cplscales[i] = maybe_reformat_buffer32 (q, q->cplscales[i], (1<<(i+2))-1); -} - /*************** init functions end ***********/ /** @@ -249,12 +180,8 @@ av_cold int cook_decode_close(COOKContext *q) av_log(NULL,AV_LOG_ERROR, "Deallocating memory.\n"); /* Free allocated memory buffers. */ - av_free(q->mlt_window); av_free(q->decoded_bytes_buffer); - /* Free the transform. */ - ff_mdct_end(&q->mdct_ctx); - /* Free the VLC tables. */ for (i=0 ; i<13 ; i++) { free_vlc(&q->envelope_quant_index[i]); @@ -444,37 +371,6 @@ static inline void expand_category(COOKContext *q, int* category, } /** - * The real requantization of the mltcoefs - * - * @param q pointer to the COOKContext - * @param index index - * @param quant_index quantisation index - * @param subband_coef_index array of indexes to quant_centroid_tab - * @param subband_coef_sign signs of coefficients - * @param mlt_p pointer into the mlt buffer - */ - -#if 0 -static void scalar_dequant_float(COOKContext *q, int index, int quant_index, - int* subband_coef_index, int* subband_coef_sign, - float* mlt_p){ - int i; - float f1; - - for(i=0 ; irandom_state) < 0x80000000) f1 = -f1; - } - mlt_p[i] = f1 * rootpow2tab[quant_index+63]; - } -} -#endif -/** * Unpack the subband_coef_index and subband_coef_sign vectors. * * @param q pointer to the COOKContext @@ -585,102 +481,6 @@ static void mono_decode(COOKContext *q, REAL_T* mlt_buffer) { decode_vectors(q, category, quant_index_table, mlt_buffer); } - -/** - * the actual requantization of the timedomain samples - * - * @param q pointer to the COOKContext - * @param buffer pointer to the timedomain buffer - * @param gain_index index for the block multiplier - * @param gain_index_next index for the next block multiplier - */ - -#if 0 -static void interpolate_float(COOKContext *q, float* buffer, - int gain_index, int gain_index_next){ - int i; - float fc1, fc2; - fc1 = pow2tab[gain_index+63]; - - if(gain_index == gain_index_next){ //static gain - for(i=0 ; igain_size_factor ; i++){ - buffer[i]*=fc1; - } - return; - } else { //smooth gain - fc2 = q->gain_table[11 + (gain_index_next-gain_index)]; - for(i=0 ; igain_size_factor ; i++){ - buffer[i]*=fc1; - fc1*=fc2; - } - return; - } -} -#endif - -/** - * Apply transform window, overlap buffers. - * - * @param q pointer to the COOKContext - * @param inbuffer pointer to the mltcoefficients - * @param gains_ptr current and previous gains - * @param previous_buffer pointer to the previous buffer to be used for overlapping - */ - -static void imlt_window_float (COOKContext *q, float *buffer1, - cook_gains *gains_ptr, float *previous_buffer) -{ - const float fc = pow2tab[gains_ptr->previous[0] + 63]; - int i; - /* The weird thing here, is that the two halves of the time domain - * buffer are swapped. Also, the newest data, that we save away for - * next frame, has the wrong sign. Hence the subtraction below. - * Almost sounds like a complex conjugate/reverse data/FFT effect. - */ - - /* Apply window and overlap */ - for(i = 0; i < q->samples_per_channel; i++){ - buffer1[i] = buffer1[i] * fc * q->mlt_window[i] - - previous_buffer[i] * q->mlt_window[q->samples_per_channel - 1 - i]; - } -} - -/** - * The modulated lapped transform, this takes transform coefficients - * and transforms them into timedomain samples. - * Apply transform window, overlap buffers, apply gain profile - * and buffer management. - * - * @param q pointer to the COOKContext - * @param inbuffer pointer to the mltcoefficients - * @param gains_ptr current and previous gains - * @param previous_buffer pointer to the previous buffer to be used for overlapping - */ -#if 0 -static void imlt_gain(COOKContext *q, REAL_T *inbuffer, - cook_gains *gains_ptr, REAL_T* previous_buffer) -{ - REAL_T *buffer0 = q->mono_mdct_output; - REAL_T *buffer1 = q->mono_mdct_output + q->samples_per_channel; - int i; - - /* Inverse modified discrete cosine transform */ - ff_imdct_calc(&q->mdct_ctx, q->mono_mdct_output, inbuffer); - - q->imlt_window (q, buffer1, gains_ptr, previous_buffer); - - /* Apply gain profile */ - for (i = 0; i < 8; i++) { - if (gains_ptr->now[i] || gains_ptr->now[i + 1]) - q->interpolate(q, &buffer1[q->gain_size_factor * i], - gains_ptr->now[i], gains_ptr->now[i + 1]); - } - - /* Save away the current to be previous block. */ - memcpy(previous_buffer, buffer0, sizeof(float)*q->samples_per_channel); -} - -#endif /** * function for getting the jointstereo coupling information * @@ -711,31 +511,6 @@ static void decouple_info(COOKContext *q, int* decouple_tab){ return; } -/* - * function decouples a pair of signals from a single signal via multiplication. - * - * @param q pointer to the COOKContext - * @param subband index of the current subband - * @param f1 multiplier for channel 1 extraction - * @param f2 multiplier for channel 2 extraction - * @param decode_buffer input buffer - * @param mlt_buffer1 pointer to left channel mlt coefficients - * @param mlt_buffer2 pointer to right channel mlt coefficients - */ -static void decouple_float (COOKContext *q, - int subband, - REAL_T f1, REAL_T f2, - REAL_T *decode_buffer, - REAL_T *mlt_buffer1, REAL_T *mlt_buffer2) -{ - int j, tmp_idx; - for (j=0 ; jjs_subband_start + subband)*SUBBAND_SIZE)+j; - mlt_buffer1[SUBBAND_SIZE*subband + j] = f1 * decode_buffer[tmp_idx]; - mlt_buffer2[SUBBAND_SIZE*subband + j] = f2 * decode_buffer[tmp_idx]; - } -} - /** * function for decoding joint stereo data * @@ -809,26 +584,6 @@ decode_bytes_and_gain(COOKContext *q, const uint8_t *inbuffer, FFSWAP(int *, gains_ptr->now, gains_ptr->previous); } - /** - * Saturate the output signal to signed 16bit integers. - * - * @param q pointer to the COOKContext - * @param chan channel to saturate - * @param out pointer to the output vector - */ -static void -saturate_output_float (COOKContext *q, int chan, int16_t *out) -{ - int j; - float *output = (float*)q->mono_mdct_output + q->samples_per_channel; - /* Clip and convert floats to 16 bits. - */ - for (j = 0; j < q->samples_per_channel; j++) { - out[chan + q->nb_channels * j] = - av_clip_int16(lrintf(output[j])); - } -} - /** * Final part of subpacket decoding: * Apply modulated lapped transform, gain compensation, @@ -965,18 +720,6 @@ static void dump_cook_context(COOKContext *q) } #endif -#if 0 -static av_cold int cook_count_channels(unsigned int mask){ - int i; - int channels = 0; - for(i = 0;i<32;i++){ - if(mask & (1<numvector_size = (1 << q->log2_numvector_size); /* Generate tables */ - init_pow2table(); - init_gain_table(q); - init_cplscales_table(q); - if (init_cook_vlc_tables(q) != 0) return -1; @@ -1092,17 +831,11 @@ av_cold int cook_decode_init(RMContext *rmctx, COOKContext *q) q->gains2.now = q->gain_3; q->gains2.previous = q->gain_4; - /* Initialize transform. */ - if ( init_cook_mlt(q) != 0 ) - return -1; /* Initialize COOK signal arithmetic handling */ if (1) { q->scalar_dequant = scalar_dequant_math; - q->decouple = decouple_float; - q->imlt_window = imlt_window_float; q->interpolate = interpolate_math; - q->saturate_output = saturate_output_float; } /* Try to catch some obviously faulty streams, othervise it might be exploitable */ diff --git a/apps/codecs/libcook/cook.h b/apps/codecs/libcook/cook.h index c4c06cd4f..e34883255 100644 --- a/apps/codecs/libcook/cook.h +++ b/apps/codecs/libcook/cook.h @@ -25,7 +25,6 @@ #include #include "libavutil/lfg.h" #include "bitstream.h" -#include "dsputil.h" #include "bytestream.h" #include "rm2wav.h" #include "cookdata_fixpoint.h" @@ -44,20 +43,9 @@ typedef struct cook { int* subband_coef_index, int* subband_coef_sign, REAL_T* mlt_p); - void (* decouple) (struct cook *q, - int subband, - REAL_T f1, REAL_T f2, - REAL_T *decode_buffer, - REAL_T *mlt_buffer1, REAL_T *mlt_buffer2); - - void (* imlt_window) (struct cook *q, float *buffer1, - cook_gains *gains_ptr, float *previous_buffer); - void (* interpolate) (struct cook *q, REAL_T* buffer, int gain_index, int gain_index_next); - void (* saturate_output) (struct cook *q, int chan, int16_t *out); - GetBitContext gb; int frame_number; int block_align; @@ -79,9 +67,6 @@ typedef struct cook { int cookversion; /* states */ AVLFG random_state; - /* transform data */ - MDCTContext mdct_ctx; - float* mlt_window; /* gain buffers */ cook_gains gains1; @@ -110,8 +95,6 @@ typedef struct cook { REAL_T decode_buffer_1[1024]; REAL_T decode_buffer_2[1024]; REAL_T decode_buffer_0[1060]; /* static allocation for joint decode */ - - const float *cplscales[5]; } COOKContext; av_cold int cook_decode_init(RMContext *rmctx, COOKContext *q); diff --git a/apps/codecs/libcook/dsputil.h b/apps/codecs/libcook/dsputil.h dissimilarity index 100% index 4573c17a6..e69de29bb 100644 --- a/apps/codecs/libcook/dsputil.h +++ b/apps/codecs/libcook/dsputil.h @@ -1,908 +0,0 @@ -/* - * DSP utils - * Copyright (c) 2000, 2001, 2002 Fabrice Bellard - * Copyright (c) 2002-2004 Michael Niedermayer - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -/** - * @file libavcodec/dsputil.h - * DSP utils. - * note, many functions in here may use MMX which trashes the FPU state, it is - * absolutely necessary to call emms_c() between dsp & float/double code - */ - -#ifndef AVCODEC_DSPUTIL_H -#define AVCODEC_DSPUTIL_H - -#include "libavutil/intreadwrite.h" -#include "avcodec.h" - - -//#define DEBUG -/* dct code */ -#if 0 /*MT : DELETE THIS LINE.*/ -typedef short DCTELEM; -typedef int DWTELEM; -typedef short IDWTELEM; - -void fdct_ifast (DCTELEM *data); -void fdct_ifast248 (DCTELEM *data); -void ff_jpeg_fdct_islow (DCTELEM *data); -void ff_fdct248_islow (DCTELEM *data); - -void j_rev_dct (DCTELEM *data); -void j_rev_dct4 (DCTELEM *data); -void j_rev_dct2 (DCTELEM *data); -void j_rev_dct1 (DCTELEM *data); -void ff_wmv2_idct_c(DCTELEM *data); - -void ff_fdct_mmx(DCTELEM *block); -void ff_fdct_mmx2(DCTELEM *block); -void ff_fdct_sse2(DCTELEM *block); - -void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride); -void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride); -void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride); -void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride); -void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block); -void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block); -void ff_h264_idct_add16_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); -void ff_h264_idct_add16intra_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); -void ff_h264_idct8_add4_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); -void ff_h264_idct_add8_c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); - -void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1, - const float *src2, int src3, int blocksize, int step); -void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, - const float *win, float add_bias, int len); -void ff_float_to_int16_c(int16_t *dst, const float *src, long len); -void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels); - -/* encoding scans */ -extern const uint8_t ff_alternate_horizontal_scan[64]; -extern const uint8_t ff_alternate_vertical_scan[64]; -extern const uint8_t ff_zigzag_direct[64]; -extern const uint8_t ff_zigzag248_direct[64]; - -/* pixel operations */ -#define MAX_NEG_CROP 1024 - -/* temporary */ -extern uint32_t ff_squareTbl[512]; -extern uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP]; - -/* VP3 DSP functions */ -void ff_vp3_idct_c(DCTELEM *block/* align 16*/); -void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); -void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); - -void ff_vp3_v_loop_filter_c(uint8_t *src, int stride, int *bounding_values); -void ff_vp3_h_loop_filter_c(uint8_t *src, int stride, int *bounding_values); - -/* VP6 DSP functions */ -void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, int stride, - const int16_t *h_weights, const int16_t *v_weights); - -/* 1/2^n downscaling functions from imgconvert.c */ -void ff_img_copy_plane(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); -void ff_shrink22(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); -void ff_shrink44(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); -void ff_shrink88(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); - -void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, - int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); - -/* minimum alignment rules ;) -If you notice errors in the align stuff, need more alignment for some ASM code -for some CPU or need to use a function with less aligned data then send a mail -to the ffmpeg-devel mailing list, ... - -!warning These alignments might not match reality, (missing attribute((align)) -stuff somewhere possible). -I (Michael) did not check them, these are just the alignments which I think -could be reached easily ... - -!future video codecs might need functions with less strict alignment -*/ - -/* -void get_pixels_c(DCTELEM *block, const uint8_t *pixels, int line_size); -void diff_pixels_c(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride); -void put_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size); -void add_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size); -void clear_blocks_c(DCTELEM *blocks); -*/ - -/* add and put pixel (decoding) */ -// blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16 -//h for op_pixels_func is limited to {width/2, width} but never larger than 16 and never smaller then 4 -typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h); -typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h); -typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride); -typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y); -typedef void (*h264_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int offset); -typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset); - -#define DEF_OLD_QPEL(name)\ -void ff_put_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\ -void ff_put_no_rnd_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\ -void ff_avg_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride); - -DEF_OLD_QPEL(qpel16_mc11_old_c) -DEF_OLD_QPEL(qpel16_mc31_old_c) -DEF_OLD_QPEL(qpel16_mc12_old_c) -DEF_OLD_QPEL(qpel16_mc32_old_c) -DEF_OLD_QPEL(qpel16_mc13_old_c) -DEF_OLD_QPEL(qpel16_mc33_old_c) -DEF_OLD_QPEL(qpel8_mc11_old_c) -DEF_OLD_QPEL(qpel8_mc31_old_c) -DEF_OLD_QPEL(qpel8_mc12_old_c) -DEF_OLD_QPEL(qpel8_mc32_old_c) -DEF_OLD_QPEL(qpel8_mc13_old_c) -DEF_OLD_QPEL(qpel8_mc33_old_c) - -#define CALL_2X_PIXELS(a, b, n)\ -static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - b(block , pixels , line_size, h);\ - b(block+n, pixels+n, line_size, h);\ -} - -/* motion estimation */ -// h is limited to {width/2, width, 2*width} but never larger than 16 and never smaller then 2 -// although currently h<4 is not used as functions with width <8 are neither used nor implemented -typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/; - - -// for snow slices -typedef struct slice_buffer_s slice_buffer; - -/** - * Scantable. - */ -typedef struct ScanTable{ - const uint8_t *scantable; - uint8_t permutated[64]; - uint8_t raster_end[64]; -#if ARCH_PPC - /** Used by dct_quantize_altivec to find last-non-zero */ - DECLARE_ALIGNED(16, uint8_t, inverse[64]); -#endif -} ScanTable; - -void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable); - -void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, - int block_w, int block_h, - int src_x, int src_y, int w, int h); - -/** - * DSPContext. - */ -typedef struct DSPContext { - /* pixel ops : interface with DCT */ - void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size); - void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride); - void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); - void (*put_signed_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); - void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); - void (*add_pixels8)(uint8_t *pixels, DCTELEM *block, int line_size); - void (*add_pixels4)(uint8_t *pixels, DCTELEM *block, int line_size); - int (*sum_abs_dctelem)(DCTELEM *block/*align 16*/); - /** - * translational global motion compensation. - */ - void (*gmc1)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder); - /** - * global motion compensation. - */ - void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy, - int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); - void (*clear_block)(DCTELEM *block/*align 16*/); - void (*clear_blocks)(DCTELEM *blocks/*align 16*/); - int (*pix_sum)(uint8_t * pix, int line_size); - int (*pix_norm1)(uint8_t * pix, int line_size); -// 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4 - - me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */ - me_cmp_func sse[6]; - me_cmp_func hadamard8_diff[6]; - me_cmp_func dct_sad[6]; - me_cmp_func quant_psnr[6]; - me_cmp_func bit[6]; - me_cmp_func rd[6]; - me_cmp_func vsad[6]; - me_cmp_func vsse[6]; - me_cmp_func nsse[6]; - me_cmp_func w53[6]; - me_cmp_func w97[6]; - me_cmp_func dct_max[6]; - me_cmp_func dct264_sad[6]; - - me_cmp_func me_pre_cmp[6]; - me_cmp_func me_cmp[6]; - me_cmp_func me_sub_cmp[6]; - me_cmp_func mb_cmp[6]; - me_cmp_func ildct_cmp[6]; //only width 16 used - me_cmp_func frame_skip_cmp[6]; //only width 8 used - - int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2, - int size); - - /** - * Halfpel motion compensation with rounding (a+b+1)>>1. - * this is an array[4][4] of motion compensation functions for 4 - * horizontal blocksizes (8,16) and the 4 halfpel positions
- * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] - * @param block destination where the result is stored - * @param pixels source - * @param line_size number of bytes in a horizontal line of block - * @param h height - */ - op_pixels_func put_pixels_tab[4][4]; - - /** - * Halfpel motion compensation with rounding (a+b+1)>>1. - * This is an array[4][4] of motion compensation functions for 4 - * horizontal blocksizes (8,16) and the 4 halfpel positions
- * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] - * @param block destination into which the result is averaged (a+b+1)>>1 - * @param pixels source - * @param line_size number of bytes in a horizontal line of block - * @param h height - */ - op_pixels_func avg_pixels_tab[4][4]; - - /** - * Halfpel motion compensation with no rounding (a+b)>>1. - * this is an array[2][4] of motion compensation functions for 2 - * horizontal blocksizes (8,16) and the 4 halfpel positions
- * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] - * @param block destination where the result is stored - * @param pixels source - * @param line_size number of bytes in a horizontal line of block - * @param h height - */ - op_pixels_func put_no_rnd_pixels_tab[4][4]; - - /** - * Halfpel motion compensation with no rounding (a+b)>>1. - * this is an array[2][4] of motion compensation functions for 2 - * horizontal blocksizes (8,16) and the 4 halfpel positions
- * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] - * @param block destination into which the result is averaged (a+b)>>1 - * @param pixels source - * @param line_size number of bytes in a horizontal line of block - * @param h height - */ - op_pixels_func avg_no_rnd_pixels_tab[4][4]; - - void (*put_no_rnd_pixels_l2[2])(uint8_t *block/*align width (8 or 16)*/, const uint8_t *a/*align 1*/, const uint8_t *b/*align 1*/, int line_size, int h); - - /** - * Thirdpel motion compensation with rounding (a+b+1)>>1. - * this is an array[12] of motion compensation functions for the 9 thirdpe - * positions
- * *pixels_tab[ xthirdpel + 4*ythirdpel ] - * @param block destination where the result is stored - * @param pixels source - * @param line_size number of bytes in a horizontal line of block - * @param h height - */ - tpel_mc_func put_tpel_pixels_tab[11]; //FIXME individual func ptr per width? - tpel_mc_func avg_tpel_pixels_tab[11]; //FIXME individual func ptr per width? - - qpel_mc_func put_qpel_pixels_tab[2][16]; - qpel_mc_func avg_qpel_pixels_tab[2][16]; - qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; - qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16]; - qpel_mc_func put_mspel_pixels_tab[8]; - - /** - * h264 Chroma MC - */ - h264_chroma_mc_func put_h264_chroma_pixels_tab[3]; - /* This is really one func used in VC-1 decoding */ - h264_chroma_mc_func put_no_rnd_h264_chroma_pixels_tab[3]; - h264_chroma_mc_func avg_h264_chroma_pixels_tab[3]; - - qpel_mc_func put_h264_qpel_pixels_tab[4][16]; - qpel_mc_func avg_h264_qpel_pixels_tab[4][16]; - - qpel_mc_func put_2tap_qpel_pixels_tab[4][16]; - qpel_mc_func avg_2tap_qpel_pixels_tab[4][16]; - - h264_weight_func weight_h264_pixels_tab[10]; - h264_biweight_func biweight_h264_pixels_tab[10]; - - /* AVS specific */ - qpel_mc_func put_cavs_qpel_pixels_tab[2][16]; - qpel_mc_func avg_cavs_qpel_pixels_tab[2][16]; - void (*cavs_filter_lv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2); - void (*cavs_filter_lh)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2); - void (*cavs_filter_cv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2); - void (*cavs_filter_ch)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2); - void (*cavs_idct8_add)(uint8_t *dst, DCTELEM *block, int stride); - - me_cmp_func pix_abs[2][4]; - - /* huffyuv specific */ - void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w); - void (*add_bytes_l2)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 16*/, int w); - void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w); - /** - * subtract huffyuv's variant of median prediction - * note, this might read from src1[-1], src2[-1] - */ - void (*sub_hfyu_median_prediction)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top); - void (*add_hfyu_median_prediction)(uint8_t *dst, uint8_t *top, uint8_t *diff, int w, int *left, int *left_top); - /* this might write to dst[w] */ - void (*add_png_paeth_prediction)(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp); - void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w); - - void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0); - void (*h264_h_loop_filter_luma)(uint8_t *pix/*align 4 */, int stride, int alpha, int beta, int8_t *tc0); - /* v/h_loop_filter_luma_intra: align 16 */ - void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta); - void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta); - void (*h264_v_loop_filter_chroma)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0); - void (*h264_h_loop_filter_chroma)(uint8_t *pix/*align 4*/, int stride, int alpha, int beta, int8_t *tc0); - void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta); - void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta); - // h264_loop_filter_strength: simd only. the C version is inlined in h264.c - void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2], - int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field); - - void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale); - void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale); - - void (*h261_loop_filter)(uint8_t *src, int stride); - - void (*x8_v_loop_filter)(uint8_t *src, int stride, int qscale); - void (*x8_h_loop_filter)(uint8_t *src, int stride, int qscale); - - void (*vp3_v_loop_filter)(uint8_t *src, int stride, int *bounding_values); - void (*vp3_h_loop_filter)(uint8_t *src, int stride, int *bounding_values); - - void (*vp6_filter_diag4)(uint8_t *dst, uint8_t *src, int stride, - const int16_t *h_weights,const int16_t *v_weights); - - /* assume len is a multiple of 4, and arrays are 16-byte aligned */ - void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize); - void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len); - /* no alignment needed */ - void (*flac_compute_autocorr)(const int32_t *data, int len, int lag, double *autoc); - /* assume len is a multiple of 8, and arrays are 16-byte aligned */ - void (*vector_fmul)(float *dst, const float *src, int len); - void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len); - /* assume len is a multiple of 8, and src arrays are 16-byte aligned */ - void (*vector_fmul_add_add)(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step); - /* assume len is a multiple of 4, and arrays are 16-byte aligned */ - void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len); - /* assume len is a multiple of 8, and arrays are 16-byte aligned */ - void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len); - - /* C version: convert floats from the range [384.0,386.0] to ints in [-32768,32767] - * simd versions: convert floats from [-32768.0,32767.0] without rescaling and arrays are 16byte aligned */ - void (*float_to_int16)(int16_t *dst, const float *src, long len); - void (*float_to_int16_interleave)(int16_t *dst, const float **src, long len, int channels); - - /* (I)DCT */ - void (*fdct)(DCTELEM *block/* align 16*/); - void (*fdct248)(DCTELEM *block/* align 16*/); - - /* IDCT really*/ - void (*idct)(DCTELEM *block/* align 16*/); - - /** - * block -> idct -> clip to unsigned 8 bit -> dest. - * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...) - * @param line_size size in bytes of a horizontal line of dest - */ - void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); - - /** - * block -> idct -> add dest -> clip to unsigned 8 bit -> dest. - * @param line_size size in bytes of a horizontal line of dest - */ - void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); - - /** - * idct input permutation. - * several optimized IDCTs need a permutated input (relative to the normal order of the reference - * IDCT) - * this permutation must be performed before the idct_put/add, note, normally this can be merged - * with the zigzag/alternate scan
- * an example to avoid confusion: - * - (->decode coeffs -> zigzag reorder -> dequant -> reference idct ->...) - * - (x -> referece dct -> reference idct -> x) - * - (x -> referece dct -> simple_mmx_perm = idct_permutation -> simple_idct_mmx -> x) - * - (->decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant -> simple_idct_mmx ->...) - */ - uint8_t idct_permutation[64]; - int idct_permutation_type; -#define FF_NO_IDCT_PERM 1 -#define FF_LIBMPEG2_IDCT_PERM 2 -#define FF_SIMPLE_IDCT_PERM 3 -#define FF_TRANSPOSE_IDCT_PERM 4 -#define FF_PARTTRANS_IDCT_PERM 5 -#define FF_SSE2_IDCT_PERM 6 - - int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale); - void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale); -#define BASIS_SHIFT 16 -#define RECON_SHIFT 6 - - void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w); -#define EDGE_WIDTH 16 - - /* h264 functions */ - /* NOTE!!! if you implement any of h264_idct8_add, h264_idct8_add4 then you must implement all of them - NOTE!!! if you implement any of h264_idct_add, h264_idct_add16, h264_idct_add16intra, h264_idct_add8 then you must implement all of them - The reason for above, is that no 2 out of one list may use a different permutation. - */ - void (*h264_idct_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride); - void (*h264_idct8_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride); - void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride); - void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride); - void (*h264_dct)(DCTELEM block[4][4]); - void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); - void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); - void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); - void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); - - /* snow wavelet */ - void (*vertical_compose97i)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width); - void (*horizontal_compose97i)(IDWTELEM *b, int width); - void (*inner_add_yblock)(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); - - void (*prefetch)(void *mem, int stride, int h); - - void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); - - /* vc1 functions */ - void (*vc1_inv_trans_8x8)(DCTELEM *b); - void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block); - void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block); - void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block); - void (*vc1_v_overlap)(uint8_t* src, int stride); - void (*vc1_h_overlap)(uint8_t* src, int stride); - /* put 8x8 block with bicubic interpolation and quarterpel precision - * last argument is actually round value instead of height - */ - op_pixels_func put_vc1_mspel_pixels_tab[16]; - - /* intrax8 functions */ - void (*x8_spatial_compensation[12])(uint8_t *src , uint8_t *dst, int linesize); - void (*x8_setup_spatial_compensation)(uint8_t *src, uint8_t *dst, int linesize, - int * range, int * sum, int edges); - - /* ape functions */ - /** - * Add contents of the second vector to the first one. - * @param len length of vectors, should be multiple of 16 - */ - void (*add_int16)(int16_t *v1/*align 16*/, int16_t *v2, int len); - /** - * Add contents of the second vector to the first one. - * @param len length of vectors, should be multiple of 16 - */ - void (*sub_int16)(int16_t *v1/*align 16*/, int16_t *v2, int len); - /** - * Calculate scalar product of two vectors. - * @param len length of vectors, should be multiple of 16 - * @param shift number of bits to discard from product - */ - int32_t (*scalarproduct_int16)(int16_t *v1, int16_t *v2/*align 16*/, int len, int shift); - - /* rv30 functions */ - qpel_mc_func put_rv30_tpel_pixels_tab[4][16]; - qpel_mc_func avg_rv30_tpel_pixels_tab[4][16]; - - /* rv40 functions */ - qpel_mc_func put_rv40_qpel_pixels_tab[4][16]; - qpel_mc_func avg_rv40_qpel_pixels_tab[4][16]; - h264_chroma_mc_func put_rv40_chroma_pixels_tab[3]; - h264_chroma_mc_func avg_rv40_chroma_pixels_tab[3]; -} DSPContext; - -void dsputil_static_init(void); -void dsputil_init(DSPContext* p, AVCodecContext *avctx); - -int ff_check_alignment(void); - -/** - * permute block according to permuatation. - * @param last last non zero element in scantable order - */ -void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last); - -void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type); - -#define BYTE_VEC32(c) ((c)*0x01010101UL) - -static inline uint32_t rnd_avg32(uint32_t a, uint32_t b) -{ - return (a | b) - (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1); -} - -static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b) -{ - return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1); -} - -static inline int get_penalty_factor(int lambda, int lambda2, int type){ - switch(type&0xFF){ - default: - case FF_CMP_SAD: - return lambda>>FF_LAMBDA_SHIFT; - case FF_CMP_DCT: - return (3*lambda)>>(FF_LAMBDA_SHIFT+1); - case FF_CMP_W53: - return (4*lambda)>>(FF_LAMBDA_SHIFT); - case FF_CMP_W97: - return (2*lambda)>>(FF_LAMBDA_SHIFT); - case FF_CMP_SATD: - case FF_CMP_DCT264: - return (2*lambda)>>FF_LAMBDA_SHIFT; - case FF_CMP_RD: - case FF_CMP_PSNR: - case FF_CMP_SSE: - case FF_CMP_NSSE: - return lambda2>>FF_LAMBDA_SHIFT; - case FF_CMP_BIT: - return 1; - } -} - -/** - * Empty mmx state. - * this must be called between any dsp function and float/double code. - * for example sin(); dsp->idct_put(); emms_c(); cos() - */ -#define emms_c() - -/* should be defined by architectures supporting - one or more MultiMedia extension */ -int mm_support(void); - -void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx); -void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx); -void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx); -void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx); -void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx); -void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx); -void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx); -void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx); -void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx); - -#endif /*MT : DELETE THIS LINE ONLY. */ -#define DECLARE_ALIGNED_16(t, v) DECLARE_ALIGNED(16, t, v) - -#if 0 /*MT : DELETE THIS LINE ONLY. */ -#if HAVE_MMX - -#undef emms_c - -extern int mm_flags; - -void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); -void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); -void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); - -static inline void emms(void) -{ - __asm__ volatile ("emms;":::"memory"); -} - - -#define emms_c() \ -{\ - if (mm_flags & FF_MM_MMX)\ - emms();\ -} - -void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx); - -#elif ARCH_ARM - -extern int mm_flags; - -#if HAVE_NEON -# define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(16, t, v) -# define STRIDE_ALIGN 16 -#endif - -#elif ARCH_PPC - -extern int mm_flags; - -#define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(16, t, v) -#define STRIDE_ALIGN 16 - -#elif HAVE_MMI - -#define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(16, t, v) -#define STRIDE_ALIGN 16 - -#else - -#define mm_flags 0 -#define mm_support() 0 - -#endif - -#endif /* MT : DELETE THIS LINE ONLY */ -#ifndef DECLARE_ALIGNED_8 -# define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(8, t, v) -#endif - -#if 0 /* MT : DELETE THIS LINE ONLY */ -#ifndef STRIDE_ALIGN -# define STRIDE_ALIGN 8 -#endif - -/* PSNR */ -void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3], - int orig_linesize[3], int coded_linesize, - AVCodecContext *avctx); - -#endif /*MT : DELETE THIS LINE.*/ -/* FFT computation */ - -/* NOTE: soon integer code will be added, so you must use the - FFTSample type */ -typedef float FFTSample; - -struct MDCTContext; - -typedef struct FFTComplex { - FFTSample re, im; -} FFTComplex; - -typedef struct FFTContext { - int nbits; - int inverse; - uint16_t *revtab; - FFTComplex *exptab; - FFTComplex *exptab1; /* only used by SSE code */ - FFTComplex *tmp_buf; - void (*fft_permute)(struct FFTContext *s, FFTComplex *z); - void (*fft_calc)(struct FFTContext *s, FFTComplex *z); - void (*imdct_calc)(struct MDCTContext *s, FFTSample *output, const FFTSample *input); - void (*imdct_half)(struct MDCTContext *s, FFTSample *output, const FFTSample *input); -} FFTContext; - -extern FFTSample* ff_cos_tabs[13]; - -/** - * Sets up a complex FFT. - * @param nbits log2 of the length of the input array - * @param inverse if 0 perform the forward transform, if 1 perform the inverse - */ -int ff_fft_init(FFTContext *s, int nbits, int inverse); -void ff_fft_permute_c(FFTContext *s, FFTComplex *z); -void ff_fft_permute_sse(FFTContext *s, FFTComplex *z); -void ff_fft_calc_c(FFTContext *s, FFTComplex *z); -void ff_fft_calc_sse(FFTContext *s, FFTComplex *z); -void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z); -void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z); -void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z); - -/** - * Do the permutation needed BEFORE calling ff_fft_calc(). - */ -static inline void ff_fft_permute(FFTContext *s, FFTComplex *z) -{ - s->fft_permute(s, z); -} -/** - * Do a complex FFT with the parameters defined in ff_fft_init(). The - * input data must be permuted before. No 1.0/sqrt(n) normalization is done. - */ -static inline void ff_fft_calc(FFTContext *s, FFTComplex *z) -{ - s->fft_calc(s, z); -} -void ff_fft_end(FFTContext *s); - -#endif /*MT : DELETE THIS LINE.*/ -/* MDCT computation */ - -typedef struct MDCTContext { - int n; /* size of MDCT (i.e. number of input data * 2) */ - int nbits; /* n = 2^nbits */ - /* pre/post rotation tables */ - FFTSample *tcos; - FFTSample *tsin; - FFTContext fft; -} MDCTContext; - -static inline void ff_imdct_calc(MDCTContext *s, FFTSample *output, const FFTSample *input) -{ - s->fft.imdct_calc(s, output, input); -} -static inline void ff_imdct_half(MDCTContext *s, FFTSample *output, const FFTSample *input) -{ - s->fft.imdct_half(s, output, input); -} - -#if 0 /* MT : DELETE THIS LINE. */ -/** - * Generate a Kaiser-Bessel Derived Window. - * @param window pointer to half window - * @param alpha determines window shape - * @param n size of half window - */ -void ff_kbd_window_init(float *window, float alpha, int n); -#endif /* MT : DELETE THIS LINE.*/ - -/** - * Generate a sine window. - * @param window pointer to half window - * @param n size of half window - */ -void ff_sine_window_init(float *window, int n); -extern float ff_sine_128 [ 128]; -extern float ff_sine_256 [ 256]; -extern float ff_sine_512 [ 512]; -extern float ff_sine_1024[1024]; -extern float ff_sine_2048[2048]; -extern float ff_sine_4096[4096]; -extern float *ff_sine_windows[6]; - -int ff_mdct_init(MDCTContext *s, int nbits, int inverse); -void ff_imdct_calc_c(MDCTContext *s, FFTSample *output, const FFTSample *input); -void ff_imdct_half_c(MDCTContext *s, FFTSample *output, const FFTSample *input); -void ff_imdct_calc_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input); -void ff_imdct_half_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input); -void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input); -void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input); -void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, const FFTSample *input); -void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input); -void ff_mdct_calc(MDCTContext *s, FFTSample *out, const FFTSample *input); -void ff_mdct_end(MDCTContext *s); - -#if 0 /* MT : DELETE THIS LINE.*/ -/* Real Discrete Fourier Transform */ - -enum RDFTransformType { - RDFT, - IRDFT, - RIDFT, - IRIDFT, -}; - -typedef struct { - int nbits; - int inverse; - int sign_convention; - - /* pre/post rotation tables */ - FFTSample *tcos; - FFTSample *tsin; - FFTContext fft; -} RDFTContext; - -/** - * Sets up a real FFT. - * @param nbits log2 of the length of the input array - * @param trans the type of transform - */ -int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans); -void ff_rdft_calc(RDFTContext *s, FFTSample *data); -void ff_rdft_end(RDFTContext *s); - -#define WRAPPER8_16(name8, name16)\ -static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\ - return name8(s, dst , src , stride, h)\ - +name8(s, dst+8 , src+8 , stride, h);\ -} - -#define WRAPPER8_16_SQ(name8, name16)\ -static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\ - int score=0;\ - score +=name8(s, dst , src , stride, 8);\ - score +=name8(s, dst+8 , src+8 , stride, 8);\ - if(h==16){\ - dst += 8*stride;\ - src += 8*stride;\ - score +=name8(s, dst , src , stride, 8);\ - score +=name8(s, dst+8 , src+8 , stride, 8);\ - }\ - return score;\ -} - - -static inline void copy_block2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) -{ - int i; - for(i=0; i> 1; - if(!(i&m)) return split_radix_permutation(i, m, inverse)*2; - m >>= 1; - if(inverse == !(i&m)) return split_radix_permutation(i, m, inverse)*4 + 1; - else return split_radix_permutation(i, m, inverse)*4 - 1; -} - -av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse) -{ - int i, j, m, n; - float alpha, c1, s1, s2; - int split_radix = 1; - int av_unused has_vectors; - - if (nbits < 2 || nbits > 16) - goto fail; - s->nbits = nbits; - n = 1 << nbits; - - s->tmp_buf = NULL; - s->exptab = av_malloc((n / 2) * sizeof(FFTComplex)); - if (!s->exptab) - goto fail; - s->revtab = av_malloc(n * sizeof(uint16_t)); - if (!s->revtab) - goto fail; - s->inverse = inverse; - - s2 = inverse ? 1.0 : -1.0; - - s->fft_permute = ff_fft_permute_c; - s->fft_calc = ff_fft_calc_c; - s->imdct_calc = ff_imdct_calc_c; - s->imdct_half = ff_imdct_half_c; - s->exptab1 = NULL; - -#if HAVE_MMX && HAVE_YASM - has_vectors = mm_support(); - if (has_vectors & FF_MM_SSE && HAVE_SSE) { - /* SSE for P3/P4/K8 */ - s->imdct_calc = ff_imdct_calc_sse; - s->imdct_half = ff_imdct_half_sse; - s->fft_permute = ff_fft_permute_sse; - s->fft_calc = ff_fft_calc_sse; - } else if (has_vectors & FF_MM_3DNOWEXT && HAVE_AMD3DNOWEXT) { - /* 3DNowEx for K7 */ - s->imdct_calc = ff_imdct_calc_3dn2; - s->imdct_half = ff_imdct_half_3dn2; - s->fft_calc = ff_fft_calc_3dn2; - } else if (has_vectors & FF_MM_3DNOW && HAVE_AMD3DNOW) { - /* 3DNow! for K6-2/3 */ - s->imdct_calc = ff_imdct_calc_3dn; - s->imdct_half = ff_imdct_half_3dn; - s->fft_calc = ff_fft_calc_3dn; - } -#elif HAVE_ALTIVEC - has_vectors = mm_support(); - if (has_vectors & FF_MM_ALTIVEC) { - s->fft_calc = ff_fft_calc_altivec; - split_radix = 0; - } -#endif - - if (split_radix) { - for(j=4; j<=nbits; j++) { - int m = 1<revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = i; - s->tmp_buf = av_malloc(n * sizeof(FFTComplex)); - } else { - int np, nblocks, np2, l; - FFTComplex *q; - - for(i=0; i<(n/2); i++) { - alpha = 2 * M_PI * (float)i / (float)n; - c1 = cos(alpha); - s1 = sin(alpha) * s2; - s->exptab[i].re = c1; - s->exptab[i].im = s1; - } - - np = 1 << nbits; - nblocks = np >> 3; - np2 = np >> 1; - s->exptab1 = av_malloc(np * 2 * sizeof(FFTComplex)); - if (!s->exptab1) - goto fail; - q = s->exptab1; - do { - for(l = 0; l < np2; l += 2 * nblocks) { - *q++ = s->exptab[l]; - *q++ = s->exptab[l + nblocks]; - - q->re = -s->exptab[l].im; - q->im = s->exptab[l].re; - q++; - q->re = -s->exptab[l + nblocks].im; - q->im = s->exptab[l + nblocks].re; - q++; - } - nblocks = nblocks >> 1; - } while (nblocks != 0); - av_freep(&s->exptab); - - /* compute bit reverse table */ - for(i=0;i> j) & 1) << (nbits-j-1); - } - s->revtab[i]=m; - } - } - - return 0; - fail: - av_freep(&s->revtab); - av_freep(&s->exptab); - av_freep(&s->exptab1); - av_freep(&s->tmp_buf); - return -1; -} - -void ff_fft_permute_c(FFTContext *s, FFTComplex *z) -{ - int j, k, np; - FFTComplex tmp; - const uint16_t *revtab = s->revtab; - np = 1 << s->nbits; - - if (s->tmp_buf) { - /* TODO: handle split-radix permute in a more optimal way, probably in-place */ - for(j=0;jtmp_buf[revtab[j]] = z[j]; - memcpy(z, s->tmp_buf, np * sizeof(FFTComplex)); - return; - } - - /* reverse */ - for(j=0;jrevtab); - av_freep(&s->exptab); - av_freep(&s->exptab1); - av_freep(&s->tmp_buf); -} - -#define sqrthalf (float)M_SQRT1_2 - -#define BF(x,y,a,b) {\ - x = a - b;\ - y = a + b;\ -} - -#define BUTTERFLIES(a0,a1,a2,a3) {\ - BF(t3, t5, t5, t1);\ - BF(a2.re, a0.re, a0.re, t5);\ - BF(a3.im, a1.im, a1.im, t3);\ - BF(t4, t6, t2, t6);\ - BF(a3.re, a1.re, a1.re, t4);\ - BF(a2.im, a0.im, a0.im, t6);\ -} - -// force loading all the inputs before storing any. -// this is slightly slower for small data, but avoids store->load aliasing -// for addresses separated by large powers of 2. -#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\ - FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\ - BF(t3, t5, t5, t1);\ - BF(a2.re, a0.re, r0, t5);\ - BF(a3.im, a1.im, i1, t3);\ - BF(t4, t6, t2, t6);\ - BF(a3.re, a1.re, r1, t4);\ - BF(a2.im, a0.im, i0, t6);\ -} - -#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\ - t1 = a2.re * wre + a2.im * wim;\ - t2 = a2.im * wre - a2.re * wim;\ - t5 = a3.re * wre - a3.im * wim;\ - t6 = a3.im * wre + a3.re * wim;\ - BUTTERFLIES(a0,a1,a2,a3)\ -} - -#define TRANSFORM_ZERO(a0,a1,a2,a3) {\ - t1 = a2.re;\ - t2 = a2.im;\ - t5 = a3.re;\ - t6 = a3.im;\ - BUTTERFLIES(a0,a1,a2,a3)\ -} - -/* z[0...8n-1], w[1...2n-1] */ -#define PASS(name)\ -static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\ -{\ - FFTSample t1, t2, t3, t4, t5, t6;\ - int o1 = 2*n;\ - int o2 = 4*n;\ - int o3 = 6*n;\ - const FFTSample *wim = wre+o1;\ - n--;\ -\ - TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\ - TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ - do {\ - z += 2;\ - wre += 2;\ - wim -= 2;\ - TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\ - TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ - } while(--n);\ -} - -PASS(pass) -#undef BUTTERFLIES -#define BUTTERFLIES BUTTERFLIES_BIG -PASS(pass_big) - -#define DECL_FFT(n,n2,n4)\ -static void fft##n(FFTComplex *z)\ -{\ - fft##n2(z);\ - fft##n4(z+n4*2);\ - fft##n4(z+n4*3);\ - pass(z,ff_cos_##n,n4/2);\ -} - -static void fft4(FFTComplex *z) -{ - FFTSample t1, t2, t3, t4, t5, t6, t7, t8; - - BF(t3, t1, z[0].re, z[1].re); - BF(t8, t6, z[3].re, z[2].re); - BF(z[2].re, z[0].re, t1, t6); - BF(t4, t2, z[0].im, z[1].im); - BF(t7, t5, z[2].im, z[3].im); - BF(z[3].im, z[1].im, t4, t8); - BF(z[3].re, z[1].re, t3, t7); - BF(z[2].im, z[0].im, t2, t5); -} - -static void fft8(FFTComplex *z) -{ - FFTSample t1, t2, t3, t4, t5, t6, t7, t8; - - fft4(z); - - BF(t1, z[5].re, z[4].re, -z[5].re); - BF(t2, z[5].im, z[4].im, -z[5].im); - BF(t3, z[7].re, z[6].re, -z[7].re); - BF(t4, z[7].im, z[6].im, -z[7].im); - BF(t8, t1, t3, t1); - BF(t7, t2, t2, t4); - BF(z[4].re, z[0].re, z[0].re, t1); - BF(z[4].im, z[0].im, z[0].im, t2); - BF(z[6].re, z[2].re, z[2].re, t7); - BF(z[6].im, z[2].im, z[2].im, t8); - - TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf); -} - -#if !CONFIG_SMALL -static void fft16(FFTComplex *z) -{ - FFTSample t1, t2, t3, t4, t5, t6; - - fft8(z); - fft4(z+8); - fft4(z+12); - - TRANSFORM_ZERO(z[0],z[4],z[8],z[12]); - TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf); - TRANSFORM(z[1],z[5],z[9],z[13],ff_cos_16[1],ff_cos_16[3]); - TRANSFORM(z[3],z[7],z[11],z[15],ff_cos_16[3],ff_cos_16[1]); -} -#else -DECL_FFT(16,8,4) -#endif -DECL_FFT(32,16,8) -DECL_FFT(64,32,16) -DECL_FFT(128,64,32) -DECL_FFT(256,128,64) -DECL_FFT(512,256,128) -#if !CONFIG_SMALL -#define pass pass_big -#endif -DECL_FFT(1024,512,256) -DECL_FFT(2048,1024,512) -DECL_FFT(4096,2048,1024) -DECL_FFT(8192,4096,2048) -DECL_FFT(16384,8192,4096) -DECL_FFT(32768,16384,8192) -DECL_FFT(65536,32768,16384) - -static void (*fft_dispatch[])(FFTComplex*) = { - fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024, - fft2048, fft4096, fft8192, fft16384, fft32768, fft65536, -}; - -void ff_fft_calc_c(FFTContext *s, FFTComplex *z) -{ - fft_dispatch[s->nbits-2](z); -} - diff --git a/apps/codecs/libcook/main.c b/apps/codecs/libcook/main.c index 2f85295fb..40e2fd8a4 100644 --- a/apps/codecs/libcook/main.c +++ b/apps/codecs/libcook/main.c @@ -5,7 +5,7 @@ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ * \/ \/ \/ \/ \/ - * $Id$ + * $Id$ main.c 20898 2009-05-09 23:24:02Z dave $ * * Copyright (C) 2009 Mohamed Tarek * diff --git a/apps/codecs/libcook/mdct.c b/apps/codecs/libcook/mdct.c dissimilarity index 100% index cb3388f6f..e69de29bb 100644 --- a/apps/codecs/libcook/mdct.c +++ b/apps/codecs/libcook/mdct.c @@ -1,229 +0,0 @@ -/* - * MDCT/IMDCT transforms - * Copyright (c) 2002 Fabrice Bellard - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "dsputil.h" - -/** - * @file libavcodec/mdct.c - * MDCT/IMDCT transforms. - */ - -// Generate a Kaiser-Bessel Derived Window. -#define BESSEL_I0_ITER 50 // default: 50 iterations of Bessel I0 approximation -av_cold void ff_kbd_window_init(float *window, float alpha, int n) -{ - int i, j; - double sum = 0.0, bessel, tmp; - double local_window[n]; - double alpha2 = (alpha * M_PI / n) * (alpha * M_PI / n); - - for (i = 0; i < n; i++) { - tmp = i * (n - i) * alpha2; - bessel = 1.0; - for (j = BESSEL_I0_ITER; j > 0; j--) - bessel = bessel * tmp / (j * j) + 1; - sum += bessel; - local_window[i] = sum; - } - - sum++; - for (i = 0; i < n; i++) - window[i] = sqrt(local_window[i] / sum); -} - -DECLARE_ALIGNED(16, float, ff_sine_128 [ 128]); -DECLARE_ALIGNED(16, float, ff_sine_256 [ 256]); -DECLARE_ALIGNED(16, float, ff_sine_512 [ 512]); -DECLARE_ALIGNED(16, float, ff_sine_1024[1024]); -DECLARE_ALIGNED(16, float, ff_sine_2048[2048]); -DECLARE_ALIGNED(16, float, ff_sine_4096[4096]); -float *ff_sine_windows[6] = { - ff_sine_128, ff_sine_256, ff_sine_512, ff_sine_1024, ff_sine_2048, ff_sine_4096 -}; - -// Generate a sine window. -av_cold void ff_sine_window_init(float *window, int n) { - int i; - for(i = 0; i < n; i++) - window[i] = sinf((i + 0.5) * (M_PI / (2.0 * n))); -} - -/** - * init MDCT or IMDCT computation. - */ -av_cold int ff_mdct_init(MDCTContext *s, int nbits, int inverse) -{ - int n, n4, i; - double alpha; - - memset(s, 0, sizeof(*s)); - n = 1 << nbits; - s->nbits = nbits; - s->n = n; - n4 = n >> 2; - s->tcos = av_malloc(n4 * sizeof(FFTSample)); - if (!s->tcos) - goto fail; - s->tsin = av_malloc(n4 * sizeof(FFTSample)); - if (!s->tsin) - goto fail; - - for(i=0;itcos[i] = -cos(alpha); - s->tsin[i] = -sin(alpha); - } - if (ff_fft_init(&s->fft, s->nbits - 2, inverse) < 0) - goto fail; - return 0; - fail: - av_freep(&s->tcos); - av_freep(&s->tsin); - return -1; -} - -/* complex multiplication: p = a * b */ -#define CMUL(pre, pim, are, aim, bre, bim) \ -{\ - FFTSample _are = (are);\ - FFTSample _aim = (aim);\ - FFTSample _bre = (bre);\ - FFTSample _bim = (bim);\ - (pre) = _are * _bre - _aim * _bim;\ - (pim) = _are * _bim + _aim * _bre;\ -} - -/** - * Compute the middle half of the inverse MDCT of size N = 2^nbits, - * thus excluding the parts that can be derived by symmetry - * @param output N/2 samples - * @param input N/2 samples - */ -void ff_imdct_half_c(MDCTContext *s, FFTSample *output, const FFTSample *input) -{ - int k, n8, n4, n2, n, j; - const uint16_t *revtab = s->fft.revtab; - const FFTSample *tcos = s->tcos; - const FFTSample *tsin = s->tsin; - const FFTSample *in1, *in2; - FFTComplex *z = (FFTComplex *)output; - - n = 1 << s->nbits; - n2 = n >> 1; - n4 = n >> 2; - n8 = n >> 3; - - /* pre rotation */ - in1 = input; - in2 = input + n2 - 1; - for(k = 0; k < n4; k++) { - j=revtab[k]; - CMUL(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]); - in1 += 2; - in2 -= 2; - } - ff_fft_calc(&s->fft, z); - - /* post rotation + reordering */ - output += n4; - for(k = 0; k < n8; k++) { - FFTSample r0, i0, r1, i1; - CMUL(r0, i1, z[n8-k-1].im, z[n8-k-1].re, tsin[n8-k-1], tcos[n8-k-1]); - CMUL(r1, i0, z[n8+k ].im, z[n8+k ].re, tsin[n8+k ], tcos[n8+k ]); - z[n8-k-1].re = r0; - z[n8-k-1].im = i0; - z[n8+k ].re = r1; - z[n8+k ].im = i1; - } -} - -/** - * Compute inverse MDCT of size N = 2^nbits - * @param output N samples - * @param input N/2 samples - */ -void ff_imdct_calc_c(MDCTContext *s, FFTSample *output, const FFTSample *input) -{ - int k; - int n = 1 << s->nbits; - int n2 = n >> 1; - int n4 = n >> 2; - - ff_imdct_half_c(s, output+n4, input); - - for(k = 0; k < n4; k++) { - output[k] = -output[n2-k-1]; - output[n-k-1] = output[n2+k]; - } -} - -/** - * Compute MDCT of size N = 2^nbits - * @param input N samples - * @param out N/2 samples - */ -void ff_mdct_calc(MDCTContext *s, FFTSample *out, const FFTSample *input) -{ - int i, j, n, n8, n4, n2, n3; - FFTSample re, im; - const uint16_t *revtab = s->fft.revtab; - const FFTSample *tcos = s->tcos; - const FFTSample *tsin = s->tsin; - FFTComplex *x = (FFTComplex *)out; - - n = 1 << s->nbits; - n2 = n >> 1; - n4 = n >> 2; - n8 = n >> 3; - n3 = 3 * n4; - - /* pre rotation */ - for(i=0;ifft, x); - - /* post rotation */ - for(i=0;itcos); - av_freep(&s->tsin); - ff_fft_end(&s->fft); -} -- 2.11.4.GIT