From f5bcb62988d0a11e0d178c5d2e62b728808c2f12 Mon Sep 17 00:00:00 2001 From: jethead71 Date: Sun, 25 Apr 2010 20:04:47 +0000 Subject: [PATCH] Optimized DSP sample out functions for armv6. (For stereo output) ~9% faster than SVN asm and about 4% faster than SVN asm rearranged to observe pipeline hazards. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@25717 a1c6a512-1295-4272-9138-f99709370657 --- apps/SOURCES | 3 ++ apps/dsp_arm.S | 5 ++- apps/dsp_arm_v6.S | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 134 insertions(+), 1 deletion(-) create mode 100644 apps/dsp_arm_v6.S diff --git a/apps/SOURCES b/apps/SOURCES index 7bc263a15..66f2a7da5 100644 --- a/apps/SOURCES +++ b/apps/SOURCES @@ -158,6 +158,9 @@ dsp_cf.S eq_cf.S #elif defined(CPU_ARM) dsp_arm.S +#if ARM_ARCH >= 6 +dsp_arm_v6.S +#endif eq_arm.S #endif #endif diff --git a/apps/dsp_arm.S b/apps/dsp_arm.S index b90e63278..f924569bc 100644 --- a/apps/dsp_arm.S +++ b/apps/dsp_arm.S @@ -18,6 +18,7 @@ * KIND, either express or implied. 
* ****************************************************************************/ + #include "config.h" /**************************************************************************** * void channels_process_sound_chan_mono(int count, int32_t *buf[]) @@ -83,7 +84,8 @@ channels_process_sound_chan_karaoke: ldmfd sp!, {r4-r5, pc} .karaokeend: .size channels_process_sound_chan_karaoke,.karaokeend-channels_process_sound_chan_karaoke - + +#if ARM_ARCH < 6 /**************************************************************************** * void sample_output_mono(int count, struct dsp_data *data, * const int32_t *src[], int16_t *dst) @@ -195,6 +197,7 @@ sample_output_stereo: ldmfd sp!, {r4-r10, pc} .sosend: .size sample_output_stereo,.sosend-sample_output_stereo +#endif /* ARM_ARCH < 6 */ /**************************************************************************** * void apply_crossfeed(int count, int32_t* src[]) diff --git a/apps/dsp_arm_v6.S b/apps/dsp_arm_v6.S new file mode 100644 index 000000000..39949498e --- /dev/null +++ b/apps/dsp_arm_v6.S @@ -0,0 +1,127 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2010 Michael Sevakis + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. 
+ *
+ ****************************************************************************/
+
+/****************************************************************************
+ * void sample_output_mono(int count, struct dsp_data *data,
+ *                         const int32_t *src[], int16_t *dst)
+ *
+ * Converts count 32-bit samples read from src[0] into 16-bit output.
+ * Each input sample produces one 32-bit word at dst that carries the same
+ * 16-bit result in both its low and high halfword (pkhbt of the value with
+ * itself shifted left by 16).
+ *
+ * Per sample:
+ *   1. qadd (saturating add) of the rounding bias (1 << scale) >> 1,
+ *      which is 0 when scale is 0
+ *   2. arithmetic shift right by scale
+ *   3. ssat to the signed 16-bit range
+ *   4. duplicate into both halfwords and store
+ * scale is the word loaded from offset 0 of *data (data->output_scale).
+ *
+ * Registers on entry:
+ *   r0 = count  number of input samples; see the note on count below
+ *   r1 = data   only the word at offset 0 is read
+ *   r2 = src    only src[0] is read
+ *   r3 = dst    receives one 32-bit word per input sample
+ *
+ * Notes:
+ *   - The main loop (label 1) handles two samples per pass; a single
+ *     remaining sample is handled at label 2. r0 holds count - 1 before
+ *     the loop and drops by 2 per pass, so it ends at 0 when one sample is
+ *     left (odd count) and at -1 when none is left (even count).
+ *   - count is not checked for being <= 0: in that case the beq is not
+ *     taken and the loop body still runs once, reading and writing two
+ *     samples. Callers must pass count >= 1.
+ *   - stmia sits between the flag-setting subs and the bgt/ldmlt that
+ *     consume the flags; the store does not alter the flags, so do not
+ *     insert a flag-setting instruction between them.
+ *   - r4 is saved and restored; r12 and r14 (lr) are used as scratch. The
+ *     return is done by popping the saved lr into pc.
+ *   - ssat and pkhbt are ARMv6 instructions.
+ */
+    .section   .text, "ax", %progbits
+    .align     2
+    .global    sample_output_mono
+    .type      sample_output_mono, %function
+sample_output_mono:
+    @ input: r0 = count, r1 = data, r2 = src, r3 = dst
+    stmfd   sp!, { r4, lr }         @
+                                    @
+    ldr     r1, [r1]                @ r1 = data->output_scale
+    ldr     r2, [r2]                @ r2 = src[0]
+                                    @
+    mov     r4, #1                  @ r4 = 1 << (scale - 1)
+    mov     r4, r4, lsl r1          @
+    subs    r0, r0, #1              @ odd: end at 0; even: end at -1
+    mov     r4, r4, lsr #1          @
+    beq     2f                      @ Zero? Only one sample!
+                                    @
+1:                                  @
+    ldmia   r2!, { r12, r14 }       @ load Mi0, Mi1
+    qadd    r12, r12, r4            @ round, scale, saturate and
+    qadd    r14, r14, r4            @ pack Mi0 to So0, Mi1 to So1
+    mov     r12, r12, asr r1        @
+    mov     r14, r14, asr r1        @
+    ssat    r12, #16, r12           @
+    ssat    r14, #16, r14           @
+    pkhbt   r12, r12, r12, asl #16  @
+    pkhbt   r14, r14, r14, asl #16  @
+    subs    r0, r0, #2              @
+    stmia   r3!, { r12, r14 }       @ store So0, So1
+    bgt     1b                      @
+                                    @
+    ldmltfd sp!, { r4, pc }         @ if count was even, we're done
+                                    @
+2:                                  @
+    ldr     r12, [r2]               @ round, scale, saturate
+    qadd    r12, r12, r4            @ and pack Mi to So
+    mov     r12, r12, asr r1        @
+    ssat    r12, #16, r12           @
+    pkhbt   r12, r12, r12, asl #16  @
+    str     r12, [r3]               @ store So
+                                    @
+    ldmfd   sp!, { r4, pc }         @
+    .size   sample_output_mono, .-sample_output_mono
+
+/****************************************************************************
+ * void sample_output_stereo(int count, struct dsp_data *data,
+ *                           const int32_t *src[], int16_t *dst)
+ *
+ * Converts count pairs of 32-bit samples, one stream read from src[0]
+ * (labelled L in the inline comments) and one from src[1] (labelled R),
+ * into 16-bit output. Each pair produces one 32-bit word at dst with the
+ * src[0] result in the low halfword and the src[1] result in the high
+ * halfword (pkhbt Rd, L, R, asl #16).
+ *
+ * Per sample, applied to both streams independently:
+ *   1. qadd (saturating add) of the rounding bias (1 << scale) >> 1,
+ *      which is 0 when scale is 0
+ *   2. arithmetic shift right by scale
+ *   3. ssat to the signed 16-bit range
+ * scale is the word loaded from offset 0 of *data (data->output_scale).
+ *
+ * In the inline comments "Li+Ri to So" means L and R are packed together
+ * into one output word; the two channels are never added to each other.
+ *
+ * Registers on entry:
+ *   r0 = count  number of sample pairs; see the note on count below
+ *   r1 = data   only the word at offset 0 is read
+ *   r2 = src    src[0] and src[1] are read
+ *   r3 = dst    receives one 32-bit word per sample pair
+ *
+ * Notes:
+ *   - The main loop (label 1) handles two pairs per pass; a single
+ *     remaining pair is handled at label 2. r0 holds count - 1 before the
+ *     loop and drops by 2 per pass, so it ends at 0 when one pair is left
+ *     (odd count) and at -1 when none is left (even count).
+ *   - count is not checked for being <= 0: in that case the beq is not
+ *     taken and the loop body still runs once, reading two samples from
+ *     each stream and writing two words. Callers must pass count >= 1.
+ *   - stmia sits between the flag-setting subs and the bgt/ldmlt that
+ *     consume the flags; the store does not alter the flags, so do not
+ *     insert a flag-setting instruction between them.
+ *   - r4-r7 are saved and restored; r12 and r14 (lr) are used as scratch.
+ *     The return is done by popping the saved lr into pc.
+ *   - ssat and pkhbt are ARMv6 instructions.
+ */
+    .section   .text, "ax", %progbits
+    .align     2
+    .global    sample_output_stereo
+    .type      sample_output_stereo, %function
+sample_output_stereo:
+    @ input: r0 = count, r1 = data, r2 = src, r3 = dst
+    stmfd   sp!, { r4-r7, lr }      @
+                                    @
+    ldr     r1, [r1]                @ r1 = data->output_scale
+    ldmia   r2, { r2, r4 }          @ r2 = src[0], r4 = src[1]
+                                    @
+    mov     r5, #1                  @ r5 = 1 << (scale - 1)
+    mov     r5, r5, lsl r1          @
+    subs    r0, r0, #1              @ odd: end at 0; even: end at -1
+    mov     r5, r5, lsr #1          @
+    beq     2f                      @ Zero? Only one sample!
+                                    @
+1:                                  @
+    ldmia   r2!, { r6, r7 }         @ r6, r7 = Li0, Li1
+    ldmia   r4!, { r12, r14 }       @ r12, r14 = Ri0, Ri1
+    qadd    r6, r6, r5              @ round, scale, saturate and pack
+    qadd    r7, r7, r5              @ Li0+Ri0 to So0, Li1+Ri1 to So1
+    qadd    r12, r12, r5            @
+    qadd    r14, r14, r5            @
+    mov     r6, r6, asr r1          @
+    mov     r7, r7, asr r1          @
+    mov     r12, r12, asr r1        @
+    mov     r14, r14, asr r1        @
+    ssat    r6, #16, r6             @
+    ssat    r12, #16, r12           @
+    ssat    r7, #16, r7             @
+    ssat    r14, #16, r14           @
+    pkhbt   r6, r6, r12, asl #16    @
+    pkhbt   r7, r7, r14, asl #16    @
+    subs    r0, r0, #2              @
+    stmia   r3!, { r6, r7 }         @ store So0, So1
+    bgt     1b                      @
+                                    @
+    ldmltfd sp!, { r4-r7, pc }      @ if count was even, we're done
+                                    @
+2:                                  @
+    ldr     r6, [r2]                @ r6 = Li
+    ldr     r12, [r4]               @ r12 = Ri
+    qadd    r6, r6, r5              @ round, scale, saturate
+    qadd    r12, r12, r5            @ and pack Li+Ri to So
+    mov     r6, r6, asr r1          @
+    mov     r12, r12, asr r1        @
+    ssat    r6, #16, r6             @
+    ssat    r12, #16, r12           @
+    pkhbt   r6, r6, r12, asl #16    @
+    str     r6, [r3]                @ store So
+                                    @
+    ldmfd   sp!, { r4-r7, pc }      @
+    .size   sample_output_stereo, .-sample_output_stereo
-- 
2.11.4.GIT