From 14ae175b3ab9d4ae77a156ddb87b6de3974b3f67 Mon Sep 17 00:00:00 2001 From: markun Date: Thu, 5 Apr 2007 09:56:28 +0000 Subject: [PATCH] optimized motion compensation for ARM from the mplayer-w100 project. Elephants Dream plays back 2.3fps faster on the Gigabeat git-svn-id: svn://svn.rockbox.org/rockbox/trunk@13032 a1c6a512-1295-4272-9138-f99709370657 --- apps/plugins/mpegplayer/SOURCES | 4 + apps/plugins/mpegplayer/motion_comp.c | 5 + .../{motion_comp.c => motion_comp_arm.c} | 216 ++++++++------ apps/plugins/mpegplayer/motion_comp_arm_s.S | 322 +++++++++++++++++++++ apps/plugins/mpegplayer/mpeg2_internal.h | 1 + 5 files changed, 466 insertions(+), 82 deletions(-) copy apps/plugins/mpegplayer/{motion_comp.c => motion_comp_arm.c} (55%) create mode 100644 apps/plugins/mpegplayer/motion_comp_arm_s.S diff --git a/apps/plugins/mpegplayer/SOURCES b/apps/plugins/mpegplayer/SOURCES index 464659944..fc23a2ab1 100644 --- a/apps/plugins/mpegplayer/SOURCES +++ b/apps/plugins/mpegplayer/SOURCES @@ -5,6 +5,10 @@ decode.c header.c idct.c motion_comp.c +#ifdef CPU_ARM +motion_comp_arm.c +motion_comp_arm_s.S +#endif slice.c video_out_rockbox.c mpeg_settings.c diff --git a/apps/plugins/mpegplayer/motion_comp.c b/apps/plugins/mpegplayer/motion_comp.c index fbf2ee1eb..b2f30c01f 100644 --- a/apps/plugins/mpegplayer/motion_comp.c +++ b/apps/plugins/mpegplayer/motion_comp.c @@ -58,7 +58,12 @@ void mpeg2_mc_init (uint32_t accel) mpeg2_mc = mpeg2_mc_vis; else #endif + +#ifdef CPU_ARM + mpeg2_mc = mpeg2_mc_arm; +#else mpeg2_mc = mpeg2_mc_c; +#endif } #define avg2(a,b) ((a+b+1)>>1) diff --git a/apps/plugins/mpegplayer/motion_comp.c b/apps/plugins/mpegplayer/motion_comp_arm.c similarity index 55% copy from apps/plugins/mpegplayer/motion_comp.c copy to apps/plugins/mpegplayer/motion_comp_arm.c index fbf2ee1eb..ec9eddab7 100644 --- a/apps/plugins/mpegplayer/motion_comp.c +++ b/apps/plugins/mpegplayer/motion_comp_arm.c @@ -1,7 +1,6 @@ /* - * motion_comp.c - * Copyright (C) 2000-2003 Michel 
Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman + * motion_comp_arm.c + * Copyright (C) 2004 AGAWA Koji * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. * See http://libmpeg2.sourceforge.net/ for updates. @@ -21,46 +20,14 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include "plugin.h" - #include "mpeg2dec_config.h" +#include <inttypes.h> + #include "mpeg2.h" #include "attributes.h" #include "mpeg2_internal.h" -mpeg2_mc_t mpeg2_mc; - -void mpeg2_mc_init (uint32_t accel) -{ - (void)accel; -#ifdef ARCH_X86 - if (accel & MPEG2_ACCEL_X86_MMXEXT) - mpeg2_mc = mpeg2_mc_mmxext; - else if (accel & MPEG2_ACCEL_X86_3DNOW) - mpeg2_mc = mpeg2_mc_3dnow; - else if (accel & MPEG2_ACCEL_X86_MMX) - mpeg2_mc = mpeg2_mc_mmx; - else -#endif -#ifdef ARCH_PPC - if (accel & MPEG2_ACCEL_PPC_ALTIVEC) - mpeg2_mc = mpeg2_mc_altivec; - else -#endif -#ifdef ARCH_ALPHA - if (accel & MPEG2_ACCEL_ALPHA) - mpeg2_mc = mpeg2_mc_alpha; - else -#endif -#ifdef ARCH_SPARC - if (accel & MPEG2_ACCEL_SPARC_VIS) - mpeg2_mc = mpeg2_mc_vis; - else -#endif - mpeg2_mc = mpeg2_mc_c; -} - #define avg2(a,b) ((a+b+1)>>1) #define avg4(a,b,c,d) ((a+b+c+d+2)>>2) @@ -75,57 +42,142 @@ void mpeg2_mc_init (uint32_t accel) /* mc function template */ -#define MC_FUNC(op,xy) \ -static void MC_##op##_##xy##_16_c (uint8_t * dest, const uint8_t * ref, \ - const int stride, int height) \ -{ \ - do { \ - op (predict_##xy, 0); \ - op (predict_##xy, 1); \ - op (predict_##xy, 2); \ - op (predict_##xy, 3); \ - op (predict_##xy, 4); \ - op (predict_##xy, 5); \ - op (predict_##xy, 6); \ - op (predict_##xy, 7); \ - op (predict_##xy, 8); \ - op (predict_##xy, 9); \ - op (predict_##xy, 10); \ - op (predict_##xy, 11); \ - op (predict_##xy, 12); \ - op (predict_##xy, 13); \ - op (predict_##xy, 14); \ - op (predict_##xy, 15); \ - ref += stride; \ - dest += stride; \ - } while (--height); \ -} \ -static void MC_##op##_##xy##_8_c (uint8_t * dest, const uint8_t * ref, \ - const int stride, int 
height) \ -{ \ - do { \ - op (predict_##xy, 0); \ - op (predict_##xy, 1); \ - op (predict_##xy, 2); \ - op (predict_##xy, 3); \ - op (predict_##xy, 4); \ - op (predict_##xy, 5); \ - op (predict_##xy, 6); \ - op (predict_##xy, 7); \ - ref += stride; \ - dest += stride; \ - } while (--height); \ -} - +#define MC_FUNC(op,xy) \ +static inline void MC_##op##_##xy##_16_c (uint8_t * dest, const uint8_t * ref, \ + const int stride, int height) \ +{ \ + do { \ + op (predict_##xy, 0); \ + op (predict_##xy, 1); \ + op (predict_##xy, 2); \ + op (predict_##xy, 3); \ + op (predict_##xy, 4); \ + op (predict_##xy, 5); \ + op (predict_##xy, 6); \ + op (predict_##xy, 7); \ + op (predict_##xy, 8); \ + op (predict_##xy, 9); \ + op (predict_##xy, 10); \ + op (predict_##xy, 11); \ + op (predict_##xy, 12); \ + op (predict_##xy, 13); \ + op (predict_##xy, 14); \ + op (predict_##xy, 15); \ + ref += stride; \ + dest += stride; \ + } while (--height); \ +} \ +static void MC_##op##_##xy##_8_c (uint8_t * dest, const uint8_t * ref, \ + const int stride, int height) \ +{ \ + do { \ + op (predict_##xy, 0); \ + op (predict_##xy, 1); \ + op (predict_##xy, 2); \ + op (predict_##xy, 3); \ + op (predict_##xy, 4); \ + op (predict_##xy, 5); \ + op (predict_##xy, 6); \ + op (predict_##xy, 7); \ + ref += stride; \ + dest += stride; \ + } while (--height); \ +} /* definitions of the actual mc functions */ -MC_FUNC (put,o) +/* MC_FUNC (put,o): not instantiated, MC_put_o_16_arm and MC_put_o_8_arm are declared extern below */ MC_FUNC (avg,o) -MC_FUNC (put,x) +/* MC_FUNC (put,x): not instantiated, MC_put_x_16_arm and MC_put_x_8_arm are declared extern below */ MC_FUNC (avg,x) MC_FUNC (put,y) MC_FUNC (avg,y) MC_FUNC (put,xy) MC_FUNC (avg,xy) -MPEG2_MC_EXTERN (c) + +extern void MC_put_o_16_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height); /* defined in motion_comp_arm_s.S */ + +extern void MC_put_x_16_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height); /* defined in motion_comp_arm_s.S */ + + +static void MC_put_y_16_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put_y_16_c(dest, ref, stride, height); /* C fallback: only put_o and put_x have assembly versions; every other _arm wrapper below forwards the same way */ +} + +static void MC_put_xy_16_arm (uint8_t * dest, const uint8_t * ref, + 
int stride, int height) +{ + MC_put_xy_16_c(dest, ref, stride, height); +} + +extern void MC_put_o_8_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height); /* defined in motion_comp_arm_s.S */ + +extern void MC_put_x_8_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height); /* defined in motion_comp_arm_s.S */ + +static void MC_put_y_8_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put_y_8_c(dest, ref, stride, height); +} + +static void MC_put_xy_8_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put_xy_8_c(dest, ref, stride, height); +} + +static void MC_avg_o_16_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_o_16_c(dest, ref, stride, height); +} + +static void MC_avg_x_16_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_x_16_c(dest, ref, stride, height); +} + +static void MC_avg_y_16_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_y_16_c(dest, ref, stride, height); +} + +static void MC_avg_xy_16_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_xy_16_c(dest, ref, stride, height); +} + +static void MC_avg_o_8_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_o_8_c(dest, ref, stride, height); +} + +static void MC_avg_x_8_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_x_8_c(dest, ref, stride, height); +} + +static void MC_avg_y_8_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_y_8_c(dest, ref, stride, height); +} + +static void MC_avg_xy_8_arm (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_xy_8_c(dest, ref, stride, height); +} + +MPEG2_MC_EXTERN (arm) diff --git a/apps/plugins/mpegplayer/motion_comp_arm_s.S b/apps/plugins/mpegplayer/motion_comp_arm_s.S new file mode 100644 index 000000000..82be8e6a8 --- /dev/null +++ b/apps/plugins/mpegplayer/motion_comp_arm_s.S @@ -0,0 +1,322 @@ +@ motion_comp_arm_s.S +@ 
Copyright (C) 2004 AGAWA Koji +@ +@ This file is part of mpeg2dec, a free MPEG-2 video stream decoder. +@ See http://libmpeg2.sourceforge.net/ for updates. +@ +@ mpeg2dec is free software; you can redistribute it and/or modify +@ it under the terms of the GNU General Public License as published by +@ the Free Software Foundation; either version 2 of the License, or +@ (at your option) any later version. +@ +@ mpeg2dec is distributed in the hope that it will be useful, +@ but WITHOUT ANY WARRANTY; without even the implied warranty of +@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +@ GNU General Public License for more details. +@ +@ You should have received a copy of the GNU General Public License +@ along with this program; if not, write to the Free Software +@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + .text + +@ ---------------------------------------------------------------- + .align + .global MC_put_o_16_arm +MC_put_o_16_arm: + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) @@ r0=dest, r1=ref, r2=stride, r3=height; copies height rows of 16 bytes from ref to dest, stepping both by stride + @@ pld [r1] + stmfd sp!, {r4-r11, lr} @ R14 is also called LR + and r4, r1, #3 @ r4 = ref & 3, the byte misalignment of the source + adr r5, MC_put_o_16_arm_align_jt + add r5, r5, r4, lsl #2 + ldr pc, [r5] @ jump to the align0..align3 variant through the table + +MC_put_o_16_arm_align0: + ldmia r1, {r4-r7} + add r1, r1, r2 + @@ pld [r1] + stmia r0, {r4-r7} + subs r3, r3, #1 + add r0, r0, r2 + bne MC_put_o_16_arm_align0 @ tests the flags set by subs; the add in between does not touch them + ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. + +.macro PROC shift + ldmia r1, {r4-r8} @ five words: a misaligned 16-byte row spans five aligned words + add r1, r1, r2 + mov r9, r4, lsr #(\shift) @ drop the bytes that precede ref (NOTE(review): assumes little-endian byte order, confirm) + @@ pld [r1] + mov r10, r5, lsr #(\shift) + orr r9, r9, r5, lsl #(32-\shift) @ fill the top of the word from the next source word + mov r11, r6, lsr #(\shift) + orr r10, r10, r6, lsl #(32-\shift) + mov r12, r7, lsr #(\shift) + orr r11, r11, r7, lsl #(32-\shift) + orr r12, r12, r8, lsl #(32-\shift) + stmia r0, {r9-r12} + subs r3, r3, #1 @ sets the flags tested by the bne that follows each PROC use + add r0, r0, r2 +.endm + +MC_put_o_16_arm_align1: + and r1, r1, #0xFFFFFFFC @ round ref down to a word boundary +1: PROC(8) + bne 1b + ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. 
+MC_put_o_16_arm_align2: + and r1, r1, #0xFFFFFFFC +1: PROC(16) + bne 1b + ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. +MC_put_o_16_arm_align3: + and r1, r1, #0xFFFFFFFC +1: PROC(24) + bne 1b + ldmfd sp!, {r4-r11, pc} @@ update PC with LR content. +MC_put_o_16_arm_align_jt: + .word MC_put_o_16_arm_align0 @ table indexed by ref & 3 + .word MC_put_o_16_arm_align1 + .word MC_put_o_16_arm_align2 + .word MC_put_o_16_arm_align3 + +@ ---------------------------------------------------------------- + .align + .global MC_put_o_8_arm +MC_put_o_8_arm: + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) @@ r0=dest, r1=ref, r2=stride, r3=height; copies height rows of 8 bytes from ref to dest, stepping both by stride + @@ pld [r1] + stmfd sp!, {r4-r10, lr} @ R14 is also called LR + and r4, r1, #3 @ r4 = ref & 3, the byte misalignment of the source + adr r5, MC_put_o_8_arm_align_jt + add r5, r5, r4, lsl #2 + ldr pc, [r5] @ jump to the align0..align3 variant through the table +MC_put_o_8_arm_align0: + ldmia r1, {r4-r5} + add r1, r1, r2 + @@ pld [r1] + stmia r0, {r4-r5} + add r0, r0, r2 + subs r3, r3, #1 + bne MC_put_o_8_arm_align0 + ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. + +.macro PROC8 shift + ldmia r1, {r4-r6} @ three words: a misaligned 8-byte row spans three aligned words + add r1, r1, r2 + mov r9, r4, lsr #(\shift) + @@ pld [r1] + mov r10, r5, lsr #(\shift) + orr r9, r9, r5, lsl #(32-\shift) + orr r10, r10, r6, lsl #(32-\shift) + stmia r0, {r9-r10} + subs r3, r3, #1 @ sets the flags tested by the bne that follows each PROC8 use + add r0, r0, r2 +.endm + +MC_put_o_8_arm_align1: + and r1, r1, #0xFFFFFFFC @ round ref down to a word boundary +1: PROC8(8) + bne 1b + ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. + +MC_put_o_8_arm_align2: + and r1, r1, #0xFFFFFFFC +1: PROC8(16) + bne 1b + ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. + +MC_put_o_8_arm_align3: + and r1, r1, #0xFFFFFFFC +1: PROC8(24) + bne 1b + ldmfd sp!, {r4-r10, pc} @@ update PC with LR content. 
+ +MC_put_o_8_arm_align_jt: + .word MC_put_o_8_arm_align0 @ table indexed by ref & 3 + .word MC_put_o_8_arm_align1 + .word MC_put_o_8_arm_align2 + .word MC_put_o_8_arm_align3 + +@ ---------------------------------------------------------------- +.macro AVG_PW rW1, rW2 + mov \rW2, \rW2, lsl #24 + orr \rW2, \rW2, \rW1, lsr #8 @ rW2 = low byte of old rW2 on top of the upper three bytes of rW1 (NOTE(review): this is rW1 advanced one byte only in little-endian order, confirm) + eor r9, \rW1, \rW2 @ r9 = a^b, where a = rW1 and b = the shifted word + and \rW2, \rW1, \rW2 @ a&b + and r10, r9, r12 @ callers set r12 = ~0x01010101 so the shift below cannot leak a bit into the neighbouring byte + add \rW2, \rW2, r10, lsr #1 + and r10, r9, r11 @ callers set r11 = 0x01010101, selecting the low bit of every byte + add \rW2, \rW2, r10 @ per byte: (a&b) plus ((a^b)>>1) plus ((a^b)&1), i.e. (a+b+1)>>1; r9 and r10 are clobbered +.endm + + .align + .global MC_put_x_16_arm +MC_put_x_16_arm: + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) @@ r0=dest, r1=ref, r2=stride, r3=height; each of the 16 output bytes per row is AVG_PW applied to a source byte and its right-hand neighbour + @@ pld [r1] + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + and r4, r1, #3 + adr r5, MC_put_x_16_arm_align_jt + ldr r11, [r5] @ r11 = 0x01010101, the first word stored at the table label + mvn r12, r11 @ r12 = ~r11 + add r5, r5, r4, lsl #2 + ldr pc, [r5, #4] @ offset 4 skips the constant word that precedes the four addresses + +.macro ADJ_ALIGN_QW shift, R0, R1, R2, R3, R4 + mov \R0, \R0, lsr #(\shift) @ shift the five-word window right by shift bits, pulling each word's top bits from the next word + orr \R0, \R0, \R1, lsl #(32 - \shift) + mov \R1, \R1, lsr #(\shift) + orr \R1, \R1, \R2, lsl #(32 - \shift) + mov \R2, \R2, lsr #(\shift) + orr \R2, \R2, \R3, lsl #(32 - \shift) + mov \R3, \R3, lsr #(\shift) + orr \R3, \R3, \R4, lsl #(32 - \shift) + mov \R4, \R4, lsr #(\shift) +@ and \R4, \R4, #0xFF +.endm + +MC_put_x_16_arm_align0: + ldmia r1, {r4-r8} @ five words: the last one supplies the neighbour byte for output byte 15 + add r1, r1, r2 + @@ pld [r1] + AVG_PW r7, r8 @ highest pair first: each AVG_PW overwrites its second register and reads the first one unmodified + AVG_PW r6, r7 + AVG_PW r5, r6 + AVG_PW r4, r5 + stmia r0, {r5-r8} @ the four results ended up one register above their inputs + subs r3, r3, #1 + add r0, r0, r2 + bne MC_put_x_16_arm_align0 + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. +MC_put_x_16_arm_align1: + and r1, r1, #0xFFFFFFFC @ round ref down to a word boundary +1: ldmia r1, {r4-r8} + add r1, r1, r2 + @@ pld [r1] + ADJ_ALIGN_QW 8, r4, r5, r6, r7, r8 + AVG_PW r7, r8 + AVG_PW r6, r7 + AVG_PW r5, r6 + AVG_PW r4, r5 + stmia r0, {r5-r8} + subs r3, r3, #1 + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. 
+MC_put_x_16_arm_align2: + and r1, r1, #0xFFFFFFFC +1: ldmia r1, {r4-r8} + add r1, r1, r2 + @@ pld [r1] + ADJ_ALIGN_QW 16, r4, r5, r6, r7, r8 + AVG_PW r7, r8 + AVG_PW r6, r7 + AVG_PW r5, r6 + AVG_PW r4, r5 + stmia r0, {r5-r8} + subs r3, r3, #1 + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. +MC_put_x_16_arm_align3: + and r1, r1, #0xFFFFFFFC +1: ldmia r1, {r4-r8} + add r1, r1, r2 + @@ pld [r1] + ADJ_ALIGN_QW 24, r4, r5, r6, r7, r8 + AVG_PW r7, r8 + AVG_PW r6, r7 + AVG_PW r5, r6 + AVG_PW r4, r5 + stmia r0, {r5-r8} + subs r3, r3, #1 + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. +MC_put_x_16_arm_align_jt: + .word 0x01010101 @ constant loaded into r11 at entry, not a jump target + .word MC_put_x_16_arm_align0 + .word MC_put_x_16_arm_align1 + .word MC_put_x_16_arm_align2 + .word MC_put_x_16_arm_align3 + +@ ---------------------------------------------------------------- + .align + .global MC_put_x_8_arm +MC_put_x_8_arm: + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) @@ r0=dest, r1=ref, r2=stride, r3=height; each of the 8 output bytes per row is AVG_PW applied to a source byte and its right-hand neighbour + @@ pld [r1] + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + and r4, r1, #3 + adr r5, MC_put_x_8_arm_align_jt + ldr r11, [r5] @ r11 = 0x01010101, the first word stored at the table label + mvn r12, r11 @ r12 = ~r11 + add r5, r5, r4, lsl #2 + ldr pc, [r5, #4] @ offset 4 skips the constant word that precedes the four addresses + +.macro ADJ_ALIGN_DW shift, R0, R1, R2 + mov \R0, \R0, lsr #(\shift) @ same realignment as ADJ_ALIGN_QW, over a three-word window + orr \R0, \R0, \R1, lsl #(32 - \shift) + mov \R1, \R1, lsr #(\shift) + orr \R1, \R1, \R2, lsl #(32 - \shift) + mov \R2, \R2, lsr #(\shift) +@ and \R4, \R4, #0xFF +.endm + +MC_put_x_8_arm_align0: + ldmia r1, {r4-r6} @ three words: the last one supplies the neighbour byte for output byte 7 + add r1, r1, r2 + @@ pld [r1] + AVG_PW r5, r6 + AVG_PW r4, r5 + stmia r0, {r5-r6} + subs r3, r3, #1 + add r0, r0, r2 + bne MC_put_x_8_arm_align0 + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. +MC_put_x_8_arm_align1: + and r1, r1, #0xFFFFFFFC @ round ref down to a word boundary +1: ldmia r1, {r4-r6} + add r1, r1, r2 + @@ pld [r1] + ADJ_ALIGN_DW 8, r4, r5, r6 + AVG_PW r5, r6 + AVG_PW r4, r5 + stmia r0, {r5-r6} + subs r3, r3, #1 + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. 
+MC_put_x_8_arm_align2: + and r1, r1, #0xFFFFFFFC +1: ldmia r1, {r4-r6} + add r1, r1, r2 + @@ pld [r1] + ADJ_ALIGN_DW 16, r4, r5, r6 + AVG_PW r5, r6 + AVG_PW r4, r5 + stmia r0, {r5-r6} + subs r3, r3, #1 + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. +MC_put_x_8_arm_align3: + and r1, r1, #0xFFFFFFFC +1: ldmia r1, {r4-r6} + add r1, r1, r2 + @@ pld [r1] + ADJ_ALIGN_DW 24, r4, r5, r6 + AVG_PW r5, r6 + AVG_PW r4, r5 + stmia r0, {r5-r6} + subs r3, r3, #1 + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content. +MC_put_x_8_arm_align_jt: + .word 0x01010101 @ constant loaded into r11 at entry, not a jump target + .word MC_put_x_8_arm_align0 + .word MC_put_x_8_arm_align1 + .word MC_put_x_8_arm_align2 + .word MC_put_x_8_arm_align3 diff --git a/apps/plugins/mpegplayer/mpeg2_internal.h b/apps/plugins/mpegplayer/mpeg2_internal.h index 850456b1f..443b6d611 100644 --- a/apps/plugins/mpegplayer/mpeg2_internal.h +++ b/apps/plugins/mpegplayer/mpeg2_internal.h @@ -298,3 +298,4 @@ extern mpeg2_mc_t mpeg2_mc_3dnow; extern mpeg2_mc_t mpeg2_mc_altivec; extern mpeg2_mc_t mpeg2_mc_alpha; extern mpeg2_mc_t mpeg2_mc_vis; +extern mpeg2_mc_t mpeg2_mc_arm; -- 2.11.4.GIT