1 /*****************************************************************************
2 * memcpy.c : AltiVec memcpy module
3 *****************************************************************************
4 * Copyright (C) 2001 the VideoLAN team
7 * Author: Christophe Massiot <massiot@via.ecp.fr>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22 *****************************************************************************/
24 #ifndef __BUILD_ALTIVEC_ASM__
26 /*****************************************************************************
28 *****************************************************************************/
34 #include <vlc_common.h>
35 #include <vlc_plugin.h>
42 /*****************************************************************************
 * Local prototypes
 *
 * fast_memcpy takes a destination pointer, a source pointer and a byte
 * count, and returns a void pointer. It is declared ahead of its definition
 * so that Activate() can name it.
44 *****************************************************************************/
45 static void * fast_memcpy ( void * to
, const void * from
, size_t len
);
47 /*****************************************************************************
 * Activate: callback named in set_callbacks() in the module descriptor.
 *
 * Tests the flags returned by vlc_CPU() for CPU_CAPABILITY_ALTIVEC and passes
 * fast_memcpy to vlc_fastmem_register().
 * NOTE(review): the statement guarded by the capability test and the return
 * statements are not visible in this excerpt -- confirm what is returned when
 * the AltiVec capability is absent.
49 *****************************************************************************/
50 static int Activate ( vlc_object_t
*p_this
)
/* The condition is true when the AltiVec capability bit is NOT set. */
52 if( !(vlc_CPU() & CPU_CAPABILITY_ALTIVEC
) )
/* Register the AltiVec copy routine through vlc_fastmem_register(). */
56 vlc_fastmem_register( fast_memcpy
);
60 /*****************************************************************************
 * Module descriptor
 *
 * Describes the "AltiVec memcpy" module: capability "memcpy" with the value
 * 100, Activate as the first callback (the second one is NULL), and the
 * shortcut "altivec".
 * NOTE(review): 100 is presumably the module score/priority and the NULL
 * callback presumably means there is no deactivation step -- confirm against
 * vlc_plugin.h. The opening and closing module macros are not visible in this
 * excerpt.
62 *****************************************************************************/
64 set_description( N_("AltiVec memcpy") )
65 set_category( CAT_ADVANCED
)
66 set_subcategory( SUBCAT_ADVANCED_MISC
)
67 set_capability( "memcpy", 100 )
68 set_callbacks( Activate
, NULL
)
69 add_shortcut( "altivec" )
/* Local definition of size_t as unsigned long.
 * NOTE(review): presumably this belongs to an alternative preprocessor branch
 * (e.g. the standalone __BUILD_ALTIVEC_ASM__ build, where the VLC headers are
 * not included) whose directive is not visible in this excerpt -- confirm. */
73 typedef unsigned long size_t;
74 #endif /* __BUILD_ALTIVEC_ASM__ */
76 #if defined(CAN_COMPILE_C_ALTIVEC) || defined( __BUILD_ALTIVEC_ASM__ )
/* Shorthand names for the AltiVec `vector` types: signed/unsigned variants
 * with 8-, 16- and 32-bit elements. Only vector_u8_t is used by the C
 * implementation of fast_memcpy visible in this file. */
78 #define vector_s16_t vector signed short
79 #define vector_u16_t vector unsigned short
80 #define vector_s8_t vector signed char
81 #define vector_u8_t vector unsigned char
82 #define vector_s32_t vector signed int
83 #define vector_u32_t vector unsigned int
/* SMALL_MEMCPY(to, from, len): helper macro used by fast_memcpy for the
 * unaligned head and the remaining tail of a copy.
 * The visible part of the expansion computes `end = to + len`, i.e. the
 * address one past the last destination byte.
 * NOTE(review): the rest of the macro body is not visible in this excerpt;
 * presumably it copies byte by byte until `to` reaches `end`, advancing both
 * pointers -- confirm. The arguments are used unparenthesized, so callers
 * should pass plain identifiers. */
86 #define SMALL_MEMCPY(to, from, len) \
88 unsigned char * end = to + len; \
/*
 * fast_memcpy (C / AltiVec intrinsics version)
 *
 * Copies `len` bytes from `_from` to `_to`:
 *   1. copies a short head with SMALL_MEMCPY to bring the destination onto
 *      an MMREG_SIZE boundary,
 *   2. copies whole vectors with vec_ld / vec_perm / vec_st, which allows
 *      the source to be unaligned,
 *   3. copies what is left with SMALL_MEMCPY.
 *
 * NOTE(review): braces, the pointer/length updates between the statements
 * below and the return statement are not visible in this excerpt; the
 * comments describe only the statements that are shown. MMREG_SIZE is
 * defined outside this excerpt and is assumed to be a power of two
 * (presumably 16, the AltiVec register size) -- confirm.
 */
95 static void * fast_memcpy( void * _to
, const void * _from
, size_t len
)
/* Byte-wise views of the two buffers. */
98 unsigned char * to
= (unsigned char *)_to
;
/* NOTE(review): this cast drops the const qualifier of _from. */
99 unsigned char * from
= (unsigned char *)_from
;
103 /* Align the destination to an MMREG_SIZE boundary */
104 register unsigned long int delta
;
/* Offset of `to` past the previous MMREG_SIZE boundary
 * (0 when already aligned). */
106 delta
= ((unsigned long)to
)&(MMREG_SIZE
-1);
/* Number of bytes needed to reach the next boundary. */
109 delta
= MMREG_SIZE
- delta
;
/* Copy those head bytes with the small-copy macro. */
111 SMALL_MEMCPY(to
, from
, delta
);
/* Non-zero when at least MMREG_SIZE bytes remain
 * (given a power-of-two MMREG_SIZE). */
114 if( len
& ~(MMREG_SIZE
-1) )
116 vector_u8_t perm
, ref0
, ref1
, tmp
;
/* Permute control vector derived from the alignment of `from`; it is
 * computed once and reused for every vec_perm below. */
118 perm
= vec_lvsl( 0, from
);
/* vec_ld ignores the low four address bits, so these two loads fetch the
 * aligned 16-byte blocks containing from[0] and from[15]. */
119 ref0
= vec_ld( 0, from
);
120 ref1
= vec_ld( 15, from
);
/* Select the 16 source bytes starting at `from` out of the two blocks. */
123 tmp
= vec_perm( ref0
, ref1
, perm
);
/* Main loop: runs while the same "at least MMREG_SIZE bytes" test holds. */
124 while( len
& ~(MMREG_SIZE
-1) )
/* Load the blocks for the next vector ... */
126 ref0
= vec_ld( 0, from
);
127 ref1
= vec_ld( 15, from
);
/* ... then store the vector assembled previously ... */
130 vec_st( tmp
, 0, to
);
/* ... and assemble the next one. */
131 tmp
= vec_perm( ref0
, ref1
, perm
);
/* Store the last assembled vector, which the loop has not written yet. */
134 vec_st( tmp
, 0, to
);
/* Copy the remaining bytes with the small-copy macro. */
141 SMALL_MEMCPY( to
, from
, len
);
149 #if !defined(CAN_COMPILE_C_ALTIVEC) && !defined(__BUILD_ALTIVEC_ASM__)
152 * The asm code is generated with:
154 * gcc-2.95 -fvec -D__BUILD_ALTIVEC_ASM__ -O9 -fomit-frame-pointer -mregnames -S memcpyaltivec.c
156 * sed 's/.L/._L/g' memcpyaltivec.s |
157 * awk '{args=""; len=split ($2, arg, ",");
158 * for (i=1; i<=len; i++) { a=arg[i]; if (i<len) a=a",";
159 * args = args sprintf ("%-6s", a) }
160 * printf ("\t\"\t%-16s%-24s\\n\"\n", $1, args) }' |
164 static void * fast_memcpy( void * _to
, const void * _from
, size_t len
)
167 " cmplwi %cr0, %r5, 16 \n"
170 " andi. %r0, %r3, 15 \n"
172 " subfic %r0, %r0, 16 \n"
173 " add %r11, %r3, %r0 \n"
174 " cmplw %cr0, %r3, %r11 \n"
175 " subf %r5, %r0, %r5 \n"
178 " lbz %r0, 0(%r4) \n"
179 " stb %r0, 0(%r9) \n"
180 " addi %r9, %r9, 1 \n"
181 " cmplw %cr0, %r9, %r11 \n"
182 " addi %r4, %r4, 1 \n"
185 " rlwinm. %r0, %r5, 0, 0, 27 \n"
187 " addi %r5, %r5, -16 \n"
189 " lvsl %v12, 0, %r4 \n"
190 " lvx %v1, 0, %r4 \n"
191 " lvx %v0, %r11, %r4 \n"
192 " rlwinm. %r0, %r5, 0, 0, 27 \n"
193 " vperm %v13, %v1, %v0, %v12 \n"
194 " addi %r4, %r4, 16 \n"
195 " bc 12, 2, ._L11 \n"
197 " addi %r5, %r5, -16 \n"
199 " lvx %v1, 0, %r4 \n"
200 " lvx %v0, %r11, %r4 \n"
201 " rlwinm. %r0, %r5, 0, 0, 27 \n"
202 " stvx %v13, 0, %r9 \n"
203 " vperm %v13, %v1, %v0, %v12 \n"
204 " addi %r4, %r4, 16 \n"
205 " addi %r9, %r9, 16 \n"
208 " stvx %v13, 0, %r9 \n"
209 " addi %r9, %r9, 16 \n"
211 " cmpwi %cr0, %r5, 0 \n"
213 " add %r5, %r9, %r5 \n"
214 " cmplw %cr0, %r9, %r5 \n"
217 " lbz %r0, 0(%r4) \n"
218 " stb %r0, 0(%r9) \n"
219 " addi %r9, %r9, 1 \n"
220 " cmplw %cr0, %r9, %r5 \n"
221 " addi %r4, %r4, 1 \n"
222 " bc 12, 0, ._L17 \n"