Contrib: upnp/win32, remove strerror use, and other small hacks...
[vlc/asuraparaju-public.git] / modules / altivec / memcpy.c
blob2d91ea226591068cef924f38f9d335f534f893b0
1 /*****************************************************************************
2 * memcpy.c : AltiVec memcpy module
3 *****************************************************************************
4 * Copyright (C) 2001 the VideoLAN team
5 * $Id$
7 * Author: Christophe Massiot <massiot@via.ecp.fr>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22 *****************************************************************************/
24 #ifndef __BUILD_ALTIVEC_ASM__
26 /*****************************************************************************
27 * Preamble
28 *****************************************************************************/
30 #ifdef HAVE_CONFIG_H
31 # include "config.h"
32 #endif
34 #include <vlc_common.h>
35 #include <vlc_plugin.h>
36 #include <vlc_cpu.h>
38 #ifdef HAVE_ALTIVEC_H
39 # include <altivec.h>
40 #endif
42 /*****************************************************************************
43 * Local prototypes.
44 *****************************************************************************/
45 static void * fast_memcpy ( void * to, const void * from, size_t len );
47 /*****************************************************************************
48 * Module initializer.
49 *****************************************************************************/
50 static int Activate ( vlc_object_t *p_this )
52 VLC_UNUSED(p_this);
53 vlc_fastmem_register( fast_memcpy, NULL );
54 return VLC_SUCCESS;
57 /*****************************************************************************
58 * Module descriptor.
59 *****************************************************************************/
/*
 * Plugin descriptor for the "AltiVec memcpy" module.
 *
 * It is filed under CAT_ADVANCED / SUBCAT_ADVANCED_MISC, declares the
 * "memcpy" capability with the value 100 (presumably the module's priority
 * score -- confirm against vlc_plugin.h), uses Activate as its first callback
 * and NULL as its second (presumably "no deactivation hook" -- confirm), and
 * can be referred to by the shortcut "altivec".
 */
60 vlc_module_begin ()
61 set_description( N_("AltiVec memcpy") )
62 set_category( CAT_ADVANCED )
63 set_subcategory( SUBCAT_ADVANCED_MISC )
64 set_capability( "memcpy", 100 )
65 set_callbacks( Activate, NULL )
66 add_shortcut( "altivec" )
67 vlc_module_end ()
69 #else
70 typedef unsigned long size_t;
71 #endif /* __BUILD_ALTIVEC_ASM__ */
73 #if defined(CAN_COMPILE_C_ALTIVEC) || defined( __BUILD_ALTIVEC_ASM__ )
/*
 * Short aliases for the AltiVec `vector` types, named by element signedness
 * and width (s/u + 8/16/32 bits). Only vector_u8_t is used by the C version
 * of fast_memcpy in this file.
 */
75 #define vector_s16_t vector signed short
76 #define vector_u16_t vector unsigned short
77 #define vector_s8_t vector signed char
78 #define vector_u8_t vector unsigned char
79 #define vector_s32_t vector signed int
80 #define vector_u32_t vector unsigned int
/* Byte size used by fast_memcpy as the destination alignment boundary and as
 * the mask for "at least one full block left" tests. */
81 #define MMREG_SIZE 16
/*
 * Copy `len` bytes one at a time from `from` to `to`.
 *
 * `to` and `from` must be modifiable byte-pointer lvalues: both are advanced
 * past the copied bytes as a side effect, and fast_memcpy relies on that to
 * continue from where the byte copy stopped. `len` is evaluated exactly once.
 *
 * The body is wrapped in do { } while( 0 ) so that `SMALL_MEMCPY(...);`
 * behaves as a single statement (safe in an unbraced if/else), and every
 * argument is parenthesized so that expressions such as `c ? a : b` can be
 * passed as `len`. The internal end pointer has a macro-specific name to avoid
 * shadowing a caller variable that appears in one of the arguments.
 */
#define SMALL_MEMCPY(to, from, len)                         \
    do                                                      \
    {                                                       \
        unsigned char * small_memcpy_end = (to) + (len);    \
        while( (to) < small_memcpy_end )                    \
        {                                                   \
            *(to)++ = *(from)++;                            \
        }                                                   \
    } while( 0 )
92 static void * fast_memcpy( void * _to, const void * _from, size_t len )
94 void * retval = _to;
95 unsigned char * to = (unsigned char *)_to;
96 unsigned char * from = (unsigned char *)_from;
98 if( len > 16 )
100 /* Align destination to MMREG_SIZE -boundary */
101 register unsigned long int delta;
103 delta = ((unsigned long)to)&(MMREG_SIZE-1);
104 if( delta )
106 delta = MMREG_SIZE - delta;
107 len -= delta;
108 SMALL_MEMCPY(to, from, delta);
111 if( len & ~(MMREG_SIZE-1) )
113 vector_u8_t perm, ref0, ref1, tmp;
115 perm = vec_lvsl( 0, from );
116 ref0 = vec_ld( 0, from );
117 ref1 = vec_ld( 15, from );
118 from += 16;
119 len -= 16;
120 tmp = vec_perm( ref0, ref1, perm );
121 while( len & ~(MMREG_SIZE-1) )
123 ref0 = vec_ld( 0, from );
124 ref1 = vec_ld( 15, from );
125 from += 16;
126 len -= 16;
127 vec_st( tmp, 0, to );
128 tmp = vec_perm( ref0, ref1, perm );
129 to += 16;
131 vec_st( tmp, 0, to );
132 to += 16;
136 if( len )
138 SMALL_MEMCPY( to, from, len );
141 return retval;
144 #endif
146 #if !defined(CAN_COMPILE_C_ALTIVEC) && !defined(__BUILD_ALTIVEC_ASM__)
149 * The asm code is generated with:
151 * gcc-2.95 -fvec -D__BUILD_ALTIVEC_ASM__ -O9 -fomit-frame-pointer -mregnames -S memcpyaltivec.c
153 * sed 's/.L/._L/g' memcpyaltivec.s |
154 * awk '{args=""; len=split ($2, arg, ",");
155 * for (i=1; i<=len; i++) { a=arg[i]; if (i<len) a=a",";
156 * args = args sprintf ("%-6s", a) }
157 * printf ("\t\"\t%-16s%-24s\\n\"\n", $1, args) }' |
158 * unexpand -a
/*
 * Inline-assembly variant of fast_memcpy, compiled only when neither
 * CAN_COMPILE_C_ALTIVEC nor __BUILD_ALTIVEC_ASM__ is defined. Per the comment
 * preceding this function, the text is compiler output for the C version of
 * fast_memcpy, so the labels below correspond to that function's branches.
 *
 * NOTE(review): the asm names fixed registers and has no operand or clobber
 * lists. It presumably relies on the PowerPC calling convention placing
 * _to/_from/len in r3/r4/r5 and returning r3 -- confirm for every target ABI.
 * r3 is never written here; r9 is the working destination pointer.
 */
161 static void * fast_memcpy( void * _to, const void * _from, size_t len )
163 asm (" \n"
/* if (len <= 16) skip to the byte tail at ._L3; r9 = working copy of r3 */
164 " cmplwi %cr0, %r5, 16 \n"
165 " mr %r9, %r3 \n"
166 " bc 4, 1, ._L3 \n"
/* r0 = destination & 15; already aligned -> ._L4 */
167 " andi. %r0, %r3, 15 \n"
168 " bc 12, 2, ._L4 \n"
/* r0 = 16 - r0 (bytes to alignment), r11 = end of that prefix, r5 -= r0 */
169 " subfic %r0, %r0, 16 \n"
170 " add %r11, %r3, %r0 \n"
171 " cmplw %cr0, %r3, %r11 \n"
172 " subf %r5, %r0, %r5 \n"
173 " bc 4, 0, ._L4 \n"
/* ._L7: byte loop copying the alignment prefix until r9 reaches r11 */
174 " ._L7: \n"
175 " lbz %r0, 0(%r4) \n"
176 " stb %r0, 0(%r9) \n"
177 " addi %r9, %r9, 1 \n"
178 " cmplw %cr0, %r9, %r11 \n"
179 " addi %r4, %r4, 1 \n"
180 " bc 12, 0, ._L7 \n"
/* ._L4: r0 = r5 with the low 4 bits cleared; no full 16-byte block -> ._L3 */
181 " ._L4: \n"
182 " rlwinm. %r0, %r5, 0, 0, 27 \n"
183 " bc 12, 2, ._L3 \n"
/* Assemble the first block: permute vector in v12, loads at offsets 0 and
 * 15 from r4, merged into v13; r5 -= 16, r4 += 16. Only one block -> ._L11 */
184 " addi %r5, %r5, -16 \n"
185 " li %r11, 15 \n"
186 " lvsl %v12, 0, %r4 \n"
187 " lvx %v1, 0, %r4 \n"
188 " lvx %v0, %r11, %r4 \n"
189 " rlwinm. %r0, %r5, 0, 0, 27 \n"
190 " vperm %v13, %v1, %v0, %v12 \n"
191 " addi %r4, %r4, 16 \n"
192 " bc 12, 2, ._L11 \n"
/* ._L12: main loop -- load the next source block, store the previously
 * assembled block (v13) at r9, assemble the new one, advance both pointers;
 * repeat while at least 16 bytes remain */
193 " ._L12: \n"
194 " addi %r5, %r5, -16 \n"
195 " li %r11, 15 \n"
196 " lvx %v1, 0, %r4 \n"
197 " lvx %v0, %r11, %r4 \n"
198 " rlwinm. %r0, %r5, 0, 0, 27 \n"
199 " stvx %v13, 0, %r9 \n"
200 " vperm %v13, %v1, %v0, %v12 \n"
201 " addi %r4, %r4, 16 \n"
202 " addi %r9, %r9, 16 \n"
203 " bc 4, 2, ._L12 \n"
/* ._L11: store the last assembled block */
204 " ._L11: \n"
205 " stvx %v13, 0, %r9 \n"
206 " addi %r9, %r9, 16 \n"
/* ._L3: tail. If no bytes remain, or the computed end (r5 = r9 + r5) is not
 * above r9, bclr branches to the link register, i.e. leaves the function from
 * inside the asm text.
 * NOTE(review): that bypasses any compiler-generated epilogue -- confirm this
 * is safe with the compiler flags in use. */
207 " ._L3: \n"
208 " cmpwi %cr0, %r5, 0 \n"
209 " bclr 12, 2 \n"
210 " add %r5, %r9, %r5 \n"
211 " cmplw %cr0, %r9, %r5 \n"
212 " bclr 4, 0 \n"
/* ._L17: byte loop copying the tail until r9 reaches r5, then falls out of
 * the asm statement */
213 " ._L17: \n"
214 " lbz %r0, 0(%r4) \n"
215 " stb %r0, 0(%r9) \n"
216 " addi %r9, %r9, 1 \n"
217 " cmplw %cr0, %r9, %r5 \n"
218 " addi %r4, %r4, 1 \n"
219 " bc 12, 0, ._L17 \n"
223 #endif