/* memcpy_mmx.c */
#include <string.h>

/* Ported from the Linux kernel, GPLv2 */
/* Experimental! */

/* MMX extension from the kernel */
#define __ASM_FORM(x) " " #x " "
#define __ASM_EX_SEC " .section __ex_table,\"a\"\n"
/* Hardcoded to the first (32-bit) variant; the kernel selects
 * between the two based on CONFIG_X86_32. */
#define __ASM_SEL(a,b) __ASM_FORM(a)

#define __ASM_SIZE(inst) __ASM_SEL(inst##l, inst##q)
#define __ASM_REG(reg) __ASM_SEL(e##reg, r##reg)

#define _ASM_PTR __ASM_SEL(.long, .quad)
#define _ASM_ALIGN __ASM_SEL(.balign 4, .balign 8)

#define _ASM_MOV __ASM_SIZE(mov)
#define _ASM_INC __ASM_SIZE(inc)
#define _ASM_DEC __ASM_SIZE(dec)
#define _ASM_ADD __ASM_SIZE(add)
#define _ASM_SUB __ASM_SIZE(sub)
#define _ASM_XADD __ASM_SIZE(xadd)

#define _ASM_AX __ASM_REG(ax)
#define _ASM_BX __ASM_REG(bx)
#define _ASM_CX __ASM_REG(cx)
#define _ASM_DX __ASM_REG(dx)
#define _ASM_SP __ASM_REG(sp)
#define _ASM_BP __ASM_REG(bp)
#define _ASM_SI __ASM_REG(si)
#define _ASM_DI __ASM_REG(di)

/* Exception table entry */
#define _ASM_EXTABLE(from,to) \
    __ASM_EX_SEC \
    _ASM_ALIGN "\n" \
    _ASM_PTR #from "," #to "\n" \
    " .previous\n"
static inline void user_fpu_begin(void)
{
}

static inline void user_fpu_end(void)
{
    /* Allow CPU to use floating point */
    __asm__ __volatile__ (
        "emms");
}
void *geode_exp_mmx_memcpy(void *dest, const void *src, size_t len)
{
    const char *from = (const char *)src;
    char *to = (char *)dest;
    char *p;
    int i;

    p = to;
    i = len >> 6; /* len/64: number of 64-byte blocks */

    user_fpu_begin();
    __asm__ __volatile__ (
        "1: prefetch (%0)\n" /* This set is 28 bytes */
        " prefetch 64(%0)\n"
        " prefetch 128(%0)\n"
        " prefetch 192(%0)\n"
        " prefetch 256(%0)\n"
        "2: \n"
        ".section .fixup, \"ax\"\n"
        "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
        " jmp 2b\n"
        ".previous\n"
        _ASM_EXTABLE(1b, 3b)
        : : "r" (from));
    for ( ; i > 5; i--) {
        __asm__ __volatile__ (
            "1: prefetch 320(%0)\n"
            "2: movq (%0), %%mm0\n"
            " movq 8(%0), %%mm1\n"
            " movq 16(%0), %%mm2\n"
            " movq 24(%0), %%mm3\n"
            " movq %%mm0, (%1)\n"
            " movq %%mm1, 8(%1)\n"
            " movq %%mm2, 16(%1)\n"
            " movq %%mm3, 24(%1)\n"
            " movq 32(%0), %%mm0\n"
            " movq 40(%0), %%mm1\n"
            " movq 48(%0), %%mm2\n"
            " movq 56(%0), %%mm3\n"
            " movq %%mm0, 32(%1)\n"
            " movq %%mm1, 40(%1)\n"
            " movq %%mm2, 48(%1)\n"
            " movq %%mm3, 56(%1)\n"
            ".section .fixup, \"ax\"\n"
            "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
            " jmp 2b\n"
            ".previous\n"
            _ASM_EXTABLE(1b, 3b)
            : : "r" (from), "r" (to) : "memory");

        from += 64;
        to += 64;
    }
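
    /*
     * Added note: the loop above stops with five 64-byte blocks
     * still to go so that "prefetch 320(%0)" (five blocks ahead)
     * never reads past the end of the source buffer; the loop below
     * copies those remaining blocks without prefetching.
     */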
    for ( ; i > 0; i--) {
        __asm__ __volatile__ (
            " movq (%0), %%mm0\n"
            " movq 8(%0), %%mm1\n"
            " movq 16(%0), %%mm2\n"
            " movq 24(%0), %%mm3\n"
            " movq %%mm0, (%1)\n"
            " movq %%mm1, 8(%1)\n"
            " movq %%mm2, 16(%1)\n"
            " movq %%mm3, 24(%1)\n"
            " movq 32(%0), %%mm0\n"
            " movq 40(%0), %%mm1\n"
            " movq 48(%0), %%mm2\n"
            " movq 56(%0), %%mm3\n"
            " movq %%mm0, 32(%1)\n"
            " movq %%mm1, 40(%1)\n"
            " movq %%mm2, 48(%1)\n"
            " movq %%mm3, 56(%1)\n"
            : : "r" (from), "r" (to) : "memory");

        from += 64;
        to += 64;
    }
    /*
     * Now do the tail of the block:
     */
    memcpy(to, from, len & 63);

    user_fpu_end();

    return p;
}
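
/*
 * Minimal test sketch (added, not part of the original file): checks
 * the MMX copy against the source buffer, using a length that is not
 * a multiple of 64 so both copy loops and the memcpy tail run.
 * Assumes a 32-bit x86 build on a CPU with 3DNow! prefetch (e.g. a
 * Geode). Build with e.g.: gcc -m32 -DMEMCPY_MMX_TEST memcpy_mmx.c
 */
#ifdef MEMCPY_MMX_TEST
#include <stdio.h>

int main(void)
{
    static char src[1000], dst[1000];
    size_t i;

    for (i = 0; i < sizeof(src); i++)
        src[i] = (char)(i * 31 + 7); /* arbitrary pattern */

    /* 1000 = 15*64 + 40: 10 prefetching blocks, 5 plain, 40-byte tail */
    if (geode_exp_mmx_memcpy(dst, src, sizeof(src)) != dst)
        return puts("FAIL: return value"), 1;
    if (memcmp(dst, src, sizeof(dst)))
        return puts("FAIL: contents differ"), 1;

    puts("OK");
    return 0;
}
#endif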