2 #include "cmimemcpy_qpx.h"
// Inline-assembly helpers for the QPX copy routine below.
//
// QPX_LOAD(si,sb,fp):  emits a `qvlfdx` instruction. fp is the output operand
//   (floating-point register, "=f"), si is passed with the "b" (base register)
//   constraint and sb with the "r" (general register) constraint. No "memory"
//   clobber is declared on the load.
// QPX_STORE(si,sb,fp): emits a `qvstfdx` instruction with fp as an input
//   floating-point operand and si/sb constrained as in QPX_LOAD; it declares a
//   "memory" clobber.
// FP_REG(i):  expands to an asm register-name specifier built from i, for use
//   in a register variable declaration.
// FP_REG1(i): expands to the string literal "fr" followed by i.
//
// NOTE(review): qvlfdx/qvstfdx are presumably the indexed quad-vector
// load/store (32 bytes at address si+sb) -- confirm against the ISA reference.
// NOTE(review): both asm lines end with a line-continuation backslash, so each
// macro body extends onto the next physical line; the lines that originally
// followed them are not visible in this excerpt.
4 #define QPX_LOAD(si,sb,fp) \
6 asm volatile("qvlfdx %0,%1,%2": "=f"(fp) : "b" (si), "r" (sb)); \
9 #define QPX_STORE(si,sb,fp) \
11 asm volatile("qvstfdx %2,%0,%1": : "b" (si), "r" (sb), "f"(fp) :"memory"); \
15 #define FP_REG(i) asm("f"#i)
16 #define FP_REG1(i) "fr"#i
// NOTE(review): second definition pair -- FP_REG uses the "fr" register-name
// prefix here instead of "f". Presumably the two pairs are selected by a
// compiler-specific #if/#else that is not visible in this excerpt; confirm.
18 #define FP_REG(i) asm("fr"#i)
19 #define FP_REG1(i) "fr"#i
22 //Copy 512 bytes from src to dest using QPX_LOAD / QPX_STORE (16 loads, 16 stores).
// The original note requires "32b aligned" pointers -- presumably 32-byte
// alignment, matching the 4-double (32-byte) offset between the paired base
// pointers below; confirm against the qvlfdx/qvstfdx alignment rules.
//
// dest: destination buffer.
// src:  source buffer.
// NOTE(review): the index operands r0..r7 are not declared in the visible
// lines; presumably they hold byte offsets spaced so that the two base
// pointers together cover the whole 512-byte block -- confirm.
// NOTE(review): the function is declared to return size_t, but its return
// statement and closing brace are not visible in this excerpt.
23 static inline size_t quad_copy_512( char* dest
, const char* src
) {
// Two source base pointers (fpp1_*) and two destination base pointers (fpp2_*).
24 register const double *fpp1_1
, *fpp1_2
;
25 register double *fpp2_1
, *fpp2_2
;
// Eight double variables, each bound to a specific floating-point register
// through the FP_REG(0..7) asm register specifier.
27 register double f0
FP_REG(0);
28 register double f1
FP_REG(1);
29 register double f2
FP_REG(2);
30 register double f3
FP_REG(3);
31 register double f4
FP_REG(4);
32 register double f5
FP_REG(5);
33 register double f6
FP_REG(6);
34 register double f7
FP_REG(7);
// Source bases: fpp1_1 is src itself, fpp1_2 is src advanced by 4 doubles.
53 fpp1_1
= (const double *)src
;
54 fpp1_2
= (const double *)src
+4;
// Destination bases mirror the source layout: dest and dest + 4 doubles.
56 fpp2_1
= (double *)dest
;
57 fpp2_2
= (double *)dest
+4;
// Phase 1: fill f0..f7 from the first source base, one load per index r0..r7.
59 QPX_LOAD(fpp1_1
,r0
,f0
);
60 //asm volatile("qvlfdx 0,%0,%1": : "Ob" (fpp1_1), "r"(r0) :"memory");
61 QPX_LOAD(fpp1_1
,r1
,f1
);
62 QPX_LOAD(fpp1_1
,r2
,f2
);
63 QPX_LOAD(fpp1_1
,r3
,f3
);
64 QPX_LOAD(fpp1_1
,r4
,f4
);
65 QPX_LOAD(fpp1_1
,r5
,f5
);
66 QPX_LOAD(fpp1_1
,r6
,f6
);
67 QPX_LOAD(fpp1_1
,r7
,f7
);
// Phase 2: for each i in 0..7, store fi through the first destination base
// with index ri, then immediately reload fi from the second source base with
// the same index. Stores and loads are interleaved, so every register is
// written out before it is overwritten.
69 QPX_STORE(fpp2_1
,r0
,f0
);
70 QPX_LOAD(fpp1_2
,r0
,f0
);
71 QPX_STORE(fpp2_1
,r1
,f1
);
72 QPX_LOAD(fpp1_2
,r1
,f1
);
73 QPX_STORE(fpp2_1
,r2
,f2
);
74 QPX_LOAD(fpp1_2
,r2
,f2
);
75 QPX_STORE(fpp2_1
,r3
,f3
);
76 QPX_LOAD(fpp1_2
,r3
,f3
);
77 QPX_STORE(fpp2_1
,r4
,f4
);
78 QPX_LOAD(fpp1_2
,r4
,f4
);
79 QPX_STORE(fpp2_1
,r5
,f5
);
80 QPX_LOAD(fpp1_2
,r5
,f5
);
81 QPX_STORE(fpp2_1
,r6
,f6
);
82 QPX_LOAD(fpp1_2
,r6
,f6
);
83 QPX_STORE(fpp2_1
,r7
,f7
);
84 QPX_LOAD(fpp1_2
,r7
,f7
);
// Phase 3: write the values reloaded in phase 2 through the second
// destination base, again one store per index r0..r7.
86 QPX_STORE(fpp2_2
,r0
,f0
);
87 QPX_STORE(fpp2_2
,r1
,f1
);
88 QPX_STORE(fpp2_2
,r2
,f2
);
89 QPX_STORE(fpp2_2
,r3
,f3
);
90 QPX_STORE(fpp2_2
,r4
,f4
);
91 QPX_STORE(fpp2_2
,r5
,f5
);
92 QPX_STORE(fpp2_2
,r6
,f6
);
93 QPX_STORE(fpp2_2
,r7
,f7
);
// CmiMemcpy_qpx: judging by its signature, a memcpy-style routine taking a
// destination pointer, a source pointer and a byte count n.
//
// Only the signature and the tail handling are visible in this excerpt.
// NOTE(review): d and s are declared in lines not shown here; presumably they
// are byte pointers derived from dst/src that have already been advanced past
// the whole 512-byte chunks copied by quad_copy_512 -- confirm.
98 void CmiMemcpy_qpx (void *dst
, const void *src
, size_t n
)
// Tail: when n is not a multiple of 512, the low nine bits of n (n & 511) give
// the byte count passed to memcpy for the copy from s to d.
109 if ( (n
& 511UL) != 0 )
110 memcpy (d
, s
, n
& 511UL);