/*
 * netsniff-ng - the packet sniffing beast
 * By Daniel Borkmann <daniel@netsniff-ng.org>
 * Copyright 2011 Daniel Borkmann.
 * Subject to the GPL, version 2.
 */
16 #include "opt_memcpy.h"
/*
 * CPU capability levels used to pick a memcpy implementation.
 * Priority ordered: a higher value is a strictly preferred feature set.
 * (Extraction had leaked line numbers into these lines, breaking the
 * preprocessor directives; restored to compilable form.)
 */
#define CPU_FLAG_NONE 0
#define CPU_FLAG_MMX 1
#define CPU_FLAG_MMX2 2
#define CPU_FLAG_SSE 3
#define CPU_FLAG_SSE2 4
24 static volatile sig_atomic_t checked
= 0;
25 void *(*____memcpy
)(void *__restrict__ dest
, const void *__restrict__ src
,
26 size_t n
) = ___memcpy
;
35 static int check_cpu_flags(void)
37 struct cpuid_regs regs
;
39 #define CPUID ".byte 0x0f, 0xa2; "
40 asm (CPUID
: "=a" (regs
.eax
),
43 "=d" (regs
.edx
) : "0" (1));
45 /* Note: priority ordered */
47 if (regs
.edx
& (1 << 26)) {
48 info("SSE2 on CPU!\n");
50 } else if (regs
.edx
& (1 << 25)) {
51 /* SSE, same as extended MMX, we prefer SSE */
52 info("SSE on CPU!\n");
54 } else if (regs
.edx
& (1 << 25)) {
56 info("MMX2 on CPU!\n");
58 } else if (regs
.edx
& (1 << 23)) {
59 info("MMX on CPU!\n");
62 info("nothing on CPU! :-P\n");
/* Register widths in bytes, used for destination alignment below. */
#define SSE_MMREG_SIZE 16	/* one XMM register */
#define MMX_MMREG_SIZE 8	/* one MMX register */
/*
 * __sse_memcpy - bulk copy using unaligned SSE loads (movups) and
 * non-temporal stores (movntps) to bypass the cache on large copies.
 * NOTE(review): this extraction dropped several original lines — the
 * declaration of 'to' (presumably a byte cursor over dest), 'i'/'j',
 * the length threshold, cursor advancement and closing braces, and
 * the return of 'save'.  Code is left byte-identical; comments only.
 */
70 void *__sse_memcpy(void *__restrict__ dest
,
71 const void *__restrict__ src
, size_t n
)
/* Byte-wise read cursor over the source buffer. */
74 const uint8_t *from
= src
;
/* Preserve the original destination for the memcpy-style return. */
75 void *const save
= to
;
/* Warm the cache with the first 320 bytes of the source. */
77 __asm__
__volatile__ ("prefetchnta (%0)\n"
78 "prefetchnta 32(%0)\n"
79 "prefetchnta 64(%0)\n"
80 "prefetchnta 96(%0)\n"
81 "prefetchnta 128(%0)\n"
82 "prefetchnta 160(%0)\n"
83 "prefetchnta 192(%0)\n"
84 "prefetchnta 224(%0)\n"
85 "prefetchnta 256(%0)\n"
86 "prefetchnta 288(%0)\n"
/* Align 'to' to a 16-byte XMM boundary before the vector loop. */
92 register unsigned long delta
;
94 delta
= ((unsigned long) to
) & (SSE_MMREG_SIZE
- 1);
96 delta
= SSE_MMREG_SIZE
- delta
;
/* Copy the unaligned head bytes (small_memcpy defined elsewhere). */
98 small_memcpy(to
, from
, delta
);
/* Main loop: 64 bytes (4 XMM registers) per iteration. */
103 for (i
= 0; i
< j
; i
++) {
104 __asm__
__volatile__ ("prefetchnta 320(%0)\n"
105 "prefetchnta 352(%0)\n"
106 "movups (%0), %%xmm0\n"
107 "movups 16(%0), %%xmm1\n"
108 "movups 32(%0), %%xmm2\n"
109 "movups 48(%0), %%xmm3\n"
110 "movntps %%xmm0, (%1)\n"
111 "movntps %%xmm1, 16(%1)\n"
112 "movntps %%xmm2, 32(%1)\n"
113 "movntps %%xmm3, 48(%1)\n"
114 :: "r" (from
), "r" (to
)
/* Order the non-temporal stores before any later reads of dest. */
120 __asm__
__volatile__ ("sfence":::"memory");
/* Remaining tail goes through the generic fallback. */
123 ___memcpy(to
, from
, n
);
/*
 * __sse2_memcpy - SSE2-level bulk copy.  NOTE(review): despite the
 * name it uses the same movups/movntps instructions as __sse_memcpy,
 * differing only in fewer/longer-stride prefetches — verify intent.
 * Extraction dropped lines here too (declaration of 'to', 'i'/'j',
 * length checks, braces, return of 'save'); code left byte-identical.
 */
127 void *__sse2_memcpy(void *__restrict__ dest
, const void *__restrict__ src
,
/* Byte-wise read cursor over the source buffer. */
131 const uint8_t *from
= src
;
/* Preserve the original destination for the memcpy-style return. */
132 void *const save
= to
;
/* Prefetch the first 320 bytes, one cache line per 64 bytes. */
134 __asm__
__volatile__ ("prefetchnta (%0)\n"
135 "prefetchnta 64(%0)\n"
136 "prefetchnta 128(%0)\n"
137 "prefetchnta 192(%0)\n"
138 "prefetchnta 256(%0)\n"
/* Align 'to' to a 16-byte XMM boundary before the vector loop. */
144 register unsigned long delta
;
146 delta
= ((unsigned long) to
) & (SSE_MMREG_SIZE
- 1);
148 delta
= SSE_MMREG_SIZE
- delta
;
/* Copy the unaligned head bytes (small_memcpy defined elsewhere). */
150 small_memcpy(to
, from
, delta
);
/* Main loop: 64 bytes (4 XMM registers) per iteration. */
155 for (i
= 0; i
< j
; i
++) {
156 __asm__
__volatile__ ("prefetchnta 320(%0)\n"
157 "movups (%0), %%xmm0\n"
158 "movups 16(%0), %%xmm1\n"
159 "movups 32(%0), %%xmm2\n"
160 "movups 48(%0), %%xmm3\n"
161 "movntps %%xmm0, (%1)\n"
162 "movntps %%xmm1, 16(%1)\n"
163 "movntps %%xmm2, 32(%1)\n"
164 "movntps %%xmm3, 48(%1)\n"
165 :: "r" (from
), "r" (to
)
/* Order the non-temporal stores before any later reads of dest. */
171 __asm__
__volatile__ ("sfence":::"memory");
/* Remaining tail goes through the generic fallback. */
175 ___memcpy(to
, from
, n
);
/*
 * __mmx_memcpy - bulk copy via MMX registers, 64 bytes per iteration.
 * NOTE(review): 'sfence' and 'prefetchnta' require SSE (or AMD
 * extended MMX); on a plain-MMX CPU, which is when this variant is
 * selected, they would fault — confirm against real targets.
 * Extraction dropped lines (declaration of 'to', 'i'/'j', the
 * "movq (%0), %%mm0" load, braces, return); code left byte-identical.
 */
179 void *__mmx_memcpy(void *__restrict__ dest
, const void *__restrict__ src
,
/* Byte-wise read cursor over the source buffer. */
183 const uint8_t *from
= src
;
/* Preserve the original destination for the memcpy-style return. */
184 void *const save
= to
;
/* Warm the cache with the first 320 bytes of the source. */
188 __asm__
__volatile__ ("prefetchnta (%0)\n"
189 "prefetchnta 32(%0)\n"
190 "prefetchnta 64(%0)\n"
191 "prefetchnta 96(%0)\n"
192 "prefetchnta 128(%0)\n"
193 "prefetchnta 160(%0)\n"
194 "prefetchnta 192(%0)\n"
195 "prefetchnta 224(%0)\n"
196 "prefetchnta 256(%0)\n"
197 "prefetchnta 288(%0)\n"
/* Main loop: 8 MMX registers * 8 bytes = 64 bytes per iteration. */
201 for (i
= 0; i
< j
; i
++) {
202 __asm__
__volatile__ ("prefetchnta 320(%0)\n"
203 "prefetchnta 352(%0)\n"
205 "movq 8(%0), %%mm1\n"
206 "movq 16(%0), %%mm2\n"
207 "movq 24(%0), %%mm3\n"
208 "movq 32(%0), %%mm4\n"
209 "movq 40(%0), %%mm5\n"
210 "movq 48(%0), %%mm6\n"
211 "movq 56(%0), %%mm7\n"
213 "movq %%mm1, 8(%1)\n"
214 "movq %%mm2, 16(%1)\n"
215 "movq %%mm3, 24(%1)\n"
216 "movq %%mm4, 32(%1)\n"
217 "movq %%mm5, 40(%1)\n"
218 "movq %%mm6, 48(%1)\n"
219 "movq %%mm7, 56(%1)\n"
220 :: "r" (from
), "r" (to
)
/* Drain store buffers, then leave MMX state so the FPU is usable. */
226 __asm__
__volatile__ ("sfence":::"memory");
227 __asm__
__volatile__ ("emms":::"memory");
/* Remaining tail goes through the generic fallback. */
230 ___memcpy(to
, from
, n
);
/*
 * __mmx2_memcpy - extended-MMX bulk copy: same 64-byte movq loop as
 * __mmx_memcpy but with sparser prefetching (64-byte stride).
 * NOTE(review): extraction dropped lines (declaration of 'to',
 * 'i'/'j', the "movq (%0), %%mm0" load, braces, return of 'save');
 * code is left byte-identical, comments only.
 */
234 void *__mmx2_memcpy(void *__restrict__ dest
, const void *__restrict__ src
,
/* Byte-wise read cursor over the source buffer. */
238 const uint8_t *from
= src
;
/* Preserve the original destination for the memcpy-style return. */
239 void *const save
= to
;
/* Prefetch the first 320 bytes, one cache line per 64 bytes. */
243 __asm__
__volatile__ ("prefetchnta (%0)\n"
244 "prefetchnta 64(%0)\n"
245 "prefetchnta 128(%0)\n"
246 "prefetchnta 192(%0)\n"
247 "prefetchnta 256(%0)\n"::"r" (from
));
/* Main loop: 8 MMX registers * 8 bytes = 64 bytes per iteration. */
250 for (i
= 0; i
< j
; i
++) {
251 __asm__
__volatile__ ("prefetchnta 320(%0)\n"
253 "movq 8(%0), %%mm1\n"
254 "movq 16(%0), %%mm2\n"
255 "movq 24(%0), %%mm3\n"
256 "movq 32(%0), %%mm4\n"
257 "movq 40(%0), %%mm5\n"
258 "movq 48(%0), %%mm6\n"
259 "movq 56(%0), %%mm7\n"
261 "movq %%mm1, 8(%1)\n"
262 "movq %%mm2, 16(%1)\n"
263 "movq %%mm3, 24(%1)\n"
264 "movq %%mm4, 32(%1)\n"
265 "movq %%mm5, 40(%1)\n"
266 "movq %%mm6, 48(%1)\n"
267 "movq %%mm7, 56(%1)\n"
268 :: "r" (from
), "r" (to
)
/* Drain store buffers, then leave MMX state so the FPU is usable. */
274 __asm__
__volatile__ ("sfence":::"memory");
275 __asm__
__volatile__ ("emms":::"memory");
/* Remaining tail goes through the generic fallback. */
278 ___memcpy(to
, from
, n
);
/* Will be extended in future! */
/*
 * set_memcpy - probe the CPU once and install the best memcpy variant
 * into the ____memcpy dispatch pointer, priority ordered
 * SSE2 > SSE > MMX2 > MMX > generic ___memcpy fallback.
 * NOTE(review): extraction dropped lines here — the declaration of
 * 'cpu_flag', the opening/closing braces, presumably the 'checked'
 * guard, and likely an 'else' before the final fallback assignment
 * (without it the fallback would run unconditionally — confirm
 * against the full source).  Code left byte-identical.
 */
283 void set_memcpy(void)
289 cpu_flag
= check_cpu_flags();
290 if (cpu_flag
== CPU_FLAG_SSE2
)
291 ____memcpy
= __sse2_memcpy
;
292 else if (cpu_flag
== CPU_FLAG_SSE
)
293 ____memcpy
= __sse_memcpy
;
294 else if (cpu_flag
== CPU_FLAG_MMX2
)
295 ____memcpy
= __mmx2_memcpy
;
296 else if (cpu_flag
== CPU_FLAG_MMX
)
297 ____memcpy
= __mmx_memcpy
;
/* Fallback: portable ___memcpy. */
299 ____memcpy
= ___memcpy
;
302 #endif /* ARCH_X86 */