[netsniff-ng.git] / src / opt_memcpy.c
/*
 * netsniff-ng - the packet sniffing beast
 * By Daniel Borkmann <daniel@netsniff-ng.org>
 * Copyright 2011 Daniel Borkmann.
 * Subject to the GPL, version 2.
 */

#ifdef ARCH_X86
#include <string.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>

#include "die.h"
#include "built_in.h"
#include "opt_memcpy.h"

#define CPU_FLAG_NONE	0
#define CPU_FLAG_MMX	1
#define CPU_FLAG_MMX2	2
#define CPU_FLAG_SSE	3
#define CPU_FLAG_SSE2	4

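/*
 * ____memcpy is the runtime-selected copy routine: it starts out pointing at
 * the plain fallback ___memcpy and is redirected to one of the SSE/MMX
 * variants below once set_memcpy() has probed the CPU. 'checked' makes sure
 * the probe only runs once.
 */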
static volatile sig_atomic_t checked = 0;
void *(*____memcpy)(void *__restrict__ dest, const void *__restrict__ src,
		    size_t n) = ___memcpy;

struct cpuid_regs {
	unsigned int eax;
	unsigned int ebx;
	unsigned int ecx;
	unsigned int edx;
};

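/*
 * check_cpu_flags() issues CPUID leaf 1 (the raw ".byte 0x0f, 0xa2" below is
 * the cpuid opcode) and inspects the feature bits returned in EDX: bit 23
 * means MMX, bit 25 SSE and bit 26 SSE2. The checks are priority ordered so
 * the fastest available variant is reported.
 */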
static int check_cpu_flags(void)
{
	struct cpuid_regs regs;

#define CPUID ".byte 0x0f, 0xa2; "
	asm (CPUID : "=a" (regs.eax),
		     "=b" (regs.ebx),
		     "=c" (regs.ecx),
		     "=d" (regs.edx) : "0" (1));

	/* Note: priority ordered */
	info("Found ");
	if (regs.edx & (1 << 26)) {
		info("SSE2 on CPU!\n");
		return CPU_FLAG_SSE2;
	} else if (regs.edx & (1 << 25)) {
		/* SSE, same as extended MMX, we prefer SSE */
		info("SSE on CPU!\n");
		return CPU_FLAG_SSE;
	} else if (regs.edx & (1 << 25)) {
		/*
		 * Extended MMX: shares bit 25 with SSE, so this branch is
		 * effectively unreachable; kept only for the fall-back order.
		 */
		info("MMX2 on CPU!\n");
		return CPU_FLAG_MMX2;
	} else if (regs.edx & (1 << 23)) {
		info("MMX on CPU!\n");
		return CPU_FLAG_MMX;
	} else
		info("nothing on CPU! :-P\n");
	return CPU_FLAG_NONE;
}

#define MIN_LEN		0x40
#define SSE_MMREG_SIZE	16
#define MMX_MMREG_SIZE	8

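/*
 * SSE variant: prefetch the source with prefetchnta, use small_memcpy() to
 * bring the destination to a 16 byte boundary, then copy 64 byte blocks
 * through xmm0-xmm3 with movups loads and movntps non-temporal stores
 * (bypassing the cache), finish with sfence and hand any tail to ___memcpy.
 */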
void *__sse_memcpy(void *__restrict__ dest,
		   const void *__restrict__ src, size_t n)
{
	uint8_t *to = dest;
	const uint8_t *from = src;
	void *const save = to;

	__asm__ __volatile__ ("prefetchnta (%0)\n"
			      "prefetchnta 32(%0)\n"
			      "prefetchnta 64(%0)\n"
			      "prefetchnta 96(%0)\n"
			      "prefetchnta 128(%0)\n"
			      "prefetchnta 160(%0)\n"
			      "prefetchnta 192(%0)\n"
			      "prefetchnta 224(%0)\n"
			      "prefetchnta 256(%0)\n"
			      "prefetchnta 288(%0)\n"
			      :: "r" (from));

	if (n >= MIN_LEN) {
		register int i;
		register int j;
		register unsigned long delta;

		delta = ((unsigned long) to) & (SSE_MMREG_SIZE - 1);
		if (delta) {
			delta = SSE_MMREG_SIZE - delta;
			n -= delta;
			small_memcpy(to, from, delta);
		}

		j = n >> 6;
		n &= 63;
		for (i = 0; i < j; i++) {
			__asm__ __volatile__ ("prefetchnta 320(%0)\n"
					      "prefetchnta 352(%0)\n"
					      "movups (%0), %%xmm0\n"
					      "movups 16(%0), %%xmm1\n"
					      "movups 32(%0), %%xmm2\n"
					      "movups 48(%0), %%xmm3\n"
					      "movntps %%xmm0, (%1)\n"
					      "movntps %%xmm1, 16(%1)\n"
					      "movntps %%xmm2, 32(%1)\n"
					      "movntps %%xmm3, 48(%1)\n"
					      :: "r" (from), "r" (to)
					      : "memory");
			from += 64;
			to += 64;
		}

		__asm__ __volatile__ ("sfence":::"memory");
	}
	if (n != 0)
		___memcpy(to, from, n);
	return save;
}

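/*
 * SSE2 variant: same 64 byte block scheme as __sse_memcpy, only with a
 * sparser prefetch pattern (prologue every 64 bytes, one prefetch per loop
 * iteration instead of two).
 */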
void *__sse2_memcpy(void *__restrict__ dest, const void *__restrict__ src,
		    size_t n)
{
	uint8_t *to = dest;
	const uint8_t *from = src;
	void *const save = to;

	__asm__ __volatile__ ("prefetchnta (%0)\n"
			      "prefetchnta 64(%0)\n"
			      "prefetchnta 128(%0)\n"
			      "prefetchnta 192(%0)\n"
			      "prefetchnta 256(%0)\n"
			      :: "r" (from));

	if (n >= MIN_LEN) {
		register int i;
		register int j;
		register unsigned long delta;

		delta = ((unsigned long) to) & (SSE_MMREG_SIZE - 1);
		if (delta) {
			delta = SSE_MMREG_SIZE - delta;
			n -= delta;
			small_memcpy(to, from, delta);
		}

		j = n >> 6;
		n &= 63;
		for (i = 0; i < j; i++) {
			__asm__ __volatile__ ("prefetchnta 320(%0)\n"
					      "movups (%0), %%xmm0\n"
					      "movups 16(%0), %%xmm1\n"
					      "movups 32(%0), %%xmm2\n"
					      "movups 48(%0), %%xmm3\n"
					      "movntps %%xmm0, (%1)\n"
					      "movntps %%xmm1, 16(%1)\n"
					      "movntps %%xmm2, 32(%1)\n"
					      "movntps %%xmm3, 48(%1)\n"
					      :: "r" (from), "r" (to)
					      : "memory");
			from += 64;
			to += 64;
		}

		__asm__ __volatile__ ("sfence":::"memory");
	}
	if (n != 0)
		___memcpy(to, from, n);
	return save;
}

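/*
 * MMX variant: copies 64 byte blocks through the eight mm0-mm7 registers
 * with movq, then runs emms so the x87/MMX state is usable again afterwards.
 * Note that prefetchnta and sfence are SSE-era instructions, so this path
 * presumably assumes a CPU that at least tolerates them.
 */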
void *__mmx_memcpy(void *__restrict__ dest, const void *__restrict__ src,
		   size_t n)
{
	uint8_t *to = dest;
	const uint8_t *from = src;
	void *const save = to;
	register int i;
	register int j;

	__asm__ __volatile__ ("prefetchnta (%0)\n"
			      "prefetchnta 32(%0)\n"
			      "prefetchnta 64(%0)\n"
			      "prefetchnta 96(%0)\n"
			      "prefetchnta 128(%0)\n"
			      "prefetchnta 160(%0)\n"
			      "prefetchnta 192(%0)\n"
			      "prefetchnta 224(%0)\n"
			      "prefetchnta 256(%0)\n"
			      "prefetchnta 288(%0)\n"
			      :: "r" (from));
	j = n >> 6;
	n &= 63;
	for (i = 0; i < j; i++) {
		__asm__ __volatile__ ("prefetchnta 320(%0)\n"
				      "prefetchnta 352(%0)\n"
				      "movq (%0), %%mm0\n"
				      "movq 8(%0), %%mm1\n"
				      "movq 16(%0), %%mm2\n"
				      "movq 24(%0), %%mm3\n"
				      "movq 32(%0), %%mm4\n"
				      "movq 40(%0), %%mm5\n"
				      "movq 48(%0), %%mm6\n"
				      "movq 56(%0), %%mm7\n"
				      "movq %%mm0, (%1)\n"
				      "movq %%mm1, 8(%1)\n"
				      "movq %%mm2, 16(%1)\n"
				      "movq %%mm3, 24(%1)\n"
				      "movq %%mm4, 32(%1)\n"
				      "movq %%mm5, 40(%1)\n"
				      "movq %%mm6, 48(%1)\n"
				      "movq %%mm7, 56(%1)\n"
				      :: "r" (from), "r" (to)
				      : "memory");
		from += 64;
		to += 64;
	}

	__asm__ __volatile__ ("sfence":::"memory");
	__asm__ __volatile__ ("emms":::"memory");

	if (n != 0)
		___memcpy(to, from, n);
	return save;
}

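/*
 * Extended MMX variant: identical block copy to __mmx_memcpy, but with the
 * lighter prefetch scheme of the SSE2 version.
 */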
void *__mmx2_memcpy(void *__restrict__ dest, const void *__restrict__ src,
		    size_t n)
{
	uint8_t *to = dest;
	const uint8_t *from = src;
	void *const save = to;
	register int i;
	register int j;

	__asm__ __volatile__ ("prefetchnta (%0)\n"
			      "prefetchnta 64(%0)\n"
			      "prefetchnta 128(%0)\n"
			      "prefetchnta 192(%0)\n"
			      "prefetchnta 256(%0)\n"
			      :: "r" (from));
	j = n >> 6;
	n &= 63;
	for (i = 0; i < j; i++) {
		__asm__ __volatile__ ("prefetchnta 320(%0)\n"
				      "movq (%0), %%mm0\n"
				      "movq 8(%0), %%mm1\n"
				      "movq 16(%0), %%mm2\n"
				      "movq 24(%0), %%mm3\n"
				      "movq 32(%0), %%mm4\n"
				      "movq 40(%0), %%mm5\n"
				      "movq 48(%0), %%mm6\n"
				      "movq 56(%0), %%mm7\n"
				      "movq %%mm0, (%1)\n"
				      "movq %%mm1, 8(%1)\n"
				      "movq %%mm2, 16(%1)\n"
				      "movq %%mm3, 24(%1)\n"
				      "movq %%mm4, 32(%1)\n"
				      "movq %%mm5, 40(%1)\n"
				      "movq %%mm6, 48(%1)\n"
				      "movq %%mm7, 56(%1)\n"
				      :: "r" (from), "r" (to)
				      : "memory");
		from += 64;
		to += 64;
	}

	__asm__ __volatile__ ("sfence":::"memory");
	__asm__ __volatile__ ("emms":::"memory");

	if (n != 0)
		___memcpy(to, from, n);
	return save;
}

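/*
 * set_memcpy() probes the CPU once (guarded by 'checked') and points
 * ____memcpy at the best matching implementation. A minimal usage sketch,
 * assuming the caller includes opt_memcpy.h:
 *
 *	set_memcpy();
 *	____memcpy(dst, src, len);
 */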
/* Will be extended in future! */
void set_memcpy(void)
{
	int cpu_flag;

	if (likely(checked))
		return;
	cpu_flag = check_cpu_flags();
	if (cpu_flag == CPU_FLAG_SSE2)
		____memcpy = __sse2_memcpy;
	else if (cpu_flag == CPU_FLAG_SSE)
		____memcpy = __sse_memcpy;
	else if (cpu_flag == CPU_FLAG_MMX2)
		____memcpy = __mmx2_memcpy;
	else if (cpu_flag == CPU_FLAG_MMX)
		____memcpy = __mmx_memcpy;
	else
		____memcpy = ___memcpy;
	checked = 1;
}
#endif /* ARCH_X86 */