Merge branches 'x86/apic', 'x86/asm', 'x86/cleanups', 'x86/debug', 'x86/kconfig'...
[linux-2.6/mini2440.git] / include / asm-generic / xor.h
blobaaab875e1a351f5596a22af645f567c2d2b5ae66
/*
 * include/asm-generic/xor.h
 *
 * Generic optimized RAID-5 checksumming functions.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * You should have received a copy of the GNU General Public License
 * (for example /usr/src/linux/COPYING); if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
16 #include <asm/processor.h>
/*
 * XOR the buffer at p2 into the buffer at p1, eight words per pass.
 *
 * @bytes: length of each buffer in bytes.  Only whole lines of
 *         8 * sizeof(long) bytes are processed; a trailing partial
 *         line is ignored.
 * @p1:    destination, updated in place (p1 ^= p2).
 * @p2:    source, only read.
 */
static void
xor_8regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The do/while below always runs once, so bail out on a short buffer. */
	if (lines <= 0)
		return;

	do {
		p1[0] ^= p2[0];
		p1[1] ^= p2[1];
		p1[2] ^= p2[2];
		p1[3] ^= p2[3];
		p1[4] ^= p2[4];
		p1[5] ^= p2[5];
		p1[6] ^= p2[6];
		p1[7] ^= p2[7];
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
}
/*
 * XOR the buffers at p2 and p3 into the buffer at p1, eight words per pass.
 *
 * @bytes: length of each buffer in bytes.  Only whole lines of
 *         8 * sizeof(long) bytes are processed; a trailing partial
 *         line is ignored.
 * @p1:    destination, updated in place (p1 ^= p2 ^ p3).
 * @p2:    source, only read.
 * @p3:    source, only read.
 */
static void
xor_8regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The do/while below always runs once, so bail out on a short buffer. */
	if (lines <= 0)
		return;

	do {
		p1[0] ^= p2[0] ^ p3[0];
		p1[1] ^= p2[1] ^ p3[1];
		p1[2] ^= p2[2] ^ p3[2];
		p1[3] ^= p2[3] ^ p3[3];
		p1[4] ^= p2[4] ^ p3[4];
		p1[5] ^= p2[5] ^ p3[5];
		p1[6] ^= p2[6] ^ p3[6];
		p1[7] ^= p2[7] ^ p3[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
}
/*
 * XOR the buffers at p2, p3 and p4 into the buffer at p1, eight words
 * per pass.
 *
 * @bytes: length of each buffer in bytes.  Only whole lines of
 *         8 * sizeof(long) bytes are processed; a trailing partial
 *         line is ignored.
 * @p1:    destination, updated in place (p1 ^= p2 ^ p3 ^ p4).
 * @p2:    source, only read.
 * @p3:    source, only read.
 * @p4:    source, only read.
 */
static void
xor_8regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The do/while below always runs once, so bail out on a short buffer. */
	if (lines <= 0)
		return;

	do {
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
}
/*
 * XOR the buffers at p2, p3, p4 and p5 into the buffer at p1, eight
 * words per pass.
 *
 * @bytes: length of each buffer in bytes.  Only whole lines of
 *         8 * sizeof(long) bytes are processed; a trailing partial
 *         line is ignored.
 * @p1:    destination, updated in place (p1 ^= p2 ^ p3 ^ p4 ^ p5).
 * @p2:    source, only read.
 * @p3:    source, only read.
 * @p4:    source, only read.
 * @p5:    source, only read.
 */
static void
xor_8regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The do/while below always runs once, so bail out on a short buffer. */
	if (lines <= 0)
		return;

	do {
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
}
/*
 * XOR the buffer at p2 into the buffer at p1, eight words per pass,
 * staging each line in local temporaries so that loads and stores are
 * grouped together.
 *
 * @bytes: length of each buffer in bytes.  Only whole lines of
 *         8 * sizeof(long) bytes are processed; a trailing partial
 *         line is ignored.
 * @p1:    destination, updated in place (p1 ^= p2).
 * @p2:    source, only read.
 */
static void
xor_32regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The do/while below always runs once, so bail out on a short buffer. */
	if (lines <= 0)
		return;

	do {
		/* Same type as the data: no signed/unsigned round trip. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
}
/*
 * XOR the buffers at p2 and p3 into the buffer at p1, eight words per
 * pass, staging each line in local temporaries so that loads and stores
 * are grouped together.
 *
 * @bytes: length of each buffer in bytes.  Only whole lines of
 *         8 * sizeof(long) bytes are processed; a trailing partial
 *         line is ignored.
 * @p1:    destination, updated in place (p1 ^= p2 ^ p3).
 * @p2:    source, only read.
 * @p3:    source, only read.
 */
static void
xor_32regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	     unsigned long *p3)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The do/while below always runs once, so bail out on a short buffer. */
	if (lines <= 0)
		return;

	do {
		/* Same type as the data: no signed/unsigned round trip. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
}
/*
 * XOR the buffers at p2, p3 and p4 into the buffer at p1, eight words
 * per pass, staging each line in local temporaries so that loads and
 * stores are grouped together.
 *
 * @bytes: length of each buffer in bytes.  Only whole lines of
 *         8 * sizeof(long) bytes are processed; a trailing partial
 *         line is ignored.
 * @p1:    destination, updated in place (p1 ^= p2 ^ p3 ^ p4).
 * @p2:    source, only read.
 * @p3:    source, only read.
 * @p4:    source, only read.
 */
static void
xor_32regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	     unsigned long *p3, unsigned long *p4)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The do/while below always runs once, so bail out on a short buffer. */
	if (lines <= 0)
		return;

	do {
		/* Same type as the data: no signed/unsigned round trip. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
}
/*
 * XOR the buffers at p2, p3, p4 and p5 into the buffer at p1, eight
 * words per pass, staging each line in local temporaries so that loads
 * and stores are grouped together.
 *
 * @bytes: length of each buffer in bytes.  Only whole lines of
 *         8 * sizeof(long) bytes are processed; a trailing partial
 *         line is ignored.
 * @p1:    destination, updated in place (p1 ^= p2 ^ p3 ^ p4 ^ p5).
 * @p2:    source, only read.
 * @p3:    source, only read.
 * @p4:    source, only read.
 * @p5:    source, only read.
 */
static void
xor_32regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	     unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The do/while below always runs once, so bail out on a short buffer. */
	if (lines <= 0)
		return;

	do {
		/* Same type as the data: no signed/unsigned round trip. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		d0 ^= p5[0];
		d1 ^= p5[1];
		d2 ^= p5[2];
		d3 ^= p5[3];
		d4 ^= p5[4];
		d5 ^= p5[5];
		d6 ^= p5[6];
		d7 ^= p5[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
}
/*
 * XOR the buffer at p2 into the buffer at p1, eight words per pass,
 * issuing prefetchw()/prefetch() for the next line before working on
 * the current one.
 *
 * @bytes: length of each buffer in bytes.  Only whole lines of
 *         8 * sizeof(long) bytes are processed; a trailing partial
 *         line is ignored.
 * @p1:    destination, updated in place (p1 ^= p2).
 * @p2:    source, only read.
 */
static void
xor_8regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	/* One less than the line count: the last line is done via the goto. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/* Shorter than one line: the do/while would still touch a full line. */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
 once_more:
		p1[0] ^= p2[0];
		p1[1] ^= p2[1];
		p1[2] ^= p2[2];
		p1[3] ^= p2[3];
		p1[4] ^= p2[4];
		p1[5] ^= p2[5];
		p1[6] ^= p2[6];
		p1[7] ^= p2[7];
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
	/* Final line: re-enter the body below the prefetches. */
	if (lines == 0)
		goto once_more;
}
/*
 * XOR the buffers at p2 and p3 into the buffer at p1, eight words per
 * pass, issuing prefetchw()/prefetch() for the next line before working
 * on the current one.
 *
 * @bytes: length of each buffer in bytes.  Only whole lines of
 *         8 * sizeof(long) bytes are processed; a trailing partial
 *         line is ignored.
 * @p1:    destination, updated in place (p1 ^= p2 ^ p3).
 * @p2:    source, only read.
 * @p3:    source, only read.
 */
static void
xor_8regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	      unsigned long *p3)
{
	/* One less than the line count: the last line is done via the goto. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/* Shorter than one line: the do/while would still touch a full line. */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
 once_more:
		p1[0] ^= p2[0] ^ p3[0];
		p1[1] ^= p2[1] ^ p3[1];
		p1[2] ^= p2[2] ^ p3[2];
		p1[3] ^= p2[3] ^ p3[3];
		p1[4] ^= p2[4] ^ p3[4];
		p1[5] ^= p2[5] ^ p3[5];
		p1[6] ^= p2[6] ^ p3[6];
		p1[7] ^= p2[7] ^ p3[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
	/* Final line: re-enter the body below the prefetches. */
	if (lines == 0)
		goto once_more;
}
/*
 * XOR the buffers at p2, p3 and p4 into the buffer at p1, eight words
 * per pass, issuing prefetchw()/prefetch() for the next line before
 * working on the current one.
 *
 * @bytes: length of each buffer in bytes.  Only whole lines of
 *         8 * sizeof(long) bytes are processed; a trailing partial
 *         line is ignored.
 * @p1:    destination, updated in place (p1 ^= p2 ^ p3 ^ p4).
 * @p2:    source, only read.
 * @p3:    source, only read.
 * @p4:    source, only read.
 */
static void
xor_8regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	      unsigned long *p3, unsigned long *p4)
{
	/* One less than the line count: the last line is done via the goto. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/* Shorter than one line: the do/while would still touch a full line. */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
 once_more:
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
	/* Final line: re-enter the body below the prefetches. */
	if (lines == 0)
		goto once_more;
}
/*
 * XOR the buffers at p2, p3, p4 and p5 into the buffer at p1, eight
 * words per pass, issuing prefetchw()/prefetch() for the next line
 * before working on the current one.
 *
 * @bytes: length of each buffer in bytes.  Only whole lines of
 *         8 * sizeof(long) bytes are processed; a trailing partial
 *         line is ignored.
 * @p1:    destination, updated in place (p1 ^= p2 ^ p3 ^ p4 ^ p5).
 * @p2:    source, only read.
 * @p3:    source, only read.
 * @p4:    source, only read.
 * @p5:    source, only read.
 */
static void
xor_8regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	      unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	/* One less than the line count: the last line is done via the goto. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/* Shorter than one line: the do/while would still touch a full line. */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);
	prefetch(p5);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
		prefetch(p5+8);
 once_more:
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
	/* Final line: re-enter the body below the prefetches. */
	if (lines == 0)
		goto once_more;
}
/*
 * XOR the buffer at p2 into the buffer at p1, eight words per pass,
 * staging each line in local temporaries and issuing
 * prefetchw()/prefetch() for the next line before working on the
 * current one.
 *
 * @bytes: length of each buffer in bytes.  Only whole lines of
 *         8 * sizeof(long) bytes are processed; a trailing partial
 *         line is ignored.
 * @p1:    destination, updated in place (p1 ^= p2).
 * @p2:    source, only read.
 */
static void
xor_32regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	/* One less than the line count: the last line is done via the goto. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/* Shorter than one line: the do/while would still touch a full line. */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);

	do {
		/* Same type as the data: no signed/unsigned round trip. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		prefetchw(p1+8);
		prefetch(p2+8);
 once_more:
		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
	/* Final line: re-enter the body below the prefetches. */
	if (lines == 0)
		goto once_more;
}
/*
 * XOR the buffers at p2 and p3 into the buffer at p1, eight words per
 * pass, staging each line in local temporaries and issuing
 * prefetchw()/prefetch() for the next line before working on the
 * current one.
 *
 * @bytes: length of each buffer in bytes.  Only whole lines of
 *         8 * sizeof(long) bytes are processed; a trailing partial
 *         line is ignored.
 * @p1:    destination, updated in place (p1 ^= p2 ^ p3).
 * @p2:    source, only read.
 * @p3:    source, only read.
 */
static void
xor_32regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	       unsigned long *p3)
{
	/* One less than the line count: the last line is done via the goto. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/* Shorter than one line: the do/while would still touch a full line. */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);

	do {
		/* Same type as the data: no signed/unsigned round trip. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
 once_more:
		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
	/* Final line: re-enter the body below the prefetches. */
	if (lines == 0)
		goto once_more;
}
/*
 * XOR the buffers at p2, p3 and p4 into the buffer at p1, eight words
 * per pass, staging each line in local temporaries and issuing
 * prefetchw()/prefetch() for the next line before working on the
 * current one.
 *
 * @bytes: length of each buffer in bytes.  Only whole lines of
 *         8 * sizeof(long) bytes are processed; a trailing partial
 *         line is ignored.
 * @p1:    destination, updated in place (p1 ^= p2 ^ p3 ^ p4).
 * @p2:    source, only read.
 * @p3:    source, only read.
 * @p4:    source, only read.
 */
static void
xor_32regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	       unsigned long *p3, unsigned long *p4)
{
	/* One less than the line count: the last line is done via the goto. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/* Shorter than one line: the do/while would still touch a full line. */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);

	do {
		/* Same type as the data: no signed/unsigned round trip. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
 once_more:
		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
	/* Final line: re-enter the body below the prefetches. */
	if (lines == 0)
		goto once_more;
}
/*
 * XOR the buffers at p2, p3, p4 and p5 into the buffer at p1, eight
 * words per pass, staging each line in local temporaries and issuing
 * prefetchw()/prefetch() for the next line before working on the
 * current one.
 *
 * @bytes: length of each buffer in bytes.  Only whole lines of
 *         8 * sizeof(long) bytes are processed; a trailing partial
 *         line is ignored.
 * @p1:    destination, updated in place (p1 ^= p2 ^ p3 ^ p4 ^ p5).
 * @p2:    source, only read.
 * @p3:    source, only read.
 * @p4:    source, only read.
 * @p5:    source, only read.
 */
static void
xor_32regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	       unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	/* One less than the line count: the last line is done via the goto. */
	long lines = bytes / (sizeof (long)) / 8 - 1;

	/* Shorter than one line: the do/while would still touch a full line. */
	if (lines < 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);
	prefetch(p5);

	do {
		/* Same type as the data: no signed/unsigned round trip. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
		prefetch(p5+8);
 once_more:
		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		d0 ^= p5[0];
		d1 ^= p5[1];
		d2 ^= p5[2];
		d3 ^= p5[3];
		d4 ^= p5[4];
		d5 ^= p5[5];
		d6 ^= p5[6];
		d7 ^= p5[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
	/* Final line: re-enter the body below the prefetches. */
	if (lines == 0)
		goto once_more;
}
680 static struct xor_block_template xor_block_8regs = {
681 .name = "8regs",
682 .do_2 = xor_8regs_2,
683 .do_3 = xor_8regs_3,
684 .do_4 = xor_8regs_4,
685 .do_5 = xor_8regs_5,
688 static struct xor_block_template xor_block_32regs = {
689 .name = "32regs",
690 .do_2 = xor_32regs_2,
691 .do_3 = xor_32regs_3,
692 .do_4 = xor_32regs_4,
693 .do_5 = xor_32regs_5,
696 static struct xor_block_template xor_block_8regs_p = {
697 .name = "8regs_prefetch",
698 .do_2 = xor_8regs_p_2,
699 .do_3 = xor_8regs_p_3,
700 .do_4 = xor_8regs_p_4,
701 .do_5 = xor_8regs_p_5,
704 static struct xor_block_template xor_block_32regs_p = {
705 .name = "32regs_prefetch",
706 .do_2 = xor_32regs_p_2,
707 .do_3 = xor_32regs_p_3,
708 .do_4 = xor_32regs_p_4,
709 .do_5 = xor_32regs_p_5,
/*
 * Pass each generic template to xor_speed(), in the order listed.
 * Wrapped in do { } while (0) so the macro behaves as one statement.
 */
#define XOR_TRY_TEMPLATES			\
	do {					\
		xor_speed(&xor_block_8regs);	\
		xor_speed(&xor_block_8regs_p);	\
		xor_speed(&xor_block_32regs);	\
		xor_speed(&xor_block_32regs_p);	\
	} while (0)