2 * include/asm-alpha/xor.h
4 * Optimized RAID-5 checksumming functions for alpha EV5 and EV6
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2, or (at your option)
11 * You should have received a copy of the GNU General Public License
12 * (for example /usr/src/linux/COPYING); if not, write to the Free
13 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
/* Prototypes for the Alpha XOR checksumming routines.
   Two families are declared, xor_alpha_N and xor_alpha_prefetch_N,
   for N = 2..5.  In the declarations that are complete here, the
   leading unsigned long argument is followed by N unsigned long
   pointers.  The xor_alpha_prefetch_N routines are the ones bracketed
   by the .ent/.end assembler directives of the same name and installed
   in the .do_2 .. .do_5 members of xor_block_alpha_prefetch.
   NOTE(review): the leading unsigned long is presumably the length of
   each buffer in bytes, and the first pointer presumably the
   destination -- confirm against the assembly bodies.  */
16 extern void xor_alpha_2(unsigned long, unsigned long *, unsigned long *);
17 extern void xor_alpha_3(unsigned long, unsigned long *, unsigned long *,
19 extern void xor_alpha_4(unsigned long, unsigned long *, unsigned long *,
20 unsigned long *, unsigned long *);
21 extern void xor_alpha_5(unsigned long, unsigned long *, unsigned long *,
22 unsigned long *, unsigned long *, unsigned long *);
24 extern void xor_alpha_prefetch_2(unsigned long, unsigned long *,
26 extern void xor_alpha_prefetch_3(unsigned long, unsigned long *,
27 unsigned long *, unsigned long *);
28 extern void xor_alpha_prefetch_4(unsigned long, unsigned long *,
29 unsigned long *, unsigned long *,
31 extern void xor_alpha_prefetch_5(unsigned long, unsigned long *,
32 unsigned long *, unsigned long *,
33 unsigned long *, unsigned long *);
62 xor $0,$1,$0 # 7 cycles from $1 load \n\
119 xor $0,$1,$1 # 8 cycles from $0 load \n\
120 xor $3,$4,$4 # 6 cycles from $4 load \n\
121 xor $6,$7,$7 # 6 cycles from $7 load \n\
122 xor $21,$22,$22 # 5 cycles from $22 load \n\
124 xor $1,$2,$2 # 9 cycles from $2 load \n\
125 xor $24,$25,$25 # 5 cycles from $25 load \n\
127 xor $4,$5,$5 # 6 cycles from $5 load \n\
130 xor $7,$20,$20 # 7 cycles from $20 load \n\
132 xor $22,$23,$23 # 7 cycles from $23 load \n\
135 xor $25,$27,$27 # 7 cycles from $27 load \n\
150 xor $0,$1,$1 # 4 cycles from $1 load \n\
151 xor $3,$4,$4 # 5 cycles from $4 load \n\
152 xor $6,$7,$7 # 5 cycles from $7 load \n\
154 xor $1,$2,$2 # 4 cycles from $2 load \n\
155 xor $4,$5,$5 # 5 cycles from $5 load \n\
157 xor $7,$20,$20 # 4 cycles from $20 load \n\
193 xor $0,$1,$1 # 6 cycles from $1 load \n\
195 xor $2,$3,$3 # 6 cycles from $3 load \n\
200 xor $4,$5,$5 # 7 cycles from $5 load \n\
204 xor $21,$22,$22 # 7 cycles from $22 load \n\
208 xor $23,$24,$24 # 7 cycles from $24 load \n\
215 xor $25,$27,$27 # 8 cycles from $27 load \n\
223 xor $0,$1,$1 # 9 cycles from $1 load \n\
224 xor $2,$3,$3 # 5 cycles from $3 load \n\
228 xor $4,$5,$5 # 5 cycles from $5 load \n\
239 xor $6,$7,$7 # 8 cycles from $6 load \n\
243 xor $21,$22,$22 # 8 cycles from $22 load \n\
245 xor $23,$24,$24 # 5 cycles from $24 load \n\
248 xor $25,$27,$27 # 5 cycles from $27 load \n\
250 xor $0,$1,$1 # 5 cycles from $1 load \n\
253 xor $2,$3,$3 # 4 cycles from $3 load \n\
290 xor $0,$1,$1 # 6 cycles from $1 load \n\
292 xor $2,$3,$3 # 6 cycles from $3 load \n\
297 xor $3,$4,$4 # 7 cycles from $4 load \n\
300 xor $5,$6,$6 # 7 cycles from $6 load \n\
301 xor $7,$22,$22 # 7 cycles from $22 load \n\
302 xor $6,$23,$23 # 7 cycles from $23 load \n\
307 xor $24,$25,$25 # 8 cycles from $25 load \n\
310 xor $25,$27,$27 # 8 cycles from $27 load \n\
312 xor $28,$0,$0 # 7 cycles from $0 load \n\
320 xor $1,$2,$2 # 6 cycles from $2 load \n\
322 xor $3,$4,$4 # 4 cycles from $4 load \n\
332 xor $4,$5,$5 # 7 cycles from $5 load \n\
335 xor $6,$7,$7 # 7 cycles from $7 load \n\
340 xor $7,$22,$22 # 7 cycles from $22 load \n\
342 xor $23,$24,$24 # 6 cycles from $24 load \n\
347 xor $25,$27,$27 # 7 cycles from $27 load \n\
350 xor $27,$28,$28 # 8 cycles from $28 load \n\
352 xor $0,$1,$1 # 6 cycles from $1 load \n\
360 xor $2,$3,$3 # 9 cycles from $3 load \n\
361 xor $3,$4,$4 # 9 cycles from $4 load \n\
362 xor $5,$6,$6 # 8 cycles from $6 load \n\
366 xor $7,$22,$22 # 7 cycles from $22 load \n\
367 xor $23,$24,$24 # 6 cycles from $24 load \n\
372 xor $24,$25,$25 # 8 cycles from $25 load \n\
386 .ent xor_alpha_prefetch_2 \n\
387 xor_alpha_prefetch_2: \n\
397 ldq $31, 128($17) \n\
398 ldq $31, 128($18) \n\
400 ldq $31, 192($17) \n\
401 ldq $31, 192($18) \n\
425 xor $0,$1,$0 # 8 cycles from $1 load \n\
452 .end xor_alpha_prefetch_2 \n\
455 .ent xor_alpha_prefetch_3 \n\
456 xor_alpha_prefetch_3: \n\
468 ldq $31, 128($17) \n\
469 ldq $31, 128($18) \n\
470 ldq $31, 128($19) \n\
472 ldq $31, 192($17) \n\
473 ldq $31, 192($18) \n\
474 ldq $31, 192($19) \n\
497 xor $0,$1,$1 # 8 cycles from $0 load \n\
498 xor $3,$4,$4 # 7 cycles from $4 load \n\
499 xor $6,$7,$7 # 6 cycles from $7 load \n\
500 xor $21,$22,$22 # 5 cycles from $22 load \n\
502 xor $1,$2,$2 # 9 cycles from $2 load \n\
503 xor $24,$25,$25 # 5 cycles from $25 load \n\
505 xor $4,$5,$5 # 6 cycles from $5 load \n\
508 xor $7,$20,$20 # 7 cycles from $20 load \n\
510 xor $22,$23,$23 # 7 cycles from $23 load \n\
513 xor $25,$27,$27 # 7 cycles from $27 load \n\
532 xor $0,$1,$1 # 6 cycles from $1 load \n\
533 xor $3,$4,$4 # 5 cycles from $4 load \n\
534 xor $6,$7,$7 # 5 cycles from $7 load \n\
535 xor $1,$2,$2 # 4 cycles from $2 load \n\
537 xor $4,$5,$5 # 5 cycles from $5 load \n\
538 xor $7,$20,$20 # 4 cycles from $20 load \n\
550 .end xor_alpha_prefetch_3 \n\
553 .ent xor_alpha_prefetch_4 \n\
554 xor_alpha_prefetch_4: \n\
568 ldq $31, 128($17) \n\
569 ldq $31, 128($18) \n\
570 ldq $31, 128($19) \n\
571 ldq $31, 128($20) \n\
573 ldq $31, 192($17) \n\
574 ldq $31, 192($18) \n\
575 ldq $31, 192($19) \n\
576 ldq $31, 192($20) \n\
595 xor $0,$1,$1 # 6 cycles from $1 load \n\
597 xor $2,$3,$3 # 6 cycles from $3 load \n\
602 xor $4,$5,$5 # 7 cycles from $5 load \n\
606 xor $21,$22,$22 # 7 cycles from $22 load \n\
610 xor $23,$24,$24 # 7 cycles from $24 load \n\
617 xor $25,$27,$27 # 8 cycles from $27 load \n\
625 xor $0,$1,$1 # 9 cycles from $1 load \n\
626 xor $2,$3,$3 # 5 cycles from $3 load \n\
630 xor $4,$5,$5 # 5 cycles from $5 load \n\
642 xor $6,$7,$7 # 8 cycles from $6 load \n\
645 xor $21,$22,$22 # 8 cycles from $22 load \n\
650 xor $23,$24,$24 # 6 cycles from $24 load \n\
652 xor $25,$27,$27 # 6 cycles from $27 load \n\
656 xor $0,$1,$1 # 7 cycles from $1 load \n\
657 xor $2,$3,$3 # 6 cycles from $3 load \n\
672 .end xor_alpha_prefetch_4 \n\
675 .ent xor_alpha_prefetch_5 \n\
676 xor_alpha_prefetch_5: \n\
692 ldq $31, 128($17) \n\
693 ldq $31, 128($18) \n\
694 ldq $31, 128($19) \n\
695 ldq $31, 128($20) \n\
696 ldq $31, 128($21) \n\
698 ldq $31, 192($17) \n\
699 ldq $31, 192($18) \n\
700 ldq $31, 192($19) \n\
701 ldq $31, 192($20) \n\
702 ldq $31, 192($21) \n\
721 xor $0,$1,$1 # 6 cycles from $1 load \n\
723 xor $2,$3,$3 # 6 cycles from $3 load \n\
728 xor $3,$4,$4 # 7 cycles from $4 load \n\
731 xor $5,$6,$6 # 7 cycles from $6 load \n\
732 xor $7,$22,$22 # 7 cycles from $22 load \n\
733 xor $6,$23,$23 # 7 cycles from $23 load \n\
738 xor $24,$25,$25 # 8 cycles from $25 load \n\
741 xor $25,$27,$27 # 8 cycles from $27 load \n\
743 xor $28,$0,$0 # 7 cycles from $0 load \n\
751 xor $1,$2,$2 # 6 cycles from $2 load \n\
753 xor $3,$4,$4 # 4 cycles from $4 load \n\
763 xor $4,$5,$5 # 7 cycles from $5 load \n\
766 xor $6,$7,$7 # 7 cycles from $7 load \n\
771 xor $7,$22,$22 # 7 cycles from $22 load \n\
773 xor $23,$24,$24 # 6 cycles from $24 load \n\
778 xor $25,$27,$27 # 7 cycles from $27 load \n\
781 xor $27,$28,$28 # 8 cycles from $28 load \n\
783 xor $0,$1,$1 # 6 cycles from $1 load \n\
793 xor $2,$3,$3 # 9 cycles from $3 load \n\
796 xor $3,$4,$4 # 9 cycles from $4 load \n\
798 xor $5,$6,$6 # 8 cycles from $6 load \n\
802 xor $7,$22,$22 # 7 cycles from $22 load \n\
803 xor $23,$24,$24 # 6 cycles from $24 load \n\
808 xor $24,$25,$25 # 8 cycles from $25 load \n\
821 .end xor_alpha_prefetch_5 \n\
824 static struct xor_block_template xor_block_alpha
= {
832 static struct xor_block_template xor_block_alpha_prefetch
= {
833 .name
= "alpha prefetch",
834 .do_2
= xor_alpha_prefetch_2
,
835 .do_3
= xor_alpha_prefetch_3
,
836 .do_4
= xor_alpha_prefetch_4
,
837 .do_5
= xor_alpha_prefetch_5
,
840 /* For grins, also test the generic routines. */
841 #include <asm-generic/xor.h>
843 #undef XOR_TRY_TEMPLATES
844 #define XOR_TRY_TEMPLATES \
846 xor_speed(&xor_block_8regs); \
847 xor_speed(&xor_block_32regs); \
848 xor_speed(&xor_block_alpha); \
849 xor_speed(&xor_block_alpha_prefetch); \
852 /* Force the use of alpha_prefetch if EV6, as it is significantly
853 faster in the cold cache case.

   XOR_SELECT_TEMPLATE(FASTEST) evaluates to &xor_block_alpha_prefetch
   when implver() returns IMPLVER_EV6; for any other implver() value it
   evaluates to FASTEST unchanged.
   NOTE(review): FASTEST is presumably the template chosen by the
   xor_speed() benchmark runs -- confirm against the caller.  */
854 #define XOR_SELECT_TEMPLATE(FASTEST) \
855 (implver() == IMPLVER_EV6 ? &xor_block_alpha_prefetch : FASTEST)