Updates to Tomato RAF including NGINX && PHP
[tomato.git] / release / src / router / openssl / crypto / bn / bn-mips.s
blob02097fa38857d41d8824c9258a9ab32c7e53be07
1 .set mips2
# Identification strings, emitted into the read-only data section.
2 .rdata
3 .asciiz "mips3.s, Version 1.2"
4 .asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
6 .text
# noat: the routines below use $1 (the assembler temporary) as an explicit
# scratch register, so the assembler must not use it implicitly.
7 .set noat
9 .align 5
10 .globl bn_mul_add_words
11 .ent bn_mul_add_words
# bn_mul_add_words: exported entry point.
# Jumps to bn_mul_add_words_internal when the word count in $6 is positive;
# otherwise returns 0 in $2.  $2 is cleared in the branch delay slot, so the
# internal routine always starts with a zero carry.
# Presumably called as (rp=$4, ap=$5, num=$6, w=$7) -- confirm against the
# C prototype.
12 bn_mul_add_words:
13 .set noreorder
14 bgtz $6,bn_mul_add_words_internal
15 move $2,$0 # delay slot: carry/result = 0 (runs on both paths)
16 jr $31
17 move $4,$2 # delay slot: the result is copied into $4 as well
18 .end bn_mul_add_words
20 .align 5
21 .ent bn_mul_add_words_internal
# bn_mul_add_words_internal: multiply-accumulate loop behind bn_mul_add_words.
# For each word: word at ($4) = word at ($4) + word at ($5) * $7 + carry,
# where the carry out of each step is kept in $2.
#   $4  pointer to the words that are read, updated and stored back
#   $5  pointer to the words that are multiplied
#   $6  remaining word count (the entry stub only gets here with $6 > 0)
#   $7  multiplier
#   $2  running carry (cleared by the entry stub); final value is the result
# Scratch: $1, $3, $8-$15 and HI/LO.
22 bn_mul_add_words_internal:
23 .set reorder
24 li $3,-4 # mask clearing the low two bits of the count
25 and $8,$6,$3 # $8 = count rounded down to a multiple of 4
26 lw $12,0($5)
27 beqz $8,.L_bn_mul_add_words_tail # fewer than 4 words: tail only
# Main loop: four words per iteration.  The multu for the next word is issued
# while the previous word is still being finished.
29 .L_bn_mul_add_words_loop:
30 multu $12,$7
31 lw $13,0($4)
32 lw $14,4($5)
33 lw $15,4($4)
34 lw $8,2*4($5)
35 lw $9,2*4($4)
36 addu $13,$2
37 sltu $2,$13,$2 # All manuals say it "compares 32-bit
38 # values", but it seems to work fine
39 # even on 64-bit registers.
40 mflo $1
41 mfhi $12
42 addu $13,$1
43 addu $2,$12
44 multu $14,$7
45 sltu $1,$13,$1 # carry out of adding the low product word
46 sw $13,0($4)
47 addu $2,$1
49 lw $10,3*4($5)
50 lw $11,3*4($4)
51 addu $15,$2
52 sltu $2,$15,$2
53 mflo $1
54 mfhi $14
55 addu $15,$1
56 addu $2,$14
57 multu $8,$7
58 sltu $1,$15,$1
59 sw $15,4($4)
60 addu $2,$1
# Count and both pointers are advanced here, so the remaining stores of this
# iteration use negative offsets.
62 subu $6,4
63 addu $4,4*4
64 addu $5,4*4
65 addu $9,$2
66 sltu $2,$9,$2
67 mflo $1
68 mfhi $8
69 addu $9,$1
70 addu $2,$8
71 multu $10,$7
72 sltu $1,$9,$1
73 sw $9,-2*4($4)
74 addu $2,$1
77 and $8,$6,$3 # any full group of four left?
78 addu $11,$2
79 sltu $2,$11,$2
80 mflo $1
81 mfhi $10
82 addu $11,$1
83 addu $2,$10
84 sltu $1,$11,$1
85 sw $11,-4($4)
86 addu $2,$1
87 .set noreorder
88 bgtzl $8,.L_bn_mul_add_words_loop
89 lw $12,0($5) # branch-likely slot: executed only when the loop repeats
91 beqz $6,.L_bn_mul_add_words_return
92 nop
# Tail: up to three remaining words, one at a time.
94 .L_bn_mul_add_words_tail:
95 .set reorder
96 lw $12,0($5)
97 multu $12,$7
98 lw $13,0($4)
99 subu $6,1
100 addu $13,$2
101 sltu $2,$13,$2
102 mflo $1
103 mfhi $12
104 addu $13,$1
105 addu $2,$12
106 sltu $1,$13,$1
107 sw $13,0($4)
108 addu $2,$1
109 beqz $6,.L_bn_mul_add_words_return
111 lw $12,4($5)
112 multu $12,$7
113 lw $13,4($4)
114 subu $6,1
115 addu $13,$2
116 sltu $2,$13,$2
117 mflo $1
118 mfhi $12
119 addu $13,$1
120 addu $2,$12
121 sltu $1,$13,$1
122 sw $13,4($4)
123 addu $2,$1
124 beqz $6,.L_bn_mul_add_words_return
126 lw $12,2*4($5)
127 multu $12,$7
128 lw $13,2*4($4)
129 addu $13,$2
130 sltu $2,$13,$2
131 mflo $1
132 mfhi $12
133 addu $13,$1
134 addu $2,$12
135 sltu $1,$13,$1
136 sw $13,2*4($4)
137 addu $2,$1
139 .L_bn_mul_add_words_return:
140 .set noreorder
141 jr $31
142 move $4,$2 # delay slot: the final carry is copied into $4 as well
143 .end bn_mul_add_words_internal
145 .align 5
146 .globl bn_mul_words
147 .ent bn_mul_words
# bn_mul_words: exported entry point.
# Jumps to bn_mul_words_internal when the word count in $6 is positive;
# otherwise returns 0 in $2.  $2 is cleared in the branch delay slot, so the
# internal routine always starts with a zero carry.
# Presumably called as (rp=$4, ap=$5, num=$6, w=$7) -- confirm against the
# C prototype.
148 bn_mul_words:
149 .set noreorder
150 bgtz $6,bn_mul_words_internal
151 move $2,$0 # delay slot: carry/result = 0 (runs on both paths)
152 jr $31
153 move $4,$2 # delay slot: the result is copied into $4 as well
154 .end bn_mul_words
156 .align 5
157 .ent bn_mul_words_internal
# bn_mul_words_internal: multiply loop behind bn_mul_words.
# For each word: word at ($4) = low word of (word at ($5) * $7 + carry); the
# high word plus the carry of the addition becomes the next carry in $2.
#   $4  pointer to the words that are written
#   $5  pointer to the words that are multiplied
#   $6  remaining word count (the entry stub only gets here with $6 > 0)
#   $7  multiplier
#   $2  running carry (cleared by the entry stub); final value is the result
# Scratch: $1, $3, $8-$15 and HI/LO.
158 bn_mul_words_internal:
159 .set reorder
160 li $3,-4 # mask clearing the low two bits of the count
161 and $8,$6,$3 # $8 = count rounded down to a multiple of 4
162 lw $12,0($5)
163 beqz $8,.L_bn_mul_words_tail # fewer than 4 words: tail only
# Main loop: four words per iteration, next multu issued early.
165 .L_bn_mul_words_loop:
166 multu $12,$7
167 lw $14,4($5)
168 lw $8,2*4($5)
169 lw $10,3*4($5)
170 mflo $1
171 mfhi $12
172 addu $2,$1
173 sltu $13,$2,$1 # carry out of carry + low product word
174 multu $14,$7
175 sw $2,0($4)
176 addu $2,$13,$12 # next carry = high word + carry
# Count and both pointers are advanced here, so the remaining stores of this
# iteration use negative offsets.
178 subu $6,4
179 addu $4,4*4
180 addu $5,4*4
181 mflo $1
182 mfhi $14
183 addu $2,$1
184 sltu $15,$2,$1
185 multu $8,$7
186 sw $2,-3*4($4)
187 addu $2,$15,$14
189 mflo $1
190 mfhi $8
191 addu $2,$1
192 sltu $9,$2,$1
193 multu $10,$7
194 sw $2,-2*4($4)
195 addu $2,$9,$8
197 and $8,$6,$3 # any full group of four left?
198 mflo $1
199 mfhi $10
200 addu $2,$1
201 sltu $11,$2,$1
202 sw $2,-4($4)
203 addu $2,$11,$10
204 .set noreorder
205 bgtzl $8,.L_bn_mul_words_loop
206 lw $12,0($5) # branch-likely slot: executed only when the loop repeats
208 beqz $6,.L_bn_mul_words_return
# The delay slot must be filled explicitly under noreorder.  Without this nop
# the first load of the tail would sit in the slot and, when the count is a
# multiple of four, read one word past the end of the input on the way out.
209 nop
# Tail: up to three remaining words, one at a time.
211 .L_bn_mul_words_tail:
212 .set reorder
213 lw $12,0($5)
214 multu $12,$7
215 subu $6,1
216 mflo $1
217 mfhi $12
218 addu $2,$1
219 sltu $13,$2,$1
220 sw $2,0($4)
221 addu $2,$13,$12
222 beqz $6,.L_bn_mul_words_return
224 lw $12,4($5)
225 multu $12,$7
226 subu $6,1
227 mflo $1
228 mfhi $12
229 addu $2,$1
230 sltu $13,$2,$1
231 sw $2,4($4)
232 addu $2,$13,$12
233 beqz $6,.L_bn_mul_words_return
235 lw $12,2*4($5)
236 multu $12,$7
237 mflo $1
238 mfhi $12
239 addu $2,$1
240 sltu $13,$2,$1
241 sw $2,2*4($4)
242 addu $2,$13,$12
244 .L_bn_mul_words_return:
245 .set noreorder
246 jr $31
247 move $4,$2 # delay slot: the final carry is copied into $4 as well
248 .end bn_mul_words_internal
250 .align 5
251 .globl bn_sqr_words
252 .ent bn_sqr_words
# bn_sqr_words: exported entry point.
# Jumps to bn_sqr_words_internal when the word count in $6 is positive;
# otherwise returns with $2 = 0.  $2 is cleared in the branch delay slot.
# Presumably called as (rp=$4, ap=$5, num=$6) -- confirm against the
# C prototype.
253 bn_sqr_words:
254 .set noreorder
255 bgtz $6,bn_sqr_words_internal
256 move $2,$0 # delay slot: $2 = 0 (runs on both paths)
257 jr $31
258 move $4,$2 # delay slot: $2 is copied into $4 as well
259 .end bn_sqr_words
261 .align 5
262 .ent bn_sqr_words_internal
# bn_sqr_words_internal: squaring loop behind bn_sqr_words.
# Each input word is squared and the 64-bit result is stored as two
# consecutive output words (low word first), so the output pointer advances
# twice as fast as the input pointer.
#   $4  pointer to the output words (two per input word)
#   $5  pointer to the input words
#   $6  remaining word count (the entry stub only gets here with $6 > 0)
#   $2  not modified here (cleared by the entry stub); copied to $4 on return
# Scratch: $3, $8-$15 and HI/LO.
263 bn_sqr_words_internal:
264 .set reorder
265 li $3,-4 # mask clearing the low two bits of the count
266 and $8,$6,$3 # $8 = count rounded down to a multiple of 4
267 lw $12,0($5)
268 beqz $8,.L_bn_sqr_words_tail # fewer than 4 words: tail only
# Main loop: four input words (eight output words) per iteration.
270 .L_bn_sqr_words_loop:
271 multu $12,$12
272 lw $14,4($5)
273 lw $8,2*4($5)
274 lw $10,3*4($5)
275 mflo $13
276 mfhi $12
277 sw $13,0($4)
278 sw $12,4($4)
280 multu $14,$14
# Count and both pointers are advanced here, so the remaining stores of this
# iteration use negative offsets.
281 subu $6,4
282 addu $4,8*4
283 addu $5,4*4
284 mflo $15
285 mfhi $14
286 sw $15,-6*4($4)
287 sw $14,-5*4($4)
289 multu $8,$8
290 mflo $9
291 mfhi $8
292 sw $9,-4*4($4)
293 sw $8,-3*4($4)
296 multu $10,$10
297 and $8,$6,$3 # any full group of four left?
298 mflo $11
299 mfhi $10
300 sw $11,-2*4($4)
301 sw $10,-4($4)
303 .set noreorder
304 bgtzl $8,.L_bn_sqr_words_loop
305 lw $12,0($5) # branch-likely slot: executed only when the loop repeats
307 beqz $6,.L_bn_sqr_words_return
# The delay slot must be filled explicitly under noreorder.  Without this nop
# the first load of the tail would sit in the slot and, when the count is a
# multiple of four, read one word past the end of the input on the way out.
308 nop
# Tail: up to three remaining words, one at a time.
310 .L_bn_sqr_words_tail:
311 .set reorder
312 lw $12,0($5)
313 multu $12,$12
314 subu $6,1
315 mflo $13
316 mfhi $12
317 sw $13,0($4)
318 sw $12,4($4)
319 beqz $6,.L_bn_sqr_words_return
321 lw $12,4($5)
322 multu $12,$12
323 subu $6,1
324 mflo $13
325 mfhi $12
326 sw $13,2*4($4)
327 sw $12,3*4($4)
328 beqz $6,.L_bn_sqr_words_return
330 lw $12,2*4($5)
331 multu $12,$12
332 mflo $13
333 mfhi $12
334 sw $13,4*4($4)
335 sw $12,5*4($4)
337 .L_bn_sqr_words_return:
338 .set noreorder
339 jr $31
340 move $4,$2 # delay slot: $2 is copied into $4 as well
342 .end bn_sqr_words_internal
344 .align 5
345 .globl bn_add_words
346 .ent bn_add_words
# bn_add_words: exported entry point.
# Jumps to bn_add_words_internal when the word count in $7 is positive;
# otherwise returns 0 in $2.  $2 is cleared in the branch delay slot, so the
# internal routine always starts with a zero carry.
# Presumably called as (rp=$4, ap=$5, bp=$6, num=$7) -- confirm against the
# C prototype.
347 bn_add_words:
348 .set noreorder
349 bgtz $7,bn_add_words_internal
350 move $2,$0 # delay slot: carry/result = 0 (runs on both paths)
351 jr $31
352 move $4,$2 # delay slot: the result is copied into $4 as well
353 .end bn_add_words
355 .align 5
356 .ent bn_add_words_internal
# bn_add_words_internal: word-wise addition with carry behind bn_add_words.
# For each word: word at ($4) = word at ($5) + word at ($6) + carry.
# Each step makes two additions and detects the carry of each with sltu; the
# two carry bits are summed into $2 for the next step.
#   $4  pointer to the output words
#   $5  pointer to the first input
#   $6  pointer to the second input
#   $7  remaining word count (the entry stub only gets here with $7 > 0)
#   $2  running carry (cleared by the entry stub); final value is the result
# Scratch: $1, $3, $8-$15, $24, $25.
357 bn_add_words_internal:
358 .set reorder
359 li $3,-4 # mask clearing the low two bits of the count
360 and $1,$7,$3 # $1 = count rounded down to a multiple of 4
361 lw $12,0($5)
362 beqz $1,.L_bn_add_words_tail # fewer than 4 words: tail only
# Main loop: four words per iteration.  Pointers and count are advanced in
# between the loads, so later accesses use negative offsets.
364 .L_bn_add_words_loop:
365 lw $8,0($6)
366 subu $7,4
367 lw $13,4($5)
368 and $1,$7,$3 # any full group of four left after this one?
369 lw $14,2*4($5)
370 addu $6,4*4
371 lw $15,3*4($5)
372 addu $4,4*4
373 lw $9,-3*4($6)
374 addu $5,4*4
375 lw $10,-2*4($6)
376 lw $11,-4($6)
377 addu $8,$12
378 sltu $24,$8,$12 # carry out of the word + word addition
379 addu $12,$8,$2
380 sltu $2,$12,$8 # carry out of adding the incoming carry
381 sw $12,-4*4($4)
382 addu $2,$24
384 addu $9,$13
385 sltu $25,$9,$13
386 addu $13,$9,$2
387 sltu $2,$13,$9
388 sw $13,-3*4($4)
389 addu $2,$25
391 addu $10,$14
392 sltu $24,$10,$14
393 addu $14,$10,$2
394 sltu $2,$14,$10
395 sw $14,-2*4($4)
396 addu $2,$24
398 addu $11,$15
399 sltu $25,$11,$15
400 addu $15,$11,$2
401 sltu $2,$15,$11
402 sw $15,-4($4)
403 addu $2,$25
405 .set noreorder
406 bgtzl $1,.L_bn_add_words_loop
407 lw $12,0($5) # branch-likely slot: executed only when the loop repeats
409 beqz $7,.L_bn_add_words_return
# The delay slot must be filled explicitly under noreorder.  Without this nop
# the first load of the tail would sit in the slot and, when the count is a
# multiple of four, read one word past the end of the input on the way out.
410 nop
# Tail: up to three remaining words, one at a time.
412 .L_bn_add_words_tail:
413 .set reorder
414 lw $12,0($5)
415 lw $8,0($6)
416 addu $8,$12
417 subu $7,1
418 sltu $24,$8,$12
419 addu $12,$8,$2
420 sltu $2,$12,$8
421 sw $12,0($4)
422 addu $2,$24
423 beqz $7,.L_bn_add_words_return
425 lw $13,4($5)
426 lw $9,4($6)
427 addu $9,$13
428 subu $7,1
429 sltu $25,$9,$13
430 addu $13,$9,$2
431 sltu $2,$13,$9
432 sw $13,4($4)
433 addu $2,$25
434 beqz $7,.L_bn_add_words_return
436 lw $14,2*4($5)
437 lw $10,2*4($6)
438 addu $10,$14
439 sltu $24,$10,$14
440 addu $14,$10,$2
441 sltu $2,$14,$10
442 sw $14,2*4($4)
443 addu $2,$24
445 .L_bn_add_words_return:
446 .set noreorder
447 jr $31
448 move $4,$2 # delay slot: the final carry is copied into $4 as well
450 .end bn_add_words_internal
452 .align 5
453 .globl bn_sub_words
454 .ent bn_sub_words
# bn_sub_words: exported entry point.
# Jumps to bn_sub_words_internal when the word count in $7 is positive;
# otherwise returns 0 in $2.  $2 is cleared in the branch delay slot, so the
# internal routine always starts with a zero borrow.
# Presumably called as (rp=$4, ap=$5, bp=$6, num=$7) -- confirm against the
# C prototype.
455 bn_sub_words:
456 .set noreorder
457 bgtz $7,bn_sub_words_internal
458 move $2,$0 # delay slot: borrow/result = 0 (runs on both paths)
459 jr $31
460 move $4,$0 # delay slot: $4 is zeroed directly here (the sibling stubs copy $2)
461 .end bn_sub_words
463 .align 5
464 .ent bn_sub_words_internal
# bn_sub_words_internal: word-wise subtraction with borrow behind bn_sub_words.
# For each word: word at ($4) = word at ($5) - word at ($6) - borrow.
# Each step detects the borrow of the word - word subtraction (sltu) and of
# the borrow subtraction (sgtu); the two bits are summed into $2.
#   $4  pointer to the output words
#   $5  pointer to the minuend words
#   $6  pointer to the subtrahend words
#   $7  remaining word count (the entry stub only gets here with $7 > 0)
#   $2  running borrow (cleared by the entry stub); final value is the result
# Scratch: $1, $3, $8-$15, $24, $25.
465 bn_sub_words_internal:
466 .set reorder
467 li $3,-4 # mask clearing the low two bits of the count
468 and $1,$7,$3 # $1 = count rounded down to a multiple of 4
469 lw $12,0($5)
470 beqz $1,.L_bn_sub_words_tail # fewer than 4 words: tail only
# Main loop: four words per iteration.  Pointers and count are advanced in
# between the loads, so later accesses use negative offsets.
472 .L_bn_sub_words_loop:
473 lw $8,0($6)
474 subu $7,4
475 lw $13,4($5)
476 and $1,$7,$3 # any full group of four left after this one?
477 lw $14,2*4($5)
478 addu $6,4*4
479 lw $15,3*4($5)
480 addu $4,4*4
481 lw $9,-3*4($6)
482 addu $5,4*4
483 lw $10,-2*4($6)
484 lw $11,-4($6)
485 sltu $24,$12,$8 # borrow out of the word - word subtraction
486 subu $8,$12,$8
487 subu $12,$8,$2
488 sgtu $2,$12,$8 # borrow out of subtracting the incoming borrow
489 sw $12,-4*4($4)
490 addu $2,$24
492 sltu $25,$13,$9
493 subu $9,$13,$9
494 subu $13,$9,$2
495 sgtu $2,$13,$9
496 sw $13,-3*4($4)
497 addu $2,$25
500 sltu $24,$14,$10
501 subu $10,$14,$10
502 subu $14,$10,$2
503 sgtu $2,$14,$10
504 sw $14,-2*4($4)
505 addu $2,$24
507 sltu $25,$15,$11
508 subu $11,$15,$11
509 subu $15,$11,$2
510 sgtu $2,$15,$11
511 sw $15,-4($4)
512 addu $2,$25
514 .set noreorder
515 bgtzl $1,.L_bn_sub_words_loop
516 lw $12,0($5) # branch-likely slot: executed only when the loop repeats
518 beqz $7,.L_bn_sub_words_return
# The delay slot must be filled explicitly under noreorder.  Without this nop
# the first load of the tail would sit in the slot and, when the count is a
# multiple of four, read one word past the end of the input on the way out.
519 nop
# Tail: up to three remaining words, one at a time.
521 .L_bn_sub_words_tail:
522 .set reorder
523 lw $12,0($5)
524 lw $8,0($6)
525 subu $7,1
526 sltu $24,$12,$8
527 subu $8,$12,$8
528 subu $12,$8,$2
529 sgtu $2,$12,$8
530 sw $12,0($4)
531 addu $2,$24
532 beqz $7,.L_bn_sub_words_return
534 lw $13,4($5)
535 subu $7,1
536 lw $9,4($6)
537 sltu $25,$13,$9
538 subu $9,$13,$9
539 subu $13,$9,$2
540 sgtu $2,$13,$9
541 sw $13,4($4)
542 addu $2,$25
543 beqz $7,.L_bn_sub_words_return
545 lw $14,2*4($5)
546 lw $10,2*4($6)
547 sltu $24,$14,$10
548 subu $10,$14,$10
549 subu $14,$10,$2
550 sgtu $2,$14,$10
551 sw $14,2*4($4)
552 addu $2,$24
554 .L_bn_sub_words_return:
555 .set noreorder
556 jr $31
557 move $4,$2 # delay slot: the final borrow is copied into $4 as well
558 .end bn_sub_words_internal
560 .align 5
561 .globl bn_div_3_words
562 .ent bn_div_3_words
# bn_div_3_words: exported entry point.
#   $4  pointer m; the words at 0(m), -4(m) and -8(m) are read
#   $5  second divisor word (kept in $10 for the correction loop)
#   $6  divisor word that 0(m) is compared against and divided by
# If the word at 0(m) equals $6 the routine returns -1 (all ones); otherwise
# it continues in bn_div_3_words_internal with $4 = word at 0(m) and
# $5 = word at -4(m), ready for the call to bn_div_words.
563 bn_div_3_words:
564 .set noreorder
565 move $7,$4 # we know that bn_div_words does not
566 # touch $7, $10, $11 and preserves $6
567 # so that we can save two arguments
568 # and return address in registers
569 # instead of stack:-)
571 lw $4,($7)
572 move $10,$5
573 bne $4,$6,bn_div_3_words_internal
574 lw $5,-4($7) # delay slot: runs on both paths
575 li $2,-1
576 jr $31
577 move $4,$2 # delay slot: the result is copied into $4 as well
578 .end bn_div_3_words
580 .align 5
581 .ent bn_div_3_words_internal
# bn_div_3_words_internal: quotient estimate plus correction.
# Calls bn_div_words with the registers prepared by bn_div_3_words, then
# lowers the returned quotient in $2 one step at a time.
#   $7   pointer m saved by the entry stub
#   $10  second divisor word saved by the entry stub
#   $11  holds the return address across the call
#   $5   after the call: the remainder handed back by bn_div_words
#   $13:$12  quotient * $10 (high:low), reduced by $10 on every correction
#   $14  word at -8(m)
# Result in $2, copied into $4 on return.
582 bn_div_3_words_internal:
583 .set reorder
584 move $11,$31 # keep the return address in a register instead of the stack
585 bal bn_div_words
586 move $31,$11
587 multu $10,$2
588 lw $14,-2*4($7)
589 move $8,$0
590 mfhi $13
591 mflo $12
592 sltu $24,$13,$5 # $24 != 0 ends the loop: high product word below the remainder
593 .L_bn_div_3_words_inner_loop:
594 bnez $24,.L_bn_div_3_words_inner_loop_done
595 sgeu $1,$14,$12
596 seq $25,$13,$5
597 and $1,$25 # $1 != 0 also ends the loop: high words equal and $14 >= low word
598 sltu $15,$12,$10 # borrow for the two-word subtraction of $10 below
599 addu $5,$6 # remainder += divisor word
600 subu $13,$15
601 subu $12,$10
602 sltu $24,$13,$5
603 sltu $8,$5,$6 # remainder addition wrapped around
604 or $24,$8
605 .set noreorder
606 beqzl $1,.L_bn_div_3_words_inner_loop
607 subu $2,1 # branch-likely slot: quotient is decremented only when looping
608 .set reorder
609 .L_bn_div_3_words_inner_loop_done:
610 .set noreorder
611 jr $31
612 move $4,$2 # delay slot: the result is copied into $4 as well
613 .end bn_div_3_words_internal
615 .align 5
616 .globl bn_div_words
617 .ent bn_div_words
# bn_div_words: exported entry point.
# Jumps to bn_div_words_internal when the divisor in $6 is non-zero;
# otherwise returns -1 (all ones).  The li in the delay slot runs on both
# paths; the internal routine overwrites $2 before it is read.
#   $4  high word of the dividend
#   $5  low word of the dividend
#   $6  divisor
618 bn_div_words:
619 .set noreorder
620 bnez $6,bn_div_words_internal
621 li $2,-1 # I would rather signal div-by-zero
622 # which can be done with 'break 7'
623 jr $31
624 move $4,$2 # delay slot: the result is copied into $4 as well
625 .end bn_div_words
627 .align 5
628 .ent bn_div_words_internal
# bn_div_words_internal: divides the two-word value $4:$5 by $6.
# The divisor is first shifted left until its top bit is set (shift count in
# $25) and the dividend is shifted by the same amount.  The quotient is then
# produced as two 16-bit halves, each estimated with divu against the upper
# half of the divisor and corrected in a loop.
#   $2  quotient (copied into $4 on return)
#   $3  remainder, shifted back down (copied into $5 on return)
#   $6  shifted back to its entry value before returning
# Scratch: $1, $8, $9, $12-$15, $24, $25 and HI/LO.
# This label is entered with .set noreorder still in effect from the
# bn_div_words stub, so the first six instructions carry explicit delay slots.
629 bn_div_words_internal:
630 move $3,$0
631 bltz $6,.L_bn_div_words_body # top bit already set: no shifting needed
632 move $25,$3 # delay slot: shift count = 0
633 sll $6,1
634 bgtz $6,.-4 # jumps back to the sll until the top bit is set
635 addu $25,1 # delay slot: count one shift per pass
637 .set reorder
638 negu $13,$25
639 li $14,-1
640 sll $14,$13 # mask of the bits of $4 that the shift below would push out
641 and $14,$4
642 srl $1,$5,$13 # bits of the low word that move up into the high word
643 .set noreorder
644 bnezl $14,.+8
645 break 6 # signal overflow
646 .set reorder
647 sll $4,$25
648 sll $5,$25
649 or $4,$1
650 .L_bn_div_words_body:
651 srl $3,$6,4*4 # bits
652 sgeu $1,$4,$6
653 .set noreorder
654 bnezl $1,.+8
655 subu $4,$6 # branch-likely slot: executed only when $4 >= $6
656 .set reorder
# Upper half of the quotient.
# Note: with 32-bit registers the srl below leaves 0x0000ffff in $8.
658 li $8,-1
659 srl $9,$4,4*4 # bits
660 srl $8,4*4 # q=0xffffffff
661 beq $3,$9,.L_bn_div_words_skip_div1
662 divu $0,$4,$3
663 mflo $8
664 .L_bn_div_words_skip_div1:
665 multu $6,$8
666 sll $15,$4,4*4 # bits
667 srl $1,$5,4*4 # bits
668 or $15,$1
669 mflo $12
670 mfhi $13
# Correction: while the product $13:$12 is larger than $9:$15, subtract the
# divisor from the product and decrement the estimate in $8.
671 .L_bn_div_words_inner_loop1:
672 sltu $14,$15,$12
673 seq $24,$9,$13
674 sltu $1,$9,$13
675 and $14,$24
676 sltu $2,$12,$6
677 or $1,$14
678 .set noreorder
679 beqz $1,.L_bn_div_words_inner_loop1_done
680 subu $13,$2 # delay slot: runs on both paths
681 subu $12,$6
682 b .L_bn_div_words_inner_loop1
683 subu $8,1 # delay slot
684 .set reorder
685 .L_bn_div_words_inner_loop1_done:
687 sll $5,4*4 # bits
688 subu $4,$15,$12
689 sll $2,$8,4*4 # bits
# Lower half of the quotient, same scheme as above.
691 li $8,-1
692 srl $9,$4,4*4 # bits
693 srl $8,4*4 # q=0xffffffff
694 beq $3,$9,.L_bn_div_words_skip_div2
695 divu $0,$4,$3
696 mflo $8
697 .L_bn_div_words_skip_div2:
698 multu $6,$8
699 sll $15,$4,4*4 # bits
700 srl $1,$5,4*4 # bits
701 or $15,$1
702 mflo $12
703 mfhi $13
704 .L_bn_div_words_inner_loop2:
705 sltu $14,$15,$12
706 seq $24,$9,$13
707 sltu $1,$9,$13
708 and $14,$24
709 sltu $3,$12,$6
710 or $1,$14
711 .set noreorder
712 beqz $1,.L_bn_div_words_inner_loop2_done
713 subu $13,$3 # delay slot: runs on both paths
714 subu $12,$6
715 b .L_bn_div_words_inner_loop2
716 subu $8,1 # delay slot
717 .set reorder
718 .L_bn_div_words_inner_loop2_done:
720 subu $4,$15,$12
721 or $2,$8 # combine the two quotient halves
722 srl $3,$4,$25 # $3 contains remainder if anybody wants it
723 srl $6,$25 # restore $6
725 .set noreorder
726 move $5,$3
727 jr $31
728 move $4,$2 # delay slot: the quotient is copied into $4 as well
729 .end bn_div_words_internal
731 .align 5
732 .globl bn_mul_comba8
733 .ent bn_mul_comba8
# bn_mul_comba8: fully unrolled 8x8-word multiplication.
# Writes the 16 words r[0..15] at ($4) from the 8 words a[0..7] at ($5) and
# the 8 words b[0..7] at ($6).
# The product is built one result word at a time: all a[i]*b[j] with the same
# i+j are summed into a three-register accumulator, the lowest register is
# stored, and the three registers rotate roles (c1,c2,c3 in the comments are
# $2, $3, $7).  Each "mul_add_c(...)" comment sits on the multu that starts
# that product; the accumulation of the result follows a few lines later.
# Register use:
#   a[0..3] = $12-$15   a[4],a[5],a[6] = $16,$18,$20   a[7] = $5
#   b[0..3] = $8-$11    b[4],b[5],b[6] = $17,$19,$21   b[7] = $6
#   $24/$25 = low/high word of the current product, $1 = carry scratch
# $5 and $6 are overwritten by a[7] and b[7] once all other words are loaded.
# $16-$21 are saved on a 24-byte stack frame and restored before returning.
734 bn_mul_comba8:
735 .set noreorder
736 .frame $29,6*4,$31
737 .mask 0x003f0000,-4
738 subu $29,6*4
739 sw $21,5*4($29)
740 sw $20,4*4($29)
741 sw $19,3*4($29)
742 sw $18,2*4($29)
743 sw $17,1*4($29)
744 sw $16,0*4($29)
746 .set reorder
747 lw $12,0($5) # If compiled with -mips3 option on
748 # R5000 box assembler barks on this
749 # line with "should not have mult/div
750 # as last instruction in bb (R10K
751 # bug)" warning. If anybody out there
752 # has a clue about how to circumvent
753 # this do send me a note.
754 # <appro@fy.chalmers.se>
756 lw $8,0($6)
757 lw $13,4($5)
758 lw $14,2*4($5)
759 multu $12,$8 # mul_add_c(a[0],b[0],c1,c2,c3);
760 lw $15,3*4($5)
761 lw $9,4($6)
762 lw $10,2*4($6)
763 lw $11,3*4($6)
764 mflo $2
765 mfhi $3
767 lw $16,4*4($5)
768 lw $18,5*4($5)
769 multu $12,$9 # mul_add_c(a[0],b[1],c2,c3,c1);
770 lw $20,6*4($5)
771 lw $5,7*4($5)
772 lw $17,4*4($6)
773 lw $19,5*4($6)
774 mflo $24
775 mfhi $25
776 addu $3,$24
777 sltu $1,$3,$24
778 multu $13,$8 # mul_add_c(a[1],b[0],c2,c3,c1);
779 addu $7,$25,$1
780 lw $21,6*4($6)
781 lw $6,7*4($6)
782 sw $2,0($4) # r[0]=c1;
783 mflo $24
784 mfhi $25
785 addu $3,$24
786 sltu $1,$3,$24
787 multu $14,$8 # mul_add_c(a[2],b[0],c3,c1,c2);
788 addu $25,$1
789 addu $7,$25
790 sltu $2,$7,$25
791 sw $3,4($4) # r[1]=c2;
793 mflo $24
794 mfhi $25
795 addu $7,$24
796 sltu $1,$7,$24
797 multu $13,$9 # mul_add_c(a[1],b[1],c3,c1,c2);
798 addu $25,$1
799 addu $2,$25
800 mflo $24
801 mfhi $25
802 addu $7,$24
803 sltu $1,$7,$24
804 multu $12,$10 # mul_add_c(a[0],b[2],c3,c1,c2);
805 addu $25,$1
806 addu $2,$25
807 sltu $3,$2,$25
808 mflo $24
809 mfhi $25
810 addu $7,$24
811 sltu $1,$7,$24
812 multu $12,$11 # mul_add_c(a[0],b[3],c1,c2,c3);
813 addu $25,$1
814 addu $2,$25
815 sltu $1,$2,$25
816 addu $3,$1
817 sw $7,2*4($4) # r[2]=c3;
819 mflo $24
820 mfhi $25
821 addu $2,$24
822 sltu $1,$2,$24
823 multu $13,$10 # mul_add_c(a[1],b[2],c1,c2,c3);
824 addu $25,$1
825 addu $3,$25
826 sltu $7,$3,$25
827 mflo $24
828 mfhi $25
829 addu $2,$24
830 sltu $1,$2,$24
831 multu $14,$9 # mul_add_c(a[2],b[1],c1,c2,c3);
832 addu $25,$1
833 addu $3,$25
834 sltu $1,$3,$25
835 addu $7,$1
836 mflo $24
837 mfhi $25
838 addu $2,$24
839 sltu $1,$2,$24
840 multu $15,$8 # mul_add_c(a[3],b[0],c1,c2,c3);
841 addu $25,$1
842 addu $3,$25
843 sltu $1,$3,$25
844 addu $7,$1
845 mflo $24
846 mfhi $25
847 addu $2,$24
848 sltu $1,$2,$24
849 multu $16,$8 # mul_add_c(a[4],b[0],c2,c3,c1);
850 addu $25,$1
851 addu $3,$25
852 sltu $1,$3,$25
853 addu $7,$1
854 sw $2,3*4($4) # r[3]=c1;
856 mflo $24
857 mfhi $25
858 addu $3,$24
859 sltu $1,$3,$24
860 multu $15,$9 # mul_add_c(a[3],b[1],c2,c3,c1);
861 addu $25,$1
862 addu $7,$25
863 sltu $2,$7,$25
864 mflo $24
865 mfhi $25
866 addu $3,$24
867 sltu $1,$3,$24
868 multu $14,$10 # mul_add_c(a[2],b[2],c2,c3,c1);
869 addu $25,$1
870 addu $7,$25
871 sltu $1,$7,$25
872 addu $2,$1
873 mflo $24
874 mfhi $25
875 addu $3,$24
876 sltu $1,$3,$24
877 multu $13,$11 # mul_add_c(a[1],b[3],c2,c3,c1);
878 addu $25,$1
879 addu $7,$25
880 sltu $1,$7,$25
881 addu $2,$1
882 mflo $24
883 mfhi $25
884 addu $3,$24
885 sltu $1,$3,$24
886 multu $12,$17 # mul_add_c(a[0],b[4],c2,c3,c1);
887 addu $25,$1
888 addu $7,$25
889 sltu $1,$7,$25
890 addu $2,$1
891 mflo $24
892 mfhi $25
893 addu $3,$24
894 sltu $1,$3,$24
895 multu $12,$19 # mul_add_c(a[0],b[5],c3,c1,c2);
896 addu $25,$1
897 addu $7,$25
898 sltu $1,$7,$25
899 addu $2,$1
900 sw $3,4*4($4) # r[4]=c2;
902 mflo $24
903 mfhi $25
904 addu $7,$24
905 sltu $1,$7,$24
906 multu $13,$17 # mul_add_c(a[1],b[4],c3,c1,c2);
907 addu $25,$1
908 addu $2,$25
909 sltu $3,$2,$25
910 mflo $24
911 mfhi $25
912 addu $7,$24
913 sltu $1,$7,$24
914 multu $14,$11 # mul_add_c(a[2],b[3],c3,c1,c2);
915 addu $25,$1
916 addu $2,$25
917 sltu $1,$2,$25
918 addu $3,$1
919 mflo $24
920 mfhi $25
921 addu $7,$24
922 sltu $1,$7,$24
923 multu $15,$10 # mul_add_c(a[3],b[2],c3,c1,c2);
924 addu $25,$1
925 addu $2,$25
926 sltu $1,$2,$25
927 addu $3,$1
928 mflo $24
929 mfhi $25
930 addu $7,$24
931 sltu $1,$7,$24
932 multu $16,$9 # mul_add_c(a[4],b[1],c3,c1,c2);
933 addu $25,$1
934 addu $2,$25
935 sltu $1,$2,$25
936 addu $3,$1
937 mflo $24
938 mfhi $25
939 addu $7,$24
940 sltu $1,$7,$24
941 multu $18,$8 # mul_add_c(a[5],b[0],c3,c1,c2);
942 addu $25,$1
943 addu $2,$25
944 sltu $1,$2,$25
945 addu $3,$1
946 mflo $24
947 mfhi $25
948 addu $7,$24
949 sltu $1,$7,$24
950 multu $20,$8 # mul_add_c(a[6],b[0],c1,c2,c3);
951 addu $25,$1
952 addu $2,$25
953 sltu $1,$2,$25
954 addu $3,$1
955 sw $7,5*4($4) # r[5]=c3;
957 mflo $24
958 mfhi $25
959 addu $2,$24
960 sltu $1,$2,$24
961 multu $18,$9 # mul_add_c(a[5],b[1],c1,c2,c3);
962 addu $25,$1
963 addu $3,$25
964 sltu $7,$3,$25
965 mflo $24
966 mfhi $25
967 addu $2,$24
968 sltu $1,$2,$24
969 multu $16,$10 # mul_add_c(a[4],b[2],c1,c2,c3);
970 addu $25,$1
971 addu $3,$25
972 sltu $1,$3,$25
973 addu $7,$1
974 mflo $24
975 mfhi $25
976 addu $2,$24
977 sltu $1,$2,$24
978 multu $15,$11 # mul_add_c(a[3],b[3],c1,c2,c3);
979 addu $25,$1
980 addu $3,$25
981 sltu $1,$3,$25
982 addu $7,$1
983 mflo $24
984 mfhi $25
985 addu $2,$24
986 sltu $1,$2,$24
987 multu $14,$17 # mul_add_c(a[2],b[4],c1,c2,c3);
988 addu $25,$1
989 addu $3,$25
990 sltu $1,$3,$25
991 addu $7,$1
992 mflo $24
993 mfhi $25
994 addu $2,$24
995 sltu $1,$2,$24
996 multu $13,$19 # mul_add_c(a[1],b[5],c1,c2,c3);
997 addu $25,$1
998 addu $3,$25
999 sltu $1,$3,$25
1000 addu $7,$1
1001 mflo $24
1002 mfhi $25
1003 addu $2,$24
1004 sltu $1,$2,$24
1005 multu $12,$21 # mul_add_c(a[0],b[6],c1,c2,c3);
1006 addu $25,$1
1007 addu $3,$25
1008 sltu $1,$3,$25
1009 addu $7,$1
1010 mflo $24
1011 mfhi $25
1012 addu $2,$24
1013 sltu $1,$2,$24
1014 multu $12,$6 # mul_add_c(a[0],b[7],c2,c3,c1);
1015 addu $25,$1
1016 addu $3,$25
1017 sltu $1,$3,$25
1018 addu $7,$1
1019 sw $2,6*4($4) # r[6]=c1;
1021 mflo $24
1022 mfhi $25
1023 addu $3,$24
1024 sltu $1,$3,$24
1025 multu $13,$21 # mul_add_c(a[1],b[6],c2,c3,c1);
1026 addu $25,$1
1027 addu $7,$25
1028 sltu $2,$7,$25
1029 mflo $24
1030 mfhi $25
1031 addu $3,$24
1032 sltu $1,$3,$24
1033 multu $14,$19 # mul_add_c(a[2],b[5],c2,c3,c1);
1034 addu $25,$1
1035 addu $7,$25
1036 sltu $1,$7,$25
1037 addu $2,$1
1038 mflo $24
1039 mfhi $25
1040 addu $3,$24
1041 sltu $1,$3,$24
1042 multu $15,$17 # mul_add_c(a[3],b[4],c2,c3,c1);
1043 addu $25,$1
1044 addu $7,$25
1045 sltu $1,$7,$25
1046 addu $2,$1
1047 mflo $24
1048 mfhi $25
1049 addu $3,$24
1050 sltu $1,$3,$24
1051 multu $16,$11 # mul_add_c(a[4],b[3],c2,c3,c1);
1052 addu $25,$1
1053 addu $7,$25
1054 sltu $1,$7,$25
1055 addu $2,$1
1056 mflo $24
1057 mfhi $25
1058 addu $3,$24
1059 sltu $1,$3,$24
1060 multu $18,$10 # mul_add_c(a[5],b[2],c2,c3,c1);
1061 addu $25,$1
1062 addu $7,$25
1063 sltu $1,$7,$25
1064 addu $2,$1
1065 mflo $24
1066 mfhi $25
1067 addu $3,$24
1068 sltu $1,$3,$24
1069 multu $20,$9 # mul_add_c(a[6],b[1],c2,c3,c1);
1070 addu $25,$1
1071 addu $7,$25
1072 sltu $1,$7,$25
1073 addu $2,$1
1074 mflo $24
1075 mfhi $25
1076 addu $3,$24
1077 sltu $1,$3,$24
1078 multu $5,$8 # mul_add_c(a[7],b[0],c2,c3,c1);
1079 addu $25,$1
1080 addu $7,$25
1081 sltu $1,$7,$25
1082 addu $2,$1
1083 mflo $24
1084 mfhi $25
1085 addu $3,$24
1086 sltu $1,$3,$24
1087 multu $5,$9 # mul_add_c(a[7],b[1],c3,c1,c2);
1088 addu $25,$1
1089 addu $7,$25
1090 sltu $1,$7,$25
1091 addu $2,$1
1092 sw $3,7*4($4) # r[7]=c2;
1094 mflo $24
1095 mfhi $25
1096 addu $7,$24
1097 sltu $1,$7,$24
1098 multu $20,$10 # mul_add_c(a[6],b[2],c3,c1,c2);
1099 addu $25,$1
1100 addu $2,$25
1101 sltu $3,$2,$25
1102 mflo $24
1103 mfhi $25
1104 addu $7,$24
1105 sltu $1,$7,$24
1106 multu $18,$11 # mul_add_c(a[5],b[3],c3,c1,c2);
1107 addu $25,$1
1108 addu $2,$25
1109 sltu $1,$2,$25
1110 addu $3,$1
1111 mflo $24
1112 mfhi $25
1113 addu $7,$24
1114 sltu $1,$7,$24
1115 multu $16,$17 # mul_add_c(a[4],b[4],c3,c1,c2);
1116 addu $25,$1
1117 addu $2,$25
1118 sltu $1,$2,$25
1119 addu $3,$1
1120 mflo $24
1121 mfhi $25
1122 addu $7,$24
1123 sltu $1,$7,$24
1124 multu $15,$19 # mul_add_c(a[3],b[5],c3,c1,c2);
1125 addu $25,$1
1126 addu $2,$25
1127 sltu $1,$2,$25
1128 addu $3,$1
1129 mflo $24
1130 mfhi $25
1131 addu $7,$24
1132 sltu $1,$7,$24
1133 multu $14,$21 # mul_add_c(a[2],b[6],c3,c1,c2);
1134 addu $25,$1
1135 addu $2,$25
1136 sltu $1,$2,$25
1137 addu $3,$1
1138 mflo $24
1139 mfhi $25
1140 addu $7,$24
1141 sltu $1,$7,$24
1142 multu $13,$6 # mul_add_c(a[1],b[7],c3,c1,c2);
1143 addu $25,$1
1144 addu $2,$25
1145 sltu $1,$2,$25
1146 addu $3,$1
1147 mflo $24
1148 mfhi $25
1149 addu $7,$24
1150 sltu $1,$7,$24
1151 multu $14,$6 # mul_add_c(a[2],b[7],c1,c2,c3);
1152 addu $25,$1
1153 addu $2,$25
1154 sltu $1,$2,$25
1155 addu $3,$1
1156 sw $7,8*4($4) # r[8]=c3;
1158 mflo $24
1159 mfhi $25
1160 addu $2,$24
1161 sltu $1,$2,$24
1162 multu $15,$21 # mul_add_c(a[3],b[6],c1,c2,c3);
1163 addu $25,$1
1164 addu $3,$25
1165 sltu $7,$3,$25
1166 mflo $24
1167 mfhi $25
1168 addu $2,$24
1169 sltu $1,$2,$24
1170 multu $16,$19 # mul_add_c(a[4],b[5],c1,c2,c3);
1171 addu $25,$1
1172 addu $3,$25
1173 sltu $1,$3,$25
1174 addu $7,$1
1175 mflo $24
1176 mfhi $25
1177 addu $2,$24
1178 sltu $1,$2,$24
1179 multu $18,$17 # mul_add_c(a[5],b[4],c1,c2,c3);
1180 addu $25,$1
1181 addu $3,$25
1182 sltu $1,$3,$25
1183 addu $7,$1
1184 mflo $24
1185 mfhi $25
1186 addu $2,$24
1187 sltu $1,$2,$24
1188 multu $20,$11 # mul_add_c(a[6],b[3],c1,c2,c3);
1189 addu $25,$1
1190 addu $3,$25
1191 sltu $1,$3,$25
1192 addu $7,$1
1193 mflo $24
1194 mfhi $25
1195 addu $2,$24
1196 sltu $1,$2,$24
1197 multu $5,$10 # mul_add_c(a[7],b[2],c1,c2,c3);
1198 addu $25,$1
1199 addu $3,$25
1200 sltu $1,$3,$25
1201 addu $7,$1
1202 mflo $24
1203 mfhi $25
1204 addu $2,$24
1205 sltu $1,$2,$24
1206 multu $5,$11 # mul_add_c(a[7],b[3],c2,c3,c1);
1207 addu $25,$1
1208 addu $3,$25
1209 sltu $1,$3,$25
1210 addu $7,$1
1211 sw $2,9*4($4) # r[9]=c1;
1213 mflo $24
1214 mfhi $25
1215 addu $3,$24
1216 sltu $1,$3,$24
1217 multu $20,$17 # mul_add_c(a[6],b[4],c2,c3,c1);
1218 addu $25,$1
1219 addu $7,$25
1220 sltu $2,$7,$25
1221 mflo $24
1222 mfhi $25
1223 addu $3,$24
1224 sltu $1,$3,$24
1225 multu $18,$19 # mul_add_c(a[5],b[5],c2,c3,c1);
1226 addu $25,$1
1227 addu $7,$25
1228 sltu $1,$7,$25
1229 addu $2,$1
1230 mflo $24
1231 mfhi $25
1232 addu $3,$24
1233 sltu $1,$3,$24
1234 multu $16,$21 # mul_add_c(a[4],b[6],c2,c3,c1);
1235 addu $25,$1
1236 addu $7,$25
1237 sltu $1,$7,$25
1238 addu $2,$1
1239 mflo $24
1240 mfhi $25
1241 addu $3,$24
1242 sltu $1,$3,$24
1243 multu $15,$6 # mul_add_c(a[3],b[7],c2,c3,c1);
1244 addu $25,$1
1245 addu $7,$25
1246 sltu $1,$7,$25
1247 addu $2,$1
1248 mflo $24
1249 mfhi $25
1250 addu $3,$24
1251 sltu $1,$3,$24
1252 multu $16,$6 # mul_add_c(a[4],b[7],c3,c1,c2);
1253 addu $25,$1
1254 addu $7,$25
1255 sltu $1,$7,$25
1256 addu $2,$1
1257 sw $3,10*4($4) # r[10]=c2;
1259 mflo $24
1260 mfhi $25
1261 addu $7,$24
1262 sltu $1,$7,$24
1263 multu $18,$21 # mul_add_c(a[5],b[6],c3,c1,c2);
1264 addu $25,$1
1265 addu $2,$25
1266 sltu $3,$2,$25
1267 mflo $24
1268 mfhi $25
1269 addu $7,$24
1270 sltu $1,$7,$24
1271 multu $20,$19 # mul_add_c(a[6],b[5],c3,c1,c2);
1272 addu $25,$1
1273 addu $2,$25
1274 sltu $1,$2,$25
1275 addu $3,$1
1276 mflo $24
1277 mfhi $25
1278 addu $7,$24
1279 sltu $1,$7,$24
1280 multu $5,$17 # mul_add_c(a[7],b[4],c3,c1,c2);
1281 addu $25,$1
1282 addu $2,$25
1283 sltu $1,$2,$25
1284 addu $3,$1
1285 mflo $24
1286 mfhi $25
1287 addu $7,$24
1288 sltu $1,$7,$24
1289 multu $5,$19 # mul_add_c(a[7],b[5],c1,c2,c3);
1290 addu $25,$1
1291 addu $2,$25
1292 sltu $1,$2,$25
1293 addu $3,$1
1294 sw $7,11*4($4) # r[11]=c3;
1296 mflo $24
1297 mfhi $25
1298 addu $2,$24
1299 sltu $1,$2,$24
1300 multu $20,$21 # mul_add_c(a[6],b[6],c1,c2,c3);
1301 addu $25,$1
1302 addu $3,$25
1303 sltu $7,$3,$25
1304 mflo $24
1305 mfhi $25
1306 addu $2,$24
1307 sltu $1,$2,$24
1308 multu $18,$6 # mul_add_c(a[5],b[7],c1,c2,c3);
1309 addu $25,$1
1310 addu $3,$25
1311 sltu $1,$3,$25
1312 addu $7,$1
1313 mflo $24
1314 mfhi $25
1315 addu $2,$24
1316 sltu $1,$2,$24
1317 multu $20,$6 # mul_add_c(a[6],b[7],c2,c3,c1);
1318 addu $25,$1
1319 addu $3,$25
1320 sltu $1,$3,$25
1321 addu $7,$1
1322 sw $2,12*4($4) # r[12]=c1;
1324 mflo $24
1325 mfhi $25
1326 addu $3,$24
1327 sltu $1,$3,$24
1328 multu $5,$21 # mul_add_c(a[7],b[6],c2,c3,c1);
1329 addu $25,$1
1330 addu $7,$25
1331 sltu $2,$7,$25
1332 mflo $24
1333 mfhi $25
1334 addu $3,$24
1335 sltu $1,$3,$24
1336 multu $5,$6 # mul_add_c(a[7],b[7],c3,c1,c2);
1337 addu $25,$1
1338 addu $7,$25
1339 sltu $1,$7,$25
1340 addu $2,$1
1341 sw $3,13*4($4) # r[13]=c2;
1343 mflo $24
1344 mfhi $25
1345 addu $7,$24
1346 sltu $1,$7,$24
1347 addu $25,$1
1348 addu $2,$25
1349 sw $7,14*4($4) # r[14]=c3;
1350 sw $2,15*4($4) # r[15]=c1;
1352 .set noreorder
1353 lw $21,5*4($29)
1354 lw $20,4*4($29)
1355 lw $19,3*4($29)
1356 lw $18,2*4($29)
1357 lw $17,1*4($29)
1358 lw $16,0*4($29)
1359 jr $31
1360 addu $29,6*4 # delay slot: release the stack frame
1361 .end bn_mul_comba8
1363 .align 5
1364 .globl bn_mul_comba4
1365 .ent bn_mul_comba4
# bn_mul_comba4: fully unrolled 4x4-word multiplication.
# Writes the 8 words r[0..7] at ($4) from the 4 words a[0..3] at ($5) and
# the 4 words b[0..3] at ($6).
# The product is built one result word at a time: all a[i]*b[j] with the same
# i+j are summed into a three-register accumulator, the lowest register is
# stored, and the three registers rotate roles (c1,c2,c3 in the comments are
# $2, $3, $7).  Each "mul_add_c(...)" comment sits on the multu that starts
# that product; the accumulation of the result follows a few lines later.
# Register use:
#   a[0..3] = $12-$15   b[0..3] = $8-$11
#   $24/$25 = low/high word of the current product, $1 = carry scratch
# No stack frame is used and no other registers are written.
1366 bn_mul_comba4:
1367 .set reorder
1368 lw $12,0($5)
1369 lw $8,0($6)
1370 lw $13,4($5)
1371 lw $14,2*4($5)
1372 multu $12,$8 # mul_add_c(a[0],b[0],c1,c2,c3);
1373 lw $15,3*4($5)
1374 lw $9,4($6)
1375 lw $10,2*4($6)
1376 lw $11,3*4($6)
1377 mflo $2
1378 mfhi $3
1379 sw $2,0($4) # r[0]=c1;
1381 multu $12,$9 # mul_add_c(a[0],b[1],c2,c3,c1);
1382 mflo $24
1383 mfhi $25
1384 addu $3,$24
1385 sltu $1,$3,$24
1386 multu $13,$8 # mul_add_c(a[1],b[0],c2,c3,c1);
1387 addu $7,$25,$1
1388 mflo $24
1389 mfhi $25
1390 addu $3,$24
1391 sltu $1,$3,$24
1392 multu $14,$8 # mul_add_c(a[2],b[0],c3,c1,c2);
1393 addu $25,$1
1394 addu $7,$25
1395 sltu $2,$7,$25
1396 sw $3,4($4) # r[1]=c2;
1398 mflo $24
1399 mfhi $25
1400 addu $7,$24
1401 sltu $1,$7,$24
1402 multu $13,$9 # mul_add_c(a[1],b[1],c3,c1,c2);
1403 addu $25,$1
1404 addu $2,$25
1405 mflo $24
1406 mfhi $25
1407 addu $7,$24
1408 sltu $1,$7,$24
1409 multu $12,$10 # mul_add_c(a[0],b[2],c3,c1,c2);
1410 addu $25,$1
1411 addu $2,$25
1412 sltu $3,$2,$25
1413 mflo $24
1414 mfhi $25
1415 addu $7,$24
1416 sltu $1,$7,$24
1417 multu $12,$11 # mul_add_c(a[0],b[3],c1,c2,c3);
1418 addu $25,$1
1419 addu $2,$25
1420 sltu $1,$2,$25
1421 addu $3,$1
1422 sw $7,2*4($4) # r[2]=c3;
1424 mflo $24
1425 mfhi $25
1426 addu $2,$24
1427 sltu $1,$2,$24
1428 multu $13,$10 # mul_add_c(a[1],b[2],c1,c2,c3);
1429 addu $25,$1
1430 addu $3,$25
1431 sltu $7,$3,$25
1432 mflo $24
1433 mfhi $25
1434 addu $2,$24
1435 sltu $1,$2,$24
1436 multu $14,$9 # mul_add_c(a[2],b[1],c1,c2,c3);
1437 addu $25,$1
1438 addu $3,$25
1439 sltu $1,$3,$25
1440 addu $7,$1
1441 mflo $24
1442 mfhi $25
1443 addu $2,$24
1444 sltu $1,$2,$24
1445 multu $15,$8 # mul_add_c(a[3],b[0],c1,c2,c3);
1446 addu $25,$1
1447 addu $3,$25
1448 sltu $1,$3,$25
1449 addu $7,$1
1450 mflo $24
1451 mfhi $25
1452 addu $2,$24
1453 sltu $1,$2,$24
1454 multu $15,$9 # mul_add_c(a[3],b[1],c2,c3,c1);
1455 addu $25,$1
1456 addu $3,$25
1457 sltu $1,$3,$25
1458 addu $7,$1
1459 sw $2,3*4($4) # r[3]=c1;
1461 mflo $24
1462 mfhi $25
1463 addu $3,$24
1464 sltu $1,$3,$24
1465 multu $14,$10 # mul_add_c(a[2],b[2],c2,c3,c1);
1466 addu $25,$1
1467 addu $7,$25
1468 sltu $2,$7,$25
1469 mflo $24
1470 mfhi $25
1471 addu $3,$24
1472 sltu $1,$3,$24
1473 multu $13,$11 # mul_add_c(a[1],b[3],c2,c3,c1);
1474 addu $25,$1
1475 addu $7,$25
1476 sltu $1,$7,$25
1477 addu $2,$1
1478 mflo $24
1479 mfhi $25
1480 addu $3,$24
1481 sltu $1,$3,$24
1482 multu $14,$11 # mul_add_c(a[2],b[3],c3,c1,c2);
1483 addu $25,$1
1484 addu $7,$25
1485 sltu $1,$7,$25
1486 addu $2,$1
1487 sw $3,4*4($4) # r[4]=c2;
1489 mflo $24
1490 mfhi $25
1491 addu $7,$24
1492 sltu $1,$7,$24
1493 multu $15,$10 # mul_add_c(a[3],b[2],c3,c1,c2);
1494 addu $25,$1
1495 addu $2,$25
1496 sltu $3,$2,$25
1497 mflo $24
1498 mfhi $25
1499 addu $7,$24
1500 sltu $1,$7,$24
1501 multu $15,$11 # mul_add_c(a[3],b[3],c1,c2,c3);
1502 addu $25,$1
1503 addu $2,$25
1504 sltu $1,$2,$25
1505 addu $3,$1
1506 sw $7,5*4($4) # r[5]=c3;
1508 mflo $24
1509 mfhi $25
1510 addu $2,$24
1511 sltu $1,$2,$24
1512 addu $25,$1
1513 addu $3,$25
1514 sw $2,6*4($4) # r[6]=c1;
1515 sw $3,7*4($4) # r[7]=c2;
1517 .set noreorder
1518 jr $31
# The return jump's delay slot must be filled explicitly under noreorder;
# otherwise whatever the assembler emits next (alignment padding or the first
# instruction of the following routine) would execute in the slot.
1519 nop
1520 .end bn_mul_comba4
1522 .align 5
1523 .globl bn_sqr_comba8
1524 .ent bn_sqr_comba8
1525 bn_sqr_comba8:
1526 .set reorder
1527 lw $12,0($5)
1528 lw $13,4($5)
1529 lw $14,2*4($5)
1530 lw $15,3*4($5)
1532 multu $12,$12 # mul_add_c(a[0],b[0],c1,c2,c3);
1533 lw $8,4*4($5)
1534 lw $9,5*4($5)
1535 lw $10,6*4($5)
1536 lw $11,7*4($5)
1537 mflo $2
1538 mfhi $3
1539 sw $2,0($4)
1541 multu $12,$13 # mul_add_c2(a[0],b[1],c2,c3,c1);
1542 mflo $24
1543 mfhi $25
1544 slt $2,$25,$0
1545 sll $25,1
1546 multu $14,$12 # mul_add_c2(a[2],b[0],c3,c1,c2);
1547 slt $6,$24,$0
1548 addu $25,$6
1549 sll $24,1
1550 addu $3,$24
1551 sltu $1,$3,$24
1552 addu $7,$25,$1
1553 sw $3,4($4)
1555 mflo $24
1556 mfhi $25
1557 slt $3,$25,$0
1558 sll $25,1
1559 multu $13,$13 # mul_add_c(a[1],b[1],c3,c1,c2);
1560 slt $6,$24,$0
1561 addu $25,$6
1562 sll $24,1
1563 addu $7,$24
1564 sltu $1,$7,$24
1565 addu $25,$1
1566 addu $2,$25
1567 sltu $1,$2,$25
1568 addu $3,$1
1569 mflo $24
1570 mfhi $25
1571 addu $7,$24
1572 sltu $1,$7,$24
1573 multu $12,$15 # mul_add_c2(a[0],b[3],c1,c2,c3);
1574 addu $25,$1
1575 addu $2,$25
1576 sltu $1,$2,$25
1577 addu $3,$1
1578 sw $7,2*4($4)
1580 mflo $24
1581 mfhi $25
1582 slt $7,$25,$0
1583 sll $25,1
1584 multu $13,$14 # mul_add_c2(a[1],b[2],c1,c2,c3);
1585 slt $6,$24,$0
1586 addu $25,$6
1587 sll $24,1
1588 addu $2,$24
1589 sltu $1,$2,$24
1590 addu $25,$1
1591 addu $3,$25
1592 sltu $1,$3,$25
1593 addu $7,$1
1594 mflo $24
1595 mfhi $25
1596 slt $1,$25,$0
1597 addu $7,$1
1598 multu $8,$12 # mul_add_c2(a[4],b[0],c2,c3,c1);
1599 sll $25,1
1600 slt $6,$24,$0
1601 addu $25,$6
1602 sll $24,1
1603 addu $2,$24
1604 sltu $1,$2,$24
1605 addu $25,$1
1606 addu $3,$25
1607 sltu $1,$3,$25
1608 addu $7,$1
1609 sw $2,3*4($4)
1611 mflo $24
1612 mfhi $25
1613 slt $2,$25,$0
1614 sll $25,1
1615 multu $15,$13 # mul_add_c2(a[3],b[1],c2,c3,c1);
1616 slt $6,$24,$0
1617 addu $25,$6
1618 sll $24,1
1619 addu $3,$24
1620 sltu $1,$3,$24
1621 addu $25,$1
1622 addu $7,$25
1623 sltu $1,$7,$25
1624 addu $2,$1
1625 mflo $24
1626 mfhi $25
1627 slt $1,$25,$0
1628 addu $2,$1
1629 multu $14,$14 # mul_add_c(a[2],b[2],c2,c3,c1);
1630 sll $25,1
1631 slt $6,$24,$0
1632 addu $25,$6
1633 sll $24,1
1634 addu $3,$24
1635 sltu $1,$3,$24
1636 addu $25,$1
1637 addu $7,$25
1638 sltu $1,$7,$25
1639 addu $2,$1
1640 mflo $24
1641 mfhi $25
1642 addu $3,$24
1643 sltu $1,$3,$24
1644 multu $12,$9 # mul_add_c2(a[0],b[5],c3,c1,c2);
1645 addu $25,$1
1646 addu $7,$25
1647 sltu $1,$7,$25
1648 addu $2,$1
1649 sw $3,4*4($4)
1651 mflo $24
1652 mfhi $25
1653 slt $3,$25,$0
1654 sll $25,1
1655 multu $13,$8 # mul_add_c2(a[1],b[4],c3,c1,c2);
1656 slt $6,$24,$0
1657 addu $25,$6
1658 sll $24,1
1659 addu $7,$24
1660 sltu $1,$7,$24
1661 addu $25,$1
1662 addu $2,$25
1663 sltu $1,$2,$25
1664 addu $3,$1
1665 mflo $24
1666 mfhi $25
1667 slt $1,$25,$0
1668 addu $3,$1
1669 multu $14,$15 # mul_add_c2(a[2],b[3],c3,c1,c2);
1670 sll $25,1
1671 slt $6,$24,$0
1672 addu $25,$6
1673 sll $24,1
1674 addu $7,$24
1675 sltu $1,$7,$24
1676 addu $25,$1
1677 addu $2,$25
1678 sltu $1,$2,$25
1679 addu $3,$1
1680 mflo $24
1681 mfhi $25
1682 slt $1,$25,$0
1683 multu $10,$12 # mul_add_c2(a[6],b[0],c1,c2,c3);
1684 addu $3,$1
1685 sll $25,1
1686 slt $6,$24,$0
1687 addu $25,$6
1688 sll $24,1
1689 addu $7,$24
1690 sltu $1,$7,$24
1691 addu $25,$1
1692 addu $2,$25
1693 sltu $1,$2,$25
1694 addu $3,$1
1695 sw $7,5*4($4)
1697 mflo $24
1698 mfhi $25
1699 slt $7,$25,$0
1700 sll $25,1
1701 multu $9,$13 # mul_add_c2(a[5],b[1],c1,c2,c3);
1702 slt $6,$24,$0
1703 addu $25,$6
1704 sll $24,1
1705 addu $2,$24
1706 sltu $1,$2,$24
1707 addu $25,$1
1708 addu $3,$25
1709 sltu $1,$3,$25
1710 addu $7,$1
1711 mflo $24
1712 mfhi $25
1713 slt $1,$25,$0
1714 addu $7,$1
1715 multu $8,$14 # mul_add_c2(a[4],b[2],c1,c2,c3);
1716 sll $25,1
1717 slt $6,$24,$0
1718 addu $25,$6
1719 sll $24,1
1720 addu $2,$24
1721 sltu $1,$2,$24
1722 addu $25,$1
1723 addu $3,$25
1724 sltu $1,$3,$25
1725 addu $7,$1
1726 mflo $24
1727 mfhi $25
1728 slt $1,$25,$0
1729 addu $7,$1
1730 multu $15,$15 # mul_add_c(a[3],b[3],c1,c2,c3);
1731 sll $25,1
1732 slt $6,$24,$0
1733 addu $25,$6
1734 sll $24,1
1735 addu $2,$24
1736 sltu $1,$2,$24
1737 addu $25,$1
1738 addu $3,$25
1739 sltu $1,$3,$25
1740 addu $7,$1
1741 mflo $24
1742 mfhi $25
1743 addu $2,$24
1744 sltu $1,$2,$24
1745 multu $12,$11 # mul_add_c2(a[0],b[7],c2,c3,c1);
1746 addu $25,$1
1747 addu $3,$25
1748 sltu $1,$3,$25
1749 addu $7,$1
1750 sw $2,6*4($4)
1752 mflo $24
1753 mfhi $25
1754 slt $2,$25,$0
1755 sll $25,1
1756 multu $13,$10 # mul_add_c2(a[1],b[6],c2,c3,c1);
1757 slt $6,$24,$0
1758 addu $25,$6
1759 sll $24,1
1760 addu $3,$24
1761 sltu $1,$3,$24
1762 addu $25,$1
1763 addu $7,$25
1764 sltu $1,$7,$25
1765 addu $2,$1
1766 mflo $24
1767 mfhi $25
1768 slt $1,$25,$0
1769 addu $2,$1
1770 multu $14,$9 # mul_add_c2(a[2],b[5],c2,c3,c1);
1771 sll $25,1
1772 slt $6,$24,$0
1773 addu $25,$6
1774 sll $24,1
1775 addu $3,$24
1776 sltu $1,$3,$24
1777 addu $25,$1
1778 addu $7,$25
1779 sltu $1,$7,$25
1780 addu $2,$1
1781 mflo $24
1782 mfhi $25
1783 slt $1,$25,$0
1784 addu $2,$1
1785 multu $15,$8 # mul_add_c2(a[3],b[4],c2,c3,c1);
1786 sll $25,1
1787 slt $6,$24,$0
1788 addu $25,$6
1789 sll $24,1
1790 addu $3,$24
1791 sltu $1,$3,$24
1792 addu $25,$1
1793 addu $7,$25
1794 sltu $1,$7,$25
1795 addu $2,$1
1796 mflo $24
1797 mfhi $25
1798 slt $1,$25,$0
1799 addu $2,$1
1800 multu $11,$13 # mul_add_c2(a[7],b[1],c3,c1,c2);
1801 sll $25,1
1802 slt $6,$24,$0
1803 addu $25,$6
1804 sll $24,1
1805 addu $3,$24
1806 sltu $1,$3,$24
1807 addu $25,$1
1808 addu $7,$25
1809 sltu $1,$7,$25
1810 addu $2,$1
1811 sw $3,7*4($4)
1813 mflo $24
1814 mfhi $25
1815 slt $3,$25,$0
1816 sll $25,1
1817 multu $10,$14 # mul_add_c2(a[6],b[2],c3,c1,c2);
1818 slt $6,$24,$0
1819 addu $25,$6
1820 sll $24,1
1821 addu $7,$24
1822 sltu $1,$7,$24
1823 addu $25,$1
1824 addu $2,$25
1825 sltu $1,$2,$25
1826 addu $3,$1
1827 mflo $24
1828 mfhi $25
1829 slt $1,$25,$0
1830 addu $3,$1
1831 multu $9,$15 # mul_add_c2(a[5],b[3],c3,c1,c2);
1832 sll $25,1
1833 slt $6,$24,$0
1834 addu $25,$6
1835 sll $24,1
1836 addu $7,$24
1837 sltu $1,$7,$24
1838 addu $25,$1
1839 addu $2,$25
1840 sltu $1,$2,$25
1841 addu $3,$1
1842 mflo $24
1843 mfhi $25
1844 slt $1,$25,$0
1845 addu $3,$1
1846 multu $8,$8 # mul_add_c(a[4],b[4],c3,c1,c2);
1847 sll $25,1
1848 slt $6,$24,$0
1849 addu $25,$6
1850 sll $24,1
1851 addu $7,$24
1852 sltu $1,$7,$24
1853 addu $25,$1
1854 addu $2,$25
1855 sltu $1,$2,$25
1856 addu $3,$1
1857 mflo $24
1858 mfhi $25
1859 addu $7,$24
1860 sltu $1,$7,$24
1861 multu $14,$11 # mul_add_c2(a[2],b[7],c1,c2,c3);
1862 addu $25,$1
1863 addu $2,$25
1864 sltu $1,$2,$25
1865 addu $3,$1
1866 sw $7,8*4($4)
1868 mflo $24
1869 mfhi $25
1870 slt $7,$25,$0
1871 sll $25,1
1872 multu $15,$10 # mul_add_c2(a[3],b[6],c1,c2,c3);
1873 slt $6,$24,$0
1874 addu $25,$6
1875 sll $24,1
1876 addu $2,$24
1877 sltu $1,$2,$24
1878 addu $25,$1
1879 addu $3,$25
1880 sltu $1,$3,$25
1881 addu $7,$1
1882 mflo $24
1883 mfhi $25
1884 slt $1,$25,$0
1885 addu $7,$1
1886 multu $8,$9 # mul_add_c2(a[4],b[5],c1,c2,c3);
1887 sll $25,1
1888 slt $6,$24,$0
1889 addu $25,$6
1890 sll $24,1
1891 addu $2,$24
1892 sltu $1,$2,$24
1893 addu $25,$1
1894 addu $3,$25
1895 sltu $1,$3,$25
1896 addu $7,$1
1897 mflo $24
1898 mfhi $25
1899 slt $1,$25,$0
1900 addu $7,$1
1901 multu $11,$15 # mul_add_c2(a[7],b[3],c2,c3,c1);
1902 sll $25,1
1903 slt $6,$24,$0
1904 addu $25,$6
1905 sll $24,1
1906 addu $2,$24
1907 sltu $1,$2,$24
1908 addu $25,$1
1909 addu $3,$25
1910 sltu $1,$3,$25
1911 addu $7,$1
1912 sw $2,9*4($4)
1914 mflo $24
1915 mfhi $25
1916 slt $2,$25,$0
1917 sll $25,1
1918 multu $10,$8 # mul_add_c2(a[6],b[4],c2,c3,c1);
1919 slt $6,$24,$0
1920 addu $25,$6
1921 sll $24,1
1922 addu $3,$24
1923 sltu $1,$3,$24
1924 addu $25,$1
1925 addu $7,$25
1926 sltu $1,$7,$25
1927 addu $2,$1
1928 mflo $24
1929 mfhi $25
1930 slt $1,$25,$0
1931 addu $2,$1
1932 multu $9,$9 # mul_add_c(a[5],b[5],c2,c3,c1);
1933 sll $25,1
1934 slt $6,$24,$0
1935 addu $25,$6
1936 sll $24,1
1937 addu $3,$24
1938 sltu $1,$3,$24
1939 addu $25,$1
1940 addu $7,$25
1941 sltu $1,$7,$25
1942 addu $2,$1
1943 mflo $24
1944 mfhi $25
1945 addu $3,$24
1946 sltu $1,$3,$24
1947 multu $8,$11 # mul_add_c2(a[4],b[7],c3,c1,c2);
1948 addu $25,$1
1949 addu $7,$25
1950 sltu $1,$7,$25
1951 addu $2,$1
1952 sw $3,10*4($4)
1954 mflo $24
1955 mfhi $25
1956 slt $3,$25,$0
1957 sll $25,1
1958 multu $9,$10 # mul_add_c2(a[5],b[6],c3,c1,c2);
1959 slt $6,$24,$0
1960 addu $25,$6
1961 sll $24,1
1962 addu $7,$24
1963 sltu $1,$7,$24
1964 addu $25,$1
1965 addu $2,$25
1966 sltu $1,$2,$25
1967 addu $3,$1
1968 mflo $24
1969 mfhi $25
1970 slt $1,$25,$0
1971 addu $3,$1
1972 multu $11,$9 # mul_add_c2(a[7],b[5],c1,c2,c3);
1973 sll $25,1
1974 slt $6,$24,$0
1975 addu $25,$6
1976 sll $24,1
1977 addu $7,$24
1978 sltu $1,$7,$24
1979 addu $25,$1
1980 addu $2,$25
1981 sltu $1,$2,$25
1982 addu $3,$1
1983 sw $7,11*4($4)
1985 mflo $24
1986 mfhi $25
1987 slt $7,$25,$0
1988 sll $25,1
1989 multu $10,$10 # mul_add_c(a[6],b[6],c1,c2,c3);
1990 slt $6,$24,$0
1991 addu $25,$6
1992 sll $24,1
1993 addu $2,$24
1994 sltu $1,$2,$24
1995 addu $25,$1
1996 addu $3,$25
1997 sltu $1,$3,$25
1998 addu $7,$1
1999 mflo $24
2000 mfhi $25
2001 addu $2,$24
2002 sltu $1,$2,$24
2003 multu $10,$11 # mul_add_c2(a[6],b[7],c2,c3,c1);
2004 addu $25,$1
2005 addu $3,$25
2006 sltu $1,$3,$25
2007 addu $7,$1
2008 sw $2,12*4($4)
2010 mflo $24
2011 mfhi $25
2012 slt $2,$25,$0
2013 sll $25,1
2014 multu $11,$11 # mul_add_c(a[7],b[7],c3,c1,c2);
2015 slt $6,$24,$0
2016 addu $25,$6
2017 sll $24,1
2018 addu $3,$24
2019 sltu $1,$3,$24
2020 addu $25,$1
2021 addu $7,$25
2022 sltu $1,$7,$25
2023 addu $2,$1
2024 sw $3,13*4($4)
2026 mflo $24
2027 mfhi $25
2028 addu $7,$24
2029 sltu $1,$7,$24
2030 addu $25,$1
2031 addu $2,$25
2032 sw $7,14*4($4)
2033 sw $2,15*4($4)
2035 .set noreorder
2036 jr $31
2038 .end bn_sqr_comba8
#-----------------------------------------------------------------------
# bn_sqr_comba4 -- square a 4-word (4 x 32-bit) number into 8 words.
#
# C view (OpenSSL bn): void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
# In:    $4 = r, destination; words r[0]..r[7] are written
#        $5 = a, source;      words a[0]..a[3] are read
# Out:   nothing in registers; result is stored through $4
# Uses:  $12..$15 = a[0]..a[3]
#        $2,$3,$7 = rotating three-word column accumulator
#        $24,$25  = LO/HI of the current 32x32 product
#        $1,$6    = scratch (the file runs with .set noat, so $1 is
#                   managed by hand here)
#        HI/LO are clobbered.
#
# Method: column-wise ("comba") squaring.  Column k of the result is
#         the sum of all a[i]*a[j] with i+j == k; products with i != j
#         occur twice and are therefore doubled before being added.
#         The next multu is issued while the previous product is still
#         being accumulated.
#
# Carry handling in the doubled steps: the doubled high word
#         hi' = 2*hi + msb(lo) can be 0xFFFFFFFF, so the carry out of
#         the low-word addition must NOT be folded into hi' (it would
#         wrap to 0 and the carry would be lost).  The carry is instead
#         added to the middle accumulator word and rippled into the
#         top word, and hi' is added in a separate, carry-checked step.
#         "sltu $1,X,$1" right after "addu X,$1" yields 1 exactly when
#         adding the 0/1 carry wrapped X around to 0.
#-----------------------------------------------------------------------
.align	5
.globl	bn_sqr_comba4
.ent	bn_sqr_comba4
bn_sqr_comba4:
	.set	reorder
	lw	$12,0($5)		# a[0]
	lw	$13,4($5)		# a[1]
	multu	$12,$12			# a[0]*a[0]
	lw	$14,2*4($5)		# a[2]
	lw	$15,3*4($5)		# a[3]
	mflo	$2
	mfhi	$3
	sw	$2,0($4)		# r[0]

	# ---- column 1: 2*a[0]*a[1]      (low,mid,top) = ($3,$7,$2)
	multu	$12,$13			# a[0]*a[1]
	mflo	$24
	mfhi	$25
	slt	$2,$25,$0		# top = bit shifted out of hi by doubling
	sll	$25,1
	multu	$14,$12			# start a[2]*a[0] for column 2
	slt	$6,$24,$0		# bit moving from lo into hi
	addu	$25,$6			# hi' = 2*hi + msb(lo)
	sll	$24,1			# lo' = 2*lo
	addu	$3,$24
	sltu	$1,$3,$24		# carry out of low word
	addu	$7,$25,$1		# mid = hi' + carry (may wrap to 0)
	sltu	$1,$7,$1		# 1 iff that addition wrapped
	addu	$2,$1			# keep the carry in the top word
	sw	$3,4($4)		# r[1]

	# ---- column 2: 2*a[2]*a[0] + a[1]^2      (low,mid,top) = ($7,$2,$3)
	mflo	$24			# a[2]*a[0]
	mfhi	$25
	slt	$3,$25,$0
	sll	$25,1
	multu	$13,$13			# start a[1]*a[1]
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$2,$1			# ripple low carry into mid ...
	sltu	$1,$2,$1
	addu	$3,$1			# ... and on into top
	addu	$2,$25			# mid += hi'
	sltu	$1,$2,$25
	addu	$3,$1
	mflo	$24			# a[1]*a[1] (added once, not doubled)
	mfhi	$25
	addu	$7,$24
	sltu	$1,$7,$24
	multu	$12,$15			# start a[0]*a[3] for column 3
	addu	$25,$1			# safe: hi of a 32x32 product <= 0xFFFFFFFE
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,2*4($4)		# r[2]

	# ---- column 3: 2*a[0]*a[3] + 2*a[1]*a[2]      (low,mid,top) = ($2,$3,$7)
	mflo	$24			# a[0]*a[3]
	mfhi	$25
	slt	$7,$25,$0
	sll	$25,1
	multu	$13,$14			# start a[1]*a[2]
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$3,$1
	sltu	$1,$3,$1
	addu	$7,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	mflo	$24			# a[1]*a[2]
	mfhi	$25
	slt	$1,$25,$0
	addu	$7,$1			# top already live: accumulate doubling bit
	multu	$15,$13			# start a[3]*a[1] for column 4
	sll	$25,1
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$3,$1
	sltu	$1,$3,$1
	addu	$7,$1
	addu	$3,$25
	sltu	$1,$3,$25
	addu	$7,$1
	sw	$2,3*4($4)		# r[3]

	# ---- column 4: 2*a[3]*a[1] + a[2]^2      (low,mid,top) = ($3,$7,$2)
	mflo	$24			# a[3]*a[1]
	mfhi	$25
	slt	$2,$25,$0
	sll	$25,1
	multu	$14,$14			# start a[2]*a[2]
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$3,$24
	sltu	$1,$3,$24
	addu	$7,$1
	sltu	$1,$7,$1
	addu	$2,$1
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	mflo	$24			# a[2]*a[2] (added once, not doubled)
	mfhi	$25
	addu	$3,$24
	sltu	$1,$3,$24
	multu	$14,$15			# start a[2]*a[3] for column 5
	addu	$25,$1			# safe: hi of a 32x32 product <= 0xFFFFFFFE
	addu	$7,$25
	sltu	$1,$7,$25
	addu	$2,$1
	sw	$3,4*4($4)		# r[4]

	# ---- column 5: 2*a[2]*a[3]      (low,mid,top) = ($7,$2,$3)
	mflo	$24			# a[2]*a[3]
	mfhi	$25
	slt	$3,$25,$0
	sll	$25,1
	multu	$15,$15			# start a[3]*a[3]
	slt	$6,$24,$0
	addu	$25,$6
	sll	$24,1
	addu	$7,$24
	sltu	$1,$7,$24
	addu	$2,$1
	sltu	$1,$2,$1
	addu	$3,$1
	addu	$2,$25
	sltu	$1,$2,$25
	addu	$3,$1
	sw	$7,5*4($4)		# r[5]

	# ---- columns 6 and 7: a[3]^2      (low,high) = ($2,$3)
	mflo	$24
	mfhi	$25
	addu	$2,$24
	sltu	$1,$2,$24
	addu	$25,$1			# safe: hi of a 32x32 product <= 0xFFFFFFFE
	addu	$3,$25
	sw	$2,6*4($4)		# r[6]
	sw	$3,7*4($4)		# r[7]

	.set	noreorder
	jr	$31
	nop				# explicit branch delay slot
.end	bn_sqr_comba4