[oscam.git] / cscrypt / bn_asm.c
#include "bn.h"
#ifndef WITH_LIBCRYPTO
//FIXME Not checked on threadsafety yet; after checking please remove this line
/* crypto/bn/bn_asm.c */
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
 * All rights reserved.
 *
 * This package is an SSL implementation written
 * by Eric Young (eay@cryptsoft.com).
 * The implementation was written so as to conform with Netscapes SSL.
 *
 * This library is free for commercial and non-commercial use as long as
 * the following conditions are aheared to.  The following conditions
 * apply to all code found in this distribution, be it the RC4, RSA,
 * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
 * included with this distribution is covered by the same copyright terms
 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
 *
 * Copyright remains Eric Young's, and as such any Copyright notices in
 * the code are not to be removed.
 * If this package is used in a product, Eric Young should be given attribution
 * as the author of the parts of the library used.
 * This can be in the form of a textual message at program startup or
 * in documentation (online or textual) provided with the package.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *    "This product includes cryptographic software written by
 *     Eric Young (eay@cryptsoft.com)"
 *    The word 'cryptographic' can be left out if the rouines from the library
 *    being used are not cryptographic related :-).
 * 4. If you include any Windows specific code (or a derivative thereof) from
 *    the apps directory (application code) you must include an acknowledgement:
 *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
 *
 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * The license and distribution terms for any publically available version or
 * derivative of this code cannot be changed.  i.e. this code cannot simply be
 * copied and put under another distribution license
 * [including the GNU Public License.]
 */
#ifndef BN_DEBUG
# undef NDEBUG /* avoid conflicting definitions */
# define NDEBUG
#endif

#include <stdio.h>
#include <assert.h>
#include "bn_lcl.h"
#include "openssl_mods.h"
#if defined(BN_LLONG) || defined(BN_UMULT_HIGH)
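
/*
 * bn_mul_add_words(): for i = 0..num-1, rp[i] += ap[i] * w, propagating the
 * carry between words; the final carry word is returned.  The loop is
 * unrolled four words at a time around the mul_add() primitive from bn_lcl.h.
 */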
BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
{
    BN_ULONG c1 = 0;

    assert(num >= 0);
    if(num <= 0) { return (c1); }

    while(num&~3)
    {
        mul_add(rp[0], ap[0], w, c1);
        mul_add(rp[1], ap[1], w, c1);
        mul_add(rp[2], ap[2], w, c1);
        mul_add(rp[3], ap[3], w, c1);
        ap += 4;
        rp += 4;
        num -= 4;
    }
    if(num)
    {
        mul_add(rp[0], ap[0], w, c1);
        if(--num == 0) { return c1; }
        mul_add(rp[1], ap[1], w, c1);
        if(--num == 0) { return c1; }
        mul_add(rp[2], ap[2], w, c1);
        return c1;
    }

    return (c1);
}
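
/*
 * bn_mul_words(): rp[i] = low word of (ap[i] * w + carry) for i = 0..num-1,
 * so rp receives ap * w; the carry out of the last word is returned.
 */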
BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
{
    BN_ULONG c1 = 0;

    assert(num >= 0);
    if(num <= 0) { return (c1); }

    while(num&~3)
    {
        mul(rp[0], ap[0], w, c1);
        mul(rp[1], ap[1], w, c1);
        mul(rp[2], ap[2], w, c1);
        mul(rp[3], ap[3], w, c1);
        ap += 4;
        rp += 4;
        num -= 4;
    }
    if(num)
    {
        mul(rp[0], ap[0], w, c1);
        if(--num == 0) { return c1; }
        mul(rp[1], ap[1], w, c1);
        if(--num == 0) { return c1; }
        mul(rp[2], ap[2], w, c1);
    }
    return (c1);
}
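
/*
 * bn_sqr_words(): writes the double-width square of each input word, i.e.
 * (r[2*i+1], r[2*i]) = a[i]^2 for i = 0..n-1, so r receives 2*n result words.
 */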
void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
{
    assert(n >= 0);
    if(n <= 0) { return; }

    while(n&~3)
    {
        sqr(r[0], r[1], a[0]);
        sqr(r[2], r[3], a[1]);
        sqr(r[4], r[5], a[2]);
        sqr(r[6], r[7], a[3]);
        a += 4;
        r += 8;
        n -= 4;
    }
    if(n)
    {
        sqr(r[0], r[1], a[0]);
        if(--n == 0) { return; }
        sqr(r[2], r[3], a[1]);
        if(--n == 0) { return; }
        sqr(r[4], r[5], a[2]);
    }
}
#else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
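
/*
 * Portable fallbacks: without a double-width integer type (BN_LLONG) or a
 * high-half multiply (BN_UMULT_HIGH), the same word operations are built
 * from half-word arithmetic via the LBITS/HBITS, mul_add, mul and sqr64
 * helpers from bn_lcl.h.
 */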
BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
{
    BN_ULONG c = 0;
    BN_ULONG bl, bh;

    assert(num >= 0);
    if(num <= 0) { return ((BN_ULONG)0); }

    bl = LBITS(w);
    bh = HBITS(w);

    for(;;)
    {
        mul_add(rp[0], ap[0], bl, bh, c);
        if(--num == 0) { break; }
        mul_add(rp[1], ap[1], bl, bh, c);
        if(--num == 0) { break; }
        mul_add(rp[2], ap[2], bl, bh, c);
        if(--num == 0) { break; }
        mul_add(rp[3], ap[3], bl, bh, c);
        if(--num == 0) { break; }
        ap += 4;
        rp += 4;
    }
    return (c);
}
BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
{
    BN_ULONG carry = 0;
    BN_ULONG bl, bh;

    assert(num >= 0);
    if(num <= 0) { return ((BN_ULONG)0); }

    bl = LBITS(w);
    bh = HBITS(w);

    for(;;)
    {
        mul(rp[0], ap[0], bl, bh, carry);
        if(--num == 0) { break; }
        mul(rp[1], ap[1], bl, bh, carry);
        if(--num == 0) { break; }
        mul(rp[2], ap[2], bl, bh, carry);
        if(--num == 0) { break; }
        mul(rp[3], ap[3], bl, bh, carry);
        if(--num == 0) { break; }
        ap += 4;
        rp += 4;
    }
    return (carry);
}
void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
{
    assert(n >= 0);
    if(n <= 0) { return; }

    for(;;)
    {
        sqr64(r[0], r[1], a[0]);
        if(--n == 0) { break; }

        sqr64(r[2], r[3], a[1]);
        if(--n == 0) { break; }

        sqr64(r[4], r[5], a[2]);
        if(--n == 0) { break; }

        sqr64(r[6], r[7], a[3]);
        if(--n == 0) { break; }

        a += 4;
        r += 8;
    }
}
#endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */

#if defined(BN_LLONG) && defined(BN_DIV2W)
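
/*
 * bn_div_words(): divides the two-word value (h, l) by the single word d and
 * returns the one-word quotient; the quotient is assumed to fit in a single
 * word (normally h < d).  With BN_LLONG this is a direct double-width
 * division.
 */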
BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
{
    return ((BN_ULONG)(((((BN_ULLONG)h) << BN_BITS2) | l) / (BN_ULLONG)d));
}
#else

/* Divide h,l by d and return the result. */
/* I need to test this some more :-( */
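/*
 * Long division on half-words: d is first normalised so its top bit is set
 * (shifting h,l by the same amount), then the quotient is produced in two
 * BN_BITS4-bit chunks.  Each chunk is estimated from the top half-words and
 * corrected downwards in the inner loop until the partial remainder is valid.
 */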
BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
{
    BN_ULONG dh, dl, q, ret = 0, th, tl, t;
    int i, count = 2;

    if(d == 0) { return (BN_MASK2); }

    i = BN_num_bits_word(d);
    assert((i == BN_BITS2) || (h > (BN_ULONG)1 << i));

    i = BN_BITS2 - i;
    if(h >= d) { h -= d; }

    if(i)
    {
        d <<= i;
        h = (h << i) | (l >> (BN_BITS2 - i));
        l <<= i;
    }
    dh = (d & BN_MASK2h) >> BN_BITS4;
    dl = (d & BN_MASK2l);
    for(;;)
    {
        if((h >> BN_BITS4) == dh)
            { q = BN_MASK2l; }
        else
            { q = h / dh; }

        th = q * dh;
        tl = dl * q;
        for(;;)
        {
            t = h - th;
            if((t & BN_MASK2h) ||
                    ((tl) <= (
                         (t << BN_BITS4) |
                         ((l & BN_MASK2h) >> BN_BITS4))))
                { break; }
            q--;
            th -= dh;
            tl -= dl;
        }
        t = (tl >> BN_BITS4);
        tl = (tl << BN_BITS4)&BN_MASK2h;
        th += t;

        if(l < tl) { th++; }
        l -= tl;
        if(h < th)
        {
            h += d;
            q--;
        }
        h -= th;

        if(--count == 0) { break; }

        ret = q << BN_BITS4;
        h = ((h << BN_BITS4) | (l >> BN_BITS4))&BN_MASK2;
        l = (l & BN_MASK2l) << BN_BITS4;
    }
    ret |= q;
    return (ret);
}
#endif /* !defined(BN_LLONG) && defined(BN_DIV2W) */

#ifdef BN_LLONG
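
/*
 * bn_add_words(): r[i] = a[i] + b[i] + carry for i = 0..n-1; returns the
 * final carry (0 or 1).  The BN_LLONG version accumulates into a double-width
 * value, the fallback below detects carries by comparing against the addends.
 */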
BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
{
    BN_ULLONG ll = 0;

    assert(n >= 0);
    if(n <= 0) { return ((BN_ULONG)0); }

    for(;;)
    {
        ll += (BN_ULLONG)a[0] + b[0];
        r[0] = (BN_ULONG)ll & BN_MASK2;
        ll >>= BN_BITS2;
        if(--n <= 0) { break; }

        ll += (BN_ULLONG)a[1] + b[1];
        r[1] = (BN_ULONG)ll & BN_MASK2;
        ll >>= BN_BITS2;
        if(--n <= 0) { break; }

        ll += (BN_ULLONG)a[2] + b[2];
        r[2] = (BN_ULONG)ll & BN_MASK2;
        ll >>= BN_BITS2;
        if(--n <= 0) { break; }

        ll += (BN_ULLONG)a[3] + b[3];
        r[3] = (BN_ULONG)ll & BN_MASK2;
        ll >>= BN_BITS2;
        if(--n <= 0) { break; }

        a += 4;
        b += 4;
        r += 4;
    }
    return ((BN_ULONG)ll);
}
#else /* !BN_LLONG */
BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
{
    BN_ULONG c, l, t;

    assert(n >= 0);
    if(n <= 0) { return ((BN_ULONG)0); }

    c = 0;
    for(;;)
    {
        t = a[0];
        t = (t + c)&BN_MASK2;
        c = (t < c);
        l = (t + b[0])&BN_MASK2;
        c += (l < t);
        r[0] = l;
        if(--n <= 0) { break; }

        t = a[1];
        t = (t + c)&BN_MASK2;
        c = (t < c);
        l = (t + b[1])&BN_MASK2;
        c += (l < t);
        r[1] = l;
        if(--n <= 0) { break; }

        t = a[2];
        t = (t + c)&BN_MASK2;
        c = (t < c);
        l = (t + b[2])&BN_MASK2;
        c += (l < t);
        r[2] = l;
        if(--n <= 0) { break; }

        t = a[3];
        t = (t + c)&BN_MASK2;
        c = (t < c);
        l = (t + b[3])&BN_MASK2;
        c += (l < t);
        r[3] = l;
        if(--n <= 0) { break; }

        a += 4;
        b += 4;
        r += 4;
    }
    return ((BN_ULONG)c);
}
#endif /* !BN_LLONG */
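
/*
 * bn_sub_words(): r[i] = a[i] - b[i] - borrow for i = 0..n-1; returns the
 * final borrow (0 or 1).  The borrow is only updated when a[i] != b[i],
 * since equal words leave the incoming borrow unchanged.
 */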
BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
{
    BN_ULONG t1, t2;
    int c = 0;

    assert(n >= 0);
    if(n <= 0) { return ((BN_ULONG)0); }

    for(;;)
    {
        t1 = a[0];
        t2 = b[0];
        r[0] = (t1 - t2 - c)&BN_MASK2;
        if(t1 != t2) { c = (t1 < t2); }
        if(--n <= 0) { break; }

        t1 = a[1];
        t2 = b[1];
        r[1] = (t1 - t2 - c)&BN_MASK2;
        if(t1 != t2) { c = (t1 < t2); }
        if(--n <= 0) { break; }

        t1 = a[2];
        t2 = b[2];
        r[2] = (t1 - t2 - c)&BN_MASK2;
        if(t1 != t2) { c = (t1 < t2); }
        if(--n <= 0) { break; }

        t1 = a[3];
        t2 = b[3];
        r[3] = (t1 - t2 - c)&BN_MASK2;
        if(t1 != t2) { c = (t1 < t2); }
        if(--n <= 0) { break; }

        a += 4;
        b += 4;
        r += 4;
    }
    return (c);
}
#ifdef BN_MUL_COMBA

#undef bn_mul_comba8
#undef bn_mul_comba4
#undef bn_sqr_comba8
#undef bn_sqr_comba4

/* mul_add_c(a,b,c0,c1,c2)    -- c+=a*b for three word number c=(c2,c1,c0) */
/* mul_add_c2(a,b,c0,c1,c2)   -- c+=2*a*b for three word number c=(c2,c1,c0) */
/* sqr_add_c(a,i,c0,c1,c2)    -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
/* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
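
/*
 * (c2,c1,c0) acts as a three-word column accumulator: the comba routines
 * below add every partial product belonging to one output column into it,
 * store the low word as that result word, and then rotate the roles of the
 * three words for the next column.
 */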
#ifdef BN_LLONG
#define mul_add_c(a,b,c0,c1,c2) \
    t=(BN_ULLONG)a*b; \
    t1=(BN_ULONG)Lw(t); \
    t2=(BN_ULONG)Hw(t); \
    c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
    c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;

#define mul_add_c2(a,b,c0,c1,c2) \
    t=(BN_ULLONG)a*b; \
    tt=(t+t)&BN_MASK; \
    if (tt < t) c2++; \
    t1=(BN_ULONG)Lw(tt); \
    t2=(BN_ULONG)Hw(tt); \
    c0=(c0+t1)&BN_MASK2; \
    if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
    c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;

#define sqr_add_c(a,i,c0,c1,c2) \
    t=(BN_ULLONG)a[i]*a[i]; \
    t1=(BN_ULONG)Lw(t); \
    t2=(BN_ULONG)Hw(t); \
    c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
    c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;

#define sqr_add_c2(a,i,j,c0,c1,c2) \
    mul_add_c2((a)[i],(a)[j],c0,c1,c2)

#elif defined(BN_UMULT_HIGH)

#define mul_add_c(a,b,c0,c1,c2) { \
    BN_ULONG ta=(a),tb=(b); \
    t1 = ta * tb; \
    t2 = BN_UMULT_HIGH(ta,tb); \
    c0 += t1; t2 += (c0<t1)?1:0; \
    c1 += t2; c2 += (c1<t2)?1:0; \
    }

#define mul_add_c2(a,b,c0,c1,c2) { \
    BN_ULONG ta=(a),tb=(b),t0; \
    t1 = BN_UMULT_HIGH(ta,tb); \
    t0 = ta * tb; \
    t2 = t1+t1; c2 += (t2<t1)?1:0; \
    t1 = t0+t0; t2 += (t1<t0)?1:0; \
    c0 += t1; t2 += (c0<t1)?1:0; \
    c1 += t2; c2 += (c1<t2)?1:0; \
    }

#define sqr_add_c(a,i,c0,c1,c2) { \
    BN_ULONG ta=(a)[i]; \
    t1 = ta * ta; \
    t2 = BN_UMULT_HIGH(ta,ta); \
    c0 += t1; t2 += (c0<t1)?1:0; \
    c1 += t2; c2 += (c1<t2)?1:0; \
    }

#define sqr_add_c2(a,i,j,c0,c1,c2) \
    mul_add_c2((a)[i],(a)[j],c0,c1,c2)

#else /* !BN_LLONG */
#define mul_add_c(a,b,c0,c1,c2) \
    t1=LBITS(a); t2=HBITS(a); \
    bl=LBITS(b); bh=HBITS(b); \
    mul64(t1,t2,bl,bh); \
    c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
    c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;

#define mul_add_c2(a,b,c0,c1,c2) \
    t1=LBITS(a); t2=HBITS(a); \
    bl=LBITS(b); bh=HBITS(b); \
    mul64(t1,t2,bl,bh); \
    if (t2 & BN_TBIT) c2++; \
    t2=(t2+t2)&BN_MASK2; \
    if (t1 & BN_TBIT) t2++; \
    t1=(t1+t1)&BN_MASK2; \
    c0=(c0+t1)&BN_MASK2; \
    if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
    c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;

#define sqr_add_c(a,i,c0,c1,c2) \
    sqr64(t1,t2,(a)[i]); \
    c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
    c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;

#define sqr_add_c2(a,i,j,c0,c1,c2) \
    mul_add_c2((a)[i],(a)[j],c0,c1,c2)
#endif /* !BN_LLONG */
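
/*
 * bn_mul_comba8(): column-wise (comba) multiplication of two 8-word operands,
 * writing the full 16-word product to r.  Result words are stored as soon as
 * a column is complete, so r is expected not to overlap a or b.
 */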
void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
#ifdef BN_LLONG
    BN_ULLONG t;
#else
    BN_ULONG bl, bh;
#endif
    BN_ULONG t1, t2;
    BN_ULONG c1, c2, c3;

    c1 = 0;
    c2 = 0;
    c3 = 0;
    mul_add_c(a[0], b[0], c1, c2, c3);
    r[0] = c1;
    c1 = 0;
    mul_add_c(a[0], b[1], c2, c3, c1);
    mul_add_c(a[1], b[0], c2, c3, c1);
    r[1] = c2;
    c2 = 0;
    mul_add_c(a[2], b[0], c3, c1, c2);
    mul_add_c(a[1], b[1], c3, c1, c2);
    mul_add_c(a[0], b[2], c3, c1, c2);
    r[2] = c3;
    c3 = 0;
    mul_add_c(a[0], b[3], c1, c2, c3);
    mul_add_c(a[1], b[2], c1, c2, c3);
    mul_add_c(a[2], b[1], c1, c2, c3);
    mul_add_c(a[3], b[0], c1, c2, c3);
    r[3] = c1;
    c1 = 0;
    mul_add_c(a[4], b[0], c2, c3, c1);
    mul_add_c(a[3], b[1], c2, c3, c1);
    mul_add_c(a[2], b[2], c2, c3, c1);
    mul_add_c(a[1], b[3], c2, c3, c1);
    mul_add_c(a[0], b[4], c2, c3, c1);
    r[4] = c2;
    c2 = 0;
    mul_add_c(a[0], b[5], c3, c1, c2);
    mul_add_c(a[1], b[4], c3, c1, c2);
    mul_add_c(a[2], b[3], c3, c1, c2);
    mul_add_c(a[3], b[2], c3, c1, c2);
    mul_add_c(a[4], b[1], c3, c1, c2);
    mul_add_c(a[5], b[0], c3, c1, c2);
    r[5] = c3;
    c3 = 0;
    mul_add_c(a[6], b[0], c1, c2, c3);
    mul_add_c(a[5], b[1], c1, c2, c3);
    mul_add_c(a[4], b[2], c1, c2, c3);
    mul_add_c(a[3], b[3], c1, c2, c3);
    mul_add_c(a[2], b[4], c1, c2, c3);
    mul_add_c(a[1], b[5], c1, c2, c3);
    mul_add_c(a[0], b[6], c1, c2, c3);
    r[6] = c1;
    c1 = 0;
    mul_add_c(a[0], b[7], c2, c3, c1);
    mul_add_c(a[1], b[6], c2, c3, c1);
    mul_add_c(a[2], b[5], c2, c3, c1);
    mul_add_c(a[3], b[4], c2, c3, c1);
    mul_add_c(a[4], b[3], c2, c3, c1);
    mul_add_c(a[5], b[2], c2, c3, c1);
    mul_add_c(a[6], b[1], c2, c3, c1);
    mul_add_c(a[7], b[0], c2, c3, c1);
    r[7] = c2;
    c2 = 0;
    mul_add_c(a[7], b[1], c3, c1, c2);
    mul_add_c(a[6], b[2], c3, c1, c2);
    mul_add_c(a[5], b[3], c3, c1, c2);
    mul_add_c(a[4], b[4], c3, c1, c2);
    mul_add_c(a[3], b[5], c3, c1, c2);
    mul_add_c(a[2], b[6], c3, c1, c2);
    mul_add_c(a[1], b[7], c3, c1, c2);
    r[8] = c3;
    c3 = 0;
    mul_add_c(a[2], b[7], c1, c2, c3);
    mul_add_c(a[3], b[6], c1, c2, c3);
    mul_add_c(a[4], b[5], c1, c2, c3);
    mul_add_c(a[5], b[4], c1, c2, c3);
    mul_add_c(a[6], b[3], c1, c2, c3);
    mul_add_c(a[7], b[2], c1, c2, c3);
    r[9] = c1;
    c1 = 0;
    mul_add_c(a[7], b[3], c2, c3, c1);
    mul_add_c(a[6], b[4], c2, c3, c1);
    mul_add_c(a[5], b[5], c2, c3, c1);
    mul_add_c(a[4], b[6], c2, c3, c1);
    mul_add_c(a[3], b[7], c2, c3, c1);
    r[10] = c2;
    c2 = 0;
    mul_add_c(a[4], b[7], c3, c1, c2);
    mul_add_c(a[5], b[6], c3, c1, c2);
    mul_add_c(a[6], b[5], c3, c1, c2);
    mul_add_c(a[7], b[4], c3, c1, c2);
    r[11] = c3;
    c3 = 0;
    mul_add_c(a[7], b[5], c1, c2, c3);
    mul_add_c(a[6], b[6], c1, c2, c3);
    mul_add_c(a[5], b[7], c1, c2, c3);
    r[12] = c1;
    c1 = 0;
    mul_add_c(a[6], b[7], c2, c3, c1);
    mul_add_c(a[7], b[6], c2, c3, c1);
    r[13] = c2;
    c2 = 0;
    mul_add_c(a[7], b[7], c3, c1, c2);
    r[14] = c3;
    r[15] = c1;
}
void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
#ifdef BN_LLONG
    BN_ULLONG t;
#else
    BN_ULONG bl, bh;
#endif
    BN_ULONG t1, t2;
    BN_ULONG c1, c2, c3;

    c1 = 0;
    c2 = 0;
    c3 = 0;
    mul_add_c(a[0], b[0], c1, c2, c3);
    r[0] = c1;
    c1 = 0;
    mul_add_c(a[0], b[1], c2, c3, c1);
    mul_add_c(a[1], b[0], c2, c3, c1);
    r[1] = c2;
    c2 = 0;
    mul_add_c(a[2], b[0], c3, c1, c2);
    mul_add_c(a[1], b[1], c3, c1, c2);
    mul_add_c(a[0], b[2], c3, c1, c2);
    r[2] = c3;
    c3 = 0;
    mul_add_c(a[0], b[3], c1, c2, c3);
    mul_add_c(a[1], b[2], c1, c2, c3);
    mul_add_c(a[2], b[1], c1, c2, c3);
    mul_add_c(a[3], b[0], c1, c2, c3);
    r[3] = c1;
    c1 = 0;
    mul_add_c(a[3], b[1], c2, c3, c1);
    mul_add_c(a[2], b[2], c2, c3, c1);
    mul_add_c(a[1], b[3], c2, c3, c1);
    r[4] = c2;
    c2 = 0;
    mul_add_c(a[2], b[3], c3, c1, c2);
    mul_add_c(a[3], b[2], c3, c1, c2);
    r[5] = c3;
    c3 = 0;
    mul_add_c(a[3], b[3], c1, c2, c3);
    r[6] = c1;
    r[7] = c2;
}
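
/*
 * bn_sqr_comba8(): comba squaring of an 8-word operand into 16 result words.
 * Diagonal terms a[i]^2 are added with sqr_add_c; each off-diagonal product
 * a[i]*a[j] (i > j) is added once, doubled, via sqr_add_c2.
 */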
void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
{
#ifdef BN_LLONG
    BN_ULLONG t, tt;
#else
    BN_ULONG bl, bh;
#endif
    BN_ULONG t1, t2;
    BN_ULONG c1, c2, c3;

    c1 = 0;
    c2 = 0;
    c3 = 0;
    sqr_add_c(a, 0, c1, c2, c3);
    r[0] = c1;
    c1 = 0;
    sqr_add_c2(a, 1, 0, c2, c3, c1);
    r[1] = c2;
    c2 = 0;
    sqr_add_c(a, 1, c3, c1, c2);
    sqr_add_c2(a, 2, 0, c3, c1, c2);
    r[2] = c3;
    c3 = 0;
    sqr_add_c2(a, 3, 0, c1, c2, c3);
    sqr_add_c2(a, 2, 1, c1, c2, c3);
    r[3] = c1;
    c1 = 0;
    sqr_add_c(a, 2, c2, c3, c1);
    sqr_add_c2(a, 3, 1, c2, c3, c1);
    sqr_add_c2(a, 4, 0, c2, c3, c1);
    r[4] = c2;
    c2 = 0;
    sqr_add_c2(a, 5, 0, c3, c1, c2);
    sqr_add_c2(a, 4, 1, c3, c1, c2);
    sqr_add_c2(a, 3, 2, c3, c1, c2);
    r[5] = c3;
    c3 = 0;
    sqr_add_c(a, 3, c1, c2, c3);
    sqr_add_c2(a, 4, 2, c1, c2, c3);
    sqr_add_c2(a, 5, 1, c1, c2, c3);
    sqr_add_c2(a, 6, 0, c1, c2, c3);
    r[6] = c1;
    c1 = 0;
    sqr_add_c2(a, 7, 0, c2, c3, c1);
    sqr_add_c2(a, 6, 1, c2, c3, c1);
    sqr_add_c2(a, 5, 2, c2, c3, c1);
    sqr_add_c2(a, 4, 3, c2, c3, c1);
    r[7] = c2;
    c2 = 0;
    sqr_add_c(a, 4, c3, c1, c2);
    sqr_add_c2(a, 5, 3, c3, c1, c2);
    sqr_add_c2(a, 6, 2, c3, c1, c2);
    sqr_add_c2(a, 7, 1, c3, c1, c2);
    r[8] = c3;
    c3 = 0;
    sqr_add_c2(a, 7, 2, c1, c2, c3);
    sqr_add_c2(a, 6, 3, c1, c2, c3);
    sqr_add_c2(a, 5, 4, c1, c2, c3);
    r[9] = c1;
    c1 = 0;
    sqr_add_c(a, 5, c2, c3, c1);
    sqr_add_c2(a, 6, 4, c2, c3, c1);
    sqr_add_c2(a, 7, 3, c2, c3, c1);
    r[10] = c2;
    c2 = 0;
    sqr_add_c2(a, 7, 4, c3, c1, c2);
    sqr_add_c2(a, 6, 5, c3, c1, c2);
    r[11] = c3;
    c3 = 0;
    sqr_add_c(a, 6, c1, c2, c3);
    sqr_add_c2(a, 7, 5, c1, c2, c3);
    r[12] = c1;
    c1 = 0;
    sqr_add_c2(a, 7, 6, c2, c3, c1);
    r[13] = c2;
    c2 = 0;
    sqr_add_c(a, 7, c3, c1, c2);
    r[14] = c3;
    r[15] = c1;
}
void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
{
#ifdef BN_LLONG
    BN_ULLONG t, tt;
#else
    BN_ULONG bl, bh;
#endif
    BN_ULONG t1, t2;
    BN_ULONG c1, c2, c3;

    c1 = 0;
    c2 = 0;
    c3 = 0;
    sqr_add_c(a, 0, c1, c2, c3);
    r[0] = c1;
    c1 = 0;
    sqr_add_c2(a, 1, 0, c2, c3, c1);
    r[1] = c2;
    c2 = 0;
    sqr_add_c(a, 1, c3, c1, c2);
    sqr_add_c2(a, 2, 0, c3, c1, c2);
    r[2] = c3;
    c3 = 0;
    sqr_add_c2(a, 3, 0, c1, c2, c3);
    sqr_add_c2(a, 2, 1, c1, c2, c3);
    r[3] = c1;
    c1 = 0;
    sqr_add_c(a, 2, c2, c3, c1);
    sqr_add_c2(a, 3, 1, c2, c3, c1);
    r[4] = c2;
    c2 = 0;
    sqr_add_c2(a, 3, 2, c3, c1, c2);
    r[5] = c3;
    c3 = 0;
    sqr_add_c(a, 3, c1, c2, c3);
    r[6] = c1;
    r[7] = c2;
}
#else /* !BN_MUL_COMBA */

/* hmm... is it faster just to do a multiply? */
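/*
 * Without BN_MUL_COMBA the comba entry points simply fall back to the generic
 * routines: squaring goes through bn_sqr_normal() with a stack temporary, and
 * the fixed-size multiplies are done row by row with bn_mul_words() followed
 * by bn_mul_add_words().
 */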
#undef bn_sqr_comba4
void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
{
    BN_ULONG t[8];
    bn_sqr_normal(r, a, 4, t);
}

#undef bn_sqr_comba8
void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
{
    BN_ULONG t[16];
    bn_sqr_normal(r, a, 8, t);
}

void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
    r[4] = bn_mul_words(&(r[0]), a, 4, b[0]);
    r[5] = bn_mul_add_words(&(r[1]), a, 4, b[1]);
    r[6] = bn_mul_add_words(&(r[2]), a, 4, b[2]);
    r[7] = bn_mul_add_words(&(r[3]), a, 4, b[3]);
}

void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
    r[ 8] = bn_mul_words(&(r[0]), a, 8, b[0]);
    r[ 9] = bn_mul_add_words(&(r[1]), a, 8, b[1]);
    r[10] = bn_mul_add_words(&(r[2]), a, 8, b[2]);
    r[11] = bn_mul_add_words(&(r[3]), a, 8, b[3]);
    r[12] = bn_mul_add_words(&(r[4]), a, 8, b[4]);
    r[13] = bn_mul_add_words(&(r[5]), a, 8, b[5]);
    r[14] = bn_mul_add_words(&(r[6]), a, 8, b[6]);
    r[15] = bn_mul_add_words(&(r[7]), a, 8, b[7]);
}

#endif /* !BN_MUL_COMBA */
#endif /* !WITH_LIBCRYPTO */