1 /**********************************************************************
2 * Copyright (c) 2013, 2014 Pieter Wuille *
3 * Distributed under the MIT software license, see the accompanying *
4 * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
5 **********************************************************************/
7 #ifndef SECP256K1_FIELD_INNER5X52_IMPL_H
8 #define SECP256K1_FIELD_INNER5X52_IMPL_H
/* VERIFY_BITS(x, n) checks that x fits in n bits, i.e. that (x >> n) == 0.
 * The check is routed through VERIFY_CHECK when VERIFY is defined and
 * expands to an empty statement otherwise, so exactly one definition of the
 * macro is ever active. */
#ifdef VERIFY
#define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0)
#else
#define VERIFY_BITS(x, n) do { } while(0)
#endif
18 SECP256K1_INLINE
static void secp256k1_fe_mul_inner(uint64_t *r
, const uint64_t *a
, const uint64_t * SECP256K1_RESTRICT b
) {
20 uint64_t t3
, t4
, tx
, u0
;
21 uint64_t a0
= a
[0], a1
= a
[1], a2
= a
[2], a3
= a
[3], a4
= a
[4];
22 const uint64_t M
= 0xFFFFFFFFFFFFFULL
, R
= 0x1000003D10ULL
;
24 VERIFY_BITS(a
[0], 56);
25 VERIFY_BITS(a
[1], 56);
26 VERIFY_BITS(a
[2], 56);
27 VERIFY_BITS(a
[3], 56);
28 VERIFY_BITS(a
[4], 52);
29 VERIFY_BITS(b
[0], 56);
30 VERIFY_BITS(b
[1], 56);
31 VERIFY_BITS(b
[2], 56);
32 VERIFY_BITS(b
[3], 56);
33 VERIFY_BITS(b
[4], 52);
36 /* [... a b c] is a shorthand for ... + a<<104 + b<<52 + c<<0 mod n.
37 * px is a shorthand for sum(a[i]*b[x-i], i=0..x).
38 * Note that [x 0 0 0 0 0] = [x*R].
41 d
= (uint128_t
)a0
* b
[3]
42 + (uint128_t
)a1
* b
[2]
43 + (uint128_t
)a2
* b
[1]
44 + (uint128_t
)a3
* b
[0];
46 /* [d 0 0 0] = [p3 0 0 0] */
47 c
= (uint128_t
)a4
* b
[4];
49 /* [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
50 d
+= (c
& M
) * R
; c
>>= 52;
53 /* [c 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
57 /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
59 d
+= (uint128_t
)a0
* b
[4]
60 + (uint128_t
)a1
* b
[3]
61 + (uint128_t
)a2
* b
[2]
62 + (uint128_t
)a3
* b
[1]
63 + (uint128_t
)a4
* b
[0];
65 /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
68 /* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
72 /* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
73 tx
= (t4
>> 48); t4
&= (M
>> 4);
76 /* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
78 c
= (uint128_t
)a0
* b
[0];
80 /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0] */
81 d
+= (uint128_t
)a1
* b
[4]
82 + (uint128_t
)a2
* b
[3]
83 + (uint128_t
)a3
* b
[2]
84 + (uint128_t
)a4
* b
[1];
86 /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
90 /* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
91 /* [d 0 t4+(tx<<48)+(u0<<52) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
94 /* [d 0 t4+(u0<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
95 c
+= (uint128_t
)u0
* (R
>> 4);
97 /* [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
98 r
[0] = c
& M
; c
>>= 52;
99 VERIFY_BITS(r
[0], 52);
101 /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 0 p0] */
103 c
+= (uint128_t
)a0
* b
[1]
104 + (uint128_t
)a1
* b
[0];
106 /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 p1 p0] */
107 d
+= (uint128_t
)a2
* b
[4]
108 + (uint128_t
)a3
* b
[3]
109 + (uint128_t
)a4
* b
[2];
111 /* [d 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
112 c
+= (d
& M
) * R
; d
>>= 52;
115 /* [d 0 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
116 r
[1] = c
& M
; c
>>= 52;
117 VERIFY_BITS(r
[1], 52);
119 /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
121 c
+= (uint128_t
)a0
* b
[2]
122 + (uint128_t
)a1
* b
[1]
123 + (uint128_t
)a2
* b
[0];
125 /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */
126 d
+= (uint128_t
)a3
* b
[4]
127 + (uint128_t
)a4
* b
[3];
129 /* [d 0 0 t4 t3 c t1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
130 c
+= (d
& M
) * R
; d
>>= 52;
133 /* [d 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
135 /* [d 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
136 r
[2] = c
& M
; c
>>= 52;
137 VERIFY_BITS(r
[2], 52);
139 /* [d 0 0 0 t4 t3+c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
142 /* [t4 c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
143 r
[3] = c
& M
; c
>>= 52;
144 VERIFY_BITS(r
[3], 52);
146 /* [t4+c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
149 /* [c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
151 VERIFY_BITS(r
[4], 49);
152 /* [r4 r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
155 SECP256K1_INLINE
static void secp256k1_fe_sqr_inner(uint64_t *r
, const uint64_t *a
) {
157 uint64_t a0
= a
[0], a1
= a
[1], a2
= a
[2], a3
= a
[3], a4
= a
[4];
158 int64_t t3
, t4
, tx
, u0
;
159 const uint64_t M
= 0xFFFFFFFFFFFFFULL
, R
= 0x1000003D10ULL
;
161 VERIFY_BITS(a
[0], 56);
162 VERIFY_BITS(a
[1], 56);
163 VERIFY_BITS(a
[2], 56);
164 VERIFY_BITS(a
[3], 56);
165 VERIFY_BITS(a
[4], 52);
167 /** [... a b c] is a shorthand for ... + a<<104 + b<<52 + c<<0 mod n.
168 * px is a shorthand for sum(a[i]*a[x-i], i=0..x).
169 * Note that [x 0 0 0 0 0] = [x*R].
172 d
= (uint128_t
)(a0
*2) * a3
173 + (uint128_t
)(a1
*2) * a2
;
175 /* [d 0 0 0] = [p3 0 0 0] */
176 c
= (uint128_t
)a4
* a4
;
178 /* [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
179 d
+= (c
& M
) * R
; c
>>= 52;
182 /* [c 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
183 t3
= d
& M
; d
>>= 52;
186 /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
189 d
+= (uint128_t
)a0
* a4
190 + (uint128_t
)(a1
*2) * a3
191 + (uint128_t
)a2
* a2
;
193 /* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
196 /* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
197 t4
= d
& M
; d
>>= 52;
200 /* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
201 tx
= (t4
>> 48); t4
&= (M
>> 4);
204 /* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
206 c
= (uint128_t
)a0
* a0
;
208 /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0] */
209 d
+= (uint128_t
)a1
* a4
210 + (uint128_t
)(a2
*2) * a3
;
212 /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
213 u0
= d
& M
; d
>>= 52;
216 /* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
217 /* [d 0 t4+(tx<<48)+(u0<<52) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
220 /* [d 0 t4+(u0<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
221 c
+= (uint128_t
)u0
* (R
>> 4);
223 /* [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
224 r
[0] = c
& M
; c
>>= 52;
225 VERIFY_BITS(r
[0], 52);
227 /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 0 p0] */
230 c
+= (uint128_t
)a0
* a1
;
232 /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 p1 p0] */
233 d
+= (uint128_t
)a2
* a4
234 + (uint128_t
)a3
* a3
;
236 /* [d 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
237 c
+= (d
& M
) * R
; d
>>= 52;
240 /* [d 0 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
241 r
[1] = c
& M
; c
>>= 52;
242 VERIFY_BITS(r
[1], 52);
244 /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
246 c
+= (uint128_t
)a0
* a2
247 + (uint128_t
)a1
* a1
;
249 /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */
250 d
+= (uint128_t
)a3
* a4
;
252 /* [d 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
253 c
+= (d
& M
) * R
; d
>>= 52;
256 /* [d 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
257 r
[2] = c
& M
; c
>>= 52;
258 VERIFY_BITS(r
[2], 52);
260 /* [d 0 0 0 t4 t3+c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
264 /* [t4 c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
265 r
[3] = c
& M
; c
>>= 52;
266 VERIFY_BITS(r
[3], 52);
268 /* [t4+c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
271 /* [c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
273 VERIFY_BITS(r
[4], 49);
274 /* [r4 r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
277 #endif /* SECP256K1_FIELD_INNER5X52_IMPL_H */