1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
6 * curve25519-donna: Curve25519 elliptic curve, public key function
8 * http://code.google.com/p/curve25519-donna/
10 * Adam Langley <agl@imperialviolet.org>
12 * Derived from public domain C code by Daniel J. Bernstein <djb@cr.yp.to>
14 * More information about curve25519 can be found here
15 * http://cr.yp.to/ecdh.html
17 * djb's sample implementation of curve25519 is written in a special assembly
18 * language called qhasm and uses the floating point registers.
20 * This is, almost, a clean room reimplementation from the curve25519 paper. It
21 * uses many of the tricks described therein. Only the crecip function is taken
22 * from the sample implementation.
32 /* Field element representation:
34 * Field elements are written as an array of signed, 64-bit limbs, least
35 * significant first. The value of the field element is:
36 * x[0] + 2^26·x[1] + x^51·x[2] + 2^102·x[3] + ...
38 * i.e. the limbs are 26, 25, 26, 25, ... bits wide.
41 /* Sum two numbers: output += in */
42 static void fsum(limb
*output
, const limb
*in
) {
44 for (i
= 0; i
< 10; i
+= 2) {
45 output
[0+i
] = (output
[0+i
] + in
[0+i
]);
46 output
[1+i
] = (output
[1+i
] + in
[1+i
]);
50 /* Find the difference of two numbers: output = in - output
51 * (note the order of the arguments!)
53 static void fdifference(limb
*output
, const limb
*in
) {
55 for (i
= 0; i
< 10; ++i
) {
56 output
[i
] = (in
[i
] - output
[i
]);
60 /* Multiply a number my a scalar: output = in * scalar */
61 static void fscalar_product(limb
*output
, const limb
*in
, const limb scalar
) {
63 for (i
= 0; i
< 10; ++i
) {
64 output
[i
] = in
[i
] * scalar
;
68 /* Multiply two numbers: output = in2 * in
70 * output must be distinct to both inputs. The inputs are reduced coefficient
71 * form, the output is not.
73 static void fproduct(limb
*output
, const limb
*in2
, const limb
*in
) {
74 output
[0] = ((limb
) ((s32
) in2
[0])) * ((s32
) in
[0]);
75 output
[1] = ((limb
) ((s32
) in2
[0])) * ((s32
) in
[1]) +
76 ((limb
) ((s32
) in2
[1])) * ((s32
) in
[0]);
77 output
[2] = 2 * ((limb
) ((s32
) in2
[1])) * ((s32
) in
[1]) +
78 ((limb
) ((s32
) in2
[0])) * ((s32
) in
[2]) +
79 ((limb
) ((s32
) in2
[2])) * ((s32
) in
[0]);
80 output
[3] = ((limb
) ((s32
) in2
[1])) * ((s32
) in
[2]) +
81 ((limb
) ((s32
) in2
[2])) * ((s32
) in
[1]) +
82 ((limb
) ((s32
) in2
[0])) * ((s32
) in
[3]) +
83 ((limb
) ((s32
) in2
[3])) * ((s32
) in
[0]);
84 output
[4] = ((limb
) ((s32
) in2
[2])) * ((s32
) in
[2]) +
85 2 * (((limb
) ((s32
) in2
[1])) * ((s32
) in
[3]) +
86 ((limb
) ((s32
) in2
[3])) * ((s32
) in
[1])) +
87 ((limb
) ((s32
) in2
[0])) * ((s32
) in
[4]) +
88 ((limb
) ((s32
) in2
[4])) * ((s32
) in
[0]);
89 output
[5] = ((limb
) ((s32
) in2
[2])) * ((s32
) in
[3]) +
90 ((limb
) ((s32
) in2
[3])) * ((s32
) in
[2]) +
91 ((limb
) ((s32
) in2
[1])) * ((s32
) in
[4]) +
92 ((limb
) ((s32
) in2
[4])) * ((s32
) in
[1]) +
93 ((limb
) ((s32
) in2
[0])) * ((s32
) in
[5]) +
94 ((limb
) ((s32
) in2
[5])) * ((s32
) in
[0]);
95 output
[6] = 2 * (((limb
) ((s32
) in2
[3])) * ((s32
) in
[3]) +
96 ((limb
) ((s32
) in2
[1])) * ((s32
) in
[5]) +
97 ((limb
) ((s32
) in2
[5])) * ((s32
) in
[1])) +
98 ((limb
) ((s32
) in2
[2])) * ((s32
) in
[4]) +
99 ((limb
) ((s32
) in2
[4])) * ((s32
) in
[2]) +
100 ((limb
) ((s32
) in2
[0])) * ((s32
) in
[6]) +
101 ((limb
) ((s32
) in2
[6])) * ((s32
) in
[0]);
102 output
[7] = ((limb
) ((s32
) in2
[3])) * ((s32
) in
[4]) +
103 ((limb
) ((s32
) in2
[4])) * ((s32
) in
[3]) +
104 ((limb
) ((s32
) in2
[2])) * ((s32
) in
[5]) +
105 ((limb
) ((s32
) in2
[5])) * ((s32
) in
[2]) +
106 ((limb
) ((s32
) in2
[1])) * ((s32
) in
[6]) +
107 ((limb
) ((s32
) in2
[6])) * ((s32
) in
[1]) +
108 ((limb
) ((s32
) in2
[0])) * ((s32
) in
[7]) +
109 ((limb
) ((s32
) in2
[7])) * ((s32
) in
[0]);
110 output
[8] = ((limb
) ((s32
) in2
[4])) * ((s32
) in
[4]) +
111 2 * (((limb
) ((s32
) in2
[3])) * ((s32
) in
[5]) +
112 ((limb
) ((s32
) in2
[5])) * ((s32
) in
[3]) +
113 ((limb
) ((s32
) in2
[1])) * ((s32
) in
[7]) +
114 ((limb
) ((s32
) in2
[7])) * ((s32
) in
[1])) +
115 ((limb
) ((s32
) in2
[2])) * ((s32
) in
[6]) +
116 ((limb
) ((s32
) in2
[6])) * ((s32
) in
[2]) +
117 ((limb
) ((s32
) in2
[0])) * ((s32
) in
[8]) +
118 ((limb
) ((s32
) in2
[8])) * ((s32
) in
[0]);
119 output
[9] = ((limb
) ((s32
) in2
[4])) * ((s32
) in
[5]) +
120 ((limb
) ((s32
) in2
[5])) * ((s32
) in
[4]) +
121 ((limb
) ((s32
) in2
[3])) * ((s32
) in
[6]) +
122 ((limb
) ((s32
) in2
[6])) * ((s32
) in
[3]) +
123 ((limb
) ((s32
) in2
[2])) * ((s32
) in
[7]) +
124 ((limb
) ((s32
) in2
[7])) * ((s32
) in
[2]) +
125 ((limb
) ((s32
) in2
[1])) * ((s32
) in
[8]) +
126 ((limb
) ((s32
) in2
[8])) * ((s32
) in
[1]) +
127 ((limb
) ((s32
) in2
[0])) * ((s32
) in
[9]) +
128 ((limb
) ((s32
) in2
[9])) * ((s32
) in
[0]);
129 output
[10] = 2 * (((limb
) ((s32
) in2
[5])) * ((s32
) in
[5]) +
130 ((limb
) ((s32
) in2
[3])) * ((s32
) in
[7]) +
131 ((limb
) ((s32
) in2
[7])) * ((s32
) in
[3]) +
132 ((limb
) ((s32
) in2
[1])) * ((s32
) in
[9]) +
133 ((limb
) ((s32
) in2
[9])) * ((s32
) in
[1])) +
134 ((limb
) ((s32
) in2
[4])) * ((s32
) in
[6]) +
135 ((limb
) ((s32
) in2
[6])) * ((s32
) in
[4]) +
136 ((limb
) ((s32
) in2
[2])) * ((s32
) in
[8]) +
137 ((limb
) ((s32
) in2
[8])) * ((s32
) in
[2]);
138 output
[11] = ((limb
) ((s32
) in2
[5])) * ((s32
) in
[6]) +
139 ((limb
) ((s32
) in2
[6])) * ((s32
) in
[5]) +
140 ((limb
) ((s32
) in2
[4])) * ((s32
) in
[7]) +
141 ((limb
) ((s32
) in2
[7])) * ((s32
) in
[4]) +
142 ((limb
) ((s32
) in2
[3])) * ((s32
) in
[8]) +
143 ((limb
) ((s32
) in2
[8])) * ((s32
) in
[3]) +
144 ((limb
) ((s32
) in2
[2])) * ((s32
) in
[9]) +
145 ((limb
) ((s32
) in2
[9])) * ((s32
) in
[2]);
146 output
[12] = ((limb
) ((s32
) in2
[6])) * ((s32
) in
[6]) +
147 2 * (((limb
) ((s32
) in2
[5])) * ((s32
) in
[7]) +
148 ((limb
) ((s32
) in2
[7])) * ((s32
) in
[5]) +
149 ((limb
) ((s32
) in2
[3])) * ((s32
) in
[9]) +
150 ((limb
) ((s32
) in2
[9])) * ((s32
) in
[3])) +
151 ((limb
) ((s32
) in2
[4])) * ((s32
) in
[8]) +
152 ((limb
) ((s32
) in2
[8])) * ((s32
) in
[4]);
153 output
[13] = ((limb
) ((s32
) in2
[6])) * ((s32
) in
[7]) +
154 ((limb
) ((s32
) in2
[7])) * ((s32
) in
[6]) +
155 ((limb
) ((s32
) in2
[5])) * ((s32
) in
[8]) +
156 ((limb
) ((s32
) in2
[8])) * ((s32
) in
[5]) +
157 ((limb
) ((s32
) in2
[4])) * ((s32
) in
[9]) +
158 ((limb
) ((s32
) in2
[9])) * ((s32
) in
[4]);
159 output
[14] = 2 * (((limb
) ((s32
) in2
[7])) * ((s32
) in
[7]) +
160 ((limb
) ((s32
) in2
[5])) * ((s32
) in
[9]) +
161 ((limb
) ((s32
) in2
[9])) * ((s32
) in
[5])) +
162 ((limb
) ((s32
) in2
[6])) * ((s32
) in
[8]) +
163 ((limb
) ((s32
) in2
[8])) * ((s32
) in
[6]);
164 output
[15] = ((limb
) ((s32
) in2
[7])) * ((s32
) in
[8]) +
165 ((limb
) ((s32
) in2
[8])) * ((s32
) in
[7]) +
166 ((limb
) ((s32
) in2
[6])) * ((s32
) in
[9]) +
167 ((limb
) ((s32
) in2
[9])) * ((s32
) in
[6]);
168 output
[16] = ((limb
) ((s32
) in2
[8])) * ((s32
) in
[8]) +
169 2 * (((limb
) ((s32
) in2
[7])) * ((s32
) in
[9]) +
170 ((limb
) ((s32
) in2
[9])) * ((s32
) in
[7]));
171 output
[17] = ((limb
) ((s32
) in2
[8])) * ((s32
) in
[9]) +
172 ((limb
) ((s32
) in2
[9])) * ((s32
) in
[8]);
173 output
[18] = 2 * ((limb
) ((s32
) in2
[9])) * ((s32
) in
[9]);
176 /* Reduce a long form to a short form by taking the input mod 2^255 - 19. */
177 static void freduce_degree(limb
*output
) {
178 /* Each of these shifts and adds ends up multiplying the value by 19. */
179 output
[8] += output
[18] << 4;
180 output
[8] += output
[18] << 1;
181 output
[8] += output
[18];
182 output
[7] += output
[17] << 4;
183 output
[7] += output
[17] << 1;
184 output
[7] += output
[17];
185 output
[6] += output
[16] << 4;
186 output
[6] += output
[16] << 1;
187 output
[6] += output
[16];
188 output
[5] += output
[15] << 4;
189 output
[5] += output
[15] << 1;
190 output
[5] += output
[15];
191 output
[4] += output
[14] << 4;
192 output
[4] += output
[14] << 1;
193 output
[4] += output
[14];
194 output
[3] += output
[13] << 4;
195 output
[3] += output
[13] << 1;
196 output
[3] += output
[13];
197 output
[2] += output
[12] << 4;
198 output
[2] += output
[12] << 1;
199 output
[2] += output
[12];
200 output
[1] += output
[11] << 4;
201 output
[1] += output
[11] << 1;
202 output
[1] += output
[11];
203 output
[0] += output
[10] << 4;
204 output
[0] += output
[10] << 1;
205 output
[0] += output
[10];
208 /* Reduce all coefficients of the short form input so that |x| < 2^26.
210 * On entry: |output[i]| < 2^62
212 static void freduce_coefficients(limb
*output
) {
217 for (i
= 0; i
< 10; i
+= 2) {
218 limb over
= output
[i
] / 0x4000000l
;
220 output
[i
] -= over
* 0x4000000l
;
222 over
= output
[i
+1] / 0x2000000;
224 output
[i
+1] -= over
* 0x2000000;
226 output
[0] += 19 * output
[10];
227 } while (output
[10]);
230 /* A helpful wrapper around fproduct: output = in * in2.
232 * output must be distinct to both inputs. The output is reduced degree and
233 * reduced coefficient.
236 fmul(limb
*output
, const limb
*in
, const limb
*in2
) {
238 fproduct(t
, in
, in2
);
240 freduce_coefficients(t
);
241 memcpy(output
, t
, sizeof(limb
) * 10);
244 static void fsquare_inner(limb
*output
, const limb
*in
) {
245 output
[0] = ((limb
) ((s32
) in
[0])) * ((s32
) in
[0]);
246 output
[1] = 2 * ((limb
) ((s32
) in
[0])) * ((s32
) in
[1]);
247 output
[2] = 2 * (((limb
) ((s32
) in
[1])) * ((s32
) in
[1]) +
248 ((limb
) ((s32
) in
[0])) * ((s32
) in
[2]));
249 output
[3] = 2 * (((limb
) ((s32
) in
[1])) * ((s32
) in
[2]) +
250 ((limb
) ((s32
) in
[0])) * ((s32
) in
[3]));
251 output
[4] = ((limb
) ((s32
) in
[2])) * ((s32
) in
[2]) +
252 4 * ((limb
) ((s32
) in
[1])) * ((s32
) in
[3]) +
253 2 * ((limb
) ((s32
) in
[0])) * ((s32
) in
[4]);
254 output
[5] = 2 * (((limb
) ((s32
) in
[2])) * ((s32
) in
[3]) +
255 ((limb
) ((s32
) in
[1])) * ((s32
) in
[4]) +
256 ((limb
) ((s32
) in
[0])) * ((s32
) in
[5]));
257 output
[6] = 2 * (((limb
) ((s32
) in
[3])) * ((s32
) in
[3]) +
258 ((limb
) ((s32
) in
[2])) * ((s32
) in
[4]) +
259 ((limb
) ((s32
) in
[0])) * ((s32
) in
[6]) +
260 2 * ((limb
) ((s32
) in
[1])) * ((s32
) in
[5]));
261 output
[7] = 2 * (((limb
) ((s32
) in
[3])) * ((s32
) in
[4]) +
262 ((limb
) ((s32
) in
[2])) * ((s32
) in
[5]) +
263 ((limb
) ((s32
) in
[1])) * ((s32
) in
[6]) +
264 ((limb
) ((s32
) in
[0])) * ((s32
) in
[7]));
265 output
[8] = ((limb
) ((s32
) in
[4])) * ((s32
) in
[4]) +
266 2 * (((limb
) ((s32
) in
[2])) * ((s32
) in
[6]) +
267 ((limb
) ((s32
) in
[0])) * ((s32
) in
[8]) +
268 2 * (((limb
) ((s32
) in
[1])) * ((s32
) in
[7]) +
269 ((limb
) ((s32
) in
[3])) * ((s32
) in
[5])));
270 output
[9] = 2 * (((limb
) ((s32
) in
[4])) * ((s32
) in
[5]) +
271 ((limb
) ((s32
) in
[3])) * ((s32
) in
[6]) +
272 ((limb
) ((s32
) in
[2])) * ((s32
) in
[7]) +
273 ((limb
) ((s32
) in
[1])) * ((s32
) in
[8]) +
274 ((limb
) ((s32
) in
[0])) * ((s32
) in
[9]));
275 output
[10] = 2 * (((limb
) ((s32
) in
[5])) * ((s32
) in
[5]) +
276 ((limb
) ((s32
) in
[4])) * ((s32
) in
[6]) +
277 ((limb
) ((s32
) in
[2])) * ((s32
) in
[8]) +
278 2 * (((limb
) ((s32
) in
[3])) * ((s32
) in
[7]) +
279 ((limb
) ((s32
) in
[1])) * ((s32
) in
[9])));
280 output
[11] = 2 * (((limb
) ((s32
) in
[5])) * ((s32
) in
[6]) +
281 ((limb
) ((s32
) in
[4])) * ((s32
) in
[7]) +
282 ((limb
) ((s32
) in
[3])) * ((s32
) in
[8]) +
283 ((limb
) ((s32
) in
[2])) * ((s32
) in
[9]));
284 output
[12] = ((limb
) ((s32
) in
[6])) * ((s32
) in
[6]) +
285 2 * (((limb
) ((s32
) in
[4])) * ((s32
) in
[8]) +
286 2 * (((limb
) ((s32
) in
[5])) * ((s32
) in
[7]) +
287 ((limb
) ((s32
) in
[3])) * ((s32
) in
[9])));
288 output
[13] = 2 * (((limb
) ((s32
) in
[6])) * ((s32
) in
[7]) +
289 ((limb
) ((s32
) in
[5])) * ((s32
) in
[8]) +
290 ((limb
) ((s32
) in
[4])) * ((s32
) in
[9]));
291 output
[14] = 2 * (((limb
) ((s32
) in
[7])) * ((s32
) in
[7]) +
292 ((limb
) ((s32
) in
[6])) * ((s32
) in
[8]) +
293 2 * ((limb
) ((s32
) in
[5])) * ((s32
) in
[9]));
294 output
[15] = 2 * (((limb
) ((s32
) in
[7])) * ((s32
) in
[8]) +
295 ((limb
) ((s32
) in
[6])) * ((s32
) in
[9]));
296 output
[16] = ((limb
) ((s32
) in
[8])) * ((s32
) in
[8]) +
297 4 * ((limb
) ((s32
) in
[7])) * ((s32
) in
[9]);
298 output
[17] = 2 * ((limb
) ((s32
) in
[8])) * ((s32
) in
[9]);
299 output
[18] = 2 * ((limb
) ((s32
) in
[9])) * ((s32
) in
[9]);
303 fsquare(limb
*output
, const limb
*in
) {
305 fsquare_inner(t
, in
);
307 freduce_coefficients(t
);
308 memcpy(output
, t
, sizeof(limb
) * 10);
311 /* Take a little-endian, 32-byte number and expand it into polynomial form */
313 fexpand(limb
*output
, const u8
*input
) {
314 #define F(n,start,shift,mask) \
315 output[n] = ((((limb) input[start + 0]) | \
316 ((limb) input[start + 1]) << 8 | \
317 ((limb) input[start + 2]) << 16 | \
318 ((limb) input[start + 3]) << 24) >> shift) & mask;
319 F(0, 0, 0, 0x3ffffff);
320 F(1, 3, 2, 0x1ffffff);
321 F(2, 6, 3, 0x3ffffff);
322 F(3, 9, 5, 0x1ffffff);
323 F(4, 12, 6, 0x3ffffff);
324 F(5, 16, 0, 0x1ffffff);
325 F(6, 19, 1, 0x3ffffff);
326 F(7, 22, 3, 0x1ffffff);
327 F(8, 25, 4, 0x3ffffff);
328 F(9, 28, 6, 0x1ffffff);
332 /* Take a fully reduced polynomial form number and contract it into a
333 * little-endian, 32-byte array
336 fcontract(u8
*output
, limb
*input
) {
340 for (i
= 0; i
< 9; ++i
) {
342 while (input
[i
] < 0) {
343 input
[i
] += 0x2000000;
347 while (input
[i
] < 0) {
348 input
[i
] += 0x4000000;
353 while (input
[9] < 0) {
354 input
[9] += 0x2000000;
357 } while (input
[0] < 0);
368 output[s+0] |= input[i] & 0xff; \
369 output[s+1] = (input[i] >> 8) & 0xff; \
370 output[s+2] = (input[i] >> 16) & 0xff; \
371 output[s+3] = (input[i] >> 24) & 0xff;
387 /* Input: Q, Q', Q-Q'
392 * x z: short form, destroyed
393 * xprime zprime: short form, destroyed
394 * qmqp: short form, preserved
396 static void fmonty(limb
*x2
, limb
*z2
, /* output 2Q */
397 limb
*x3
, limb
*z3
, /* output Q + Q' */
398 limb
*x
, limb
*z
, /* input Q */
399 limb
*xprime
, limb
*zprime
, /* input Q' */
400 const limb
*qmqp
/* input Q - Q' */) {
401 limb origx
[10], origxprime
[10], zzz
[19], xx
[19], zz
[19], xxprime
[19],
402 zzprime
[19], zzzprime
[19], xxxprime
[19];
404 memcpy(origx
, x
, 10 * sizeof(limb
));
406 fdifference(z
, origx
); // does x - z
408 memcpy(origxprime
, xprime
, sizeof(limb
) * 10);
409 fsum(xprime
, zprime
);
410 fdifference(zprime
, origxprime
);
411 fproduct(xxprime
, xprime
, z
);
412 fproduct(zzprime
, x
, zprime
);
413 freduce_degree(xxprime
);
414 freduce_coefficients(xxprime
);
415 freduce_degree(zzprime
);
416 freduce_coefficients(zzprime
);
417 memcpy(origxprime
, xxprime
, sizeof(limb
) * 10);
418 fsum(xxprime
, zzprime
);
419 fdifference(zzprime
, origxprime
);
420 fsquare(xxxprime
, xxprime
);
421 fsquare(zzzprime
, zzprime
);
422 fproduct(zzprime
, zzzprime
, qmqp
);
423 freduce_degree(zzprime
);
424 freduce_coefficients(zzprime
);
425 memcpy(x3
, xxxprime
, sizeof(limb
) * 10);
426 memcpy(z3
, zzprime
, sizeof(limb
) * 10);
430 fproduct(x2
, xx
, zz
);
432 freduce_coefficients(x2
);
433 fdifference(zz
, xx
); // does zz = xx - zz
434 memset(zzz
+ 10, 0, sizeof(limb
) * 9);
435 fscalar_product(zzz
, zz
, 121665);
437 freduce_coefficients(zzz
);
439 fproduct(z2
, zz
, zzz
);
441 freduce_coefficients(z2
);
444 /* Calculates nQ where Q is the x-coordinate of a point on the curve
446 * resultx/resultz: the x coordinate of the resulting curve point (short form)
447 * n: a little endian, 32-byte number
448 * q: a point of the curve (short form)
451 cmult(limb
*resultx
, limb
*resultz
, const u8
*n
, const limb
*q
) {
452 limb a
[19] = {0}, b
[19] = {1}, c
[19] = {1}, d
[19] = {0};
453 limb
*nqpqx
= a
, *nqpqz
= b
, *nqx
= c
, *nqz
= d
, *t
;
454 limb e
[19] = {0}, f
[19] = {1}, g
[19] = {0}, h
[19] = {1};
455 limb
*nqpqx2
= e
, *nqpqz2
= f
, *nqx2
= g
, *nqz2
= h
;
459 memcpy(nqpqx
, q
, sizeof(limb
) * 10);
461 for (i
= 0; i
< 32; ++i
) {
463 for (j
= 0; j
< 8; ++j
) {
465 fmonty(nqpqx2
, nqpqz2
,
495 memcpy(resultx
, nqx
, sizeof(limb
) * 10);
496 memcpy(resultz
, nqz
, sizeof(limb
) * 10);
499 // -----------------------------------------------------------------------------
500 // Shamelessly copied from djb's code
501 // -----------------------------------------------------------------------------
503 crecip(limb
*out
, const limb
*z
) {
516 /* 2 */ fsquare(z2
,z
);
517 /* 4 */ fsquare(t1
,z2
);
518 /* 8 */ fsquare(t0
,t1
);
519 /* 9 */ fmul(z9
,t0
,z
);
520 /* 11 */ fmul(z11
,z9
,z2
);
521 /* 22 */ fsquare(t0
,z11
);
522 /* 2^5 - 2^0 = 31 */ fmul(z2_5_0
,t0
,z9
);
524 /* 2^6 - 2^1 */ fsquare(t0
,z2_5_0
);
525 /* 2^7 - 2^2 */ fsquare(t1
,t0
);
526 /* 2^8 - 2^3 */ fsquare(t0
,t1
);
527 /* 2^9 - 2^4 */ fsquare(t1
,t0
);
528 /* 2^10 - 2^5 */ fsquare(t0
,t1
);
529 /* 2^10 - 2^0 */ fmul(z2_10_0
,t0
,z2_5_0
);
531 /* 2^11 - 2^1 */ fsquare(t0
,z2_10_0
);
532 /* 2^12 - 2^2 */ fsquare(t1
,t0
);
534 for (i
= 2;i
< 10;i
+= 2) { fsquare(t0
,t1
); fsquare(t1
,t0
); }
535 /* 2^20 - 2^0 */ fmul(z2_20_0
,t1
,z2_10_0
);
537 /* 2^21 - 2^1 */ fsquare(t0
,z2_20_0
);
538 /* 2^22 - 2^2 */ fsquare(t1
,t0
);
540 for (i
= 2;i
< 20;i
+= 2) { fsquare(t0
,t1
); fsquare(t1
,t0
); }
541 /* 2^40 - 2^0 */ fmul(t0
,t1
,z2_20_0
);
543 /* 2^41 - 2^1 */ fsquare(t1
,t0
);
544 /* 2^42 - 2^2 */ fsquare(t0
,t1
);
546 for (i
= 2;i
< 10;i
+= 2) { fsquare(t1
,t0
); fsquare(t0
,t1
); }
547 /* 2^50 - 2^0 */ fmul(z2_50_0
,t0
,z2_10_0
);
549 /* 2^51 - 2^1 */ fsquare(t0
,z2_50_0
);
550 /* 2^52 - 2^2 */ fsquare(t1
,t0
);
552 for (i
= 2;i
< 50;i
+= 2) { fsquare(t0
,t1
); fsquare(t1
,t0
); }
553 /* 2^100 - 2^0 */ fmul(z2_100_0
,t1
,z2_50_0
);
555 /* 2^101 - 2^1 */ fsquare(t1
,z2_100_0
);
556 /* 2^102 - 2^2 */ fsquare(t0
,t1
);
558 for (i
= 2;i
< 100;i
+= 2) { fsquare(t1
,t0
); fsquare(t0
,t1
); }
559 /* 2^200 - 2^0 */ fmul(t1
,t0
,z2_100_0
);
561 /* 2^201 - 2^1 */ fsquare(t0
,t1
);
562 /* 2^202 - 2^2 */ fsquare(t1
,t0
);
564 for (i
= 2;i
< 50;i
+= 2) { fsquare(t0
,t1
); fsquare(t1
,t0
); }
565 /* 2^250 - 2^0 */ fmul(t0
,t1
,z2_50_0
);
567 /* 2^251 - 2^1 */ fsquare(t1
,t0
);
568 /* 2^252 - 2^2 */ fsquare(t0
,t1
);
569 /* 2^253 - 2^3 */ fsquare(t1
,t0
);
570 /* 2^254 - 2^4 */ fsquare(t0
,t1
);
571 /* 2^255 - 2^5 */ fsquare(t1
,t0
);
572 /* 2^255 - 21 */ fmul(out
,t1
,z11
);
576 curve25519_donna(u8
*mypublic
, const u8
*secret
, const u8
*basepoint
) {
577 limb bp
[10], x
[10], z
[10], zmone
[10];
581 for (i
= 0; i
< 32; ++i
) e
[i
] = secret
[i
];
586 fexpand(bp
, basepoint
);
590 fcontract(mypublic
, z
);