OpenSSL: update to 1.0.1m
[tomato.git] / release / src / router / openssl / crypto / ec / ecp_nistp224.c
blobed09f97ade683738a834d668091bc42fc12fdd3e
1 /* crypto/ec/ecp_nistp224.c */
2 /*
3 * Written by Emilia Kasper (Google) for the OpenSSL project.
4 */
5 /* Copyright 2011 Google Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
22 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
24 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
25 * and Adam Langley's public domain 64-bit C implementation of curve25519
28 #include <openssl/opensslconf.h>
29 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
31 # ifndef OPENSSL_SYS_VMS
32 # include <stdint.h>
33 # else
34 # include <inttypes.h>
35 # endif
37 # include <string.h>
38 # include <openssl/err.h>
39 # include "ec_lcl.h"
41 # if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
42 /* even with gcc, the typedef won't work for 32-bit platforms */
43 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit
44 * platforms */
45 # else
46 # error "Need GCC 3.1 or later to define type uint128_t"
47 # endif
49 typedef uint8_t u8;
50 typedef uint64_t u64;
51 typedef int64_t s64;
53 /******************************************************************************/
54 /*-
55 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
57 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
58 * using 64-bit coefficients called 'limbs',
59 * and sometimes (for multiplication results) as
60 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
61 * using 128-bit coefficients called 'widelimbs'.
62 * A 4-limb representation is an 'felem';
63 * a 7-widelimb representation is a 'widefelem'.
64 * Even within felems, bits of adjacent limbs overlap, and we don't always
65 * reduce the representations: we ensure that inputs to each felem
66 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
67 * and fit into a 128-bit word without overflow. The coefficients are then
68 * again partially reduced to obtain an felem satisfying a_i < 2^57.
69 * We only reduce to the unique minimal representation at the end of the
70 * computation.
73 typedef uint64_t limb;
74 typedef uint128_t widelimb;
76 typedef limb felem[4];
77 typedef widelimb widefelem[7];
80 * Field element represented as a byte arrary. 28*8 = 224 bits is also the
81 * group order size for the elliptic curve, and we also use this type for
82 * scalars for point multiplication.
84 typedef u8 felem_bytearray[28];
86 static const felem_bytearray nistp224_curve_params[5] = {
87 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
88 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
89 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
90 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
91 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
92 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
93 {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
94 0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
95 0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
96 {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
97 0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
98 0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
99 {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
100 0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
101 0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
105 * Precomputed multiples of the standard generator
106 * Points are given in coordinates (X, Y, Z) where Z normally is 1
107 * (0 for the point at infinity).
108 * For each field element, slice a_0 is word 0, etc.
110 * The table has 2 * 16 elements, starting with the following:
111 * index | bits | point
112 * ------+---------+------------------------------
113 * 0 | 0 0 0 0 | 0G
114 * 1 | 0 0 0 1 | 1G
115 * 2 | 0 0 1 0 | 2^56G
116 * 3 | 0 0 1 1 | (2^56 + 1)G
117 * 4 | 0 1 0 0 | 2^112G
118 * 5 | 0 1 0 1 | (2^112 + 1)G
119 * 6 | 0 1 1 0 | (2^112 + 2^56)G
120 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
121 * 8 | 1 0 0 0 | 2^168G
122 * 9 | 1 0 0 1 | (2^168 + 1)G
123 * 10 | 1 0 1 0 | (2^168 + 2^56)G
124 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
125 * 12 | 1 1 0 0 | (2^168 + 2^112)G
126 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
127 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
128 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
129 * followed by a copy of this with each element multiplied by 2^28.
131 * The reason for this is so that we can clock bits into four different
132 * locations when doing simple scalar multiplies against the base point,
133 * and then another four locations using the second 16 elements.
135 static const felem gmul[2][16][3] = { {{{0, 0, 0, 0},
136 {0, 0, 0, 0},
137 {0, 0, 0, 0}},
138 {{0x3280d6115c1d21, 0xc1d356c2112234,
139 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
140 {0xd5819985007e34, 0x75a05a07476444,
141 0xfb4c22dfe6cd43, 0xbd376388b5f723},
142 {1, 0, 0, 0}},
143 {{0xfd9675666ebbe9, 0xbca7664d40ce5e,
144 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
145 {0x29e0b892dc9c43, 0xece8608436e662,
146 0xdc858f185310d0, 0x9812dd4eb8d321},
147 {1, 0, 0, 0}},
148 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1,
149 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
150 {0xf19f90ed50266d, 0xabf2b4bf65f9df,
151 0x313865468fafec, 0x5cb379ba910a17},
152 {1, 0, 0, 0}},
153 {{0x0641966cab26e3, 0x91fb2991fab0a0,
154 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
155 {0x7510407766af5d, 0x84d929610d5450,
156 0x81d77aae82f706, 0x6916f6d4338c5b},
157 {1, 0, 0, 0}},
158 {{0xea95ac3b1f15c6, 0x086000905e82d4,
159 0xdd323ae4d1c8b1, 0x932b56be7685a3},
160 {0x9ef93dea25dbbf, 0x41665960f390f0,
161 0xfdec76dbe2a8a7, 0x523e80f019062a},
162 {1, 0, 0, 0}},
163 {{0x822fdd26732c73, 0xa01c83531b5d0f,
164 0x363f37347c1ba4, 0xc391b45c84725c},
165 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec,
166 0xc393da7e222a7f, 0x1efb7890ede244},
167 {1, 0, 0, 0}},
168 {{0x4c9e90ca217da1, 0xd11beca79159bb,
169 0xff8d33c2c98b7c, 0x2610b39409f849},
170 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb,
171 0x966c079b753c89, 0xfe67e4e820b112},
172 {1, 0, 0, 0}},
173 {{0xe28cae2df5312d, 0xc71b61d16f5c6e,
174 0x79b7619a3e7c4c, 0x05c73240899b47},
175 {0x9f7f6382c73e3a, 0x18615165c56bda,
176 0x641fab2116fd56, 0x72855882b08394},
177 {1, 0, 0, 0}},
178 {{0x0469182f161c09, 0x74a98ca8d00fb5,
179 0xb89da93489a3e0, 0x41c98768fb0c1d},
180 {0xe5ea05fb32da81, 0x3dce9ffbca6855,
181 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
182 {1, 0, 0, 0}},
183 {{0xdab22b2333e87f, 0x4430137a5dd2f6,
184 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
185 {0x764a7df0c8fda5, 0x185ba5c3fa2044,
186 0x9281d688bcbe50, 0xc40331df893881},
187 {1, 0, 0, 0}},
188 {{0xb89530796f0f60, 0xade92bd26909a3,
189 0x1a0c83fb4884da, 0x1765bf22a5a984},
190 {0x772a9ee75db09e, 0x23bc6c67cec16f,
191 0x4c1edba8b14e2f, 0xe2a215d9611369},
192 {1, 0, 0, 0}},
193 {{0x571e509fb5efb3, 0xade88696410552,
194 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
195 {0xff9f51160f4652, 0xb47ce2495a6539,
196 0xa2946c53b582f4, 0x286d2db3ee9a60},
197 {1, 0, 0, 0}},
198 {{0x40bbd5081a44af, 0x0995183b13926c,
199 0xbcefba6f47f6d0, 0x215619e9cc0057},
200 {0x8bc94d3b0df45e, 0xf11c54a3694f6f,
201 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
202 {1, 0, 0, 0}},
203 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8,
204 0x1c29819435d2c6, 0xc813132f4c07e9},
205 {0x2891425503b11f, 0x08781030579fea,
206 0xf5426ba5cc9674, 0x1e28ebf18562bc},
207 {1, 0, 0, 0}},
208 {{0x9f31997cc864eb, 0x06cd91d28b5e4c,
209 0xff17036691a973, 0xf1aef351497c58},
210 {0xdd1f2d600564ff, 0xdead073b1402db,
211 0x74a684435bd693, 0xeea7471f962558},
212 {1, 0, 0, 0}}},
213 {{{0, 0, 0, 0},
214 {0, 0, 0, 0},
215 {0, 0, 0, 0}},
216 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
217 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
218 {1, 0, 0, 0}},
219 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
220 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
221 {1, 0, 0, 0}},
222 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
223 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
224 {1, 0, 0, 0}},
225 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
226 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
227 {1, 0, 0, 0}},
228 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
229 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
230 {1, 0, 0, 0}},
231 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
232 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
233 {1, 0, 0, 0}},
234 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
235 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
236 {1, 0, 0, 0}},
237 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
238 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
239 {1, 0, 0, 0}},
240 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
241 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
242 {1, 0, 0, 0}},
243 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
244 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
245 {1, 0, 0, 0}},
246 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
247 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
248 {1, 0, 0, 0}},
249 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
250 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
251 {1, 0, 0, 0}},
252 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
253 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
254 {1, 0, 0, 0}},
255 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
256 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
257 {1, 0, 0, 0}},
258 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
259 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
260 {1, 0, 0, 0}}}
263 /* Precomputation for the group generator. */
264 typedef struct {
265 felem g_pre_comp[2][16][3];
266 int references;
267 } NISTP224_PRE_COMP;
269 const EC_METHOD *EC_GFp_nistp224_method(void)
271 static const EC_METHOD ret = {
272 EC_FLAGS_DEFAULT_OCT,
273 NID_X9_62_prime_field,
274 ec_GFp_nistp224_group_init,
275 ec_GFp_simple_group_finish,
276 ec_GFp_simple_group_clear_finish,
277 ec_GFp_nist_group_copy,
278 ec_GFp_nistp224_group_set_curve,
279 ec_GFp_simple_group_get_curve,
280 ec_GFp_simple_group_get_degree,
281 ec_GFp_simple_group_check_discriminant,
282 ec_GFp_simple_point_init,
283 ec_GFp_simple_point_finish,
284 ec_GFp_simple_point_clear_finish,
285 ec_GFp_simple_point_copy,
286 ec_GFp_simple_point_set_to_infinity,
287 ec_GFp_simple_set_Jprojective_coordinates_GFp,
288 ec_GFp_simple_get_Jprojective_coordinates_GFp,
289 ec_GFp_simple_point_set_affine_coordinates,
290 ec_GFp_nistp224_point_get_affine_coordinates,
291 0 /* point_set_compressed_coordinates */ ,
292 0 /* point2oct */ ,
293 0 /* oct2point */ ,
294 ec_GFp_simple_add,
295 ec_GFp_simple_dbl,
296 ec_GFp_simple_invert,
297 ec_GFp_simple_is_at_infinity,
298 ec_GFp_simple_is_on_curve,
299 ec_GFp_simple_cmp,
300 ec_GFp_simple_make_affine,
301 ec_GFp_simple_points_make_affine,
302 ec_GFp_nistp224_points_mul,
303 ec_GFp_nistp224_precompute_mult,
304 ec_GFp_nistp224_have_precompute_mult,
305 ec_GFp_nist_field_mul,
306 ec_GFp_nist_field_sqr,
307 0 /* field_div */ ,
308 0 /* field_encode */ ,
309 0 /* field_decode */ ,
310 0 /* field_set_to_one */
313 return &ret;
317 * Helper functions to convert field elements to/from internal representation
319 static void bin28_to_felem(felem out, const u8 in[28])
321 out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
322 out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
323 out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
324 out[3] = (*((const uint64_t *)(in+20))) >> 8;
327 static void felem_to_bin28(u8 out[28], const felem in)
329 unsigned i;
330 for (i = 0; i < 7; ++i) {
331 out[i] = in[0] >> (8 * i);
332 out[i + 7] = in[1] >> (8 * i);
333 out[i + 14] = in[2] >> (8 * i);
334 out[i + 21] = in[3] >> (8 * i);
338 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
339 static void flip_endian(u8 *out, const u8 *in, unsigned len)
341 unsigned i;
342 for (i = 0; i < len; ++i)
343 out[i] = in[len - 1 - i];
346 /* From OpenSSL BIGNUM to internal representation */
347 static int BN_to_felem(felem out, const BIGNUM *bn)
349 felem_bytearray b_in;
350 felem_bytearray b_out;
351 unsigned num_bytes;
353 /* BN_bn2bin eats leading zeroes */
354 memset(b_out, 0, sizeof b_out);
355 num_bytes = BN_num_bytes(bn);
356 if (num_bytes > sizeof b_out) {
357 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
358 return 0;
360 if (BN_is_negative(bn)) {
361 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
362 return 0;
364 num_bytes = BN_bn2bin(bn, b_in);
365 flip_endian(b_out, b_in, num_bytes);
366 bin28_to_felem(out, b_out);
367 return 1;
370 /* From internal representation to OpenSSL BIGNUM */
371 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
373 felem_bytearray b_in, b_out;
374 felem_to_bin28(b_in, in);
375 flip_endian(b_out, b_in, sizeof b_out);
376 return BN_bin2bn(b_out, sizeof b_out, out);
379 /******************************************************************************/
381 * FIELD OPERATIONS
383 * Field operations, using the internal representation of field elements.
384 * NB! These operations are specific to our point multiplication and cannot be
385 * expected to be correct in general - e.g., multiplication with a large scalar
386 * will cause an overflow.
390 static void felem_one(felem out)
392 out[0] = 1;
393 out[1] = 0;
394 out[2] = 0;
395 out[3] = 0;
398 static void felem_assign(felem out, const felem in)
400 out[0] = in[0];
401 out[1] = in[1];
402 out[2] = in[2];
403 out[3] = in[3];
406 /* Sum two field elements: out += in */
407 static void felem_sum(felem out, const felem in)
409 out[0] += in[0];
410 out[1] += in[1];
411 out[2] += in[2];
412 out[3] += in[3];
415 /* Get negative value: out = -in */
416 /* Assumes in[i] < 2^57 */
417 static void felem_neg(felem out, const felem in)
419 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
420 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
421 static const limb two58m42m2 = (((limb) 1) << 58) -
422 (((limb) 1) << 42) - (((limb) 1) << 2);
424 /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
425 out[0] = two58p2 - in[0];
426 out[1] = two58m42m2 - in[1];
427 out[2] = two58m2 - in[2];
428 out[3] = two58m2 - in[3];
431 /* Subtract field elements: out -= in */
432 /* Assumes in[i] < 2^57 */
433 static void felem_diff(felem out, const felem in)
435 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
436 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
437 static const limb two58m42m2 = (((limb) 1) << 58) -
438 (((limb) 1) << 42) - (((limb) 1) << 2);
440 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
441 out[0] += two58p2;
442 out[1] += two58m42m2;
443 out[2] += two58m2;
444 out[3] += two58m2;
446 out[0] -= in[0];
447 out[1] -= in[1];
448 out[2] -= in[2];
449 out[3] -= in[3];
452 /* Subtract in unreduced 128-bit mode: out -= in */
453 /* Assumes in[i] < 2^119 */
454 static void widefelem_diff(widefelem out, const widefelem in)
456 static const widelimb two120 = ((widelimb) 1) << 120;
457 static const widelimb two120m64 = (((widelimb) 1) << 120) -
458 (((widelimb) 1) << 64);
459 static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
460 (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
462 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
463 out[0] += two120;
464 out[1] += two120m64;
465 out[2] += two120m64;
466 out[3] += two120;
467 out[4] += two120m104m64;
468 out[5] += two120m64;
469 out[6] += two120m64;
471 out[0] -= in[0];
472 out[1] -= in[1];
473 out[2] -= in[2];
474 out[3] -= in[3];
475 out[4] -= in[4];
476 out[5] -= in[5];
477 out[6] -= in[6];
480 /* Subtract in mixed mode: out128 -= in64 */
481 /* in[i] < 2^63 */
482 static void felem_diff_128_64(widefelem out, const felem in)
484 static const widelimb two64p8 = (((widelimb) 1) << 64) +
485 (((widelimb) 1) << 8);
486 static const widelimb two64m8 = (((widelimb) 1) << 64) -
487 (((widelimb) 1) << 8);
488 static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
489 (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
491 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
492 out[0] += two64p8;
493 out[1] += two64m48m8;
494 out[2] += two64m8;
495 out[3] += two64m8;
497 out[0] -= in[0];
498 out[1] -= in[1];
499 out[2] -= in[2];
500 out[3] -= in[3];
504 * Multiply a field element by a scalar: out = out * scalar The scalars we
505 * actually use are small, so results fit without overflow
507 static void felem_scalar(felem out, const limb scalar)
509 out[0] *= scalar;
510 out[1] *= scalar;
511 out[2] *= scalar;
512 out[3] *= scalar;
516 * Multiply an unreduced field element by a scalar: out = out * scalar The
517 * scalars we actually use are small, so results fit without overflow
519 static void widefelem_scalar(widefelem out, const widelimb scalar)
521 out[0] *= scalar;
522 out[1] *= scalar;
523 out[2] *= scalar;
524 out[3] *= scalar;
525 out[4] *= scalar;
526 out[5] *= scalar;
527 out[6] *= scalar;
530 /* Square a field element: out = in^2 */
531 static void felem_square(widefelem out, const felem in)
533 limb tmp0, tmp1, tmp2;
534 tmp0 = 2 * in[0];
535 tmp1 = 2 * in[1];
536 tmp2 = 2 * in[2];
537 out[0] = ((widelimb) in[0]) * in[0];
538 out[1] = ((widelimb) in[0]) * tmp1;
539 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
540 out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
541 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
542 out[5] = ((widelimb) in[3]) * tmp2;
543 out[6] = ((widelimb) in[3]) * in[3];
546 /* Multiply two field elements: out = in1 * in2 */
547 static void felem_mul(widefelem out, const felem in1, const felem in2)
549 out[0] = ((widelimb) in1[0]) * in2[0];
550 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
551 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
552 ((widelimb) in1[2]) * in2[0];
553 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
554 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
555 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
556 ((widelimb) in1[3]) * in2[1];
557 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
558 out[6] = ((widelimb) in1[3]) * in2[3];
562 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
563 * Requires in[i] < 2^126,
564 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
565 static void felem_reduce(felem out, const widefelem in)
567 static const widelimb two127p15 = (((widelimb) 1) << 127) +
568 (((widelimb) 1) << 15);
569 static const widelimb two127m71 = (((widelimb) 1) << 127) -
570 (((widelimb) 1) << 71);
571 static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
572 (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
573 widelimb output[5];
575 /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
576 output[0] = in[0] + two127p15;
577 output[1] = in[1] + two127m71m55;
578 output[2] = in[2] + two127m71;
579 output[3] = in[3];
580 output[4] = in[4];
582 /* Eliminate in[4], in[5], in[6] */
583 output[4] += in[6] >> 16;
584 output[3] += (in[6] & 0xffff) << 40;
585 output[2] -= in[6];
587 output[3] += in[5] >> 16;
588 output[2] += (in[5] & 0xffff) << 40;
589 output[1] -= in[5];
591 output[2] += output[4] >> 16;
592 output[1] += (output[4] & 0xffff) << 40;
593 output[0] -= output[4];
595 /* Carry 2 -> 3 -> 4 */
596 output[3] += output[2] >> 56;
597 output[2] &= 0x00ffffffffffffff;
599 output[4] = output[3] >> 56;
600 output[3] &= 0x00ffffffffffffff;
602 /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
604 /* Eliminate output[4] */
605 output[2] += output[4] >> 16;
606 /* output[2] < 2^56 + 2^56 = 2^57 */
607 output[1] += (output[4] & 0xffff) << 40;
608 output[0] -= output[4];
610 /* Carry 0 -> 1 -> 2 -> 3 */
611 output[1] += output[0] >> 56;
612 out[0] = output[0] & 0x00ffffffffffffff;
614 output[2] += output[1] >> 56;
615 /* output[2] < 2^57 + 2^72 */
616 out[1] = output[1] & 0x00ffffffffffffff;
617 output[3] += output[2] >> 56;
618 /* output[3] <= 2^56 + 2^16 */
619 out[2] = output[2] & 0x00ffffffffffffff;
622 * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
623 * out[3] <= 2^56 + 2^16 (due to final carry),
624 * so out < 2*p
626 out[3] = output[3];
629 static void felem_square_reduce(felem out, const felem in)
631 widefelem tmp;
632 felem_square(tmp, in);
633 felem_reduce(out, tmp);
636 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
638 widefelem tmp;
639 felem_mul(tmp, in1, in2);
640 felem_reduce(out, tmp);
644 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
645 * call felem_reduce first)
647 static void felem_contract(felem out, const felem in)
649 static const int64_t two56 = ((limb) 1) << 56;
650 /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
651 /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
652 int64_t tmp[4], a;
653 tmp[0] = in[0];
654 tmp[1] = in[1];
655 tmp[2] = in[2];
656 tmp[3] = in[3];
657 /* Case 1: a = 1 iff in >= 2^224 */
658 a = (in[3] >> 56);
659 tmp[0] -= a;
660 tmp[1] += a << 40;
661 tmp[3] &= 0x00ffffffffffffff;
663 * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
664 * and the lower part is non-zero
666 a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
667 (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
668 a &= 0x00ffffffffffffff;
669 /* turn a into an all-one mask (if a = 0) or an all-zero mask */
670 a = (a - 1) >> 63;
671 /* subtract 2^224 - 2^96 + 1 if a is all-one */
672 tmp[3] &= a ^ 0xffffffffffffffff;
673 tmp[2] &= a ^ 0xffffffffffffffff;
674 tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
675 tmp[0] -= 1 & a;
678 * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
679 * non-zero, so we only need one step
681 a = tmp[0] >> 63;
682 tmp[0] += two56 & a;
683 tmp[1] -= 1 & a;
685 /* carry 1 -> 2 -> 3 */
686 tmp[2] += tmp[1] >> 56;
687 tmp[1] &= 0x00ffffffffffffff;
689 tmp[3] += tmp[2] >> 56;
690 tmp[2] &= 0x00ffffffffffffff;
692 /* Now 0 <= out < p */
693 out[0] = tmp[0];
694 out[1] = tmp[1];
695 out[2] = tmp[2];
696 out[3] = tmp[3];
700 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
701 * elements are reduced to in < 2^225, so we only need to check three cases:
702 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
704 static limb felem_is_zero(const felem in)
706 limb zero, two224m96p1, two225m97p2;
708 zero = in[0] | in[1] | in[2] | in[3];
709 zero = (((int64_t) (zero) - 1) >> 63) & 1;
710 two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
711 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
712 two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
713 two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
714 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
715 two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
716 return (zero | two224m96p1 | two225m97p2);
719 static limb felem_is_zero_int(const felem in)
721 return (int)(felem_is_zero(in) & ((limb) 1));
724 /* Invert a field element */
725 /* Computation chain copied from djb's code */
726 static void felem_inv(felem out, const felem in)
728 felem ftmp, ftmp2, ftmp3, ftmp4;
729 widefelem tmp;
730 unsigned i;
732 felem_square(tmp, in);
733 felem_reduce(ftmp, tmp); /* 2 */
734 felem_mul(tmp, in, ftmp);
735 felem_reduce(ftmp, tmp); /* 2^2 - 1 */
736 felem_square(tmp, ftmp);
737 felem_reduce(ftmp, tmp); /* 2^3 - 2 */
738 felem_mul(tmp, in, ftmp);
739 felem_reduce(ftmp, tmp); /* 2^3 - 1 */
740 felem_square(tmp, ftmp);
741 felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
742 felem_square(tmp, ftmp2);
743 felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
744 felem_square(tmp, ftmp2);
745 felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
746 felem_mul(tmp, ftmp2, ftmp);
747 felem_reduce(ftmp, tmp); /* 2^6 - 1 */
748 felem_square(tmp, ftmp);
749 felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
750 for (i = 0; i < 5; ++i) { /* 2^12 - 2^6 */
751 felem_square(tmp, ftmp2);
752 felem_reduce(ftmp2, tmp);
754 felem_mul(tmp, ftmp2, ftmp);
755 felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
756 felem_square(tmp, ftmp2);
757 felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
758 for (i = 0; i < 11; ++i) { /* 2^24 - 2^12 */
759 felem_square(tmp, ftmp3);
760 felem_reduce(ftmp3, tmp);
762 felem_mul(tmp, ftmp3, ftmp2);
763 felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
764 felem_square(tmp, ftmp2);
765 felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
766 for (i = 0; i < 23; ++i) { /* 2^48 - 2^24 */
767 felem_square(tmp, ftmp3);
768 felem_reduce(ftmp3, tmp);
770 felem_mul(tmp, ftmp3, ftmp2);
771 felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
772 felem_square(tmp, ftmp3);
773 felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
774 for (i = 0; i < 47; ++i) { /* 2^96 - 2^48 */
775 felem_square(tmp, ftmp4);
776 felem_reduce(ftmp4, tmp);
778 felem_mul(tmp, ftmp3, ftmp4);
779 felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
780 felem_square(tmp, ftmp3);
781 felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
782 for (i = 0; i < 23; ++i) { /* 2^120 - 2^24 */
783 felem_square(tmp, ftmp4);
784 felem_reduce(ftmp4, tmp);
786 felem_mul(tmp, ftmp2, ftmp4);
787 felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
788 for (i = 0; i < 6; ++i) { /* 2^126 - 2^6 */
789 felem_square(tmp, ftmp2);
790 felem_reduce(ftmp2, tmp);
792 felem_mul(tmp, ftmp2, ftmp);
793 felem_reduce(ftmp, tmp); /* 2^126 - 1 */
794 felem_square(tmp, ftmp);
795 felem_reduce(ftmp, tmp); /* 2^127 - 2 */
796 felem_mul(tmp, ftmp, in);
797 felem_reduce(ftmp, tmp); /* 2^127 - 1 */
798 for (i = 0; i < 97; ++i) { /* 2^224 - 2^97 */
799 felem_square(tmp, ftmp);
800 felem_reduce(ftmp, tmp);
802 felem_mul(tmp, ftmp, ftmp3);
803 felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
807 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
808 * out to itself.
810 static void copy_conditional(felem out, const felem in, limb icopy)
812 unsigned i;
814 * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
816 const limb copy = -icopy;
817 for (i = 0; i < 4; ++i) {
818 const limb tmp = copy & (in[i] ^ out[i]);
819 out[i] ^= tmp;
823 /******************************************************************************/
825 * ELLIPTIC CURVE POINT OPERATIONS
827 * Points are represented in Jacobian projective coordinates:
828 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
829 * or to the point at infinity if Z == 0.
834 * Double an elliptic curve point:
835 * (X', Y', Z') = 2 * (X, Y, Z), where
836 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
837 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
838 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
839 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
840 * while x_out == y_in is not (maybe this works, but it's not tested).
842 static void
843 point_double(felem x_out, felem y_out, felem z_out,
844 const felem x_in, const felem y_in, const felem z_in)
846 widefelem tmp, tmp2;
847 felem delta, gamma, beta, alpha, ftmp, ftmp2;
849 felem_assign(ftmp, x_in);
850 felem_assign(ftmp2, x_in);
852 /* delta = z^2 */
853 felem_square(tmp, z_in);
854 felem_reduce(delta, tmp);
856 /* gamma = y^2 */
857 felem_square(tmp, y_in);
858 felem_reduce(gamma, tmp);
860 /* beta = x*gamma */
861 felem_mul(tmp, x_in, gamma);
862 felem_reduce(beta, tmp);
864 /* alpha = 3*(x-delta)*(x+delta) */
865 felem_diff(ftmp, delta);
866 /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
867 felem_sum(ftmp2, delta);
868 /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
869 felem_scalar(ftmp2, 3);
870 /* ftmp2[i] < 3 * 2^58 < 2^60 */
871 felem_mul(tmp, ftmp, ftmp2);
872 /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
873 felem_reduce(alpha, tmp);
875 /* x' = alpha^2 - 8*beta */
876 felem_square(tmp, alpha);
877 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
878 felem_assign(ftmp, beta);
879 felem_scalar(ftmp, 8);
880 /* ftmp[i] < 8 * 2^57 = 2^60 */
881 felem_diff_128_64(tmp, ftmp);
882 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
883 felem_reduce(x_out, tmp);
885 /* z' = (y + z)^2 - gamma - delta */
886 felem_sum(delta, gamma);
887 /* delta[i] < 2^57 + 2^57 = 2^58 */
888 felem_assign(ftmp, y_in);
889 felem_sum(ftmp, z_in);
890 /* ftmp[i] < 2^57 + 2^57 = 2^58 */
891 felem_square(tmp, ftmp);
892 /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
893 felem_diff_128_64(tmp, delta);
894 /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
895 felem_reduce(z_out, tmp);
897 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
898 felem_scalar(beta, 4);
899 /* beta[i] < 4 * 2^57 = 2^59 */
900 felem_diff(beta, x_out);
901 /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
902 felem_mul(tmp, alpha, beta);
903 /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
904 felem_square(tmp2, gamma);
905 /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
906 widefelem_scalar(tmp2, 8);
907 /* tmp2[i] < 8 * 2^116 = 2^119 */
908 widefelem_diff(tmp, tmp2);
909 /* tmp[i] < 2^119 + 2^120 < 2^121 */
910 felem_reduce(y_out, tmp);
914 * Add two elliptic curve points:
915 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
916 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
917 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
918 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
919 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
920 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
922 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
926 * This function is not entirely constant-time: it includes a branch for
927 * checking whether the two input points are equal, (while not equal to the
928 * point at infinity). This case never happens during single point
929 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
931 static void point_add(felem x3, felem y3, felem z3,
932 const felem x1, const felem y1, const felem z1,
933 const int mixed, const felem x2, const felem y2,
934 const felem z2)
936 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
937 widefelem tmp, tmp2;
938 limb z1_is_zero, z2_is_zero, x_equal, y_equal;
940 if (!mixed) {
941 /* ftmp2 = z2^2 */
942 felem_square(tmp, z2);
943 felem_reduce(ftmp2, tmp);
945 /* ftmp4 = z2^3 */
946 felem_mul(tmp, ftmp2, z2);
947 felem_reduce(ftmp4, tmp);
949 /* ftmp4 = z2^3*y1 */
950 felem_mul(tmp2, ftmp4, y1);
951 felem_reduce(ftmp4, tmp2);
953 /* ftmp2 = z2^2*x1 */
954 felem_mul(tmp2, ftmp2, x1);
955 felem_reduce(ftmp2, tmp2);
956 } else {
958 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
961 /* ftmp4 = z2^3*y1 */
962 felem_assign(ftmp4, y1);
964 /* ftmp2 = z2^2*x1 */
965 felem_assign(ftmp2, x1);
968 /* ftmp = z1^2 */
969 felem_square(tmp, z1);
970 felem_reduce(ftmp, tmp);
972 /* ftmp3 = z1^3 */
973 felem_mul(tmp, ftmp, z1);
974 felem_reduce(ftmp3, tmp);
976 /* tmp = z1^3*y2 */
977 felem_mul(tmp, ftmp3, y2);
978 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
980 /* ftmp3 = z1^3*y2 - z2^3*y1 */
981 felem_diff_128_64(tmp, ftmp4);
982 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
983 felem_reduce(ftmp3, tmp);
985 /* tmp = z1^2*x2 */
986 felem_mul(tmp, ftmp, x2);
987 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
989 /* ftmp = z1^2*x2 - z2^2*x1 */
990 felem_diff_128_64(tmp, ftmp2);
991 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
992 felem_reduce(ftmp, tmp);
995 * the formulae are incorrect if the points are equal so we check for
996 * this and do doubling if this happens
998 x_equal = felem_is_zero(ftmp);
999 y_equal = felem_is_zero(ftmp3);
1000 z1_is_zero = felem_is_zero(z1);
1001 z2_is_zero = felem_is_zero(z2);
1002 /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
1003 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
1004 point_double(x3, y3, z3, x1, y1, z1);
1005 return;
1008 /* ftmp5 = z1*z2 */
1009 if (!mixed) {
1010 felem_mul(tmp, z1, z2);
1011 felem_reduce(ftmp5, tmp);
1012 } else {
1013 /* special case z2 = 0 is handled later */
1014 felem_assign(ftmp5, z1);
1017 /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1018 felem_mul(tmp, ftmp, ftmp5);
1019 felem_reduce(z_out, tmp);
1021 /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1022 felem_assign(ftmp5, ftmp);
1023 felem_square(tmp, ftmp);
1024 felem_reduce(ftmp, tmp);
1026 /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1027 felem_mul(tmp, ftmp, ftmp5);
1028 felem_reduce(ftmp5, tmp);
1030 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1031 felem_mul(tmp, ftmp2, ftmp);
1032 felem_reduce(ftmp2, tmp);
1034 /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1035 felem_mul(tmp, ftmp4, ftmp5);
1036 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1038 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1039 felem_square(tmp2, ftmp3);
1040 /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1042 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1043 felem_diff_128_64(tmp2, ftmp5);
1044 /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1046 /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1047 felem_assign(ftmp5, ftmp2);
1048 felem_scalar(ftmp5, 2);
1049 /* ftmp5[i] < 2 * 2^57 = 2^58 */
1052 * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1053 * 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1055 felem_diff_128_64(tmp2, ftmp5);
1056 /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1057 felem_reduce(x_out, tmp2);
1059 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1060 felem_diff(ftmp2, x_out);
1061 /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1064 * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1066 felem_mul(tmp2, ftmp3, ftmp2);
1067 /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1070 * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1071 * z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1073 widefelem_diff(tmp2, tmp);
1074 /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1075 felem_reduce(y_out, tmp2);
1078 * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1079 * the point at infinity, so we need to check for this separately
1083 * if point 1 is at infinity, copy point 2 to output, and vice versa
1085 copy_conditional(x_out, x2, z1_is_zero);
1086 copy_conditional(x_out, x1, z2_is_zero);
1087 copy_conditional(y_out, y2, z1_is_zero);
1088 copy_conditional(y_out, y1, z2_is_zero);
1089 copy_conditional(z_out, z2, z1_is_zero);
1090 copy_conditional(z_out, z1, z2_is_zero);
1091 felem_assign(x3, x_out);
1092 felem_assign(y3, y_out);
1093 felem_assign(z3, z_out);
1097 * select_point selects the |idx|th point from a precomputation table and
1098 * copies it to out.
1099 * The pre_comp array argument should be size of |size| argument
1101 static void select_point(const u64 idx, unsigned int size,
1102 const felem pre_comp[][3], felem out[3])
1104 unsigned i, j;
1105 limb *outlimbs = &out[0][0];
1106 memset(outlimbs, 0, 3 * sizeof(felem));
1108 for (i = 0; i < size; i++) {
1109 const limb *inlimbs = &pre_comp[i][0][0];
1110 u64 mask = i ^ idx;
1111 mask |= mask >> 4;
1112 mask |= mask >> 2;
1113 mask |= mask >> 1;
1114 mask &= 1;
1115 mask--;
1116 for (j = 0; j < 4 * 3; j++)
1117 outlimbs[j] |= inlimbs[j] & mask;
1121 /* get_bit returns the |i|th bit in |in| */
1122 static char get_bit(const felem_bytearray in, unsigned i)
1124 if (i >= 224)
1125 return 0;
1126 return (in[i >> 3] >> (i & 7)) & 1;
1130 * Interleaved point multiplication using precomputed point multiples: The
1131 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1132 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1133 * generator, using certain (large) precomputed multiples in g_pre_comp.
1134 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1136 static void batch_mul(felem x_out, felem y_out, felem z_out,
1137 const felem_bytearray scalars[],
1138 const unsigned num_points, const u8 *g_scalar,
1139 const int mixed, const felem pre_comp[][17][3],
1140 const felem g_pre_comp[2][16][3])
1142 int i, skip;
1143 unsigned num;
1144 unsigned gen_mul = (g_scalar != NULL);
1145 felem nq[3], tmp[4];
1146 u64 bits;
1147 u8 sign, digit;
1149 /* set nq to the point at infinity */
1150 memset(nq, 0, 3 * sizeof(felem));
1153 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1154 * of the generator (two in each of the last 28 rounds) and additions of
1155 * other points multiples (every 5th round).
1157 skip = 1; /* save two point operations in the first
1158 * round */
1159 for (i = (num_points ? 220 : 27); i >= 0; --i) {
1160 /* double */
1161 if (!skip)
1162 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1164 /* add multiples of the generator */
1165 if (gen_mul && (i <= 27)) {
1166 /* first, look 28 bits upwards */
1167 bits = get_bit(g_scalar, i + 196) << 3;
1168 bits |= get_bit(g_scalar, i + 140) << 2;
1169 bits |= get_bit(g_scalar, i + 84) << 1;
1170 bits |= get_bit(g_scalar, i + 28);
1171 /* select the point to add, in constant time */
1172 select_point(bits, 16, g_pre_comp[1], tmp);
1174 if (!skip) {
1175 /* value 1 below is argument for "mixed" */
1176 point_add(nq[0], nq[1], nq[2],
1177 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1178 } else {
1179 memcpy(nq, tmp, 3 * sizeof(felem));
1180 skip = 0;
1183 /* second, look at the current position */
1184 bits = get_bit(g_scalar, i + 168) << 3;
1185 bits |= get_bit(g_scalar, i + 112) << 2;
1186 bits |= get_bit(g_scalar, i + 56) << 1;
1187 bits |= get_bit(g_scalar, i);
1188 /* select the point to add, in constant time */
1189 select_point(bits, 16, g_pre_comp[0], tmp);
1190 point_add(nq[0], nq[1], nq[2],
1191 nq[0], nq[1], nq[2],
1192 1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1195 /* do other additions every 5 doublings */
1196 if (num_points && (i % 5 == 0)) {
1197 /* loop over all scalars */
1198 for (num = 0; num < num_points; ++num) {
1199 bits = get_bit(scalars[num], i + 4) << 5;
1200 bits |= get_bit(scalars[num], i + 3) << 4;
1201 bits |= get_bit(scalars[num], i + 2) << 3;
1202 bits |= get_bit(scalars[num], i + 1) << 2;
1203 bits |= get_bit(scalars[num], i) << 1;
1204 bits |= get_bit(scalars[num], i - 1);
1205 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1207 /* select the point to add or subtract */
1208 select_point(digit, 17, pre_comp[num], tmp);
1209 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1210 * point */
1211 copy_conditional(tmp[1], tmp[3], sign);
1213 if (!skip) {
1214 point_add(nq[0], nq[1], nq[2],
1215 nq[0], nq[1], nq[2],
1216 mixed, tmp[0], tmp[1], tmp[2]);
1217 } else {
1218 memcpy(nq, tmp, 3 * sizeof(felem));
1219 skip = 0;
1224 felem_assign(x_out, nq[0]);
1225 felem_assign(y_out, nq[1]);
1226 felem_assign(z_out, nq[2]);
1229 /******************************************************************************/
1231 * FUNCTIONS TO MANAGE PRECOMPUTATION
1234 static NISTP224_PRE_COMP *nistp224_pre_comp_new()
1236 NISTP224_PRE_COMP *ret = NULL;
1237 ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof *ret);
1238 if (!ret) {
1239 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1240 return ret;
1242 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1243 ret->references = 1;
1244 return ret;
1247 static void *nistp224_pre_comp_dup(void *src_)
1249 NISTP224_PRE_COMP *src = src_;
1251 /* no need to actually copy, these objects never change! */
1252 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1254 return src_;
1257 static void nistp224_pre_comp_free(void *pre_)
1259 int i;
1260 NISTP224_PRE_COMP *pre = pre_;
1262 if (!pre)
1263 return;
1265 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1266 if (i > 0)
1267 return;
1269 OPENSSL_free(pre);
1272 static void nistp224_pre_comp_clear_free(void *pre_)
1274 int i;
1275 NISTP224_PRE_COMP *pre = pre_;
1277 if (!pre)
1278 return;
1280 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1281 if (i > 0)
1282 return;
1284 OPENSSL_cleanse(pre, sizeof *pre);
1285 OPENSSL_free(pre);
1288 /******************************************************************************/
1290 * OPENSSL EC_METHOD FUNCTIONS
1293 int ec_GFp_nistp224_group_init(EC_GROUP *group)
1295 int ret;
1296 ret = ec_GFp_simple_group_init(group);
1297 group->a_is_minus3 = 1;
1298 return ret;
1301 int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1302 const BIGNUM *a, const BIGNUM *b,
1303 BN_CTX *ctx)
1305 int ret = 0;
1306 BN_CTX *new_ctx = NULL;
1307 BIGNUM *curve_p, *curve_a, *curve_b;
1309 if (ctx == NULL)
1310 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1311 return 0;
1312 BN_CTX_start(ctx);
1313 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1314 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1315 ((curve_b = BN_CTX_get(ctx)) == NULL))
1316 goto err;
1317 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1318 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1319 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1320 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1321 ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1322 EC_R_WRONG_CURVE_PARAMETERS);
1323 goto err;
1325 group->field_mod_func = BN_nist_mod_224;
1326 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1327 err:
1328 BN_CTX_end(ctx);
1329 if (new_ctx != NULL)
1330 BN_CTX_free(new_ctx);
1331 return ret;
1335 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1336 * (X/Z^2, Y/Z^3)
1338 int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1339 const EC_POINT *point,
1340 BIGNUM *x, BIGNUM *y,
1341 BN_CTX *ctx)
1343 felem z1, z2, x_in, y_in, x_out, y_out;
1344 widefelem tmp;
1346 if (EC_POINT_is_at_infinity(group, point)) {
1347 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1348 EC_R_POINT_AT_INFINITY);
1349 return 0;
1351 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1352 (!BN_to_felem(z1, &point->Z)))
1353 return 0;
1354 felem_inv(z2, z1);
1355 felem_square(tmp, z2);
1356 felem_reduce(z1, tmp);
1357 felem_mul(tmp, x_in, z1);
1358 felem_reduce(x_in, tmp);
1359 felem_contract(x_out, x_in);
1360 if (x != NULL) {
1361 if (!felem_to_BN(x, x_out)) {
1362 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1363 ERR_R_BN_LIB);
1364 return 0;
1367 felem_mul(tmp, z1, z2);
1368 felem_reduce(z1, tmp);
1369 felem_mul(tmp, y_in, z1);
1370 felem_reduce(y_in, tmp);
1371 felem_contract(y_out, y_in);
1372 if (y != NULL) {
1373 if (!felem_to_BN(y, y_out)) {
1374 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1375 ERR_R_BN_LIB);
1376 return 0;
1379 return 1;
1382 static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1383 felem tmp_felems[ /* num+1 */ ])
1386 * Runs in constant time, unless an input is the point at infinity (which
1387 * normally shouldn't happen).
1389 ec_GFp_nistp_points_make_affine_internal(num,
1390 points,
1391 sizeof(felem),
1392 tmp_felems,
1393 (void (*)(void *))felem_one,
1394 (int (*)(const void *))
1395 felem_is_zero_int,
1396 (void (*)(void *, const void *))
1397 felem_assign,
1398 (void (*)(void *, const void *))
1399 felem_square_reduce, (void (*)
1400 (void *,
1401 const void
1403 const void
1405 felem_mul_reduce,
1406 (void (*)(void *, const void *))
1407 felem_inv,
1408 (void (*)(void *, const void *))
1409 felem_contract);
1413 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1414 * values Result is stored in r (r can equal one of the inputs).
1416 int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1417 const BIGNUM *scalar, size_t num,
1418 const EC_POINT *points[],
1419 const BIGNUM *scalars[], BN_CTX *ctx)
1421 int ret = 0;
1422 int j;
1423 unsigned i;
1424 int mixed = 0;
1425 BN_CTX *new_ctx = NULL;
1426 BIGNUM *x, *y, *z, *tmp_scalar;
1427 felem_bytearray g_secret;
1428 felem_bytearray *secrets = NULL;
1429 felem(*pre_comp)[17][3] = NULL;
1430 felem *tmp_felems = NULL;
1431 felem_bytearray tmp;
1432 unsigned num_bytes;
1433 int have_pre_comp = 0;
1434 size_t num_points = num;
1435 felem x_in, y_in, z_in, x_out, y_out, z_out;
1436 NISTP224_PRE_COMP *pre = NULL;
1437 const felem(*g_pre_comp)[16][3] = NULL;
1438 EC_POINT *generator = NULL;
1439 const EC_POINT *p = NULL;
1440 const BIGNUM *p_scalar = NULL;
1442 if (ctx == NULL)
1443 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1444 return 0;
1445 BN_CTX_start(ctx);
1446 if (((x = BN_CTX_get(ctx)) == NULL) ||
1447 ((y = BN_CTX_get(ctx)) == NULL) ||
1448 ((z = BN_CTX_get(ctx)) == NULL) ||
1449 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1450 goto err;
1452 if (scalar != NULL) {
1453 pre = EC_EX_DATA_get_data(group->extra_data,
1454 nistp224_pre_comp_dup,
1455 nistp224_pre_comp_free,
1456 nistp224_pre_comp_clear_free);
1457 if (pre)
1458 /* we have precomputation, try to use it */
1459 g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1460 else
1461 /* try to use the standard precomputation */
1462 g_pre_comp = &gmul[0];
1463 generator = EC_POINT_new(group);
1464 if (generator == NULL)
1465 goto err;
1466 /* get the generator from precomputation */
1467 if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1468 !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1469 !felem_to_BN(z, g_pre_comp[0][1][2])) {
1470 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1471 goto err;
1473 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1474 generator, x, y, z,
1475 ctx))
1476 goto err;
1477 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1478 /* precomputation matches generator */
1479 have_pre_comp = 1;
1480 else
1482 * we don't have valid precomputation: treat the generator as a
1483 * random point
1485 num_points = num_points + 1;
1488 if (num_points > 0) {
1489 if (num_points >= 3) {
1491 * unless we precompute multiples for just one or two points,
1492 * converting those into affine form is time well spent
1494 mixed = 1;
1496 secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1497 pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
1498 if (mixed)
1499 tmp_felems =
1500 OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
1501 if ((secrets == NULL) || (pre_comp == NULL)
1502 || (mixed && (tmp_felems == NULL))) {
1503 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1504 goto err;
1508 * we treat NULL scalars as 0, and NULL points as points at infinity,
1509 * i.e., they contribute nothing to the linear combination
1511 memset(secrets, 0, num_points * sizeof(felem_bytearray));
1512 memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
1513 for (i = 0; i < num_points; ++i) {
1514 if (i == num)
1515 /* the generator */
1517 p = EC_GROUP_get0_generator(group);
1518 p_scalar = scalar;
1519 } else
1520 /* the i^th point */
1522 p = points[i];
1523 p_scalar = scalars[i];
1525 if ((p_scalar != NULL) && (p != NULL)) {
1526 /* reduce scalar to 0 <= scalar < 2^224 */
1527 if ((BN_num_bits(p_scalar) > 224)
1528 || (BN_is_negative(p_scalar))) {
1530 * this is an unusual input, and we don't guarantee
1531 * constant-timeness
1533 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) {
1534 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1535 goto err;
1537 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1538 } else
1539 num_bytes = BN_bn2bin(p_scalar, tmp);
1540 flip_endian(secrets[i], tmp, num_bytes);
1541 /* precompute multiples */
1542 if ((!BN_to_felem(x_out, &p->X)) ||
1543 (!BN_to_felem(y_out, &p->Y)) ||
1544 (!BN_to_felem(z_out, &p->Z)))
1545 goto err;
1546 felem_assign(pre_comp[i][1][0], x_out);
1547 felem_assign(pre_comp[i][1][1], y_out);
1548 felem_assign(pre_comp[i][1][2], z_out);
1549 for (j = 2; j <= 16; ++j) {
1550 if (j & 1) {
1551 point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1552 pre_comp[i][j][2], pre_comp[i][1][0],
1553 pre_comp[i][1][1], pre_comp[i][1][2], 0,
1554 pre_comp[i][j - 1][0],
1555 pre_comp[i][j - 1][1],
1556 pre_comp[i][j - 1][2]);
1557 } else {
1558 point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1559 pre_comp[i][j][2], pre_comp[i][j / 2][0],
1560 pre_comp[i][j / 2][1],
1561 pre_comp[i][j / 2][2]);
1566 if (mixed)
1567 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1570 /* the scalar for the generator */
1571 if ((scalar != NULL) && (have_pre_comp)) {
1572 memset(g_secret, 0, sizeof g_secret);
1573 /* reduce scalar to 0 <= scalar < 2^224 */
1574 if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1576 * this is an unusual input, and we don't guarantee
1577 * constant-timeness
1579 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
1580 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1581 goto err;
1583 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1584 } else
1585 num_bytes = BN_bn2bin(scalar, tmp);
1586 flip_endian(g_secret, tmp, num_bytes);
1587 /* do the multiplication with generator precomputation */
1588 batch_mul(x_out, y_out, z_out,
1589 (const felem_bytearray(*))secrets, num_points,
1590 g_secret,
1591 mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1592 } else
1593 /* do the multiplication without generator precomputation */
1594 batch_mul(x_out, y_out, z_out,
1595 (const felem_bytearray(*))secrets, num_points,
1596 NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1597 /* reduce the output to its unique minimal representation */
1598 felem_contract(x_in, x_out);
1599 felem_contract(y_in, y_out);
1600 felem_contract(z_in, z_out);
1601 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1602 (!felem_to_BN(z, z_in))) {
1603 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1604 goto err;
1606 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1608 err:
1609 BN_CTX_end(ctx);
1610 if (generator != NULL)
1611 EC_POINT_free(generator);
1612 if (new_ctx != NULL)
1613 BN_CTX_free(new_ctx);
1614 if (secrets != NULL)
1615 OPENSSL_free(secrets);
1616 if (pre_comp != NULL)
1617 OPENSSL_free(pre_comp);
1618 if (tmp_felems != NULL)
1619 OPENSSL_free(tmp_felems);
1620 return ret;
1623 int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1625 int ret = 0;
1626 NISTP224_PRE_COMP *pre = NULL;
1627 int i, j;
1628 BN_CTX *new_ctx = NULL;
1629 BIGNUM *x, *y;
1630 EC_POINT *generator = NULL;
1631 felem tmp_felems[32];
1633 /* throw away old precomputation */
1634 EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup,
1635 nistp224_pre_comp_free,
1636 nistp224_pre_comp_clear_free);
1637 if (ctx == NULL)
1638 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1639 return 0;
1640 BN_CTX_start(ctx);
1641 if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
1642 goto err;
1643 /* get the generator */
1644 if (group->generator == NULL)
1645 goto err;
1646 generator = EC_POINT_new(group);
1647 if (generator == NULL)
1648 goto err;
1649 BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1650 BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1651 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1652 goto err;
1653 if ((pre = nistp224_pre_comp_new()) == NULL)
1654 goto err;
1656 * if the generator is the standard one, use built-in precomputation
1658 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1659 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1660 ret = 1;
1661 goto err;
1663 if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) ||
1664 (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) ||
1665 (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z)))
1666 goto err;
1668 * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1669 * 2^140*G, 2^196*G for the second one
1671 for (i = 1; i <= 8; i <<= 1) {
1672 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1673 pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1674 pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1675 for (j = 0; j < 27; ++j) {
1676 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1677 pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1678 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1680 if (i == 8)
1681 break;
1682 point_double(pre->g_pre_comp[0][2 * i][0],
1683 pre->g_pre_comp[0][2 * i][1],
1684 pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1685 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1686 for (j = 0; j < 27; ++j) {
1687 point_double(pre->g_pre_comp[0][2 * i][0],
1688 pre->g_pre_comp[0][2 * i][1],
1689 pre->g_pre_comp[0][2 * i][2],
1690 pre->g_pre_comp[0][2 * i][0],
1691 pre->g_pre_comp[0][2 * i][1],
1692 pre->g_pre_comp[0][2 * i][2]);
1695 for (i = 0; i < 2; i++) {
1696 /* g_pre_comp[i][0] is the point at infinity */
1697 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1698 /* the remaining multiples */
1699 /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1700 point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1701 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1702 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1703 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1704 pre->g_pre_comp[i][2][2]);
1705 /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1706 point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1707 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1708 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1709 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1710 pre->g_pre_comp[i][2][2]);
1711 /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1712 point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1713 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1714 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1715 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1716 pre->g_pre_comp[i][4][2]);
1718 * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1720 point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1721 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1722 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1723 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1724 pre->g_pre_comp[i][2][2]);
1725 for (j = 1; j < 8; ++j) {
1726 /* odd multiples: add G resp. 2^28*G */
1727 point_add(pre->g_pre_comp[i][2 * j + 1][0],
1728 pre->g_pre_comp[i][2 * j + 1][1],
1729 pre->g_pre_comp[i][2 * j + 1][2],
1730 pre->g_pre_comp[i][2 * j][0],
1731 pre->g_pre_comp[i][2 * j][1],
1732 pre->g_pre_comp[i][2 * j][2], 0,
1733 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1734 pre->g_pre_comp[i][1][2]);
1737 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1739 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup,
1740 nistp224_pre_comp_free,
1741 nistp224_pre_comp_clear_free))
1742 goto err;
1743 ret = 1;
1744 pre = NULL;
1745 err:
1746 BN_CTX_end(ctx);
1747 if (generator != NULL)
1748 EC_POINT_free(generator);
1749 if (new_ctx != NULL)
1750 BN_CTX_free(new_ctx);
1751 if (pre)
1752 nistp224_pre_comp_free(pre);
1753 return ret;
1756 int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1758 if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup,
1759 nistp224_pre_comp_free,
1760 nistp224_pre_comp_clear_free)
1761 != NULL)
1762 return 1;
1763 else
1764 return 0;
1767 #else
1768 static void *dummy = &dummy;
1769 #endif