update libressl to 2.8.2
[unleashed.git] / lib / libcrypto / ec / ecp_nistp224.c
blob643e9a69a68674c42edd250533d506226746c658
1 /* $OpenBSD: ecp_nistp224.c,v 1.22 2018/07/15 16:27:39 tb Exp $ */
2 /*
3 * Written by Emilia Kasper (Google) for the OpenSSL project.
4 */
5 /*
6 * Copyright (c) 2011 Google Inc.
8 * Permission to use, copy, modify, and distribute this software for any
9 * purpose with or without fee is hereby granted, provided that the above
10 * copyright notice and this permission notice appear in all copies.
12 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
24 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
25 * and Adam Langley's public domain 64-bit C implementation of curve25519
28 #include <stdint.h>
29 #include <string.h>
31 #include <openssl/opensslconf.h>
33 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
35 #include <openssl/err.h>
36 #include "ec_lcl.h"
38 #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
39 /* even with gcc, the typedef won't work for 32-bit platforms */
40 typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */
41 #else
42 #error "Need GCC 3.1 or later to define type uint128_t"
43 #endif
45 typedef uint8_t u8;
46 typedef uint64_t u64;
47 typedef int64_t s64;
50 /******************************************************************************/
51 /* INTERNAL REPRESENTATION OF FIELD ELEMENTS
53 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
54 * using 64-bit coefficients called 'limbs',
55 * and sometimes (for multiplication results) as
56 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
57 * using 128-bit coefficients called 'widelimbs'.
58 * A 4-limb representation is an 'felem';
59 * a 7-widelimb representation is a 'widefelem'.
60 * Even within felems, bits of adjacent limbs overlap, and we don't always
61 * reduce the representations: we ensure that inputs to each felem
62 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
63 * and fit into a 128-bit word without overflow. The coefficients are then
64 * again partially reduced to obtain an felem satisfying a_i < 2^57.
65 * We only reduce to the unique minimal representation at the end of the
66 * computation.
69 typedef uint64_t limb;
70 typedef uint128_t widelimb;
72 typedef limb felem[4];
73 typedef widelimb widefelem[7];
75 /* Field element represented as a byte arrary.
76 * 28*8 = 224 bits is also the group order size for the elliptic curve,
77 * and we also use this type for scalars for point multiplication.
79 typedef u8 felem_bytearray[28];
81 static const felem_bytearray nistp224_curve_params[5] = {
82 {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, /* p */
83 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x00,0x00,0x00,0x00,
84 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01},
85 {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, /* a */
86 0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0xFF,0xFF,0xFF,0xFF,
87 0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE},
88 {0xB4,0x05,0x0A,0x85,0x0C,0x04,0xB3,0xAB,0xF5,0x41, /* b */
89 0x32,0x56,0x50,0x44,0xB0,0xB7,0xD7,0xBF,0xD8,0xBA,
90 0x27,0x0B,0x39,0x43,0x23,0x55,0xFF,0xB4},
91 {0xB7,0x0E,0x0C,0xBD,0x6B,0xB4,0xBF,0x7F,0x32,0x13, /* x */
92 0x90,0xB9,0x4A,0x03,0xC1,0xD3,0x56,0xC2,0x11,0x22,
93 0x34,0x32,0x80,0xD6,0x11,0x5C,0x1D,0x21},
94 {0xbd,0x37,0x63,0x88,0xb5,0xf7,0x23,0xfb,0x4c,0x22, /* y */
95 0xdf,0xe6,0xcd,0x43,0x75,0xa0,0x5a,0x07,0x47,0x64,
96 0x44,0xd5,0x81,0x99,0x85,0x00,0x7e,0x34}
99 /* Precomputed multiples of the standard generator
100 * Points are given in coordinates (X, Y, Z) where Z normally is 1
101 * (0 for the point at infinity).
102 * For each field element, slice a_0 is word 0, etc.
104 * The table has 2 * 16 elements, starting with the following:
105 * index | bits | point
106 * ------+---------+------------------------------
107 * 0 | 0 0 0 0 | 0G
108 * 1 | 0 0 0 1 | 1G
109 * 2 | 0 0 1 0 | 2^56G
110 * 3 | 0 0 1 1 | (2^56 + 1)G
111 * 4 | 0 1 0 0 | 2^112G
112 * 5 | 0 1 0 1 | (2^112 + 1)G
113 * 6 | 0 1 1 0 | (2^112 + 2^56)G
114 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
115 * 8 | 1 0 0 0 | 2^168G
116 * 9 | 1 0 0 1 | (2^168 + 1)G
117 * 10 | 1 0 1 0 | (2^168 + 2^56)G
118 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
119 * 12 | 1 1 0 0 | (2^168 + 2^112)G
120 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
121 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
122 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
123 * followed by a copy of this with each element multiplied by 2^28.
125 * The reason for this is so that we can clock bits into four different
126 * locations when doing simple scalar multiplies against the base point,
127 * and then another four locations using the second 16 elements.
129 static const felem gmul[2][16][3] =
130 {{{{0, 0, 0, 0},
131 {0, 0, 0, 0},
132 {0, 0, 0, 0}},
133 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
134 {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
135 {1, 0, 0, 0}},
136 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
137 {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
138 {1, 0, 0, 0}},
139 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
140 {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
141 {1, 0, 0, 0}},
142 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
143 {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
144 {1, 0, 0, 0}},
145 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
146 {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
147 {1, 0, 0, 0}},
148 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
149 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
150 {1, 0, 0, 0}},
151 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
152 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
153 {1, 0, 0, 0}},
154 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
155 {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
156 {1, 0, 0, 0}},
157 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
158 {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
159 {1, 0, 0, 0}},
160 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
161 {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
162 {1, 0, 0, 0}},
163 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
164 {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
165 {1, 0, 0, 0}},
166 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
167 {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
168 {1, 0, 0, 0}},
169 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
170 {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
171 {1, 0, 0, 0}},
172 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
173 {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
174 {1, 0, 0, 0}},
175 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
176 {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
177 {1, 0, 0, 0}}},
178 {{{0, 0, 0, 0},
179 {0, 0, 0, 0},
180 {0, 0, 0, 0}},
181 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
182 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
183 {1, 0, 0, 0}},
184 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
185 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
186 {1, 0, 0, 0}},
187 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
188 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
189 {1, 0, 0, 0}},
190 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
191 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
192 {1, 0, 0, 0}},
193 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
194 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
195 {1, 0, 0, 0}},
196 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
197 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
198 {1, 0, 0, 0}},
199 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
200 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
201 {1, 0, 0, 0}},
202 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
203 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
204 {1, 0, 0, 0}},
205 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
206 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
207 {1, 0, 0, 0}},
208 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
209 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
210 {1, 0, 0, 0}},
211 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
212 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
213 {1, 0, 0, 0}},
214 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
215 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
216 {1, 0, 0, 0}},
217 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
218 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
219 {1, 0, 0, 0}},
220 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
221 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
222 {1, 0, 0, 0}},
223 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
224 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
225 {1, 0, 0, 0}}}};
227 /* Precomputation for the group generator. */
228 typedef struct {
229 felem g_pre_comp[2][16][3];
230 int references;
231 } NISTP224_PRE_COMP;
233 const EC_METHOD *
234 EC_GFp_nistp224_method(void)
236 static const EC_METHOD ret = {
237 .flags = EC_FLAGS_DEFAULT_OCT,
238 .field_type = NID_X9_62_prime_field,
239 .group_init = ec_GFp_nistp224_group_init,
240 .group_finish = ec_GFp_simple_group_finish,
241 .group_clear_finish = ec_GFp_simple_group_clear_finish,
242 .group_copy = ec_GFp_nist_group_copy,
243 .group_set_curve = ec_GFp_nistp224_group_set_curve,
244 .group_get_curve = ec_GFp_simple_group_get_curve,
245 .group_get_degree = ec_GFp_simple_group_get_degree,
246 .group_check_discriminant =
247 ec_GFp_simple_group_check_discriminant,
248 .point_init = ec_GFp_simple_point_init,
249 .point_finish = ec_GFp_simple_point_finish,
250 .point_clear_finish = ec_GFp_simple_point_clear_finish,
251 .point_copy = ec_GFp_simple_point_copy,
252 .point_set_to_infinity = ec_GFp_simple_point_set_to_infinity,
253 .point_set_Jprojective_coordinates_GFp =
254 ec_GFp_simple_set_Jprojective_coordinates_GFp,
255 .point_get_Jprojective_coordinates_GFp =
256 ec_GFp_simple_get_Jprojective_coordinates_GFp,
257 .point_set_affine_coordinates =
258 ec_GFp_simple_point_set_affine_coordinates,
259 .point_get_affine_coordinates =
260 ec_GFp_nistp224_point_get_affine_coordinates,
261 .add = ec_GFp_simple_add,
262 .dbl = ec_GFp_simple_dbl,
263 .invert = ec_GFp_simple_invert,
264 .is_at_infinity = ec_GFp_simple_is_at_infinity,
265 .is_on_curve = ec_GFp_simple_is_on_curve,
266 .point_cmp = ec_GFp_simple_cmp,
267 .make_affine = ec_GFp_simple_make_affine,
268 .points_make_affine = ec_GFp_simple_points_make_affine,
269 .mul = ec_GFp_nistp224_points_mul,
270 .precompute_mult = ec_GFp_nistp224_precompute_mult,
271 .have_precompute_mult = ec_GFp_nistp224_have_precompute_mult,
272 .field_mul = ec_GFp_nist_field_mul,
273 .field_sqr = ec_GFp_nist_field_sqr
276 return &ret;
279 /* Helper functions to convert field elements to/from internal representation */
280 static void
281 bin28_to_felem(felem out, const u8 in[28])
283 out[0] = *((const uint64_t *) (in)) & 0x00ffffffffffffff;
284 out[1] = (*((const uint64_t *) (in + 7))) & 0x00ffffffffffffff;
285 out[2] = (*((const uint64_t *) (in + 14))) & 0x00ffffffffffffff;
286 out[3] = (*((const uint64_t *) (in + 21))) & 0x00ffffffffffffff;
289 static void
290 felem_to_bin28(u8 out[28], const felem in)
292 unsigned i;
293 for (i = 0; i < 7; ++i) {
294 out[i] = in[0] >> (8 * i);
295 out[i + 7] = in[1] >> (8 * i);
296 out[i + 14] = in[2] >> (8 * i);
297 out[i + 21] = in[3] >> (8 * i);
301 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
302 static void
303 flip_endian(u8 * out, const u8 * in, unsigned len)
305 unsigned i;
306 for (i = 0; i < len; ++i)
307 out[i] = in[len - 1 - i];
310 /* From OpenSSL BIGNUM to internal representation */
311 static int
312 BN_to_felem(felem out, const BIGNUM * bn)
314 felem_bytearray b_in;
315 felem_bytearray b_out;
316 unsigned num_bytes;
318 /* BN_bn2bin eats leading zeroes */
319 memset(b_out, 0, sizeof b_out);
320 num_bytes = BN_num_bytes(bn);
321 if (num_bytes > sizeof b_out) {
322 ECerror(EC_R_BIGNUM_OUT_OF_RANGE);
323 return 0;
325 if (BN_is_negative(bn)) {
326 ECerror(EC_R_BIGNUM_OUT_OF_RANGE);
327 return 0;
329 num_bytes = BN_bn2bin(bn, b_in);
330 flip_endian(b_out, b_in, num_bytes);
331 bin28_to_felem(out, b_out);
332 return 1;
335 /* From internal representation to OpenSSL BIGNUM */
336 static BIGNUM *
337 felem_to_BN(BIGNUM * out, const felem in)
339 felem_bytearray b_in, b_out;
340 felem_to_bin28(b_in, in);
341 flip_endian(b_out, b_in, sizeof b_out);
342 return BN_bin2bn(b_out, sizeof b_out, out);
345 /******************************************************************************/
346 /* FIELD OPERATIONS
348 * Field operations, using the internal representation of field elements.
349 * NB! These operations are specific to our point multiplication and cannot be
350 * expected to be correct in general - e.g., multiplication with a large scalar
351 * will cause an overflow.
355 static void
356 felem_one(felem out)
358 out[0] = 1;
359 out[1] = 0;
360 out[2] = 0;
361 out[3] = 0;
364 static void
365 felem_assign(felem out, const felem in)
367 out[0] = in[0];
368 out[1] = in[1];
369 out[2] = in[2];
370 out[3] = in[3];
373 /* Sum two field elements: out += in */
374 static void
375 felem_sum(felem out, const felem in)
377 out[0] += in[0];
378 out[1] += in[1];
379 out[2] += in[2];
380 out[3] += in[3];
383 /* Get negative value: out = -in */
384 /* Assumes in[i] < 2^57 */
385 static void
386 felem_neg(felem out, const felem in)
388 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
389 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
390 static const limb two58m42m2 = (((limb) 1) << 58) -
391 (((limb) 1) << 42) - (((limb) 1) << 2);
393 /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
394 out[0] = two58p2 - in[0];
395 out[1] = two58m42m2 - in[1];
396 out[2] = two58m2 - in[2];
397 out[3] = two58m2 - in[3];
400 /* Subtract field elements: out -= in */
401 /* Assumes in[i] < 2^57 */
402 static void
403 felem_diff(felem out, const felem in)
405 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
406 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
407 static const limb two58m42m2 = (((limb) 1) << 58) -
408 (((limb) 1) << 42) - (((limb) 1) << 2);
410 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
411 out[0] += two58p2;
412 out[1] += two58m42m2;
413 out[2] += two58m2;
414 out[3] += two58m2;
416 out[0] -= in[0];
417 out[1] -= in[1];
418 out[2] -= in[2];
419 out[3] -= in[3];
422 /* Subtract in unreduced 128-bit mode: out -= in */
423 /* Assumes in[i] < 2^119 */
424 static void
425 widefelem_diff(widefelem out, const widefelem in)
427 static const widelimb two120 = ((widelimb) 1) << 120;
428 static const widelimb two120m64 = (((widelimb) 1) << 120) -
429 (((widelimb) 1) << 64);
430 static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
431 (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
433 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
434 out[0] += two120;
435 out[1] += two120m64;
436 out[2] += two120m64;
437 out[3] += two120;
438 out[4] += two120m104m64;
439 out[5] += two120m64;
440 out[6] += two120m64;
442 out[0] -= in[0];
443 out[1] -= in[1];
444 out[2] -= in[2];
445 out[3] -= in[3];
446 out[4] -= in[4];
447 out[5] -= in[5];
448 out[6] -= in[6];
451 /* Subtract in mixed mode: out128 -= in64 */
452 /* in[i] < 2^63 */
453 static void
454 felem_diff_128_64(widefelem out, const felem in)
456 static const widelimb two64p8 = (((widelimb) 1) << 64) +
457 (((widelimb) 1) << 8);
458 static const widelimb two64m8 = (((widelimb) 1) << 64) -
459 (((widelimb) 1) << 8);
460 static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
461 (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
463 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
464 out[0] += two64p8;
465 out[1] += two64m48m8;
466 out[2] += two64m8;
467 out[3] += two64m8;
469 out[0] -= in[0];
470 out[1] -= in[1];
471 out[2] -= in[2];
472 out[3] -= in[3];
475 /* Multiply a field element by a scalar: out = out * scalar
476 * The scalars we actually use are small, so results fit without overflow */
477 static void
478 felem_scalar(felem out, const limb scalar)
480 out[0] *= scalar;
481 out[1] *= scalar;
482 out[2] *= scalar;
483 out[3] *= scalar;
486 /* Multiply an unreduced field element by a scalar: out = out * scalar
487 * The scalars we actually use are small, so results fit without overflow */
488 static void
489 widefelem_scalar(widefelem out, const widelimb scalar)
491 out[0] *= scalar;
492 out[1] *= scalar;
493 out[2] *= scalar;
494 out[3] *= scalar;
495 out[4] *= scalar;
496 out[5] *= scalar;
497 out[6] *= scalar;
500 /* Square a field element: out = in^2 */
501 static void
502 felem_square(widefelem out, const felem in)
504 limb tmp0, tmp1, tmp2;
505 tmp0 = 2 * in[0];
506 tmp1 = 2 * in[1];
507 tmp2 = 2 * in[2];
508 out[0] = ((widelimb) in[0]) * in[0];
509 out[1] = ((widelimb) in[0]) * tmp1;
510 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
511 out[3] = ((widelimb) in[3]) * tmp0 +
512 ((widelimb) in[1]) * tmp2;
513 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
514 out[5] = ((widelimb) in[3]) * tmp2;
515 out[6] = ((widelimb) in[3]) * in[3];
518 /* Multiply two field elements: out = in1 * in2 */
519 static void
520 felem_mul(widefelem out, const felem in1, const felem in2)
522 out[0] = ((widelimb) in1[0]) * in2[0];
523 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
524 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
525 ((widelimb) in1[2]) * in2[0];
526 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
527 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
528 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
529 ((widelimb) in1[3]) * in2[1];
530 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
531 out[6] = ((widelimb) in1[3]) * in2[3];
534 /* Reduce seven 128-bit coefficients to four 64-bit coefficients.
535 * Requires in[i] < 2^126,
536 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
537 static void
538 felem_reduce(felem out, const widefelem in)
540 static const widelimb two127p15 = (((widelimb) 1) << 127) +
541 (((widelimb) 1) << 15);
542 static const widelimb two127m71 = (((widelimb) 1) << 127) -
543 (((widelimb) 1) << 71);
544 static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
545 (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
546 widelimb output[5];
548 /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
549 output[0] = in[0] + two127p15;
550 output[1] = in[1] + two127m71m55;
551 output[2] = in[2] + two127m71;
552 output[3] = in[3];
553 output[4] = in[4];
555 /* Eliminate in[4], in[5], in[6] */
556 output[4] += in[6] >> 16;
557 output[3] += (in[6] & 0xffff) << 40;
558 output[2] -= in[6];
560 output[3] += in[5] >> 16;
561 output[2] += (in[5] & 0xffff) << 40;
562 output[1] -= in[5];
564 output[2] += output[4] >> 16;
565 output[1] += (output[4] & 0xffff) << 40;
566 output[0] -= output[4];
568 /* Carry 2 -> 3 -> 4 */
569 output[3] += output[2] >> 56;
570 output[2] &= 0x00ffffffffffffff;
572 output[4] = output[3] >> 56;
573 output[3] &= 0x00ffffffffffffff;
575 /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
577 /* Eliminate output[4] */
578 output[2] += output[4] >> 16;
579 /* output[2] < 2^56 + 2^56 = 2^57 */
580 output[1] += (output[4] & 0xffff) << 40;
581 output[0] -= output[4];
583 /* Carry 0 -> 1 -> 2 -> 3 */
584 output[1] += output[0] >> 56;
585 out[0] = output[0] & 0x00ffffffffffffff;
587 output[2] += output[1] >> 56;
588 /* output[2] < 2^57 + 2^72 */
589 out[1] = output[1] & 0x00ffffffffffffff;
590 output[3] += output[2] >> 56;
591 /* output[3] <= 2^56 + 2^16 */
592 out[2] = output[2] & 0x00ffffffffffffff;
595 * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
596 * (due to final carry), so out < 2*p
598 out[3] = output[3];
601 static void
602 felem_square_reduce(felem out, const felem in)
604 widefelem tmp;
605 felem_square(tmp, in);
606 felem_reduce(out, tmp);
609 static void
610 felem_mul_reduce(felem out, const felem in1, const felem in2)
612 widefelem tmp;
613 felem_mul(tmp, in1, in2);
614 felem_reduce(out, tmp);
617 /* Reduce to unique minimal representation.
618 * Requires 0 <= in < 2*p (always call felem_reduce first) */
619 static void
620 felem_contract(felem out, const felem in)
622 static const int64_t two56 = ((limb) 1) << 56;
623 /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
624 /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
625 int64_t tmp[4], a;
626 tmp[0] = in[0];
627 tmp[1] = in[1];
628 tmp[2] = in[2];
629 tmp[3] = in[3];
630 /* Case 1: a = 1 iff in >= 2^224 */
631 a = (in[3] >> 56);
632 tmp[0] -= a;
633 tmp[1] += a << 40;
634 tmp[3] &= 0x00ffffffffffffff;
636 * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all
637 * 1 and the lower part is non-zero
639 a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
640 (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
641 a &= 0x00ffffffffffffff;
642 /* turn a into an all-one mask (if a = 0) or an all-zero mask */
643 a = (a - 1) >> 63;
644 /* subtract 2^224 - 2^96 + 1 if a is all-one */
645 tmp[3] &= a ^ 0xffffffffffffffff;
646 tmp[2] &= a ^ 0xffffffffffffffff;
647 tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
648 tmp[0] -= 1 & a;
651 * eliminate negative coefficients: if tmp[0] is negative, tmp[1]
652 * must be non-zero, so we only need one step
654 a = tmp[0] >> 63;
655 tmp[0] += two56 & a;
656 tmp[1] -= 1 & a;
658 /* carry 1 -> 2 -> 3 */
659 tmp[2] += tmp[1] >> 56;
660 tmp[1] &= 0x00ffffffffffffff;
662 tmp[3] += tmp[2] >> 56;
663 tmp[2] &= 0x00ffffffffffffff;
665 /* Now 0 <= out < p */
666 out[0] = tmp[0];
667 out[1] = tmp[1];
668 out[2] = tmp[2];
669 out[3] = tmp[3];
672 /* Zero-check: returns 1 if input is 0, and 0 otherwise.
673 * We know that field elements are reduced to in < 2^225,
674 * so we only need to check three cases: 0, 2^224 - 2^96 + 1,
675 * and 2^225 - 2^97 + 2 */
676 static limb
677 felem_is_zero(const felem in)
679 limb zero, two224m96p1, two225m97p2;
681 zero = in[0] | in[1] | in[2] | in[3];
682 zero = (((int64_t) (zero) - 1) >> 63) & 1;
683 two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
684 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
685 two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
686 two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
687 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
688 two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
689 return (zero | two224m96p1 | two225m97p2);
692 static limb
693 felem_is_zero_int(const felem in)
695 return (int) (felem_is_zero(in) & ((limb) 1));
698 /* Invert a field element */
699 /* Computation chain copied from djb's code */
700 static void
701 felem_inv(felem out, const felem in)
703 felem ftmp, ftmp2, ftmp3, ftmp4;
704 widefelem tmp;
705 unsigned i;
707 felem_square(tmp, in);
708 felem_reduce(ftmp, tmp);/* 2 */
709 felem_mul(tmp, in, ftmp);
710 felem_reduce(ftmp, tmp);/* 2^2 - 1 */
711 felem_square(tmp, ftmp);
712 felem_reduce(ftmp, tmp);/* 2^3 - 2 */
713 felem_mul(tmp, in, ftmp);
714 felem_reduce(ftmp, tmp);/* 2^3 - 1 */
715 felem_square(tmp, ftmp);
716 felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
717 felem_square(tmp, ftmp2);
718 felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
719 felem_square(tmp, ftmp2);
720 felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
721 felem_mul(tmp, ftmp2, ftmp);
722 felem_reduce(ftmp, tmp);/* 2^6 - 1 */
723 felem_square(tmp, ftmp);
724 felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
725 for (i = 0; i < 5; ++i) { /* 2^12 - 2^6 */
726 felem_square(tmp, ftmp2);
727 felem_reduce(ftmp2, tmp);
729 felem_mul(tmp, ftmp2, ftmp);
730 felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
731 felem_square(tmp, ftmp2);
732 felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
733 for (i = 0; i < 11; ++i) { /* 2^24 - 2^12 */
734 felem_square(tmp, ftmp3);
735 felem_reduce(ftmp3, tmp);
737 felem_mul(tmp, ftmp3, ftmp2);
738 felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
739 felem_square(tmp, ftmp2);
740 felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
741 for (i = 0; i < 23; ++i) { /* 2^48 - 2^24 */
742 felem_square(tmp, ftmp3);
743 felem_reduce(ftmp3, tmp);
745 felem_mul(tmp, ftmp3, ftmp2);
746 felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
747 felem_square(tmp, ftmp3);
748 felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
749 for (i = 0; i < 47; ++i) { /* 2^96 - 2^48 */
750 felem_square(tmp, ftmp4);
751 felem_reduce(ftmp4, tmp);
753 felem_mul(tmp, ftmp3, ftmp4);
754 felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
755 felem_square(tmp, ftmp3);
756 felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
757 for (i = 0; i < 23; ++i) { /* 2^120 - 2^24 */
758 felem_square(tmp, ftmp4);
759 felem_reduce(ftmp4, tmp);
761 felem_mul(tmp, ftmp2, ftmp4);
762 felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
763 for (i = 0; i < 6; ++i) { /* 2^126 - 2^6 */
764 felem_square(tmp, ftmp2);
765 felem_reduce(ftmp2, tmp);
767 felem_mul(tmp, ftmp2, ftmp);
768 felem_reduce(ftmp, tmp);/* 2^126 - 1 */
769 felem_square(tmp, ftmp);
770 felem_reduce(ftmp, tmp);/* 2^127 - 2 */
771 felem_mul(tmp, ftmp, in);
772 felem_reduce(ftmp, tmp);/* 2^127 - 1 */
773 for (i = 0; i < 97; ++i) { /* 2^224 - 2^97 */
774 felem_square(tmp, ftmp);
775 felem_reduce(ftmp, tmp);
777 felem_mul(tmp, ftmp, ftmp3);
778 felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
781 /* Copy in constant time:
782 * if icopy == 1, copy in to out,
783 * if icopy == 0, copy out to itself. */
784 static void
785 copy_conditional(felem out, const felem in, limb icopy)
787 unsigned i;
788 /* icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one */
789 const limb copy = -icopy;
790 for (i = 0; i < 4; ++i) {
791 const limb tmp = copy & (in[i] ^ out[i]);
792 out[i] ^= tmp;
796 /******************************************************************************/
797 /* ELLIPTIC CURVE POINT OPERATIONS
799 * Points are represented in Jacobian projective coordinates:
800 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
801 * or to the point at infinity if Z == 0.
805 /* Double an elliptic curve point:
806 * (X', Y', Z') = 2 * (X, Y, Z), where
807 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
808 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
809 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
810 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
811 * while x_out == y_in is not (maybe this works, but it's not tested). */
812 static void
813 point_double(felem x_out, felem y_out, felem z_out,
814 const felem x_in, const felem y_in, const felem z_in)
816 widefelem tmp, tmp2;
817 felem delta, gamma, beta, alpha, ftmp, ftmp2;
819 felem_assign(ftmp, x_in);
820 felem_assign(ftmp2, x_in);
822 /* delta = z^2 */
823 felem_square(tmp, z_in);
824 felem_reduce(delta, tmp);
826 /* gamma = y^2 */
827 felem_square(tmp, y_in);
828 felem_reduce(gamma, tmp);
830 /* beta = x*gamma */
831 felem_mul(tmp, x_in, gamma);
832 felem_reduce(beta, tmp);
834 /* alpha = 3*(x-delta)*(x+delta) */
835 felem_diff(ftmp, delta);
836 /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
837 felem_sum(ftmp2, delta);
838 /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
839 felem_scalar(ftmp2, 3);
840 /* ftmp2[i] < 3 * 2^58 < 2^60 */
841 felem_mul(tmp, ftmp, ftmp2);
842 /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
843 felem_reduce(alpha, tmp);
845 /* x' = alpha^2 - 8*beta */
846 felem_square(tmp, alpha);
847 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
848 felem_assign(ftmp, beta);
849 felem_scalar(ftmp, 8);
850 /* ftmp[i] < 8 * 2^57 = 2^60 */
851 felem_diff_128_64(tmp, ftmp);
852 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
853 felem_reduce(x_out, tmp);
855 /* z' = (y + z)^2 - gamma - delta */
856 felem_sum(delta, gamma);
857 /* delta[i] < 2^57 + 2^57 = 2^58 */
858 felem_assign(ftmp, y_in);
859 felem_sum(ftmp, z_in);
860 /* ftmp[i] < 2^57 + 2^57 = 2^58 */
861 felem_square(tmp, ftmp);
862 /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
863 felem_diff_128_64(tmp, delta);
864 /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
865 felem_reduce(z_out, tmp);
867 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
868 felem_scalar(beta, 4);
869 /* beta[i] < 4 * 2^57 = 2^59 */
870 felem_diff(beta, x_out);
871 /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
872 felem_mul(tmp, alpha, beta);
873 /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
874 felem_square(tmp2, gamma);
875 /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
876 widefelem_scalar(tmp2, 8);
877 /* tmp2[i] < 8 * 2^116 = 2^119 */
878 widefelem_diff(tmp, tmp2);
879 /* tmp[i] < 2^119 + 2^120 < 2^121 */
880 felem_reduce(y_out, tmp);
883 /* Add two elliptic curve points:
884 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
885 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
886 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
887 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
888 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
889 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
891 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
894 /* This function is not entirely constant-time:
895 * it includes a branch for checking whether the two input points are equal,
896 * (while not equal to the point at infinity).
897 * This case never happens during single point multiplication,
898 * so there is no timing leak for ECDH or ECDSA signing. */
899 static void
900 point_add(felem x3, felem y3, felem z3,
901 const felem x1, const felem y1, const felem z1,
902 const int mixed, const felem x2, const felem y2, const felem z2)
904 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
905 widefelem tmp, tmp2;
906 limb z1_is_zero, z2_is_zero, x_equal, y_equal;
908 if (!mixed) {
909 /* ftmp2 = z2^2 */
910 felem_square(tmp, z2);
911 felem_reduce(ftmp2, tmp);
913 /* ftmp4 = z2^3 */
914 felem_mul(tmp, ftmp2, z2);
915 felem_reduce(ftmp4, tmp);
917 /* ftmp4 = z2^3*y1 */
918 felem_mul(tmp2, ftmp4, y1);
919 felem_reduce(ftmp4, tmp2);
921 /* ftmp2 = z2^2*x1 */
922 felem_mul(tmp2, ftmp2, x1);
923 felem_reduce(ftmp2, tmp2);
924 } else {
925 /* We'll assume z2 = 1 (special case z2 = 0 is handled later) */
927 /* ftmp4 = z2^3*y1 */
928 felem_assign(ftmp4, y1);
930 /* ftmp2 = z2^2*x1 */
931 felem_assign(ftmp2, x1);
934 /* ftmp = z1^2 */
935 felem_square(tmp, z1);
936 felem_reduce(ftmp, tmp);
938 /* ftmp3 = z1^3 */
939 felem_mul(tmp, ftmp, z1);
940 felem_reduce(ftmp3, tmp);
942 /* tmp = z1^3*y2 */
943 felem_mul(tmp, ftmp3, y2);
944 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
946 /* ftmp3 = z1^3*y2 - z2^3*y1 */
947 felem_diff_128_64(tmp, ftmp4);
948 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
949 felem_reduce(ftmp3, tmp);
951 /* tmp = z1^2*x2 */
952 felem_mul(tmp, ftmp, x2);
953 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
955 /* ftmp = z1^2*x2 - z2^2*x1 */
956 felem_diff_128_64(tmp, ftmp2);
957 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
958 felem_reduce(ftmp, tmp);
961 * the formulae are incorrect if the points are equal so we check for
962 * this and do doubling if this happens
964 x_equal = felem_is_zero(ftmp);
965 y_equal = felem_is_zero(ftmp3);
966 z1_is_zero = felem_is_zero(z1);
967 z2_is_zero = felem_is_zero(z2);
968 /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
969 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
970 point_double(x3, y3, z3, x1, y1, z1);
971 return;
973 /* ftmp5 = z1*z2 */
974 if (!mixed) {
975 felem_mul(tmp, z1, z2);
976 felem_reduce(ftmp5, tmp);
977 } else {
978 /* special case z2 = 0 is handled later */
979 felem_assign(ftmp5, z1);
982 /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
983 felem_mul(tmp, ftmp, ftmp5);
984 felem_reduce(z_out, tmp);
986 /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
987 felem_assign(ftmp5, ftmp);
988 felem_square(tmp, ftmp);
989 felem_reduce(ftmp, tmp);
991 /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
992 felem_mul(tmp, ftmp, ftmp5);
993 felem_reduce(ftmp5, tmp);
995 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
996 felem_mul(tmp, ftmp2, ftmp);
997 felem_reduce(ftmp2, tmp);
999 /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1000 felem_mul(tmp, ftmp4, ftmp5);
1001 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1003 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1004 felem_square(tmp2, ftmp3);
1005 /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1007 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1008 felem_diff_128_64(tmp2, ftmp5);
1009 /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1011 /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1012 felem_assign(ftmp5, ftmp2);
1013 felem_scalar(ftmp5, 2);
1014 /* ftmp5[i] < 2 * 2^57 = 2^58 */
1017 * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1018 * 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1020 felem_diff_128_64(tmp2, ftmp5);
1021 /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1022 felem_reduce(x_out, tmp2);
1024 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1025 felem_diff(ftmp2, x_out);
1026 /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1028 /* tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) */
1029 felem_mul(tmp2, ftmp3, ftmp2);
1030 /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1033 * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 -
1034 * x_out) - z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1036 widefelem_diff(tmp2, tmp);
1037 /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1038 felem_reduce(y_out, tmp2);
1041 * the result (x_out, y_out, z_out) is incorrect if one of the inputs
1042 * is the point at infinity, so we need to check for this separately
1045 /* if point 1 is at infinity, copy point 2 to output, and vice versa */
1046 copy_conditional(x_out, x2, z1_is_zero);
1047 copy_conditional(x_out, x1, z2_is_zero);
1048 copy_conditional(y_out, y2, z1_is_zero);
1049 copy_conditional(y_out, y1, z2_is_zero);
1050 copy_conditional(z_out, z2, z1_is_zero);
1051 copy_conditional(z_out, z1, z2_is_zero);
1052 felem_assign(x3, x_out);
1053 felem_assign(y3, y_out);
1054 felem_assign(z3, z_out);
1057 /* select_point selects the |idx|th point from a precomputation table and
1058 * copies it to out. */
1059 static void
1060 select_point(const u64 idx, unsigned int size, const felem pre_comp[ /* size */ ][3], felem out[3])
1062 unsigned i, j;
1063 limb *outlimbs = &out[0][0];
1064 memset(outlimbs, 0, 3 * sizeof(felem));
1066 for (i = 0; i < size; i++) {
1067 const limb *inlimbs = &pre_comp[i][0][0];
1068 u64 mask = i ^ idx;
1069 mask |= mask >> 4;
1070 mask |= mask >> 2;
1071 mask |= mask >> 1;
1072 mask &= 1;
1073 mask--;
1074 for (j = 0; j < 4 * 3; j++)
1075 outlimbs[j] |= inlimbs[j] & mask;
1079 /* get_bit returns the |i|th bit in |in| */
1080 static char
1081 get_bit(const felem_bytearray in, unsigned i)
1083 if (i >= 224)
1084 return 0;
1085 return (in[i >> 3] >> (i & 7)) & 1;
1088 /* Interleaved point multiplication using precomputed point multiples:
1089 * The small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[],
1090 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
1091 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
1092 * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
1093 static void
1094 batch_mul(felem x_out, felem y_out, felem z_out,
1095 const felem_bytearray scalars[], const unsigned num_points, const u8 * g_scalar,
1096 const int mixed, const felem pre_comp[][17][3], const felem g_pre_comp[2][16][3])
1098 int i, skip;
1099 unsigned num;
1100 unsigned gen_mul = (g_scalar != NULL);
1101 felem nq[3], tmp[4];
1102 u64 bits;
1103 u8 sign, digit;
1105 /* set nq to the point at infinity */
1106 memset(nq, 0, 3 * sizeof(felem));
1109 * Loop over all scalars msb-to-lsb, interleaving additions of
1110 * multiples of the generator (two in each of the last 28 rounds) and
1111 * additions of other points multiples (every 5th round).
1113 skip = 1; /* save two point operations in the first
1114 * round */
1115 for (i = (num_points ? 220 : 27); i >= 0; --i) {
1116 /* double */
1117 if (!skip)
1118 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1120 /* add multiples of the generator */
1121 if (gen_mul && (i <= 27)) {
1122 /* first, look 28 bits upwards */
1123 bits = get_bit(g_scalar, i + 196) << 3;
1124 bits |= get_bit(g_scalar, i + 140) << 2;
1125 bits |= get_bit(g_scalar, i + 84) << 1;
1126 bits |= get_bit(g_scalar, i + 28);
1127 /* select the point to add, in constant time */
1128 select_point(bits, 16, g_pre_comp[1], tmp);
1130 if (!skip) {
1131 point_add(nq[0], nq[1], nq[2],
1132 nq[0], nq[1], nq[2],
1133 1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1134 } else {
1135 memcpy(nq, tmp, 3 * sizeof(felem));
1136 skip = 0;
1139 /* second, look at the current position */
1140 bits = get_bit(g_scalar, i + 168) << 3;
1141 bits |= get_bit(g_scalar, i + 112) << 2;
1142 bits |= get_bit(g_scalar, i + 56) << 1;
1143 bits |= get_bit(g_scalar, i);
1144 /* select the point to add, in constant time */
1145 select_point(bits, 16, g_pre_comp[0], tmp);
1146 point_add(nq[0], nq[1], nq[2],
1147 nq[0], nq[1], nq[2],
1148 1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1150 /* do other additions every 5 doublings */
1151 if (num_points && (i % 5 == 0)) {
1152 /* loop over all scalars */
1153 for (num = 0; num < num_points; ++num) {
1154 bits = get_bit(scalars[num], i + 4) << 5;
1155 bits |= get_bit(scalars[num], i + 3) << 4;
1156 bits |= get_bit(scalars[num], i + 2) << 3;
1157 bits |= get_bit(scalars[num], i + 1) << 2;
1158 bits |= get_bit(scalars[num], i) << 1;
1159 bits |= get_bit(scalars[num], i - 1);
1160 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1162 /* select the point to add or subtract */
1163 select_point(digit, 17, pre_comp[num], tmp);
1164 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the
1165 * negative point */
1166 copy_conditional(tmp[1], tmp[3], sign);
1168 if (!skip) {
1169 point_add(nq[0], nq[1], nq[2],
1170 nq[0], nq[1], nq[2],
1171 mixed, tmp[0], tmp[1], tmp[2]);
1172 } else {
1173 memcpy(nq, tmp, 3 * sizeof(felem));
1174 skip = 0;
1179 felem_assign(x_out, nq[0]);
1180 felem_assign(y_out, nq[1]);
1181 felem_assign(z_out, nq[2]);
1184 /******************************************************************************/
1185 /* FUNCTIONS TO MANAGE PRECOMPUTATION
1188 static NISTP224_PRE_COMP *
1189 nistp224_pre_comp_new()
1191 NISTP224_PRE_COMP *ret = NULL;
1192 ret = malloc(sizeof *ret);
1193 if (!ret) {
1194 ECerror(ERR_R_MALLOC_FAILURE);
1195 return ret;
1197 memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1198 ret->references = 1;
1199 return ret;
1202 static void *
1203 nistp224_pre_comp_dup(void *src_)
1205 NISTP224_PRE_COMP *src = src_;
1207 /* no need to actually copy, these objects never change! */
1208 CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1210 return src_;
1213 static void
1214 nistp224_pre_comp_free(void *pre_)
1216 int i;
1217 NISTP224_PRE_COMP *pre = pre_;
1219 if (!pre)
1220 return;
1222 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1223 if (i > 0)
1224 return;
1226 free(pre);
1229 static void
1230 nistp224_pre_comp_clear_free(void *pre_)
1232 int i;
1233 NISTP224_PRE_COMP *pre = pre_;
1235 if (!pre)
1236 return;
1238 i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1239 if (i > 0)
1240 return;
1242 freezero(pre, sizeof *pre);
1245 /******************************************************************************/
1246 /* OPENSSL EC_METHOD FUNCTIONS
1249 int
1250 ec_GFp_nistp224_group_init(EC_GROUP * group)
1252 int ret;
1253 ret = ec_GFp_simple_group_init(group);
1254 group->a_is_minus3 = 1;
1255 return ret;
1258 int
1259 ec_GFp_nistp224_group_set_curve(EC_GROUP * group, const BIGNUM * p,
1260 const BIGNUM * a, const BIGNUM * b, BN_CTX * ctx)
1262 int ret = 0;
1263 BN_CTX *new_ctx = NULL;
1264 BIGNUM *curve_p, *curve_a, *curve_b;
1266 if (ctx == NULL)
1267 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1268 return 0;
1269 BN_CTX_start(ctx);
1270 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1271 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1272 ((curve_b = BN_CTX_get(ctx)) == NULL))
1273 goto err;
1274 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1275 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1276 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1277 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) ||
1278 (BN_cmp(curve_b, b))) {
1279 ECerror(EC_R_WRONG_CURVE_PARAMETERS);
1280 goto err;
1282 group->field_mod_func = BN_nist_mod_224;
1283 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1284 err:
1285 BN_CTX_end(ctx);
1286 BN_CTX_free(new_ctx);
1287 return ret;
1290 /* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
1291 * (X', Y') = (X/Z^2, Y/Z^3) */
1292 int
1293 ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP * group,
1294 const EC_POINT * point, BIGNUM * x, BIGNUM * y, BN_CTX * ctx)
1296 felem z1, z2, x_in, y_in, x_out, y_out;
1297 widefelem tmp;
1299 if (EC_POINT_is_at_infinity(group, point) > 0) {
1300 ECerror(EC_R_POINT_AT_INFINITY);
1301 return 0;
1303 if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1304 (!BN_to_felem(z1, &point->Z)))
1305 return 0;
1306 felem_inv(z2, z1);
1307 felem_square(tmp, z2);
1308 felem_reduce(z1, tmp);
1309 felem_mul(tmp, x_in, z1);
1310 felem_reduce(x_in, tmp);
1311 felem_contract(x_out, x_in);
1312 if (x != NULL) {
1313 if (!felem_to_BN(x, x_out)) {
1314 ECerror(ERR_R_BN_LIB);
1315 return 0;
1318 felem_mul(tmp, z1, z2);
1319 felem_reduce(z1, tmp);
1320 felem_mul(tmp, y_in, z1);
1321 felem_reduce(y_in, tmp);
1322 felem_contract(y_out, y_in);
1323 if (y != NULL) {
1324 if (!felem_to_BN(y, y_out)) {
1325 ECerror(ERR_R_BN_LIB);
1326 return 0;
1329 return 1;
1332 static void
1333 make_points_affine(size_t num, felem points[ /* num */ ][3], felem tmp_felems[ /* num+1 */ ])
1336 * Runs in constant time, unless an input is the point at infinity
1337 * (which normally shouldn't happen).
1339 ec_GFp_nistp_points_make_affine_internal(
1340 num,
1341 points,
1342 sizeof(felem),
1343 tmp_felems,
1344 (void (*) (void *)) felem_one,
1345 (int (*) (const void *)) felem_is_zero_int,
1346 (void (*) (void *, const void *)) felem_assign,
1347 (void (*) (void *, const void *)) felem_square_reduce,
1348 (void (*) (void *, const void *, const void *)) felem_mul_reduce,
1349 (void (*) (void *, const void *)) felem_inv,
1350 (void (*) (void *, const void *)) felem_contract);
1353 /* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
1354 * Result is stored in r (r can equal one of the inputs). */
1355 int
1356 ec_GFp_nistp224_points_mul(const EC_GROUP * group, EC_POINT * r,
1357 const BIGNUM * scalar, size_t num, const EC_POINT * points[],
1358 const BIGNUM * scalars[], BN_CTX * ctx)
1360 int ret = 0;
1361 int j;
1362 unsigned i;
1363 int mixed = 0;
1364 BN_CTX *new_ctx = NULL;
1365 BIGNUM *x, *y, *z, *tmp_scalar;
1366 felem_bytearray g_secret;
1367 felem_bytearray *secrets = NULL;
1368 felem(*pre_comp)[17][3] = NULL;
1369 felem *tmp_felems = NULL;
1370 felem_bytearray tmp;
1371 unsigned num_bytes;
1372 int have_pre_comp = 0;
1373 size_t num_points = num;
1374 felem x_in, y_in, z_in, x_out, y_out, z_out;
1375 NISTP224_PRE_COMP *pre = NULL;
1376 const felem(*g_pre_comp)[16][3] = NULL;
1377 EC_POINT *generator = NULL;
1378 const EC_POINT *p = NULL;
1379 const BIGNUM *p_scalar = NULL;
1381 if (ctx == NULL)
1382 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1383 return 0;
1384 BN_CTX_start(ctx);
1385 if (((x = BN_CTX_get(ctx)) == NULL) ||
1386 ((y = BN_CTX_get(ctx)) == NULL) ||
1387 ((z = BN_CTX_get(ctx)) == NULL) ||
1388 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1389 goto err;
1391 if (scalar != NULL) {
1392 pre = EC_EX_DATA_get_data(group->extra_data,
1393 nistp224_pre_comp_dup, nistp224_pre_comp_free,
1394 nistp224_pre_comp_clear_free);
1395 if (pre)
1396 /* we have precomputation, try to use it */
1397 g_pre_comp = (const felem(*)[16][3]) pre->g_pre_comp;
1398 else
1399 /* try to use the standard precomputation */
1400 g_pre_comp = &gmul[0];
1401 generator = EC_POINT_new(group);
1402 if (generator == NULL)
1403 goto err;
1404 /* get the generator from precomputation */
1405 if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1406 !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1407 !felem_to_BN(z, g_pre_comp[0][1][2])) {
1408 ECerror(ERR_R_BN_LIB);
1409 goto err;
1411 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1412 generator, x, y, z, ctx))
1413 goto err;
1414 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1415 /* precomputation matches generator */
1416 have_pre_comp = 1;
1417 else
1419 * we don't have valid precomputation: treat the
1420 * generator as a random point
1422 num_points = num_points + 1;
1424 if (num_points > 0) {
1425 if (num_points >= 3) {
1427 * unless we precompute multiples for just one or two
1428 * points, converting those into affine form is time
1429 * well spent
1431 mixed = 1;
1433 secrets = calloc(num_points, sizeof(felem_bytearray));
1434 pre_comp = calloc(num_points, 17 * 3 * sizeof(felem));
1435 if (mixed) {
1436 /* XXX should do more int overflow checking */
1437 tmp_felems = reallocarray(NULL,
1438 (num_points * 17 + 1), sizeof(felem));
1440 if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_felems == NULL))) {
1441 ECerror(ERR_R_MALLOC_FAILURE);
1442 goto err;
1445 * we treat NULL scalars as 0, and NULL points as points at
1446 * infinity, i.e., they contribute nothing to the linear
1447 * combination
1449 for (i = 0; i < num_points; ++i) {
1450 if (i == num)
1451 /* the generator */
1453 p = EC_GROUP_get0_generator(group);
1454 p_scalar = scalar;
1455 } else
1456 /* the i^th point */
1458 p = points[i];
1459 p_scalar = scalars[i];
1461 if ((p_scalar != NULL) && (p != NULL)) {
1462 /* reduce scalar to 0 <= scalar < 2^224 */
1463 if ((BN_num_bits(p_scalar) > 224) || (BN_is_negative(p_scalar))) {
1465 * this is an unusual input, and we
1466 * don't guarantee constant-timeness
1468 if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) {
1469 ECerror(ERR_R_BN_LIB);
1470 goto err;
1472 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1473 } else
1474 num_bytes = BN_bn2bin(p_scalar, tmp);
1475 flip_endian(secrets[i], tmp, num_bytes);
1476 /* precompute multiples */
1477 if ((!BN_to_felem(x_out, &p->X)) ||
1478 (!BN_to_felem(y_out, &p->Y)) ||
1479 (!BN_to_felem(z_out, &p->Z)))
1480 goto err;
1481 felem_assign(pre_comp[i][1][0], x_out);
1482 felem_assign(pre_comp[i][1][1], y_out);
1483 felem_assign(pre_comp[i][1][2], z_out);
1484 for (j = 2; j <= 16; ++j) {
1485 if (j & 1) {
1486 point_add(
1487 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1488 pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2],
1489 0, pre_comp[i][j - 1][0], pre_comp[i][j - 1][1], pre_comp[i][j - 1][2]);
1490 } else {
1491 point_double(
1492 pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
1493 pre_comp[i][j / 2][0], pre_comp[i][j / 2][1], pre_comp[i][j / 2][2]);
1498 if (mixed)
1499 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1501 /* the scalar for the generator */
1502 if ((scalar != NULL) && (have_pre_comp)) {
1503 memset(g_secret, 0, sizeof g_secret);
1504 /* reduce scalar to 0 <= scalar < 2^224 */
1505 if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1507 * this is an unusual input, and we don't guarantee
1508 * constant-timeness
1510 if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
1511 ECerror(ERR_R_BN_LIB);
1512 goto err;
1514 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1515 } else
1516 num_bytes = BN_bn2bin(scalar, tmp);
1517 flip_endian(g_secret, tmp, num_bytes);
1518 /* do the multiplication with generator precomputation */
1519 batch_mul(x_out, y_out, z_out,
1520 (const felem_bytearray(*)) secrets, num_points,
1521 g_secret,
1522 mixed, (const felem(*)[17][3]) pre_comp,
1523 g_pre_comp);
1524 } else
1525 /* do the multiplication without generator precomputation */
1526 batch_mul(x_out, y_out, z_out,
1527 (const felem_bytearray(*)) secrets, num_points,
1528 NULL, mixed, (const felem(*)[17][3]) pre_comp, NULL);
1529 /* reduce the output to its unique minimal representation */
1530 felem_contract(x_in, x_out);
1531 felem_contract(y_in, y_out);
1532 felem_contract(z_in, z_out);
1533 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1534 (!felem_to_BN(z, z_in))) {
1535 ECerror(ERR_R_BN_LIB);
1536 goto err;
1538 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1540 err:
1541 BN_CTX_end(ctx);
1542 EC_POINT_free(generator);
1543 BN_CTX_free(new_ctx);
1544 free(secrets);
1545 free(pre_comp);
1546 free(tmp_felems);
1547 return ret;
1550 int
1551 ec_GFp_nistp224_precompute_mult(EC_GROUP * group, BN_CTX * ctx)
1553 int ret = 0;
1554 NISTP224_PRE_COMP *pre = NULL;
1555 int i, j;
1556 BN_CTX *new_ctx = NULL;
1557 BIGNUM *x, *y;
1558 EC_POINT *generator = NULL;
1559 felem tmp_felems[32];
1561 /* throw away old precomputation */
1562 EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup,
1563 nistp224_pre_comp_free, nistp224_pre_comp_clear_free);
1564 if (ctx == NULL)
1565 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1566 return 0;
1567 BN_CTX_start(ctx);
1568 if (((x = BN_CTX_get(ctx)) == NULL) ||
1569 ((y = BN_CTX_get(ctx)) == NULL))
1570 goto err;
1571 /* get the generator */
1572 if (group->generator == NULL)
1573 goto err;
1574 generator = EC_POINT_new(group);
1575 if (generator == NULL)
1576 goto err;
1577 BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1578 BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1579 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1580 goto err;
1581 if ((pre = nistp224_pre_comp_new()) == NULL)
1582 goto err;
1583 /* if the generator is the standard one, use built-in precomputation */
1584 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1585 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1586 ret = 1;
1587 goto err;
1589 if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) ||
1590 (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) ||
1591 (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z)))
1592 goto err;
1594 * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G,
1595 * 2^84*G, 2^140*G, 2^196*G for the second one
1597 for (i = 1; i <= 8; i <<= 1) {
1598 point_double(
1599 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
1600 pre->g_pre_comp[0][i][0], pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1601 for (j = 0; j < 27; ++j) {
1602 point_double(
1603 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2],
1604 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1606 if (i == 8)
1607 break;
1608 point_double(
1609 pre->g_pre_comp[0][2 * i][0], pre->g_pre_comp[0][2 * i][1], pre->g_pre_comp[0][2 * i][2],
1610 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1611 for (j = 0; j < 27; ++j) {
1612 point_double(
1613 pre->g_pre_comp[0][2 * i][0], pre->g_pre_comp[0][2 * i][1], pre->g_pre_comp[0][2 * i][2],
1614 pre->g_pre_comp[0][2 * i][0], pre->g_pre_comp[0][2 * i][1], pre->g_pre_comp[0][2 * i][2]);
1617 for (i = 0; i < 2; i++) {
1618 /* g_pre_comp[i][0] is the point at infinity */
1619 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1620 /* the remaining multiples */
1621 /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1622 point_add(
1623 pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1624 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1625 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1626 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1627 pre->g_pre_comp[i][2][2]);
1628 /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1629 point_add(
1630 pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1631 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1632 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1633 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1634 pre->g_pre_comp[i][2][2]);
1635 /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1636 point_add(
1637 pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1638 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1639 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1640 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1641 pre->g_pre_comp[i][4][2]);
1643 * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G +
1644 * 2^196*G
1646 point_add(
1647 pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1648 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1649 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1650 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1651 pre->g_pre_comp[i][2][2]);
1652 for (j = 1; j < 8; ++j) {
1653 /* odd multiples: add G resp. 2^28*G */
1654 point_add(
1655 pre->g_pre_comp[i][2 * j + 1][0], pre->g_pre_comp[i][2 * j + 1][1],
1656 pre->g_pre_comp[i][2 * j + 1][2], pre->g_pre_comp[i][2 * j][0],
1657 pre->g_pre_comp[i][2 * j][1], pre->g_pre_comp[i][2 * j][2],
1658 0, pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1659 pre->g_pre_comp[i][1][2]);
1662 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1664 if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup,
1665 nistp224_pre_comp_free, nistp224_pre_comp_clear_free))
1666 goto err;
1667 ret = 1;
1668 pre = NULL;
1669 err:
1670 BN_CTX_end(ctx);
1671 EC_POINT_free(generator);
1672 BN_CTX_free(new_ctx);
1673 nistp224_pre_comp_free(pre);
1674 return ret;
1677 int
1678 ec_GFp_nistp224_have_precompute_mult(const EC_GROUP * group)
1680 if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup,
1681 nistp224_pre_comp_free, nistp224_pre_comp_clear_free)
1682 != NULL)
1683 return 1;
1684 else
1685 return 0;
1688 #endif