source: EcnlProtoTool/trunk/openssl-1.1.0e/crypto/ec/ecp_nistp256.c@ 331

Last change on this file since 331 was 331, checked in by coas-nagasima, 6 years ago

prototoolに関連するプロジェクトをnewlibからmuslを使うよう変更・更新
ntshellをnewlibの下位の実装から、muslのsyscallの実装に変更・更新
以下のOSSをアップデート
・mruby-1.3.0
・musl-1.1.18
・onigmo-6.1.3
・tcc-0.9.27
以下のOSSを追加
・openssl-1.1.0e
・curl-7.57.0
・zlib-1.2.11
以下のmrbgemsを追加
・iij/mruby-digest
・iij/mruby-env
・iij/mruby-errno
・iij/mruby-iijson
・iij/mruby-ipaddr
・iij/mruby-mock
・iij/mruby-require
・iij/mruby-tls-openssl

  • Property svn:eol-style set to native
  • Property svn:mime-type set to text/x-csrc
File size: 73.2 KB
Line 
1/*
2 * Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the OpenSSL license (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10/* Copyright 2011 Google Inc.
11 *
12 * Licensed under the Apache License, Version 2.0 (the "License");
13 *
14 * you may not use this file except in compliance with the License.
15 * You may obtain a copy of the License at
16 *
17 * http://www.apache.org/licenses/LICENSE-2.0
18 *
19 * Unless required by applicable law or agreed to in writing, software
20 * distributed under the License is distributed on an "AS IS" BASIS,
21 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 * See the License for the specific language governing permissions and
23 * limitations under the License.
24 */
25
26/*
27 * A 64-bit implementation of the NIST P-256 elliptic curve point multiplication
28 *
29 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
30 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
31 * work which got its smarts from Daniel J. Bernstein's work on the same.
32 */
33
34#include <openssl/opensslconf.h>
35#ifdef OPENSSL_NO_EC_NISTP_64_GCC_128
36NON_EMPTY_TRANSLATION_UNIT
37#else
38
39# include <stdint.h>
40# include <string.h>
41# include <openssl/err.h>
42# include "ec_lcl.h"
43
44# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
45 /* even with gcc, the typedef won't work for 32-bit platforms */
46typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit
47 * platforms */
48typedef __int128_t int128_t;
49# else
50# error "Need GCC 3.1 or later to define type uint128_t"
51# endif
52
53typedef uint8_t u8;
54typedef uint32_t u32;
55typedef uint64_t u64;
56typedef int64_t s64;
57
58/*
59 * The underlying field. P256 operates over GF(2^256-2^224+2^192+2^96-1). We
60 * can serialise an element of this field into 32 bytes. We call this an
61 * felem_bytearray.
62 */
63
64typedef u8 felem_bytearray[32];
65
66/*
67 * These are the parameters of P256, taken from FIPS 186-3, page 86. These
68 * values are big-endian.
69 */
70static const felem_bytearray nistp256_curve_params[5] = {
71 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p */
72 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
73 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
74 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
75 {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 */
76 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
77 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
78 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc}, /* b */
79 {0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7,
80 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc,
81 0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6,
82 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b},
83 {0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x */
84 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2,
85 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0,
86 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96},
87 {0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y */
88 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16,
89 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce,
90 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5}
91};
92
93/*-
94 * The representation of field elements.
95 * ------------------------------------
96 *
97 * We represent field elements with either four 128-bit values, eight 128-bit
98 * values, or four 64-bit values. The field element represented is:
99 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + v[3]*2^192 (mod p)
100 * or:
101 * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + ... + v[8]*2^512 (mod p)
102 *
103 * 128-bit values are called 'limbs'. Since the limbs are spaced only 64 bits
104 * apart, but are 128-bits wide, the most significant bits of each limb overlap
105 * with the least significant bits of the next.
106 *
107 * A field element with four limbs is an 'felem'. One with eight limbs is a
108 * 'longfelem'
109 *
110 * A field element with four, 64-bit values is called a 'smallfelem'. Small
111 * values are used as intermediate values before multiplication.
112 */
113
114# define NLIMBS 4
115
116typedef uint128_t limb;
117typedef limb felem[NLIMBS];
118typedef limb longfelem[NLIMBS * 2];
119typedef u64 smallfelem[NLIMBS];
120
121/* This is the value of the prime as four 64-bit words, little-endian. */
122static const u64 kPrime[4] =
123 { 0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul };
124static const u64 bottom63bits = 0x7ffffffffffffffful;
125
126/*
127 * bin32_to_felem takes a little-endian byte array and converts it into felem
128 * form. This assumes that the CPU is little-endian.
129 */
130static void bin32_to_felem(felem out, const u8 in[32])
131{
132 out[0] = *((u64 *)&in[0]);
133 out[1] = *((u64 *)&in[8]);
134 out[2] = *((u64 *)&in[16]);
135 out[3] = *((u64 *)&in[24]);
136}
137
138/*
139 * smallfelem_to_bin32 takes a smallfelem and serialises into a little
140 * endian, 32 byte array. This assumes that the CPU is little-endian.
141 */
142static void smallfelem_to_bin32(u8 out[32], const smallfelem in)
143{
144 *((u64 *)&out[0]) = in[0];
145 *((u64 *)&out[8]) = in[1];
146 *((u64 *)&out[16]) = in[2];
147 *((u64 *)&out[24]) = in[3];
148}
149
150/* To preserve endianness when using BN_bn2bin and BN_bin2bn */
151static void flip_endian(u8 *out, const u8 *in, unsigned len)
152{
153 unsigned i;
154 for (i = 0; i < len; ++i)
155 out[i] = in[len - 1 - i];
156}
157
158/* BN_to_felem converts an OpenSSL BIGNUM into an felem */
159static int BN_to_felem(felem out, const BIGNUM *bn)
160{
161 felem_bytearray b_in;
162 felem_bytearray b_out;
163 unsigned num_bytes;
164
165 /* BN_bn2bin eats leading zeroes */
166 memset(b_out, 0, sizeof(b_out));
167 num_bytes = BN_num_bytes(bn);
168 if (num_bytes > sizeof b_out) {
169 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
170 return 0;
171 }
172 if (BN_is_negative(bn)) {
173 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
174 return 0;
175 }
176 num_bytes = BN_bn2bin(bn, b_in);
177 flip_endian(b_out, b_in, num_bytes);
178 bin32_to_felem(out, b_out);
179 return 1;
180}
181
182/* felem_to_BN converts an felem into an OpenSSL BIGNUM */
183static BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in)
184{
185 felem_bytearray b_in, b_out;
186 smallfelem_to_bin32(b_in, in);
187 flip_endian(b_out, b_in, sizeof b_out);
188 return BN_bin2bn(b_out, sizeof b_out, out);
189}
190
191/*-
192 * Field operations
193 * ----------------
194 */
195
196static void smallfelem_one(smallfelem out)
197{
198 out[0] = 1;
199 out[1] = 0;
200 out[2] = 0;
201 out[3] = 0;
202}
203
204static void smallfelem_assign(smallfelem out, const smallfelem in)
205{
206 out[0] = in[0];
207 out[1] = in[1];
208 out[2] = in[2];
209 out[3] = in[3];
210}
211
212static void felem_assign(felem out, const felem in)
213{
214 out[0] = in[0];
215 out[1] = in[1];
216 out[2] = in[2];
217 out[3] = in[3];
218}
219
220/* felem_sum sets out = out + in. */
221static void felem_sum(felem out, const felem in)
222{
223 out[0] += in[0];
224 out[1] += in[1];
225 out[2] += in[2];
226 out[3] += in[3];
227}
228
229/* felem_small_sum sets out = out + in. */
230static void felem_small_sum(felem out, const smallfelem in)
231{
232 out[0] += in[0];
233 out[1] += in[1];
234 out[2] += in[2];
235 out[3] += in[3];
236}
237
238/* felem_scalar sets out = out * scalar */
239static void felem_scalar(felem out, const u64 scalar)
240{
241 out[0] *= scalar;
242 out[1] *= scalar;
243 out[2] *= scalar;
244 out[3] *= scalar;
245}
246
247/* longfelem_scalar sets out = out * scalar */
248static void longfelem_scalar(longfelem out, const u64 scalar)
249{
250 out[0] *= scalar;
251 out[1] *= scalar;
252 out[2] *= scalar;
253 out[3] *= scalar;
254 out[4] *= scalar;
255 out[5] *= scalar;
256 out[6] *= scalar;
257 out[7] *= scalar;
258}
259
260# define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9)
261# define two105 (((limb)1) << 105)
262# define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9)
263
264/* zero105 is 0 mod p */
265static const felem zero105 =
266 { two105m41m9, two105, two105m41p9, two105m41p9 };
267
268/*-
269 * smallfelem_neg sets |out| to |-small|
270 * On exit:
271 * out[i] < out[i] + 2^105
272 */
273static void smallfelem_neg(felem out, const smallfelem small)
274{
275 /* In order to prevent underflow, we subtract from 0 mod p. */
276 out[0] = zero105[0] - small[0];
277 out[1] = zero105[1] - small[1];
278 out[2] = zero105[2] - small[2];
279 out[3] = zero105[3] - small[3];
280}
281
282/*-
283 * felem_diff subtracts |in| from |out|
284 * On entry:
285 * in[i] < 2^104
286 * On exit:
287 * out[i] < out[i] + 2^105
288 */
289static void felem_diff(felem out, const felem in)
290{
291 /*
292 * In order to prevent underflow, we add 0 mod p before subtracting.
293 */
294 out[0] += zero105[0];
295 out[1] += zero105[1];
296 out[2] += zero105[2];
297 out[3] += zero105[3];
298
299 out[0] -= in[0];
300 out[1] -= in[1];
301 out[2] -= in[2];
302 out[3] -= in[3];
303}
304
305# define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11)
306# define two107 (((limb)1) << 107)
307# define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11)
308
309/* zero107 is 0 mod p */
310static const felem zero107 =
311 { two107m43m11, two107, two107m43p11, two107m43p11 };
312
313/*-
314 * An alternative felem_diff for larger inputs |in|
315 * felem_diff_zero107 subtracts |in| from |out|
316 * On entry:
317 * in[i] < 2^106
318 * On exit:
319 * out[i] < out[i] + 2^107
320 */
321static void felem_diff_zero107(felem out, const felem in)
322{
323 /*
324 * In order to prevent underflow, we add 0 mod p before subtracting.
325 */
326 out[0] += zero107[0];
327 out[1] += zero107[1];
328 out[2] += zero107[2];
329 out[3] += zero107[3];
330
331 out[0] -= in[0];
332 out[1] -= in[1];
333 out[2] -= in[2];
334 out[3] -= in[3];
335}
336
337/*-
338 * longfelem_diff subtracts |in| from |out|
339 * On entry:
340 * in[i] < 7*2^67
341 * On exit:
342 * out[i] < out[i] + 2^70 + 2^40
343 */
344static void longfelem_diff(longfelem out, const longfelem in)
345{
346 static const limb two70m8p6 =
347 (((limb) 1) << 70) - (((limb) 1) << 8) + (((limb) 1) << 6);
348 static const limb two70p40 = (((limb) 1) << 70) + (((limb) 1) << 40);
349 static const limb two70 = (((limb) 1) << 70);
350 static const limb two70m40m38p6 =
351 (((limb) 1) << 70) - (((limb) 1) << 40) - (((limb) 1) << 38) +
352 (((limb) 1) << 6);
353 static const limb two70m6 = (((limb) 1) << 70) - (((limb) 1) << 6);
354
355 /* add 0 mod p to avoid underflow */
356 out[0] += two70m8p6;
357 out[1] += two70p40;
358 out[2] += two70;
359 out[3] += two70m40m38p6;
360 out[4] += two70m6;
361 out[5] += two70m6;
362 out[6] += two70m6;
363 out[7] += two70m6;
364
365 /* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */
366 out[0] -= in[0];
367 out[1] -= in[1];
368 out[2] -= in[2];
369 out[3] -= in[3];
370 out[4] -= in[4];
371 out[5] -= in[5];
372 out[6] -= in[6];
373 out[7] -= in[7];
374}
375
376# define two64m0 (((limb)1) << 64) - 1
377# define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1
378# define two64m46 (((limb)1) << 64) - (((limb)1) << 46)
379# define two64m32 (((limb)1) << 64) - (((limb)1) << 32)
380
381/* zero110 is 0 mod p */
382static const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 };
383
384/*-
385 * felem_shrink converts an felem into a smallfelem. The result isn't quite
386 * minimal as the value may be greater than p.
387 *
388 * On entry:
389 * in[i] < 2^109
390 * On exit:
391 * out[i] < 2^64
392 */
393static void felem_shrink(smallfelem out, const felem in)
394{
395 felem tmp;
396 u64 a, b, mask;
397 s64 high, low;
398 static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */
399
400 /* Carry 2->3 */
401 tmp[3] = zero110[3] + in[3] + ((u64)(in[2] >> 64));
402 /* tmp[3] < 2^110 */
403
404 tmp[2] = zero110[2] + (u64)in[2];
405 tmp[0] = zero110[0] + in[0];
406 tmp[1] = zero110[1] + in[1];
407 /* tmp[0] < 2**110, tmp[1] < 2^111, tmp[2] < 2**65 */
408
409 /*
410 * We perform two partial reductions where we eliminate the high-word of
411 * tmp[3]. We don't update the other words till the end.
412 */
413 a = tmp[3] >> 64; /* a < 2^46 */
414 tmp[3] = (u64)tmp[3];
415 tmp[3] -= a;
416 tmp[3] += ((limb) a) << 32;
417 /* tmp[3] < 2^79 */
418
419 b = a;
420 a = tmp[3] >> 64; /* a < 2^15 */
421 b += a; /* b < 2^46 + 2^15 < 2^47 */
422 tmp[3] = (u64)tmp[3];
423 tmp[3] -= a;
424 tmp[3] += ((limb) a) << 32;
425 /* tmp[3] < 2^64 + 2^47 */
426
427 /*
428 * This adjusts the other two words to complete the two partial
429 * reductions.
430 */
431 tmp[0] += b;
432 tmp[1] -= (((limb) b) << 32);
433
434 /*
435 * In order to make space in tmp[3] for the carry from 2 -> 3, we
436 * conditionally subtract kPrime if tmp[3] is large enough.
437 */
438 high = tmp[3] >> 64;
439 /* As tmp[3] < 2^65, high is either 1 or 0 */
440 high <<= 63;
441 high >>= 63;
442 /*-
443 * high is:
444 * all ones if the high word of tmp[3] is 1
445 * all zeros if the high word of tmp[3] if 0 */
446 low = tmp[3];
447 mask = low >> 63;
448 /*-
449 * mask is:
450 * all ones if the MSB of low is 1
451 * all zeros if the MSB of low if 0 */
452 low &= bottom63bits;
453 low -= kPrime3Test;
454 /* if low was greater than kPrime3Test then the MSB is zero */
455 low = ~low;
456 low >>= 63;
457 /*-
458 * low is:
459 * all ones if low was > kPrime3Test
460 * all zeros if low was <= kPrime3Test */
461 mask = (mask & low) | high;
462 tmp[0] -= mask & kPrime[0];
463 tmp[1] -= mask & kPrime[1];
464 /* kPrime[2] is zero, so omitted */
465 tmp[3] -= mask & kPrime[3];
466 /* tmp[3] < 2**64 - 2**32 + 1 */
467
468 tmp[1] += ((u64)(tmp[0] >> 64));
469 tmp[0] = (u64)tmp[0];
470 tmp[2] += ((u64)(tmp[1] >> 64));
471 tmp[1] = (u64)tmp[1];
472 tmp[3] += ((u64)(tmp[2] >> 64));
473 tmp[2] = (u64)tmp[2];
474 /* tmp[i] < 2^64 */
475
476 out[0] = tmp[0];
477 out[1] = tmp[1];
478 out[2] = tmp[2];
479 out[3] = tmp[3];
480}
481
482/* smallfelem_expand converts a smallfelem to an felem */
483static void smallfelem_expand(felem out, const smallfelem in)
484{
485 out[0] = in[0];
486 out[1] = in[1];
487 out[2] = in[2];
488 out[3] = in[3];
489}
490
491/*-
492 * smallfelem_square sets |out| = |small|^2
493 * On entry:
494 * small[i] < 2^64
495 * On exit:
496 * out[i] < 7 * 2^64 < 2^67
497 */
498static void smallfelem_square(longfelem out, const smallfelem small)
499{
500 limb a;
501 u64 high, low;
502
503 a = ((uint128_t) small[0]) * small[0];
504 low = a;
505 high = a >> 64;
506 out[0] = low;
507 out[1] = high;
508
509 a = ((uint128_t) small[0]) * small[1];
510 low = a;
511 high = a >> 64;
512 out[1] += low;
513 out[1] += low;
514 out[2] = high;
515
516 a = ((uint128_t) small[0]) * small[2];
517 low = a;
518 high = a >> 64;
519 out[2] += low;
520 out[2] *= 2;
521 out[3] = high;
522
523 a = ((uint128_t) small[0]) * small[3];
524 low = a;
525 high = a >> 64;
526 out[3] += low;
527 out[4] = high;
528
529 a = ((uint128_t) small[1]) * small[2];
530 low = a;
531 high = a >> 64;
532 out[3] += low;
533 out[3] *= 2;
534 out[4] += high;
535
536 a = ((uint128_t) small[1]) * small[1];
537 low = a;
538 high = a >> 64;
539 out[2] += low;
540 out[3] += high;
541
542 a = ((uint128_t) small[1]) * small[3];
543 low = a;
544 high = a >> 64;
545 out[4] += low;
546 out[4] *= 2;
547 out[5] = high;
548
549 a = ((uint128_t) small[2]) * small[3];
550 low = a;
551 high = a >> 64;
552 out[5] += low;
553 out[5] *= 2;
554 out[6] = high;
555 out[6] += high;
556
557 a = ((uint128_t) small[2]) * small[2];
558 low = a;
559 high = a >> 64;
560 out[4] += low;
561 out[5] += high;
562
563 a = ((uint128_t) small[3]) * small[3];
564 low = a;
565 high = a >> 64;
566 out[6] += low;
567 out[7] = high;
568}
569
570/*-
571 * felem_square sets |out| = |in|^2
572 * On entry:
573 * in[i] < 2^109
574 * On exit:
575 * out[i] < 7 * 2^64 < 2^67
576 */
577static void felem_square(longfelem out, const felem in)
578{
579 u64 small[4];
580 felem_shrink(small, in);
581 smallfelem_square(out, small);
582}
583
584/*-
585 * smallfelem_mul sets |out| = |small1| * |small2|
586 * On entry:
587 * small1[i] < 2^64
588 * small2[i] < 2^64
589 * On exit:
590 * out[i] < 7 * 2^64 < 2^67
591 */
592static void smallfelem_mul(longfelem out, const smallfelem small1,
593 const smallfelem small2)
594{
595 limb a;
596 u64 high, low;
597
598 a = ((uint128_t) small1[0]) * small2[0];
599 low = a;
600 high = a >> 64;
601 out[0] = low;
602 out[1] = high;
603
604 a = ((uint128_t) small1[0]) * small2[1];
605 low = a;
606 high = a >> 64;
607 out[1] += low;
608 out[2] = high;
609
610 a = ((uint128_t) small1[1]) * small2[0];
611 low = a;
612 high = a >> 64;
613 out[1] += low;
614 out[2] += high;
615
616 a = ((uint128_t) small1[0]) * small2[2];
617 low = a;
618 high = a >> 64;
619 out[2] += low;
620 out[3] = high;
621
622 a = ((uint128_t) small1[1]) * small2[1];
623 low = a;
624 high = a >> 64;
625 out[2] += low;
626 out[3] += high;
627
628 a = ((uint128_t) small1[2]) * small2[0];
629 low = a;
630 high = a >> 64;
631 out[2] += low;
632 out[3] += high;
633
634 a = ((uint128_t) small1[0]) * small2[3];
635 low = a;
636 high = a >> 64;
637 out[3] += low;
638 out[4] = high;
639
640 a = ((uint128_t) small1[1]) * small2[2];
641 low = a;
642 high = a >> 64;
643 out[3] += low;
644 out[4] += high;
645
646 a = ((uint128_t) small1[2]) * small2[1];
647 low = a;
648 high = a >> 64;
649 out[3] += low;
650 out[4] += high;
651
652 a = ((uint128_t) small1[3]) * small2[0];
653 low = a;
654 high = a >> 64;
655 out[3] += low;
656 out[4] += high;
657
658 a = ((uint128_t) small1[1]) * small2[3];
659 low = a;
660 high = a >> 64;
661 out[4] += low;
662 out[5] = high;
663
664 a = ((uint128_t) small1[2]) * small2[2];
665 low = a;
666 high = a >> 64;
667 out[4] += low;
668 out[5] += high;
669
670 a = ((uint128_t) small1[3]) * small2[1];
671 low = a;
672 high = a >> 64;
673 out[4] += low;
674 out[5] += high;
675
676 a = ((uint128_t) small1[2]) * small2[3];
677 low = a;
678 high = a >> 64;
679 out[5] += low;
680 out[6] = high;
681
682 a = ((uint128_t) small1[3]) * small2[2];
683 low = a;
684 high = a >> 64;
685 out[5] += low;
686 out[6] += high;
687
688 a = ((uint128_t) small1[3]) * small2[3];
689 low = a;
690 high = a >> 64;
691 out[6] += low;
692 out[7] = high;
693}
694
695/*-
696 * felem_mul sets |out| = |in1| * |in2|
697 * On entry:
698 * in1[i] < 2^109
699 * in2[i] < 2^109
700 * On exit:
701 * out[i] < 7 * 2^64 < 2^67
702 */
703static void felem_mul(longfelem out, const felem in1, const felem in2)
704{
705 smallfelem small1, small2;
706 felem_shrink(small1, in1);
707 felem_shrink(small2, in2);
708 smallfelem_mul(out, small1, small2);
709}
710
711/*-
712 * felem_small_mul sets |out| = |small1| * |in2|
713 * On entry:
714 * small1[i] < 2^64
715 * in2[i] < 2^109
716 * On exit:
717 * out[i] < 7 * 2^64 < 2^67
718 */
719static void felem_small_mul(longfelem out, const smallfelem small1,
720 const felem in2)
721{
722 smallfelem small2;
723 felem_shrink(small2, in2);
724 smallfelem_mul(out, small1, small2);
725}
726
727# define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4)
728# define two100 (((limb)1) << 100)
729# define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4)
730/* zero100 is 0 mod p */
731static const felem zero100 =
732 { two100m36m4, two100, two100m36p4, two100m36p4 };
733
734/*-
735 * Internal function for the different flavours of felem_reduce.
736 * felem_reduce_ reduces the higher coefficients in[4]-in[7].
737 * On entry:
738 * out[0] >= in[6] + 2^32*in[6] + in[7] + 2^32*in[7]
739 * out[1] >= in[7] + 2^32*in[4]
740 * out[2] >= in[5] + 2^32*in[5]
741 * out[3] >= in[4] + 2^32*in[5] + 2^32*in[6]
742 * On exit:
743 * out[0] <= out[0] + in[4] + 2^32*in[5]
744 * out[1] <= out[1] + in[5] + 2^33*in[6]
745 * out[2] <= out[2] + in[7] + 2*in[6] + 2^33*in[7]
746 * out[3] <= out[3] + 2^32*in[4] + 3*in[7]
747 */
748static void felem_reduce_(felem out, const longfelem in)
749{
750 int128_t c;
751 /* combine common terms from below */
752 c = in[4] + (in[5] << 32);
753 out[0] += c;
754 out[3] -= c;
755
756 c = in[5] - in[7];
757 out[1] += c;
758 out[2] -= c;
759
760 /* the remaining terms */
761 /* 256: [(0,1),(96,-1),(192,-1),(224,1)] */
762 out[1] -= (in[4] << 32);
763 out[3] += (in[4] << 32);
764
765 /* 320: [(32,1),(64,1),(128,-1),(160,-1),(224,-1)] */
766 out[2] -= (in[5] << 32);
767
768 /* 384: [(0,-1),(32,-1),(96,2),(128,2),(224,-1)] */
769 out[0] -= in[6];
770 out[0] -= (in[6] << 32);
771 out[1] += (in[6] << 33);
772 out[2] += (in[6] * 2);
773 out[3] -= (in[6] << 32);
774
775 /* 448: [(0,-1),(32,-1),(64,-1),(128,1),(160,2),(192,3)] */
776 out[0] -= in[7];
777 out[0] -= (in[7] << 32);
778 out[2] += (in[7] << 33);
779 out[3] += (in[7] * 3);
780}
781
782/*-
783 * felem_reduce converts a longfelem into an felem.
784 * To be called directly after felem_square or felem_mul.
785 * On entry:
786 * in[0] < 2^64, in[1] < 3*2^64, in[2] < 5*2^64, in[3] < 7*2^64
787 * in[4] < 7*2^64, in[5] < 5*2^64, in[6] < 3*2^64, in[7] < 2*64
788 * On exit:
789 * out[i] < 2^101
790 */
791static void felem_reduce(felem out, const longfelem in)
792{
793 out[0] = zero100[0] + in[0];
794 out[1] = zero100[1] + in[1];
795 out[2] = zero100[2] + in[2];
796 out[3] = zero100[3] + in[3];
797
798 felem_reduce_(out, in);
799
800 /*-
801 * out[0] > 2^100 - 2^36 - 2^4 - 3*2^64 - 3*2^96 - 2^64 - 2^96 > 0
802 * out[1] > 2^100 - 2^64 - 7*2^96 > 0
803 * out[2] > 2^100 - 2^36 + 2^4 - 5*2^64 - 5*2^96 > 0
804 * out[3] > 2^100 - 2^36 + 2^4 - 7*2^64 - 5*2^96 - 3*2^96 > 0
805 *
806 * out[0] < 2^100 + 2^64 + 7*2^64 + 5*2^96 < 2^101
807 * out[1] < 2^100 + 3*2^64 + 5*2^64 + 3*2^97 < 2^101
808 * out[2] < 2^100 + 5*2^64 + 2^64 + 3*2^65 + 2^97 < 2^101
809 * out[3] < 2^100 + 7*2^64 + 7*2^96 + 3*2^64 < 2^101
810 */
811}
812
813/*-
814 * felem_reduce_zero105 converts a larger longfelem into an felem.
815 * On entry:
816 * in[0] < 2^71
817 * On exit:
818 * out[i] < 2^106
819 */
820static void felem_reduce_zero105(felem out, const longfelem in)
821{
822 out[0] = zero105[0] + in[0];
823 out[1] = zero105[1] + in[1];
824 out[2] = zero105[2] + in[2];
825 out[3] = zero105[3] + in[3];
826
827 felem_reduce_(out, in);
828
829 /*-
830 * out[0] > 2^105 - 2^41 - 2^9 - 2^71 - 2^103 - 2^71 - 2^103 > 0
831 * out[1] > 2^105 - 2^71 - 2^103 > 0
832 * out[2] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 > 0
833 * out[3] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 - 2^103 > 0
834 *
835 * out[0] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
836 * out[1] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106
837 * out[2] < 2^105 + 2^71 + 2^71 + 2^71 + 2^103 < 2^106
838 * out[3] < 2^105 + 2^71 + 2^103 + 2^71 < 2^106
839 */
840}
841
842/*
843 * subtract_u64 sets *result = *result - v and *carry to one if the
844 * subtraction underflowed.
845 */
846static void subtract_u64(u64 *result, u64 *carry, u64 v)
847{
848 uint128_t r = *result;
849 r -= v;
850 *carry = (r >> 64) & 1;
851 *result = (u64)r;
852}
853
854/*
855 * felem_contract converts |in| to its unique, minimal representation. On
856 * entry: in[i] < 2^109
857 */
858static void felem_contract(smallfelem out, const felem in)
859{
860 unsigned i;
861 u64 all_equal_so_far = 0, result = 0, carry;
862
863 felem_shrink(out, in);
864 /* small is minimal except that the value might be > p */
865
866 all_equal_so_far--;
867 /*
868 * We are doing a constant time test if out >= kPrime. We need to compare
869 * each u64, from most-significant to least significant. For each one, if
870 * all words so far have been equal (m is all ones) then a non-equal
871 * result is the answer. Otherwise we continue.
872 */
873 for (i = 3; i < 4; i--) {
874 u64 equal;
875 uint128_t a = ((uint128_t) kPrime[i]) - out[i];
876 /*
877 * if out[i] > kPrime[i] then a will underflow and the high 64-bits
878 * will all be set.
879 */
880 result |= all_equal_so_far & ((u64)(a >> 64));
881
882 /*
883 * if kPrime[i] == out[i] then |equal| will be all zeros and the
884 * decrement will make it all ones.
885 */
886 equal = kPrime[i] ^ out[i];
887 equal--;
888 equal &= equal << 32;
889 equal &= equal << 16;
890 equal &= equal << 8;
891 equal &= equal << 4;
892 equal &= equal << 2;
893 equal &= equal << 1;
894 equal = ((s64) equal) >> 63;
895
896 all_equal_so_far &= equal;
897 }
898
899 /*
900 * if all_equal_so_far is still all ones then the two values are equal
901 * and so out >= kPrime is true.
902 */
903 result |= all_equal_so_far;
904
905 /* if out >= kPrime then we subtract kPrime. */
906 subtract_u64(&out[0], &carry, result & kPrime[0]);
907 subtract_u64(&out[1], &carry, carry);
908 subtract_u64(&out[2], &carry, carry);
909 subtract_u64(&out[3], &carry, carry);
910
911 subtract_u64(&out[1], &carry, result & kPrime[1]);
912 subtract_u64(&out[2], &carry, carry);
913 subtract_u64(&out[3], &carry, carry);
914
915 subtract_u64(&out[2], &carry, result & kPrime[2]);
916 subtract_u64(&out[3], &carry, carry);
917
918 subtract_u64(&out[3], &carry, result & kPrime[3]);
919}
920
921static void smallfelem_square_contract(smallfelem out, const smallfelem in)
922{
923 longfelem longtmp;
924 felem tmp;
925
926 smallfelem_square(longtmp, in);
927 felem_reduce(tmp, longtmp);
928 felem_contract(out, tmp);
929}
930
931static void smallfelem_mul_contract(smallfelem out, const smallfelem in1,
932 const smallfelem in2)
933{
934 longfelem longtmp;
935 felem tmp;
936
937 smallfelem_mul(longtmp, in1, in2);
938 felem_reduce(tmp, longtmp);
939 felem_contract(out, tmp);
940}
941
942/*-
943 * felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
944 * otherwise.
945 * On entry:
946 * small[i] < 2^64
947 */
948static limb smallfelem_is_zero(const smallfelem small)
949{
950 limb result;
951 u64 is_p;
952
953 u64 is_zero = small[0] | small[1] | small[2] | small[3];
954 is_zero--;
955 is_zero &= is_zero << 32;
956 is_zero &= is_zero << 16;
957 is_zero &= is_zero << 8;
958 is_zero &= is_zero << 4;
959 is_zero &= is_zero << 2;
960 is_zero &= is_zero << 1;
961 is_zero = ((s64) is_zero) >> 63;
962
963 is_p = (small[0] ^ kPrime[0]) |
964 (small[1] ^ kPrime[1]) |
965 (small[2] ^ kPrime[2]) | (small[3] ^ kPrime[3]);
966 is_p--;
967 is_p &= is_p << 32;
968 is_p &= is_p << 16;
969 is_p &= is_p << 8;
970 is_p &= is_p << 4;
971 is_p &= is_p << 2;
972 is_p &= is_p << 1;
973 is_p = ((s64) is_p) >> 63;
974
975 is_zero |= is_p;
976
977 result = is_zero;
978 result |= ((limb) is_zero) << 64;
979 return result;
980}
981
982static int smallfelem_is_zero_int(const smallfelem small)
983{
984 return (int)(smallfelem_is_zero(small) & ((limb) 1));
985}
986
987/*-
988 * felem_inv calculates |out| = |in|^{-1}
989 *
990 * Based on Fermat's Little Theorem:
991 * a^p = a (mod p)
992 * a^{p-1} = 1 (mod p)
993 * a^{p-2} = a^{-1} (mod p)
994 */
995static void felem_inv(felem out, const felem in)
996{
997 felem ftmp, ftmp2;
998 /* each e_I will hold |in|^{2^I - 1} */
999 felem e2, e4, e8, e16, e32, e64;
1000 longfelem tmp;
1001 unsigned i;
1002
1003 felem_square(tmp, in);
1004 felem_reduce(ftmp, tmp); /* 2^1 */
1005 felem_mul(tmp, in, ftmp);
1006 felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */
1007 felem_assign(e2, ftmp);
1008 felem_square(tmp, ftmp);
1009 felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */
1010 felem_square(tmp, ftmp);
1011 felem_reduce(ftmp, tmp); /* 2^4 - 2^2 */
1012 felem_mul(tmp, ftmp, e2);
1013 felem_reduce(ftmp, tmp); /* 2^4 - 2^0 */
1014 felem_assign(e4, ftmp);
1015 felem_square(tmp, ftmp);
1016 felem_reduce(ftmp, tmp); /* 2^5 - 2^1 */
1017 felem_square(tmp, ftmp);
1018 felem_reduce(ftmp, tmp); /* 2^6 - 2^2 */
1019 felem_square(tmp, ftmp);
1020 felem_reduce(ftmp, tmp); /* 2^7 - 2^3 */
1021 felem_square(tmp, ftmp);
1022 felem_reduce(ftmp, tmp); /* 2^8 - 2^4 */
1023 felem_mul(tmp, ftmp, e4);
1024 felem_reduce(ftmp, tmp); /* 2^8 - 2^0 */
1025 felem_assign(e8, ftmp);
1026 for (i = 0; i < 8; i++) {
1027 felem_square(tmp, ftmp);
1028 felem_reduce(ftmp, tmp);
1029 } /* 2^16 - 2^8 */
1030 felem_mul(tmp, ftmp, e8);
1031 felem_reduce(ftmp, tmp); /* 2^16 - 2^0 */
1032 felem_assign(e16, ftmp);
1033 for (i = 0; i < 16; i++) {
1034 felem_square(tmp, ftmp);
1035 felem_reduce(ftmp, tmp);
1036 } /* 2^32 - 2^16 */
1037 felem_mul(tmp, ftmp, e16);
1038 felem_reduce(ftmp, tmp); /* 2^32 - 2^0 */
1039 felem_assign(e32, ftmp);
1040 for (i = 0; i < 32; i++) {
1041 felem_square(tmp, ftmp);
1042 felem_reduce(ftmp, tmp);
1043 } /* 2^64 - 2^32 */
1044 felem_assign(e64, ftmp);
1045 felem_mul(tmp, ftmp, in);
1046 felem_reduce(ftmp, tmp); /* 2^64 - 2^32 + 2^0 */
1047 for (i = 0; i < 192; i++) {
1048 felem_square(tmp, ftmp);
1049 felem_reduce(ftmp, tmp);
1050 } /* 2^256 - 2^224 + 2^192 */
1051
1052 felem_mul(tmp, e64, e32);
1053 felem_reduce(ftmp2, tmp); /* 2^64 - 2^0 */
1054 for (i = 0; i < 16; i++) {
1055 felem_square(tmp, ftmp2);
1056 felem_reduce(ftmp2, tmp);
1057 } /* 2^80 - 2^16 */
1058 felem_mul(tmp, ftmp2, e16);
1059 felem_reduce(ftmp2, tmp); /* 2^80 - 2^0 */
1060 for (i = 0; i < 8; i++) {
1061 felem_square(tmp, ftmp2);
1062 felem_reduce(ftmp2, tmp);
1063 } /* 2^88 - 2^8 */
1064 felem_mul(tmp, ftmp2, e8);
1065 felem_reduce(ftmp2, tmp); /* 2^88 - 2^0 */
1066 for (i = 0; i < 4; i++) {
1067 felem_square(tmp, ftmp2);
1068 felem_reduce(ftmp2, tmp);
1069 } /* 2^92 - 2^4 */
1070 felem_mul(tmp, ftmp2, e4);
1071 felem_reduce(ftmp2, tmp); /* 2^92 - 2^0 */
1072 felem_square(tmp, ftmp2);
1073 felem_reduce(ftmp2, tmp); /* 2^93 - 2^1 */
1074 felem_square(tmp, ftmp2);
1075 felem_reduce(ftmp2, tmp); /* 2^94 - 2^2 */
1076 felem_mul(tmp, ftmp2, e2);
1077 felem_reduce(ftmp2, tmp); /* 2^94 - 2^0 */
1078 felem_square(tmp, ftmp2);
1079 felem_reduce(ftmp2, tmp); /* 2^95 - 2^1 */
1080 felem_square(tmp, ftmp2);
1081 felem_reduce(ftmp2, tmp); /* 2^96 - 2^2 */
1082 felem_mul(tmp, ftmp2, in);
1083 felem_reduce(ftmp2, tmp); /* 2^96 - 3 */
1084
1085 felem_mul(tmp, ftmp2, ftmp);
1086 felem_reduce(out, tmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
1087}
1088
1089static void smallfelem_inv_contract(smallfelem out, const smallfelem in)
1090{
1091 felem tmp;
1092
1093 smallfelem_expand(tmp, in);
1094 felem_inv(tmp, tmp);
1095 felem_contract(out, tmp);
1096}
1097
1098/*-
1099 * Group operations
1100 * ----------------
1101 *
1102 * Building on top of the field operations we have the operations on the
1103 * elliptic curve group itself. Points on the curve are represented in Jacobian
1104 * coordinates
1105 */
1106
1107/*-
1108 * point_double calculates 2*(x_in, y_in, z_in)
1109 *
1110 * The method is taken from:
1111 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1112 *
1113 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
1114 * while x_out == y_in is not (maybe this works, but it's not tested).
1115 */
1116static void
1117point_double(felem x_out, felem y_out, felem z_out,
1118 const felem x_in, const felem y_in, const felem z_in)
1119{
1120 longfelem tmp, tmp2;
1121 felem delta, gamma, beta, alpha, ftmp, ftmp2;
1122 smallfelem small1, small2;
1123
1124 felem_assign(ftmp, x_in);
1125 /* ftmp[i] < 2^106 */
1126 felem_assign(ftmp2, x_in);
1127 /* ftmp2[i] < 2^106 */
1128
1129 /* delta = z^2 */
1130 felem_square(tmp, z_in);
1131 felem_reduce(delta, tmp);
1132 /* delta[i] < 2^101 */
1133
1134 /* gamma = y^2 */
1135 felem_square(tmp, y_in);
1136 felem_reduce(gamma, tmp);
1137 /* gamma[i] < 2^101 */
1138 felem_shrink(small1, gamma);
1139
1140 /* beta = x*gamma */
1141 felem_small_mul(tmp, small1, x_in);
1142 felem_reduce(beta, tmp);
1143 /* beta[i] < 2^101 */
1144
1145 /* alpha = 3*(x-delta)*(x+delta) */
1146 felem_diff(ftmp, delta);
1147 /* ftmp[i] < 2^105 + 2^106 < 2^107 */
1148 felem_sum(ftmp2, delta);
1149 /* ftmp2[i] < 2^105 + 2^106 < 2^107 */
1150 felem_scalar(ftmp2, 3);
1151 /* ftmp2[i] < 3 * 2^107 < 2^109 */
1152 felem_mul(tmp, ftmp, ftmp2);
1153 felem_reduce(alpha, tmp);
1154 /* alpha[i] < 2^101 */
1155 felem_shrink(small2, alpha);
1156
1157 /* x' = alpha^2 - 8*beta */
1158 smallfelem_square(tmp, small2);
1159 felem_reduce(x_out, tmp);
1160 felem_assign(ftmp, beta);
1161 felem_scalar(ftmp, 8);
1162 /* ftmp[i] < 8 * 2^101 = 2^104 */
1163 felem_diff(x_out, ftmp);
1164 /* x_out[i] < 2^105 + 2^101 < 2^106 */
1165
1166 /* z' = (y + z)^2 - gamma - delta */
1167 felem_sum(delta, gamma);
1168 /* delta[i] < 2^101 + 2^101 = 2^102 */
1169 felem_assign(ftmp, y_in);
1170 felem_sum(ftmp, z_in);
1171 /* ftmp[i] < 2^106 + 2^106 = 2^107 */
1172 felem_square(tmp, ftmp);
1173 felem_reduce(z_out, tmp);
1174 felem_diff(z_out, delta);
1175 /* z_out[i] < 2^105 + 2^101 < 2^106 */
1176
1177 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1178 felem_scalar(beta, 4);
1179 /* beta[i] < 4 * 2^101 = 2^103 */
1180 felem_diff_zero107(beta, x_out);
1181 /* beta[i] < 2^107 + 2^103 < 2^108 */
1182 felem_small_mul(tmp, small2, beta);
1183 /* tmp[i] < 7 * 2^64 < 2^67 */
1184 smallfelem_square(tmp2, small1);
1185 /* tmp2[i] < 7 * 2^64 */
1186 longfelem_scalar(tmp2, 8);
1187 /* tmp2[i] < 8 * 7 * 2^64 = 7 * 2^67 */
1188 longfelem_diff(tmp, tmp2);
1189 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1190 felem_reduce_zero105(y_out, tmp);
1191 /* y_out[i] < 2^106 */
1192}
1193
1194/*
1195 * point_double_small is the same as point_double, except that it operates on
1196 * smallfelems
1197 */
1198static void
1199point_double_small(smallfelem x_out, smallfelem y_out, smallfelem z_out,
1200 const smallfelem x_in, const smallfelem y_in,
1201 const smallfelem z_in)
1202{
1203 felem felem_x_out, felem_y_out, felem_z_out;
1204 felem felem_x_in, felem_y_in, felem_z_in;
1205
1206 smallfelem_expand(felem_x_in, x_in);
1207 smallfelem_expand(felem_y_in, y_in);
1208 smallfelem_expand(felem_z_in, z_in);
1209 point_double(felem_x_out, felem_y_out, felem_z_out,
1210 felem_x_in, felem_y_in, felem_z_in);
1211 felem_shrink(x_out, felem_x_out);
1212 felem_shrink(y_out, felem_y_out);
1213 felem_shrink(z_out, felem_z_out);
1214}
1215
1216/* copy_conditional copies in to out iff mask is all ones. */
1217static void copy_conditional(felem out, const felem in, limb mask)
1218{
1219 unsigned i;
1220 for (i = 0; i < NLIMBS; ++i) {
1221 const limb tmp = mask & (in[i] ^ out[i]);
1222 out[i] ^= tmp;
1223 }
1224}
1225
1226/* copy_small_conditional copies in to out iff mask is all ones. */
1227static void copy_small_conditional(felem out, const smallfelem in, limb mask)
1228{
1229 unsigned i;
1230 const u64 mask64 = mask;
1231 for (i = 0; i < NLIMBS; ++i) {
1232 out[i] = ((limb) (in[i] & mask64)) | (out[i] & ~mask);
1233 }
1234}
1235
1236/*-
1237 * point_add calculates (x1, y1, z1) + (x2, y2, z2)
1238 *
1239 * The method is taken from:
1240 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1241 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1242 *
1243 * This function includes a branch for checking whether the two input points
1244 * are equal, (while not equal to the point at infinity). This case never
1245 * happens during single point multiplication, so there is no timing leak for
1246 * ECDH or ECDSA signing.
1247 */
1248static void point_add(felem x3, felem y3, felem z3,
1249 const felem x1, const felem y1, const felem z1,
1250 const int mixed, const smallfelem x2,
1251 const smallfelem y2, const smallfelem z2)
1252{
1253 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1254 longfelem tmp, tmp2;
1255 smallfelem small1, small2, small3, small4, small5;
1256 limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1257
1258 felem_shrink(small3, z1);
1259
1260 z1_is_zero = smallfelem_is_zero(small3);
1261 z2_is_zero = smallfelem_is_zero(z2);
1262
1263 /* ftmp = z1z1 = z1**2 */
1264 smallfelem_square(tmp, small3);
1265 felem_reduce(ftmp, tmp);
1266 /* ftmp[i] < 2^101 */
1267 felem_shrink(small1, ftmp);
1268
1269 if (!mixed) {
1270 /* ftmp2 = z2z2 = z2**2 */
1271 smallfelem_square(tmp, z2);
1272 felem_reduce(ftmp2, tmp);
1273 /* ftmp2[i] < 2^101 */
1274 felem_shrink(small2, ftmp2);
1275
1276 felem_shrink(small5, x1);
1277
1278 /* u1 = ftmp3 = x1*z2z2 */
1279 smallfelem_mul(tmp, small5, small2);
1280 felem_reduce(ftmp3, tmp);
1281 /* ftmp3[i] < 2^101 */
1282
1283 /* ftmp5 = z1 + z2 */
1284 felem_assign(ftmp5, z1);
1285 felem_small_sum(ftmp5, z2);
1286 /* ftmp5[i] < 2^107 */
1287
1288 /* ftmp5 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2 */
1289 felem_square(tmp, ftmp5);
1290 felem_reduce(ftmp5, tmp);
1291 /* ftmp2 = z2z2 + z1z1 */
1292 felem_sum(ftmp2, ftmp);
1293 /* ftmp2[i] < 2^101 + 2^101 = 2^102 */
1294 felem_diff(ftmp5, ftmp2);
1295 /* ftmp5[i] < 2^105 + 2^101 < 2^106 */
1296
1297 /* ftmp2 = z2 * z2z2 */
1298 smallfelem_mul(tmp, small2, z2);
1299 felem_reduce(ftmp2, tmp);
1300
1301 /* s1 = ftmp2 = y1 * z2**3 */
1302 felem_mul(tmp, y1, ftmp2);
1303 felem_reduce(ftmp6, tmp);
1304 /* ftmp6[i] < 2^101 */
1305 } else {
1306 /*
1307 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
1308 */
1309
1310 /* u1 = ftmp3 = x1*z2z2 */
1311 felem_assign(ftmp3, x1);
1312 /* ftmp3[i] < 2^106 */
1313
1314 /* ftmp5 = 2z1z2 */
1315 felem_assign(ftmp5, z1);
1316 felem_scalar(ftmp5, 2);
1317 /* ftmp5[i] < 2*2^106 = 2^107 */
1318
1319 /* s1 = ftmp2 = y1 * z2**3 */
1320 felem_assign(ftmp6, y1);
1321 /* ftmp6[i] < 2^106 */
1322 }
1323
1324 /* u2 = x2*z1z1 */
1325 smallfelem_mul(tmp, x2, small1);
1326 felem_reduce(ftmp4, tmp);
1327
1328 /* h = ftmp4 = u2 - u1 */
1329 felem_diff_zero107(ftmp4, ftmp3);
1330 /* ftmp4[i] < 2^107 + 2^101 < 2^108 */
1331 felem_shrink(small4, ftmp4);
1332
1333 x_equal = smallfelem_is_zero(small4);
1334
1335 /* z_out = ftmp5 * h */
1336 felem_small_mul(tmp, small4, ftmp5);
1337 felem_reduce(z_out, tmp);
1338 /* z_out[i] < 2^101 */
1339
1340 /* ftmp = z1 * z1z1 */
1341 smallfelem_mul(tmp, small1, small3);
1342 felem_reduce(ftmp, tmp);
1343
1344 /* s2 = tmp = y2 * z1**3 */
1345 felem_small_mul(tmp, y2, ftmp);
1346 felem_reduce(ftmp5, tmp);
1347
1348 /* r = ftmp5 = (s2 - s1)*2 */
1349 felem_diff_zero107(ftmp5, ftmp6);
1350 /* ftmp5[i] < 2^107 + 2^107 = 2^108 */
1351 felem_scalar(ftmp5, 2);
1352 /* ftmp5[i] < 2^109 */
1353 felem_shrink(small1, ftmp5);
1354 y_equal = smallfelem_is_zero(small1);
1355
1356 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
1357 point_double(x3, y3, z3, x1, y1, z1);
1358 return;
1359 }
1360
1361 /* I = ftmp = (2h)**2 */
1362 felem_assign(ftmp, ftmp4);
1363 felem_scalar(ftmp, 2);
1364 /* ftmp[i] < 2*2^108 = 2^109 */
1365 felem_square(tmp, ftmp);
1366 felem_reduce(ftmp, tmp);
1367
1368 /* J = ftmp2 = h * I */
1369 felem_mul(tmp, ftmp4, ftmp);
1370 felem_reduce(ftmp2, tmp);
1371
1372 /* V = ftmp4 = U1 * I */
1373 felem_mul(tmp, ftmp3, ftmp);
1374 felem_reduce(ftmp4, tmp);
1375
1376 /* x_out = r**2 - J - 2V */
1377 smallfelem_square(tmp, small1);
1378 felem_reduce(x_out, tmp);
1379 felem_assign(ftmp3, ftmp4);
1380 felem_scalar(ftmp4, 2);
1381 felem_sum(ftmp4, ftmp2);
1382 /* ftmp4[i] < 2*2^101 + 2^101 < 2^103 */
1383 felem_diff(x_out, ftmp4);
1384 /* x_out[i] < 2^105 + 2^101 */
1385
1386 /* y_out = r(V-x_out) - 2 * s1 * J */
1387 felem_diff_zero107(ftmp3, x_out);
1388 /* ftmp3[i] < 2^107 + 2^101 < 2^108 */
1389 felem_small_mul(tmp, small1, ftmp3);
1390 felem_mul(tmp2, ftmp6, ftmp2);
1391 longfelem_scalar(tmp2, 2);
1392 /* tmp2[i] < 2*2^67 = 2^68 */
1393 longfelem_diff(tmp, tmp2);
1394 /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */
1395 felem_reduce_zero105(y_out, tmp);
1396 /* y_out[i] < 2^106 */
1397
1398 copy_small_conditional(x_out, x2, z1_is_zero);
1399 copy_conditional(x_out, x1, z2_is_zero);
1400 copy_small_conditional(y_out, y2, z1_is_zero);
1401 copy_conditional(y_out, y1, z2_is_zero);
1402 copy_small_conditional(z_out, z2, z1_is_zero);
1403 copy_conditional(z_out, z1, z2_is_zero);
1404 felem_assign(x3, x_out);
1405 felem_assign(y3, y_out);
1406 felem_assign(z3, z_out);
1407}
1408
1409/*
1410 * point_add_small is the same as point_add, except that it operates on
1411 * smallfelems
1412 */
1413static void point_add_small(smallfelem x3, smallfelem y3, smallfelem z3,
1414 smallfelem x1, smallfelem y1, smallfelem z1,
1415 smallfelem x2, smallfelem y2, smallfelem z2)
1416{
1417 felem felem_x3, felem_y3, felem_z3;
1418 felem felem_x1, felem_y1, felem_z1;
1419 smallfelem_expand(felem_x1, x1);
1420 smallfelem_expand(felem_y1, y1);
1421 smallfelem_expand(felem_z1, z1);
1422 point_add(felem_x3, felem_y3, felem_z3, felem_x1, felem_y1, felem_z1, 0,
1423 x2, y2, z2);
1424 felem_shrink(x3, felem_x3);
1425 felem_shrink(y3, felem_y3);
1426 felem_shrink(z3, felem_z3);
1427}
1428
1429/*-
1430 * Base point pre computation
1431 * --------------------------
1432 *
1433 * Two different sorts of precomputed tables are used in the following code.
1434 * Each contain various points on the curve, where each point is three field
1435 * elements (x, y, z).
1436 *
1437 * For the base point table, z is usually 1 (0 for the point at infinity).
1438 * This table has 2 * 16 elements, starting with the following:
1439 * index | bits | point
1440 * ------+---------+------------------------------
1441 * 0 | 0 0 0 0 | 0G
1442 * 1 | 0 0 0 1 | 1G
1443 * 2 | 0 0 1 0 | 2^64G
1444 * 3 | 0 0 1 1 | (2^64 + 1)G
1445 * 4 | 0 1 0 0 | 2^128G
1446 * 5 | 0 1 0 1 | (2^128 + 1)G
1447 * 6 | 0 1 1 0 | (2^128 + 2^64)G
1448 * 7 | 0 1 1 1 | (2^128 + 2^64 + 1)G
1449 * 8 | 1 0 0 0 | 2^192G
1450 * 9 | 1 0 0 1 | (2^192 + 1)G
1451 * 10 | 1 0 1 0 | (2^192 + 2^64)G
1452 * 11 | 1 0 1 1 | (2^192 + 2^64 + 1)G
1453 * 12 | 1 1 0 0 | (2^192 + 2^128)G
1454 * 13 | 1 1 0 1 | (2^192 + 2^128 + 1)G
1455 * 14 | 1 1 1 0 | (2^192 + 2^128 + 2^64)G
1456 * 15 | 1 1 1 1 | (2^192 + 2^128 + 2^64 + 1)G
1457 * followed by a copy of this with each element multiplied by 2^32.
1458 *
1459 * The reason for this is so that we can clock bits into four different
1460 * locations when doing simple scalar multiplies against the base point,
1461 * and then another four locations using the second 16 elements.
1462 *
1463 * Tables for other points have table[i] = iG for i in 0 .. 16. */
1464
1465/* gmul is the table of precomputed base points */
1466static const smallfelem gmul[2][16][3] = {
1467 {{{0, 0, 0, 0},
1468 {0, 0, 0, 0},
1469 {0, 0, 0, 0}},
1470 {{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2,
1471 0x6b17d1f2e12c4247},
1472 {0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16,
1473 0x4fe342e2fe1a7f9b},
1474 {1, 0, 0, 0}},
1475 {{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de,
1476 0x0fa822bc2811aaa5},
1477 {0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b,
1478 0xbff44ae8f5dba80d},
1479 {1, 0, 0, 0}},
1480 {{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789,
1481 0x300a4bbc89d6726f},
1482 {0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f,
1483 0x72aac7e0d09b4644},
1484 {1, 0, 0, 0}},
1485 {{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e,
1486 0x447d739beedb5e67},
1487 {0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7,
1488 0x2d4825ab834131ee},
1489 {1, 0, 0, 0}},
1490 {{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60,
1491 0xef9519328a9c72ff},
1492 {0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c,
1493 0x611e9fc37dbb2c9b},
1494 {1, 0, 0, 0}},
1495 {{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf,
1496 0x550663797b51f5d8},
1497 {0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5,
1498 0x157164848aecb851},
1499 {1, 0, 0, 0}},
1500 {{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391,
1501 0xeb5d7745b21141ea},
1502 {0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee,
1503 0xeafd72ebdbecc17b},
1504 {1, 0, 0, 0}},
1505 {{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5,
1506 0xa6d39677a7849276},
1507 {0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf,
1508 0x674f84749b0b8816},
1509 {1, 0, 0, 0}},
1510 {{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb,
1511 0x4e769e7672c9ddad},
1512 {0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281,
1513 0x42b99082de830663},
1514 {1, 0, 0, 0}},
1515 {{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478,
1516 0x78878ef61c6ce04d},
1517 {0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def,
1518 0xb6cb3f5d7b72c321},
1519 {1, 0, 0, 0}},
1520 {{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae,
1521 0x0c88bc4d716b1287},
1522 {0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa,
1523 0xdd5ddea3f3901dc6},
1524 {1, 0, 0, 0}},
1525 {{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3,
1526 0x68f344af6b317466},
1527 {0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3,
1528 0x31b9c405f8540a20},
1529 {1, 0, 0, 0}},
1530 {{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0,
1531 0x4052bf4b6f461db9},
1532 {0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8,
1533 0xfecf4d5190b0fc61},
1534 {1, 0, 0, 0}},
1535 {{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a,
1536 0x1eddbae2c802e41a},
1537 {0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0,
1538 0x43104d86560ebcfc},
1539 {1, 0, 0, 0}},
1540 {{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a,
1541 0xb48e26b484f7a21c},
1542 {0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668,
1543 0xfac015404d4d3dab},
1544 {1, 0, 0, 0}}},
1545 {{{0, 0, 0, 0},
1546 {0, 0, 0, 0},
1547 {0, 0, 0, 0}},
1548 {{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da,
1549 0x7fe36b40af22af89},
1550 {0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1,
1551 0xe697d45825b63624},
1552 {1, 0, 0, 0}},
1553 {{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902,
1554 0x4a5b506612a677a6},
1555 {0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40,
1556 0xeb13461ceac089f1},
1557 {1, 0, 0, 0}},
1558 {{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857,
1559 0x0781b8291c6a220a},
1560 {0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434,
1561 0x690cde8df0151593},
1562 {1, 0, 0, 0}},
1563 {{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326,
1564 0x8a535f566ec73617},
1565 {0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf,
1566 0x0455c08468b08bd7},
1567 {1, 0, 0, 0}},
1568 {{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279,
1569 0x06bada7ab77f8276},
1570 {0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70,
1571 0x5b476dfd0e6cb18a},
1572 {1, 0, 0, 0}},
1573 {{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8,
1574 0x3e29864e8a2ec908},
1575 {0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed,
1576 0x239b90ea3dc31e7e},
1577 {1, 0, 0, 0}},
1578 {{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4,
1579 0x820f4dd949f72ff7},
1580 {0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3,
1581 0x140406ec783a05ec},
1582 {1, 0, 0, 0}},
1583 {{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe,
1584 0x68f6b8542783dfee},
1585 {0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028,
1586 0xcbe1feba92e40ce6},
1587 {1, 0, 0, 0}},
1588 {{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927,
1589 0xd0b2f94d2f420109},
1590 {0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a,
1591 0x971459828b0719e5},
1592 {1, 0, 0, 0}},
1593 {{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687,
1594 0x961610004a866aba},
1595 {0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c,
1596 0x7acb9fadcee75e44},
1597 {1, 0, 0, 0}},
1598 {{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea,
1599 0x24eb9acca333bf5b},
1600 {0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d,
1601 0x69f891c5acd079cc},
1602 {1, 0, 0, 0}},
1603 {{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514,
1604 0xe51f547c5972a107},
1605 {0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06,
1606 0x1c309a2b25bb1387},
1607 {1, 0, 0, 0}},
1608 {{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828,
1609 0x20b87b8aa2c4e503},
1610 {0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044,
1611 0xf5c6fa49919776be},
1612 {1, 0, 0, 0}},
1613 {{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56,
1614 0x1ed7d1b9332010b9},
1615 {0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24,
1616 0x3a2b03f03217257a},
1617 {1, 0, 0, 0}},
1618 {{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b,
1619 0x15fee545c78dd9f6},
1620 {0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb,
1621 0x4ab5b6b2b8753f81},
1622 {1, 0, 0, 0}}}
1623};
1624
1625/*
1626 * select_point selects the |idx|th point from a precomputation table and
1627 * copies it to out.
1628 */
1629static void select_point(const u64 idx, unsigned int size,
1630 const smallfelem pre_comp[16][3], smallfelem out[3])
1631{
1632 unsigned i, j;
1633 u64 *outlimbs = &out[0][0];
1634
1635 memset(out, 0, sizeof(*out) * 3);
1636
1637 for (i = 0; i < size; i++) {
1638 const u64 *inlimbs = (u64 *)&pre_comp[i][0][0];
1639 u64 mask = i ^ idx;
1640 mask |= mask >> 4;
1641 mask |= mask >> 2;
1642 mask |= mask >> 1;
1643 mask &= 1;
1644 mask--;
1645 for (j = 0; j < NLIMBS * 3; j++)
1646 outlimbs[j] |= inlimbs[j] & mask;
1647 }
1648}
1649
1650/* get_bit returns the |i|th bit in |in| */
1651static char get_bit(const felem_bytearray in, int i)
1652{
1653 if ((i < 0) || (i >= 256))
1654 return 0;
1655 return (in[i >> 3] >> (i & 7)) & 1;
1656}
1657
1658/*
1659 * Interleaved point multiplication using precomputed point multiples: The
1660 * small point multiples 0*P, 1*P, ..., 17*P are in pre_comp[], the scalars
1661 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1662 * generator, using certain (large) precomputed multiples in g_pre_comp.
1663 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1664 */
1665static void batch_mul(felem x_out, felem y_out, felem z_out,
1666 const felem_bytearray scalars[],
1667 const unsigned num_points, const u8 *g_scalar,
1668 const int mixed, const smallfelem pre_comp[][17][3],
1669 const smallfelem g_pre_comp[2][16][3])
1670{
1671 int i, skip;
1672 unsigned num, gen_mul = (g_scalar != NULL);
1673 felem nq[3], ftmp;
1674 smallfelem tmp[3];
1675 u64 bits;
1676 u8 sign, digit;
1677
1678 /* set nq to the point at infinity */
1679 memset(nq, 0, sizeof(nq));
1680
1681 /*
1682 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1683 * of the generator (two in each of the last 32 rounds) and additions of
1684 * other points multiples (every 5th round).
1685 */
1686 skip = 1; /* save two point operations in the first
1687 * round */
1688 for (i = (num_points ? 255 : 31); i >= 0; --i) {
1689 /* double */
1690 if (!skip)
1691 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1692
1693 /* add multiples of the generator */
1694 if (gen_mul && (i <= 31)) {
1695 /* first, look 32 bits upwards */
1696 bits = get_bit(g_scalar, i + 224) << 3;
1697 bits |= get_bit(g_scalar, i + 160) << 2;
1698 bits |= get_bit(g_scalar, i + 96) << 1;
1699 bits |= get_bit(g_scalar, i + 32);
1700 /* select the point to add, in constant time */
1701 select_point(bits, 16, g_pre_comp[1], tmp);
1702
1703 if (!skip) {
1704 /* Arg 1 below is for "mixed" */
1705 point_add(nq[0], nq[1], nq[2],
1706 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1707 } else {
1708 smallfelem_expand(nq[0], tmp[0]);
1709 smallfelem_expand(nq[1], tmp[1]);
1710 smallfelem_expand(nq[2], tmp[2]);
1711 skip = 0;
1712 }
1713
1714 /* second, look at the current position */
1715 bits = get_bit(g_scalar, i + 192) << 3;
1716 bits |= get_bit(g_scalar, i + 128) << 2;
1717 bits |= get_bit(g_scalar, i + 64) << 1;
1718 bits |= get_bit(g_scalar, i);
1719 /* select the point to add, in constant time */
1720 select_point(bits, 16, g_pre_comp[0], tmp);
1721 /* Arg 1 below is for "mixed" */
1722 point_add(nq[0], nq[1], nq[2],
1723 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1724 }
1725
1726 /* do other additions every 5 doublings */
1727 if (num_points && (i % 5 == 0)) {
1728 /* loop over all scalars */
1729 for (num = 0; num < num_points; ++num) {
1730 bits = get_bit(scalars[num], i + 4) << 5;
1731 bits |= get_bit(scalars[num], i + 3) << 4;
1732 bits |= get_bit(scalars[num], i + 2) << 3;
1733 bits |= get_bit(scalars[num], i + 1) << 2;
1734 bits |= get_bit(scalars[num], i) << 1;
1735 bits |= get_bit(scalars[num], i - 1);
1736 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1737
1738 /*
1739 * select the point to add or subtract, in constant time
1740 */
1741 select_point(digit, 17, pre_comp[num], tmp);
1742 smallfelem_neg(ftmp, tmp[1]); /* (X, -Y, Z) is the negative
1743 * point */
1744 copy_small_conditional(ftmp, tmp[1], (((limb) sign) - 1));
1745 felem_contract(tmp[1], ftmp);
1746
1747 if (!skip) {
1748 point_add(nq[0], nq[1], nq[2],
1749 nq[0], nq[1], nq[2],
1750 mixed, tmp[0], tmp[1], tmp[2]);
1751 } else {
1752 smallfelem_expand(nq[0], tmp[0]);
1753 smallfelem_expand(nq[1], tmp[1]);
1754 smallfelem_expand(nq[2], tmp[2]);
1755 skip = 0;
1756 }
1757 }
1758 }
1759 }
1760 felem_assign(x_out, nq[0]);
1761 felem_assign(y_out, nq[1]);
1762 felem_assign(z_out, nq[2]);
1763}
1764
1765/* Precomputation for the group generator. */
1766struct nistp256_pre_comp_st {
1767 smallfelem g_pre_comp[2][16][3];
1768 int references;
1769 CRYPTO_RWLOCK *lock;
1770};
1771
1772const EC_METHOD *EC_GFp_nistp256_method(void)
1773{
1774 static const EC_METHOD ret = {
1775 EC_FLAGS_DEFAULT_OCT,
1776 NID_X9_62_prime_field,
1777 ec_GFp_nistp256_group_init,
1778 ec_GFp_simple_group_finish,
1779 ec_GFp_simple_group_clear_finish,
1780 ec_GFp_nist_group_copy,
1781 ec_GFp_nistp256_group_set_curve,
1782 ec_GFp_simple_group_get_curve,
1783 ec_GFp_simple_group_get_degree,
1784 ec_group_simple_order_bits,
1785 ec_GFp_simple_group_check_discriminant,
1786 ec_GFp_simple_point_init,
1787 ec_GFp_simple_point_finish,
1788 ec_GFp_simple_point_clear_finish,
1789 ec_GFp_simple_point_copy,
1790 ec_GFp_simple_point_set_to_infinity,
1791 ec_GFp_simple_set_Jprojective_coordinates_GFp,
1792 ec_GFp_simple_get_Jprojective_coordinates_GFp,
1793 ec_GFp_simple_point_set_affine_coordinates,
1794 ec_GFp_nistp256_point_get_affine_coordinates,
1795 0 /* point_set_compressed_coordinates */ ,
1796 0 /* point2oct */ ,
1797 0 /* oct2point */ ,
1798 ec_GFp_simple_add,
1799 ec_GFp_simple_dbl,
1800 ec_GFp_simple_invert,
1801 ec_GFp_simple_is_at_infinity,
1802 ec_GFp_simple_is_on_curve,
1803 ec_GFp_simple_cmp,
1804 ec_GFp_simple_make_affine,
1805 ec_GFp_simple_points_make_affine,
1806 ec_GFp_nistp256_points_mul,
1807 ec_GFp_nistp256_precompute_mult,
1808 ec_GFp_nistp256_have_precompute_mult,
1809 ec_GFp_nist_field_mul,
1810 ec_GFp_nist_field_sqr,
1811 0 /* field_div */ ,
1812 0 /* field_encode */ ,
1813 0 /* field_decode */ ,
1814 0, /* field_set_to_one */
1815 ec_key_simple_priv2oct,
1816 ec_key_simple_oct2priv,
1817 0, /* set private */
1818 ec_key_simple_generate_key,
1819 ec_key_simple_check_key,
1820 ec_key_simple_generate_public_key,
1821 0, /* keycopy */
1822 0, /* keyfinish */
1823 ecdh_simple_compute_key
1824 };
1825
1826 return &ret;
1827}
1828
1829/******************************************************************************/
1830/*
1831 * FUNCTIONS TO MANAGE PRECOMPUTATION
1832 */
1833
1834static NISTP256_PRE_COMP *nistp256_pre_comp_new()
1835{
1836 NISTP256_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1837
1838 if (ret == NULL) {
1839 ECerr(EC_F_NISTP256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1840 return ret;
1841 }
1842
1843 ret->references = 1;
1844
1845 ret->lock = CRYPTO_THREAD_lock_new();
1846 if (ret->lock == NULL) {
1847 ECerr(EC_F_NISTP256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1848 OPENSSL_free(ret);
1849 return NULL;
1850 }
1851 return ret;
1852}
1853
1854NISTP256_PRE_COMP *EC_nistp256_pre_comp_dup(NISTP256_PRE_COMP *p)
1855{
1856 int i;
1857 if (p != NULL)
1858 CRYPTO_atomic_add(&p->references, 1, &i, p->lock);
1859 return p;
1860}
1861
1862void EC_nistp256_pre_comp_free(NISTP256_PRE_COMP *pre)
1863{
1864 int i;
1865
1866 if (pre == NULL)
1867 return;
1868
1869 CRYPTO_atomic_add(&pre->references, -1, &i, pre->lock);
1870 REF_PRINT_COUNT("EC_nistp256", x);
1871 if (i > 0)
1872 return;
1873 REF_ASSERT_ISNT(i < 0);
1874
1875 CRYPTO_THREAD_lock_free(pre->lock);
1876 OPENSSL_free(pre);
1877}
1878
1879/******************************************************************************/
1880/*
1881 * OPENSSL EC_METHOD FUNCTIONS
1882 */
1883
1884int ec_GFp_nistp256_group_init(EC_GROUP *group)
1885{
1886 int ret;
1887 ret = ec_GFp_simple_group_init(group);
1888 group->a_is_minus3 = 1;
1889 return ret;
1890}
1891
1892int ec_GFp_nistp256_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1893 const BIGNUM *a, const BIGNUM *b,
1894 BN_CTX *ctx)
1895{
1896 int ret = 0;
1897 BN_CTX *new_ctx = NULL;
1898 BIGNUM *curve_p, *curve_a, *curve_b;
1899
1900 if (ctx == NULL)
1901 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1902 return 0;
1903 BN_CTX_start(ctx);
1904 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1905 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1906 ((curve_b = BN_CTX_get(ctx)) == NULL))
1907 goto err;
1908 BN_bin2bn(nistp256_curve_params[0], sizeof(felem_bytearray), curve_p);
1909 BN_bin2bn(nistp256_curve_params[1], sizeof(felem_bytearray), curve_a);
1910 BN_bin2bn(nistp256_curve_params[2], sizeof(felem_bytearray), curve_b);
1911 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1912 ECerr(EC_F_EC_GFP_NISTP256_GROUP_SET_CURVE,
1913 EC_R_WRONG_CURVE_PARAMETERS);
1914 goto err;
1915 }
1916 group->field_mod_func = BN_nist_mod_256;
1917 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1918 err:
1919 BN_CTX_end(ctx);
1920 BN_CTX_free(new_ctx);
1921 return ret;
1922}
1923
1924/*
1925 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1926 * (X/Z^2, Y/Z^3)
1927 */
1928int ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group,
1929 const EC_POINT *point,
1930 BIGNUM *x, BIGNUM *y,
1931 BN_CTX *ctx)
1932{
1933 felem z1, z2, x_in, y_in;
1934 smallfelem x_out, y_out;
1935 longfelem tmp;
1936
1937 if (EC_POINT_is_at_infinity(group, point)) {
1938 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1939 EC_R_POINT_AT_INFINITY);
1940 return 0;
1941 }
1942 if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1943 (!BN_to_felem(z1, point->Z)))
1944 return 0;
1945 felem_inv(z2, z1);
1946 felem_square(tmp, z2);
1947 felem_reduce(z1, tmp);
1948 felem_mul(tmp, x_in, z1);
1949 felem_reduce(x_in, tmp);
1950 felem_contract(x_out, x_in);
1951 if (x != NULL) {
1952 if (!smallfelem_to_BN(x, x_out)) {
1953 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1954 ERR_R_BN_LIB);
1955 return 0;
1956 }
1957 }
1958 felem_mul(tmp, z1, z2);
1959 felem_reduce(z1, tmp);
1960 felem_mul(tmp, y_in, z1);
1961 felem_reduce(y_in, tmp);
1962 felem_contract(y_out, y_in);
1963 if (y != NULL) {
1964 if (!smallfelem_to_BN(y, y_out)) {
1965 ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES,
1966 ERR_R_BN_LIB);
1967 return 0;
1968 }
1969 }
1970 return 1;
1971}
1972
1973/* points below is of size |num|, and tmp_smallfelems is of size |num+1| */
1974static void make_points_affine(size_t num, smallfelem points[][3],
1975 smallfelem tmp_smallfelems[])
1976{
1977 /*
1978 * Runs in constant time, unless an input is the point at infinity (which
1979 * normally shouldn't happen).
1980 */
1981 ec_GFp_nistp_points_make_affine_internal(num,
1982 points,
1983 sizeof(smallfelem),
1984 tmp_smallfelems,
1985 (void (*)(void *))smallfelem_one,
1986 (int (*)(const void *))
1987 smallfelem_is_zero_int,
1988 (void (*)(void *, const void *))
1989 smallfelem_assign,
1990 (void (*)(void *, const void *))
1991 smallfelem_square_contract,
1992 (void (*)
1993 (void *, const void *,
1994 const void *))
1995 smallfelem_mul_contract,
1996 (void (*)(void *, const void *))
1997 smallfelem_inv_contract,
1998 /* nothing to contract */
1999 (void (*)(void *, const void *))
2000 smallfelem_assign);
2001}
2002
2003/*
2004 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
2005 * values Result is stored in r (r can equal one of the inputs).
2006 */
2007int ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
2008 const BIGNUM *scalar, size_t num,
2009 const EC_POINT *points[],
2010 const BIGNUM *scalars[], BN_CTX *ctx)
2011{
2012 int ret = 0;
2013 int j;
2014 int mixed = 0;
2015 BN_CTX *new_ctx = NULL;
2016 BIGNUM *x, *y, *z, *tmp_scalar;
2017 felem_bytearray g_secret;
2018 felem_bytearray *secrets = NULL;
2019 smallfelem (*pre_comp)[17][3] = NULL;
2020 smallfelem *tmp_smallfelems = NULL;
2021 felem_bytearray tmp;
2022 unsigned i, num_bytes;
2023 int have_pre_comp = 0;
2024 size_t num_points = num;
2025 smallfelem x_in, y_in, z_in;
2026 felem x_out, y_out, z_out;
2027 NISTP256_PRE_COMP *pre = NULL;
2028 const smallfelem(*g_pre_comp)[16][3] = NULL;
2029 EC_POINT *generator = NULL;
2030 const EC_POINT *p = NULL;
2031 const BIGNUM *p_scalar = NULL;
2032
2033 if (ctx == NULL)
2034 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
2035 return 0;
2036 BN_CTX_start(ctx);
2037 if (((x = BN_CTX_get(ctx)) == NULL) ||
2038 ((y = BN_CTX_get(ctx)) == NULL) ||
2039 ((z = BN_CTX_get(ctx)) == NULL) ||
2040 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
2041 goto err;
2042
2043 if (scalar != NULL) {
2044 pre = group->pre_comp.nistp256;
2045 if (pre)
2046 /* we have precomputation, try to use it */
2047 g_pre_comp = (const smallfelem(*)[16][3])pre->g_pre_comp;
2048 else
2049 /* try to use the standard precomputation */
2050 g_pre_comp = &gmul[0];
2051 generator = EC_POINT_new(group);
2052 if (generator == NULL)
2053 goto err;
2054 /* get the generator from precomputation */
2055 if (!smallfelem_to_BN(x, g_pre_comp[0][1][0]) ||
2056 !smallfelem_to_BN(y, g_pre_comp[0][1][1]) ||
2057 !smallfelem_to_BN(z, g_pre_comp[0][1][2])) {
2058 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2059 goto err;
2060 }
2061 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
2062 generator, x, y, z,
2063 ctx))
2064 goto err;
2065 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
2066 /* precomputation matches generator */
2067 have_pre_comp = 1;
2068 else
2069 /*
2070 * we don't have valid precomputation: treat the generator as a
2071 * random point
2072 */
2073 num_points++;
2074 }
2075 if (num_points > 0) {
2076 if (num_points >= 3) {
2077 /*
2078 * unless we precompute multiples for just one or two points,
2079 * converting those into affine form is time well spent
2080 */
2081 mixed = 1;
2082 }
2083 secrets = OPENSSL_malloc(sizeof(*secrets) * num_points);
2084 pre_comp = OPENSSL_malloc(sizeof(*pre_comp) * num_points);
2085 if (mixed)
2086 tmp_smallfelems =
2087 OPENSSL_malloc(sizeof(*tmp_smallfelems) * (num_points * 17 + 1));
2088 if ((secrets == NULL) || (pre_comp == NULL)
2089 || (mixed && (tmp_smallfelems == NULL))) {
2090 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_MALLOC_FAILURE);
2091 goto err;
2092 }
2093
2094 /*
2095 * we treat NULL scalars as 0, and NULL points as points at infinity,
2096 * i.e., they contribute nothing to the linear combination
2097 */
2098 memset(secrets, 0, sizeof(*secrets) * num_points);
2099 memset(pre_comp, 0, sizeof(*pre_comp) * num_points);
2100 for (i = 0; i < num_points; ++i) {
2101 if (i == num)
2102 /*
2103 * we didn't have a valid precomputation, so we pick the
2104 * generator
2105 */
2106 {
2107 p = EC_GROUP_get0_generator(group);
2108 p_scalar = scalar;
2109 } else
2110 /* the i^th point */
2111 {
2112 p = points[i];
2113 p_scalar = scalars[i];
2114 }
2115 if ((p_scalar != NULL) && (p != NULL)) {
2116 /* reduce scalar to 0 <= scalar < 2^256 */
2117 if ((BN_num_bits(p_scalar) > 256)
2118 || (BN_is_negative(p_scalar))) {
2119 /*
2120 * this is an unusual input, and we don't guarantee
2121 * constant-timeness
2122 */
2123 if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
2124 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2125 goto err;
2126 }
2127 num_bytes = BN_bn2bin(tmp_scalar, tmp);
2128 } else
2129 num_bytes = BN_bn2bin(p_scalar, tmp);
2130 flip_endian(secrets[i], tmp, num_bytes);
2131 /* precompute multiples */
2132 if ((!BN_to_felem(x_out, p->X)) ||
2133 (!BN_to_felem(y_out, p->Y)) ||
2134 (!BN_to_felem(z_out, p->Z)))
2135 goto err;
2136 felem_shrink(pre_comp[i][1][0], x_out);
2137 felem_shrink(pre_comp[i][1][1], y_out);
2138 felem_shrink(pre_comp[i][1][2], z_out);
2139 for (j = 2; j <= 16; ++j) {
2140 if (j & 1) {
2141 point_add_small(pre_comp[i][j][0], pre_comp[i][j][1],
2142 pre_comp[i][j][2], pre_comp[i][1][0],
2143 pre_comp[i][1][1], pre_comp[i][1][2],
2144 pre_comp[i][j - 1][0],
2145 pre_comp[i][j - 1][1],
2146 pre_comp[i][j - 1][2]);
2147 } else {
2148 point_double_small(pre_comp[i][j][0],
2149 pre_comp[i][j][1],
2150 pre_comp[i][j][2],
2151 pre_comp[i][j / 2][0],
2152 pre_comp[i][j / 2][1],
2153 pre_comp[i][j / 2][2]);
2154 }
2155 }
2156 }
2157 }
2158 if (mixed)
2159 make_points_affine(num_points * 17, pre_comp[0], tmp_smallfelems);
2160 }
2161
2162 /* the scalar for the generator */
2163 if ((scalar != NULL) && (have_pre_comp)) {
2164 memset(g_secret, 0, sizeof(g_secret));
2165 /* reduce scalar to 0 <= scalar < 2^256 */
2166 if ((BN_num_bits(scalar) > 256) || (BN_is_negative(scalar))) {
2167 /*
2168 * this is an unusual input, and we don't guarantee
2169 * constant-timeness
2170 */
2171 if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
2172 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2173 goto err;
2174 }
2175 num_bytes = BN_bn2bin(tmp_scalar, tmp);
2176 } else
2177 num_bytes = BN_bn2bin(scalar, tmp);
2178 flip_endian(g_secret, tmp, num_bytes);
2179 /* do the multiplication with generator precomputation */
2180 batch_mul(x_out, y_out, z_out,
2181 (const felem_bytearray(*))secrets, num_points,
2182 g_secret,
2183 mixed, (const smallfelem(*)[17][3])pre_comp, g_pre_comp);
2184 } else
2185 /* do the multiplication without generator precomputation */
2186 batch_mul(x_out, y_out, z_out,
2187 (const felem_bytearray(*))secrets, num_points,
2188 NULL, mixed, (const smallfelem(*)[17][3])pre_comp, NULL);
2189 /* reduce the output to its unique minimal representation */
2190 felem_contract(x_in, x_out);
2191 felem_contract(y_in, y_out);
2192 felem_contract(z_in, z_out);
2193 if ((!smallfelem_to_BN(x, x_in)) || (!smallfelem_to_BN(y, y_in)) ||
2194 (!smallfelem_to_BN(z, z_in))) {
2195 ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB);
2196 goto err;
2197 }
2198 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
2199
2200 err:
2201 BN_CTX_end(ctx);
2202 EC_POINT_free(generator);
2203 BN_CTX_free(new_ctx);
2204 OPENSSL_free(secrets);
2205 OPENSSL_free(pre_comp);
2206 OPENSSL_free(tmp_smallfelems);
2207 return ret;
2208}
2209
2210int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
2211{
2212 int ret = 0;
2213 NISTP256_PRE_COMP *pre = NULL;
2214 int i, j;
2215 BN_CTX *new_ctx = NULL;
2216 BIGNUM *x, *y;
2217 EC_POINT *generator = NULL;
2218 smallfelem tmp_smallfelems[32];
2219 felem x_tmp, y_tmp, z_tmp;
2220
2221 /* throw away old precomputation */
2222 EC_pre_comp_free(group);
2223 if (ctx == NULL)
2224 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
2225 return 0;
2226 BN_CTX_start(ctx);
2227 if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
2228 goto err;
2229 /* get the generator */
2230 if (group->generator == NULL)
2231 goto err;
2232 generator = EC_POINT_new(group);
2233 if (generator == NULL)
2234 goto err;
2235 BN_bin2bn(nistp256_curve_params[3], sizeof(felem_bytearray), x);
2236 BN_bin2bn(nistp256_curve_params[4], sizeof(felem_bytearray), y);
2237 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
2238 goto err;
2239 if ((pre = nistp256_pre_comp_new()) == NULL)
2240 goto err;
2241 /*
2242 * if the generator is the standard one, use built-in precomputation
2243 */
2244 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
2245 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
2246 goto done;
2247 }
2248 if ((!BN_to_felem(x_tmp, group->generator->X)) ||
2249 (!BN_to_felem(y_tmp, group->generator->Y)) ||
2250 (!BN_to_felem(z_tmp, group->generator->Z)))
2251 goto err;
2252 felem_shrink(pre->g_pre_comp[0][1][0], x_tmp);
2253 felem_shrink(pre->g_pre_comp[0][1][1], y_tmp);
2254 felem_shrink(pre->g_pre_comp[0][1][2], z_tmp);
2255 /*
2256 * compute 2^64*G, 2^128*G, 2^192*G for the first table, 2^32*G, 2^96*G,
2257 * 2^160*G, 2^224*G for the second one
2258 */
2259 for (i = 1; i <= 8; i <<= 1) {
2260 point_double_small(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
2261 pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
2262 pre->g_pre_comp[0][i][1],
2263 pre->g_pre_comp[0][i][2]);
2264 for (j = 0; j < 31; ++j) {
2265 point_double_small(pre->g_pre_comp[1][i][0],
2266 pre->g_pre_comp[1][i][1],
2267 pre->g_pre_comp[1][i][2],
2268 pre->g_pre_comp[1][i][0],
2269 pre->g_pre_comp[1][i][1],
2270 pre->g_pre_comp[1][i][2]);
2271 }
2272 if (i == 8)
2273 break;
2274 point_double_small(pre->g_pre_comp[0][2 * i][0],
2275 pre->g_pre_comp[0][2 * i][1],
2276 pre->g_pre_comp[0][2 * i][2],
2277 pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
2278 pre->g_pre_comp[1][i][2]);
2279 for (j = 0; j < 31; ++j) {
2280 point_double_small(pre->g_pre_comp[0][2 * i][0],
2281 pre->g_pre_comp[0][2 * i][1],
2282 pre->g_pre_comp[0][2 * i][2],
2283 pre->g_pre_comp[0][2 * i][0],
2284 pre->g_pre_comp[0][2 * i][1],
2285 pre->g_pre_comp[0][2 * i][2]);
2286 }
2287 }
2288 for (i = 0; i < 2; i++) {
2289 /* g_pre_comp[i][0] is the point at infinity */
2290 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
2291 /* the remaining multiples */
2292 /* 2^64*G + 2^128*G resp. 2^96*G + 2^160*G */
2293 point_add_small(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
2294 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
2295 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
2296 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
2297 pre->g_pre_comp[i][2][2]);
2298 /* 2^64*G + 2^192*G resp. 2^96*G + 2^224*G */
2299 point_add_small(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
2300 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
2301 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2302 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
2303 pre->g_pre_comp[i][2][2]);
2304 /* 2^128*G + 2^192*G resp. 2^160*G + 2^224*G */
2305 point_add_small(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
2306 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
2307 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
2308 pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
2309 pre->g_pre_comp[i][4][2]);
2310 /*
2311 * 2^64*G + 2^128*G + 2^192*G resp. 2^96*G + 2^160*G + 2^224*G
2312 */
2313 point_add_small(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
2314 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
2315 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
2316 pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
2317 pre->g_pre_comp[i][2][2]);
2318 for (j = 1; j < 8; ++j) {
2319 /* odd multiples: add G resp. 2^32*G */
2320 point_add_small(pre->g_pre_comp[i][2 * j + 1][0],
2321 pre->g_pre_comp[i][2 * j + 1][1],
2322 pre->g_pre_comp[i][2 * j + 1][2],
2323 pre->g_pre_comp[i][2 * j][0],
2324 pre->g_pre_comp[i][2 * j][1],
2325 pre->g_pre_comp[i][2 * j][2],
2326 pre->g_pre_comp[i][1][0],
2327 pre->g_pre_comp[i][1][1],
2328 pre->g_pre_comp[i][1][2]);
2329 }
2330 }
2331 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems);
2332
2333 done:
2334 SETPRECOMP(group, nistp256, pre);
2335 pre = NULL;
2336 ret = 1;
2337
2338 err:
2339 BN_CTX_end(ctx);
2340 EC_POINT_free(generator);
2341 BN_CTX_free(new_ctx);
2342 EC_nistp256_pre_comp_free(pre);
2343 return ret;
2344}
2345
2346int ec_GFp_nistp256_have_precompute_mult(const EC_GROUP *group)
2347{
2348 return HAVEPRECOMP(group, nistp256);
2349}
2350#endif
Note: See TracBrowser for help on using the repository browser.