source: EcnlProtoTool/trunk/openssl-1.1.0e/crypto/ec/ecp_nistp224.c@331

Last change on this file since 331 was 331, checked in by coas-nagasima, 6 years ago

Changed and updated the projects related to prototool to use musl instead of newlib
Changed and updated ntshell from serving as the layer below newlib to implementing musl's syscalls
Updated the following OSS:
・mruby-1.3.0
・musl-1.1.18
・onigmo-6.1.3
・tcc-0.9.27
Added the following OSS:
・openssl-1.1.0e
・curl-7.57.0
・zlib-1.2.11
Added the following mrbgems:
・iij/mruby-digest
・iij/mruby-env
・iij/mruby-errno
・iij/mruby-iijson
・iij/mruby-ipaddr
・iij/mruby-mock
・iij/mruby-require
・iij/mruby-tls-openssl

  • Property svn:eol-style set to native
  • Property svn:mime-type set to text/x-csrc
File size: 58.8 KB
1/*
2 * Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the OpenSSL license (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10/* Copyright 2011 Google Inc.
11 *
12 * Licensed under the Apache License, Version 2.0 (the "License");
13 *
14 * you may not use this file except in compliance with the License.
15 * You may obtain a copy of the License at
16 *
17 * http://www.apache.org/licenses/LICENSE-2.0
18 *
19 * Unless required by applicable law or agreed to in writing, software
20 * distributed under the License is distributed on an "AS IS" BASIS,
21 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 * See the License for the specific language governing permissions and
23 * limitations under the License.
24 */
25
26/*
27 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
28 *
29 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
30 * and Adam Langley's public domain 64-bit C implementation of curve25519
31 */
32
33#include <openssl/opensslconf.h>
34#ifdef OPENSSL_NO_EC_NISTP_64_GCC_128
35NON_EMPTY_TRANSLATION_UNIT
36#else
37
38# include <stdint.h>
39# include <string.h>
40# include <openssl/err.h>
41# include "ec_lcl.h"
42
43# if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
44 /* even with gcc, the typedef won't work for 32-bit platforms */
45typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit
46 * platforms */
47# else
48# error "Need GCC 3.1 or later to define type uint128_t"
49# endif
50
51typedef uint8_t u8;
52typedef uint64_t u64;
53typedef int64_t s64;
54
55/******************************************************************************/
56/*-
57 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
58 *
59 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
60 * using 64-bit coefficients called 'limbs',
61 * and sometimes (for multiplication results) as
62 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
63 * using 128-bit coefficients called 'widelimbs'.
64 * A 4-limb representation is an 'felem';
65 * a 7-widelimb representation is a 'widefelem'.
66 * Even within felems, bits of adjacent limbs overlap, and we don't always
67 * reduce the representations: we ensure that inputs to each felem
68 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
69 * and fit into a 128-bit word without overflow. The coefficients are then
70 * again partially reduced to obtain an felem satisfying a_i < 2^57.
71 * We only reduce to the unique minimal representation at the end of the
72 * computation.
73 */
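/*-
 * As a concrete example, the prime p = 2^224 - 2^96 + 1 itself has the
 * minimal 4-limb representation
 *     a_0 = 0x00000000000001, a_1 = 0x00ffff0000000000,
 *     a_2 = a_3 = 0x00ffffffffffffff,
 * which is exactly the value tested against in felem_is_zero() below.
 */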
74
75typedef uint64_t limb;
76typedef uint128_t widelimb;
77
78typedef limb felem[4];
79typedef widelimb widefelem[7];
80
81/*
82 * Field element represented as a byte array. 28*8 = 224 bits is also the
83 * group order size for the elliptic curve, and we also use this type for
84 * scalars for point multiplication.
85 */
86typedef u8 felem_bytearray[28];
87
88static const felem_bytearray nistp224_curve_params[5] = {
89 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
90 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
91 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
92 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
93 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
94 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
95 {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
96 0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
97 0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
98 {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
99 0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
100 0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
101 {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
102 0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
103 0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
104};
105
106/*-
107 * Precomputed multiples of the standard generator
108 * Points are given in coordinates (X, Y, Z) where Z normally is 1
109 * (0 for the point at infinity).
110 * For each field element, slice a_0 is word 0, etc.
111 *
112 * The table has 2 * 16 elements, starting with the following:
113 * index | bits | point
114 * ------+---------+------------------------------
115 * 0 | 0 0 0 0 | 0G
116 * 1 | 0 0 0 1 | 1G
117 * 2 | 0 0 1 0 | 2^56G
118 * 3 | 0 0 1 1 | (2^56 + 1)G
119 * 4 | 0 1 0 0 | 2^112G
120 * 5 | 0 1 0 1 | (2^112 + 1)G
121 * 6 | 0 1 1 0 | (2^112 + 2^56)G
122 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
123 * 8 | 1 0 0 0 | 2^168G
124 * 9 | 1 0 0 1 | (2^168 + 1)G
125 * 10 | 1 0 1 0 | (2^168 + 2^56)G
126 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
127 * 12 | 1 1 0 0 | (2^168 + 2^112)G
128 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
129 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
130 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
131 * followed by a copy of this with each element multiplied by 2^28.
132 *
133 * The reason for this is so that we can clock bits into four different
134 * locations when doing simple scalar multiplies against the base point,
135 * and then another four locations using the second 16 elements.
136 */
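/*-
 * Concretely, in batch_mul() below the index into the first half of the
 * table at iteration i is built from the scalar bits i, i+56, i+112 and
 * i+168 (least-significant index bit first), and the index into the second
 * half from bits i+28, i+84, i+140 and i+196, so one constant-time lookup
 * adds
 *     (b_i + b_{i+56}*2^56 + b_{i+112}*2^112 + b_{i+168}*2^168) * G
 * (or the same with every exponent shifted by 28), scaled by the doublings
 * still to come.
 */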
137static const felem gmul[2][16][3] = {
138{{{0, 0, 0, 0},
139 {0, 0, 0, 0},
140 {0, 0, 0, 0}},
141 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
142 {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
143 {1, 0, 0, 0}},
144 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
145 {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
146 {1, 0, 0, 0}},
147 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
148 {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
149 {1, 0, 0, 0}},
150 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
151 {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
152 {1, 0, 0, 0}},
153 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
154 {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
155 {1, 0, 0, 0}},
156 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
157 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
158 {1, 0, 0, 0}},
159 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
160 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
161 {1, 0, 0, 0}},
162 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
163 {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
164 {1, 0, 0, 0}},
165 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
166 {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
167 {1, 0, 0, 0}},
168 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
169 {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
170 {1, 0, 0, 0}},
171 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
172 {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
173 {1, 0, 0, 0}},
174 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
175 {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
176 {1, 0, 0, 0}},
177 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
178 {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
179 {1, 0, 0, 0}},
180 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
181 {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
182 {1, 0, 0, 0}},
183 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
184 {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
185 {1, 0, 0, 0}}},
186{{{0, 0, 0, 0},
187 {0, 0, 0, 0},
188 {0, 0, 0, 0}},
189 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
190 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
191 {1, 0, 0, 0}},
192 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
193 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
194 {1, 0, 0, 0}},
195 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
196 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
197 {1, 0, 0, 0}},
198 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
199 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
200 {1, 0, 0, 0}},
201 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
202 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
203 {1, 0, 0, 0}},
204 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
205 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
206 {1, 0, 0, 0}},
207 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
208 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
209 {1, 0, 0, 0}},
210 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
211 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
212 {1, 0, 0, 0}},
213 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
214 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
215 {1, 0, 0, 0}},
216 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
217 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
218 {1, 0, 0, 0}},
219 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
220 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
221 {1, 0, 0, 0}},
222 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
223 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
224 {1, 0, 0, 0}},
225 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
226 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
227 {1, 0, 0, 0}},
228 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
229 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
230 {1, 0, 0, 0}},
231 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
232 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
233 {1, 0, 0, 0}}}
234};
235
236/* Precomputation for the group generator. */
237struct nistp224_pre_comp_st {
238 felem g_pre_comp[2][16][3];
239 int references;
240 CRYPTO_RWLOCK *lock;
241};
242
243const EC_METHOD *EC_GFp_nistp224_method(void)
244{
245 static const EC_METHOD ret = {
246 EC_FLAGS_DEFAULT_OCT,
247 NID_X9_62_prime_field,
248 ec_GFp_nistp224_group_init,
249 ec_GFp_simple_group_finish,
250 ec_GFp_simple_group_clear_finish,
251 ec_GFp_nist_group_copy,
252 ec_GFp_nistp224_group_set_curve,
253 ec_GFp_simple_group_get_curve,
254 ec_GFp_simple_group_get_degree,
255 ec_group_simple_order_bits,
256 ec_GFp_simple_group_check_discriminant,
257 ec_GFp_simple_point_init,
258 ec_GFp_simple_point_finish,
259 ec_GFp_simple_point_clear_finish,
260 ec_GFp_simple_point_copy,
261 ec_GFp_simple_point_set_to_infinity,
262 ec_GFp_simple_set_Jprojective_coordinates_GFp,
263 ec_GFp_simple_get_Jprojective_coordinates_GFp,
264 ec_GFp_simple_point_set_affine_coordinates,
265 ec_GFp_nistp224_point_get_affine_coordinates,
266 0 /* point_set_compressed_coordinates */ ,
267 0 /* point2oct */ ,
268 0 /* oct2point */ ,
269 ec_GFp_simple_add,
270 ec_GFp_simple_dbl,
271 ec_GFp_simple_invert,
272 ec_GFp_simple_is_at_infinity,
273 ec_GFp_simple_is_on_curve,
274 ec_GFp_simple_cmp,
275 ec_GFp_simple_make_affine,
276 ec_GFp_simple_points_make_affine,
277 ec_GFp_nistp224_points_mul,
278 ec_GFp_nistp224_precompute_mult,
279 ec_GFp_nistp224_have_precompute_mult,
280 ec_GFp_nist_field_mul,
281 ec_GFp_nist_field_sqr,
282 0 /* field_div */ ,
283 0 /* field_encode */ ,
284 0 /* field_decode */ ,
285 0, /* field_set_to_one */
286 ec_key_simple_priv2oct,
287 ec_key_simple_oct2priv,
288 0, /* set private */
289 ec_key_simple_generate_key,
290 ec_key_simple_check_key,
291 ec_key_simple_generate_public_key,
292 0, /* keycopy */
293 0, /* keyfinish */
294 ecdh_simple_compute_key
295 };
296
297 return &ret;
298}
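/*
 * Only the entries prefixed ec_GFp_nistp224_* above are specific to this
 * implementation (group init, curve setup, affine-coordinate recovery and
 * the point multiplication / precomputation routines); everything else is
 * delegated to the generic "simple" and NIST helpers (ec_GFp_simple_*,
 * ec_GFp_nist_*, ec_key_simple_*, ecdh_simple_compute_key).
 */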
299
300/*
301 * Helper functions to convert field elements to/from internal representation
302 */
303static void bin28_to_felem(felem out, const u8 in[28])
304{
305 out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
306 out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
307 out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
308 out[3] = (*((const uint64_t *)(in+20))) >> 8;
309}
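/*
 * Each limb above is assembled with a single (possibly unaligned,
 * overlapping) 64-bit little-endian load that is then masked or shifted
 * down to 56 bits. This presumes a little-endian target that tolerates
 * unaligned loads, which is the kind of 64-bit platform this code path is
 * intended to be enabled on.
 */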
310
311static void felem_to_bin28(u8 out[28], const felem in)
312{
313 unsigned i;
314 for (i = 0; i < 7; ++i) {
315 out[i] = in[0] >> (8 * i);
316 out[i + 7] = in[1] >> (8 * i);
317 out[i + 14] = in[2] >> (8 * i);
318 out[i + 21] = in[3] >> (8 * i);
319 }
320}
321
322/* To preserve endianness when using BN_bn2bin and BN_bin2bn */
323static void flip_endian(u8 *out, const u8 *in, unsigned len)
324{
325 unsigned i;
326 for (i = 0; i < len; ++i)
327 out[i] = in[len - 1 - i];
328}
329
330/* From OpenSSL BIGNUM to internal representation */
331static int BN_to_felem(felem out, const BIGNUM *bn)
332{
333 felem_bytearray b_in;
334 felem_bytearray b_out;
335 unsigned num_bytes;
336
337 /* BN_bn2bin eats leading zeroes */
338 memset(b_out, 0, sizeof(b_out));
339 num_bytes = BN_num_bytes(bn);
340 if (num_bytes > sizeof b_out) {
341 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
342 return 0;
343 }
344 if (BN_is_negative(bn)) {
345 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
346 return 0;
347 }
348 num_bytes = BN_bn2bin(bn, b_in);
349 flip_endian(b_out, b_in, num_bytes);
350 bin28_to_felem(out, b_out);
351 return 1;
352}
353
354/* From internal representation to OpenSSL BIGNUM */
355static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
356{
357 felem_bytearray b_in, b_out;
358 felem_to_bin28(b_in, in);
359 flip_endian(b_out, b_in, sizeof b_out);
360 return BN_bin2bn(b_out, sizeof b_out, out);
361}
362
363/******************************************************************************/
364/*-
365 * FIELD OPERATIONS
366 *
367 * Field operations, using the internal representation of field elements.
368 * NB! These operations are specific to our point multiplication and cannot be
369 * expected to be correct in general - e.g., multiplication with a large scalar
370 * will cause an overflow.
371 *
372 */
373
374static void felem_one(felem out)
375{
376 out[0] = 1;
377 out[1] = 0;
378 out[2] = 0;
379 out[3] = 0;
380}
381
382static void felem_assign(felem out, const felem in)
383{
384 out[0] = in[0];
385 out[1] = in[1];
386 out[2] = in[2];
387 out[3] = in[3];
388}
389
390/* Sum two field elements: out += in */
391static void felem_sum(felem out, const felem in)
392{
393 out[0] += in[0];
394 out[1] += in[1];
395 out[2] += in[2];
396 out[3] += in[3];
397}
398
399/* Get negative value: out = -in */
400/* Assumes in[i] < 2^57 */
401static void felem_neg(felem out, const felem in)
402{
403 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
404 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
405 static const limb two58m42m2 = (((limb) 1) << 58) -
406 (((limb) 1) << 42) - (((limb) 1) << 2);
407
408 /* Set to 0 mod 2^224-2^96+1 to ensure out > in */
409 out[0] = two58p2 - in[0];
410 out[1] = two58m42m2 - in[1];
411 out[2] = two58m2 - in[2];
412 out[3] = two58m2 - in[3];
413}
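/*-
 * The constants used here and in felem_diff() are just 4*p spread over the
 * four limb positions:
 *     two58p2 + two58m42m2*2^56 + two58m2*2^112 + two58m2*2^168
 *       = 2^226 - 2^98 + 4 = 4*(2^224 - 2^96 + 1),
 * so adding them changes nothing mod p while keeping every limb of the
 * result non-negative. felem_diff_128_64() and widefelem_diff() below use
 * the same trick with 2^8*p and 2^232*p respectively.
 */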
414
415/* Subtract field elements: out -= in */
416/* Assumes in[i] < 2^57 */
417static void felem_diff(felem out, const felem in)
418{
419 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
420 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
421 static const limb two58m42m2 = (((limb) 1) << 58) -
422 (((limb) 1) << 42) - (((limb) 1) << 2);
423
424 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
425 out[0] += two58p2;
426 out[1] += two58m42m2;
427 out[2] += two58m2;
428 out[3] += two58m2;
429
430 out[0] -= in[0];
431 out[1] -= in[1];
432 out[2] -= in[2];
433 out[3] -= in[3];
434}
435
436/* Subtract in unreduced 128-bit mode: out -= in */
437/* Assumes in[i] < 2^119 */
438static void widefelem_diff(widefelem out, const widefelem in)
439{
440 static const widelimb two120 = ((widelimb) 1) << 120;
441 static const widelimb two120m64 = (((widelimb) 1) << 120) -
442 (((widelimb) 1) << 64);
443 static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
444 (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
445
446 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
447 out[0] += two120;
448 out[1] += two120m64;
449 out[2] += two120m64;
450 out[3] += two120;
451 out[4] += two120m104m64;
452 out[5] += two120m64;
453 out[6] += two120m64;
454
455 out[0] -= in[0];
456 out[1] -= in[1];
457 out[2] -= in[2];
458 out[3] -= in[3];
459 out[4] -= in[4];
460 out[5] -= in[5];
461 out[6] -= in[6];
462}
463
464/* Subtract in mixed mode: out128 -= in64 */
465/* in[i] < 2^63 */
466static void felem_diff_128_64(widefelem out, const felem in)
467{
468 static const widelimb two64p8 = (((widelimb) 1) << 64) +
469 (((widelimb) 1) << 8);
470 static const widelimb two64m8 = (((widelimb) 1) << 64) -
471 (((widelimb) 1) << 8);
472 static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
473 (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
474
475 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
476 out[0] += two64p8;
477 out[1] += two64m48m8;
478 out[2] += two64m8;
479 out[3] += two64m8;
480
481 out[0] -= in[0];
482 out[1] -= in[1];
483 out[2] -= in[2];
484 out[3] -= in[3];
485}
486
487/*
488 * Multiply a field element by a scalar: out = out * scalar. The scalars we
489 * actually use are small, so results fit without overflow
490 */
491static void felem_scalar(felem out, const limb scalar)
492{
493 out[0] *= scalar;
494 out[1] *= scalar;
495 out[2] *= scalar;
496 out[3] *= scalar;
497}
498
499/*
500 * Multiply an unreduced field element by a scalar: out = out * scalar. The
501 * scalars we actually use are small, so results fit without overflow
502 */
503static void widefelem_scalar(widefelem out, const widelimb scalar)
504{
505 out[0] *= scalar;
506 out[1] *= scalar;
507 out[2] *= scalar;
508 out[3] *= scalar;
509 out[4] *= scalar;
510 out[5] *= scalar;
511 out[6] *= scalar;
512}
513
514/* Square a field element: out = in^2 */
515static void felem_square(widefelem out, const felem in)
516{
517 limb tmp0, tmp1, tmp2;
518 tmp0 = 2 * in[0];
519 tmp1 = 2 * in[1];
520 tmp2 = 2 * in[2];
521 out[0] = ((widelimb) in[0]) * in[0];
522 out[1] = ((widelimb) in[0]) * tmp1;
523 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
524 out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
525 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
526 out[5] = ((widelimb) in[3]) * tmp2;
527 out[6] = ((widelimb) in[3]) * in[3];
528}
529
530/* Multiply two field elements: out = in1 * in2 */
531static void felem_mul(widefelem out, const felem in1, const felem in2)
532{
533 out[0] = ((widelimb) in1[0]) * in2[0];
534 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
535 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
536 ((widelimb) in1[2]) * in2[0];
537 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
538 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
539 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
540 ((widelimb) in1[3]) * in2[1];
541 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
542 out[6] = ((widelimb) in1[3]) * in2[3];
543}
544
545/*-
546 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
547 * Requires in[i] < 2^126,
548 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
549static void felem_reduce(felem out, const widefelem in)
550{
551 static const widelimb two127p15 = (((widelimb) 1) << 127) +
552 (((widelimb) 1) << 15);
553 static const widelimb two127m71 = (((widelimb) 1) << 127) -
554 (((widelimb) 1) << 71);
555 static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
556 (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
557 widelimb output[5];
558
559 /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
560 output[0] = in[0] + two127p15;
561 output[1] = in[1] + two127m71m55;
562 output[2] = in[2] + two127m71;
563 output[3] = in[3];
564 output[4] = in[4];
565
566 /* Eliminate in[4], in[5], in[6] */
567 output[4] += in[6] >> 16;
568 output[3] += (in[6] & 0xffff) << 40;
569 output[2] -= in[6];
570
571 output[3] += in[5] >> 16;
572 output[2] += (in[5] & 0xffff) << 40;
573 output[1] -= in[5];
574
575 output[2] += output[4] >> 16;
576 output[1] += (output[4] & 0xffff) << 40;
577 output[0] -= output[4];
578
579 /* Carry 2 -> 3 -> 4 */
580 output[3] += output[2] >> 56;
581 output[2] &= 0x00ffffffffffffff;
582
583 output[4] = output[3] >> 56;
584 output[3] &= 0x00ffffffffffffff;
585
586 /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
587
588 /* Eliminate output[4] */
589 output[2] += output[4] >> 16;
590 /* output[2] < 2^56 + 2^56 = 2^57 */
591 output[1] += (output[4] & 0xffff) << 40;
592 output[0] -= output[4];
593
594 /* Carry 0 -> 1 -> 2 -> 3 */
595 output[1] += output[0] >> 56;
596 out[0] = output[0] & 0x00ffffffffffffff;
597
598 output[2] += output[1] >> 56;
599 /* output[2] < 2^57 + 2^72 */
600 out[1] = output[1] & 0x00ffffffffffffff;
601 output[3] += output[2] >> 56;
602 /* output[3] <= 2^56 + 2^16 */
603 out[2] = output[2] & 0x00ffffffffffffff;
604
605 /*-
606 * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
607 * out[3] <= 2^56 + 2^16 (due to final carry),
608 * so out < 2*p
609 */
610 out[3] = output[3];
611}
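/*-
 * The eliminations above all use the congruence 2^224 == 2^96 - 1 (mod p).
 * For instance the in[6] term: in[6]*2^336 = in[6]*2^112*2^224
 *     == in[6]*(2^208 - 2^112) (mod p),
 * and since 2^208 = 2^40 * 2^168 this adds (in[6] >> 16) at the 2^224
 * position (output[4]) and (in[6] & 0xffff) << 40 at the 2^168 position
 * (output[3]), and subtracts in[6] at the 2^112 position (output[2]).
 */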
612
613static void felem_square_reduce(felem out, const felem in)
614{
615 widefelem tmp;
616 felem_square(tmp, in);
617 felem_reduce(out, tmp);
618}
619
620static void felem_mul_reduce(felem out, const felem in1, const felem in2)
621{
622 widefelem tmp;
623 felem_mul(tmp, in1, in2);
624 felem_reduce(out, tmp);
625}
626
627/*
628 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
629 * call felem_reduce first)
630 */
631static void felem_contract(felem out, const felem in)
632{
633 static const int64_t two56 = ((limb) 1) << 56;
634 /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
635 /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
636 int64_t tmp[4], a;
637 tmp[0] = in[0];
638 tmp[1] = in[1];
639 tmp[2] = in[2];
640 tmp[3] = in[3];
641 /* Case 1: a = 1 iff in >= 2^224 */
642 a = (in[3] >> 56);
643 tmp[0] -= a;
644 tmp[1] += a << 40;
645 tmp[3] &= 0x00ffffffffffffff;
646 /*
647 * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
648 * and the lower part is non-zero
649 */
650 a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
651 (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
652 a &= 0x00ffffffffffffff;
653 /* turn a into an all-one mask (if a = 0) or an all-zero mask */
654 a = (a - 1) >> 63;
655 /* subtract 2^224 - 2^96 + 1 if a is all-one */
656 tmp[3] &= a ^ 0xffffffffffffffff;
657 tmp[2] &= a ^ 0xffffffffffffffff;
658 tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
659 tmp[0] -= 1 & a;
660
661 /*
662 * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
663 * non-zero, so we only need one step
664 */
665 a = tmp[0] >> 63;
666 tmp[0] += two56 & a;
667 tmp[1] -= 1 & a;
668
669 /* carry 1 -> 2 -> 3 */
670 tmp[2] += tmp[1] >> 56;
671 tmp[1] &= 0x00ffffffffffffff;
672
673 tmp[3] += tmp[2] >> 56;
674 tmp[2] &= 0x00ffffffffffffff;
675
676 /* Now 0 <= out < p */
677 out[0] = tmp[0];
678 out[1] = tmp[1];
679 out[2] = tmp[2];
680 out[3] = tmp[3];
681}
682
683/*
684 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
685 * elements are reduced to in < 2^225, so we only need to check three cases:
686 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
687 */
688static limb felem_is_zero(const felem in)
689{
690 limb zero, two224m96p1, two225m97p2;
691
692 zero = in[0] | in[1] | in[2] | in[3];
693 zero = (((int64_t) (zero) - 1) >> 63) & 1;
694 two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
695 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
696 two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
697 two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
698 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
699 two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
700 return (zero | two224m96p1 | two225m97p2);
701}
702
703static limb felem_is_zero_int(const felem in)
704{
705 return (int)(felem_is_zero(in) & ((limb) 1));
706}
707
708/* Invert a field element */
709/* Computation chain copied from djb's code */
710static void felem_inv(felem out, const felem in)
711{
712 felem ftmp, ftmp2, ftmp3, ftmp4;
713 widefelem tmp;
714 unsigned i;
715
716 felem_square(tmp, in);
717 felem_reduce(ftmp, tmp); /* 2 */
718 felem_mul(tmp, in, ftmp);
719 felem_reduce(ftmp, tmp); /* 2^2 - 1 */
720 felem_square(tmp, ftmp);
721 felem_reduce(ftmp, tmp); /* 2^3 - 2 */
722 felem_mul(tmp, in, ftmp);
723 felem_reduce(ftmp, tmp); /* 2^3 - 1 */
724 felem_square(tmp, ftmp);
725 felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
726 felem_square(tmp, ftmp2);
727 felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
728 felem_square(tmp, ftmp2);
729 felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
730 felem_mul(tmp, ftmp2, ftmp);
731 felem_reduce(ftmp, tmp); /* 2^6 - 1 */
732 felem_square(tmp, ftmp);
733 felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
734 for (i = 0; i < 5; ++i) { /* 2^12 - 2^6 */
735 felem_square(tmp, ftmp2);
736 felem_reduce(ftmp2, tmp);
737 }
738 felem_mul(tmp, ftmp2, ftmp);
739 felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
740 felem_square(tmp, ftmp2);
741 felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
742 for (i = 0; i < 11; ++i) { /* 2^24 - 2^12 */
743 felem_square(tmp, ftmp3);
744 felem_reduce(ftmp3, tmp);
745 }
746 felem_mul(tmp, ftmp3, ftmp2);
747 felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
748 felem_square(tmp, ftmp2);
749 felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
750 for (i = 0; i < 23; ++i) { /* 2^48 - 2^24 */
751 felem_square(tmp, ftmp3);
752 felem_reduce(ftmp3, tmp);
753 }
754 felem_mul(tmp, ftmp3, ftmp2);
755 felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
756 felem_square(tmp, ftmp3);
757 felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
758 for (i = 0; i < 47; ++i) { /* 2^96 - 2^48 */
759 felem_square(tmp, ftmp4);
760 felem_reduce(ftmp4, tmp);
761 }
762 felem_mul(tmp, ftmp3, ftmp4);
763 felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
764 felem_square(tmp, ftmp3);
765 felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
766 for (i = 0; i < 23; ++i) { /* 2^120 - 2^24 */
767 felem_square(tmp, ftmp4);
768 felem_reduce(ftmp4, tmp);
769 }
770 felem_mul(tmp, ftmp2, ftmp4);
771 felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
772 for (i = 0; i < 6; ++i) { /* 2^126 - 2^6 */
773 felem_square(tmp, ftmp2);
774 felem_reduce(ftmp2, tmp);
775 }
776 felem_mul(tmp, ftmp2, ftmp);
777 felem_reduce(ftmp, tmp); /* 2^126 - 1 */
778 felem_square(tmp, ftmp);
779 felem_reduce(ftmp, tmp); /* 2^127 - 2 */
780 felem_mul(tmp, ftmp, in);
781 felem_reduce(ftmp, tmp); /* 2^127 - 1 */
782 for (i = 0; i < 97; ++i) { /* 2^224 - 2^97 */
783 felem_square(tmp, ftmp);
784 felem_reduce(ftmp, tmp);
785 }
786 felem_mul(tmp, ftmp, ftmp3);
787 felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
788}
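/*
 * The chain above raises 'in' to the power p - 2 = 2^224 - 2^96 - 1, so by
 * Fermat's little theorem the result is the multiplicative inverse mod p
 * (and 0 maps to 0).
 */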
789
790/*
791 * Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
792 * out to itself.
793 */
794static void copy_conditional(felem out, const felem in, limb icopy)
795{
796 unsigned i;
797 /*
798 * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
799 */
800 const limb copy = -icopy;
801 for (i = 0; i < 4; ++i) {
802 const limb tmp = copy & (in[i] ^ out[i]);
803 out[i] ^= tmp;
804 }
805}
806
807/******************************************************************************/
808/*-
809 * ELLIPTIC CURVE POINT OPERATIONS
810 *
811 * Points are represented in Jacobian projective coordinates:
812 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
813 * or to the point at infinity if Z == 0.
814 *
815 */
816
817/*-
818 * Double an elliptic curve point:
819 * (X', Y', Z') = 2 * (X, Y, Z), where
820 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
821 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^2
822 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
823 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
824 * while x_out == y_in is not (maybe this works, but it's not tested).
825 */
826static void
827point_double(felem x_out, felem y_out, felem z_out,
828 const felem x_in, const felem y_in, const felem z_in)
829{
830 widefelem tmp, tmp2;
831 felem delta, gamma, beta, alpha, ftmp, ftmp2;
832
833 felem_assign(ftmp, x_in);
834 felem_assign(ftmp2, x_in);
835
836 /* delta = z^2 */
837 felem_square(tmp, z_in);
838 felem_reduce(delta, tmp);
839
840 /* gamma = y^2 */
841 felem_square(tmp, y_in);
842 felem_reduce(gamma, tmp);
843
844 /* beta = x*gamma */
845 felem_mul(tmp, x_in, gamma);
846 felem_reduce(beta, tmp);
847
848 /* alpha = 3*(x-delta)*(x+delta) */
849 felem_diff(ftmp, delta);
850 /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
851 felem_sum(ftmp2, delta);
852 /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
853 felem_scalar(ftmp2, 3);
854 /* ftmp2[i] < 3 * 2^58 < 2^60 */
855 felem_mul(tmp, ftmp, ftmp2);
856 /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
857 felem_reduce(alpha, tmp);
858
859 /* x' = alpha^2 - 8*beta */
860 felem_square(tmp, alpha);
861 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
862 felem_assign(ftmp, beta);
863 felem_scalar(ftmp, 8);
864 /* ftmp[i] < 8 * 2^57 = 2^60 */
865 felem_diff_128_64(tmp, ftmp);
866 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
867 felem_reduce(x_out, tmp);
868
869 /* z' = (y + z)^2 - gamma - delta */
870 felem_sum(delta, gamma);
871 /* delta[i] < 2^57 + 2^57 = 2^58 */
872 felem_assign(ftmp, y_in);
873 felem_sum(ftmp, z_in);
874 /* ftmp[i] < 2^57 + 2^57 = 2^58 */
875 felem_square(tmp, ftmp);
876 /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
877 felem_diff_128_64(tmp, delta);
878 /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
879 felem_reduce(z_out, tmp);
880
881 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
882 felem_scalar(beta, 4);
883 /* beta[i] < 4 * 2^57 = 2^59 */
884 felem_diff(beta, x_out);
885 /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
886 felem_mul(tmp, alpha, beta);
887 /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
888 felem_square(tmp2, gamma);
889 /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
890 widefelem_scalar(tmp2, 8);
891 /* tmp2[i] < 8 * 2^116 = 2^119 */
892 widefelem_diff(tmp, tmp2);
893 /* tmp[i] < 2^119 + 2^120 < 2^121 */
894 felem_reduce(y_out, tmp);
895}
896
897/*-
898 * Add two elliptic curve points:
899 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
900 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
901 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
902 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
903 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
904 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
905 *
906 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
907 */
908
909/*
910 * This function is not entirely constant-time: it includes a branch for
911 * checking whether the two input points are equal (while not equal to the
912 * point at infinity). This case never happens during single point
913 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
914 */
915static void point_add(felem x3, felem y3, felem z3,
916 const felem x1, const felem y1, const felem z1,
917 const int mixed, const felem x2, const felem y2,
918 const felem z2)
919{
920 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
921 widefelem tmp, tmp2;
922 limb z1_is_zero, z2_is_zero, x_equal, y_equal;
923
924 if (!mixed) {
925 /* ftmp2 = z2^2 */
926 felem_square(tmp, z2);
927 felem_reduce(ftmp2, tmp);
928
929 /* ftmp4 = z2^3 */
930 felem_mul(tmp, ftmp2, z2);
931 felem_reduce(ftmp4, tmp);
932
933 /* ftmp4 = z2^3*y1 */
934 felem_mul(tmp2, ftmp4, y1);
935 felem_reduce(ftmp4, tmp2);
936
937 /* ftmp2 = z2^2*x1 */
938 felem_mul(tmp2, ftmp2, x1);
939 felem_reduce(ftmp2, tmp2);
940 } else {
941 /*
942 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
943 */
944
945 /* ftmp4 = z2^3*y1 */
946 felem_assign(ftmp4, y1);
947
948 /* ftmp2 = z2^2*x1 */
949 felem_assign(ftmp2, x1);
950 }
951
952 /* ftmp = z1^2 */
953 felem_square(tmp, z1);
954 felem_reduce(ftmp, tmp);
955
956 /* ftmp3 = z1^3 */
957 felem_mul(tmp, ftmp, z1);
958 felem_reduce(ftmp3, tmp);
959
960 /* tmp = z1^3*y2 */
961 felem_mul(tmp, ftmp3, y2);
962 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
963
964 /* ftmp3 = z1^3*y2 - z2^3*y1 */
965 felem_diff_128_64(tmp, ftmp4);
966 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
967 felem_reduce(ftmp3, tmp);
968
969 /* tmp = z1^2*x2 */
970 felem_mul(tmp, ftmp, x2);
971 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
972
973 /* ftmp = z1^2*x2 - z2^2*x1 */
974 felem_diff_128_64(tmp, ftmp2);
975 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
976 felem_reduce(ftmp, tmp);
977
978 /*
979 * the formulae are incorrect if the points are equal so we check for
980 * this and do doubling if this happens
981 */
982 x_equal = felem_is_zero(ftmp);
983 y_equal = felem_is_zero(ftmp3);
984 z1_is_zero = felem_is_zero(z1);
985 z2_is_zero = felem_is_zero(z2);
986 /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
987 if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
988 point_double(x3, y3, z3, x1, y1, z1);
989 return;
990 }
991
992 /* ftmp5 = z1*z2 */
993 if (!mixed) {
994 felem_mul(tmp, z1, z2);
995 felem_reduce(ftmp5, tmp);
996 } else {
997 /* special case z2 = 0 is handled later */
998 felem_assign(ftmp5, z1);
999 }
1000
1001 /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1002 felem_mul(tmp, ftmp, ftmp5);
1003 felem_reduce(z_out, tmp);
1004
1005 /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1006 felem_assign(ftmp5, ftmp);
1007 felem_square(tmp, ftmp);
1008 felem_reduce(ftmp, tmp);
1009
1010 /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1011 felem_mul(tmp, ftmp, ftmp5);
1012 felem_reduce(ftmp5, tmp);
1013
1014 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1015 felem_mul(tmp, ftmp2, ftmp);
1016 felem_reduce(ftmp2, tmp);
1017
1018 /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1019 felem_mul(tmp, ftmp4, ftmp5);
1020 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1021
1022 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1023 felem_square(tmp2, ftmp3);
1024 /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1025
1026 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1027 felem_diff_128_64(tmp2, ftmp5);
1028 /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1029
1030 /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1031 felem_assign(ftmp5, ftmp2);
1032 felem_scalar(ftmp5, 2);
1033 /* ftmp5[i] < 2 * 2^57 = 2^58 */
1034
1035 /*-
1036 * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1037 * 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1038 */
1039 felem_diff_128_64(tmp2, ftmp5);
1040 /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1041 felem_reduce(x_out, tmp2);
1042
1043 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1044 felem_diff(ftmp2, x_out);
1045 /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1046
1047 /*
1048 * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1049 */
1050 felem_mul(tmp2, ftmp3, ftmp2);
1051 /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1052
1053 /*-
1054 * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1055 * z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1056 */
1057 widefelem_diff(tmp2, tmp);
1058 /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1059 felem_reduce(y_out, tmp2);
1060
1061 /*
1062 * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1063 * the point at infinity, so we need to check for this separately
1064 */
1065
1066 /*
1067 * if point 1 is at infinity, copy point 2 to output, and vice versa
1068 */
1069 copy_conditional(x_out, x2, z1_is_zero);
1070 copy_conditional(x_out, x1, z2_is_zero);
1071 copy_conditional(y_out, y2, z1_is_zero);
1072 copy_conditional(y_out, y1, z2_is_zero);
1073 copy_conditional(z_out, z2, z1_is_zero);
1074 copy_conditional(z_out, z1, z2_is_zero);
1075 felem_assign(x3, x_out);
1076 felem_assign(y3, y_out);
1077 felem_assign(z3, z_out);
1078}
1079
1080/*
1081 * select_point selects the |idx|th point from a precomputation table and
1082 * copies it to out.
1083 * The pre_comp array argument should have |size| elements.
1084 */
1085static void select_point(const u64 idx, unsigned int size,
1086 const felem pre_comp[][3], felem out[3])
1087{
1088 unsigned i, j;
1089 limb *outlimbs = &out[0][0];
1090
1091 memset(out, 0, sizeof(*out) * 3);
1092 for (i = 0; i < size; i++) {
1093 const limb *inlimbs = &pre_comp[i][0][0];
1094 u64 mask = i ^ idx;
1095 mask |= mask >> 4;
1096 mask |= mask >> 2;
1097 mask |= mask >> 1;
1098 mask &= 1;
1099 mask--;
1100 for (j = 0; j < 4 * 3; j++)
1101 outlimbs[j] |= inlimbs[j] & mask;
1102 }
1103}
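/*
 * In the loop above, i ^ idx is zero only for the requested entry; OR-ing
 * its bits down into bit 0 and then decrementing therefore yields an
 * all-ones mask exactly once, so every table entry is read but only the
 * |idx|th one is OR-ed into out, keeping the memory access pattern
 * independent of idx.
 */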
1104
1105/* get_bit returns the |i|th bit in |in| */
1106static char get_bit(const felem_bytearray in, unsigned i)
1107{
1108 if (i >= 224)
1109 return 0;
1110 return (in[i >> 3] >> (i & 7)) & 1;
1111}
1112
1113/*
1114 * Interleaved point multiplication using precomputed point multiples: The
1115 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1116 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1117 * generator, using certain (large) precomputed multiples in g_pre_comp.
1118 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1119 */
1120static void batch_mul(felem x_out, felem y_out, felem z_out,
1121 const felem_bytearray scalars[],
1122 const unsigned num_points, const u8 *g_scalar,
1123 const int mixed, const felem pre_comp[][17][3],
1124 const felem g_pre_comp[2][16][3])
1125{
1126 int i, skip;
1127 unsigned num;
1128 unsigned gen_mul = (g_scalar != NULL);
1129 felem nq[3], tmp[4];
1130 u64 bits;
1131 u8 sign, digit;
1132
1133 /* set nq to the point at infinity */
1134 memset(nq, 0, sizeof(nq));
1135
1136 /*
1137 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1138 * of the generator (two in each of the last 28 rounds) and additions of
1139 * other points multiples (every 5th round).
1140 */
1141 skip = 1; /* save two point operations in the first
1142 * round */
1143 for (i = (num_points ? 220 : 27); i >= 0; --i) {
1144 /* double */
1145 if (!skip)
1146 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1147
1148 /* add multiples of the generator */
1149 if (gen_mul && (i <= 27)) {
1150 /* first, look 28 bits upwards */
1151 bits = get_bit(g_scalar, i + 196) << 3;
1152 bits |= get_bit(g_scalar, i + 140) << 2;
1153 bits |= get_bit(g_scalar, i + 84) << 1;
1154 bits |= get_bit(g_scalar, i + 28);
1155 /* select the point to add, in constant time */
1156 select_point(bits, 16, g_pre_comp[1], tmp);
1157
1158 if (!skip) {
1159 /* value 1 below is argument for "mixed" */
1160 point_add(nq[0], nq[1], nq[2],
1161 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1162 } else {
1163 memcpy(nq, tmp, 3 * sizeof(felem));
1164 skip = 0;
1165 }
1166
1167 /* second, look at the current position */
1168 bits = get_bit(g_scalar, i + 168) << 3;
1169 bits |= get_bit(g_scalar, i + 112) << 2;
1170 bits |= get_bit(g_scalar, i + 56) << 1;
1171 bits |= get_bit(g_scalar, i);
1172 /* select the point to add, in constant time */
1173 select_point(bits, 16, g_pre_comp[0], tmp);
1174 point_add(nq[0], nq[1], nq[2],
1175 nq[0], nq[1], nq[2],
1176 1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1177 }
1178
1179 /* do other additions every 5 doublings */
1180 if (num_points && (i % 5 == 0)) {
1181 /* loop over all scalars */
1182 for (num = 0; num < num_points; ++num) {
1183 bits = get_bit(scalars[num], i + 4) << 5;
1184 bits |= get_bit(scalars[num], i + 3) << 4;
1185 bits |= get_bit(scalars[num], i + 2) << 3;
1186 bits |= get_bit(scalars[num], i + 1) << 2;
1187 bits |= get_bit(scalars[num], i) << 1;
1188 bits |= get_bit(scalars[num], i - 1);
1189 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1190
1191 /* select the point to add or subtract */
1192 select_point(digit, 17, pre_comp[num], tmp);
1193 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1194 * point */
1195 copy_conditional(tmp[1], tmp[3], sign);
1196
1197 if (!skip) {
1198 point_add(nq[0], nq[1], nq[2],
1199 nq[0], nq[1], nq[2],
1200 mixed, tmp[0], tmp[1], tmp[2]);
1201 } else {
1202 memcpy(nq, tmp, 3 * sizeof(felem));
1203 skip = 0;
1204 }
1205 }
1206 }
1207 }
1208 felem_assign(x_out, nq[0]);
1209 felem_assign(y_out, nq[1]);
1210 felem_assign(z_out, nq[2]);
1211}
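/*
 * A note on the schedule above: scalars are at most 224 bits and get_bit()
 * returns 0 for indices >= 224, so starting at i = 220 (or i = 27 when only
 * the generator term is present) covers every window. The generator comb
 * only engages for i <= 27, i.e. 28 iterations with two constant-time
 * lookups each (one per half of g_pre_comp), while the signed per-point
 * windows (digits 0..16 selected from pre_comp) are processed every fifth
 * doubling.
 */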
1212
1213/******************************************************************************/
1214/*
1215 * FUNCTIONS TO MANAGE PRECOMPUTATION
1216 */
1217
1218static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1219{
1220 NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1221
1222 if (!ret) {
1223 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1224 return ret;
1225 }
1226
1227 ret->references = 1;
1228
1229 ret->lock = CRYPTO_THREAD_lock_new();
1230 if (ret->lock == NULL) {
1231 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1232 OPENSSL_free(ret);
1233 return NULL;
1234 }
1235 return ret;
1236}
1237
1238NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1239{
1240 int i;
1241 if (p != NULL)
1242 CRYPTO_atomic_add(&p->references, 1, &i, p->lock);
1243 return p;
1244}
1245
1246void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1247{
1248 int i;
1249
1250 if (p == NULL)
1251 return;
1252
1253 CRYPTO_atomic_add(&p->references, -1, &i, p->lock);
1254 REF_PRINT_COUNT("EC_nistp224", p);
1255 if (i > 0)
1256 return;
1257 REF_ASSERT_ISNT(i < 0);
1258
1259 CRYPTO_THREAD_lock_free(p->lock);
1260 OPENSSL_free(p);
1261}
1262
1263/******************************************************************************/
1264/*
1265 * OPENSSL EC_METHOD FUNCTIONS
1266 */
1267
1268int ec_GFp_nistp224_group_init(EC_GROUP *group)
1269{
1270 int ret;
1271 ret = ec_GFp_simple_group_init(group);
1272 group->a_is_minus3 = 1;
1273 return ret;
1274}
1275
1276int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1277 const BIGNUM *a, const BIGNUM *b,
1278 BN_CTX *ctx)
1279{
1280 int ret = 0;
1281 BN_CTX *new_ctx = NULL;
1282 BIGNUM *curve_p, *curve_a, *curve_b;
1283
1284 if (ctx == NULL)
1285 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1286 return 0;
1287 BN_CTX_start(ctx);
1288 if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1289 ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1290 ((curve_b = BN_CTX_get(ctx)) == NULL))
1291 goto err;
1292 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1293 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1294 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1295 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1296 ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1297 EC_R_WRONG_CURVE_PARAMETERS);
1298 goto err;
1299 }
1300 group->field_mod_func = BN_nist_mod_224;
1301 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1302 err:
1303 BN_CTX_end(ctx);
1304 BN_CTX_free(new_ctx);
1305 return ret;
1306}
1307
1308/*
1309 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1310 * (X/Z^2, Y/Z^3)
1311 */
1312int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1313 const EC_POINT *point,
1314 BIGNUM *x, BIGNUM *y,
1315 BN_CTX *ctx)
1316{
1317 felem z1, z2, x_in, y_in, x_out, y_out;
1318 widefelem tmp;
1319
1320 if (EC_POINT_is_at_infinity(group, point)) {
1321 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1322 EC_R_POINT_AT_INFINITY);
1323 return 0;
1324 }
1325 if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1326 (!BN_to_felem(z1, point->Z)))
1327 return 0;
1328 felem_inv(z2, z1);
1329 felem_square(tmp, z2);
1330 felem_reduce(z1, tmp);
1331 felem_mul(tmp, x_in, z1);
1332 felem_reduce(x_in, tmp);
1333 felem_contract(x_out, x_in);
1334 if (x != NULL) {
1335 if (!felem_to_BN(x, x_out)) {
1336 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1337 ERR_R_BN_LIB);
1338 return 0;
1339 }
1340 }
1341 felem_mul(tmp, z1, z2);
1342 felem_reduce(z1, tmp);
1343 felem_mul(tmp, y_in, z1);
1344 felem_reduce(y_in, tmp);
1345 felem_contract(y_out, y_in);
1346 if (y != NULL) {
1347 if (!felem_to_BN(y, y_out)) {
1348 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1349 ERR_R_BN_LIB);
1350 return 0;
1351 }
1352 }
1353 return 1;
1354}
1355
1356static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1357 felem tmp_felems[ /* num+1 */ ])
1358{
1359 /*
1360 * Runs in constant time, unless an input is the point at infinity (which
1361 * normally shouldn't happen).
1362 */
1363 ec_GFp_nistp_points_make_affine_internal(num,
1364 points,
1365 sizeof(felem),
1366 tmp_felems,
1367 (void (*)(void *))felem_one,
1368 (int (*)(const void *))
1369 felem_is_zero_int,
1370 (void (*)(void *, const void *))
1371 felem_assign,
1372 (void (*)(void *, const void *))
1373 felem_square_reduce, (void (*)
1374 (void *,
1375 const void
1376 *,
1377 const void
1378 *))
1379 felem_mul_reduce,
1380 (void (*)(void *, const void *))
1381 felem_inv,
1382 (void (*)(void *, const void *))
1383 felem_contract);
1384}
1385
1386/*
1387 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1388 * values. The result is stored in r (r can equal one of the inputs).
1389 */
1390int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1391 const BIGNUM *scalar, size_t num,
1392 const EC_POINT *points[],
1393 const BIGNUM *scalars[], BN_CTX *ctx)
1394{
1395 int ret = 0;
1396 int j;
1397 unsigned i;
1398 int mixed = 0;
1399 BN_CTX *new_ctx = NULL;
1400 BIGNUM *x, *y, *z, *tmp_scalar;
1401 felem_bytearray g_secret;
1402 felem_bytearray *secrets = NULL;
1403 felem (*pre_comp)[17][3] = NULL;
1404 felem *tmp_felems = NULL;
1405 felem_bytearray tmp;
1406 unsigned num_bytes;
1407 int have_pre_comp = 0;
1408 size_t num_points = num;
1409 felem x_in, y_in, z_in, x_out, y_out, z_out;
1410 NISTP224_PRE_COMP *pre = NULL;
1411 const felem(*g_pre_comp)[16][3] = NULL;
1412 EC_POINT *generator = NULL;
1413 const EC_POINT *p = NULL;
1414 const BIGNUM *p_scalar = NULL;
1415
1416 if (ctx == NULL)
1417 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1418 return 0;
1419 BN_CTX_start(ctx);
1420 if (((x = BN_CTX_get(ctx)) == NULL) ||
1421 ((y = BN_CTX_get(ctx)) == NULL) ||
1422 ((z = BN_CTX_get(ctx)) == NULL) ||
1423 ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1424 goto err;
1425
1426 if (scalar != NULL) {
1427 pre = group->pre_comp.nistp224;
1428 if (pre)
1429 /* we have precomputation, try to use it */
1430 g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1431 else
1432 /* try to use the standard precomputation */
1433 g_pre_comp = &gmul[0];
1434 generator = EC_POINT_new(group);
1435 if (generator == NULL)
1436 goto err;
1437 /* get the generator from precomputation */
1438 if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1439 !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1440 !felem_to_BN(z, g_pre_comp[0][1][2])) {
1441 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1442 goto err;
1443 }
1444 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1445 generator, x, y, z,
1446 ctx))
1447 goto err;
1448 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1449 /* precomputation matches generator */
1450 have_pre_comp = 1;
1451 else
1452 /*
1453 * we don't have valid precomputation: treat the generator as a
1454 * random point
1455 */
1456 num_points = num_points + 1;
1457 }
1458
1459 if (num_points > 0) {
1460 if (num_points >= 3) {
1461 /*
1462 * unless we precompute multiples for just one or two points,
1463 * converting those into affine form is time well spent
1464 */
1465 mixed = 1;
1466 }
1467 secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1468 pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1469 if (mixed)
1470 tmp_felems =
1471 OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1472 if ((secrets == NULL) || (pre_comp == NULL)
1473 || (mixed && (tmp_felems == NULL))) {
1474 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1475 goto err;
1476 }
1477
1478 /*
1479 * we treat NULL scalars as 0, and NULL points as points at infinity,
1480 * i.e., they contribute nothing to the linear combination
1481 */
1482 for (i = 0; i < num_points; ++i) {
1483 if (i == num)
1484 /* the generator */
1485 {
1486 p = EC_GROUP_get0_generator(group);
1487 p_scalar = scalar;
1488 } else
1489 /* the i^th point */
1490 {
1491 p = points[i];
1492 p_scalar = scalars[i];
1493 }
1494 if ((p_scalar != NULL) && (p != NULL)) {
1495 /* reduce scalar to 0 <= scalar < 2^224 */
1496 if ((BN_num_bits(p_scalar) > 224)
1497 || (BN_is_negative(p_scalar))) {
1498 /*
1499 * this is an unusual input, and we don't guarantee
1500 * constant-timeness
1501 */
1502 if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1503 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1504 goto err;
1505 }
1506 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1507 } else
1508 num_bytes = BN_bn2bin(p_scalar, tmp);
1509 flip_endian(secrets[i], tmp, num_bytes);
1510 /* precompute multiples */
1511 if ((!BN_to_felem(x_out, p->X)) ||
1512 (!BN_to_felem(y_out, p->Y)) ||
1513 (!BN_to_felem(z_out, p->Z)))
1514 goto err;
1515 felem_assign(pre_comp[i][1][0], x_out);
1516 felem_assign(pre_comp[i][1][1], y_out);
1517 felem_assign(pre_comp[i][1][2], z_out);
1518 for (j = 2; j <= 16; ++j) {
1519 if (j & 1) {
1520 point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1521 pre_comp[i][j][2], pre_comp[i][1][0],
1522 pre_comp[i][1][1], pre_comp[i][1][2], 0,
1523 pre_comp[i][j - 1][0],
1524 pre_comp[i][j - 1][1],
1525 pre_comp[i][j - 1][2]);
1526 } else {
1527 point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1528 pre_comp[i][j][2], pre_comp[i][j / 2][0],
1529 pre_comp[i][j / 2][1],
1530 pre_comp[i][j / 2][2]);
1531 }
1532 }
1533 }
1534 }
1535 if (mixed)
1536 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1537 }
1538
1539 /* the scalar for the generator */
1540 if ((scalar != NULL) && (have_pre_comp)) {
1541 memset(g_secret, 0, sizeof(g_secret));
1542 /* reduce scalar to 0 <= scalar < 2^224 */
1543 if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1544 /*
1545 * this is an unusual input, and we don't guarantee
1546 * constant-timeness
1547 */
1548 if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1549 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1550 goto err;
1551 }
1552 num_bytes = BN_bn2bin(tmp_scalar, tmp);
1553 } else
1554 num_bytes = BN_bn2bin(scalar, tmp);
1555 flip_endian(g_secret, tmp, num_bytes);
1556 /* do the multiplication with generator precomputation */
1557 batch_mul(x_out, y_out, z_out,
1558 (const felem_bytearray(*))secrets, num_points,
1559 g_secret,
1560 mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1561 } else
1562 /* do the multiplication without generator precomputation */
1563 batch_mul(x_out, y_out, z_out,
1564 (const felem_bytearray(*))secrets, num_points,
1565 NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1566 /* reduce the output to its unique minimal representation */
1567 felem_contract(x_in, x_out);
1568 felem_contract(y_in, y_out);
1569 felem_contract(z_in, z_out);
1570 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1571 (!felem_to_BN(z, z_in))) {
1572 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1573 goto err;
1574 }
1575 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1576
1577 err:
1578 BN_CTX_end(ctx);
1579 EC_POINT_free(generator);
1580 BN_CTX_free(new_ctx);
1581 OPENSSL_free(secrets);
1582 OPENSSL_free(pre_comp);
1583 OPENSSL_free(tmp_felems);
1584 return ret;
1585}
1586
1587int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1588{
1589 int ret = 0;
1590 NISTP224_PRE_COMP *pre = NULL;
1591 int i, j;
1592 BN_CTX *new_ctx = NULL;
1593 BIGNUM *x, *y;
1594 EC_POINT *generator = NULL;
1595 felem tmp_felems[32];
1596
1597 /* throw away old precomputation */
1598 EC_pre_comp_free(group);
1599 if (ctx == NULL)
1600 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1601 return 0;
1602 BN_CTX_start(ctx);
1603 if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
1604 goto err;
1605 /* get the generator */
1606 if (group->generator == NULL)
1607 goto err;
1608 generator = EC_POINT_new(group);
1609 if (generator == NULL)
1610 goto err;
1611 BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1612 BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1613 if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1614 goto err;
1615 if ((pre = nistp224_pre_comp_new()) == NULL)
1616 goto err;
1617 /*
1618 * if the generator is the standard one, use built-in precomputation
1619 */
1620 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1621 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1622 goto done;
1623 }
1624 if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1625 (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1626 (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1627 goto err;
1628 /*
1629 * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1630 * 2^140*G, 2^196*G for the second one
1631 */
1632 for (i = 1; i <= 8; i <<= 1) {
1633 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1634 pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1635 pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1636 for (j = 0; j < 27; ++j) {
1637 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1638 pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1639 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1640 }
1641 if (i == 8)
1642 break;
1643 point_double(pre->g_pre_comp[0][2 * i][0],
1644 pre->g_pre_comp[0][2 * i][1],
1645 pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1646 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1647 for (j = 0; j < 27; ++j) {
1648 point_double(pre->g_pre_comp[0][2 * i][0],
1649 pre->g_pre_comp[0][2 * i][1],
1650 pre->g_pre_comp[0][2 * i][2],
1651 pre->g_pre_comp[0][2 * i][0],
1652 pre->g_pre_comp[0][2 * i][1],
1653 pre->g_pre_comp[0][2 * i][2]);
1654 }
1655 }
1656 for (i = 0; i < 2; i++) {
1657 /* g_pre_comp[i][0] is the point at infinity */
1658 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1659 /* the remaining multiples */
1660 /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1661 point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1662 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1663 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1664 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1665 pre->g_pre_comp[i][2][2]);
1666 /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1667 point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1668 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1669 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1670 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1671 pre->g_pre_comp[i][2][2]);
1672 /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1673 point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1674 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1675 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1676 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1677 pre->g_pre_comp[i][4][2]);
1678 /*
1679 * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1680 */
1681 point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1682 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1683 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1684 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1685 pre->g_pre_comp[i][2][2]);
1686 for (j = 1; j < 8; ++j) {
1687 /* odd multiples: add G resp. 2^28*G */
1688 point_add(pre->g_pre_comp[i][2 * j + 1][0],
1689 pre->g_pre_comp[i][2 * j + 1][1],
1690 pre->g_pre_comp[i][2 * j + 1][2],
1691 pre->g_pre_comp[i][2 * j][0],
1692 pre->g_pre_comp[i][2 * j][1],
1693 pre->g_pre_comp[i][2 * j][2], 0,
1694 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1695 pre->g_pre_comp[i][1][2]);
1696 }
1697 }
1698 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1699
1700 done:
1701 SETPRECOMP(group, nistp224, pre);
1702 pre = NULL;
1703 ret = 1;
1704 err:
1705 BN_CTX_end(ctx);
1706 EC_POINT_free(generator);
1707 BN_CTX_free(new_ctx);
1708 EC_nistp224_pre_comp_free(pre);
1709 return ret;
1710}
1711
1712int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1713{
1714 return HAVEPRECOMP(group, nistp224);
1715}
1716
1717#endif