Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

bn_exp.c@ 331

Last change on this file since 331 was 331, checked in by coas-nagasima, 6 years ago
prototoolに関連するプロジェクトをnewlibからmuslを使うよう変更・更新 ntshellをnewlibの下位の実装から、muslのsyscallの実装に変更・更新以下のOSSをアップデート・mruby-1.3.0 ・musl-1.1.18 ・onigmo-6.1.3 ・tcc-0.9.27 以下のOSSを追加・openssl-1.1.0e ・curl-7.57.0 ・zlib-1.2.11 以下のmrbgemsを追加・iij/mruby-digest ・iij/mruby-env ・iij/mruby-errno ・iij/mruby-iijson ・iij/mruby-ipaddr ・iij/mruby-mock ・iij/mruby-require ・iij/mruby-tls-openssl
Property svn:eol-style set to `native` Property svn:mime-type set to `text/x-csrc`
File size: 42.6 KB

Line
1	/*
2	* Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
3	*
4	* Licensed under the OpenSSL license (the "License"). You may not use
5	* this file except in compliance with the License. You can obtain a copy
6	* in the file LICENSE in the source distribution or at
7	* https://www.openssl.org/source/license.html
8	*/
9
10	#include "internal/cryptlib.h"
11	#include "internal/constant_time_locl.h"
12	#include "bn_lcl.h"
13
14	#include <stdlib.h>
15	#ifdef _WIN32
16	# include <malloc.h>
17	# ifndef alloca
18	# define alloca _alloca
19	# endif
20	#elif defined(__GNUC__)
21	# ifndef alloca
22	# define alloca(s) __builtin_alloca((s))
23	# endif
24	#elif defined(__sun)
25	# include <alloca.h>
26	#endif
27
28	#include "rsaz_exp.h"
29
/*
 * SPARC_T4_MONT is defined only when the Montgomery assembler is enabled
 * (OPENSSL_BN_ASM_MONT) and the target is a SPARC; it gates the
 * SPARC-specific code paths further down in this file.
 */
#undef SPARC_T4_MONT
#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc))
# include "sparc_arch.h"
extern unsigned int OPENSSL_sparcv9cap_P[];
# define SPARC_T4_MONT
#endif

/* maximum precomputation table size for variable sliding windows */
#define TABLE_SIZE      32
39
40	/* this one works - simple but works */
41	int BN_exp(BIGNUM r, const BIGNUM a, const BIGNUM p, BN_CTX ctx)
42	{
43	int i, bits, ret = 0;
44	BIGNUM v, rr;
45
46	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
47	/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
48	BNerr(BN_F_BN_EXP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
49	return 0;
50	}
51
52	BN_CTX_start(ctx);
53	if ((r == a) \|\| (r == p))
54	rr = BN_CTX_get(ctx);
55	else
56	rr = r;
57	v = BN_CTX_get(ctx);
58	if (rr == NULL \|\| v == NULL)
59	goto err;
60
61	if (BN_copy(v, a) == NULL)
62	goto err;
63	bits = BN_num_bits(p);
64
65	if (BN_is_odd(p)) {
66	if (BN_copy(rr, a) == NULL)
67	goto err;
68	} else {
69	if (!BN_one(rr))
70	goto err;
71	}
72
73	for (i = 1; i < bits; i++) {
74	if (!BN_sqr(v, v, ctx))
75	goto err;
76	if (BN_is_bit_set(p, i)) {
77	if (!BN_mul(rr, rr, v, ctx))
78	goto err;
79	}
80	}
81	if (r != rr && BN_copy(r, rr) == NULL)
82	goto err;
83
84	ret = 1;
85	err:
86	BN_CTX_end(ctx);
87	bn_check_top(r);
88	return (ret);
89	}
90
91	int BN_mod_exp(BIGNUM r, const BIGNUM a, const BIGNUM p, const BIGNUM m,
92	BN_CTX *ctx)
93	{
94	int ret;
95
96	bn_check_top(a);
97	bn_check_top(p);
98	bn_check_top(m);
99
100	/*-
101	* For even modulus m = 2^k*m_odd, it might make sense to compute
102	* a^p mod m_odd and a^p mod 2^k separately (with Montgomery
103	* exponentiation for the odd part), using appropriate exponent
104	* reductions, and combine the results using the CRT.
105	*
106	* For now, we use Montgomery only if the modulus is odd; otherwise,
107	* exponentiation using the reciprocal-based quick remaindering
108	* algorithm is used.
109	*
110	* (Timing obtained with expspeed.c [computations a^p mod m
111	* where a, p, m are of the same length: 256, 512, 1024, 2048,
112	* 4096, 8192 bits], compared to the running time of the
113	* standard algorithm:
114	*
115	* BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration]
116	* 55 .. 77 % [UltraSparc processor, but
117	* debug-solaris-sparcv8-gcc conf.]
118	*
119	* BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration]
120	* 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
121	*
122	* On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
123	* at 2048 and more bits, but at 512 and 1024 bits, it was
124	* slower even than the standard algorithm!
125	*
126	* "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
127	* should be obtained when the new Montgomery reduction code
128	* has been integrated into OpenSSL.)
129	*/
130
131	#define MONT_MUL_MOD
132	#define MONT_EXP_WORD
133	#define RECP_MUL_MOD
134
135	#ifdef MONT_MUL_MOD
136	/*
137	* I have finally been able to take out this pre-condition of the top bit
138	* being set. It was caused by an error in BN_div with negatives. There
139	* was also another problem when for a^b%m a >= m. eay 07-May-97
140	*/
141	/* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
142
143	if (BN_is_odd(m)) {
144	# ifdef MONT_EXP_WORD
145	if (a->top == 1 && !a->neg
146	&& (BN_get_flags(p, BN_FLG_CONSTTIME) == 0)) {
147	BN_ULONG A = a->d[0];
148	ret = BN_mod_exp_mont_word(r, A, p, m, ctx, NULL);
149	} else
150	# endif
151	ret = BN_mod_exp_mont(r, a, p, m, ctx, NULL);
152	} else
153	#endif
154	#ifdef RECP_MUL_MOD
155	{
156	ret = BN_mod_exp_recp(r, a, p, m, ctx);
157	}
158	#else
159	{
160	ret = BN_mod_exp_simple(r, a, p, m, ctx);
161	}
162	#endif
163
164	bn_check_top(r);
165	return (ret);
166	}
167
168	int BN_mod_exp_recp(BIGNUM r, const BIGNUM a, const BIGNUM *p,
169	const BIGNUM m, BN_CTX ctx)
170	{
171	int i, j, bits, ret = 0, wstart, wend, window, wvalue;
172	int start = 1;
173	BIGNUM *aa;
174	/* Table of variables obtained from 'ctx' */
175	BIGNUM *val[TABLE_SIZE];
176	BN_RECP_CTX recp;
177
178	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
179	/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
180	BNerr(BN_F_BN_MOD_EXP_RECP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
181	return 0;
182	}
183
184	bits = BN_num_bits(p);
185	if (bits == 0) {
186	/* x*0 mod 1 is still zero. /
187	if (BN_is_one(m)) {
188	ret = 1;
189	BN_zero(r);
190	} else {
191	ret = BN_one(r);
192	}
193	return ret;
194	}
195
196	BN_CTX_start(ctx);
197	aa = BN_CTX_get(ctx);
198	val[0] = BN_CTX_get(ctx);
199	if (!aa \|\| !val[0])
200	goto err;
201
202	BN_RECP_CTX_init(&recp);
203	if (m->neg) {
204	/* ignore sign of 'm' */
205	if (!BN_copy(aa, m))
206	goto err;
207	aa->neg = 0;
208	if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0)
209	goto err;
210	} else {
211	if (BN_RECP_CTX_set(&recp, m, ctx) <= 0)
212	goto err;
213	}
214
215	if (!BN_nnmod(val[0], a, m, ctx))
216	goto err; /* 1 */
217	if (BN_is_zero(val[0])) {
218	BN_zero(r);
219	ret = 1;
220	goto err;
221	}
222
223	window = BN_window_bits_for_exponent_size(bits);
224	if (window > 1) {
225	if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx))
226	goto err; /* 2 */
227	j = 1 << (window - 1);
228	for (i = 1; i < j; i++) {
229	if (((val[i] = BN_CTX_get(ctx)) == NULL) \|\|
230	!BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx))
231	goto err;
232	}
233	}
234
235	start = 1; /* This is used to avoid multiplication etc
236	* when there is only the value '1' in the
237	* buffer. */
238	wvalue = 0; /* The 'value' of the window */
239	wstart = bits - 1; /* The top bit of the window */
240	wend = 0; /* The bottom bit of the window */
241
242	if (!BN_one(r))
243	goto err;
244
245	for (;;) {
246	if (BN_is_bit_set(p, wstart) == 0) {
247	if (!start)
248	if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx))
249	goto err;
250	if (wstart == 0)
251	break;
252	wstart--;
253	continue;
254	}
255	/*
256	* We now have wstart on a 'set' bit, we now need to work out how bit
257	* a window to do. To do this we need to scan forward until the last
258	* set bit before the end of the window
259	*/
260	j = wstart;
261	wvalue = 1;
262	wend = 0;
263	for (i = 1; i < window; i++) {
264	if (wstart - i < 0)
265	break;
266	if (BN_is_bit_set(p, wstart - i)) {
267	wvalue <<= (i - wend);
268	wvalue \|= 1;
269	wend = i;
270	}
271	}
272
273	/* wend is the size of the current window */
274	j = wend + 1;
275	/* add the 'bytes above' */
276	if (!start)
277	for (i = 0; i < j; i++) {
278	if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx))
279	goto err;
280	}
281
282	/* wvalue will be an odd number < 2^window */
283	if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx))
284	goto err;
285
286	/* move the 'window' down further */
287	wstart -= wend + 1;
288	wvalue = 0;
289	start = 0;
290	if (wstart < 0)
291	break;
292	}
293	ret = 1;
294	err:
295	BN_CTX_end(ctx);
296	BN_RECP_CTX_free(&recp);
297	bn_check_top(r);
298	return (ret);
299	}
300
301	int BN_mod_exp_mont(BIGNUM rr, const BIGNUM a, const BIGNUM *p,
302	const BIGNUM m, BN_CTX ctx, BN_MONT_CTX *in_mont)
303	{
304	int i, j, bits, ret = 0, wstart, wend, window, wvalue;
305	int start = 1;
306	BIGNUM d, r;
307	const BIGNUM *aa;
308	/* Table of variables obtained from 'ctx' */
309	BIGNUM *val[TABLE_SIZE];
310	BN_MONT_CTX *mont = NULL;
311
312	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
313	return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
314	}
315
316	bn_check_top(a);
317	bn_check_top(p);
318	bn_check_top(m);
319
320	if (!BN_is_odd(m)) {
321	BNerr(BN_F_BN_MOD_EXP_MONT, BN_R_CALLED_WITH_EVEN_MODULUS);
322	return (0);
323	}
324	bits = BN_num_bits(p);
325	if (bits == 0) {
326	/* x*0 mod 1 is still zero. /
327	if (BN_is_one(m)) {
328	ret = 1;
329	BN_zero(rr);
330	} else {
331	ret = BN_one(rr);
332	}
333	return ret;
334	}
335
336	BN_CTX_start(ctx);
337	d = BN_CTX_get(ctx);
338	r = BN_CTX_get(ctx);
339	val[0] = BN_CTX_get(ctx);
340	if (!d \|\| !r \|\| !val[0])
341	goto err;
342
343	/*
344	* If this is not done, things will break in the montgomery part
345	*/
346
347	if (in_mont != NULL)
348	mont = in_mont;
349	else {
350	if ((mont = BN_MONT_CTX_new()) == NULL)
351	goto err;
352	if (!BN_MONT_CTX_set(mont, m, ctx))
353	goto err;
354	}
355
356	if (a->neg \|\| BN_ucmp(a, m) >= 0) {
357	if (!BN_nnmod(val[0], a, m, ctx))
358	goto err;
359	aa = val[0];
360	} else
361	aa = a;
362	if (BN_is_zero(aa)) {
363	BN_zero(rr);
364	ret = 1;
365	goto err;
366	}
367	if (!BN_to_montgomery(val[0], aa, mont, ctx))
368	goto err; /* 1 */
369
370	window = BN_window_bits_for_exponent_size(bits);
371	if (window > 1) {
372	if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx))
373	goto err; /* 2 */
374	j = 1 << (window - 1);
375	for (i = 1; i < j; i++) {
376	if (((val[i] = BN_CTX_get(ctx)) == NULL) \|\|
377	!BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx))
378	goto err;
379	}
380	}
381
382	start = 1; /* This is used to avoid multiplication etc
383	* when there is only the value '1' in the
384	* buffer. */
385	wvalue = 0; /* The 'value' of the window */
386	wstart = bits - 1; /* The top bit of the window */
387	wend = 0; /* The bottom bit of the window */
388
389	#if 1 /* by Shay Gueron's suggestion */
390	j = m->top; /* borrow j */
391	if (m->d[j - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
392	if (bn_wexpand(r, j) == NULL)
393	goto err;
394	/* 2^(topBN_BITS2) - m /
395	r->d[0] = (0 - m->d[0]) & BN_MASK2;
396	for (i = 1; i < j; i++)
397	r->d[i] = (~m->d[i]) & BN_MASK2;
398	r->top = j;
399	/*
400	* Upper words will be zero if the corresponding words of 'm' were
401	* 0xfff[...], so decrement r->top accordingly.
402	*/
403	bn_correct_top(r);
404	} else
405	#endif
406	if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
407	goto err;
408	for (;;) {
409	if (BN_is_bit_set(p, wstart) == 0) {
410	if (!start) {
411	if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
412	goto err;
413	}
414	if (wstart == 0)
415	break;
416	wstart--;
417	continue;
418	}
419	/*
420	* We now have wstart on a 'set' bit, we now need to work out how bit
421	* a window to do. To do this we need to scan forward until the last
422	* set bit before the end of the window
423	*/
424	j = wstart;
425	wvalue = 1;
426	wend = 0;
427	for (i = 1; i < window; i++) {
428	if (wstart - i < 0)
429	break;
430	if (BN_is_bit_set(p, wstart - i)) {
431	wvalue <<= (i - wend);
432	wvalue \|= 1;
433	wend = i;
434	}
435	}
436
437	/* wend is the size of the current window */
438	j = wend + 1;
439	/* add the 'bytes above' */
440	if (!start)
441	for (i = 0; i < j; i++) {
442	if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
443	goto err;
444	}
445
446	/* wvalue will be an odd number < 2^window */
447	if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx))
448	goto err;
449
450	/* move the 'window' down further */
451	wstart -= wend + 1;
452	wvalue = 0;
453	start = 0;
454	if (wstart < 0)
455	break;
456	}
457	#if defined(SPARC_T4_MONT)
458	if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3 \| SPARCV9_PREFER_FPU)) {
459	j = mont->N.top; /* borrow j */
460	val[0]->d[0] = 1; /* borrow val[0] */
461	for (i = 1; i < j; i++)
462	val[0]->d[i] = 0;
463	val[0]->top = j;
464	if (!BN_mod_mul_montgomery(rr, r, val[0], mont, ctx))
465	goto err;
466	} else
467	#endif
468	if (!BN_from_montgomery(rr, r, mont, ctx))
469	goto err;
470	ret = 1;
471	err:
472	if (in_mont == NULL)
473	BN_MONT_CTX_free(mont);
474	BN_CTX_end(ctx);
475	bn_check_top(rr);
476	return (ret);
477	}
478
#if defined(SPARC_T4_MONT)
/*
 * Return the BN_ULONG-sized run of bits of 'a' that starts at bit
 * position 'bitpos'. When the run straddles a word boundary the missing
 * high bits are taken from the next word, if there is one. Returns 0 when
 * the word containing 'bitpos' lies outside a->d[0 .. a->top-1].
 */
static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos)
{
    BN_ULONG ret = 0;
    int wordpos;

    wordpos = bitpos / BN_BITS2;
    bitpos %= BN_BITS2;
    if (wordpos >= 0 && wordpos < a->top) {
        ret = a->d[wordpos] & BN_MASK2;
        if (bitpos) {
            ret >>= bitpos;
            if (++wordpos < a->top)
                ret |= a->d[wordpos] << (BN_BITS2 - bitpos);
        }
    }

    return ret & BN_MASK2;
}
#endif
499
500	/*
501	* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
502	* layout so that accessing any of these table values shows the same access
503	* pattern as far as cache lines are concerned. The following functions are
504	* used to transfer a BIGNUM from/to that table.
505	*/
506
507	static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top,
508	unsigned char *buf, int idx,
509	int window)
510	{
511	int i, j;
512	int width = 1 << window;
513	BN_ULONG table = (BN_ULONG )buf;
514
515	if (top > b->top)
516	top = b->top; /* this works because 'buf' is explicitly
517	* zeroed */
518	for (i = 0, j = idx; i < top; i++, j += width) {
519	table[j] = b->d[i];
520	}
521
522	return 1;
523	}
524
525	static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top,
526	unsigned char *buf, int idx,
527	int window)
528	{
529	int i, j;
530	int width = 1 << window;
531	/*
532	* We declare table 'volatile' in order to discourage compiler
533	* from reordering loads from the table. Concern is that if
534	* reordered in specific manner loads might give away the
535	* information we are trying to conceal. Some would argue that
536	* compiler can reorder them anyway, but it can as well be
537	* argued that doing so would be violation of standard...
538	*/
539	volatile BN_ULONG table = (volatile BN_ULONG )buf;
540
541	if (bn_wexpand(b, top) == NULL)
542	return 0;
543
544	if (window <= 3) {
545	for (i = 0; i < top; i++, table += width) {
546	BN_ULONG acc = 0;
547
548	for (j = 0; j < width; j++) {
549	acc \|= table[j] &
550	((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1));
551	}
552
553	b->d[i] = acc;
554	}
555	} else {
556	int xstride = 1 << (window - 2);
557	BN_ULONG y0, y1, y2, y3;
558
559	i = idx >> (window - 2); /* equivalent of idx / xstride */
560	idx &= xstride - 1; /* equivalent of idx % xstride */
561
562	y0 = (BN_ULONG)0 - (constant_time_eq_int(i,0)&1);
563	y1 = (BN_ULONG)0 - (constant_time_eq_int(i,1)&1);
564	y2 = (BN_ULONG)0 - (constant_time_eq_int(i,2)&1);
565	y3 = (BN_ULONG)0 - (constant_time_eq_int(i,3)&1);
566
567	for (i = 0; i < top; i++, table += width) {
568	BN_ULONG acc = 0;
569
570	for (j = 0; j < xstride; j++) {
571	acc \|= ( (table[j + 0 * xstride] & y0) \|
572	(table[j + 1 * xstride] & y1) \|
573	(table[j + 2 * xstride] & y2) \|
574	(table[j + 3 * xstride] & y3) )
575	& ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1));
576	}
577
578	b->d[i] = acc;
579	}
580	}
581
582	b->top = top;
583	bn_correct_top(b);
584	return 1;
585	}
586
/*
 * Given a pointer value, compute the next address that is a cache line
 * multiple.
 *
 * The result is x_ plus (WIDTH - (x_ & MASK)); assuming MASK is WIDTH - 1,
 * an already aligned pointer is therefore advanced by a full
 * MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH, so callers must over-allocate by
 * that many bytes.
 */
#define MOD_EXP_CTIME_ALIGN(x_) \
        ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
593
594	/*
595	* This variant of BN_mod_exp_mont() uses fixed windows and the special
596	* precomputation memory layout to limit data-dependency to a minimum to
597	* protect secret exponents (cf. the hyper-threading timing attacks pointed
598	* out by Colin Percival,
599	* http://www.daemonology.net/hyperthreading-considered-harmful/)
600	*/
601	int BN_mod_exp_mont_consttime(BIGNUM rr, const BIGNUM a, const BIGNUM *p,
602	const BIGNUM m, BN_CTX ctx,
603	BN_MONT_CTX *in_mont)
604	{
605	int i, bits, ret = 0, window, wvalue;
606	int top;
607	BN_MONT_CTX *mont = NULL;
608
609	int numPowers;
610	unsigned char *powerbufFree = NULL;
611	int powerbufLen = 0;
612	unsigned char *powerbuf = NULL;
613	BIGNUM tmp, am;
614	#if defined(SPARC_T4_MONT)
615	unsigned int t4 = 0;
616	#endif
617
618	bn_check_top(a);
619	bn_check_top(p);
620	bn_check_top(m);
621
622	if (!BN_is_odd(m)) {
623	BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME, BN_R_CALLED_WITH_EVEN_MODULUS);
624	return (0);
625	}
626
627	top = m->top;
628
629	bits = BN_num_bits(p);
630	if (bits == 0) {
631	/* x*0 mod 1 is still zero. /
632	if (BN_is_one(m)) {
633	ret = 1;
634	BN_zero(rr);
635	} else {
636	ret = BN_one(rr);
637	}
638	return ret;
639	}
640
641	BN_CTX_start(ctx);
642
643	/*
644	* Allocate a montgomery context if it was not supplied by the caller. If
645	* this is not done, things will break in the montgomery part.
646	*/
647	if (in_mont != NULL)
648	mont = in_mont;
649	else {
650	if ((mont = BN_MONT_CTX_new()) == NULL)
651	goto err;
652	if (!BN_MONT_CTX_set(mont, m, ctx))
653	goto err;
654	}
655
656	#ifdef RSAZ_ENABLED
657	/*
658	* If the size of the operands allow it, perform the optimized
659	* RSAZ exponentiation. For further information see
660	* crypto/bn/rsaz_exp.c and accompanying assembly modules.
661	*/
662	if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024)
663	&& rsaz_avx2_eligible()) {
664	if (NULL == bn_wexpand(rr, 16))
665	goto err;
666	RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d,
667	mont->n0[0]);
668	rr->top = 16;
669	rr->neg = 0;
670	bn_correct_top(rr);
671	ret = 1;
672	goto err;
673	} else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) {
674	if (NULL == bn_wexpand(rr, 8))
675	goto err;
676	RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d);
677	rr->top = 8;
678	rr->neg = 0;
679	bn_correct_top(rr);
680	ret = 1;
681	goto err;
682	}
683	#endif
684
685	/* Get the window size to use with size of p. */
686	window = BN_window_bits_for_ctime_exponent_size(bits);
687	#if defined(SPARC_T4_MONT)
688	if (window >= 5 && (top & 15) == 0 && top <= 64 &&
689	(OPENSSL_sparcv9cap_P[1] & (CFR_MONTMUL \| CFR_MONTSQR)) ==
690	(CFR_MONTMUL \| CFR_MONTSQR) && (t4 = OPENSSL_sparcv9cap_P[0]))
691	window = 5;
692	else
693	#endif
694	#if defined(OPENSSL_BN_ASM_MONT5)
695	if (window >= 5) {
696	window = 5; /* ~5% improvement for RSA2048 sign, and even
697	* for RSA4096 */
698	/* reserve space for mont->N.d[] copy */
699	powerbufLen += top * sizeof(mont->N.d[0]);
700	}
701	#endif
702	(void)0;
703
704	/*
705	* Allocate a buffer large enough to hold all of the pre-computed powers
706	* of am, am itself and tmp.
707	*/
708	numPowers = 1 << window;
709	powerbufLen += sizeof(m->d[0]) * (top * numPowers +
710	((2 * top) >
711	numPowers ? (2 * top) : numPowers));
712	#ifdef alloca
713	if (powerbufLen < 3072)
714	powerbufFree =
715	alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
716	else
717	#endif
718	if ((powerbufFree =
719	OPENSSL_malloc(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH))
720	== NULL)
721	goto err;
722
723	powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
724	memset(powerbuf, 0, powerbufLen);
725
726	#ifdef alloca
727	if (powerbufLen < 3072)
728	powerbufFree = NULL;
729	#endif
730
731	/* lay down tmp and am right after powers table */
732	tmp.d = (BN_ULONG )(powerbuf + sizeof(m->d[0]) top * numPowers);
733	am.d = tmp.d + top;
734	tmp.top = am.top = 0;
735	tmp.dmax = am.dmax = top;
736	tmp.neg = am.neg = 0;
737	tmp.flags = am.flags = BN_FLG_STATIC_DATA;
738
739	/* prepare a^0 in Montgomery domain */
740	#if 1 /* by Shay Gueron's suggestion */
741	if (m->d[top - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
742	/* 2^(topBN_BITS2) - m /
743	tmp.d[0] = (0 - m->d[0]) & BN_MASK2;
744	for (i = 1; i < top; i++)
745	tmp.d[i] = (~m->d[i]) & BN_MASK2;
746	tmp.top = top;
747	} else
748	#endif
749	if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx))
750	goto err;
751
752	/* prepare a^1 in Montgomery domain */
753	if (a->neg \|\| BN_ucmp(a, m) >= 0) {
754	if (!BN_mod(&am, a, m, ctx))
755	goto err;
756	if (!BN_to_montgomery(&am, &am, mont, ctx))
757	goto err;
758	} else if (!BN_to_montgomery(&am, a, mont, ctx))
759	goto err;
760
761	#if defined(SPARC_T4_MONT)
762	if (t4) {
763	typedef int (bn_pwr5_mont_f) (BN_ULONG tp, const BN_ULONG *np,
764	const BN_ULONG n0, const void table,
765	int power, int bits);
766	int bn_pwr5_mont_t4_8(BN_ULONG tp, const BN_ULONG np,
767	const BN_ULONG n0, const void table,
768	int power, int bits);
769	int bn_pwr5_mont_t4_16(BN_ULONG tp, const BN_ULONG np,
770	const BN_ULONG n0, const void table,
771	int power, int bits);
772	int bn_pwr5_mont_t4_24(BN_ULONG tp, const BN_ULONG np,
773	const BN_ULONG n0, const void table,
774	int power, int bits);
775	int bn_pwr5_mont_t4_32(BN_ULONG tp, const BN_ULONG np,
776	const BN_ULONG n0, const void table,
777	int power, int bits);
778	static const bn_pwr5_mont_f pwr5_funcs[4] = {
779	bn_pwr5_mont_t4_8, bn_pwr5_mont_t4_16,
780	bn_pwr5_mont_t4_24, bn_pwr5_mont_t4_32
781	};
782	bn_pwr5_mont_f pwr5_worker = pwr5_funcs[top / 16 - 1];
783
784	typedef int (bn_mul_mont_f) (BN_ULONG rp, const BN_ULONG *ap,
785	const void bp, const BN_ULONG np,
786	const BN_ULONG *n0);
787	int bn_mul_mont_t4_8(BN_ULONG rp, const BN_ULONG ap, const void *bp,
788	const BN_ULONG np, const BN_ULONG n0);
789	int bn_mul_mont_t4_16(BN_ULONG rp, const BN_ULONG ap,
790	const void bp, const BN_ULONG np,
791	const BN_ULONG *n0);
792	int bn_mul_mont_t4_24(BN_ULONG rp, const BN_ULONG ap,
793	const void bp, const BN_ULONG np,
794	const BN_ULONG *n0);
795	int bn_mul_mont_t4_32(BN_ULONG rp, const BN_ULONG ap,
796	const void bp, const BN_ULONG np,
797	const BN_ULONG *n0);
798	static const bn_mul_mont_f mul_funcs[4] = {
799	bn_mul_mont_t4_8, bn_mul_mont_t4_16,
800	bn_mul_mont_t4_24, bn_mul_mont_t4_32
801	};
802	bn_mul_mont_f mul_worker = mul_funcs[top / 16 - 1];
803
804	void bn_mul_mont_vis3(BN_ULONG rp, const BN_ULONG ap,
805	const void bp, const BN_ULONG np,
806	const BN_ULONG *n0, int num);
807	void bn_mul_mont_t4(BN_ULONG rp, const BN_ULONG ap,
808	const void bp, const BN_ULONG np,
809	const BN_ULONG *n0, int num);
810	void bn_mul_mont_gather5_t4(BN_ULONG rp, const BN_ULONG ap,
811	const void table, const BN_ULONG np,
812	const BN_ULONG *n0, int num, int power);
813	void bn_flip_n_scatter5_t4(const BN_ULONG *inp, size_t num,
814	void *table, size_t power);
815	void bn_gather5_t4(BN_ULONG *out, size_t num,
816	void *table, size_t power);
817	void bn_flip_t4(BN_ULONG dst, BN_ULONG src, size_t num);
818
819	BN_ULONG np = mont->N.d, n0 = mont->n0;
820	int stride = 5 * (6 - (top / 16 - 1)); /* multiple of 5, but less
821	* than 32 */
822
823	/*
824	* BN_to_montgomery can contaminate words above .top [in
825	* BN_DEBUG[_DEBUG] build]...
826	*/
827	for (i = am.top; i < top; i++)
828	am.d[i] = 0;
829	for (i = tmp.top; i < top; i++)
830	tmp.d[i] = 0;
831
832	bn_flip_n_scatter5_t4(tmp.d, top, powerbuf, 0);
833	bn_flip_n_scatter5_t4(am.d, top, powerbuf, 1);
834	if (!(*mul_worker) (tmp.d, am.d, am.d, np, n0) &&
835	!(*mul_worker) (tmp.d, am.d, am.d, np, n0))
836	bn_mul_mont_vis3(tmp.d, am.d, am.d, np, n0, top);
837	bn_flip_n_scatter5_t4(tmp.d, top, powerbuf, 2);
838
839	for (i = 3; i < 32; i++) {
840	/* Calculate a^i = a^(i-1) * a */
841	if (!(*mul_worker) (tmp.d, tmp.d, am.d, np, n0) &&
842	!(*mul_worker) (tmp.d, tmp.d, am.d, np, n0))
843	bn_mul_mont_vis3(tmp.d, tmp.d, am.d, np, n0, top);
844	bn_flip_n_scatter5_t4(tmp.d, top, powerbuf, i);
845	}
846
847	/* switch to 64-bit domain */
848	np = alloca(top * sizeof(BN_ULONG));
849	top /= 2;
850	bn_flip_t4(np, mont->N.d, top);
851
852	bits--;
853	for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
854	wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
855	bn_gather5_t4(tmp.d, top, powerbuf, wvalue);
856
857	/*
858	* Scan the exponent one window at a time starting from the most
859	* significant bits.
860	*/
861	while (bits >= 0) {
862	if (bits < stride)
863	stride = bits + 1;
864	bits -= stride;
865	wvalue = bn_get_bits(p, bits + 1);
866
867	if ((*pwr5_worker) (tmp.d, np, n0, powerbuf, wvalue, stride))
868	continue;
869	/* retry once and fall back */
870	if ((*pwr5_worker) (tmp.d, np, n0, powerbuf, wvalue, stride))
871	continue;
872
873	bits += stride - 5;
874	wvalue >>= stride - 5;
875	wvalue &= 31;
876	bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
877	bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
878	bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
879	bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
880	bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
881	bn_mul_mont_gather5_t4(tmp.d, tmp.d, powerbuf, np, n0, top,
882	wvalue);
883	}
884
885	bn_flip_t4(tmp.d, tmp.d, top);
886	top *= 2;
887	/* back to 32-bit domain */
888	tmp.top = top;
889	bn_correct_top(&tmp);
890	OPENSSL_cleanse(np, top * sizeof(BN_ULONG));
891	} else
892	#endif
893	#if defined(OPENSSL_BN_ASM_MONT5)
894	if (window == 5 && top > 1) {
895	/*
896	* This optimization uses ideas from http://eprint.iacr.org/2011/239,
897	* specifically optimization of cache-timing attack countermeasures
898	* and pre-computation optimization.
899	*/
900
901	/*
902	* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
903	* 512-bit RSA is hardly relevant, we omit it to spare size...
904	*/
905	void bn_mul_mont_gather5(BN_ULONG rp, const BN_ULONG ap,
906	const void table, const BN_ULONG np,
907	const BN_ULONG *n0, int num, int power);
908	void bn_scatter5(const BN_ULONG *inp, size_t num,
909	void *table, size_t power);
910	void bn_gather5(BN_ULONG out, size_t num, void table, size_t power);
911	void bn_power5(BN_ULONG rp, const BN_ULONG ap,
912	const void table, const BN_ULONG np,
913	const BN_ULONG *n0, int num, int power);
914	int bn_get_bits5(const BN_ULONG *ap, int off);
915	int bn_from_montgomery(BN_ULONG rp, const BN_ULONG ap,
916	const BN_ULONG not_used, const BN_ULONG np,
917	const BN_ULONG *n0, int num);
918
919	BN_ULONG n0 = mont->n0, np;
920
921	/*
922	* BN_to_montgomery can contaminate words above .top [in
923	* BN_DEBUG[_DEBUG] build]...
924	*/
925	for (i = am.top; i < top; i++)
926	am.d[i] = 0;
927	for (i = tmp.top; i < top; i++)
928	tmp.d[i] = 0;
929
930	/*
931	* copy mont->N.d[] to improve cache locality
932	*/
933	for (np = am.d + top, i = 0; i < top; i++)
934	np[i] = mont->N.d[i];
935
936	bn_scatter5(tmp.d, top, powerbuf, 0);
937	bn_scatter5(am.d, am.top, powerbuf, 1);
938	bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
939	bn_scatter5(tmp.d, top, powerbuf, 2);
940
941	# if 0
942	for (i = 3; i < 32; i++) {
943	/* Calculate a^i = a^(i-1) * a */
944	bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
945	bn_scatter5(tmp.d, top, powerbuf, i);
946	}
947	# else
948	/* same as above, but uses squaring for 1/2 of operations */
949	for (i = 4; i < 32; i *= 2) {
950	bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
951	bn_scatter5(tmp.d, top, powerbuf, i);
952	}
953	for (i = 3; i < 8; i += 2) {
954	int j;
955	bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
956	bn_scatter5(tmp.d, top, powerbuf, i);
957	for (j = 2 * i; j < 32; j *= 2) {
958	bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
959	bn_scatter5(tmp.d, top, powerbuf, j);
960	}
961	}
962	for (; i < 16; i += 2) {
963	bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
964	bn_scatter5(tmp.d, top, powerbuf, i);
965	bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
966	bn_scatter5(tmp.d, top, powerbuf, 2 * i);
967	}
968	for (; i < 32; i += 2) {
969	bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
970	bn_scatter5(tmp.d, top, powerbuf, i);
971	}
972	# endif
973	bits--;
974	for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
975	wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
976	bn_gather5(tmp.d, top, powerbuf, wvalue);
977
978	/*
979	* Scan the exponent one window at a time starting from the most
980	* significant bits.
981	*/
982	if (top & 7)
983	while (bits >= 0) {
984	for (wvalue = 0, i = 0; i < 5; i++, bits--)
985	wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
986
987	bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
988	bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
989	bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
990	bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
991	bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
992	bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top,
993	wvalue);
994	} else {
995	while (bits >= 0) {
996	wvalue = bn_get_bits5(p->d, bits - 4);
997	bits -= 5;
998	bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
999	}
1000	}
1001
1002	ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np, n0, top);
1003	tmp.top = top;
1004	bn_correct_top(&tmp);
1005	if (ret) {
1006	if (!BN_copy(rr, &tmp))
1007	ret = 0;
1008	goto err; /* non-zero ret means it's not error */
1009	}
1010	} else
1011	#endif
1012	{
1013	if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, window))
1014	goto err;
1015	if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, window))
1016	goto err;
1017
1018	/*
1019	* If the window size is greater than 1, then calculate
1020	* val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) (even
1021	* powers could instead be computed as (a^(i/2))^2 to use the slight
1022	* performance advantage of sqr over mul).
1023	*/
1024	if (window > 1) {
1025	if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx))
1026	goto err;
1027	if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 2,
1028	window))
1029	goto err;
1030	for (i = 3; i < numPowers; i++) {
1031	/* Calculate a^i = a^(i-1) * a */
1032	if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx))
1033	goto err;
1034	if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, i,
1035	window))
1036	goto err;
1037	}
1038	}
1039
1040	bits--;
1041	for (wvalue = 0, i = bits % window; i >= 0; i--, bits--)
1042	wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
1043	if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf, wvalue,
1044	window))
1045	goto err;
1046
1047	/*
1048	* Scan the exponent one window at a time starting from the most
1049	* significant bits.
1050	*/
1051	while (bits >= 0) {
1052	wvalue = 0; /* The 'value' of the window */
1053
1054	/* Scan the window, squaring the result as we go */
1055	for (i = 0; i < window; i++, bits--) {
1056	if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx))
1057	goto err;
1058	wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
1059	}
1060
1061	/*
1062	* Fetch the appropriate pre-computed value from the pre-buf
1063	*/
1064	if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue,
1065	window))
1066	goto err;
1067
1068	/* Multiply the result into the intermediate result */
1069	if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx))
1070	goto err;
1071	}
1072	}
1073
1074	/* Convert the final result from montgomery to standard format */
1075	#if defined(SPARC_T4_MONT)
1076	if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3 \| SPARCV9_PREFER_FPU)) {
1077	am.d[0] = 1; /* borrow am */
1078	for (i = 1; i < top; i++)
1079	am.d[i] = 0;
1080	if (!BN_mod_mul_montgomery(rr, &tmp, &am, mont, ctx))
1081	goto err;
1082	} else
1083	#endif
1084	if (!BN_from_montgomery(rr, &tmp, mont, ctx))
1085	goto err;
1086	ret = 1;
1087	err:
1088	if (in_mont == NULL)
1089	BN_MONT_CTX_free(mont);
1090	if (powerbuf != NULL) {
1091	OPENSSL_cleanse(powerbuf, powerbufLen);
1092	OPENSSL_free(powerbufFree);
1093	}
1094	BN_CTX_end(ctx);
1095	return (ret);
1096	}
1097
1098	int BN_mod_exp_mont_word(BIGNUM rr, BN_ULONG a, const BIGNUM p,
1099	const BIGNUM m, BN_CTX ctx, BN_MONT_CTX *in_mont)
1100	{
1101	BN_MONT_CTX *mont = NULL;
1102	int b, bits, ret = 0;
1103	int r_is_one;
1104	BN_ULONG w, next_w;
1105	BIGNUM d, r, *t;
1106	BIGNUM *swap_tmp;
1107	#define BN_MOD_MUL_WORD(r, w, m) \
1108	(BN_mul_word(r, (w)) && \
1109	(/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \
1110	(BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
1111	/*
1112	* BN_MOD_MUL_WORD is only used with 'w' large, so the BN_ucmp test is
1113	* probably more overhead than always using BN_mod (which uses BN_copy if
1114	* a similar test returns true).
1115	*/
1116	/*
1117	* We can use BN_mod and do not need BN_nnmod because our accumulator is
1118	* never negative (the result of BN_mod does not depend on the sign of
1119	* the modulus).
1120	*/
1121	#define BN_TO_MONTGOMERY_WORD(r, w, mont) \
1122	(BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
1123
1124	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
1125	/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
1126	BNerr(BN_F_BN_MOD_EXP_MONT_WORD, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
1127	return 0;
1128	}
1129
1130	bn_check_top(p);
1131	bn_check_top(m);
1132
1133	if (!BN_is_odd(m)) {
1134	BNerr(BN_F_BN_MOD_EXP_MONT_WORD, BN_R_CALLED_WITH_EVEN_MODULUS);
1135	return (0);
1136	}
1137	if (m->top == 1)
1138	a %= m->d[0]; /* make sure that 'a' is reduced */
1139
1140	bits = BN_num_bits(p);
1141	if (bits == 0) {
1142	/* x*0 mod 1 is still zero. /
1143	if (BN_is_one(m)) {
1144	ret = 1;
1145	BN_zero(rr);
1146	} else {
1147	ret = BN_one(rr);
1148	}
1149	return ret;
1150	}
1151	if (a == 0) {
1152	BN_zero(rr);
1153	ret = 1;
1154	return ret;
1155	}
1156
1157	BN_CTX_start(ctx);
1158	d = BN_CTX_get(ctx);
1159	r = BN_CTX_get(ctx);
1160	t = BN_CTX_get(ctx);
1161	if (d == NULL \|\| r == NULL \|\| t == NULL)
1162	goto err;
1163
1164	if (in_mont != NULL)
1165	mont = in_mont;
1166	else {
1167	if ((mont = BN_MONT_CTX_new()) == NULL)
1168	goto err;
1169	if (!BN_MONT_CTX_set(mont, m, ctx))
1170	goto err;
1171	}
1172
1173	r_is_one = 1; /* except for Montgomery factor */
1174
1175	/* bits-1 >= 0 */
1176
1177	/* The result is accumulated in the product rw. /
1178	w = a; /* bit 'bits-1' of 'p' is always set */
1179	for (b = bits - 2; b >= 0; b--) {
1180	/* First, square rw. /
1181	next_w = w * w;
1182	if ((next_w / w) != w) { /* overflow */
1183	if (r_is_one) {
1184	if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
1185	goto err;
1186	r_is_one = 0;
1187	} else {
1188	if (!BN_MOD_MUL_WORD(r, w, m))
1189	goto err;
1190	}
1191	next_w = 1;
1192	}
1193	w = next_w;
1194	if (!r_is_one) {
1195	if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
1196	goto err;
1197	}
1198
1199	/* Second, multiply rw by 'a' if exponent bit is set. /
1200	if (BN_is_bit_set(p, b)) {
1201	next_w = w * a;
1202	if ((next_w / a) != w) { /* overflow */
1203	if (r_is_one) {
1204	if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
1205	goto err;
1206	r_is_one = 0;
1207	} else {
1208	if (!BN_MOD_MUL_WORD(r, w, m))
1209	goto err;
1210	}
1211	next_w = a;
1212	}
1213	w = next_w;
1214	}
1215	}
1216
1217	/* Finally, set r:=rw. /
1218	if (w != 1) {
1219	if (r_is_one) {
1220	if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
1221	goto err;
1222	r_is_one = 0;
1223	} else {
1224	if (!BN_MOD_MUL_WORD(r, w, m))
1225	goto err;
1226	}
1227	}
1228
1229	if (r_is_one) { /* can happen only if a == 1 */
1230	if (!BN_one(rr))
1231	goto err;
1232	} else {
1233	if (!BN_from_montgomery(rr, r, mont, ctx))
1234	goto err;
1235	}
1236	ret = 1;
1237	err:
1238	if (in_mont == NULL)
1239	BN_MONT_CTX_free(mont);
1240	BN_CTX_end(ctx);
1241	bn_check_top(rr);
1242	return (ret);
1243	}
1244
1245	/* The old fallback, simple version :-) */
1246	int BN_mod_exp_simple(BIGNUM r, const BIGNUM a, const BIGNUM *p,
1247	const BIGNUM m, BN_CTX ctx)
1248	{
1249	int i, j, bits, ret = 0, wstart, wend, window, wvalue;
1250	int start = 1;
1251	BIGNUM *d;
1252	/* Table of variables obtained from 'ctx' */
1253	BIGNUM *val[TABLE_SIZE];
1254
1255	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
1256	/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
1257	BNerr(BN_F_BN_MOD_EXP_SIMPLE, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
1258	return 0;
1259	}
1260
1261	bits = BN_num_bits(p);
1262	if (bits == 0) {
1263	/* x*0 mod 1 is still zero. /
1264	if (BN_is_one(m)) {
1265	ret = 1;
1266	BN_zero(r);
1267	} else {
1268	ret = BN_one(r);
1269	}
1270	return ret;
1271	}
1272
1273	BN_CTX_start(ctx);
1274	d = BN_CTX_get(ctx);
1275	val[0] = BN_CTX_get(ctx);
1276	if (!d \|\| !val[0])
1277	goto err;
1278
1279	if (!BN_nnmod(val[0], a, m, ctx))
1280	goto err; /* 1 */
1281	if (BN_is_zero(val[0])) {
1282	BN_zero(r);
1283	ret = 1;
1284	goto err;
1285	}
1286
1287	window = BN_window_bits_for_exponent_size(bits);
1288	if (window > 1) {
1289	if (!BN_mod_mul(d, val[0], val[0], m, ctx))
1290	goto err; /* 2 */
1291	j = 1 << (window - 1);
1292	for (i = 1; i < j; i++) {
1293	if (((val[i] = BN_CTX_get(ctx)) == NULL) \|\|
1294	!BN_mod_mul(val[i], val[i - 1], d, m, ctx))
1295	goto err;
1296	}
1297	}
1298
1299	start = 1; /* This is used to avoid multiplication etc
1300	* when there is only the value '1' in the
1301	* buffer. */
1302	wvalue = 0; /* The 'value' of the window */
1303	wstart = bits - 1; /* The top bit of the window */
1304	wend = 0; /* The bottom bit of the window */
1305
1306	if (!BN_one(r))
1307	goto err;
1308
1309	for (;;) {
1310	if (BN_is_bit_set(p, wstart) == 0) {
1311	if (!start)
1312	if (!BN_mod_mul(r, r, r, m, ctx))
1313	goto err;
1314	if (wstart == 0)
1315	break;
1316	wstart--;
1317	continue;
1318	}
1319	/*
1320	* We now have wstart on a 'set' bit, we now need to work out how bit
1321	* a window to do. To do this we need to scan forward until the last
1322	* set bit before the end of the window
1323	*/
1324	j = wstart;
1325	wvalue = 1;
1326	wend = 0;
1327	for (i = 1; i < window; i++) {
1328	if (wstart - i < 0)
1329	break;
1330	if (BN_is_bit_set(p, wstart - i)) {
1331	wvalue <<= (i - wend);
1332	wvalue \|= 1;
1333	wend = i;
1334	}
1335	}
1336
1337	/* wend is the size of the current window */
1338	j = wend + 1;
1339	/* add the 'bytes above' */
1340	if (!start)
1341	for (i = 0; i < j; i++) {
1342	if (!BN_mod_mul(r, r, r, m, ctx))
1343	goto err;
1344	}
1345
1346	/* wvalue will be an odd number < 2^window */
1347	if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx))
1348	goto err;
1349
1350	/* move the 'window' down further */
1351	wstart -= wend + 1;
1352	wvalue = 0;
1353	start = 0;
1354	if (wstart < 0)
1355	break;
1356	}
1357	ret = 1;
1358	err:
1359	BN_CTX_end(ctx);
1360	bn_check_top(r);
1361	return (ret);
1362	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: EcnlProtoTool/trunk/openssl-1.1.0e/crypto/bn/bn_exp.c@ 331

Download in other formats: