Context Navigation

integer.c@ 372

Last change on this file since 372 was 372, checked in by coas-nagasima, 5 years ago
wolfsslを3.15.7にバージョンアップ
Property svn:eol-style set to `native` Property svn:mime-type set to `text/x-csrc;charset=UTF-8`
File size: 110.5 KB

Line
1	/* integer.c
2	*
3	* Copyright (C) 2006-2017 wolfSSL Inc.
4	*
5	* This file is part of wolfSSL.
6	*
7	* wolfSSL is free software; you can redistribute it and/or modify
8	* it under the terms of the GNU General Public License as published by
9	* the Free Software Foundation; either version 2 of the License, or
10	* (at your option) any later version.
11	*
12	* wolfSSL is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15	* GNU General Public License for more details.
16	*
17	* You should have received a copy of the GNU General Public License
18	* along with this program; if not, write to the Free Software
19	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20	*/
21
22
23
24	/*
25	* Based on public domain LibTomMath 0.38 by Tom St Denis, tomstdenis@iahu.ca,
26	* http://math.libtomcrypt.com
27	*/
28
29
30	#ifdef HAVE_CONFIG_H
31	#include <config.h>
32	#endif
33
34	/* in case user set USE_FAST_MATH there */
35	#include <wolfssl/wolfcrypt/settings.h>
36
37	#ifdef NO_INLINE
38	#include <wolfssl/wolfcrypt/misc.h>
39	#else
40	#define WOLFSSL_MISC_INCLUDED
41	#include <wolfcrypt/src/misc.c>
42	#endif
43
44	#ifndef NO_BIG_INT
45
46	#ifndef USE_FAST_MATH
47
48	#ifndef WOLFSSL_SP_MATH
49
50	#include <wolfssl/wolfcrypt/integer.h>
51
52	#if defined(FREESCALE_LTC_TFM)
53	#include <wolfssl/wolfcrypt/port/nxp/ksdk_port.h>
54	#endif
55	#ifdef WOLFSSL_DEBUG_MATH
56	#include <stdio.h>
57	#endif
58
59	#ifndef NO_WOLFSSL_SMALL_STACK
60	#ifndef WOLFSSL_SMALL_STACK
61	#define WOLFSSL_SMALL_STACK
62	#endif
63	#endif
64
65	#ifdef SHOW_GEN
66	#if defined(FREESCALE_MQX) \|\| defined(FREESCALE_KSDK_MQX)
67	#if MQX_USE_IO_OLD
68	#include <fio.h>
69	#else
70	#include <nio.h>
71	#endif
72	#else
73	#include <stdio.h>
74	#endif
75	#endif
76
77	/* reverse an array, used for radix code */
78	static void
79	bn_reverse (unsigned char *s, int len)
80	{
81	int ix, iy;
82	unsigned char t;
83
84	ix = 0;
85	iy = len - 1;
86	while (ix < iy) {
87	t = s[ix];
88	s[ix] = s[iy];
89	s[iy] = t;
90	++ix;
91	--iy;
92	}
93	}
94
95	/* math settings check */
96	word32 CheckRunTimeSettings(void)
97	{
98	return CTC_SETTINGS;
99	}
100
101
102	/* handle up to 6 inits */
103	int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e,
104	mp_int* f)
105	{
106	int res = MP_OKAY;
107
108	if (a) XMEMSET(a, 0, sizeof(mp_int));
109	if (b) XMEMSET(b, 0, sizeof(mp_int));
110	if (c) XMEMSET(c, 0, sizeof(mp_int));
111	if (d) XMEMSET(d, 0, sizeof(mp_int));
112	if (e) XMEMSET(e, 0, sizeof(mp_int));
113	if (f) XMEMSET(f, 0, sizeof(mp_int));
114
115	if (a && ((res = mp_init(a)) != MP_OKAY))
116	return res;
117
118	if (b && ((res = mp_init(b)) != MP_OKAY)) {
119	mp_clear(a);
120	return res;
121	}
122
123	if (c && ((res = mp_init(c)) != MP_OKAY)) {
124	mp_clear(a); mp_clear(b);
125	return res;
126	}
127
128	if (d && ((res = mp_init(d)) != MP_OKAY)) {
129	mp_clear(a); mp_clear(b); mp_clear(c);
130	return res;
131	}
132
133	if (e && ((res = mp_init(e)) != MP_OKAY)) {
134	mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d);
135	return res;
136	}
137
138	if (f && ((res = mp_init(f)) != MP_OKAY)) {
139	mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d); mp_clear(e);
140	return res;
141	}
142
143	return res;
144	}
145
146
147	/* init a new mp_int */
148	int mp_init (mp_int * a)
149	{
150	/* Safeguard against passing in a null pointer */
151	if (a == NULL)
152	return MP_VAL;
153
154	/* defer allocation until mp_grow */
155	a->dp = NULL;
156
157	/* set the used to zero, allocated digits to the default precision
158	* and sign to positive */
159	a->used = 0;
160	a->alloc = 0;
161	a->sign = MP_ZPOS;
162	#ifdef HAVE_WOLF_BIGINT
163	wc_bigint_init(&a->raw);
164	#endif
165
166	return MP_OKAY;
167	}
168
169
170	/* clear one (frees) */
171	void mp_clear (mp_int * a)
172	{
173	int i;
174
175	if (a == NULL)
176	return;
177
178	/* only do anything if a hasn't been freed previously */
179	if (a->dp != NULL) {
180	/* first zero the digits */
181	for (i = 0; i < a->used; i++) {
182	a->dp[i] = 0;
183	}
184
185	/* free ram */
186	mp_free(a);
187
188	/* reset members to make debugging easier */
189	a->alloc = a->used = 0;
190	a->sign = MP_ZPOS;
191	}
192	}
193
194	void mp_free (mp_int * a)
195	{
196	/* only do anything if a hasn't been freed previously */
197	if (a->dp != NULL) {
198	/* free ram */
199	XFREE(a->dp, 0, DYNAMIC_TYPE_BIGINT);
200	a->dp = NULL;
201	}
202
203	#ifdef HAVE_WOLF_BIGINT
204	wc_bigint_free(&a->raw);
205	#endif
206	}
207
208	void mp_forcezero(mp_int * a)
209	{
210	if (a == NULL)
211	return;
212
213	/* only do anything if a hasn't been freed previously */
214	if (a->dp != NULL) {
215	/* force zero the used digits */
216	ForceZero(a->dp, a->used * sizeof(mp_digit));
217	#ifdef HAVE_WOLF_BIGINT
218	wc_bigint_zero(&a->raw);
219	#endif
220	/* free ram */
221	mp_free(a);
222
223	/* reset members to make debugging easier */
224	a->alloc = a->used = 0;
225	a->sign = MP_ZPOS;
226	}
227
228	a->sign = MP_ZPOS;
229	a->used = 0;
230	}
231
232
233	/* get the size for an unsigned equivalent */
234	int mp_unsigned_bin_size (mp_int * a)
235	{
236	int size = mp_count_bits (a);
237	return (size / 8 + ((size & 7) != 0 ? 1 : 0));
238	}
239
240
241	/* returns the number of bits in an int */
242	int mp_count_bits (mp_int * a)
243	{
244	int r;
245	mp_digit q;
246
247	/* shortcut */
248	if (a->used == 0) {
249	return 0;
250	}
251
252	/* get number of digits and add that */
253	r = (a->used - 1) * DIGIT_BIT;
254
255	/* take the last digit and count the bits in it */
256	q = a->dp[a->used - 1];
257	while (q > ((mp_digit) 0)) {
258	++r;
259	q >>= ((mp_digit) 1);
260	}
261	return r;
262	}
263
264
265	int mp_leading_bit (mp_int * a)
266	{
267	int bit = 0;
268	mp_int t;
269
270	if (mp_init_copy(&t, a) != MP_OKAY)
271	return 0;
272
273	while (mp_iszero(&t) == MP_NO) {
274	#ifndef MP_8BIT
275	bit = (t.dp[0] & 0x80) != 0;
276	#else
277	bit = (t.dp[0] \| ((t.dp[1] & 0x01) << 7)) & 0x80 != 0;
278	#endif
279	if (mp_div_2d (&t, 8, &t, NULL) != MP_OKAY)
280	break;
281	}
282	mp_clear(&t);
283	return bit;
284	}
285
286	int mp_to_unsigned_bin_at_pos(int x, mp_int t, unsigned char b)
287	{
288	int res = 0;
289	while (mp_iszero(t) == MP_NO) {
290	#ifndef MP_8BIT
291	b[x++] = (unsigned char) (t->dp[0] & 255);
292	#else
293	b[x++] = (unsigned char) (t->dp[0] \| ((t->dp[1] & 0x01) << 7));
294	#endif
295	if ((res = mp_div_2d (t, 8, t, NULL)) != MP_OKAY) {
296	return res;
297	}
298	res = x;
299	}
300	return res;
301	}
302
303	/* store in unsigned [big endian] format */
304	int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
305	{
306	int x, res;
307	mp_int t;
308
309	if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
310	return res;
311	}
312
313	x = mp_to_unsigned_bin_at_pos(0, &t, b);
314	if (x < 0) {
315	mp_clear(&t);
316	return x;
317	}
318
319	bn_reverse (b, x);
320	mp_clear (&t);
321	return res;
322	}
323
324	int mp_to_unsigned_bin_len(mp_int * a, unsigned char *b, int c)
325	{
326	int i, len;
327
328	len = mp_unsigned_bin_size(a);
329
330	/* pad front w/ zeros to match length */
331	for (i = 0; i < c - len; i++)
332	b[i] = 0x00;
333	return mp_to_unsigned_bin(a, b + i);
334	}
335
336	/* creates "a" then copies b into it */
337	int mp_init_copy (mp_int * a, mp_int * b)
338	{
339	int res;
340
341	if ((res = mp_init_size (a, b->used)) != MP_OKAY) {
342	return res;
343	}
344
345	if((res = mp_copy (b, a)) != MP_OKAY) {
346	mp_clear(a);
347	}
348
349	return res;
350	}
351
352
353	/* copy, b = a */
354	int mp_copy (mp_int * a, mp_int * b)
355	{
356	int res, n;
357
358	/* Safeguard against passing in a null pointer */
359	if (a == NULL \|\| b == NULL)
360	return MP_VAL;
361
362	/* if dst == src do nothing */
363	if (a == b) {
364	return MP_OKAY;
365	}
366
367	/* grow dest */
368	if (b->alloc < a->used \|\| b->alloc == 0) {
369	if ((res = mp_grow (b, a->used)) != MP_OKAY) {
370	return res;
371	}
372	}
373
374	/* zero b and copy the parameters over */
375	{
376	mp_digit tmpa, tmpb;
377
378	/* pointer aliases */
379
380	/* source */
381	tmpa = a->dp;
382
383	/* destination */
384	tmpb = b->dp;
385
386	/* copy all the digits */
387	for (n = 0; n < a->used; n++) {
388	tmpb++ = tmpa++;
389	}
390
391	/* clear high digits */
392	for (; n < b->used && b->dp; n++) {
393	*tmpb++ = 0;
394	}
395	}
396
397	/* copy used count and sign */
398	b->used = a->used;
399	b->sign = a->sign;
400	return MP_OKAY;
401	}
402
403
404	/* grow as required */
405	int mp_grow (mp_int * a, int size)
406	{
407	int i;
408	mp_digit *tmp;
409
410	/* if the alloc size is smaller alloc more ram */
411	if (a->alloc < size \|\| size == 0) {
412	/* ensure there are always at least MP_PREC digits extra on top */
413	size += (MP_PREC * 2) - (size % MP_PREC);
414
415	/* reallocate the array a->dp
416	*
417	* We store the return in a temporary variable
418	* in case the operation failed we don't want
419	* to overwrite the dp member of a.
420	*/
421	tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size, NULL,
422	DYNAMIC_TYPE_BIGINT);
423	if (tmp == NULL) {
424	/* reallocation failed but "a" is still valid [can be freed] */
425	return MP_MEM;
426	}
427
428	/* reallocation succeeded so set a->dp */
429	a->dp = tmp;
430
431	/* zero excess digits */
432	i = a->alloc;
433	a->alloc = size;
434	for (; i < a->alloc; i++) {
435	a->dp[i] = 0;
436	}
437	}
438	return MP_OKAY;
439	}
440
441
442	/* shift right by a certain bit count (store quotient in c, optional
443	remainder in d) */
444	int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
445	{
446	int D, res;
447	mp_int t;
448
449
450	/* if the shift count is <= 0 then we do no work */
451	if (b <= 0) {
452	res = mp_copy (a, c);
453	if (d != NULL) {
454	mp_zero (d);
455	}
456	return res;
457	}
458
459	if ((res = mp_init (&t)) != MP_OKAY) {
460	return res;
461	}
462
463	/* get the remainder */
464	if (d != NULL) {
465	if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
466	mp_clear (&t);
467	return res;
468	}
469	}
470
471	/* copy */
472	if ((res = mp_copy (a, c)) != MP_OKAY) {
473	mp_clear (&t);
474	return res;
475	}
476
477	/* shift by as many digits in the bit count */
478	if (b >= (int)DIGIT_BIT) {
479	mp_rshd (c, b / DIGIT_BIT);
480	}
481
482	/* shift any bit count < DIGIT_BIT */
483	D = (b % DIGIT_BIT);
484	if (D != 0) {
485	mp_rshb(c, D);
486	}
487	mp_clamp (c);
488	if (d != NULL) {
489	mp_exch (&t, d);
490	}
491	mp_clear (&t);
492	return MP_OKAY;
493	}
494
495
496	/* set to zero */
497	void mp_zero (mp_int * a)
498	{
499	int n;
500	mp_digit *tmp;
501
502	if (a == NULL)
503	return;
504
505	a->sign = MP_ZPOS;
506	a->used = 0;
507
508	tmp = a->dp;
509	for (n = 0; n < a->alloc; n++) {
510	*tmp++ = 0;
511	}
512	}
513
514
515	/* trim unused digits
516	*
517	* This is used to ensure that leading zero digits are
518	* trimmed and the leading "used" digit will be non-zero
519	* Typically very fast. Also fixes the sign if there
520	* are no more leading digits
521	*/
522	void mp_clamp (mp_int * a)
523	{
524	/* decrease used while the most significant digit is
525	* zero.
526	*/
527	while (a->used > 0 && a->dp[a->used - 1] == 0) {
528	--(a->used);
529	}
530
531	/* reset the sign flag if used == 0 */
532	if (a->used == 0) {
533	a->sign = MP_ZPOS;
534	}
535	}
536
537
538	/* swap the elements of two integers, for cases where you can't simply swap the
539	* mp_int pointers around
540	*/
541	void mp_exch (mp_int * a, mp_int * b)
542	{
543	mp_int t;
544
545	t = *a;
546	a = b;
547	*b = t;
548	}
549
550
551	/* shift right a certain number of bits */
552	void mp_rshb (mp_int *c, int x)
553	{
554	mp_digit *tmpc, mask, shift;
555	mp_digit r, rr;
556	mp_digit D = x;
557
558	/* mask */
559	mask = (((mp_digit)1) << D) - 1;
560
561	/* shift for lsb */
562	shift = DIGIT_BIT - D;
563
564	/* alias */
565	tmpc = c->dp + (c->used - 1);
566
567	/* carry */
568	r = 0;
569	for (x = c->used - 1; x >= 0; x--) {
570	/* get the lower bits of this word in a temp */
571	rr = *tmpc & mask;
572
573	/* shift the current word and mix in the carry bits from previous word */
574	tmpc = (tmpc >> D) \| (r << shift);
575	--tmpc;
576
577	/* set the carry to the carry bits of the current word found above */
578	r = rr;
579	}
580	mp_clamp(c);
581	}
582
583
584	/* shift right a certain amount of digits */
585	void mp_rshd (mp_int * a, int b)
586	{
587	int x;
588
589	/* if b <= 0 then ignore it */
590	if (b <= 0) {
591	return;
592	}
593
594	/* if b > used then simply zero it and return */
595	if (a->used <= b) {
596	mp_zero (a);
597	return;
598	}
599
600	{
601	mp_digit bottom, top;
602
603	/* shift the digits down */
604
605	/* bottom */
606	bottom = a->dp;
607
608	/* top [offset into digits] */
609	top = a->dp + b;
610
611	/* this is implemented as a sliding window where
612	* the window is b-digits long and digits from
613	* the top of the window are copied to the bottom
614	*
615	* e.g.
616
617	b-2 \| b-1 \| b0 \| b1 \| b2 \| ... \| bb \| ---->
618	/\ \| ---->
619	\-------------------/ ---->
620	*/
621	for (x = 0; x < (a->used - b); x++) {
622	bottom++ = top++;
623	}
624
625	/* zero the top digits */
626	for (; x < a->used; x++) {
627	*bottom++ = 0;
628	}
629	}
630
631	/* remove excess digits */
632	a->used -= b;
633	}
634
635
636	/* calc a value mod 2*b /
637	int mp_mod_2d (mp_int * a, int b, mp_int * c)
638	{
639	int x, res;
640
641	/* if b is <= 0 then zero the int */
642	if (b <= 0) {
643	mp_zero (c);
644	return MP_OKAY;
645	}
646
647	/* if the modulus is larger than the value than return */
648	if (b >= (int) (a->used * DIGIT_BIT)) {
649	res = mp_copy (a, c);
650	return res;
651	}
652
653	/* copy */
654	if ((res = mp_copy (a, c)) != MP_OKAY) {
655	return res;
656	}
657
658	/* zero digits above the last digit of the modulus */
659	for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
660	c->dp[x] = 0;
661	}
662	/* clear the digit that is not completely outside/inside the modulus */
663	c->dp[b / DIGIT_BIT] &= (mp_digit) ((((mp_digit) 1) <<
664	(((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
665	mp_clamp (c);
666	return MP_OKAY;
667	}
668
669
670	/* reads a unsigned char array, assumes the msb is stored first [big endian] */
671	int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
672	{
673	int res;
674
675	/* make sure there are at least two digits */
676	if (a->alloc < 2) {
677	if ((res = mp_grow(a, 2)) != MP_OKAY) {
678	return res;
679	}
680	}
681
682	/* zero the int */
683	mp_zero (a);
684
685	/* read the bytes in */
686	while (c-- > 0) {
687	if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
688	return res;
689	}
690
691	#ifndef MP_8BIT
692	a->dp[0] \|= *b++;
693	a->used += 1;
694	#else
695	a->dp[0] = (*b & MP_MASK);
696	a->dp[1] \|= ((*b++ >> 7U) & 1);
697	a->used += 2;
698	#endif
699	}
700	mp_clamp (a);
701	return MP_OKAY;
702	}
703
704
705	/* shift left by a certain bit count */
706	int mp_mul_2d (mp_int * a, int b, mp_int * c)
707	{
708	mp_digit d;
709	int res;
710
711	/* copy */
712	if (a != c) {
713	if ((res = mp_copy (a, c)) != MP_OKAY) {
714	return res;
715	}
716	}
717
718	if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
719	if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
720	return res;
721	}
722	}
723
724	/* shift by as many digits in the bit count */
725	if (b >= (int)DIGIT_BIT) {
726	if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
727	return res;
728	}
729	}
730
731	/* shift any bit count < DIGIT_BIT */
732	d = (mp_digit) (b % DIGIT_BIT);
733	if (d != 0) {
734	mp_digit *tmpc, shift, mask, r, rr;
735	int x;
736
737	/* bitmask for carries */
738	mask = (((mp_digit)1) << d) - 1;
739
740	/* shift for msbs */
741	shift = DIGIT_BIT - d;
742
743	/* alias */
744	tmpc = c->dp;
745
746	/* carry */
747	r = 0;
748	for (x = 0; x < c->used; x++) {
749	/* get the higher bits of the current word */
750	rr = (*tmpc >> shift) & mask;
751
752	/* shift the current word and OR in the carry */
753	tmpc = (mp_digit)(((tmpc << d) \| r) & MP_MASK);
754	++tmpc;
755
756	/* set the carry to the carry bits of the current word */
757	r = rr;
758	}
759
760	/* set final carry */
761	if (r != 0) {
762	c->dp[(c->used)++] = r;
763	}
764	}
765	mp_clamp (c);
766	return MP_OKAY;
767	}
768
769
770	/* shift left a certain amount of digits */
771	int mp_lshd (mp_int * a, int b)
772	{
773	int x, res;
774
775	/* if its less than zero return */
776	if (b <= 0) {
777	return MP_OKAY;
778	}
779
780	/* grow to fit the new digits */
781	if (a->alloc < a->used + b) {
782	if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
783	return res;
784	}
785	}
786
787	{
788	mp_digit top, bottom;
789
790	/* increment the used by the shift amount then copy upwards */
791	a->used += b;
792
793	/* top */
794	top = a->dp + a->used - 1;
795
796	/* base */
797	bottom = a->dp + a->used - 1 - b;
798
799	/* much like mp_rshd this is implemented using a sliding window
800	* except the window goes the other way around. Copying from
801	* the bottom to the top. see bn_mp_rshd.c for more info.
802	*/
803	for (x = a->used - 1; x >= b; x--) {
804	top-- = bottom--;
805	}
806
807	/* zero the lower digits */
808	top = a->dp;
809	for (x = 0; x < b; x++) {
810	*top++ = 0;
811	}
812	}
813	return MP_OKAY;
814	}
815
816
817	/* this is a shell function that calls either the normal or Montgomery
818	* exptmod functions. Originally the call to the montgomery code was
819	* embedded in the normal function but that wasted a lot of stack space
820	* for nothing (since 99% of the time the Montgomery code would be called)
821	*/
822	#if defined(FREESCALE_LTC_TFM)
823	int wolfcrypt_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
824	#else
825	int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
826	#endif
827	{
828	int dr;
829
830	/* modulus P must be positive */
831	if (P->sign == MP_NEG) {
832	return MP_VAL;
833	}
834
835	/* if exponent X is negative we have to recurse */
836	if (X->sign == MP_NEG) {
837	#ifdef BN_MP_INVMOD_C
838	mp_int tmpG, tmpX;
839	int err;
840
841	/* first compute 1/G mod P */
842	if ((err = mp_init(&tmpG)) != MP_OKAY) {
843	return err;
844	}
845	if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
846	mp_clear(&tmpG);
847	return err;
848	}
849
850	/* now get \|X\| */
851	if ((err = mp_init(&tmpX)) != MP_OKAY) {
852	mp_clear(&tmpG);
853	return err;
854	}
855	if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
856	mp_clear(&tmpG);
857	mp_clear(&tmpX);
858	return err;
859	}
860
861	/* and now compute (1/G)\|X\| instead of GX [X < 0] */
862	err = mp_exptmod(&tmpG, &tmpX, P, Y);
863	mp_clear(&tmpG);
864	mp_clear(&tmpX);
865	return err;
866	#else
867	/* no invmod */
868	return MP_VAL;
869	#endif
870	}
871
872	/* modified diminished radix reduction */
873	#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && \
874	defined(BN_S_MP_EXPTMOD_C)
875	if (mp_reduce_is_2k_l(P) == MP_YES) {
876	return s_mp_exptmod(G, X, P, Y, 1);
877	}
878	#endif
879
880	#ifdef BN_MP_DR_IS_MODULUS_C
881	/* is it a DR modulus? */
882	dr = mp_dr_is_modulus(P);
883	#else
884	/* default to no */
885	dr = 0;
886	#endif
887
888	#ifdef BN_MP_REDUCE_IS_2K_C
889	/* if not, is it a unrestricted DR modulus? */
890	if (dr == 0) {
891	dr = mp_reduce_is_2k(P) << 1;
892	}
893	#endif
894
895	/* if the modulus is odd or dr != 0 use the montgomery method */
896	#ifdef BN_MP_EXPTMOD_FAST_C
897	if (mp_isodd (P) == MP_YES \|\| dr != 0) {
898	return mp_exptmod_fast (G, X, P, Y, dr);
899	} else {
900	#endif
901	#ifdef BN_S_MP_EXPTMOD_C
902	/* otherwise use the generic Barrett reduction technique */
903	return s_mp_exptmod (G, X, P, Y, 0);
904	#else
905	/* no exptmod for evens */
906	return MP_VAL;
907	#endif
908	#ifdef BN_MP_EXPTMOD_FAST_C
909	}
910	#endif
911	}
912
913
914	/* b = \|a\|
915	*
916	* Simple function copies the input and fixes the sign to positive
917	*/
918	int mp_abs (mp_int * a, mp_int * b)
919	{
920	int res;
921
922	/* copy a to b */
923	if (a != b) {
924	if ((res = mp_copy (a, b)) != MP_OKAY) {
925	return res;
926	}
927	}
928
929	/* force the sign of b to positive */
930	b->sign = MP_ZPOS;
931
932	return MP_OKAY;
933	}
934
935
936	/* hac 14.61, pp608 */
937	#if defined(FREESCALE_LTC_TFM)
938	int wolfcrypt_mp_invmod(mp_int * a, mp_int * b, mp_int * c)
939	#else
940	int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
941	#endif
942	{
943	/* b cannot be negative */
944	if (b->sign == MP_NEG \|\| mp_iszero(b) == MP_YES) {
945	return MP_VAL;
946	}
947
948	#ifdef BN_FAST_MP_INVMOD_C
949	/* if the modulus is odd we can use a faster routine instead */
950	if ((mp_isodd(b) == MP_YES) && (mp_cmp_d(b, 1) != MP_EQ)) {
951	return fast_mp_invmod (a, b, c);
952	}
953	#endif
954
955	#ifdef BN_MP_INVMOD_SLOW_C
956	return mp_invmod_slow(a, b, c);
957	#else
958	return MP_VAL;
959	#endif
960	}
961
962
963	/* computes the modular inverse via binary extended euclidean algorithm,
964	* that is c = 1/a mod b
965	*
966	* Based on slow invmod except this is optimized for the case where b is
967	* odd as per HAC Note 14.64 on pp. 610
968	*/
969	int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
970	{
971	mp_int x, y, u, v, B, D;
972	int res, neg, loop_check = 0;
973
974	/* 2. [modified] b must be odd */
975	if (mp_iseven (b) == MP_YES) {
976	return MP_VAL;
977	}
978
979	/* init all our temps */
980	if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D)) != MP_OKAY) {
981	return res;
982	}
983
984	/* x == modulus, y == value to invert */
985	if ((res = mp_copy (b, &x)) != MP_OKAY) {
986	goto LBL_ERR;
987	}
988
989	/* we need y = \|a\| */
990	if ((res = mp_mod (a, b, &y)) != MP_OKAY) {
991	goto LBL_ERR;
992	}
993
994	/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
995	if ((res = mp_copy (&x, &u)) != MP_OKAY) {
996	goto LBL_ERR;
997	}
998	if ((res = mp_copy (&y, &v)) != MP_OKAY) {
999	goto LBL_ERR;
1000	}
1001	if ((res = mp_set (&D, 1)) != MP_OKAY) {
1002	goto LBL_ERR;
1003	}
1004
1005	top:
1006	/* 4. while u is even do */
1007	while (mp_iseven (&u) == MP_YES) {
1008	/* 4.1 u = u/2 */
1009	if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
1010	goto LBL_ERR;
1011	}
1012	/* 4.2 if B is odd then */
1013	if (mp_isodd (&B) == MP_YES) {
1014	if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
1015	goto LBL_ERR;
1016	}
1017	}
1018	/* B = B/2 */
1019	if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
1020	goto LBL_ERR;
1021	}
1022	}
1023
1024	/* 5. while v is even do */
1025	while (mp_iseven (&v) == MP_YES) {
1026	/* 5.1 v = v/2 */
1027	if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
1028	goto LBL_ERR;
1029	}
1030	/* 5.2 if D is odd then */
1031	if (mp_isodd (&D) == MP_YES) {
1032	/* D = (D-x)/2 */
1033	if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
1034	goto LBL_ERR;
1035	}
1036	}
1037	/* D = D/2 */
1038	if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
1039	goto LBL_ERR;
1040	}
1041	}
1042
1043	/* 6. if u >= v then */
1044	if (mp_cmp (&u, &v) != MP_LT) {
1045	/* u = u - v, B = B - D */
1046	if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
1047	goto LBL_ERR;
1048	}
1049
1050	if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
1051	goto LBL_ERR;
1052	}
1053	} else {
1054	/* v - v - u, D = D - B */
1055	if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
1056	goto LBL_ERR;
1057	}
1058
1059	if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
1060	goto LBL_ERR;
1061	}
1062	}
1063
1064	/* if not zero goto step 4 */
1065	if (mp_iszero (&u) == MP_NO) {
1066	if (++loop_check > MAX_INVMOD_SZ) {
1067	res = MP_VAL;
1068	goto LBL_ERR;
1069	}
1070	goto top;
1071	}
1072
1073	/* now a = C, b = D, gcd == gv /
1074
1075	/* if v != 1 then there is no inverse */
1076	if (mp_cmp_d (&v, 1) != MP_EQ) {
1077	res = MP_VAL;
1078	goto LBL_ERR;
1079	}
1080
1081	/* b is now the inverse */
1082	neg = a->sign;
1083	while (D.sign == MP_NEG) {
1084	if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
1085	goto LBL_ERR;
1086	}
1087	}
1088	/* too big */
1089	while (mp_cmp_mag(&D, b) != MP_LT) {
1090	if ((res = mp_sub(&D, b, &D)) != MP_OKAY) {
1091	goto LBL_ERR;
1092	}
1093	}
1094	mp_exch (&D, c);
1095	c->sign = neg;
1096	res = MP_OKAY;
1097
1098	LBL_ERR:mp_clear(&x);
1099	mp_clear(&y);
1100	mp_clear(&u);
1101	mp_clear(&v);
1102	mp_clear(&B);
1103	mp_clear(&D);
1104	return res;
1105	}
1106
1107
1108	/* hac 14.61, pp608 */
1109	int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
1110	{
1111	mp_int x, y, u, v, A, B, C, D;
1112	int res;
1113
1114	/* b cannot be negative */
1115	if (b->sign == MP_NEG \|\| mp_iszero(b) == MP_YES) {
1116	return MP_VAL;
1117	}
1118
1119	/* init temps */
1120	if ((res = mp_init_multi(&x, &y, &u, &v,
1121	&A, &B)) != MP_OKAY) {
1122	return res;
1123	}
1124
1125	/* init rest of tmps temps */
1126	if ((res = mp_init_multi(&C, &D, 0, 0, 0, 0)) != MP_OKAY) {
1127	mp_clear(&x);
1128	mp_clear(&y);
1129	mp_clear(&u);
1130	mp_clear(&v);
1131	mp_clear(&A);
1132	mp_clear(&B);
1133	return res;
1134	}
1135
1136	/* x = a, y = b */
1137	if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
1138	goto LBL_ERR;
1139	}
1140	if ((res = mp_copy (b, &y)) != MP_OKAY) {
1141	goto LBL_ERR;
1142	}
1143
1144	/* 2. [modified] if x,y are both even then return an error! */
1145	if (mp_iseven (&x) == MP_YES && mp_iseven (&y) == MP_YES) {
1146	res = MP_VAL;
1147	goto LBL_ERR;
1148	}
1149
1150	/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
1151	if ((res = mp_copy (&x, &u)) != MP_OKAY) {
1152	goto LBL_ERR;
1153	}
1154	if ((res = mp_copy (&y, &v)) != MP_OKAY) {
1155	goto LBL_ERR;
1156	}
1157	if ((res = mp_set (&A, 1)) != MP_OKAY) {
1158	goto LBL_ERR;
1159	}
1160	if ((res = mp_set (&D, 1)) != MP_OKAY) {
1161	goto LBL_ERR;
1162	}
1163
1164	top:
1165	/* 4. while u is even do */
1166	while (mp_iseven (&u) == MP_YES) {
1167	/* 4.1 u = u/2 */
1168	if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
1169	goto LBL_ERR;
1170	}
1171	/* 4.2 if A or B is odd then */
1172	if (mp_isodd (&A) == MP_YES \|\| mp_isodd (&B) == MP_YES) {
1173	/* A = (A+y)/2, B = (B-x)/2 */
1174	if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
1175	goto LBL_ERR;
1176	}
1177	if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
1178	goto LBL_ERR;
1179	}
1180	}
1181	/* A = A/2, B = B/2 */
1182	if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
1183	goto LBL_ERR;
1184	}
1185	if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
1186	goto LBL_ERR;
1187	}
1188	}
1189
1190	/* 5. while v is even do */
1191	while (mp_iseven (&v) == MP_YES) {
1192	/* 5.1 v = v/2 */
1193	if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
1194	goto LBL_ERR;
1195	}
1196	/* 5.2 if C or D is odd then */
1197	if (mp_isodd (&C) == MP_YES \|\| mp_isodd (&D) == MP_YES) {
1198	/* C = (C+y)/2, D = (D-x)/2 */
1199	if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
1200	goto LBL_ERR;
1201	}
1202	if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
1203	goto LBL_ERR;
1204	}
1205	}
1206	/* C = C/2, D = D/2 */
1207	if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
1208	goto LBL_ERR;
1209	}
1210	if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
1211	goto LBL_ERR;
1212	}
1213	}
1214
1215	/* 6. if u >= v then */
1216	if (mp_cmp (&u, &v) != MP_LT) {
1217	/* u = u - v, A = A - C, B = B - D */
1218	if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
1219	goto LBL_ERR;
1220	}
1221
1222	if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
1223	goto LBL_ERR;
1224	}
1225
1226	if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
1227	goto LBL_ERR;
1228	}
1229	} else {
1230	/* v - v - u, C = C - A, D = D - B */
1231	if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
1232	goto LBL_ERR;
1233	}
1234
1235	if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
1236	goto LBL_ERR;
1237	}
1238
1239	if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
1240	goto LBL_ERR;
1241	}
1242	}
1243
1244	/* if not zero goto step 4 */
1245	if (mp_iszero (&u) == MP_NO)
1246	goto top;
1247
1248	/* now a = C, b = D, gcd == gv /
1249
1250	/* if v != 1 then there is no inverse */
1251	if (mp_cmp_d (&v, 1) != MP_EQ) {
1252	res = MP_VAL;
1253	goto LBL_ERR;
1254	}
1255
1256	/* if its too low */
1257	while (mp_cmp_d(&C, 0) == MP_LT) {
1258	if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
1259	goto LBL_ERR;
1260	}
1261	}
1262
1263	/* too big */
1264	while (mp_cmp_mag(&C, b) != MP_LT) {
1265	if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
1266	goto LBL_ERR;
1267	}
1268	}
1269
1270	/* C is now the inverse */
1271	mp_exch (&C, c);
1272	res = MP_OKAY;
1273	LBL_ERR:mp_clear(&x);
1274	mp_clear(&y);
1275	mp_clear(&u);
1276	mp_clear(&v);
1277	mp_clear(&A);
1278	mp_clear(&B);
1279	mp_clear(&C);
1280	mp_clear(&D);
1281	return res;
1282	}
1283
1284
1285	/* compare magnitude of two ints (unsigned) */
1286	int mp_cmp_mag (mp_int * a, mp_int * b)
1287	{
1288	int n;
1289	mp_digit tmpa, tmpb;
1290
1291	/* compare based on # of non-zero digits */
1292	if (a->used > b->used) {
1293	return MP_GT;
1294	}
1295
1296	if (a->used < b->used) {
1297	return MP_LT;
1298	}
1299
1300	/* alias for a */
1301	tmpa = a->dp + (a->used - 1);
1302
1303	/* alias for b */
1304	tmpb = b->dp + (a->used - 1);
1305
1306	/* compare based on digits */
1307	for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
1308	if (tmpa > tmpb) {
1309	return MP_GT;
1310	}
1311
1312	if (tmpa < tmpb) {
1313	return MP_LT;
1314	}
1315	}
1316	return MP_EQ;
1317	}
1318
1319
1320	/* compare two ints (signed)*/
1321	int mp_cmp (mp_int * a, mp_int * b)
1322	{
1323	/* compare based on sign */
1324	if (a->sign != b->sign) {
1325	if (a->sign == MP_NEG) {
1326	return MP_LT;
1327	} else {
1328	return MP_GT;
1329	}
1330	}
1331
1332	/* compare digits */
1333	if (a->sign == MP_NEG) {
1334	/* if negative compare opposite direction */
1335	return mp_cmp_mag(b, a);
1336	} else {
1337	return mp_cmp_mag(a, b);
1338	}
1339	}
1340
1341
1342	/* compare a digit */
1343	int mp_cmp_d(mp_int * a, mp_digit b)
1344	{
1345	/* special case for zero*/
1346	if (a->used == 0 && b == 0)
1347	return MP_EQ;
1348
1349	/* compare based on sign */
1350	if ((b && a->used == 0) \|\| a->sign == MP_NEG) {
1351	return MP_LT;
1352	}
1353
1354	/* compare based on magnitude */
1355	if (a->used > 1) {
1356	return MP_GT;
1357	}
1358
1359	/* compare the only digit of a to b */
1360	if (a->dp[0] > b) {
1361	return MP_GT;
1362	} else if (a->dp[0] < b) {
1363	return MP_LT;
1364	} else {
1365	return MP_EQ;
1366	}
1367	}
1368
1369
1370	/* set to a digit */
1371	int mp_set (mp_int * a, mp_digit b)
1372	{
1373	int res;
1374	mp_zero (a);
1375	res = mp_grow (a, 1);
1376	if (res == MP_OKAY) {
1377	a->dp[0] = (mp_digit)(b & MP_MASK);
1378	a->used = (a->dp[0] != 0) ? 1 : 0;
1379	}
1380	return res;
1381	}
1382
1383	/* chek if a bit is set */
1384	int mp_is_bit_set (mp_int *a, mp_digit b)
1385	{
1386	if ((mp_digit)a->used < b/DIGIT_BIT)
1387	return 0;
1388
1389	return (int)((a->dp[b/DIGIT_BIT] >> b%DIGIT_BIT) & (mp_digit)1);
1390	}
1391
1392	/* c = a mod b, 0 <= c < b */
1393	#if defined(FREESCALE_LTC_TFM)
1394	int wolfcrypt_mp_mod(mp_int * a, mp_int * b, mp_int * c)
1395	#else
1396	int mp_mod (mp_int * a, mp_int * b, mp_int * c)
1397	#endif
1398	{
1399	mp_int t;
1400	int res;
1401
1402	if ((res = mp_init_size (&t, b->used)) != MP_OKAY) {
1403	return res;
1404	}
1405
1406	if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
1407	mp_clear (&t);
1408	return res;
1409	}
1410
1411	if ((mp_iszero(&t) != MP_NO) \|\| (t.sign == b->sign)) {
1412	res = MP_OKAY;
1413	mp_exch (&t, c);
1414	} else {
1415	res = mp_add (b, &t, c);
1416	}
1417
1418	mp_clear (&t);
1419	return res;
1420	}
1421
1422
1423	/* slower bit-bang division... also smaller */
1424	int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
1425	{
1426	mp_int ta, tb, tq, q;
1427	int res, n, n2;
1428
1429	/* is divisor zero ? */
1430	if (mp_iszero (b) == MP_YES) {
1431	return MP_VAL;
1432	}
1433
1434	/* if a < b then q=0, r = a */
1435	if (mp_cmp_mag (a, b) == MP_LT) {
1436	if (d != NULL) {
1437	res = mp_copy (a, d);
1438	} else {
1439	res = MP_OKAY;
1440	}
1441	if (c != NULL) {
1442	mp_zero (c);
1443	}
1444	return res;
1445	}
1446
1447	/* init our temps */
1448	if ((res = mp_init_multi(&ta, &tb, &tq, &q, 0, 0)) != MP_OKAY) {
1449	return res;
1450	}
1451
1452	if ((res = mp_set(&tq, 1)) != MP_OKAY) {
1453	return res;
1454	}
1455	n = mp_count_bits(a) - mp_count_bits(b);
1456	if (((res = mp_abs(a, &ta)) != MP_OKAY) \|\|
1457	((res = mp_abs(b, &tb)) != MP_OKAY) \|\|
1458	((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) \|\|
1459	((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
1460	goto LBL_ERR;
1461	}
1462
1463	while (n-- >= 0) {
1464	if (mp_cmp(&tb, &ta) != MP_GT) {
1465	if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) \|\|
1466	((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
1467	goto LBL_ERR;
1468	}
1469	}
1470	if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) \|\|
1471	((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
1472	goto LBL_ERR;
1473	}
1474	}
1475
1476	/* now q == quotient and ta == remainder */
1477	n = a->sign;
1478	n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
1479	if (c != NULL) {
1480	mp_exch(c, &q);
1481	c->sign = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
1482	}
1483	if (d != NULL) {
1484	mp_exch(d, &ta);
1485	d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
1486	}
1487	LBL_ERR:
1488	mp_clear(&ta);
1489	mp_clear(&tb);
1490	mp_clear(&tq);
1491	mp_clear(&q);
1492	return res;
1493	}
1494
1495
1496	/* b = a/2 */
1497	int mp_div_2(mp_int * a, mp_int * b)
1498	{
1499	int x, res, oldused;
1500
1501	/* copy */
1502	if (b->alloc < a->used) {
1503	if ((res = mp_grow (b, a->used)) != MP_OKAY) {
1504	return res;
1505	}
1506	}
1507
1508	oldused = b->used;
1509	b->used = a->used;
1510	{
1511	mp_digit r, rr, tmpa, tmpb;
1512
1513	/* source alias */
1514	tmpa = a->dp + b->used - 1;
1515
1516	/* dest alias */
1517	tmpb = b->dp + b->used - 1;
1518
1519	/* carry */
1520	r = 0;
1521	for (x = b->used - 1; x >= 0; x--) {
1522	/* get the carry for the next iteration */
1523	rr = *tmpa & 1;
1524
1525	/* shift the current digit, add in carry and store */
1526	tmpb-- = (tmpa-- >> 1) \| (r << (DIGIT_BIT - 1));
1527
1528	/* forward carry to next iteration */
1529	r = rr;
1530	}
1531
1532	/* zero excess digits */
1533	tmpb = b->dp + b->used;
1534	for (x = b->used; x < oldused; x++) {
1535	*tmpb++ = 0;
1536	}
1537	}
1538	b->sign = a->sign;
1539	mp_clamp (b);
1540	return MP_OKAY;
1541	}
1542
1543
1544	/* high level addition (handles signs) */
1545	int mp_add (mp_int * a, mp_int * b, mp_int * c)
1546	{
1547	int sa, sb, res;
1548
1549	/* get sign of both inputs */
1550	sa = a->sign;
1551	sb = b->sign;
1552
1553	/* handle two cases, not four */
1554	if (sa == sb) {
1555	/* both positive or both negative */
1556	/* add their magnitudes, copy the sign */
1557	c->sign = sa;
1558	res = s_mp_add (a, b, c);
1559	} else {
1560	/* one positive, the other negative */
1561	/* subtract the one with the greater magnitude from */
1562	/* the one of the lesser magnitude. The result gets */
1563	/* the sign of the one with the greater magnitude. */
1564	if (mp_cmp_mag (a, b) == MP_LT) {
1565	c->sign = sb;
1566	res = s_mp_sub (b, a, c);
1567	} else {
1568	c->sign = sa;
1569	res = s_mp_sub (a, b, c);
1570	}
1571	}
1572	return res;
1573	}
1574
1575
1576	/* low level addition, based on HAC pp.594, Algorithm 14.7 */
1577	int s_mp_add (mp_int * a, mp_int * b, mp_int * c)
1578	{
1579	mp_int *x;
1580	int olduse, res, min_ab, max_ab;
1581
1582	/* find sizes, we let \|a\| <= \|b\| which means we have to sort
1583	* them. "x" will point to the input with the most digits
1584	*/
1585	if (a->used > b->used) {
1586	min_ab = b->used;
1587	max_ab = a->used;
1588	x = a;
1589	} else {
1590	min_ab = a->used;
1591	max_ab = b->used;
1592	x = b;
1593	}
1594
1595	/* init result */
1596	if (c->alloc < max_ab + 1) {
1597	if ((res = mp_grow (c, max_ab + 1)) != MP_OKAY) {
1598	return res;
1599	}
1600	}
1601
1602	/* get old used digit count and set new one */
1603	olduse = c->used;
1604	c->used = max_ab + 1;
1605
1606	{
1607	mp_digit u, tmpa, tmpb, *tmpc;
1608	int i;
1609
1610	/* alias for digit pointers */
1611
1612	/* first input */
1613	tmpa = a->dp;
1614
1615	/* second input */
1616	tmpb = b->dp;
1617
1618	/* destination */
1619	tmpc = c->dp;
1620
1621	/* zero the carry */
1622	u = 0;
1623	for (i = 0; i < min_ab; i++) {
1624	/* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
1625	tmpc = tmpa++ + *tmpb++ + u;
1626
1627	/* U = carry bit of T[i] */
1628	u = *tmpc >> ((mp_digit)DIGIT_BIT);
1629
1630	/* take away carry bit from T[i] */
1631	*tmpc++ &= MP_MASK;
1632	}
1633
1634	/* now copy higher words if any, that is in A+B
1635	* if A or B has more digits add those in
1636	*/
1637	if (min_ab != max_ab) {
1638	for (; i < max_ab; i++) {
1639	/* T[i] = X[i] + U */
1640	*tmpc = x->dp[i] + u;
1641
1642	/* U = carry bit of T[i] */
1643	u = *tmpc >> ((mp_digit)DIGIT_BIT);
1644
1645	/* take away carry bit from T[i] */
1646	*tmpc++ &= MP_MASK;
1647	}
1648	}
1649
1650	/* add carry */
1651	*tmpc++ = u;
1652
1653	/* clear digits above olduse */
1654	for (i = c->used; i < olduse; i++) {
1655	*tmpc++ = 0;
1656	}
1657	}
1658
1659	mp_clamp (c);
1660	return MP_OKAY;
1661	}
1662
1663
1664	/* low level subtraction (assumes \|a\| > \|b\|), HAC pp.595 Algorithm 14.9 */
1665	int s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
1666	{
1667	int olduse, res, min_b, max_a;
1668
1669	/* find sizes */
1670	min_b = b->used;
1671	max_a = a->used;
1672
1673	/* init result */
1674	if (c->alloc < max_a) {
1675	if ((res = mp_grow (c, max_a)) != MP_OKAY) {
1676	return res;
1677	}
1678	}
1679
1680	/* sanity check on destination */
1681	if (c->dp == NULL)
1682	return MP_VAL;
1683
1684	olduse = c->used;
1685	c->used = max_a;
1686
1687	{
1688	mp_digit u, tmpa, tmpb, *tmpc;
1689	int i;
1690
1691	/* alias for digit pointers */
1692	tmpa = a->dp;
1693	tmpb = b->dp;
1694	tmpc = c->dp;
1695
1696	/* set carry to zero */
1697	u = 0;
1698	for (i = 0; i < min_b; i++) {
1699	/* T[i] = A[i] - B[i] - U */
1700	tmpc = tmpa++ - *tmpb++ - u;
1701
1702	/* U = carry bit of T[i]
1703	* Note this saves performing an AND operation since
1704	* if a carry does occur it will propagate all the way to the
1705	* MSB. As a result a single shift is enough to get the carry
1706	*/
1707	u = tmpc >> ((mp_digit)(CHAR_BIT sizeof (mp_digit) - 1));
1708
1709	/* Clear carry from T[i] */
1710	*tmpc++ &= MP_MASK;
1711	}
1712
1713	/* now copy higher words if any, e.g. if A has more digits than B */
1714	for (; i < max_a; i++) {
1715	/* T[i] = A[i] - U */
1716	tmpc = tmpa++ - u;
1717
1718	/* U = carry bit of T[i] */
1719	u = tmpc >> ((mp_digit)(CHAR_BIT sizeof (mp_digit) - 1));
1720
1721	/* Clear carry from T[i] */
1722	*tmpc++ &= MP_MASK;
1723	}
1724
1725	/* clear digits above used (since we may not have grown result above) */
1726	for (i = c->used; i < olduse; i++) {
1727	*tmpc++ = 0;
1728	}
1729	}
1730
1731	mp_clamp (c);
1732	return MP_OKAY;
1733	}
1734
1735
1736	/* high level subtraction (handles signs) */
1737	int mp_sub (mp_int * a, mp_int * b, mp_int * c)
1738	{
1739	int sa, sb, res;
1740
1741	sa = a->sign;
1742	sb = b->sign;
1743
1744	if (sa != sb) {
1745	/* subtract a negative from a positive, OR */
1746	/* subtract a positive from a negative. */
1747	/* In either case, ADD their magnitudes, */
1748	/* and use the sign of the first number. */
1749	c->sign = sa;
1750	res = s_mp_add (a, b, c);
1751	} else {
1752	/* subtract a positive from a positive, OR */
1753	/* subtract a negative from a negative. */
1754	/* First, take the difference between their */
1755	/* magnitudes, then... */
1756	if (mp_cmp_mag (a, b) != MP_LT) {
1757	/* Copy the sign from the first */
1758	c->sign = sa;
1759	/* The first has a larger or equal magnitude */
1760	res = s_mp_sub (a, b, c);
1761	} else {
1762	/* The result has the opposite sign from */
1763	/* the first number. */
1764	c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
1765	/* The second has a larger magnitude */
1766	res = s_mp_sub (b, a, c);
1767	}
1768	}
1769	return res;
1770	}
1771
1772
1773	/* determines if reduce_2k_l can be used */
1774	int mp_reduce_is_2k_l(mp_int *a)
1775	{
1776	int ix, iy;
1777
1778	if (a->used == 0) {
1779	return MP_NO;
1780	} else if (a->used == 1) {
1781	return MP_YES;
1782	} else if (a->used > 1) {
1783	/* if more than half of the digits are -1 we're sold */
1784	for (iy = ix = 0; ix < a->used; ix++) {
1785	if (a->dp[ix] == MP_MASK) {
1786	++iy;
1787	}
1788	}
1789	return (iy >= (a->used/2)) ? MP_YES : MP_NO;
1790
1791	}
1792	return MP_NO;
1793	}
1794
1795
1796	/* determines if mp_reduce_2k can be used */
1797	int mp_reduce_is_2k(mp_int *a)
1798	{
1799	int ix, iy, iw;
1800	mp_digit iz;
1801
1802	if (a->used == 0) {
1803	return MP_NO;
1804	} else if (a->used == 1) {
1805	return MP_YES;
1806	} else if (a->used > 1) {
1807	iy = mp_count_bits(a);
1808	iz = 1;
1809	iw = 1;
1810
1811	/* Test every bit from the second digit up, must be 1 */
1812	for (ix = DIGIT_BIT; ix < iy; ix++) {
1813	if ((a->dp[iw] & iz) == 0) {
1814	return MP_NO;
1815	}
1816	iz <<= 1;
1817	if (iz > (mp_digit)MP_MASK) {
1818	++iw;
1819	iz = 1;
1820	}
1821	}
1822	}
1823	return MP_YES;
1824	}
1825
1826
1827	/* determines if a number is a valid DR modulus */
1828	int mp_dr_is_modulus(mp_int *a)
1829	{
1830	int ix;
1831
1832	/* must be at least two digits */
1833	if (a->used < 2) {
1834	return 0;
1835	}
1836
1837	/* must be of the form b**k - a [a <= b] so all
1838	* but the first digit must be equal to -1 (mod b).
1839	*/
1840	for (ix = 1; ix < a->used; ix++) {
1841	if (a->dp[ix] != MP_MASK) {
1842	return 0;
1843	}
1844	}
1845	return 1;
1846	}
1847
1848
1849	/* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
1850	*
1851	* Uses a left-to-right k-ary sliding window to compute the modular
1852	* exponentiation.
1853	* The value of k changes based on the size of the exponent.
1854	*
1855	* Uses Montgomery or Diminished Radix reduction [whichever appropriate]
1856	*/
1857
1858	#ifdef MP_LOW_MEM
1859	#define TAB_SIZE 32
1860	#else
1861	#define TAB_SIZE 256
1862	#endif
1863
1864	int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
1865	int redmode)
1866	{
1867	mp_int res;
1868	mp_digit buf, mp;
1869	int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
1870	#ifdef WOLFSSL_SMALL_STACK
1871	mp_int* M = NULL;
1872	#else
1873	mp_int M[TAB_SIZE];
1874	#endif
1875	/* use a pointer to the reduction algorithm. This allows us to use
1876	* one of many reduction algorithms without modding the guts of
1877	* the code with if statements everywhere.
1878	*/
1879	int (redux)(mp_int,mp_int*,mp_digit);
1880
1881	#ifdef WOLFSSL_SMALL_STACK
1882	M = (mp_int) XMALLOC(sizeof(mp_int) TAB_SIZE, NULL,
1883	DYNAMIC_TYPE_TMP_BUFFER);
1884	if (M == NULL)
1885	return MP_MEM;
1886	#endif
1887
1888	/* find window size */
1889	x = mp_count_bits (X);
1890	if (x <= 7) {
1891	winsize = 2;
1892	} else if (x <= 36) {
1893	winsize = 3;
1894	} else if (x <= 140) {
1895	winsize = 4;
1896	} else if (x <= 450) {
1897	winsize = 5;
1898	} else if (x <= 1303) {
1899	winsize = 6;
1900	} else if (x <= 3529) {
1901	winsize = 7;
1902	} else {
1903	winsize = 8;
1904	}
1905
1906	#ifdef MP_LOW_MEM
1907	if (winsize > 5) {
1908	winsize = 5;
1909	}
1910	#endif
1911
1912	/* init M array */
1913	/* init first cell */
1914	if ((err = mp_init_size(&M[1], P->alloc)) != MP_OKAY) {
1915	#ifdef WOLFSSL_SMALL_STACK
1916	XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
1917	#endif
1918
1919	return err;
1920	}
1921
1922	/* now init the second half of the array */
1923	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
1924	if ((err = mp_init_size(&M[x], P->alloc)) != MP_OKAY) {
1925	for (y = 1<<(winsize-1); y < x; y++) {
1926	mp_clear (&M[y]);
1927	}
1928	mp_clear(&M[1]);
1929
1930	#ifdef WOLFSSL_SMALL_STACK
1931	XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
1932	#endif
1933
1934	return err;
1935	}
1936	}
1937
1938	/* determine and setup reduction code */
1939	if (redmode == 0) {
1940	#ifdef BN_MP_MONTGOMERY_SETUP_C
1941	/* now setup montgomery */
1942	if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
1943	goto LBL_M;
1944	}
1945	#else
1946	err = MP_VAL;
1947	goto LBL_M;
1948	#endif
1949
1950	/* automatically pick the comba one if available (saves quite a few
1951	calls/ifs) */
1952	#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
1953	if (((P->used * 2 + 1) < MP_WARRAY) &&
1954	P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
1955	redux = fast_mp_montgomery_reduce;
1956	} else
1957	#endif
1958	{
1959	#ifdef BN_MP_MONTGOMERY_REDUCE_C
1960	/* use slower baseline Montgomery method */
1961	redux = mp_montgomery_reduce;
1962	#else
1963	err = MP_VAL;
1964	goto LBL_M;
1965	#endif
1966	}
1967	} else if (redmode == 1) {
1968	#if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
1969	/* setup DR reduction for moduli of the form B*k - b /
1970	mp_dr_setup(P, &mp);
1971	redux = mp_dr_reduce;
1972	#else
1973	err = MP_VAL;
1974	goto LBL_M;
1975	#endif
1976	} else {
1977	#if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
1978	/* setup DR reduction for moduli of the form 2*k - b /
1979	if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
1980	goto LBL_M;
1981	}
1982	redux = mp_reduce_2k;
1983	#else
1984	err = MP_VAL;
1985	goto LBL_M;
1986	#endif
1987	}
1988
1989	/* setup result */
1990	if ((err = mp_init_size (&res, P->alloc)) != MP_OKAY) {
1991	goto LBL_M;
1992	}
1993
1994	/* create M table
1995	*
1996
1997	*
1998	* The first half of the table is not computed though accept for M[0] and M[1]
1999	*/
2000
2001	if (redmode == 0) {
2002	#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
2003	/* now we need R mod m */
2004	if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
2005	goto LBL_RES;
2006	}
2007
2008	/* now set M[1] to G * R mod m */
2009	if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
2010	goto LBL_RES;
2011	}
2012	#else
2013	err = MP_VAL;
2014	goto LBL_RES;
2015	#endif
2016	} else {
2017	if ((err = mp_set(&res, 1)) != MP_OKAY) {
2018	goto LBL_RES;
2019	}
2020	if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
2021	goto LBL_RES;
2022	}
2023	}
2024
2025	/* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times*/
2026	if ((err = mp_copy (&M[1], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
2027	goto LBL_RES;
2028	}
2029
2030	for (x = 0; x < (winsize - 1); x++) {
2031	if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))],
2032	&M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
2033	goto LBL_RES;
2034	}
2035	if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, mp)) != MP_OKAY) {
2036	goto LBL_RES;
2037	}
2038	}
2039
2040	/* create upper table */
2041	for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
2042	if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
2043	goto LBL_RES;
2044	}
2045	if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
2046	goto LBL_RES;
2047	}
2048	}
2049
2050	/* set initial mode and bit cnt */
2051	mode = 0;
2052	bitcnt = 1;
2053	buf = 0;
2054	digidx = X->used - 1;
2055	bitcpy = 0;
2056	bitbuf = 0;
2057
2058	for (;;) {
2059	/* grab next digit as required */
2060	if (--bitcnt == 0) {
2061	/* if digidx == -1 we are out of digits so break */
2062	if (digidx == -1) {
2063	break;
2064	}
2065	/* read next digit and reset bitcnt */
2066	buf = X->dp[digidx--];
2067	bitcnt = (int)DIGIT_BIT;
2068	}
2069
2070	/* grab the next msb from the exponent */
2071	y = (int)(buf >> (DIGIT_BIT - 1)) & 1;
2072	buf <<= (mp_digit)1;
2073
2074	/* if the bit is zero and mode == 0 then we ignore it
2075	* These represent the leading zero bits before the first 1 bit
2076	* in the exponent. Technically this opt is not required but it
2077	* does lower the # of trivial squaring/reductions used
2078	*/
2079	if (mode == 0 && y == 0) {
2080	continue;
2081	}
2082
2083	/* if the bit is zero and mode == 1 then we square */
2084	if (mode == 1 && y == 0) {
2085	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2086	goto LBL_RES;
2087	}
2088	if ((err = redux (&res, P, mp)) != MP_OKAY) {
2089	goto LBL_RES;
2090	}
2091	continue;
2092	}
2093
2094	/* else we add it to the window */
2095	bitbuf \|= (y << (winsize - ++bitcpy));
2096	mode = 2;
2097
2098	if (bitcpy == winsize) {
2099	/* ok window is filled so square as required and multiply */
2100	/* square first */
2101	for (x = 0; x < winsize; x++) {
2102	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2103	goto LBL_RES;
2104	}
2105	if ((err = redux (&res, P, mp)) != MP_OKAY) {
2106	goto LBL_RES;
2107	}
2108	}
2109
2110	/* then multiply */
2111	if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
2112	goto LBL_RES;
2113	}
2114	if ((err = redux (&res, P, mp)) != MP_OKAY) {
2115	goto LBL_RES;
2116	}
2117
2118	/* empty window and reset */
2119	bitcpy = 0;
2120	bitbuf = 0;
2121	mode = 1;
2122	}
2123	}
2124
2125	/* if bits remain then square/multiply */
2126	if (mode == 2 && bitcpy > 0) {
2127	/* square then multiply if the bit is set */
2128	for (x = 0; x < bitcpy; x++) {
2129	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2130	goto LBL_RES;
2131	}
2132	if ((err = redux (&res, P, mp)) != MP_OKAY) {
2133	goto LBL_RES;
2134	}
2135
2136	/* get next bit of the window */
2137	bitbuf <<= 1;
2138	if ((bitbuf & (1 << winsize)) != 0) {
2139	/* then multiply */
2140	if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
2141	goto LBL_RES;
2142	}
2143	if ((err = redux (&res, P, mp)) != MP_OKAY) {
2144	goto LBL_RES;
2145	}
2146	}
2147	}
2148	}
2149
2150	if (redmode == 0) {
2151	/* fixup result if Montgomery reduction is used
2152	* recall that any value in a Montgomery system is
2153	* actually multiplied by R mod n. So we have
2154	* to reduce one more time to cancel out the factor
2155	* of R.
2156	*/
2157	if ((err = redux(&res, P, mp)) != MP_OKAY) {
2158	goto LBL_RES;
2159	}
2160	}
2161
2162	/* swap res with Y */
2163	mp_exch (&res, Y);
2164	err = MP_OKAY;
2165	LBL_RES:mp_clear (&res);
2166	LBL_M:
2167	mp_clear(&M[1]);
2168	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
2169	mp_clear (&M[x]);
2170	}
2171
2172	#ifdef WOLFSSL_SMALL_STACK
2173	XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2174	#endif
2175
2176	return err;
2177	}
2178
2179
2180	/* setups the montgomery reduction stuff */
2181	int mp_montgomery_setup (mp_int * n, mp_digit * rho)
2182	{
2183	mp_digit x, b;
2184
2185	/* fast inversion mod 2**k
2186	*
2187	* Based on the fact that
2188	*
2189	* XA = 1 (mod 2n) => (X(2-XA)) A = 1 (mod 22n)
2190	* => 2XA - XXA*A = 1
2191	* => 2*(1) - (1) = 1
2192	*/
2193	b = n->dp[0];
2194
2195	if ((b & 1) == 0) {
2196	return MP_VAL;
2197	}
2198
2199	x = (((b + 2) & 4) << 1) + b; /* here xa==1 mod 24 /
2200	x = 2 - b x; /* here xa==1 mod 28 /
2201	#if !defined(MP_8BIT)
2202	x = 2 - b x; /* here xa==1 mod 216 /
2203	#endif
2204	#if defined(MP_64BIT) \|\| !(defined(MP_8BIT) \|\| defined(MP_16BIT))
2205	x = 2 - b x; /* here xa==1 mod 232 /
2206	#endif
2207	#ifdef MP_64BIT
2208	x = 2 - b x; /* here xa==1 mod 264 /
2209	#endif
2210
2211	/* rho = -1/m mod b */
2212	/* TAO, switched mp_word casts to mp_digit to shut up compiler */
2213	*rho = (mp_digit)((((mp_digit)1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK);
2214
2215	return MP_OKAY;
2216	}
2217
2218
2219	/* computes xR**-1 == x (mod N) via Montgomery Reduction
2220	*
2221	* This is an optimized implementation of montgomery_reduce
2222	* which uses the comba method to quickly calculate the columns of the
2223	* reduction.
2224	*
2225	* Based on Algorithm 14.32 on pp.601 of HAC.
2226	*/
2227	int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
2228	{
2229	int ix, res, olduse;
2230	#ifdef WOLFSSL_SMALL_STACK
2231	mp_word* W; /* uses dynamic memory and slower */
2232	#else
2233	mp_word W[MP_WARRAY];
2234	#endif
2235
2236	/* get old used count */
2237	olduse = x->used;
2238
2239	/* grow a as required */
2240	if (x->alloc < n->used + 1) {
2241	if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
2242	return res;
2243	}
2244	}
2245
2246	#ifdef WOLFSSL_SMALL_STACK
2247	W = (mp_word)XMALLOC(sizeof(mp_word) MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
2248	if (W == NULL)
2249	return MP_MEM;
2250	#endif
2251
2252	/* first we have to get the digits of the input into
2253	* an array of double precision words W[...]
2254	*/
2255	{
2256	mp_word *_W;
2257	mp_digit *tmpx;
2258
2259	/* alias for the W[] array */
2260	_W = W;
2261
2262	/* alias for the digits of x*/
2263	tmpx = x->dp;
2264
2265	/* copy the digits of a into W[0..a->used-1] */
2266	for (ix = 0; ix < x->used; ix++) {
2267	_W++ = tmpx++;
2268	}
2269
2270	/* zero the high words of W[a->used..m->used2] /
2271	for (; ix < n->used * 2 + 1; ix++) {
2272	*_W++ = 0;
2273	}
2274	}
2275
2276	/* now we proceed to zero successive digits
2277	* from the least significant upwards
2278	*/
2279	for (ix = 0; ix < n->used; ix++) {
2280	/* mu = ai * m' mod b
2281	*
2282	* We avoid a double precision multiplication (which isn't required)
2283	* by casting the value down to a mp_digit. Note this requires
2284	* that W[ix-1] have the carry cleared (see after the inner loop)
2285	*/
2286	mp_digit mu;
2287	mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
2288
2289	/* a = a + mu * m * b**i
2290	*
2291	* This is computed in place and on the fly. The multiplication
2292	* by b**i is handled by offseting which columns the results
2293	* are added to.
2294	*
2295	* Note the comba method normally doesn't handle carries in the
2296	* inner loop In this case we fix the carry from the previous
2297	* column since the Montgomery reduction requires digits of the
2298	* result (so far) [see above] to work. This is
2299	* handled by fixing up one carry after the inner loop. The
2300	* carry fixups are done in order so after these loops the
2301	* first m->used words of W[] have the carries fixed
2302	*/
2303	{
2304	int iy;
2305	mp_digit *tmpn;
2306	mp_word *_W;
2307
2308	/* alias for the digits of the modulus */
2309	tmpn = n->dp;
2310
2311	/* Alias for the columns set by an offset of ix */
2312	_W = W + ix;
2313
2314	/* inner loop */
2315	for (iy = 0; iy < n->used; iy++) {
2316	_W++ += ((mp_word)mu) ((mp_word)*tmpn++);
2317	}
2318	}
2319
2320	/* now fix carry for next digit, W[ix+1] */
2321	W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
2322	}
2323
2324	/* now we have to propagate the carries and
2325	* shift the words downward [all those least
2326	* significant digits we zeroed].
2327	*/
2328	{
2329	mp_digit *tmpx;
2330	mp_word _W, _W1;
2331
2332	/* nox fix rest of carries */
2333
2334	/* alias for current word */
2335	_W1 = W + ix;
2336
2337	/* alias for next word, where the carry goes */
2338	_W = W + ++ix;
2339
2340	for (; ix <= n->used * 2 + 1; ix++) {
2341	_W++ += _W1++ >> ((mp_word) DIGIT_BIT);
2342	}
2343
2344	/* copy out, A = A/b**n
2345	*
2346	* The result is A/b**n but instead of converting from an
2347	* array of mp_word to mp_digit than calling mp_rshd
2348	* we just copy them in the right order
2349	*/
2350
2351	/* alias for destination word */
2352	tmpx = x->dp;
2353
2354	/* alias for shifted double precision result */
2355	_W = W + n->used;
2356
2357	for (ix = 0; ix < n->used + 1; ix++) {
2358	tmpx++ = (mp_digit)(_W++ & ((mp_word) MP_MASK));
2359	}
2360
2361	/* zero olduse digits, if the input a was larger than
2362	* m->used+1 we'll have to clear the digits
2363	*/
2364	for (; ix < olduse; ix++) {
2365	*tmpx++ = 0;
2366	}
2367	}
2368
2369	/* set the max used and clamp */
2370	x->used = n->used + 1;
2371	mp_clamp (x);
2372
2373	#ifdef WOLFSSL_SMALL_STACK
2374	XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
2375	#endif
2376
2377	/* if A >= m then A = A - m */
2378	if (mp_cmp_mag (x, n) != MP_LT) {
2379	return s_mp_sub (x, n, x);
2380	}
2381	return MP_OKAY;
2382	}
2383
2384
2385	/* computes xR*-1 == x (mod N) via Montgomery Reduction /
2386	int mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
2387	{
2388	int ix, res, digs;
2389	mp_digit mu;
2390
2391	/* can the fast reduction [comba] method be used?
2392	*
2393	* Note that unlike in mul you're safely allowed less
2394	* than the available columns [255 per default] since carries
2395	* are fixed up in the inner loop.
2396	*/
2397	digs = n->used * 2 + 1;
2398	if ((digs < MP_WARRAY) &&
2399	n->used <
2400	(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2401	return fast_mp_montgomery_reduce (x, n, rho);
2402	}
2403
2404	/* grow the input as required */
2405	if (x->alloc < digs) {
2406	if ((res = mp_grow (x, digs)) != MP_OKAY) {
2407	return res;
2408	}
2409	}
2410	x->used = digs;
2411
2412	for (ix = 0; ix < n->used; ix++) {
2413	/* mu = ai * rho mod b
2414	*
2415	* The value of rho must be precalculated via
2416	* montgomery_setup() such that
2417	* it equals -1/n0 mod b this allows the
2418	* following inner loop to reduce the
2419	* input one digit at a time
2420	*/
2421	mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
2422
2423	/* a = a + mu * m * b*i /
2424	{
2425	int iy;
2426	mp_digit tmpn, tmpx, u;
2427	mp_word r;
2428
2429	/* alias for digits of the modulus */
2430	tmpn = n->dp;
2431
2432	/* alias for the digits of x [the input] */
2433	tmpx = x->dp + ix;
2434
2435	/* set the carry to zero */
2436	u = 0;
2437
2438	/* Multiply and add in place */
2439	for (iy = 0; iy < n->used; iy++) {
2440	/* compute product and sum */
2441	r = ((mp_word)mu) * ((mp_word)*tmpn++) +
2442	((mp_word) u) + ((mp_word) * tmpx);
2443
2444	/* get carry */
2445	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
2446
2447	/* fix digit */
2448	*tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
2449	}
2450	/* At this point the ix'th digit of x should be zero */
2451
2452
2453	/* propagate carries upwards as required*/
2454	while (u) {
2455	*tmpx += u;
2456	u = *tmpx >> DIGIT_BIT;
2457	*tmpx++ &= MP_MASK;
2458	}
2459	}
2460	}
2461
2462	/* at this point the n.used'th least
2463	* significant digits of x are all zero
2464	* which means we can shift x to the
2465	* right by n.used digits and the
2466	* residue is unchanged.
2467	*/
2468
2469	/* x = x/b*n.used /
2470	mp_clamp(x);
2471	mp_rshd (x, n->used);
2472
2473	/* if x >= n then x = x - n */
2474	if (mp_cmp_mag (x, n) != MP_LT) {
2475	return s_mp_sub (x, n, x);
2476	}
2477
2478	return MP_OKAY;
2479	}
2480
2481
2482	/* determines the setup value */
2483	void mp_dr_setup(mp_int a, mp_digit d)
2484	{
2485	/* the casts are required if DIGIT_BIT is one less than
2486	* the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
2487	*/
2488	*d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
2489	((mp_word)a->dp[0]));
2490	}
2491
2492
2493	/* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
2494	*
2495	* Based on algorithm from the paper
2496	*
2497	* "Generating Efficient Primes for Discrete Log Cryptosystems"
2498	* Chae Hoon Lim, Pil Joong Lee,
2499	* POSTECH Information Research Laboratories
2500	*
2501	* The modulus must be of a special format [see manual]
2502	*
2503	* Has been modified to use algorithm 7.10 from the LTM book instead
2504	*
2505	* Input x must be in the range 0 <= x <= (n-1)**2
2506	*/
2507	int mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k)
2508	{
2509	int err, i, m;
2510	mp_word r;
2511	mp_digit mu, tmpx1, tmpx2;
2512
2513	/* m = digits in modulus */
2514	m = n->used;
2515
2516	/* ensure that "x" has at least 2m digits */
2517	if (x->alloc < m + m) {
2518	if ((err = mp_grow (x, m + m)) != MP_OKAY) {
2519	return err;
2520	}
2521	}
2522
2523	/* top of loop, this is where the code resumes if
2524	* another reduction pass is required.
2525	*/
2526	top:
2527	/* aliases for digits */
2528	/* alias for lower half of x */
2529	tmpx1 = x->dp;
2530
2531	/* alias for upper half of x, or x/B*m /
2532	tmpx2 = x->dp + m;
2533
2534	/* set carry to zero */
2535	mu = 0;
2536
2537	/* compute (x mod B*m) + k [x/B*m] inline and inplace /
2538	for (i = 0; i < m; i++) {
2539	r = ((mp_word)tmpx2++) ((mp_word)k) + *tmpx1 + mu;
2540	*tmpx1++ = (mp_digit)(r & MP_MASK);
2541	mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
2542	}
2543
2544	/* set final carry */
2545	*tmpx1++ = mu;
2546
2547	/* zero words above m */
2548	for (i = m + 1; i < x->used; i++) {
2549	*tmpx1++ = 0;
2550	}
2551
2552	/* clamp, sub and return */
2553	mp_clamp (x);
2554
2555	/* if x >= n then subtract and reduce again
2556	* Each successive "recursion" makes the input smaller and smaller.
2557	*/
2558	if (mp_cmp_mag (x, n) != MP_LT) {
2559	if ((err = s_mp_sub(x, n, x)) != MP_OKAY) {
2560	return err;
2561	}
2562	goto top;
2563	}
2564	return MP_OKAY;
2565	}
2566
2567
2568	/* reduces a modulo n where n is of the form 2*p - d /
2569	int mp_reduce_2k(mp_int a, mp_int n, mp_digit d)
2570	{
2571	mp_int q;
2572	int p, res;
2573
2574	if ((res = mp_init(&q)) != MP_OKAY) {
2575	return res;
2576	}
2577
2578	p = mp_count_bits(n);
2579	top:
2580	/* q = a/2p, a = a mod 2p */
2581	if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
2582	goto ERR;
2583	}
2584
2585	if (d != 1) {
2586	/* q = q * d */
2587	if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) {
2588	goto ERR;
2589	}
2590	}
2591
2592	/* a = a + q */
2593	if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
2594	goto ERR;
2595	}
2596
2597	if (mp_cmp_mag(a, n) != MP_LT) {
2598	if ((res = s_mp_sub(a, n, a)) != MP_OKAY) {
2599	goto ERR;
2600	}
2601	goto top;
2602	}
2603
2604	ERR:
2605	mp_clear(&q);
2606	return res;
2607	}
2608
2609
2610	/* determines the setup value */
2611	int mp_reduce_2k_setup(mp_int a, mp_digit d)
2612	{
2613	int res, p;
2614	mp_int tmp;
2615
2616	if ((res = mp_init(&tmp)) != MP_OKAY) {
2617	return res;
2618	}
2619
2620	p = mp_count_bits(a);
2621	if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
2622	mp_clear(&tmp);
2623	return res;
2624	}
2625
2626	if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
2627	mp_clear(&tmp);
2628	return res;
2629	}
2630
2631	*d = tmp.dp[0];
2632	mp_clear(&tmp);
2633	return MP_OKAY;
2634	}
2635
2636
2637	/* set the b bit of a */
2638	int mp_set_bit (mp_int * a, int b)
2639	{
2640	int i = b / DIGIT_BIT, res;
2641
2642	if (a->used < (int)(i + 1)) {
2643	/* grow a to accommodate the single bit */
2644	if ((res = mp_grow (a, i + 1)) != MP_OKAY) {
2645	return res;
2646	}
2647
2648	/* set the used count of where the bit will go */
2649	a->used = (int)(i + 1);
2650	}
2651
2652	/* put the single bit in its place */
2653	a->dp[i] \|= ((mp_digit)1) << (b % DIGIT_BIT);
2654
2655	return MP_OKAY;
2656	}
2657
2658	/* computes a = 2**b
2659	*
2660	* Simple algorithm which zeros the int, set the required bit
2661	*/
2662	int mp_2expt (mp_int * a, int b)
2663	{
2664	/* zero a as per default */
2665	mp_zero (a);
2666
2667	return mp_set_bit(a, b);
2668	}
2669
2670	/* multiply by a digit */
2671	int mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
2672	{
2673	mp_digit u, tmpa, tmpc;
2674	mp_word r;
2675	int ix, res, olduse;
2676
2677	/* make sure c is big enough to hold ab /
2678	if (c->alloc < a->used + 1) {
2679	if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
2680	return res;
2681	}
2682	}
2683
2684	/* get the original destinations used count */
2685	olduse = c->used;
2686
2687	/* set the sign */
2688	c->sign = a->sign;
2689
2690	/* alias for a->dp [source] */
2691	tmpa = a->dp;
2692
2693	/* alias for c->dp [dest] */
2694	tmpc = c->dp;
2695
2696	/* zero carry */
2697	u = 0;
2698
2699	/* compute columns */
2700	for (ix = 0; ix < a->used; ix++) {
2701	/* compute product and carry sum for this term */
2702	r = ((mp_word) u) + ((mp_word)tmpa++) ((mp_word)b);
2703
2704	/* mask off higher bits to get a single digit */
2705	*tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
2706
2707	/* send carry into next iteration */
2708	u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
2709	}
2710
2711	/* store final carry [if any] and increment ix offset */
2712	*tmpc++ = u;
2713	++ix;
2714
2715	/* now zero digits above the top */
2716	while (ix++ < olduse) {
2717	*tmpc++ = 0;
2718	}
2719
2720	/* set used count */
2721	c->used = a->used + 1;
2722	mp_clamp(c);
2723
2724	return MP_OKAY;
2725	}
2726
2727
2728	/* d = a * b (mod c) */
2729	#if defined(FREESCALE_LTC_TFM)
2730	int wolfcrypt_mp_mulmod(mp_int a, mp_int b, mp_int c, mp_int d)
2731	#else
2732	int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
2733	#endif
2734	{
2735	int res;
2736	mp_int t;
2737
2738	if ((res = mp_init_size (&t, c->used)) != MP_OKAY) {
2739	return res;
2740	}
2741
2742	res = mp_mul (a, b, &t);
2743	if (res == MP_OKAY) {
2744	res = mp_mod (&t, c, d);
2745	}
2746
2747	mp_clear (&t);
2748	return res;
2749	}
2750
2751
2752	/* d = a - b (mod c) */
2753	int mp_submod(mp_int* a, mp_int* b, mp_int* c, mp_int* d)
2754	{
2755	int res;
2756	mp_int t;
2757
2758	if ((res = mp_init (&t)) != MP_OKAY) {
2759	return res;
2760	}
2761
2762	res = mp_sub (a, b, &t);
2763	if (res == MP_OKAY) {
2764	res = mp_mod (&t, c, d);
2765	}
2766
2767	mp_clear (&t);
2768
2769	return res;
2770	}
2771
2772	/* d = a + b (mod c) */
2773	int mp_addmod(mp_int* a, mp_int* b, mp_int* c, mp_int* d)
2774	{
2775	int res;
2776	mp_int t;
2777
2778	if ((res = mp_init (&t)) != MP_OKAY) {
2779	return res;
2780	}
2781
2782	res = mp_add (a, b, &t);
2783	if (res == MP_OKAY) {
2784	res = mp_mod (&t, c, d);
2785	}
2786
2787	mp_clear (&t);
2788
2789	return res;
2790	}
2791
2792	/* computes b = aa /
2793	int mp_sqr (mp_int * a, mp_int * b)
2794	{
2795	int res;
2796
2797	{
2798	#ifdef BN_FAST_S_MP_SQR_C
2799	/* can we use the fast comba multiplier? */
2800	if ((a->used * 2 + 1) < MP_WARRAY &&
2801	a->used <
2802	(1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
2803	res = fast_s_mp_sqr (a, b);
2804	} else
2805	#endif
2806	#ifdef BN_S_MP_SQR_C
2807	res = s_mp_sqr (a, b);
2808	#else
2809	res = MP_VAL;
2810	#endif
2811	}
2812	b->sign = MP_ZPOS;
2813	return res;
2814	}
2815
2816
2817	/* high level multiplication (handles sign) */
2818	#if defined(FREESCALE_LTC_TFM)
2819	int wolfcrypt_mp_mul(mp_int a, mp_int b, mp_int *c)
2820	#else
2821	int mp_mul (mp_int * a, mp_int * b, mp_int * c)
2822	#endif
2823	{
2824	int res, neg;
2825	neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
2826
2827	{
2828	/* can we use the fast multiplier?
2829	*
2830	* The fast multiplier can be used if the output will
2831	* have less than MP_WARRAY digits and the number of
2832	* digits won't affect carry propagation
2833	*/
2834	int digs = a->used + b->used + 1;
2835
2836	#ifdef BN_FAST_S_MP_MUL_DIGS_C
2837	if ((digs < MP_WARRAY) &&
2838	MIN(a->used, b->used) <=
2839	(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2840	res = fast_s_mp_mul_digs (a, b, c, digs);
2841	} else
2842	#endif
2843	#ifdef BN_S_MP_MUL_DIGS_C
2844	res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
2845	#else
2846	res = MP_VAL;
2847	#endif
2848
2849	}
2850	c->sign = (c->used > 0) ? neg : MP_ZPOS;
2851	return res;
2852	}
2853
2854
2855	/* b = a2 /
2856	int mp_mul_2(mp_int * a, mp_int * b)
2857	{
2858	int x, res, oldused;
2859
2860	/* grow to accommodate result */
2861	if (b->alloc < a->used + 1) {
2862	if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
2863	return res;
2864	}
2865	}
2866
2867	oldused = b->used;
2868	b->used = a->used;
2869
2870	{
2871	mp_digit r, rr, tmpa, tmpb;
2872
2873	/* alias for source */
2874	tmpa = a->dp;
2875
2876	/* alias for dest */
2877	tmpb = b->dp;
2878
2879	/* carry */
2880	r = 0;
2881	for (x = 0; x < a->used; x++) {
2882
2883	/* get what will be the next carry bit from the
2884	* MSB of the current digit
2885	*/
2886	rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
2887
2888	/* now shift up this digit, add in the carry [from the previous] */
2889	tmpb++ = (mp_digit)(((tmpa++ << ((mp_digit)1)) \| r) & MP_MASK);
2890
2891	/* copy the carry that would be from the source
2892	* digit into the next iteration
2893	*/
2894	r = rr;
2895	}
2896
2897	/* new leading digit? */
2898	if (r != 0) {
2899	/* add a MSB which is always 1 at this point */
2900	*tmpb = 1;
2901	++(b->used);
2902	}
2903
2904	/* now zero any excess digits on the destination
2905	* that we didn't write to
2906	*/
2907	tmpb = b->dp + b->used;
2908	for (x = b->used; x < oldused; x++) {
2909	*tmpb++ = 0;
2910	}
2911	}
2912	b->sign = a->sign;
2913	return MP_OKAY;
2914	}
2915
2916
2917	/* divide by three (based on routine from MPI and the GMP manual) */
2918	int mp_div_3 (mp_int * a, mp_int c, mp_digit d)
2919	{
2920	mp_int q;
2921	mp_word w, t;
2922	mp_digit b;
2923	int res, ix;
2924
2925	/* b = 2*DIGIT_BIT / 3 /
2926	b = (mp_digit) ( (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3) );
2927
2928	if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
2929	return res;
2930	}
2931
2932	q.used = a->used;
2933	q.sign = a->sign;
2934	w = 0;
2935	for (ix = a->used - 1; ix >= 0; ix--) {
2936	w = (w << ((mp_word)DIGIT_BIT)) \| ((mp_word)a->dp[ix]);
2937
2938	if (w >= 3) {
2939	/* multiply w by [1/3] */
2940	t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
2941
2942	/* now subtract 3 * [w/3] from w, to get the remainder */
2943	w -= t+t+t;
2944
2945	/* fixup the remainder as required since
2946	* the optimization is not exact.
2947	*/
2948	while (w >= 3) {
2949	t += 1;
2950	w -= 3;
2951	}
2952	} else {
2953	t = 0;
2954	}
2955	q.dp[ix] = (mp_digit)t;
2956	}
2957
2958	/* [optional] store the remainder */
2959	if (d != NULL) {
2960	*d = (mp_digit)w;
2961	}
2962
2963	/* [optional] store the quotient */
2964	if (c != NULL) {
2965	mp_clamp(&q);
2966	mp_exch(&q, c);
2967	}
2968	mp_clear(&q);
2969
2970	return res;
2971	}
2972
2973
2974	/* init an mp_init for a given size */
2975	int mp_init_size (mp_int * a, int size)
2976	{
2977	int x;
2978
2979	/* pad size so there are always extra digits */
2980	size += (MP_PREC * 2) - (size % MP_PREC);
2981
2982	/* alloc mem */
2983	a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size, NULL,
2984	DYNAMIC_TYPE_BIGINT);
2985	if (a->dp == NULL) {
2986	return MP_MEM;
2987	}
2988
2989	/* set the members */
2990	a->used = 0;
2991	a->alloc = size;
2992	a->sign = MP_ZPOS;
2993	#ifdef HAVE_WOLF_BIGINT
2994	wc_bigint_init(&a->raw);
2995	#endif
2996
2997	/* zero the digits */
2998	for (x = 0; x < size; x++) {
2999	a->dp[x] = 0;
3000	}
3001
3002	return MP_OKAY;
3003	}
3004
3005
3006	/* the jist of squaring...
3007	* you do like mult except the offset of the tmpx [one that
3008	* starts closer to zero] can't equal the offset of tmpy.
3009	* So basically you set up iy like before then you min it with
3010	* (ty-tx) so that it never happens. You double all those
3011	* you add in the inner loop
3012
3013	After that loop you do the squares and add them in.
3014	*/
3015
3016	int fast_s_mp_sqr (mp_int * a, mp_int * b)
3017	{
3018	int olduse, res, pa, ix, iz;
3019	#ifdef WOLFSSL_SMALL_STACK
3020	mp_digit* W; /* uses dynamic memory and slower */
3021	#else
3022	mp_digit W[MP_WARRAY];
3023	#endif
3024	mp_digit *tmpx;
3025	mp_word W1;
3026
3027	/* grow the destination as required */
3028	pa = a->used + a->used;
3029	if (b->alloc < pa) {
3030	if ((res = mp_grow (b, pa)) != MP_OKAY) {
3031	return res;
3032	}
3033	}
3034
3035	if (pa > MP_WARRAY)
3036	return MP_RANGE; /* TAO range check */
3037
3038	#ifdef WOLFSSL_SMALL_STACK
3039	W = (mp_digit)XMALLOC(sizeof(mp_digit) MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
3040	if (W == NULL)
3041	return MP_MEM;
3042	#endif
3043
3044	/* number of output digits to produce */
3045	W1 = 0;
3046	for (ix = 0; ix < pa; ix++) {
3047	int tx, ty, iy;
3048	mp_word _W;
3049	mp_digit *tmpy;
3050
3051	/* clear counter */
3052	_W = 0;
3053
3054	/* get offsets into the two bignums */
3055	ty = MIN(a->used-1, ix);
3056	tx = ix - ty;
3057
3058	/* setup temp aliases */
3059	tmpx = a->dp + tx;
3060	tmpy = a->dp + ty;
3061
3062	/* this is the number of times the loop will iterate, essentially
3063	while (tx++ < a->used && ty-- >= 0) { ... }
3064	*/
3065	iy = MIN(a->used-tx, ty+1);
3066
3067	/* now for squaring tx can never equal ty
3068	* we halve the distance since they approach at a rate of 2x
3069	* and we have to round because odd cases need to be executed
3070	*/
3071	iy = MIN(iy, (ty-tx+1)>>1);
3072
3073	/* execute loop */
3074	for (iz = 0; iz < iy; iz++) {
3075	_W += ((mp_word)tmpx++)((mp_word)*tmpy--);
3076	}
3077
3078	/* double the inner product and add carry */
3079	_W = _W + _W + W1;
3080
3081	/* even columns have the square term in them */
3082	if ((ix&1) == 0) {
3083	_W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
3084	}
3085
3086	/* store it */
3087	W[ix] = (mp_digit)(_W & MP_MASK);
3088
3089	/* make next carry */
3090	W1 = _W >> ((mp_word)DIGIT_BIT);
3091	}
3092
3093	/* setup dest */
3094	olduse = b->used;
3095	b->used = a->used+a->used;
3096
3097	{
3098	mp_digit *tmpb;
3099	tmpb = b->dp;
3100	for (ix = 0; ix < pa; ix++) {
3101	*tmpb++ = (mp_digit)(W[ix] & MP_MASK);
3102	}
3103
3104	/* clear unused digits [that existed in the old copy of c] */
3105	for (; ix < olduse; ix++) {
3106	*tmpb++ = 0;
3107	}
3108	}
3109	mp_clamp (b);
3110
3111	#ifdef WOLFSSL_SMALL_STACK
3112	XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
3113	#endif
3114
3115	return MP_OKAY;
3116	}
3117
3118
3119	/* Fast (comba) multiplier
3120	*
3121	* This is the fast column-array [comba] multiplier. It is
3122	* designed to compute the columns of the product first
3123	* then handle the carries afterwards. This has the effect
3124	* of making the nested loops that compute the columns very
3125	* simple and schedulable on super-scalar processors.
3126	*
3127	* This has been modified to produce a variable number of
3128	* digits of output so if say only a half-product is required
3129	* you don't have to compute the upper half (a feature
3130	* required for fast Barrett reduction).
3131	*
3132	* Based on Algorithm 14.12 on pp.595 of HAC.
3133	*
3134	*/
3135	int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3136	{
3137	int olduse, res, pa, ix, iz;
3138	#ifdef WOLFSSL_SMALL_STACK
3139	mp_digit* W; /* uses dynamic memory and slower */
3140	#else
3141	mp_digit W[MP_WARRAY];
3142	#endif
3143	mp_word _W;
3144
3145	/* grow the destination as required */
3146	if (c->alloc < digs) {
3147	if ((res = mp_grow (c, digs)) != MP_OKAY) {
3148	return res;
3149	}
3150	}
3151
3152	/* number of output digits to produce */
3153	pa = MIN(digs, a->used + b->used);
3154	if (pa > MP_WARRAY)
3155	return MP_RANGE; /* TAO range check */
3156
3157	#ifdef WOLFSSL_SMALL_STACK
3158	W = (mp_digit)XMALLOC(sizeof(mp_digit) MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
3159	if (W == NULL)
3160	return MP_MEM;
3161	#endif
3162
3163	/* clear the carry */
3164	_W = 0;
3165	for (ix = 0; ix < pa; ix++) {
3166	int tx, ty;
3167	int iy;
3168	mp_digit tmpx, tmpy;
3169
3170	/* get offsets into the two bignums */
3171	ty = MIN(b->used-1, ix);
3172	tx = ix - ty;
3173
3174	/* setup temp aliases */
3175	tmpx = a->dp + tx;
3176	tmpy = b->dp + ty;
3177
3178	/* this is the number of times the loop will iterate, essentially
3179	while (tx++ < a->used && ty-- >= 0) { ... }
3180	*/
3181	iy = MIN(a->used-tx, ty+1);
3182
3183	/* execute loop */
3184	for (iz = 0; iz < iy; ++iz) {
3185	_W += ((mp_word)tmpx++)((mp_word)*tmpy--);
3186
3187	}
3188
3189	/* store term */
3190	W[ix] = (mp_digit)(((mp_digit)_W) & MP_MASK);
3191
3192	/* make next carry */
3193	_W = _W >> ((mp_word)DIGIT_BIT);
3194	}
3195
3196	/* setup dest */
3197	olduse = c->used;
3198	c->used = pa;
3199
3200	{
3201	mp_digit *tmpc;
3202	tmpc = c->dp;
3203	for (ix = 0; ix < pa; ix++) { /* JRB, +1 could read uninitialized data */
3204	/* now extract the previous digit [below the carry] */
3205	*tmpc++ = W[ix];
3206	}
3207
3208	/* clear unused digits [that existed in the old copy of c] */
3209	for (; ix < olduse; ix++) {
3210	*tmpc++ = 0;
3211	}
3212	}
3213	mp_clamp (c);
3214
3215	#ifdef WOLFSSL_SMALL_STACK
3216	XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
3217	#endif
3218
3219	return MP_OKAY;
3220	}
3221
3222
3223	/* low level squaring, b = aa, HAC pp.596-597, Algorithm 14.16 /
3224	int s_mp_sqr (mp_int * a, mp_int * b)
3225	{
3226	mp_int t;
3227	int res, ix, iy, pa;
3228	mp_word r;
3229	mp_digit u, tmpx, *tmpt;
3230
3231	pa = a->used;
3232	if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
3233	return res;
3234	}
3235
3236	/* default used is maximum possible size */
3237	t.used = 2*pa + 1;
3238
3239	for (ix = 0; ix < pa; ix++) {
3240	/* first calculate the digit at 2ix /
3241	/* calculate double precision result */
3242	r = ((mp_word) t.dp[2*ix]) +
3243	((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
3244
3245	/* store lower part in result */
3246	t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
3247
3248	/* get the carry */
3249	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3250
3251	/* left hand side of A[ix] * A[iy] */
3252	tmpx = a->dp[ix];
3253
3254	/* alias for where to store the results */
3255	tmpt = t.dp + (2*ix + 1);
3256
3257	for (iy = ix + 1; iy < pa; iy++) {
3258	/* first calculate the product */
3259	r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
3260
3261	/* now calculate the double precision result, note we use
3262	* addition instead of *2 since it's easier to optimize
3263	*/
3264	r = ((mp_word) *tmpt) + r + r + ((mp_word) u);
3265
3266	/* store lower part */
3267	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3268
3269	/* get carry */
3270	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3271	}
3272	/* propagate upwards */
3273	while (u != ((mp_digit) 0)) {
3274	r = ((mp_word) *tmpt) + ((mp_word) u);
3275	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3276	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3277	}
3278	}
3279
3280	mp_clamp (&t);
3281	mp_exch (&t, b);
3282	mp_clear (&t);
3283	return MP_OKAY;
3284	}
3285
3286
3287	/* multiplies \|a\| * \|b\| and only computes up to digs digits of result
3288	* HAC pp. 595, Algorithm 14.12 Modified so you can control how
3289	* many digits of output are created.
3290	*/
3291	int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3292	{
3293	mp_int t;
3294	int res, pa, pb, ix, iy;
3295	mp_digit u;
3296	mp_word r;
3297	mp_digit tmpx, tmpt, tmpy;
3298
3299	/* can we use the fast multiplier? */
3300	if (((digs) < MP_WARRAY) &&
3301	MIN (a->used, b->used) <
3302	(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
3303	return fast_s_mp_mul_digs (a, b, c, digs);
3304	}
3305
3306	if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
3307	return res;
3308	}
3309	t.used = digs;
3310
3311	/* compute the digits of the product directly */
3312	pa = a->used;
3313	for (ix = 0; ix < pa; ix++) {
3314	/* set the carry to zero */
3315	u = 0;
3316
3317	/* limit ourselves to making digs digits of output */
3318	pb = MIN (b->used, digs - ix);
3319
3320	/* setup some aliases */
3321	/* copy of the digit from a used within the nested loop */
3322	tmpx = a->dp[ix];
3323
3324	/* an alias for the destination shifted ix places */
3325	tmpt = t.dp + ix;
3326
3327	/* an alias for the digits of b */
3328	tmpy = b->dp;
3329
3330	/* compute the columns of the output and propagate the carry */
3331	for (iy = 0; iy < pb; iy++) {
3332	/* compute the column as a mp_word */
3333	r = ((mp_word)*tmpt) +
3334	((mp_word)tmpx) * ((mp_word)*tmpy++) +
3335	((mp_word) u);
3336
3337	/* the new column is the lower part of the result */
3338	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3339
3340	/* get the carry word from the result */
3341	u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
3342	}
3343	/* set carry if it is placed below digs */
3344	if (ix + iy < digs) {
3345	*tmpt = u;
3346	}
3347	}
3348
3349	mp_clamp (&t);
3350	mp_exch (&t, c);
3351
3352	mp_clear (&t);
3353	return MP_OKAY;
3354	}
3355
3356
3357	/*
3358	* shifts with subtractions when the result is greater than b.
3359	*
3360	* The method is slightly modified to shift B unconditionally up to just under
3361	* the leading bit of b. This saves a lot of multiple precision shifting.
3362	*/
3363	int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
3364	{
3365	int x, bits, res;
3366
3367	/* how many bits of last digit does b use */
3368	bits = mp_count_bits (b) % DIGIT_BIT;
3369
3370	if (b->used > 1) {
3371	if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1))
3372	!= MP_OKAY) {
3373	return res;
3374	}
3375	} else {
3376	if ((res = mp_set(a, 1)) != MP_OKAY) {
3377	return res;
3378	}
3379	bits = 1;
3380	}
3381
3382	/* now compute C = A * B mod b */
3383	for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
3384	if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
3385	return res;
3386	}
3387	if (mp_cmp_mag (a, b) != MP_LT) {
3388	if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
3389	return res;
3390	}
3391	}
3392	}
3393
3394	return MP_OKAY;
3395	}
3396
3397
3398	#ifdef MP_LOW_MEM
3399	#define TAB_SIZE 32
3400	#else
3401	#define TAB_SIZE 256
3402	#endif
3403
3404	int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
3405	{
3406	mp_int M[TAB_SIZE], res, mu;
3407	mp_digit buf;
3408	int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
3409	int (redux)(mp_int,mp_int,mp_int);
3410
3411	/* find window size */
3412	x = mp_count_bits (X);
3413	if (x <= 7) {
3414	winsize = 2;
3415	} else if (x <= 36) {
3416	winsize = 3;
3417	} else if (x <= 140) {
3418	winsize = 4;
3419	} else if (x <= 450) {
3420	winsize = 5;
3421	} else if (x <= 1303) {
3422	winsize = 6;
3423	} else if (x <= 3529) {
3424	winsize = 7;
3425	} else {
3426	winsize = 8;
3427	}
3428
3429	#ifdef MP_LOW_MEM
3430	if (winsize > 5) {
3431	winsize = 5;
3432	}
3433	#endif
3434
3435	/* init M array */
3436	/* init first cell */
3437	if ((err = mp_init(&M[1])) != MP_OKAY) {
3438	return err;
3439	}
3440
3441	/* now init the second half of the array */
3442	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3443	if ((err = mp_init(&M[x])) != MP_OKAY) {
3444	for (y = 1<<(winsize-1); y < x; y++) {
3445	mp_clear (&M[y]);
3446	}
3447	mp_clear(&M[1]);
3448	return err;
3449	}
3450	}
3451
3452	/* create mu, used for Barrett reduction */
3453	if ((err = mp_init (&mu)) != MP_OKAY) {
3454	goto LBL_M;
3455	}
3456
3457	if (redmode == 0) {
3458	if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
3459	goto LBL_MU;
3460	}
3461	redux = mp_reduce;
3462	} else {
3463	if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
3464	goto LBL_MU;
3465	}
3466	redux = mp_reduce_2k_l;
3467	}
3468
3469	/* create M table
3470	*
3471	* The M table contains powers of the base,
3472	* e.g. M[x] = G**x mod P
3473	*
3474	* The first half of the table is not
3475	* computed though accept for M[0] and M[1]
3476	*/
3477	if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
3478	goto LBL_MU;
3479	}
3480
3481	/* compute the value at M[1<<(winsize-1)] by squaring
3482	* M[1] (winsize-1) times
3483	*/
3484	if ((err = mp_copy (&M[1], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
3485	goto LBL_MU;
3486	}
3487
3488	for (x = 0; x < (winsize - 1); x++) {
3489	/* square it */
3490	if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))],
3491	&M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
3492	goto LBL_MU;
3493	}
3494
3495	/* reduce modulo P */
3496	if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, &mu)) != MP_OKAY) {
3497	goto LBL_MU;
3498	}
3499	}
3500
3501	/* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
3502	* for x = (2(winsize - 1) + 1) to (2winsize - 1)
3503	*/
3504	for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
3505	if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
3506	goto LBL_MU;
3507	}
3508	if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
3509	goto LBL_MU;
3510	}
3511	}
3512
3513	/* setup result */
3514	if ((err = mp_init (&res)) != MP_OKAY) {
3515	goto LBL_MU;
3516	}
3517	if ((err = mp_set (&res, 1)) != MP_OKAY) {
3518	goto LBL_MU;
3519	}
3520
3521	/* set initial mode and bit cnt */
3522	mode = 0;
3523	bitcnt = 1;
3524	buf = 0;
3525	digidx = X->used - 1;
3526	bitcpy = 0;
3527	bitbuf = 0;
3528
3529	for (;;) {
3530	/* grab next digit as required */
3531	if (--bitcnt == 0) {
3532	/* if digidx == -1 we are out of digits */
3533	if (digidx == -1) {
3534	break;
3535	}
3536	/* read next digit and reset the bitcnt */
3537	buf = X->dp[digidx--];
3538	bitcnt = (int) DIGIT_BIT;
3539	}
3540
3541	/* grab the next msb from the exponent */
3542	y = (int)(buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
3543	buf <<= (mp_digit)1;
3544
3545	/* if the bit is zero and mode == 0 then we ignore it
3546	* These represent the leading zero bits before the first 1 bit
3547	* in the exponent. Technically this opt is not required but it
3548	* does lower the # of trivial squaring/reductions used
3549	*/
3550	if (mode == 0 && y == 0) {
3551	continue;
3552	}
3553
3554	/* if the bit is zero and mode == 1 then we square */
3555	if (mode == 1 && y == 0) {
3556	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3557	goto LBL_RES;
3558	}
3559	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3560	goto LBL_RES;
3561	}
3562	continue;
3563	}
3564
3565	/* else we add it to the window */
3566	bitbuf \|= (y << (winsize - ++bitcpy));
3567	mode = 2;
3568
3569	if (bitcpy == winsize) {
3570	/* ok window is filled so square as required and multiply */
3571	/* square first */
3572	for (x = 0; x < winsize; x++) {
3573	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3574	goto LBL_RES;
3575	}
3576	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3577	goto LBL_RES;
3578	}
3579	}
3580
3581	/* then multiply */
3582	if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
3583	goto LBL_RES;
3584	}
3585	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3586	goto LBL_RES;
3587	}
3588
3589	/* empty window and reset */
3590	bitcpy = 0;
3591	bitbuf = 0;
3592	mode = 1;
3593	}
3594	}
3595
3596	/* if bits remain then square/multiply */
3597	if (mode == 2 && bitcpy > 0) {
3598	/* square then multiply if the bit is set */
3599	for (x = 0; x < bitcpy; x++) {
3600	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3601	goto LBL_RES;
3602	}
3603	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3604	goto LBL_RES;
3605	}
3606
3607	bitbuf <<= 1;
3608	if ((bitbuf & (1 << winsize)) != 0) {
3609	/* then multiply */
3610	if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
3611	goto LBL_RES;
3612	}
3613	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3614	goto LBL_RES;
3615	}
3616	}
3617	}
3618	}
3619
3620	mp_exch (&res, Y);
3621	err = MP_OKAY;
3622	LBL_RES:mp_clear (&res);
3623	LBL_MU:mp_clear (&mu);
3624	LBL_M:
3625	mp_clear(&M[1]);
3626	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3627	mp_clear (&M[x]);
3628	}
3629	return err;
3630	}
3631
3632
3633	/* pre-calculate the value required for Barrett reduction
3634	* For a given modulus "b" it calculates the value required in "a"
3635	*/
3636	int mp_reduce_setup (mp_int * a, mp_int * b)
3637	{
3638	int res;
3639
3640	if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
3641	return res;
3642	}
3643	return mp_div (a, b, a, NULL);
3644	}
3645
3646
3647	/* reduces x mod m, assumes 0 < x < m**2, mu is
3648	* precomputed via mp_reduce_setup.
3649	* From HAC pp.604 Algorithm 14.42
3650	*/
3651	int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
3652	{
3653	mp_int q;
3654	int res, um = m->used;
3655
3656	/* q = x */
3657	if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
3658	return res;
3659	}
3660
3661	/* q1 = x / b*(k-1) /
3662	mp_rshd (&q, um - 1);
3663
3664	/* according to HAC this optimization is ok */
3665	if (((mp_word) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
3666	if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
3667	goto CLEANUP;
3668	}
3669	} else {
3670	#ifdef BN_S_MP_MUL_HIGH_DIGS_C
3671	if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
3672	goto CLEANUP;
3673	}
3674	#elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
3675	if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
3676	goto CLEANUP;
3677	}
3678	#else
3679	{
3680	res = MP_VAL;
3681	goto CLEANUP;
3682	}
3683	#endif
3684	}
3685
3686	/* q3 = q2 / b*(k+1) /
3687	mp_rshd (&q, um + 1);
3688
3689	/* x = x mod b*(k+1), quick (no division) /
3690	if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
3691	goto CLEANUP;
3692	}
3693
3694	/* q = q * m mod b*(k+1), quick (no division) /
3695	if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
3696	goto CLEANUP;
3697	}
3698
3699	/* x = x - q */
3700	if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
3701	goto CLEANUP;
3702	}
3703
3704	/* If x < 0, add b*(k+1) to it /
3705	if (mp_cmp_d (x, 0) == MP_LT) {
3706	if ((res = mp_set (&q, 1)) != MP_OKAY)
3707	goto CLEANUP;
3708	if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
3709	goto CLEANUP;
3710	if ((res = mp_add (x, &q, x)) != MP_OKAY)
3711	goto CLEANUP;
3712	}
3713
3714	/* Back off if it's too big */
3715	while (mp_cmp (x, m) != MP_LT) {
3716	if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
3717	goto CLEANUP;
3718	}
3719	}
3720
3721	CLEANUP:
3722	mp_clear (&q);
3723
3724	return res;
3725	}
3726
3727
3728	/* reduces a modulo n where n is of the form 2**p - d
3729	This differs from reduce_2k since "d" can be larger
3730	than a single digit.
3731	*/
3732	int mp_reduce_2k_l(mp_int a, mp_int n, mp_int *d)
3733	{
3734	mp_int q;
3735	int p, res;
3736
3737	if ((res = mp_init(&q)) != MP_OKAY) {
3738	return res;
3739	}
3740
3741	p = mp_count_bits(n);
3742	top:
3743	/* q = a/2p, a = a mod 2p */
3744	if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
3745	goto ERR;
3746	}
3747
3748	/* q = q * d */
3749	if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
3750	goto ERR;
3751	}
3752
3753	/* a = a + q */
3754	if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
3755	goto ERR;
3756	}
3757
3758	if (mp_cmp_mag(a, n) != MP_LT) {
3759	if ((res = s_mp_sub(a, n, a)) != MP_OKAY) {
3760	goto ERR;
3761	}
3762	goto top;
3763	}
3764
3765	ERR:
3766	mp_clear(&q);
3767	return res;
3768	}
3769
3770
3771	/* determines the setup value */
3772	int mp_reduce_2k_setup_l(mp_int a, mp_int d)
3773	{
3774	int res;
3775	mp_int tmp;
3776
3777	if ((res = mp_init(&tmp)) != MP_OKAY) {
3778	return res;
3779	}
3780
3781	if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
3782	goto ERR;
3783	}
3784
3785	if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
3786	goto ERR;
3787	}
3788
3789	ERR:
3790	mp_clear(&tmp);
3791	return res;
3792	}
3793
3794
3795	/* multiplies \|a\| * \|b\| and does not compute the lower digs digits
3796	* [meant to get the higher part of the product]
3797	*/
3798	int s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3799	{
3800	mp_int t;
3801	int res, pa, pb, ix, iy;
3802	mp_digit u;
3803	mp_word r;
3804	mp_digit tmpx, tmpt, tmpy;
3805
3806	/* can we use the fast multiplier? */
3807	#ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
3808	if (((a->used + b->used + 1) < MP_WARRAY)
3809	&& MIN (a->used, b->used) <
3810	(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
3811	return fast_s_mp_mul_high_digs (a, b, c, digs);
3812	}
3813	#endif
3814
3815	if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
3816	return res;
3817	}
3818	t.used = a->used + b->used + 1;
3819
3820	pa = a->used;
3821	pb = b->used;
3822	for (ix = 0; ix < pa && a->dp; ix++) {
3823	/* clear the carry */
3824	u = 0;
3825
3826	/* left hand side of A[ix] * B[iy] */
3827	tmpx = a->dp[ix];
3828
3829	/* alias to the address of where the digits will be stored */
3830	tmpt = &(t.dp[digs]);
3831
3832	/* alias for where to read the right hand side from */
3833	tmpy = b->dp + (digs - ix);
3834
3835	for (iy = digs - ix; iy < pb; iy++) {
3836	/* calculate the double precision result */
3837	r = ((mp_word)*tmpt) +
3838	((mp_word)tmpx) * ((mp_word)*tmpy++) +
3839	((mp_word) u);
3840
3841	/* get the lower part */
3842	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3843
3844	/* carry the carry */
3845	u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
3846	}
3847	*tmpt = u;
3848	}
3849	mp_clamp (&t);
3850	mp_exch (&t, c);
3851	mp_clear (&t);
3852	return MP_OKAY;
3853	}
3854
3855
3856	/* this is a modified version of fast_s_mul_digs that only produces
3857	* output digits above digs. See the comments for fast_s_mul_digs
3858	* to see how it works.
3859	*
3860	* This is used in the Barrett reduction since for one of the multiplications
3861	* only the higher digits were needed. This essentially halves the work.
3862	*
3863	* Based on Algorithm 14.12 on pp.595 of HAC.
3864	*/
3865	int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3866	{
3867	int olduse, res, pa, ix, iz;
3868	#ifdef WOLFSSL_SMALL_STACK
3869	mp_digit* W; /* uses dynamic memory and slower */
3870	#else
3871	mp_digit W[MP_WARRAY];
3872	#endif
3873	mp_word _W;
3874
3875	if (a->dp == NULL) { /* JRB, avoid reading uninitialized values */
3876	return MP_VAL;
3877	}
3878
3879	/* grow the destination as required */
3880	pa = a->used + b->used;
3881	if (c->alloc < pa) {
3882	if ((res = mp_grow (c, pa)) != MP_OKAY) {
3883	return res;
3884	}
3885	}
3886
3887	if (pa > MP_WARRAY)
3888	return MP_RANGE; /* TAO range check */
3889
3890	#ifdef WOLFSSL_SMALL_STACK
3891	W = (mp_digit)XMALLOC(sizeof(mp_digit) MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
3892	if (W == NULL)
3893	return MP_MEM;
3894	#endif
3895
3896	/* number of output digits to produce */
3897	pa = a->used + b->used;
3898	_W = 0;
3899	for (ix = digs; ix < pa; ix++) { /* JRB, have a->dp check at top of function*/
3900	int tx, ty, iy;
3901	mp_digit tmpx, tmpy;
3902
3903	/* get offsets into the two bignums */
3904	ty = MIN(b->used-1, ix);
3905	tx = ix - ty;
3906
3907	/* setup temp aliases */
3908	tmpx = a->dp + tx;
3909	tmpy = b->dp + ty;
3910
3911	/* this is the number of times the loop will iterate, essentially its
3912	while (tx++ < a->used && ty-- >= 0) { ... }
3913	*/
3914	iy = MIN(a->used-tx, ty+1);
3915
3916	/* execute loop */
3917	for (iz = 0; iz < iy; iz++) {
3918	_W += ((mp_word)tmpx++)((mp_word)*tmpy--);
3919	}
3920
3921	/* store term */
3922	W[ix] = (mp_digit)(((mp_digit)_W) & MP_MASK);
3923
3924	/* make next carry */
3925	_W = _W >> ((mp_word)DIGIT_BIT);
3926	}
3927
3928	/* setup dest */
3929	olduse = c->used;
3930	c->used = pa;
3931
3932	{
3933	mp_digit *tmpc;
3934
3935	tmpc = c->dp + digs;
3936	for (ix = digs; ix < pa; ix++) { /* TAO, <= could potentially overwrite */
3937	/* now extract the previous digit [below the carry] */
3938	*tmpc++ = W[ix];
3939	}
3940
3941	/* clear unused digits [that existed in the old copy of c] */
3942	for (; ix < olduse; ix++) {
3943	*tmpc++ = 0;
3944	}
3945	}
3946	mp_clamp (c);
3947
3948	#ifdef WOLFSSL_SMALL_STACK
3949	XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
3950	#endif
3951
3952	return MP_OKAY;
3953	}
3954
3955
3956	#ifndef MP_SET_CHUNK_BITS
3957	#define MP_SET_CHUNK_BITS 4
3958	#endif
3959	int mp_set_int (mp_int * a, unsigned long b)
3960	{
3961	int x, res;
3962
3963	/* use direct mp_set if b is less than mp_digit max */
3964	if (b < MP_DIGIT_MAX) {
3965	return mp_set (a, (mp_digit)b);
3966	}
3967
3968	mp_zero (a);
3969
3970	/* set chunk bits at a time */
3971	for (x = 0; x < (int)(sizeof(b) * 8) / MP_SET_CHUNK_BITS; x++) {
3972	/* shift the number up chunk bits */
3973	if ((res = mp_mul_2d (a, MP_SET_CHUNK_BITS, a)) != MP_OKAY) {
3974	return res;
3975	}
3976
3977	/* OR in the top bits of the source */
3978	a->dp[0] \|= (b >> ((sizeof(b) * 8) - MP_SET_CHUNK_BITS)) &
3979	((1 << MP_SET_CHUNK_BITS) - 1);
3980
3981	/* shift the source up to the next chunk bits */
3982	b <<= MP_SET_CHUNK_BITS;
3983
3984	/* ensure that digits are not clamped off */
3985	a->used += 1;
3986	}
3987	mp_clamp (a);
3988	return MP_OKAY;
3989	}
3990
3991
3992	#if defined(WOLFSSL_KEY_GEN) \|\| defined(HAVE_ECC) \|\| !defined(NO_RSA) \|\| \
3993	!defined(NO_DSA) \| !defined(NO_DH)
3994
3995	/* c = a * a (mod b) */
3996	int mp_sqrmod (mp_int * a, mp_int * b, mp_int * c)
3997	{
3998	int res;
3999	mp_int t;
4000
4001	if ((res = mp_init (&t)) != MP_OKAY) {
4002	return res;
4003	}
4004
4005	if ((res = mp_sqr (a, &t)) != MP_OKAY) {
4006	mp_clear (&t);
4007	return res;
4008	}
4009	res = mp_mod (&t, b, c);
4010	mp_clear (&t);
4011	return res;
4012	}
4013
4014	#endif
4015
4016
4017	#if defined(HAVE_ECC) \|\| !defined(NO_PWDBASED) \|\| defined(WOLFSSL_SNIFFER) \|\| \
4018	defined(WOLFSSL_HAVE_WOLFSCEP) \|\| defined(WOLFSSL_KEY_GEN) \|\| \
4019	defined(OPENSSL_EXTRA) \|\| defined(WC_RSA_BLINDING) \|\| \
4020	(!defined(NO_RSA) && !defined(NO_RSA_BOUNDS_CHECK))
4021
4022	/* single digit addition */
4023	int mp_add_d (mp_int* a, mp_digit b, mp_int* c)
4024	{
4025	int res, ix, oldused;
4026	mp_digit tmpa, tmpc, mu;
4027
4028	/* grow c as required */
4029	if (c->alloc < a->used + 1) {
4030	if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
4031	return res;
4032	}
4033	}
4034
4035	/* if a is negative and \|a\| >= b, call c = \|a\| - b */
4036	if (a->sign == MP_NEG && (a->used > 1 \|\| a->dp[0] >= b)) {
4037	/* temporarily fix sign of a */
4038	a->sign = MP_ZPOS;
4039
4040	/* c = \|a\| - b */
4041	res = mp_sub_d(a, b, c);
4042
4043	/* fix sign */
4044	a->sign = c->sign = MP_NEG;
4045
4046	/* clamp */
4047	mp_clamp(c);
4048
4049	return res;
4050	}
4051
4052	/* old number of used digits in c */
4053	oldused = c->used;
4054
4055	/* sign always positive */
4056	c->sign = MP_ZPOS;
4057
4058	/* source alias */
4059	tmpa = a->dp;
4060
4061	/* destination alias */
4062	tmpc = c->dp;
4063
4064	/* if a is positive */
4065	if (a->sign == MP_ZPOS) {
4066	/* add digit, after this we're propagating
4067	* the carry.
4068	*/
4069	tmpc = tmpa++ + b;
4070	mu = *tmpc >> DIGIT_BIT;
4071	*tmpc++ &= MP_MASK;
4072
4073	/* now handle rest of the digits */
4074	for (ix = 1; ix < a->used; ix++) {
4075	tmpc = tmpa++ + mu;
4076	mu = *tmpc >> DIGIT_BIT;
4077	*tmpc++ &= MP_MASK;
4078	}
4079	/* set final carry */
4080	if (ix < c->alloc) {
4081	ix++;
4082	*tmpc++ = mu;
4083	}
4084
4085	/* setup size */
4086	c->used = a->used + 1;
4087	} else {
4088	/* a was negative and \|a\| < b */
4089	c->used = 1;
4090
4091	/* the result is a single digit */
4092	if (a->used == 1) {
4093	*tmpc++ = b - a->dp[0];
4094	} else {
4095	*tmpc++ = b;
4096	}
4097
4098	/* setup count so the clearing of oldused
4099	* can fall through correctly
4100	*/
4101	ix = 1;
4102	}
4103
4104	/* now zero to oldused */
4105	while (ix++ < oldused) {
4106	*tmpc++ = 0;
4107	}
4108	mp_clamp(c);
4109
4110	return MP_OKAY;
4111	}
4112
4113
4114	/* single digit subtraction */
4115	int mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
4116	{
4117	mp_digit tmpa, tmpc, mu;
4118	int res, ix, oldused;
4119
4120	/* grow c as required */
4121	if (c->alloc < a->used + 1) {
4122	if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
4123	return res;
4124	}
4125	}
4126
4127	/* if a is negative just do an unsigned
4128	* addition [with fudged signs]
4129	*/
4130	if (a->sign == MP_NEG) {
4131	a->sign = MP_ZPOS;
4132	res = mp_add_d(a, b, c);
4133	a->sign = c->sign = MP_NEG;
4134
4135	/* clamp */
4136	mp_clamp(c);
4137
4138	return res;
4139	}
4140
4141	/* setup regs */
4142	oldused = c->used;
4143	tmpa = a->dp;
4144	tmpc = c->dp;
4145
4146	/* if a <= b simply fix the single digit */
4147	if ((a->used == 1 && a->dp[0] <= b) \|\| a->used == 0) {
4148	if (a->used == 1) {
4149	tmpc++ = b - tmpa;
4150	} else {
4151	*tmpc++ = b;
4152	}
4153	ix = 1;
4154
4155	/* negative/1digit */
4156	c->sign = MP_NEG;
4157	c->used = 1;
4158	} else {
4159	/* positive/size */
4160	c->sign = MP_ZPOS;
4161	c->used = a->used;
4162
4163	/* subtract first digit */
4164	tmpc = tmpa++ - b;
4165	mu = tmpc >> (sizeof(mp_digit) CHAR_BIT - 1);
4166	*tmpc++ &= MP_MASK;
4167
4168	/* handle rest of the digits */
4169	for (ix = 1; ix < a->used; ix++) {
4170	tmpc = tmpa++ - mu;
4171	mu = tmpc >> (sizeof(mp_digit) CHAR_BIT - 1);
4172	*tmpc++ &= MP_MASK;
4173	}
4174	}
4175
4176	/* zero excess digits */
4177	while (ix++ < oldused) {
4178	*tmpc++ = 0;
4179	}
4180	mp_clamp(c);
4181	return MP_OKAY;
4182	}
4183
4184	#endif /* defined(HAVE_ECC) \|\| !defined(NO_PWDBASED) */
4185
4186
4187	#if defined(WOLFSSL_KEY_GEN) \|\| defined(HAVE_COMP_KEY) \|\| defined(HAVE_ECC) \|\| \
4188	defined(DEBUG_WOLFSSL) \|\| !defined(NO_RSA) \|\| !defined(NO_DSA) \|\| \
4189	!defined(NO_DH)
4190
4191	static const int lnz[16] = {
4192	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
4193	};
4194
4195	/* Counts the number of lsbs which are zero before the first zero bit */
4196	int mp_cnt_lsb(mp_int *a)
4197	{
4198	int x;
4199	mp_digit q = 0, qq;
4200
4201	/* easy out */
4202	if (mp_iszero(a) == MP_YES) {
4203	return 0;
4204	}
4205
4206	/* scan lower digits until non-zero */
4207	for (x = 0; x < a->used && a->dp[x] == 0; x++) {}
4208	if (a->dp)
4209	q = a->dp[x];
4210	x *= DIGIT_BIT;
4211
4212	/* now scan this digit until a 1 is found */
4213	if ((q & 1) == 0) {
4214	do {
4215	qq = q & 15;
4216	x += lnz[qq];
4217	q >>= 4;
4218	} while (qq == 0);
4219	}
4220	return x;
4221	}
4222
4223
4224
4225
4226	static int s_is_power_of_two(mp_digit b, int *p)
4227	{
4228	int x;
4229
4230	/* fast return if no power of two */
4231	if ((b==0) \|\| (b & (b-1))) {
4232	return 0;
4233	}
4234
4235	for (x = 0; x < DIGIT_BIT; x++) {
4236	if (b == (((mp_digit)1)<<x)) {
4237	*p = x;
4238	return 1;
4239	}
4240	}
4241	return 0;
4242	}
4243
4244	/* single digit division (based on routine from MPI) */
4245	static int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
4246	{
4247	mp_int q;
4248	mp_word w;
4249	mp_digit t;
4250	int res = MP_OKAY, ix;
4251
4252	/* cannot divide by zero */
4253	if (b == 0) {
4254	return MP_VAL;
4255	}
4256
4257	/* quick outs */
4258	if (b == 1 \|\| mp_iszero(a) == MP_YES) {
4259	if (d != NULL) {
4260	*d = 0;
4261	}
4262	if (c != NULL) {
4263	return mp_copy(a, c);
4264	}
4265	return MP_OKAY;
4266	}
4267
4268	/* power of two ? */
4269	if (s_is_power_of_two(b, &ix) == 1) {
4270	if (d != NULL) {
4271	*d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
4272	}
4273	if (c != NULL) {
4274	return mp_div_2d(a, ix, c, NULL);
4275	}
4276	return MP_OKAY;
4277	}
4278
4279	#ifdef BN_MP_DIV_3_C
4280	/* three? */
4281	if (b == 3) {
4282	return mp_div_3(a, c, d);
4283	}
4284	#endif
4285
4286	/* no easy answer [c'est la vie]. Just division */
4287	if (c != NULL) {
4288	if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
4289	return res;
4290	}
4291
4292	q.used = a->used;
4293	q.sign = a->sign;
4294	}
4295	else {
4296	if ((res = mp_init(&q)) != MP_OKAY) {
4297	return res;
4298	}
4299	}
4300
4301
4302	w = 0;
4303	for (ix = a->used - 1; ix >= 0; ix--) {
4304	w = (w << ((mp_word)DIGIT_BIT)) \| ((mp_word)a->dp[ix]);
4305
4306	if (w >= b) {
4307	t = (mp_digit)(w / b);
4308	w -= ((mp_word)t) * ((mp_word)b);
4309	} else {
4310	t = 0;
4311	}
4312	if (c != NULL)
4313	q.dp[ix] = (mp_digit)t;
4314	}
4315
4316	if (d != NULL) {
4317	*d = (mp_digit)w;
4318	}
4319
4320	if (c != NULL) {
4321	mp_clamp(&q);
4322	mp_exch(&q, c);
4323	}
4324	mp_clear(&q);
4325
4326	return res;
4327	}
4328
4329
4330	int mp_mod_d (mp_int * a, mp_digit b, mp_digit * c)
4331	{
4332	return mp_div_d(a, b, NULL, c);
4333	}
4334
4335	#endif /* WOLFSSL_KEY_GEN \|\| HAVE_COMP_KEY \|\| HAVE_ECC \|\| DEBUG_WOLFSSL */
4336
4337	#if defined(WOLFSSL_KEY_GEN) \|\| !defined(NO_DH) \|\| !defined(NO_DSA) \|\| !defined(NO_RSA)
4338
4339	const mp_digit ltm_prime_tab[PRIME_SIZE] = {
4340	0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
4341	0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
4342	0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
4343	0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F,
4344	#ifndef MP_8BIT
4345	0x0083,
4346	0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
4347	0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
4348	0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
4349	0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
4350
4351	0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
4352	0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
4353	0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
4354	0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
4355	0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
4356	0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
4357	0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
4358	0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
4359
4360	0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
4361	0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
4362	0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
4363	0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
4364	0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
4365	0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
4366	0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
4367	0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
4368
4369	0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
4370	0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
4371	0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
4372	0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
4373	0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
4374	0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
4375	0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
4376	0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
4377	#endif
4378	};
4379
4380
4381	/* Miller-Rabin test of "a" to the base of "b" as described in
4382	* HAC pp. 139 Algorithm 4.24
4383	*
4384	* Sets result to 0 if definitely composite or 1 if probably prime.
4385	* Randomly the chance of error is no more than 1/4 and often
4386	* very much lower.
4387	*/
4388	static int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
4389	{
4390	mp_int n1, y, r;
4391	int s, j, err;
4392
4393	/* default */
4394	*result = MP_NO;
4395
4396	/* ensure b > 1 */
4397	if (mp_cmp_d(b, 1) != MP_GT) {
4398	return MP_VAL;
4399	}
4400
4401	/* get n1 = a - 1 */
4402	if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
4403	return err;
4404	}
4405	if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
4406	goto LBL_N1;
4407	}
4408
4409	/* set 2*s r = n1 */
4410	if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
4411	goto LBL_N1;
4412	}
4413
4414	/* count the number of least significant bits
4415	* which are zero
4416	*/
4417	s = mp_cnt_lsb(&r);
4418
4419	/* now divide n - 1 by 2*s /
4420	if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
4421	goto LBL_R;
4422	}
4423
4424	/* compute y = b*r mod a /
4425	if ((err = mp_init (&y)) != MP_OKAY) {
4426	goto LBL_R;
4427	}
4428	if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
4429	goto LBL_Y;
4430	}
4431
4432	/* if y != 1 and y != n1 do */
4433	if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
4434	j = 1;
4435	/* while j <= s-1 and y != n1 */
4436	while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
4437	if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
4438	goto LBL_Y;
4439	}
4440
4441	/* if y == 1 then composite */
4442	if (mp_cmp_d (&y, 1) == MP_EQ) {
4443	goto LBL_Y;
4444	}
4445
4446	++j;
4447	}
4448
4449	/* if y != n1 then composite */
4450	if (mp_cmp (&y, &n1) != MP_EQ) {
4451	goto LBL_Y;
4452	}
4453	}
4454
4455	/* probably prime now */
4456	*result = MP_YES;
4457	LBL_Y:mp_clear (&y);
4458	LBL_R:mp_clear (&r);
4459	LBL_N1:mp_clear (&n1);
4460	return err;
4461	}
4462
4463
4464	/* determines if an integers is divisible by one
4465	* of the first PRIME_SIZE primes or not
4466	*
4467	* sets result to 0 if not, 1 if yes
4468	*/
4469	static int mp_prime_is_divisible (mp_int * a, int *result)
4470	{
4471	int err, ix;
4472	mp_digit res;
4473
4474	/* default to not */
4475	*result = MP_NO;
4476
4477	for (ix = 0; ix < PRIME_SIZE; ix++) {
4478	/* what is a mod LBL_prime_tab[ix] */
4479	if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) {
4480	return err;
4481	}
4482
4483	/* is the residue zero? */
4484	if (res == 0) {
4485	*result = MP_YES;
4486	return MP_OKAY;
4487	}
4488	}
4489
4490	return MP_OKAY;
4491	}
4492
4493	/*
4494	* Sets result to 1 if probably prime, 0 otherwise
4495	*/
4496	int mp_prime_is_prime (mp_int * a, int t, int *result)
4497	{
4498	mp_int b;
4499	int ix, err, res;
4500
4501	/* default to no */
4502	*result = MP_NO;
4503
4504	/* valid value of t? */
4505	if (t <= 0 \|\| t > PRIME_SIZE) {
4506	return MP_VAL;
4507	}
4508
4509	/* is the input equal to one of the primes in the table? */
4510	for (ix = 0; ix < PRIME_SIZE; ix++) {
4511	if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
4512	*result = 1;
4513	return MP_OKAY;
4514	}
4515	}
4516
4517	/* first perform trial division */
4518	if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
4519	return err;
4520	}
4521
4522	/* return if it was trivially divisible */
4523	if (res == MP_YES) {
4524	return MP_OKAY;
4525	}
4526
4527	/* now perform the miller-rabin rounds */
4528	if ((err = mp_init (&b)) != MP_OKAY) {
4529	return err;
4530	}
4531
4532	for (ix = 0; ix < t; ix++) {
4533	/* set the prime */
4534	if ((err = mp_set (&b, ltm_prime_tab[ix])) != MP_OKAY) {
4535	goto LBL_B;
4536	}
4537
4538	if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
4539	goto LBL_B;
4540	}
4541
4542	if (res == MP_NO) {
4543	goto LBL_B;
4544	}
4545	}
4546
4547	/* passed the test */
4548	*result = MP_YES;
4549	LBL_B:mp_clear (&b);
4550	return err;
4551	}
4552
4553
4554	/*
4555	* Sets result to 1 if probably prime, 0 otherwise
4556	*/
4557	int mp_prime_is_prime_ex (mp_int * a, int t, int result, WC_RNG rng)
4558	{
4559	mp_int b, c;
4560	int ix, err, res;
4561	byte* base = NULL;
4562	word32 baseSz = 0;
4563
4564	/* default to no */
4565	*result = MP_NO;
4566
4567	/* valid value of t? */
4568	if (t <= 0 \|\| t > PRIME_SIZE) {
4569	return MP_VAL;
4570	}
4571
4572	/* is the input equal to one of the primes in the table? */
4573	for (ix = 0; ix < PRIME_SIZE; ix++) {
4574	if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
4575	*result = MP_YES;
4576	return MP_OKAY;
4577	}
4578	}
4579
4580	/* first perform trial division */
4581	if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
4582	return err;
4583	}
4584
4585	/* return if it was trivially divisible */
4586	if (res == MP_YES) {
4587	return MP_OKAY;
4588	}
4589
4590	/* now perform the miller-rabin rounds */
4591	if ((err = mp_init (&b)) != MP_OKAY) {
4592	return err;
4593	}
4594	if ((err = mp_init (&c)) != MP_OKAY) {
4595	mp_clear(&b);
4596	return err;
4597	}
4598
4599	baseSz = mp_count_bits(a);
4600	baseSz = (baseSz / 8) + ((baseSz % 8) ? 1 : 0);
4601
4602	base = (byte*)XMALLOC(baseSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
4603	if (base == NULL) {
4604	err = MP_MEM;
4605	goto LBL_B;
4606	}
4607
4608	if ((err = mp_sub_d(a, 2, &c)) != MP_OKAY) {
4609	goto LBL_B;
4610	}
4611
4612	/* now do a miller rabin with up to t random numbers, this should
4613	* give a (1/4)^t chance of a false prime. */
4614	for (ix = 0; ix < t; ix++) {
4615	/* Set a test candidate. */
4616	if ((err = wc_RNG_GenerateBlock(rng, base, baseSz)) != 0) {
4617	goto LBL_B;
4618	}
4619
4620	if ((err = mp_read_unsigned_bin(&b, base, baseSz)) != MP_OKAY) {
4621	goto LBL_B;
4622	}
4623
4624	if (mp_cmp_d(&b, 2) != MP_GT \|\| mp_cmp(&b, &c) != MP_LT)
4625	continue;
4626
4627	if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
4628	goto LBL_B;
4629	}
4630
4631	if (res == MP_NO) {
4632	goto LBL_B;
4633	}
4634	}
4635
4636	/* passed the test */
4637	*result = MP_YES;
4638	LBL_B:mp_clear (&b);
4639	mp_clear (&c);
4640	XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER);
4641	return err;
4642	}
4643
4644	#endif /* WOLFSSL_KEY_GEN NO_DH NO_DSA NO_RSA */
4645
4646	#ifdef WOLFSSL_KEY_GEN
4647
4648	static const int USE_BBS = 1;
4649
4650	int mp_rand_prime(mp_int* N, int len, WC_RNG* rng, void* heap)
4651	{
4652	int err, res, type;
4653	byte* buf;
4654
4655	if (N == NULL \|\| rng == NULL)
4656	return MP_VAL;
4657
4658	/* get type */
4659	if (len < 0) {
4660	type = USE_BBS;
4661	len = -len;
4662	} else {
4663	type = 0;
4664	}
4665
4666	/* allow sizes between 2 and 512 bytes for a prime size */
4667	if (len < 2 \|\| len > 512) {
4668	return MP_VAL;
4669	}
4670
4671	/* allocate buffer to work with */
4672	buf = (byte*)XMALLOC(len, heap, DYNAMIC_TYPE_RSA);
4673	if (buf == NULL) {
4674	return MP_MEM;
4675	}
4676	XMEMSET(buf, 0, len);
4677
4678	do {
4679	#ifdef SHOW_GEN
4680	printf(".");
4681	fflush(stdout);
4682	#endif
4683	/* generate value */
4684	err = wc_RNG_GenerateBlock(rng, buf, len);
4685	if (err != 0) {
4686	XFREE(buf, heap, DYNAMIC_TYPE_RSA);
4687	return err;
4688	}
4689
4690	/* munge bits */
4691	buf[0] \|= 0x80 \| 0x40;
4692	buf[len-1] \|= 0x01 \| ((type & USE_BBS) ? 0x02 : 0x00);
4693
4694	/* load value */
4695	if ((err = mp_read_unsigned_bin(N, buf, len)) != MP_OKAY) {
4696	XFREE(buf, heap, DYNAMIC_TYPE_RSA);
4697	return err;
4698	}
4699
4700	/* test */
4701	/* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance
4702	* of a 1024-bit candidate being a false positive, when it is our
4703	* prime candidate. (Note 4.49 of Handbook of Applied Cryptography.)
4704	* Using 8 because we've always used 8. */
4705	if ((err = mp_prime_is_prime_ex(N, 8, &res, rng)) != MP_OKAY) {
4706	XFREE(buf, heap, DYNAMIC_TYPE_RSA);
4707	return err;
4708	}
4709	} while (res == MP_NO);
4710
4711	XMEMSET(buf, 0, len);
4712	XFREE(buf, heap, DYNAMIC_TYPE_RSA);
4713
4714	return MP_OKAY;
4715	}
4716
4717
4718	/* computes least common multiple as \|ab\|/(a, b) /
4719	int mp_lcm (mp_int * a, mp_int * b, mp_int * c)
4720	{
4721	int res;
4722	mp_int t1, t2;
4723
4724
4725	if ((res = mp_init_multi (&t1, &t2, NULL, NULL, NULL, NULL)) != MP_OKAY) {
4726	return res;
4727	}
4728
4729	/* t1 = get the GCD of the two inputs */
4730	if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
4731	goto LBL_T;
4732	}
4733
4734	/* divide the smallest by the GCD */
4735	if (mp_cmp_mag(a, b) == MP_LT) {
4736	/* store quotient in t2 such that t2 * b is the LCM */
4737	if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
4738	goto LBL_T;
4739	}
4740	res = mp_mul(b, &t2, c);
4741	} else {
4742	/* store quotient in t2 such that t2 * a is the LCM */
4743	if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
4744	goto LBL_T;
4745	}
4746	res = mp_mul(a, &t2, c);
4747	}
4748
4749	/* fix the sign to positive */
4750	c->sign = MP_ZPOS;
4751
4752	LBL_T:
4753	mp_clear(&t1);
4754	mp_clear(&t2);
4755	return res;
4756	}
4757
4758
4759
4760	/* Greatest Common Divisor using the binary method */
4761	int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
4762	{
4763	mp_int u, v;
4764	int k, u_lsb, v_lsb, res;
4765
4766	/* either zero than gcd is the largest */
4767	if (mp_iszero (a) == MP_YES) {
4768	return mp_abs (b, c);
4769	}
4770	if (mp_iszero (b) == MP_YES) {
4771	return mp_abs (a, c);
4772	}
4773
4774	/* get copies of a and b we can modify */
4775	if ((res = mp_init_copy (&u, a)) != MP_OKAY) {
4776	return res;
4777	}
4778
4779	if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
4780	goto LBL_U;
4781	}
4782
4783	/* must be positive for the remainder of the algorithm */
4784	u.sign = v.sign = MP_ZPOS;
4785
4786	/* B1. Find the common power of two for u and v */
4787	u_lsb = mp_cnt_lsb(&u);
4788	v_lsb = mp_cnt_lsb(&v);
4789	k = MIN(u_lsb, v_lsb);
4790
4791	if (k > 0) {
4792	/* divide the power of two out */
4793	if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
4794	goto LBL_V;
4795	}
4796
4797	if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
4798	goto LBL_V;
4799	}
4800	}
4801
4802	/* divide any remaining factors of two out */
4803	if (u_lsb != k) {
4804	if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
4805	goto LBL_V;
4806	}
4807	}
4808
4809	if (v_lsb != k) {
4810	if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
4811	goto LBL_V;
4812	}
4813	}
4814
4815	while (mp_iszero(&v) == MP_NO) {
4816	/* make sure v is the largest */
4817	if (mp_cmp_mag(&u, &v) == MP_GT) {
4818	/* swap u and v to make sure v is >= u */
4819	mp_exch(&u, &v);
4820	}
4821
4822	/* subtract smallest from largest */
4823	if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
4824	goto LBL_V;
4825	}
4826
4827	/* Divide out all factors of two */
4828	if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
4829	goto LBL_V;
4830	}
4831	}
4832
4833	/* multiply by 2*k which we divided out at the beginning /
4834	if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
4835	goto LBL_V;
4836	}
4837	c->sign = MP_ZPOS;
4838	res = MP_OKAY;
4839	LBL_V:mp_clear (&u);
4840	LBL_U:mp_clear (&v);
4841	return res;
4842	}
4843
4844	#endif /* WOLFSSL_KEY_GEN */
4845
4846
4847	#if !defined(NO_DSA) \|\| defined(HAVE_ECC) \|\| defined(WOLFSSL_KEY_GEN) \|\| \
4848	defined(HAVE_COMP_KEY) \|\| defined(WOLFSSL_DEBUG_MATH) \|\| \
4849	defined(DEBUG_WOLFSSL) \|\| defined(OPENSSL_EXTRA)
4850
4851	/* chars used in radix conversions */
4852	const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ\
4853	abcdefghijklmnopqrstuvwxyz+/";
4854	#endif
4855
4856	#if !defined(NO_DSA) \|\| defined(HAVE_ECC)
4857	/* read a string [ASCII] in a given radix */
4858	int mp_read_radix (mp_int * a, const char *str, int radix)
4859	{
4860	int y, res, neg;
4861	char ch;
4862
4863	/* zero the digit bignum */
4864	mp_zero(a);
4865
4866	/* make sure the radix is ok */
4867	if (radix < MP_RADIX_BIN \|\| radix > MP_RADIX_MAX) {
4868	return MP_VAL;
4869	}
4870
4871	/* if the leading digit is a
4872	* minus set the sign to negative.
4873	*/
4874	if (*str == '-') {
4875	++str;
4876	neg = MP_NEG;
4877	} else {
4878	neg = MP_ZPOS;
4879	}
4880
4881	/* set the integer to the default of zero */
4882	mp_zero (a);
4883
4884	/* process each digit of the string */
4885	while (*str != '\0') {
4886	/* if the radix <= 36 the conversion is case insensitive
4887	* this allows numbers like 1AB and 1ab to represent the same value
4888	* [e.g. in hex]
4889	*/
4890	ch = (radix <= 36) ? (char)XTOUPPER((unsigned char)str) : str;
4891	for (y = 0; y < 64; y++) {
4892	if (ch == mp_s_rmap[y]) {
4893	break;
4894	}
4895	}
4896
4897	/* if the char was found in the map
4898	* and is less than the given radix add it
4899	* to the number, otherwise exit the loop.
4900	*/
4901	if (y < radix) {
4902	if ((res = mp_mul_d (a, (mp_digit) radix, a)) != MP_OKAY) {
4903	return res;
4904	}
4905	if ((res = mp_add_d (a, (mp_digit) y, a)) != MP_OKAY) {
4906	return res;
4907	}
4908	} else {
4909	break;
4910	}
4911	++str;
4912	}
4913
4914	/* if digit in isn't null term, then invalid character was found */
4915	if (*str != '\0') {
4916	mp_zero (a);
4917	return MP_VAL;
4918	}
4919
4920	/* set the sign only if a != 0 */
4921	if (mp_iszero(a) != MP_YES) {
4922	a->sign = neg;
4923	}
4924	return MP_OKAY;
4925	}
4926	#endif /* !defined(NO_DSA) \|\| defined(HAVE_ECC) */
4927
4928	#ifdef WC_MP_TO_RADIX
4929
4930	/* returns size of ASCII representation */
4931	int mp_radix_size (mp_int a, int radix, int size)
4932	{
4933	int res, digs;
4934	mp_int t;
4935	mp_digit d;
4936
4937	*size = 0;
4938
4939	/* special case for binary */
4940	if (radix == MP_RADIX_BIN) {
4941	*size = mp_count_bits (a) + (a->sign == MP_NEG ? 1 : 0) + 1;
4942	return MP_OKAY;
4943	}
4944
4945	/* make sure the radix is in range */
4946	if (radix < MP_RADIX_BIN \|\| radix > MP_RADIX_MAX) {
4947	return MP_VAL;
4948	}
4949
4950	if (mp_iszero(a) == MP_YES) {
4951	*size = 2;
4952	return MP_OKAY;
4953	}
4954
4955	/* digs is the digit count */
4956	digs = 0;
4957
4958	/* if it's negative add one for the sign */
4959	if (a->sign == MP_NEG) {
4960	++digs;
4961	}
4962
4963	/* init a copy of the input */
4964	if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
4965	return res;
4966	}
4967
4968	/* force temp to positive */
4969	t.sign = MP_ZPOS;
4970
4971	/* fetch out all of the digits */
4972	while (mp_iszero (&t) == MP_NO) {
4973	if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
4974	mp_clear (&t);
4975	return res;
4976	}
4977	++digs;
4978	}
4979	mp_clear (&t);
4980
4981	/* return digs + 1, the 1 is for the NULL byte that would be required. */
4982	*size = digs + 1;
4983	return MP_OKAY;
4984	}
4985
4986	/* stores a bignum as a ASCII string in a given radix (2..64) */
4987	int mp_toradix (mp_int a, char str, int radix)
4988	{
4989	int res, digs;
4990	mp_int t;
4991	mp_digit d;
4992	char *_s = str;
4993
4994	/* check range of the radix */
4995	if (radix < MP_RADIX_BIN \|\| radix > MP_RADIX_MAX) {
4996	return MP_VAL;
4997	}
4998
4999	/* quick out if its zero */
5000	if (mp_iszero(a) == MP_YES) {
5001	*str++ = '0';
5002	*str = '\0';
5003	return MP_OKAY;
5004	}
5005
5006	if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
5007	return res;
5008	}
5009
5010	/* if it is negative output a - */
5011	if (t.sign == MP_NEG) {
5012	++_s;
5013	*str++ = '-';
5014	t.sign = MP_ZPOS;
5015	}
5016
5017	digs = 0;
5018	while (mp_iszero (&t) == MP_NO) {
5019	if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
5020	mp_clear (&t);
5021	return res;
5022	}
5023	*str++ = mp_s_rmap[d];
5024	++digs;
5025	}
5026
5027	/* reverse the digits of the string. In this case _s points
5028	* to the first digit [excluding the sign] of the number]
5029	*/
5030	bn_reverse ((unsigned char *)_s, digs);
5031
5032	/* append a NULL so the string is properly terminated */
5033	*str = '\0';
5034
5035	mp_clear (&t);
5036	return MP_OKAY;
5037	}
5038
5039	#ifdef WOLFSSL_DEBUG_MATH
5040	void mp_dump(const char* desc, mp_int* a, byte verbose)
5041	{
5042	char *buffer;
5043	int size = a->alloc;
5044
5045	buffer = (char)XMALLOC(size sizeof(mp_digit) * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
5046	if (buffer == NULL) {
5047	return;
5048	}
5049
5050	printf("%s: ptr=%p, used=%d, sign=%d, size=%d, mpd=%d\n",
5051	desc, a, a->used, a->sign, size, (int)sizeof(mp_digit));
5052
5053	mp_tohex(a, buffer);
5054	printf(" %s\n ", buffer);
5055
5056	if (verbose) {
5057	int i;
5058	for(i=0; i<a->alloc * (int)sizeof(mp_digit); i++) {
5059	printf("%02x ", (((byte)a->dp) + i));
5060	}
5061	printf("\n");
5062	}
5063
5064	XFREE(buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
5065	}
5066	#endif /* WOLFSSL_DEBUG_MATH */
5067
5068	#endif /* WC_MP_TO_RADIX */
5069
5070	#endif /* WOLFSSL_SP_MATH */
5071
5072	#endif /* USE_FAST_MATH */
5073
5074	#endif /* NO_BIG_INT */
5075

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: asp3_tinet_ecnl_arm/trunk/wolfssl-3.12.2/wolfcrypt/src/integer.c@ 372

Download in other formats: