Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: asp3_tinet_ecnl_arm/trunk/wolfssl-3.12.2/wolfcrypt/src/integer.c@ 352

Last change on this file since 352 was 352, checked in by coas-nagasima, 6 years ago
arm向けASP3版ECNLを追加
Property svn:eol-style set to `native` Property svn:mime-type set to `text/x-csrc;charset=UTF-8`
File size: 107.5 KB

Line
1	/* integer.c
2	*
3	* Copyright (C) 2006-2017 wolfSSL Inc.
4	*
5	* This file is part of wolfSSL.
6	*
7	* wolfSSL is free software; you can redistribute it and/or modify
8	* it under the terms of the GNU General Public License as published by
9	* the Free Software Foundation; either version 2 of the License, or
10	* (at your option) any later version.
11	*
12	* wolfSSL is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15	* GNU General Public License for more details.
16	*
17	* You should have received a copy of the GNU General Public License
18	* along with this program; if not, write to the Free Software
19	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20	*/
21
22
23
24	/*
25	* Based on public domain LibTomMath 0.38 by Tom St Denis, tomstdenis@iahu.ca,
26	* http://math.libtomcrypt.com
27	*/
28
29
30	#ifdef HAVE_CONFIG_H
31	#include <config.h>
32	#endif
33
34	/* in case user set USE_FAST_MATH there */
35	#include <wolfssl/wolfcrypt/settings.h>
36
37	#ifdef NO_INLINE
38	#include <wolfssl/wolfcrypt/misc.h>
39	#else
40	#define WOLFSSL_MISC_INCLUDED
41	#include <wolfcrypt/src/misc.c>
42	#endif
43
44	#ifndef NO_BIG_INT
45
46	#ifndef USE_FAST_MATH
47
48	#include <wolfssl/wolfcrypt/integer.h>
49
50	#if defined(FREESCALE_LTC_TFM)
51	#include <wolfssl/wolfcrypt/port/nxp/ksdk_port.h>
52	#endif
53	#ifdef WOLFSSL_DEBUG_MATH
54	#include <stdio.h>
55	#endif
56
57	#ifndef NO_WOLFSSL_SMALL_STACK
58	#ifndef WOLFSSL_SMALL_STACK
59	#define WOLFSSL_SMALL_STACK
60	#endif
61	#endif
62
63	#ifdef SHOW_GEN
64	#if defined(FREESCALE_MQX) \|\| defined(FREESCALE_KSDK_MQX)
65	#if MQX_USE_IO_OLD
66	#include <fio.h>
67	#else
68	#include <nio.h>
69	#endif
70	#else
71	#include <stdio.h>
72	#endif
73	#endif
74
75	/* reverse an array, used for radix code */
76	static void
77	bn_reverse (unsigned char *s, int len)
78	{
79	int ix, iy;
80	unsigned char t;
81
82	ix = 0;
83	iy = len - 1;
84	while (ix < iy) {
85	t = s[ix];
86	s[ix] = s[iy];
87	s[iy] = t;
88	++ix;
89	--iy;
90	}
91	}
92
93	/* math settings check */
94	word32 CheckRunTimeSettings(void)
95	{
96	return CTC_SETTINGS;
97	}
98
99
100	/* handle up to 6 inits */
101	int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e,
102	mp_int* f)
103	{
104	int res = MP_OKAY;
105
106	if (a) XMEMSET(a, 0, sizeof(mp_int));
107	if (b) XMEMSET(b, 0, sizeof(mp_int));
108	if (c) XMEMSET(c, 0, sizeof(mp_int));
109	if (d) XMEMSET(d, 0, sizeof(mp_int));
110	if (e) XMEMSET(e, 0, sizeof(mp_int));
111	if (f) XMEMSET(f, 0, sizeof(mp_int));
112
113	if (a && ((res = mp_init(a)) != MP_OKAY))
114	return res;
115
116	if (b && ((res = mp_init(b)) != MP_OKAY)) {
117	mp_clear(a);
118	return res;
119	}
120
121	if (c && ((res = mp_init(c)) != MP_OKAY)) {
122	mp_clear(a); mp_clear(b);
123	return res;
124	}
125
126	if (d && ((res = mp_init(d)) != MP_OKAY)) {
127	mp_clear(a); mp_clear(b); mp_clear(c);
128	return res;
129	}
130
131	if (e && ((res = mp_init(e)) != MP_OKAY)) {
132	mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d);
133	return res;
134	}
135
136	if (f && ((res = mp_init(f)) != MP_OKAY)) {
137	mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d); mp_clear(e);
138	return res;
139	}
140
141	return res;
142	}
143
144
145	/* init a new mp_int */
146	int mp_init (mp_int * a)
147	{
148	/* Safeguard against passing in a null pointer */
149	if (a == NULL)
150	return MP_VAL;
151
152	/* defer allocation until mp_grow */
153	a->dp = NULL;
154
155	/* set the used to zero, allocated digits to the default precision
156	* and sign to positive */
157	a->used = 0;
158	a->alloc = 0;
159	a->sign = MP_ZPOS;
160	#ifdef HAVE_WOLF_BIGINT
161	wc_bigint_init(&a->raw);
162	#endif
163
164	return MP_OKAY;
165	}
166
167
168	/* clear one (frees) */
169	void mp_clear (mp_int * a)
170	{
171	int i;
172
173	if (a == NULL)
174	return;
175
176	/* only do anything if a hasn't been freed previously */
177	if (a->dp != NULL) {
178	/* first zero the digits */
179	for (i = 0; i < a->used; i++) {
180	a->dp[i] = 0;
181	}
182
183	/* free ram */
184	mp_free(a);
185
186	/* reset members to make debugging easier */
187	a->alloc = a->used = 0;
188	a->sign = MP_ZPOS;
189	}
190	}
191
192	void mp_free (mp_int * a)
193	{
194	/* only do anything if a hasn't been freed previously */
195	if (a->dp != NULL) {
196	/* free ram */
197	XFREE(a->dp, 0, DYNAMIC_TYPE_BIGINT);
198	a->dp = NULL;
199	}
200
201	#ifdef HAVE_WOLF_BIGINT
202	wc_bigint_free(&a->raw);
203	#endif
204	}
205
206	void mp_forcezero(mp_int * a)
207	{
208	if (a == NULL)
209	return;
210
211	/* only do anything if a hasn't been freed previously */
212	if (a->dp != NULL) {
213	/* force zero the used digits */
214	ForceZero(a->dp, a->used * sizeof(mp_digit));
215	#ifdef HAVE_WOLF_BIGINT
216	wc_bigint_zero(&a->raw);
217	#endif
218	/* free ram */
219	mp_free(a);
220
221	/* reset members to make debugging easier */
222	a->alloc = a->used = 0;
223	a->sign = MP_ZPOS;
224	}
225
226	a->sign = MP_ZPOS;
227	a->used = 0;
228	}
229
230
231	/* get the size for an unsigned equivalent */
232	int mp_unsigned_bin_size (mp_int * a)
233	{
234	int size = mp_count_bits (a);
235	return (size / 8 + ((size & 7) != 0 ? 1 : 0));
236	}
237
238
239	/* returns the number of bits in an int */
240	int mp_count_bits (mp_int * a)
241	{
242	int r;
243	mp_digit q;
244
245	/* shortcut */
246	if (a->used == 0) {
247	return 0;
248	}
249
250	/* get number of digits and add that */
251	r = (a->used - 1) * DIGIT_BIT;
252
253	/* take the last digit and count the bits in it */
254	q = a->dp[a->used - 1];
255	while (q > ((mp_digit) 0)) {
256	++r;
257	q >>= ((mp_digit) 1);
258	}
259	return r;
260	}
261
262
263	int mp_leading_bit (mp_int * a)
264	{
265	int bit = 0;
266	mp_int t;
267
268	if (mp_init_copy(&t, a) != MP_OKAY)
269	return 0;
270
271	while (mp_iszero(&t) == MP_NO) {
272	#ifndef MP_8BIT
273	bit = (t.dp[0] & 0x80) != 0;
274	#else
275	bit = (t.dp[0] \| ((t.dp[1] & 0x01) << 7)) & 0x80 != 0;
276	#endif
277	if (mp_div_2d (&t, 8, &t, NULL) != MP_OKAY)
278	break;
279	}
280	mp_clear(&t);
281	return bit;
282	}
283
284	int mp_to_unsigned_bin_at_pos(int x, mp_int t, unsigned char b)
285	{
286	int res = 0;
287	while (mp_iszero(t) == MP_NO) {
288	#ifndef MP_8BIT
289	b[x++] = (unsigned char) (t->dp[0] & 255);
290	#else
291	b[x++] = (unsigned char) (t->dp[0] \| ((t->dp[1] & 0x01) << 7));
292	#endif
293	if ((res = mp_div_2d (t, 8, t, NULL)) != MP_OKAY) {
294	return res;
295	}
296	res = x;
297	}
298	return res;
299	}
300
301	/* store in unsigned [big endian] format */
302	int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
303	{
304	int x, res;
305	mp_int t;
306
307	if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
308	return res;
309	}
310
311	x = mp_to_unsigned_bin_at_pos(0, &t, b);
312	if (x < 0) {
313	mp_clear(&t);
314	return x;
315	}
316
317	bn_reverse (b, x);
318	mp_clear (&t);
319	return res;
320	}
321
322
323	/* creates "a" then copies b into it */
324	int mp_init_copy (mp_int * a, mp_int * b)
325	{
326	int res;
327
328	if ((res = mp_init_size (a, b->used)) != MP_OKAY) {
329	return res;
330	}
331
332	if((res = mp_copy (b, a)) != MP_OKAY) {
333	mp_clear(a);
334	}
335
336	return res;
337	}
338
339
340	/* copy, b = a */
341	int mp_copy (mp_int * a, mp_int * b)
342	{
343	int res, n;
344
345	/* Safeguard against passing in a null pointer */
346	if (a == NULL \|\| b == NULL)
347	return MP_VAL;
348
349	/* if dst == src do nothing */
350	if (a == b) {
351	return MP_OKAY;
352	}
353
354	/* grow dest */
355	if (b->alloc < a->used \|\| b->alloc == 0) {
356	if ((res = mp_grow (b, a->used)) != MP_OKAY) {
357	return res;
358	}
359	}
360
361	/* zero b and copy the parameters over */
362	{
363	mp_digit tmpa, tmpb;
364
365	/* pointer aliases */
366
367	/* source */
368	tmpa = a->dp;
369
370	/* destination */
371	tmpb = b->dp;
372
373	/* copy all the digits */
374	for (n = 0; n < a->used; n++) {
375	tmpb++ = tmpa++;
376	}
377
378	/* clear high digits */
379	for (; n < b->used && b->dp; n++) {
380	*tmpb++ = 0;
381	}
382	}
383
384	/* copy used count and sign */
385	b->used = a->used;
386	b->sign = a->sign;
387	return MP_OKAY;
388	}
389
390
391	/* grow as required */
392	int mp_grow (mp_int * a, int size)
393	{
394	int i;
395	mp_digit *tmp;
396
397	/* if the alloc size is smaller alloc more ram */
398	if (a->alloc < size \|\| size == 0) {
399	/* ensure there are always at least MP_PREC digits extra on top */
400	size += (MP_PREC * 2) - (size % MP_PREC);
401
402	/* reallocate the array a->dp
403	*
404	* We store the return in a temporary variable
405	* in case the operation failed we don't want
406	* to overwrite the dp member of a.
407	*/
408	tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size, NULL,
409	DYNAMIC_TYPE_BIGINT);
410	if (tmp == NULL) {
411	/* reallocation failed but "a" is still valid [can be freed] */
412	return MP_MEM;
413	}
414
415	/* reallocation succeeded so set a->dp */
416	a->dp = tmp;
417
418	/* zero excess digits */
419	i = a->alloc;
420	a->alloc = size;
421	for (; i < a->alloc; i++) {
422	a->dp[i] = 0;
423	}
424	}
425	return MP_OKAY;
426	}
427
428
429	/* shift right by a certain bit count (store quotient in c, optional
430	remainder in d) */
431	int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
432	{
433	int D, res;
434	mp_int t;
435
436
437	/* if the shift count is <= 0 then we do no work */
438	if (b <= 0) {
439	res = mp_copy (a, c);
440	if (d != NULL) {
441	mp_zero (d);
442	}
443	return res;
444	}
445
446	if ((res = mp_init (&t)) != MP_OKAY) {
447	return res;
448	}
449
450	/* get the remainder */
451	if (d != NULL) {
452	if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
453	mp_clear (&t);
454	return res;
455	}
456	}
457
458	/* copy */
459	if ((res = mp_copy (a, c)) != MP_OKAY) {
460	mp_clear (&t);
461	return res;
462	}
463
464	/* shift by as many digits in the bit count */
465	if (b >= (int)DIGIT_BIT) {
466	mp_rshd (c, b / DIGIT_BIT);
467	}
468
469	/* shift any bit count < DIGIT_BIT */
470	D = (b % DIGIT_BIT);
471	if (D != 0) {
472	mp_rshb(c, D);
473	}
474	mp_clamp (c);
475	if (d != NULL) {
476	mp_exch (&t, d);
477	}
478	mp_clear (&t);
479	return MP_OKAY;
480	}
481
482
483	/* set to zero */
484	void mp_zero (mp_int * a)
485	{
486	int n;
487	mp_digit *tmp;
488
489	if (a == NULL)
490	return;
491
492	a->sign = MP_ZPOS;
493	a->used = 0;
494
495	tmp = a->dp;
496	for (n = 0; n < a->alloc; n++) {
497	*tmp++ = 0;
498	}
499	}
500
501
502	/* trim unused digits
503	*
504	* This is used to ensure that leading zero digits are
505	* trimmed and the leading "used" digit will be non-zero
506	* Typically very fast. Also fixes the sign if there
507	* are no more leading digits
508	*/
509	void mp_clamp (mp_int * a)
510	{
511	/* decrease used while the most significant digit is
512	* zero.
513	*/
514	while (a->used > 0 && a->dp[a->used - 1] == 0) {
515	--(a->used);
516	}
517
518	/* reset the sign flag if used == 0 */
519	if (a->used == 0) {
520	a->sign = MP_ZPOS;
521	}
522	}
523
524
525	/* swap the elements of two integers, for cases where you can't simply swap the
526	* mp_int pointers around
527	*/
528	void mp_exch (mp_int * a, mp_int * b)
529	{
530	mp_int t;
531
532	t = *a;
533	a = b;
534	*b = t;
535	}
536
537
538	/* shift right a certain number of bits */
539	void mp_rshb (mp_int *c, int x)
540	{
541	mp_digit *tmpc, mask, shift;
542	mp_digit r, rr;
543	mp_digit D = x;
544
545	/* mask */
546	mask = (((mp_digit)1) << D) - 1;
547
548	/* shift for lsb */
549	shift = DIGIT_BIT - D;
550
551	/* alias */
552	tmpc = c->dp + (c->used - 1);
553
554	/* carry */
555	r = 0;
556	for (x = c->used - 1; x >= 0; x--) {
557	/* get the lower bits of this word in a temp */
558	rr = *tmpc & mask;
559
560	/* shift the current word and mix in the carry bits from previous word */
561	tmpc = (tmpc >> D) \| (r << shift);
562	--tmpc;
563
564	/* set the carry to the carry bits of the current word found above */
565	r = rr;
566	}
567	mp_clamp(c);
568	}
569
570
571	/* shift right a certain amount of digits */
572	void mp_rshd (mp_int * a, int b)
573	{
574	int x;
575
576	/* if b <= 0 then ignore it */
577	if (b <= 0) {
578	return;
579	}
580
581	/* if b > used then simply zero it and return */
582	if (a->used <= b) {
583	mp_zero (a);
584	return;
585	}
586
587	{
588	mp_digit bottom, top;
589
590	/* shift the digits down */
591
592	/* bottom */
593	bottom = a->dp;
594
595	/* top [offset into digits] */
596	top = a->dp + b;
597
598	/* this is implemented as a sliding window where
599	* the window is b-digits long and digits from
600	* the top of the window are copied to the bottom
601	*
602	* e.g.
603
604	b-2 \| b-1 \| b0 \| b1 \| b2 \| ... \| bb \| ---->
605	/\ \| ---->
606	\-------------------/ ---->
607	*/
608	for (x = 0; x < (a->used - b); x++) {
609	bottom++ = top++;
610	}
611
612	/* zero the top digits */
613	for (; x < a->used; x++) {
614	*bottom++ = 0;
615	}
616	}
617
618	/* remove excess digits */
619	a->used -= b;
620	}
621
622
623	/* calc a value mod 2*b /
624	int mp_mod_2d (mp_int * a, int b, mp_int * c)
625	{
626	int x, res;
627
628	/* if b is <= 0 then zero the int */
629	if (b <= 0) {
630	mp_zero (c);
631	return MP_OKAY;
632	}
633
634	/* if the modulus is larger than the value than return */
635	if (b >= (int) (a->used * DIGIT_BIT)) {
636	res = mp_copy (a, c);
637	return res;
638	}
639
640	/* copy */
641	if ((res = mp_copy (a, c)) != MP_OKAY) {
642	return res;
643	}
644
645	/* zero digits above the last digit of the modulus */
646	for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
647	c->dp[x] = 0;
648	}
649	/* clear the digit that is not completely outside/inside the modulus */
650	c->dp[b / DIGIT_BIT] &= (mp_digit) ((((mp_digit) 1) <<
651	(((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
652	mp_clamp (c);
653	return MP_OKAY;
654	}
655
656
657	/* reads a unsigned char array, assumes the msb is stored first [big endian] */
658	int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
659	{
660	int res;
661
662	/* make sure there are at least two digits */
663	if (a->alloc < 2) {
664	if ((res = mp_grow(a, 2)) != MP_OKAY) {
665	return res;
666	}
667	}
668
669	/* zero the int */
670	mp_zero (a);
671
672	/* read the bytes in */
673	while (c-- > 0) {
674	if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
675	return res;
676	}
677
678	#ifndef MP_8BIT
679	a->dp[0] \|= *b++;
680	a->used += 1;
681	#else
682	a->dp[0] = (*b & MP_MASK);
683	a->dp[1] \|= ((*b++ >> 7U) & 1);
684	a->used += 2;
685	#endif
686	}
687	mp_clamp (a);
688	return MP_OKAY;
689	}
690
691
692	/* shift left by a certain bit count */
693	int mp_mul_2d (mp_int * a, int b, mp_int * c)
694	{
695	mp_digit d;
696	int res;
697
698	/* copy */
699	if (a != c) {
700	if ((res = mp_copy (a, c)) != MP_OKAY) {
701	return res;
702	}
703	}
704
705	if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
706	if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
707	return res;
708	}
709	}
710
711	/* shift by as many digits in the bit count */
712	if (b >= (int)DIGIT_BIT) {
713	if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
714	return res;
715	}
716	}
717
718	/* shift any bit count < DIGIT_BIT */
719	d = (mp_digit) (b % DIGIT_BIT);
720	if (d != 0) {
721	mp_digit *tmpc, shift, mask, r, rr;
722	int x;
723
724	/* bitmask for carries */
725	mask = (((mp_digit)1) << d) - 1;
726
727	/* shift for msbs */
728	shift = DIGIT_BIT - d;
729
730	/* alias */
731	tmpc = c->dp;
732
733	/* carry */
734	r = 0;
735	for (x = 0; x < c->used; x++) {
736	/* get the higher bits of the current word */
737	rr = (*tmpc >> shift) & mask;
738
739	/* shift the current word and OR in the carry */
740	tmpc = (mp_digit)(((tmpc << d) \| r) & MP_MASK);
741	++tmpc;
742
743	/* set the carry to the carry bits of the current word */
744	r = rr;
745	}
746
747	/* set final carry */
748	if (r != 0) {
749	c->dp[(c->used)++] = r;
750	}
751	}
752	mp_clamp (c);
753	return MP_OKAY;
754	}
755
756
757	/* shift left a certain amount of digits */
758	int mp_lshd (mp_int * a, int b)
759	{
760	int x, res;
761
762	/* if its less than zero return */
763	if (b <= 0) {
764	return MP_OKAY;
765	}
766
767	/* grow to fit the new digits */
768	if (a->alloc < a->used + b) {
769	if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
770	return res;
771	}
772	}
773
774	{
775	mp_digit top, bottom;
776
777	/* increment the used by the shift amount then copy upwards */
778	a->used += b;
779
780	/* top */
781	top = a->dp + a->used - 1;
782
783	/* base */
784	bottom = a->dp + a->used - 1 - b;
785
786	/* much like mp_rshd this is implemented using a sliding window
787	* except the window goes the other way around. Copying from
788	* the bottom to the top. see bn_mp_rshd.c for more info.
789	*/
790	for (x = a->used - 1; x >= b; x--) {
791	top-- = bottom--;
792	}
793
794	/* zero the lower digits */
795	top = a->dp;
796	for (x = 0; x < b; x++) {
797	*top++ = 0;
798	}
799	}
800	return MP_OKAY;
801	}
802
803
804	/* this is a shell function that calls either the normal or Montgomery
805	* exptmod functions. Originally the call to the montgomery code was
806	* embedded in the normal function but that wasted a lot of stack space
807	* for nothing (since 99% of the time the Montgomery code would be called)
808	*/
809	#if defined(FREESCALE_LTC_TFM)
810	int wolfcrypt_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
811	#else
812	int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
813	#endif
814	{
815	int dr;
816
817	/* modulus P must be positive */
818	if (P->sign == MP_NEG) {
819	return MP_VAL;
820	}
821
822	/* if exponent X is negative we have to recurse */
823	if (X->sign == MP_NEG) {
824	#ifdef BN_MP_INVMOD_C
825	mp_int tmpG, tmpX;
826	int err;
827
828	/* first compute 1/G mod P */
829	if ((err = mp_init(&tmpG)) != MP_OKAY) {
830	return err;
831	}
832	if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
833	mp_clear(&tmpG);
834	return err;
835	}
836
837	/* now get \|X\| */
838	if ((err = mp_init(&tmpX)) != MP_OKAY) {
839	mp_clear(&tmpG);
840	return err;
841	}
842	if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
843	mp_clear(&tmpG);
844	mp_clear(&tmpX);
845	return err;
846	}
847
848	/* and now compute (1/G)\|X\| instead of GX [X < 0] */
849	err = mp_exptmod(&tmpG, &tmpX, P, Y);
850	mp_clear(&tmpG);
851	mp_clear(&tmpX);
852	return err;
853	#else
854	/* no invmod */
855	return MP_VAL;
856	#endif
857	}
858
859	/* modified diminished radix reduction */
860	#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && \
861	defined(BN_S_MP_EXPTMOD_C)
862	if (mp_reduce_is_2k_l(P) == MP_YES) {
863	return s_mp_exptmod(G, X, P, Y, 1);
864	}
865	#endif
866
867	#ifdef BN_MP_DR_IS_MODULUS_C
868	/* is it a DR modulus? */
869	dr = mp_dr_is_modulus(P);
870	#else
871	/* default to no */
872	dr = 0;
873	#endif
874
875	#ifdef BN_MP_REDUCE_IS_2K_C
876	/* if not, is it a unrestricted DR modulus? */
877	if (dr == 0) {
878	dr = mp_reduce_is_2k(P) << 1;
879	}
880	#endif
881
882	/* if the modulus is odd or dr != 0 use the montgomery method */
883	#ifdef BN_MP_EXPTMOD_FAST_C
884	if (mp_isodd (P) == MP_YES \|\| dr != 0) {
885	return mp_exptmod_fast (G, X, P, Y, dr);
886	} else {
887	#endif
888	#ifdef BN_S_MP_EXPTMOD_C
889	/* otherwise use the generic Barrett reduction technique */
890	return s_mp_exptmod (G, X, P, Y, 0);
891	#else
892	/* no exptmod for evens */
893	return MP_VAL;
894	#endif
895	#ifdef BN_MP_EXPTMOD_FAST_C
896	}
897	#endif
898	}
899
900
901	/* b = \|a\|
902	*
903	* Simple function copies the input and fixes the sign to positive
904	*/
905	int mp_abs (mp_int * a, mp_int * b)
906	{
907	int res;
908
909	/* copy a to b */
910	if (a != b) {
911	if ((res = mp_copy (a, b)) != MP_OKAY) {
912	return res;
913	}
914	}
915
916	/* force the sign of b to positive */
917	b->sign = MP_ZPOS;
918
919	return MP_OKAY;
920	}
921
922
923	/* hac 14.61, pp608 */
924	#if defined(FREESCALE_LTC_TFM)
925	int wolfcrypt_mp_invmod(mp_int * a, mp_int * b, mp_int * c)
926	#else
927	int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
928	#endif
929	{
930	/* b cannot be negative */
931	if (b->sign == MP_NEG \|\| mp_iszero(b) == MP_YES) {
932	return MP_VAL;
933	}
934
935	#ifdef BN_FAST_MP_INVMOD_C
936	/* if the modulus is odd we can use a faster routine instead */
937	if ((mp_isodd(b) == MP_YES) && (mp_cmp_d(b, 1) != MP_EQ)) {
938	return fast_mp_invmod (a, b, c);
939	}
940	#endif
941
942	#ifdef BN_MP_INVMOD_SLOW_C
943	return mp_invmod_slow(a, b, c);
944	#else
945	return MP_VAL;
946	#endif
947	}
948
949
950	/* computes the modular inverse via binary extended euclidean algorithm,
951	* that is c = 1/a mod b
952	*
953	* Based on slow invmod except this is optimized for the case where b is
954	* odd as per HAC Note 14.64 on pp. 610
955	*/
956	int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
957	{
958	mp_int x, y, u, v, B, D;
959	int res, neg, loop_check = 0;
960
961	/* 2. [modified] b must be odd */
962	if (mp_iseven (b) == MP_YES) {
963	return MP_VAL;
964	}
965
966	/* init all our temps */
967	if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D)) != MP_OKAY) {
968	return res;
969	}
970
971	/* x == modulus, y == value to invert */
972	if ((res = mp_copy (b, &x)) != MP_OKAY) {
973	goto LBL_ERR;
974	}
975
976	/* we need y = \|a\| */
977	if ((res = mp_mod (a, b, &y)) != MP_OKAY) {
978	goto LBL_ERR;
979	}
980
981	/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
982	if ((res = mp_copy (&x, &u)) != MP_OKAY) {
983	goto LBL_ERR;
984	}
985	if ((res = mp_copy (&y, &v)) != MP_OKAY) {
986	goto LBL_ERR;
987	}
988	if ((res = mp_set (&D, 1)) != MP_OKAY) {
989	goto LBL_ERR;
990	}
991
992	top:
993	/* 4. while u is even do */
994	while (mp_iseven (&u) == MP_YES) {
995	/* 4.1 u = u/2 */
996	if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
997	goto LBL_ERR;
998	}
999	/* 4.2 if B is odd then */
1000	if (mp_isodd (&B) == MP_YES) {
1001	if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
1002	goto LBL_ERR;
1003	}
1004	}
1005	/* B = B/2 */
1006	if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
1007	goto LBL_ERR;
1008	}
1009	}
1010
1011	/* 5. while v is even do */
1012	while (mp_iseven (&v) == MP_YES) {
1013	/* 5.1 v = v/2 */
1014	if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
1015	goto LBL_ERR;
1016	}
1017	/* 5.2 if D is odd then */
1018	if (mp_isodd (&D) == MP_YES) {
1019	/* D = (D-x)/2 */
1020	if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
1021	goto LBL_ERR;
1022	}
1023	}
1024	/* D = D/2 */
1025	if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
1026	goto LBL_ERR;
1027	}
1028	}
1029
1030	/* 6. if u >= v then */
1031	if (mp_cmp (&u, &v) != MP_LT) {
1032	/* u = u - v, B = B - D */
1033	if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
1034	goto LBL_ERR;
1035	}
1036
1037	if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
1038	goto LBL_ERR;
1039	}
1040	} else {
1041	/* v - v - u, D = D - B */
1042	if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
1043	goto LBL_ERR;
1044	}
1045
1046	if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
1047	goto LBL_ERR;
1048	}
1049	}
1050
1051	/* if not zero goto step 4 */
1052	if (mp_iszero (&u) == MP_NO) {
1053	if (++loop_check > MAX_INVMOD_SZ) {
1054	res = MP_VAL;
1055	goto LBL_ERR;
1056	}
1057	goto top;
1058	}
1059
1060	/* now a = C, b = D, gcd == gv /
1061
1062	/* if v != 1 then there is no inverse */
1063	if (mp_cmp_d (&v, 1) != MP_EQ) {
1064	res = MP_VAL;
1065	goto LBL_ERR;
1066	}
1067
1068	/* b is now the inverse */
1069	neg = a->sign;
1070	while (D.sign == MP_NEG) {
1071	if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
1072	goto LBL_ERR;
1073	}
1074	}
1075	/* too big */
1076	while (mp_cmp_mag(&D, b) != MP_LT) {
1077	if ((res = mp_sub(&D, b, &D)) != MP_OKAY) {
1078	goto LBL_ERR;
1079	}
1080	}
1081	mp_exch (&D, c);
1082	c->sign = neg;
1083	res = MP_OKAY;
1084
1085	LBL_ERR:mp_clear(&x);
1086	mp_clear(&y);
1087	mp_clear(&u);
1088	mp_clear(&v);
1089	mp_clear(&B);
1090	mp_clear(&D);
1091	return res;
1092	}
1093
1094
1095	/* hac 14.61, pp608 */
1096	int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
1097	{
1098	mp_int x, y, u, v, A, B, C, D;
1099	int res;
1100
1101	/* b cannot be negative */
1102	if (b->sign == MP_NEG \|\| mp_iszero(b) == MP_YES) {
1103	return MP_VAL;
1104	}
1105
1106	/* init temps */
1107	if ((res = mp_init_multi(&x, &y, &u, &v,
1108	&A, &B)) != MP_OKAY) {
1109	return res;
1110	}
1111
1112	/* init rest of tmps temps */
1113	if ((res = mp_init_multi(&C, &D, 0, 0, 0, 0)) != MP_OKAY) {
1114	mp_clear(&x);
1115	mp_clear(&y);
1116	mp_clear(&u);
1117	mp_clear(&v);
1118	mp_clear(&A);
1119	mp_clear(&B);
1120	return res;
1121	}
1122
1123	/* x = a, y = b */
1124	if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
1125	goto LBL_ERR;
1126	}
1127	if ((res = mp_copy (b, &y)) != MP_OKAY) {
1128	goto LBL_ERR;
1129	}
1130
1131	/* 2. [modified] if x,y are both even then return an error! */
1132	if (mp_iseven (&x) == MP_YES && mp_iseven (&y) == MP_YES) {
1133	res = MP_VAL;
1134	goto LBL_ERR;
1135	}
1136
1137	/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
1138	if ((res = mp_copy (&x, &u)) != MP_OKAY) {
1139	goto LBL_ERR;
1140	}
1141	if ((res = mp_copy (&y, &v)) != MP_OKAY) {
1142	goto LBL_ERR;
1143	}
1144	if ((res = mp_set (&A, 1)) != MP_OKAY) {
1145	goto LBL_ERR;
1146	}
1147	if ((res = mp_set (&D, 1)) != MP_OKAY) {
1148	goto LBL_ERR;
1149	}
1150
1151	top:
1152	/* 4. while u is even do */
1153	while (mp_iseven (&u) == MP_YES) {
1154	/* 4.1 u = u/2 */
1155	if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
1156	goto LBL_ERR;
1157	}
1158	/* 4.2 if A or B is odd then */
1159	if (mp_isodd (&A) == MP_YES \|\| mp_isodd (&B) == MP_YES) {
1160	/* A = (A+y)/2, B = (B-x)/2 */
1161	if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
1162	goto LBL_ERR;
1163	}
1164	if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
1165	goto LBL_ERR;
1166	}
1167	}
1168	/* A = A/2, B = B/2 */
1169	if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
1170	goto LBL_ERR;
1171	}
1172	if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
1173	goto LBL_ERR;
1174	}
1175	}
1176
1177	/* 5. while v is even do */
1178	while (mp_iseven (&v) == MP_YES) {
1179	/* 5.1 v = v/2 */
1180	if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
1181	goto LBL_ERR;
1182	}
1183	/* 5.2 if C or D is odd then */
1184	if (mp_isodd (&C) == MP_YES \|\| mp_isodd (&D) == MP_YES) {
1185	/* C = (C+y)/2, D = (D-x)/2 */
1186	if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
1187	goto LBL_ERR;
1188	}
1189	if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
1190	goto LBL_ERR;
1191	}
1192	}
1193	/* C = C/2, D = D/2 */
1194	if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
1195	goto LBL_ERR;
1196	}
1197	if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
1198	goto LBL_ERR;
1199	}
1200	}
1201
1202	/* 6. if u >= v then */
1203	if (mp_cmp (&u, &v) != MP_LT) {
1204	/* u = u - v, A = A - C, B = B - D */
1205	if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
1206	goto LBL_ERR;
1207	}
1208
1209	if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
1210	goto LBL_ERR;
1211	}
1212
1213	if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
1214	goto LBL_ERR;
1215	}
1216	} else {
1217	/* v - v - u, C = C - A, D = D - B */
1218	if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
1219	goto LBL_ERR;
1220	}
1221
1222	if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
1223	goto LBL_ERR;
1224	}
1225
1226	if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
1227	goto LBL_ERR;
1228	}
1229	}
1230
1231	/* if not zero goto step 4 */
1232	if (mp_iszero (&u) == MP_NO)
1233	goto top;
1234
1235	/* now a = C, b = D, gcd == gv /
1236
1237	/* if v != 1 then there is no inverse */
1238	if (mp_cmp_d (&v, 1) != MP_EQ) {
1239	res = MP_VAL;
1240	goto LBL_ERR;
1241	}
1242
1243	/* if its too low */
1244	while (mp_cmp_d(&C, 0) == MP_LT) {
1245	if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
1246	goto LBL_ERR;
1247	}
1248	}
1249
1250	/* too big */
1251	while (mp_cmp_mag(&C, b) != MP_LT) {
1252	if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
1253	goto LBL_ERR;
1254	}
1255	}
1256
1257	/* C is now the inverse */
1258	mp_exch (&C, c);
1259	res = MP_OKAY;
1260	LBL_ERR:mp_clear(&x);
1261	mp_clear(&y);
1262	mp_clear(&u);
1263	mp_clear(&v);
1264	mp_clear(&A);
1265	mp_clear(&B);
1266	mp_clear(&C);
1267	mp_clear(&D);
1268	return res;
1269	}
1270
1271
1272	/* compare magnitude of two ints (unsigned) */
1273	int mp_cmp_mag (mp_int * a, mp_int * b)
1274	{
1275	int n;
1276	mp_digit tmpa, tmpb;
1277
1278	/* compare based on # of non-zero digits */
1279	if (a->used > b->used) {
1280	return MP_GT;
1281	}
1282
1283	if (a->used < b->used) {
1284	return MP_LT;
1285	}
1286
1287	/* alias for a */
1288	tmpa = a->dp + (a->used - 1);
1289
1290	/* alias for b */
1291	tmpb = b->dp + (a->used - 1);
1292
1293	/* compare based on digits */
1294	for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
1295	if (tmpa > tmpb) {
1296	return MP_GT;
1297	}
1298
1299	if (tmpa < tmpb) {
1300	return MP_LT;
1301	}
1302	}
1303	return MP_EQ;
1304	}
1305
1306
1307	/* compare two ints (signed)*/
1308	int mp_cmp (mp_int * a, mp_int * b)
1309	{
1310	/* compare based on sign */
1311	if (a->sign != b->sign) {
1312	if (a->sign == MP_NEG) {
1313	return MP_LT;
1314	} else {
1315	return MP_GT;
1316	}
1317	}
1318
1319	/* compare digits */
1320	if (a->sign == MP_NEG) {
1321	/* if negative compare opposite direction */
1322	return mp_cmp_mag(b, a);
1323	} else {
1324	return mp_cmp_mag(a, b);
1325	}
1326	}
1327
1328
1329	/* compare a digit */
1330	int mp_cmp_d(mp_int * a, mp_digit b)
1331	{
1332	/* special case for zero*/
1333	if (a->used == 0 && b == 0)
1334	return MP_EQ;
1335
1336	/* compare based on sign */
1337	if ((b && a->used == 0) \|\| a->sign == MP_NEG) {
1338	return MP_LT;
1339	}
1340
1341	/* compare based on magnitude */
1342	if (a->used > 1) {
1343	return MP_GT;
1344	}
1345
1346	/* compare the only digit of a to b */
1347	if (a->dp[0] > b) {
1348	return MP_GT;
1349	} else if (a->dp[0] < b) {
1350	return MP_LT;
1351	} else {
1352	return MP_EQ;
1353	}
1354	}
1355
1356
1357	/* set to a digit */
1358	int mp_set (mp_int * a, mp_digit b)
1359	{
1360	int res;
1361	mp_zero (a);
1362	res = mp_grow (a, 1);
1363	if (res == MP_OKAY) {
1364	a->dp[0] = (mp_digit)(b & MP_MASK);
1365	a->used = (a->dp[0] != 0) ? 1 : 0;
1366	}
1367	return res;
1368	}
1369
1370	/* chek if a bit is set */
1371	int mp_is_bit_set (mp_int *a, mp_digit b)
1372	{
1373	if ((mp_digit)a->used < b/DIGIT_BIT)
1374	return 0;
1375
1376	return (int)((a->dp[b/DIGIT_BIT] >> b%DIGIT_BIT) & (mp_digit)1);
1377	}
1378
1379	/* c = a mod b, 0 <= c < b */
1380	#if defined(FREESCALE_LTC_TFM)
1381	int wolfcrypt_mp_mod(mp_int * a, mp_int * b, mp_int * c)
1382	#else
1383	int mp_mod (mp_int * a, mp_int * b, mp_int * c)
1384	#endif
1385	{
1386	mp_int t;
1387	int res;
1388
1389	if ((res = mp_init_size (&t, b->used)) != MP_OKAY) {
1390	return res;
1391	}
1392
1393	if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
1394	mp_clear (&t);
1395	return res;
1396	}
1397
1398	if ((mp_iszero(&t) != MP_NO) \|\| (t.sign == b->sign)) {
1399	res = MP_OKAY;
1400	mp_exch (&t, c);
1401	} else {
1402	res = mp_add (b, &t, c);
1403	}
1404
1405	mp_clear (&t);
1406	return res;
1407	}
1408
1409
1410	/* slower bit-bang division... also smaller */
1411	int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
1412	{
1413	mp_int ta, tb, tq, q;
1414	int res, n, n2;
1415
1416	/* is divisor zero ? */
1417	if (mp_iszero (b) == MP_YES) {
1418	return MP_VAL;
1419	}
1420
1421	/* if a < b then q=0, r = a */
1422	if (mp_cmp_mag (a, b) == MP_LT) {
1423	if (d != NULL) {
1424	res = mp_copy (a, d);
1425	} else {
1426	res = MP_OKAY;
1427	}
1428	if (c != NULL) {
1429	mp_zero (c);
1430	}
1431	return res;
1432	}
1433
1434	/* init our temps */
1435	if ((res = mp_init_multi(&ta, &tb, &tq, &q, 0, 0)) != MP_OKAY) {
1436	return res;
1437	}
1438
1439	if ((res = mp_set(&tq, 1)) != MP_OKAY) {
1440	return res;
1441	}
1442	n = mp_count_bits(a) - mp_count_bits(b);
1443	if (((res = mp_abs(a, &ta)) != MP_OKAY) \|\|
1444	((res = mp_abs(b, &tb)) != MP_OKAY) \|\|
1445	((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) \|\|
1446	((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
1447	goto LBL_ERR;
1448	}
1449
1450	while (n-- >= 0) {
1451	if (mp_cmp(&tb, &ta) != MP_GT) {
1452	if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) \|\|
1453	((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
1454	goto LBL_ERR;
1455	}
1456	}
1457	if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) \|\|
1458	((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
1459	goto LBL_ERR;
1460	}
1461	}
1462
1463	/* now q == quotient and ta == remainder */
1464	n = a->sign;
1465	n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
1466	if (c != NULL) {
1467	mp_exch(c, &q);
1468	c->sign = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
1469	}
1470	if (d != NULL) {
1471	mp_exch(d, &ta);
1472	d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
1473	}
1474	LBL_ERR:
1475	mp_clear(&ta);
1476	mp_clear(&tb);
1477	mp_clear(&tq);
1478	mp_clear(&q);
1479	return res;
1480	}
1481
1482
1483	/* b = a/2 */
1484	int mp_div_2(mp_int * a, mp_int * b)
1485	{
1486	int x, res, oldused;
1487
1488	/* copy */
1489	if (b->alloc < a->used) {
1490	if ((res = mp_grow (b, a->used)) != MP_OKAY) {
1491	return res;
1492	}
1493	}
1494
1495	oldused = b->used;
1496	b->used = a->used;
1497	{
1498	mp_digit r, rr, tmpa, tmpb;
1499
1500	/* source alias */
1501	tmpa = a->dp + b->used - 1;
1502
1503	/* dest alias */
1504	tmpb = b->dp + b->used - 1;
1505
1506	/* carry */
1507	r = 0;
1508	for (x = b->used - 1; x >= 0; x--) {
1509	/* get the carry for the next iteration */
1510	rr = *tmpa & 1;
1511
1512	/* shift the current digit, add in carry and store */
1513	tmpb-- = (tmpa-- >> 1) \| (r << (DIGIT_BIT - 1));
1514
1515	/* forward carry to next iteration */
1516	r = rr;
1517	}
1518
1519	/* zero excess digits */
1520	tmpb = b->dp + b->used;
1521	for (x = b->used; x < oldused; x++) {
1522	*tmpb++ = 0;
1523	}
1524	}
1525	b->sign = a->sign;
1526	mp_clamp (b);
1527	return MP_OKAY;
1528	}
1529
1530
1531	/* high level addition (handles signs) */
1532	int mp_add (mp_int * a, mp_int * b, mp_int * c)
1533	{
1534	int sa, sb, res;
1535
1536	/* get sign of both inputs */
1537	sa = a->sign;
1538	sb = b->sign;
1539
1540	/* handle two cases, not four */
1541	if (sa == sb) {
1542	/* both positive or both negative */
1543	/* add their magnitudes, copy the sign */
1544	c->sign = sa;
1545	res = s_mp_add (a, b, c);
1546	} else {
1547	/* one positive, the other negative */
1548	/* subtract the one with the greater magnitude from */
1549	/* the one of the lesser magnitude. The result gets */
1550	/* the sign of the one with the greater magnitude. */
1551	if (mp_cmp_mag (a, b) == MP_LT) {
1552	c->sign = sb;
1553	res = s_mp_sub (b, a, c);
1554	} else {
1555	c->sign = sa;
1556	res = s_mp_sub (a, b, c);
1557	}
1558	}
1559	return res;
1560	}
1561
1562
1563	/* low level addition, based on HAC pp.594, Algorithm 14.7 */
1564	int s_mp_add (mp_int * a, mp_int * b, mp_int * c)
1565	{
1566	mp_int *x;
1567	int olduse, res, min_ab, max_ab;
1568
1569	/* find sizes, we let \|a\| <= \|b\| which means we have to sort
1570	* them. "x" will point to the input with the most digits
1571	*/
1572	if (a->used > b->used) {
1573	min_ab = b->used;
1574	max_ab = a->used;
1575	x = a;
1576	} else {
1577	min_ab = a->used;
1578	max_ab = b->used;
1579	x = b;
1580	}
1581
1582	/* init result */
1583	if (c->alloc < max_ab + 1) {
1584	if ((res = mp_grow (c, max_ab + 1)) != MP_OKAY) {
1585	return res;
1586	}
1587	}
1588
1589	/* get old used digit count and set new one */
1590	olduse = c->used;
1591	c->used = max_ab + 1;
1592
1593	{
1594	mp_digit u, tmpa, tmpb, *tmpc;
1595	int i;
1596
1597	/* alias for digit pointers */
1598
1599	/* first input */
1600	tmpa = a->dp;
1601
1602	/* second input */
1603	tmpb = b->dp;
1604
1605	/* destination */
1606	tmpc = c->dp;
1607
1608	/* zero the carry */
1609	u = 0;
1610	for (i = 0; i < min_ab; i++) {
1611	/* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
1612	tmpc = tmpa++ + *tmpb++ + u;
1613
1614	/* U = carry bit of T[i] */
1615	u = *tmpc >> ((mp_digit)DIGIT_BIT);
1616
1617	/* take away carry bit from T[i] */
1618	*tmpc++ &= MP_MASK;
1619	}
1620
1621	/* now copy higher words if any, that is in A+B
1622	* if A or B has more digits add those in
1623	*/
1624	if (min_ab != max_ab) {
1625	for (; i < max_ab; i++) {
1626	/* T[i] = X[i] + U */
1627	*tmpc = x->dp[i] + u;
1628
1629	/* U = carry bit of T[i] */
1630	u = *tmpc >> ((mp_digit)DIGIT_BIT);
1631
1632	/* take away carry bit from T[i] */
1633	*tmpc++ &= MP_MASK;
1634	}
1635	}
1636
1637	/* add carry */
1638	*tmpc++ = u;
1639
1640	/* clear digits above olduse */
1641	for (i = c->used; i < olduse; i++) {
1642	*tmpc++ = 0;
1643	}
1644	}
1645
1646	mp_clamp (c);
1647	return MP_OKAY;
1648	}
1649
1650
1651	/* low level subtraction (assumes \|a\| > \|b\|), HAC pp.595 Algorithm 14.9 */
1652	int s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
1653	{
1654	int olduse, res, min_b, max_a;
1655
1656	/* find sizes */
1657	min_b = b->used;
1658	max_a = a->used;
1659
1660	/* init result */
1661	if (c->alloc < max_a) {
1662	if ((res = mp_grow (c, max_a)) != MP_OKAY) {
1663	return res;
1664	}
1665	}
1666
1667	/* sanity check on destination */
1668	if (c->dp == NULL)
1669	return MP_VAL;
1670
1671	olduse = c->used;
1672	c->used = max_a;
1673
1674	{
1675	mp_digit u, tmpa, tmpb, *tmpc;
1676	int i;
1677
1678	/* alias for digit pointers */
1679	tmpa = a->dp;
1680	tmpb = b->dp;
1681	tmpc = c->dp;
1682
1683	/* set carry to zero */
1684	u = 0;
1685	for (i = 0; i < min_b; i++) {
1686	/* T[i] = A[i] - B[i] - U */
1687	tmpc = tmpa++ - *tmpb++ - u;
1688
1689	/* U = carry bit of T[i]
1690	* Note this saves performing an AND operation since
1691	* if a carry does occur it will propagate all the way to the
1692	* MSB. As a result a single shift is enough to get the carry
1693	*/
1694	u = tmpc >> ((mp_digit)(CHAR_BIT sizeof (mp_digit) - 1));
1695
1696	/* Clear carry from T[i] */
1697	*tmpc++ &= MP_MASK;
1698	}
1699
1700	/* now copy higher words if any, e.g. if A has more digits than B */
1701	for (; i < max_a; i++) {
1702	/* T[i] = A[i] - U */
1703	tmpc = tmpa++ - u;
1704
1705	/* U = carry bit of T[i] */
1706	u = tmpc >> ((mp_digit)(CHAR_BIT sizeof (mp_digit) - 1));
1707
1708	/* Clear carry from T[i] */
1709	*tmpc++ &= MP_MASK;
1710	}
1711
1712	/* clear digits above used (since we may not have grown result above) */
1713	for (i = c->used; i < olduse; i++) {
1714	*tmpc++ = 0;
1715	}
1716	}
1717
1718	mp_clamp (c);
1719	return MP_OKAY;
1720	}
1721
1722
1723	/* high level subtraction (handles signs) */
1724	int mp_sub (mp_int * a, mp_int * b, mp_int * c)
1725	{
1726	int sa, sb, res;
1727
1728	sa = a->sign;
1729	sb = b->sign;
1730
1731	if (sa != sb) {
1732	/* subtract a negative from a positive, OR */
1733	/* subtract a positive from a negative. */
1734	/* In either case, ADD their magnitudes, */
1735	/* and use the sign of the first number. */
1736	c->sign = sa;
1737	res = s_mp_add (a, b, c);
1738	} else {
1739	/* subtract a positive from a positive, OR */
1740	/* subtract a negative from a negative. */
1741	/* First, take the difference between their */
1742	/* magnitudes, then... */
1743	if (mp_cmp_mag (a, b) != MP_LT) {
1744	/* Copy the sign from the first */
1745	c->sign = sa;
1746	/* The first has a larger or equal magnitude */
1747	res = s_mp_sub (a, b, c);
1748	} else {
1749	/* The result has the opposite sign from */
1750	/* the first number. */
1751	c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
1752	/* The second has a larger magnitude */
1753	res = s_mp_sub (b, a, c);
1754	}
1755	}
1756	return res;
1757	}
1758
1759
1760	/* determines if reduce_2k_l can be used */
1761	int mp_reduce_is_2k_l(mp_int *a)
1762	{
1763	int ix, iy;
1764
1765	if (a->used == 0) {
1766	return MP_NO;
1767	} else if (a->used == 1) {
1768	return MP_YES;
1769	} else if (a->used > 1) {
1770	/* if more than half of the digits are -1 we're sold */
1771	for (iy = ix = 0; ix < a->used; ix++) {
1772	if (a->dp[ix] == MP_MASK) {
1773	++iy;
1774	}
1775	}
1776	return (iy >= (a->used/2)) ? MP_YES : MP_NO;
1777
1778	}
1779	return MP_NO;
1780	}
1781
1782
1783	/* determines if mp_reduce_2k can be used */
1784	int mp_reduce_is_2k(mp_int *a)
1785	{
1786	int ix, iy, iw;
1787	mp_digit iz;
1788
1789	if (a->used == 0) {
1790	return MP_NO;
1791	} else if (a->used == 1) {
1792	return MP_YES;
1793	} else if (a->used > 1) {
1794	iy = mp_count_bits(a);
1795	iz = 1;
1796	iw = 1;
1797
1798	/* Test every bit from the second digit up, must be 1 */
1799	for (ix = DIGIT_BIT; ix < iy; ix++) {
1800	if ((a->dp[iw] & iz) == 0) {
1801	return MP_NO;
1802	}
1803	iz <<= 1;
1804	if (iz > (mp_digit)MP_MASK) {
1805	++iw;
1806	iz = 1;
1807	}
1808	}
1809	}
1810	return MP_YES;
1811	}
1812
1813
1814	/* determines if a number is a valid DR modulus */
1815	int mp_dr_is_modulus(mp_int *a)
1816	{
1817	int ix;
1818
1819	/* must be at least two digits */
1820	if (a->used < 2) {
1821	return 0;
1822	}
1823
1824	/* must be of the form b**k - a [a <= b] so all
1825	* but the first digit must be equal to -1 (mod b).
1826	*/
1827	for (ix = 1; ix < a->used; ix++) {
1828	if (a->dp[ix] != MP_MASK) {
1829	return 0;
1830	}
1831	}
1832	return 1;
1833	}
1834
1835
1836	/* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
1837	*
1838	* Uses a left-to-right k-ary sliding window to compute the modular
1839	* exponentiation.
1840	* The value of k changes based on the size of the exponent.
1841	*
1842	* Uses Montgomery or Diminished Radix reduction [whichever appropriate]
1843	*/
1844
1845	#ifdef MP_LOW_MEM
1846	#define TAB_SIZE 32
1847	#else
1848	#define TAB_SIZE 256
1849	#endif
1850
1851	int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
1852	int redmode)
1853	{
1854	mp_int res;
1855	mp_digit buf, mp;
1856	int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
1857	#ifdef WOLFSSL_SMALL_STACK
1858	mp_int* M = NULL;
1859	#else
1860	mp_int M[TAB_SIZE];
1861	#endif
1862	/* use a pointer to the reduction algorithm. This allows us to use
1863	* one of many reduction algorithms without modding the guts of
1864	* the code with if statements everywhere.
1865	*/
1866	int (redux)(mp_int,mp_int*,mp_digit);
1867
1868	#ifdef WOLFSSL_SMALL_STACK
1869	M = (mp_int) XMALLOC(sizeof(mp_int) TAB_SIZE, NULL,
1870	DYNAMIC_TYPE_TMP_BUFFER);
1871	if (M == NULL)
1872	return MP_MEM;
1873	#endif
1874
1875	/* find window size */
1876	x = mp_count_bits (X);
1877	if (x <= 7) {
1878	winsize = 2;
1879	} else if (x <= 36) {
1880	winsize = 3;
1881	} else if (x <= 140) {
1882	winsize = 4;
1883	} else if (x <= 450) {
1884	winsize = 5;
1885	} else if (x <= 1303) {
1886	winsize = 6;
1887	} else if (x <= 3529) {
1888	winsize = 7;
1889	} else {
1890	winsize = 8;
1891	}
1892
1893	#ifdef MP_LOW_MEM
1894	if (winsize > 5) {
1895	winsize = 5;
1896	}
1897	#endif
1898
1899	/* init M array */
1900	/* init first cell */
1901	if ((err = mp_init_size(&M[1], P->alloc)) != MP_OKAY) {
1902	#ifdef WOLFSSL_SMALL_STACK
1903	XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
1904	#endif
1905
1906	return err;
1907	}
1908
1909	/* now init the second half of the array */
1910	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
1911	if ((err = mp_init_size(&M[x], P->alloc)) != MP_OKAY) {
1912	for (y = 1<<(winsize-1); y < x; y++) {
1913	mp_clear (&M[y]);
1914	}
1915	mp_clear(&M[1]);
1916
1917	#ifdef WOLFSSL_SMALL_STACK
1918	XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
1919	#endif
1920
1921	return err;
1922	}
1923	}
1924
1925	/* determine and setup reduction code */
1926	if (redmode == 0) {
1927	#ifdef BN_MP_MONTGOMERY_SETUP_C
1928	/* now setup montgomery */
1929	if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
1930	goto LBL_M;
1931	}
1932	#else
1933	err = MP_VAL;
1934	goto LBL_M;
1935	#endif
1936
1937	/* automatically pick the comba one if available (saves quite a few
1938	calls/ifs) */
1939	#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
1940	if (((P->used * 2 + 1) < MP_WARRAY) &&
1941	P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
1942	redux = fast_mp_montgomery_reduce;
1943	} else
1944	#endif
1945	{
1946	#ifdef BN_MP_MONTGOMERY_REDUCE_C
1947	/* use slower baseline Montgomery method */
1948	redux = mp_montgomery_reduce;
1949	#else
1950	err = MP_VAL;
1951	goto LBL_M;
1952	#endif
1953	}
1954	} else if (redmode == 1) {
1955	#if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
1956	/* setup DR reduction for moduli of the form B*k - b /
1957	mp_dr_setup(P, &mp);
1958	redux = mp_dr_reduce;
1959	#else
1960	err = MP_VAL;
1961	goto LBL_M;
1962	#endif
1963	} else {
1964	#if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
1965	/* setup DR reduction for moduli of the form 2*k - b /
1966	if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
1967	goto LBL_M;
1968	}
1969	redux = mp_reduce_2k;
1970	#else
1971	err = MP_VAL;
1972	goto LBL_M;
1973	#endif
1974	}
1975
1976	/* setup result */
1977	if ((err = mp_init_size (&res, P->alloc)) != MP_OKAY) {
1978	goto LBL_M;
1979	}
1980
1981	/* create M table
1982	*
1983
1984	*
1985	* The first half of the table is not computed though accept for M[0] and M[1]
1986	*/
1987
1988	if (redmode == 0) {
1989	#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
1990	/* now we need R mod m */
1991	if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
1992	goto LBL_RES;
1993	}
1994
1995	/* now set M[1] to G * R mod m */
1996	if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
1997	goto LBL_RES;
1998	}
1999	#else
2000	err = MP_VAL;
2001	goto LBL_RES;
2002	#endif
2003	} else {
2004	if ((err = mp_set(&res, 1)) != MP_OKAY) {
2005	goto LBL_RES;
2006	}
2007	if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
2008	goto LBL_RES;
2009	}
2010	}
2011
2012	/* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times*/
2013	if ((err = mp_copy (&M[1], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
2014	goto LBL_RES;
2015	}
2016
2017	for (x = 0; x < (winsize - 1); x++) {
2018	if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))],
2019	&M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
2020	goto LBL_RES;
2021	}
2022	if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, mp)) != MP_OKAY) {
2023	goto LBL_RES;
2024	}
2025	}
2026
2027	/* create upper table */
2028	for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
2029	if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
2030	goto LBL_RES;
2031	}
2032	if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
2033	goto LBL_RES;
2034	}
2035	}
2036
2037	/* set initial mode and bit cnt */
2038	mode = 0;
2039	bitcnt = 1;
2040	buf = 0;
2041	digidx = X->used - 1;
2042	bitcpy = 0;
2043	bitbuf = 0;
2044
2045	for (;;) {
2046	/* grab next digit as required */
2047	if (--bitcnt == 0) {
2048	/* if digidx == -1 we are out of digits so break */
2049	if (digidx == -1) {
2050	break;
2051	}
2052	/* read next digit and reset bitcnt */
2053	buf = X->dp[digidx--];
2054	bitcnt = (int)DIGIT_BIT;
2055	}
2056
2057	/* grab the next msb from the exponent */
2058	y = (int)(buf >> (DIGIT_BIT - 1)) & 1;
2059	buf <<= (mp_digit)1;
2060
2061	/* if the bit is zero and mode == 0 then we ignore it
2062	* These represent the leading zero bits before the first 1 bit
2063	* in the exponent. Technically this opt is not required but it
2064	* does lower the # of trivial squaring/reductions used
2065	*/
2066	if (mode == 0 && y == 0) {
2067	continue;
2068	}
2069
2070	/* if the bit is zero and mode == 1 then we square */
2071	if (mode == 1 && y == 0) {
2072	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2073	goto LBL_RES;
2074	}
2075	if ((err = redux (&res, P, mp)) != MP_OKAY) {
2076	goto LBL_RES;
2077	}
2078	continue;
2079	}
2080
2081	/* else we add it to the window */
2082	bitbuf \|= (y << (winsize - ++bitcpy));
2083	mode = 2;
2084
2085	if (bitcpy == winsize) {
2086	/* ok window is filled so square as required and multiply */
2087	/* square first */
2088	for (x = 0; x < winsize; x++) {
2089	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2090	goto LBL_RES;
2091	}
2092	if ((err = redux (&res, P, mp)) != MP_OKAY) {
2093	goto LBL_RES;
2094	}
2095	}
2096
2097	/* then multiply */
2098	if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
2099	goto LBL_RES;
2100	}
2101	if ((err = redux (&res, P, mp)) != MP_OKAY) {
2102	goto LBL_RES;
2103	}
2104
2105	/* empty window and reset */
2106	bitcpy = 0;
2107	bitbuf = 0;
2108	mode = 1;
2109	}
2110	}
2111
2112	/* if bits remain then square/multiply */
2113	if (mode == 2 && bitcpy > 0) {
2114	/* square then multiply if the bit is set */
2115	for (x = 0; x < bitcpy; x++) {
2116	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2117	goto LBL_RES;
2118	}
2119	if ((err = redux (&res, P, mp)) != MP_OKAY) {
2120	goto LBL_RES;
2121	}
2122
2123	/* get next bit of the window */
2124	bitbuf <<= 1;
2125	if ((bitbuf & (1 << winsize)) != 0) {
2126	/* then multiply */
2127	if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
2128	goto LBL_RES;
2129	}
2130	if ((err = redux (&res, P, mp)) != MP_OKAY) {
2131	goto LBL_RES;
2132	}
2133	}
2134	}
2135	}
2136
2137	if (redmode == 0) {
2138	/* fixup result if Montgomery reduction is used
2139	* recall that any value in a Montgomery system is
2140	* actually multiplied by R mod n. So we have
2141	* to reduce one more time to cancel out the factor
2142	* of R.
2143	*/
2144	if ((err = redux(&res, P, mp)) != MP_OKAY) {
2145	goto LBL_RES;
2146	}
2147	}
2148
2149	/* swap res with Y */
2150	mp_exch (&res, Y);
2151	err = MP_OKAY;
2152	LBL_RES:mp_clear (&res);
2153	LBL_M:
2154	mp_clear(&M[1]);
2155	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
2156	mp_clear (&M[x]);
2157	}
2158
2159	#ifdef WOLFSSL_SMALL_STACK
2160	XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2161	#endif
2162
2163	return err;
2164	}
2165
2166
2167	/* setups the montgomery reduction stuff */
2168	int mp_montgomery_setup (mp_int * n, mp_digit * rho)
2169	{
2170	mp_digit x, b;
2171
2172	/* fast inversion mod 2**k
2173	*
2174	* Based on the fact that
2175	*
2176	* XA = 1 (mod 2n) => (X(2-XA)) A = 1 (mod 22n)
2177	* => 2XA - XXA*A = 1
2178	* => 2*(1) - (1) = 1
2179	*/
2180	b = n->dp[0];
2181
2182	if ((b & 1) == 0) {
2183	return MP_VAL;
2184	}
2185
2186	x = (((b + 2) & 4) << 1) + b; /* here xa==1 mod 24 /
2187	x = 2 - b x; /* here xa==1 mod 28 /
2188	#if !defined(MP_8BIT)
2189	x = 2 - b x; /* here xa==1 mod 216 /
2190	#endif
2191	#if defined(MP_64BIT) \|\| !(defined(MP_8BIT) \|\| defined(MP_16BIT))
2192	x = 2 - b x; /* here xa==1 mod 232 /
2193	#endif
2194	#ifdef MP_64BIT
2195	x = 2 - b x; /* here xa==1 mod 264 /
2196	#endif
2197
2198	/* rho = -1/m mod b */
2199	/* TAO, switched mp_word casts to mp_digit to shut up compiler */
2200	*rho = (mp_digit)((((mp_digit)1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK);
2201
2202	return MP_OKAY;
2203	}
2204
2205
2206	/* computes xR**-1 == x (mod N) via Montgomery Reduction
2207	*
2208	* This is an optimized implementation of montgomery_reduce
2209	* which uses the comba method to quickly calculate the columns of the
2210	* reduction.
2211	*
2212	* Based on Algorithm 14.32 on pp.601 of HAC.
2213	*/
2214	int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
2215	{
2216	int ix, res, olduse;
2217	#ifdef WOLFSSL_SMALL_STACK
2218	mp_word* W; /* uses dynamic memory and slower */
2219	#else
2220	mp_word W[MP_WARRAY];
2221	#endif
2222
2223	/* get old used count */
2224	olduse = x->used;
2225
2226	/* grow a as required */
2227	if (x->alloc < n->used + 1) {
2228	if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
2229	return res;
2230	}
2231	}
2232
2233	#ifdef WOLFSSL_SMALL_STACK
2234	W = (mp_word)XMALLOC(sizeof(mp_word) MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
2235	if (W == NULL)
2236	return MP_MEM;
2237	#endif
2238
2239	/* first we have to get the digits of the input into
2240	* an array of double precision words W[...]
2241	*/
2242	{
2243	mp_word *_W;
2244	mp_digit *tmpx;
2245
2246	/* alias for the W[] array */
2247	_W = W;
2248
2249	/* alias for the digits of x*/
2250	tmpx = x->dp;
2251
2252	/* copy the digits of a into W[0..a->used-1] */
2253	for (ix = 0; ix < x->used; ix++) {
2254	_W++ = tmpx++;
2255	}
2256
2257	/* zero the high words of W[a->used..m->used2] /
2258	for (; ix < n->used * 2 + 1; ix++) {
2259	*_W++ = 0;
2260	}
2261	}
2262
2263	/* now we proceed to zero successive digits
2264	* from the least significant upwards
2265	*/
2266	for (ix = 0; ix < n->used; ix++) {
2267	/* mu = ai * m' mod b
2268	*
2269	* We avoid a double precision multiplication (which isn't required)
2270	* by casting the value down to a mp_digit. Note this requires
2271	* that W[ix-1] have the carry cleared (see after the inner loop)
2272	*/
2273	mp_digit mu;
2274	mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
2275
2276	/* a = a + mu * m * b**i
2277	*
2278	* This is computed in place and on the fly. The multiplication
2279	* by b**i is handled by offseting which columns the results
2280	* are added to.
2281	*
2282	* Note the comba method normally doesn't handle carries in the
2283	* inner loop In this case we fix the carry from the previous
2284	* column since the Montgomery reduction requires digits of the
2285	* result (so far) [see above] to work. This is
2286	* handled by fixing up one carry after the inner loop. The
2287	* carry fixups are done in order so after these loops the
2288	* first m->used words of W[] have the carries fixed
2289	*/
2290	{
2291	int iy;
2292	mp_digit *tmpn;
2293	mp_word *_W;
2294
2295	/* alias for the digits of the modulus */
2296	tmpn = n->dp;
2297
2298	/* Alias for the columns set by an offset of ix */
2299	_W = W + ix;
2300
2301	/* inner loop */
2302	for (iy = 0; iy < n->used; iy++) {
2303	_W++ += ((mp_word)mu) ((mp_word)*tmpn++);
2304	}
2305	}
2306
2307	/* now fix carry for next digit, W[ix+1] */
2308	W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
2309	}
2310
2311	/* now we have to propagate the carries and
2312	* shift the words downward [all those least
2313	* significant digits we zeroed].
2314	*/
2315	{
2316	mp_digit *tmpx;
2317	mp_word _W, _W1;
2318
2319	/* nox fix rest of carries */
2320
2321	/* alias for current word */
2322	_W1 = W + ix;
2323
2324	/* alias for next word, where the carry goes */
2325	_W = W + ++ix;
2326
2327	for (; ix <= n->used * 2 + 1; ix++) {
2328	_W++ += _W1++ >> ((mp_word) DIGIT_BIT);
2329	}
2330
2331	/* copy out, A = A/b**n
2332	*
2333	* The result is A/b**n but instead of converting from an
2334	* array of mp_word to mp_digit than calling mp_rshd
2335	* we just copy them in the right order
2336	*/
2337
2338	/* alias for destination word */
2339	tmpx = x->dp;
2340
2341	/* alias for shifted double precision result */
2342	_W = W + n->used;
2343
2344	for (ix = 0; ix < n->used + 1; ix++) {
2345	tmpx++ = (mp_digit)(_W++ & ((mp_word) MP_MASK));
2346	}
2347
2348	/* zero olduse digits, if the input a was larger than
2349	* m->used+1 we'll have to clear the digits
2350	*/
2351	for (; ix < olduse; ix++) {
2352	*tmpx++ = 0;
2353	}
2354	}
2355
2356	/* set the max used and clamp */
2357	x->used = n->used + 1;
2358	mp_clamp (x);
2359
2360	#ifdef WOLFSSL_SMALL_STACK
2361	XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
2362	#endif
2363
2364	/* if A >= m then A = A - m */
2365	if (mp_cmp_mag (x, n) != MP_LT) {
2366	return s_mp_sub (x, n, x);
2367	}
2368	return MP_OKAY;
2369	}
2370
2371
2372	/* computes xR*-1 == x (mod N) via Montgomery Reduction /
2373	int mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
2374	{
2375	int ix, res, digs;
2376	mp_digit mu;
2377
2378	/* can the fast reduction [comba] method be used?
2379	*
2380	* Note that unlike in mul you're safely allowed less
2381	* than the available columns [255 per default] since carries
2382	* are fixed up in the inner loop.
2383	*/
2384	digs = n->used * 2 + 1;
2385	if ((digs < MP_WARRAY) &&
2386	n->used <
2387	(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2388	return fast_mp_montgomery_reduce (x, n, rho);
2389	}
2390
2391	/* grow the input as required */
2392	if (x->alloc < digs) {
2393	if ((res = mp_grow (x, digs)) != MP_OKAY) {
2394	return res;
2395	}
2396	}
2397	x->used = digs;
2398
2399	for (ix = 0; ix < n->used; ix++) {
2400	/* mu = ai * rho mod b
2401	*
2402	* The value of rho must be precalculated via
2403	* montgomery_setup() such that
2404	* it equals -1/n0 mod b this allows the
2405	* following inner loop to reduce the
2406	* input one digit at a time
2407	*/
2408	mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
2409
2410	/* a = a + mu * m * b*i /
2411	{
2412	int iy;
2413	mp_digit tmpn, tmpx, u;
2414	mp_word r;
2415
2416	/* alias for digits of the modulus */
2417	tmpn = n->dp;
2418
2419	/* alias for the digits of x [the input] */
2420	tmpx = x->dp + ix;
2421
2422	/* set the carry to zero */
2423	u = 0;
2424
2425	/* Multiply and add in place */
2426	for (iy = 0; iy < n->used; iy++) {
2427	/* compute product and sum */
2428	r = ((mp_word)mu) * ((mp_word)*tmpn++) +
2429	((mp_word) u) + ((mp_word) * tmpx);
2430
2431	/* get carry */
2432	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
2433
2434	/* fix digit */
2435	*tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
2436	}
2437	/* At this point the ix'th digit of x should be zero */
2438
2439
2440	/* propagate carries upwards as required*/
2441	while (u) {
2442	*tmpx += u;
2443	u = *tmpx >> DIGIT_BIT;
2444	*tmpx++ &= MP_MASK;
2445	}
2446	}
2447	}
2448
2449	/* at this point the n.used'th least
2450	* significant digits of x are all zero
2451	* which means we can shift x to the
2452	* right by n.used digits and the
2453	* residue is unchanged.
2454	*/
2455
2456	/* x = x/b*n.used /
2457	mp_clamp(x);
2458	mp_rshd (x, n->used);
2459
2460	/* if x >= n then x = x - n */
2461	if (mp_cmp_mag (x, n) != MP_LT) {
2462	return s_mp_sub (x, n, x);
2463	}
2464
2465	return MP_OKAY;
2466	}
2467
2468
2469	/* determines the setup value */
2470	void mp_dr_setup(mp_int a, mp_digit d)
2471	{
2472	/* the casts are required if DIGIT_BIT is one less than
2473	* the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
2474	*/
2475	*d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
2476	((mp_word)a->dp[0]));
2477	}
2478
2479
2480	/* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
2481	*
2482	* Based on algorithm from the paper
2483	*
2484	* "Generating Efficient Primes for Discrete Log Cryptosystems"
2485	* Chae Hoon Lim, Pil Joong Lee,
2486	* POSTECH Information Research Laboratories
2487	*
2488	* The modulus must be of a special format [see manual]
2489	*
2490	* Has been modified to use algorithm 7.10 from the LTM book instead
2491	*
2492	* Input x must be in the range 0 <= x <= (n-1)**2
2493	*/
2494	int mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k)
2495	{
2496	int err, i, m;
2497	mp_word r;
2498	mp_digit mu, tmpx1, tmpx2;
2499
2500	/* m = digits in modulus */
2501	m = n->used;
2502
2503	/* ensure that "x" has at least 2m digits */
2504	if (x->alloc < m + m) {
2505	if ((err = mp_grow (x, m + m)) != MP_OKAY) {
2506	return err;
2507	}
2508	}
2509
2510	/* top of loop, this is where the code resumes if
2511	* another reduction pass is required.
2512	*/
2513	top:
2514	/* aliases for digits */
2515	/* alias for lower half of x */
2516	tmpx1 = x->dp;
2517
2518	/* alias for upper half of x, or x/B*m /
2519	tmpx2 = x->dp + m;
2520
2521	/* set carry to zero */
2522	mu = 0;
2523
2524	/* compute (x mod B*m) + k [x/B*m] inline and inplace /
2525	for (i = 0; i < m; i++) {
2526	r = ((mp_word)tmpx2++) ((mp_word)k) + *tmpx1 + mu;
2527	*tmpx1++ = (mp_digit)(r & MP_MASK);
2528	mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
2529	}
2530
2531	/* set final carry */
2532	*tmpx1++ = mu;
2533
2534	/* zero words above m */
2535	for (i = m + 1; i < x->used; i++) {
2536	*tmpx1++ = 0;
2537	}
2538
2539	/* clamp, sub and return */
2540	mp_clamp (x);
2541
2542	/* if x >= n then subtract and reduce again
2543	* Each successive "recursion" makes the input smaller and smaller.
2544	*/
2545	if (mp_cmp_mag (x, n) != MP_LT) {
2546	if ((err = s_mp_sub(x, n, x)) != MP_OKAY) {
2547	return err;
2548	}
2549	goto top;
2550	}
2551	return MP_OKAY;
2552	}
2553
2554
2555	/* reduces a modulo n where n is of the form 2*p - d /
2556	int mp_reduce_2k(mp_int a, mp_int n, mp_digit d)
2557	{
2558	mp_int q;
2559	int p, res;
2560
2561	if ((res = mp_init(&q)) != MP_OKAY) {
2562	return res;
2563	}
2564
2565	p = mp_count_bits(n);
2566	top:
2567	/* q = a/2p, a = a mod 2p */
2568	if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
2569	goto ERR;
2570	}
2571
2572	if (d != 1) {
2573	/* q = q * d */
2574	if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) {
2575	goto ERR;
2576	}
2577	}
2578
2579	/* a = a + q */
2580	if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
2581	goto ERR;
2582	}
2583
2584	if (mp_cmp_mag(a, n) != MP_LT) {
2585	if ((res = s_mp_sub(a, n, a)) != MP_OKAY) {
2586	goto ERR;
2587	}
2588	goto top;
2589	}
2590
2591	ERR:
2592	mp_clear(&q);
2593	return res;
2594	}
2595
2596
2597	/* determines the setup value */
2598	int mp_reduce_2k_setup(mp_int a, mp_digit d)
2599	{
2600	int res, p;
2601	mp_int tmp;
2602
2603	if ((res = mp_init(&tmp)) != MP_OKAY) {
2604	return res;
2605	}
2606
2607	p = mp_count_bits(a);
2608	if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
2609	mp_clear(&tmp);
2610	return res;
2611	}
2612
2613	if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
2614	mp_clear(&tmp);
2615	return res;
2616	}
2617
2618	*d = tmp.dp[0];
2619	mp_clear(&tmp);
2620	return MP_OKAY;
2621	}
2622
2623
2624	/* set the b bit of a */
2625	int mp_set_bit (mp_int * a, int b)
2626	{
2627	int i = b / DIGIT_BIT, res;
2628
2629	if (a->used < (int)(i + 1)) {
2630	/* grow a to accommodate the single bit */
2631	if ((res = mp_grow (a, i + 1)) != MP_OKAY) {
2632	return res;
2633	}
2634
2635	/* set the used count of where the bit will go */
2636	a->used = (int)(i + 1);
2637	}
2638
2639	/* put the single bit in its place */
2640	a->dp[i] \|= ((mp_digit)1) << (b % DIGIT_BIT);
2641
2642	return MP_OKAY;
2643	}
2644
2645	/* computes a = 2**b
2646	*
2647	* Simple algorithm which zeros the int, set the required bit
2648	*/
2649	int mp_2expt (mp_int * a, int b)
2650	{
2651	/* zero a as per default */
2652	mp_zero (a);
2653
2654	return mp_set_bit(a, b);
2655	}
2656
2657	/* multiply by a digit */
2658	int mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
2659	{
2660	mp_digit u, tmpa, tmpc;
2661	mp_word r;
2662	int ix, res, olduse;
2663
2664	/* make sure c is big enough to hold ab /
2665	if (c->alloc < a->used + 1) {
2666	if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
2667	return res;
2668	}
2669	}
2670
2671	/* get the original destinations used count */
2672	olduse = c->used;
2673
2674	/* set the sign */
2675	c->sign = a->sign;
2676
2677	/* alias for a->dp [source] */
2678	tmpa = a->dp;
2679
2680	/* alias for c->dp [dest] */
2681	tmpc = c->dp;
2682
2683	/* zero carry */
2684	u = 0;
2685
2686	/* compute columns */
2687	for (ix = 0; ix < a->used; ix++) {
2688	/* compute product and carry sum for this term */
2689	r = ((mp_word) u) + ((mp_word)tmpa++) ((mp_word)b);
2690
2691	/* mask off higher bits to get a single digit */
2692	*tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
2693
2694	/* send carry into next iteration */
2695	u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
2696	}
2697
2698	/* store final carry [if any] and increment ix offset */
2699	*tmpc++ = u;
2700	++ix;
2701
2702	/* now zero digits above the top */
2703	while (ix++ < olduse) {
2704	*tmpc++ = 0;
2705	}
2706
2707	/* set used count */
2708	c->used = a->used + 1;
2709	mp_clamp(c);
2710
2711	return MP_OKAY;
2712	}
2713
2714
2715	/* d = a * b (mod c) */
2716	#if defined(FREESCALE_LTC_TFM)
2717	int wolfcrypt_mp_mulmod(mp_int a, mp_int b, mp_int c, mp_int d)
2718	#else
2719	int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
2720	#endif
2721	{
2722	int res;
2723	mp_int t;
2724
2725	if ((res = mp_init_size (&t, c->used)) != MP_OKAY) {
2726	return res;
2727	}
2728
2729	res = mp_mul (a, b, &t);
2730	if (res == MP_OKAY) {
2731	res = mp_mod (&t, c, d);
2732	}
2733
2734	mp_clear (&t);
2735	return res;
2736	}
2737
2738
2739	/* d = a - b (mod c) */
2740	int mp_submod(mp_int* a, mp_int* b, mp_int* c, mp_int* d)
2741	{
2742	int res;
2743	mp_int t;
2744
2745	if ((res = mp_init (&t)) != MP_OKAY) {
2746	return res;
2747	}
2748
2749	res = mp_sub (a, b, &t);
2750	if (res == MP_OKAY) {
2751	res = mp_mod (&t, c, d);
2752	}
2753
2754	mp_clear (&t);
2755
2756	return res;
2757	}
2758
2759	/* d = a + b (mod c) */
2760	int mp_addmod(mp_int* a, mp_int* b, mp_int* c, mp_int* d)
2761	{
2762	int res;
2763	mp_int t;
2764
2765	if ((res = mp_init (&t)) != MP_OKAY) {
2766	return res;
2767	}
2768
2769	res = mp_add (a, b, &t);
2770	if (res == MP_OKAY) {
2771	res = mp_mod (&t, c, d);
2772	}
2773
2774	mp_clear (&t);
2775
2776	return res;
2777	}
2778
2779	/* computes b = aa /
2780	int mp_sqr (mp_int * a, mp_int * b)
2781	{
2782	int res;
2783
2784	{
2785	#ifdef BN_FAST_S_MP_SQR_C
2786	/* can we use the fast comba multiplier? */
2787	if ((a->used * 2 + 1) < MP_WARRAY &&
2788	a->used <
2789	(1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
2790	res = fast_s_mp_sqr (a, b);
2791	} else
2792	#endif
2793	#ifdef BN_S_MP_SQR_C
2794	res = s_mp_sqr (a, b);
2795	#else
2796	res = MP_VAL;
2797	#endif
2798	}
2799	b->sign = MP_ZPOS;
2800	return res;
2801	}
2802
2803
2804	/* high level multiplication (handles sign) */
2805	#if defined(FREESCALE_LTC_TFM)
2806	int wolfcrypt_mp_mul(mp_int a, mp_int b, mp_int *c)
2807	#else
2808	int mp_mul (mp_int * a, mp_int * b, mp_int * c)
2809	#endif
2810	{
2811	int res, neg;
2812	neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
2813
2814	{
2815	/* can we use the fast multiplier?
2816	*
2817	* The fast multiplier can be used if the output will
2818	* have less than MP_WARRAY digits and the number of
2819	* digits won't affect carry propagation
2820	*/
2821	int digs = a->used + b->used + 1;
2822
2823	#ifdef BN_FAST_S_MP_MUL_DIGS_C
2824	if ((digs < MP_WARRAY) &&
2825	MIN(a->used, b->used) <=
2826	(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2827	res = fast_s_mp_mul_digs (a, b, c, digs);
2828	} else
2829	#endif
2830	#ifdef BN_S_MP_MUL_DIGS_C
2831	res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
2832	#else
2833	res = MP_VAL;
2834	#endif
2835
2836	}
2837	c->sign = (c->used > 0) ? neg : MP_ZPOS;
2838	return res;
2839	}
2840
2841
2842	/* b = a2 /
2843	int mp_mul_2(mp_int * a, mp_int * b)
2844	{
2845	int x, res, oldused;
2846
2847	/* grow to accommodate result */
2848	if (b->alloc < a->used + 1) {
2849	if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
2850	return res;
2851	}
2852	}
2853
2854	oldused = b->used;
2855	b->used = a->used;
2856
2857	{
2858	mp_digit r, rr, tmpa, tmpb;
2859
2860	/* alias for source */
2861	tmpa = a->dp;
2862
2863	/* alias for dest */
2864	tmpb = b->dp;
2865
2866	/* carry */
2867	r = 0;
2868	for (x = 0; x < a->used; x++) {
2869
2870	/* get what will be the next carry bit from the
2871	* MSB of the current digit
2872	*/
2873	rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
2874
2875	/* now shift up this digit, add in the carry [from the previous] */
2876	tmpb++ = (mp_digit)(((tmpa++ << ((mp_digit)1)) \| r) & MP_MASK);
2877
2878	/* copy the carry that would be from the source
2879	* digit into the next iteration
2880	*/
2881	r = rr;
2882	}
2883
2884	/* new leading digit? */
2885	if (r != 0) {
2886	/* add a MSB which is always 1 at this point */
2887	*tmpb = 1;
2888	++(b->used);
2889	}
2890
2891	/* now zero any excess digits on the destination
2892	* that we didn't write to
2893	*/
2894	tmpb = b->dp + b->used;
2895	for (x = b->used; x < oldused; x++) {
2896	*tmpb++ = 0;
2897	}
2898	}
2899	b->sign = a->sign;
2900	return MP_OKAY;
2901	}
2902
2903
2904	/* divide by three (based on routine from MPI and the GMP manual) */
2905	int mp_div_3 (mp_int * a, mp_int c, mp_digit d)
2906	{
2907	mp_int q;
2908	mp_word w, t;
2909	mp_digit b;
2910	int res, ix;
2911
2912	/* b = 2*DIGIT_BIT / 3 /
2913	b = (mp_digit) ( (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3) );
2914
2915	if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
2916	return res;
2917	}
2918
2919	q.used = a->used;
2920	q.sign = a->sign;
2921	w = 0;
2922	for (ix = a->used - 1; ix >= 0; ix--) {
2923	w = (w << ((mp_word)DIGIT_BIT)) \| ((mp_word)a->dp[ix]);
2924
2925	if (w >= 3) {
2926	/* multiply w by [1/3] */
2927	t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
2928
2929	/* now subtract 3 * [w/3] from w, to get the remainder */
2930	w -= t+t+t;
2931
2932	/* fixup the remainder as required since
2933	* the optimization is not exact.
2934	*/
2935	while (w >= 3) {
2936	t += 1;
2937	w -= 3;
2938	}
2939	} else {
2940	t = 0;
2941	}
2942	q.dp[ix] = (mp_digit)t;
2943	}
2944
2945	/* [optional] store the remainder */
2946	if (d != NULL) {
2947	*d = (mp_digit)w;
2948	}
2949
2950	/* [optional] store the quotient */
2951	if (c != NULL) {
2952	mp_clamp(&q);
2953	mp_exch(&q, c);
2954	}
2955	mp_clear(&q);
2956
2957	return res;
2958	}
2959
2960
2961	/* init an mp_init for a given size */
2962	int mp_init_size (mp_int * a, int size)
2963	{
2964	int x;
2965
2966	/* pad size so there are always extra digits */
2967	size += (MP_PREC * 2) - (size % MP_PREC);
2968
2969	/* alloc mem */
2970	a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size, NULL,
2971	DYNAMIC_TYPE_BIGINT);
2972	if (a->dp == NULL) {
2973	return MP_MEM;
2974	}
2975
2976	/* set the members */
2977	a->used = 0;
2978	a->alloc = size;
2979	a->sign = MP_ZPOS;
2980	#ifdef HAVE_WOLF_BIGINT
2981	wc_bigint_init(&a->raw);
2982	#endif
2983
2984	/* zero the digits */
2985	for (x = 0; x < size; x++) {
2986	a->dp[x] = 0;
2987	}
2988
2989	return MP_OKAY;
2990	}
2991
2992
2993	/* the jist of squaring...
2994	* you do like mult except the offset of the tmpx [one that
2995	* starts closer to zero] can't equal the offset of tmpy.
2996	* So basically you set up iy like before then you min it with
2997	* (ty-tx) so that it never happens. You double all those
2998	* you add in the inner loop
2999
3000	After that loop you do the squares and add them in.
3001	*/
3002
3003	int fast_s_mp_sqr (mp_int * a, mp_int * b)
3004	{
3005	int olduse, res, pa, ix, iz;
3006	#ifdef WOLFSSL_SMALL_STACK
3007	mp_digit* W; /* uses dynamic memory and slower */
3008	#else
3009	mp_digit W[MP_WARRAY];
3010	#endif
3011	mp_digit *tmpx;
3012	mp_word W1;
3013
3014	/* grow the destination as required */
3015	pa = a->used + a->used;
3016	if (b->alloc < pa) {
3017	if ((res = mp_grow (b, pa)) != MP_OKAY) {
3018	return res;
3019	}
3020	}
3021
3022	if (pa > MP_WARRAY)
3023	return MP_RANGE; /* TAO range check */
3024
3025	#ifdef WOLFSSL_SMALL_STACK
3026	W = (mp_digit)XMALLOC(sizeof(mp_digit) MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
3027	if (W == NULL)
3028	return MP_MEM;
3029	#endif
3030
3031	/* number of output digits to produce */
3032	W1 = 0;
3033	for (ix = 0; ix < pa; ix++) {
3034	int tx, ty, iy;
3035	mp_word _W;
3036	mp_digit *tmpy;
3037
3038	/* clear counter */
3039	_W = 0;
3040
3041	/* get offsets into the two bignums */
3042	ty = MIN(a->used-1, ix);
3043	tx = ix - ty;
3044
3045	/* setup temp aliases */
3046	tmpx = a->dp + tx;
3047	tmpy = a->dp + ty;
3048
3049	/* this is the number of times the loop will iterate, essentially
3050	while (tx++ < a->used && ty-- >= 0) { ... }
3051	*/
3052	iy = MIN(a->used-tx, ty+1);
3053
3054	/* now for squaring tx can never equal ty
3055	* we halve the distance since they approach at a rate of 2x
3056	* and we have to round because odd cases need to be executed
3057	*/
3058	iy = MIN(iy, (ty-tx+1)>>1);
3059
3060	/* execute loop */
3061	for (iz = 0; iz < iy; iz++) {
3062	_W += ((mp_word)tmpx++)((mp_word)*tmpy--);
3063	}
3064
3065	/* double the inner product and add carry */
3066	_W = _W + _W + W1;
3067
3068	/* even columns have the square term in them */
3069	if ((ix&1) == 0) {
3070	_W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
3071	}
3072
3073	/* store it */
3074	W[ix] = (mp_digit)(_W & MP_MASK);
3075
3076	/* make next carry */
3077	W1 = _W >> ((mp_word)DIGIT_BIT);
3078	}
3079
3080	/* setup dest */
3081	olduse = b->used;
3082	b->used = a->used+a->used;
3083
3084	{
3085	mp_digit *tmpb;
3086	tmpb = b->dp;
3087	for (ix = 0; ix < pa; ix++) {
3088	*tmpb++ = (mp_digit)(W[ix] & MP_MASK);
3089	}
3090
3091	/* clear unused digits [that existed in the old copy of c] */
3092	for (; ix < olduse; ix++) {
3093	*tmpb++ = 0;
3094	}
3095	}
3096	mp_clamp (b);
3097
3098	#ifdef WOLFSSL_SMALL_STACK
3099	XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
3100	#endif
3101
3102	return MP_OKAY;
3103	}
3104
3105
3106	/* Fast (comba) multiplier
3107	*
3108	* This is the fast column-array [comba] multiplier. It is
3109	* designed to compute the columns of the product first
3110	* then handle the carries afterwards. This has the effect
3111	* of making the nested loops that compute the columns very
3112	* simple and schedulable on super-scalar processors.
3113	*
3114	* This has been modified to produce a variable number of
3115	* digits of output so if say only a half-product is required
3116	* you don't have to compute the upper half (a feature
3117	* required for fast Barrett reduction).
3118	*
3119	* Based on Algorithm 14.12 on pp.595 of HAC.
3120	*
3121	*/
3122	int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3123	{
3124	int olduse, res, pa, ix, iz;
3125	#ifdef WOLFSSL_SMALL_STACK
3126	mp_digit* W; /* uses dynamic memory and slower */
3127	#else
3128	mp_digit W[MP_WARRAY];
3129	#endif
3130	mp_word _W;
3131
3132	/* grow the destination as required */
3133	if (c->alloc < digs) {
3134	if ((res = mp_grow (c, digs)) != MP_OKAY) {
3135	return res;
3136	}
3137	}
3138
3139	/* number of output digits to produce */
3140	pa = MIN(digs, a->used + b->used);
3141	if (pa > MP_WARRAY)
3142	return MP_RANGE; /* TAO range check */
3143
3144	#ifdef WOLFSSL_SMALL_STACK
3145	W = (mp_digit)XMALLOC(sizeof(mp_digit) MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
3146	if (W == NULL)
3147	return MP_MEM;
3148	#endif
3149
3150	/* clear the carry */
3151	_W = 0;
3152	for (ix = 0; ix < pa; ix++) {
3153	int tx, ty;
3154	int iy;
3155	mp_digit tmpx, tmpy;
3156
3157	/* get offsets into the two bignums */
3158	ty = MIN(b->used-1, ix);
3159	tx = ix - ty;
3160
3161	/* setup temp aliases */
3162	tmpx = a->dp + tx;
3163	tmpy = b->dp + ty;
3164
3165	/* this is the number of times the loop will iterate, essentially
3166	while (tx++ < a->used && ty-- >= 0) { ... }
3167	*/
3168	iy = MIN(a->used-tx, ty+1);
3169
3170	/* execute loop */
3171	for (iz = 0; iz < iy; ++iz) {
3172	_W += ((mp_word)tmpx++)((mp_word)*tmpy--);
3173
3174	}
3175
3176	/* store term */
3177	W[ix] = (mp_digit)(((mp_digit)_W) & MP_MASK);
3178
3179	/* make next carry */
3180	_W = _W >> ((mp_word)DIGIT_BIT);
3181	}
3182
3183	/* setup dest */
3184	olduse = c->used;
3185	c->used = pa;
3186
3187	{
3188	mp_digit *tmpc;
3189	tmpc = c->dp;
3190	for (ix = 0; ix < pa; ix++) { /* JRB, +1 could read uninitialized data */
3191	/* now extract the previous digit [below the carry] */
3192	*tmpc++ = W[ix];
3193	}
3194
3195	/* clear unused digits [that existed in the old copy of c] */
3196	for (; ix < olduse; ix++) {
3197	*tmpc++ = 0;
3198	}
3199	}
3200	mp_clamp (c);
3201
3202	#ifdef WOLFSSL_SMALL_STACK
3203	XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
3204	#endif
3205
3206	return MP_OKAY;
3207	}
3208
3209
3210	/* low level squaring, b = aa, HAC pp.596-597, Algorithm 14.16 /
3211	int s_mp_sqr (mp_int * a, mp_int * b)
3212	{
3213	mp_int t;
3214	int res, ix, iy, pa;
3215	mp_word r;
3216	mp_digit u, tmpx, *tmpt;
3217
3218	pa = a->used;
3219	if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
3220	return res;
3221	}
3222
3223	/* default used is maximum possible size */
3224	t.used = 2*pa + 1;
3225
3226	for (ix = 0; ix < pa; ix++) {
3227	/* first calculate the digit at 2ix /
3228	/* calculate double precision result */
3229	r = ((mp_word) t.dp[2*ix]) +
3230	((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
3231
3232	/* store lower part in result */
3233	t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
3234
3235	/* get the carry */
3236	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3237
3238	/* left hand side of A[ix] * A[iy] */
3239	tmpx = a->dp[ix];
3240
3241	/* alias for where to store the results */
3242	tmpt = t.dp + (2*ix + 1);
3243
3244	for (iy = ix + 1; iy < pa; iy++) {
3245	/* first calculate the product */
3246	r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
3247
3248	/* now calculate the double precision result, note we use
3249	* addition instead of *2 since it's easier to optimize
3250	*/
3251	r = ((mp_word) *tmpt) + r + r + ((mp_word) u);
3252
3253	/* store lower part */
3254	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3255
3256	/* get carry */
3257	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3258	}
3259	/* propagate upwards */
3260	while (u != ((mp_digit) 0)) {
3261	r = ((mp_word) *tmpt) + ((mp_word) u);
3262	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3263	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3264	}
3265	}
3266
3267	mp_clamp (&t);
3268	mp_exch (&t, b);
3269	mp_clear (&t);
3270	return MP_OKAY;
3271	}
3272
3273
3274	/* multiplies \|a\| * \|b\| and only computes up to digs digits of result
3275	* HAC pp. 595, Algorithm 14.12 Modified so you can control how
3276	* many digits of output are created.
3277	*/
3278	int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3279	{
3280	mp_int t;
3281	int res, pa, pb, ix, iy;
3282	mp_digit u;
3283	mp_word r;
3284	mp_digit tmpx, tmpt, tmpy;
3285
3286	/* can we use the fast multiplier? */
3287	if (((digs) < MP_WARRAY) &&
3288	MIN (a->used, b->used) <
3289	(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
3290	return fast_s_mp_mul_digs (a, b, c, digs);
3291	}
3292
3293	if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
3294	return res;
3295	}
3296	t.used = digs;
3297
3298	/* compute the digits of the product directly */
3299	pa = a->used;
3300	for (ix = 0; ix < pa; ix++) {
3301	/* set the carry to zero */
3302	u = 0;
3303
3304	/* limit ourselves to making digs digits of output */
3305	pb = MIN (b->used, digs - ix);
3306
3307	/* setup some aliases */
3308	/* copy of the digit from a used within the nested loop */
3309	tmpx = a->dp[ix];
3310
3311	/* an alias for the destination shifted ix places */
3312	tmpt = t.dp + ix;
3313
3314	/* an alias for the digits of b */
3315	tmpy = b->dp;
3316
3317	/* compute the columns of the output and propagate the carry */
3318	for (iy = 0; iy < pb; iy++) {
3319	/* compute the column as a mp_word */
3320	r = ((mp_word)*tmpt) +
3321	((mp_word)tmpx) * ((mp_word)*tmpy++) +
3322	((mp_word) u);
3323
3324	/* the new column is the lower part of the result */
3325	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3326
3327	/* get the carry word from the result */
3328	u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
3329	}
3330	/* set carry if it is placed below digs */
3331	if (ix + iy < digs) {
3332	*tmpt = u;
3333	}
3334	}
3335
3336	mp_clamp (&t);
3337	mp_exch (&t, c);
3338
3339	mp_clear (&t);
3340	return MP_OKAY;
3341	}
3342
3343
3344	/*
3345	* shifts with subtractions when the result is greater than b.
3346	*
3347	* The method is slightly modified to shift B unconditionally up to just under
3348	* the leading bit of b. This saves a lot of multiple precision shifting.
3349	*/
3350	int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
3351	{
3352	int x, bits, res;
3353
3354	/* how many bits of last digit does b use */
3355	bits = mp_count_bits (b) % DIGIT_BIT;
3356
3357	if (b->used > 1) {
3358	if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1))
3359	!= MP_OKAY) {
3360	return res;
3361	}
3362	} else {
3363	if ((res = mp_set(a, 1)) != MP_OKAY) {
3364	return res;
3365	}
3366	bits = 1;
3367	}
3368
3369	/* now compute C = A * B mod b */
3370	for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
3371	if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
3372	return res;
3373	}
3374	if (mp_cmp_mag (a, b) != MP_LT) {
3375	if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
3376	return res;
3377	}
3378	}
3379	}
3380
3381	return MP_OKAY;
3382	}
3383
3384
3385	#ifdef MP_LOW_MEM
3386	#define TAB_SIZE 32
3387	#else
3388	#define TAB_SIZE 256
3389	#endif
3390
3391	int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
3392	{
3393	mp_int M[TAB_SIZE], res, mu;
3394	mp_digit buf;
3395	int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
3396	int (redux)(mp_int,mp_int,mp_int);
3397
3398	/* find window size */
3399	x = mp_count_bits (X);
3400	if (x <= 7) {
3401	winsize = 2;
3402	} else if (x <= 36) {
3403	winsize = 3;
3404	} else if (x <= 140) {
3405	winsize = 4;
3406	} else if (x <= 450) {
3407	winsize = 5;
3408	} else if (x <= 1303) {
3409	winsize = 6;
3410	} else if (x <= 3529) {
3411	winsize = 7;
3412	} else {
3413	winsize = 8;
3414	}
3415
3416	#ifdef MP_LOW_MEM
3417	if (winsize > 5) {
3418	winsize = 5;
3419	}
3420	#endif
3421
3422	/* init M array */
3423	/* init first cell */
3424	if ((err = mp_init(&M[1])) != MP_OKAY) {
3425	return err;
3426	}
3427
3428	/* now init the second half of the array */
3429	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3430	if ((err = mp_init(&M[x])) != MP_OKAY) {
3431	for (y = 1<<(winsize-1); y < x; y++) {
3432	mp_clear (&M[y]);
3433	}
3434	mp_clear(&M[1]);
3435	return err;
3436	}
3437	}
3438
3439	/* create mu, used for Barrett reduction */
3440	if ((err = mp_init (&mu)) != MP_OKAY) {
3441	goto LBL_M;
3442	}
3443
3444	if (redmode == 0) {
3445	if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
3446	goto LBL_MU;
3447	}
3448	redux = mp_reduce;
3449	} else {
3450	if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
3451	goto LBL_MU;
3452	}
3453	redux = mp_reduce_2k_l;
3454	}
3455
3456	/* create M table
3457	*
3458	* The M table contains powers of the base,
3459	* e.g. M[x] = G**x mod P
3460	*
3461	* The first half of the table is not
3462	* computed though accept for M[0] and M[1]
3463	*/
3464	if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
3465	goto LBL_MU;
3466	}
3467
3468	/* compute the value at M[1<<(winsize-1)] by squaring
3469	* M[1] (winsize-1) times
3470	*/
3471	if ((err = mp_copy (&M[1], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
3472	goto LBL_MU;
3473	}
3474
3475	for (x = 0; x < (winsize - 1); x++) {
3476	/* square it */
3477	if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))],
3478	&M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
3479	goto LBL_MU;
3480	}
3481
3482	/* reduce modulo P */
3483	if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, &mu)) != MP_OKAY) {
3484	goto LBL_MU;
3485	}
3486	}
3487
3488	/* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
3489	* for x = (2(winsize - 1) + 1) to (2winsize - 1)
3490	*/
3491	for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
3492	if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
3493	goto LBL_MU;
3494	}
3495	if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
3496	goto LBL_MU;
3497	}
3498	}
3499
3500	/* setup result */
3501	if ((err = mp_init (&res)) != MP_OKAY) {
3502	goto LBL_MU;
3503	}
3504	if ((err = mp_set (&res, 1)) != MP_OKAY) {
3505	goto LBL_MU;
3506	}
3507
3508	/* set initial mode and bit cnt */
3509	mode = 0;
3510	bitcnt = 1;
3511	buf = 0;
3512	digidx = X->used - 1;
3513	bitcpy = 0;
3514	bitbuf = 0;
3515
3516	for (;;) {
3517	/* grab next digit as required */
3518	if (--bitcnt == 0) {
3519	/* if digidx == -1 we are out of digits */
3520	if (digidx == -1) {
3521	break;
3522	}
3523	/* read next digit and reset the bitcnt */
3524	buf = X->dp[digidx--];
3525	bitcnt = (int) DIGIT_BIT;
3526	}
3527
3528	/* grab the next msb from the exponent */
3529	y = (int)(buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
3530	buf <<= (mp_digit)1;
3531
3532	/* if the bit is zero and mode == 0 then we ignore it
3533	* These represent the leading zero bits before the first 1 bit
3534	* in the exponent. Technically this opt is not required but it
3535	* does lower the # of trivial squaring/reductions used
3536	*/
3537	if (mode == 0 && y == 0) {
3538	continue;
3539	}
3540
3541	/* if the bit is zero and mode == 1 then we square */
3542	if (mode == 1 && y == 0) {
3543	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3544	goto LBL_RES;
3545	}
3546	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3547	goto LBL_RES;
3548	}
3549	continue;
3550	}
3551
3552	/* else we add it to the window */
3553	bitbuf \|= (y << (winsize - ++bitcpy));
3554	mode = 2;
3555
3556	if (bitcpy == winsize) {
3557	/* ok window is filled so square as required and multiply */
3558	/* square first */
3559	for (x = 0; x < winsize; x++) {
3560	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3561	goto LBL_RES;
3562	}
3563	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3564	goto LBL_RES;
3565	}
3566	}
3567
3568	/* then multiply */
3569	if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
3570	goto LBL_RES;
3571	}
3572	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3573	goto LBL_RES;
3574	}
3575
3576	/* empty window and reset */
3577	bitcpy = 0;
3578	bitbuf = 0;
3579	mode = 1;
3580	}
3581	}
3582
3583	/* if bits remain then square/multiply */
3584	if (mode == 2 && bitcpy > 0) {
3585	/* square then multiply if the bit is set */
3586	for (x = 0; x < bitcpy; x++) {
3587	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3588	goto LBL_RES;
3589	}
3590	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3591	goto LBL_RES;
3592	}
3593
3594	bitbuf <<= 1;
3595	if ((bitbuf & (1 << winsize)) != 0) {
3596	/* then multiply */
3597	if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
3598	goto LBL_RES;
3599	}
3600	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3601	goto LBL_RES;
3602	}
3603	}
3604	}
3605	}
3606
3607	mp_exch (&res, Y);
3608	err = MP_OKAY;
3609	LBL_RES:mp_clear (&res);
3610	LBL_MU:mp_clear (&mu);
3611	LBL_M:
3612	mp_clear(&M[1]);
3613	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3614	mp_clear (&M[x]);
3615	}
3616	return err;
3617	}
3618
3619
3620	/* pre-calculate the value required for Barrett reduction
3621	* For a given modulus "b" it calculates the value required in "a"
3622	*/
3623	int mp_reduce_setup (mp_int * a, mp_int * b)
3624	{
3625	int res;
3626
3627	if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
3628	return res;
3629	}
3630	return mp_div (a, b, a, NULL);
3631	}
3632
3633
3634	/* reduces x mod m, assumes 0 < x < m**2, mu is
3635	* precomputed via mp_reduce_setup.
3636	* From HAC pp.604 Algorithm 14.42
3637	*/
3638	int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
3639	{
3640	mp_int q;
3641	int res, um = m->used;
3642
3643	/* q = x */
3644	if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
3645	return res;
3646	}
3647
3648	/* q1 = x / b*(k-1) /
3649	mp_rshd (&q, um - 1);
3650
3651	/* according to HAC this optimization is ok */
3652	if (((mp_word) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
3653	if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
3654	goto CLEANUP;
3655	}
3656	} else {
3657	#ifdef BN_S_MP_MUL_HIGH_DIGS_C
3658	if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
3659	goto CLEANUP;
3660	}
3661	#elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
3662	if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
3663	goto CLEANUP;
3664	}
3665	#else
3666	{
3667	res = MP_VAL;
3668	goto CLEANUP;
3669	}
3670	#endif
3671	}
3672
3673	/* q3 = q2 / b*(k+1) /
3674	mp_rshd (&q, um + 1);
3675
3676	/* x = x mod b*(k+1), quick (no division) /
3677	if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
3678	goto CLEANUP;
3679	}
3680
3681	/* q = q * m mod b*(k+1), quick (no division) /
3682	if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
3683	goto CLEANUP;
3684	}
3685
3686	/* x = x - q */
3687	if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
3688	goto CLEANUP;
3689	}
3690
3691	/* If x < 0, add b*(k+1) to it /
3692	if (mp_cmp_d (x, 0) == MP_LT) {
3693	if ((res = mp_set (&q, 1)) != MP_OKAY)
3694	goto CLEANUP;
3695	if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
3696	goto CLEANUP;
3697	if ((res = mp_add (x, &q, x)) != MP_OKAY)
3698	goto CLEANUP;
3699	}
3700
3701	/* Back off if it's too big */
3702	while (mp_cmp (x, m) != MP_LT) {
3703	if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
3704	goto CLEANUP;
3705	}
3706	}
3707
3708	CLEANUP:
3709	mp_clear (&q);
3710
3711	return res;
3712	}
3713
3714
3715	/* reduces a modulo n where n is of the form 2**p - d
3716	This differs from reduce_2k since "d" can be larger
3717	than a single digit.
3718	*/
3719	int mp_reduce_2k_l(mp_int a, mp_int n, mp_int *d)
3720	{
3721	mp_int q;
3722	int p, res;
3723
3724	if ((res = mp_init(&q)) != MP_OKAY) {
3725	return res;
3726	}
3727
3728	p = mp_count_bits(n);
3729	top:
3730	/* q = a/2p, a = a mod 2p */
3731	if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
3732	goto ERR;
3733	}
3734
3735	/* q = q * d */
3736	if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
3737	goto ERR;
3738	}
3739
3740	/* a = a + q */
3741	if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
3742	goto ERR;
3743	}
3744
3745	if (mp_cmp_mag(a, n) != MP_LT) {
3746	if ((res = s_mp_sub(a, n, a)) != MP_OKAY) {
3747	goto ERR;
3748	}
3749	goto top;
3750	}
3751
3752	ERR:
3753	mp_clear(&q);
3754	return res;
3755	}
3756
3757
3758	/* determines the setup value */
3759	int mp_reduce_2k_setup_l(mp_int a, mp_int d)
3760	{
3761	int res;
3762	mp_int tmp;
3763
3764	if ((res = mp_init(&tmp)) != MP_OKAY) {
3765	return res;
3766	}
3767
3768	if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
3769	goto ERR;
3770	}
3771
3772	if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
3773	goto ERR;
3774	}
3775
3776	ERR:
3777	mp_clear(&tmp);
3778	return res;
3779	}
3780
3781
3782	/* multiplies \|a\| * \|b\| and does not compute the lower digs digits
3783	* [meant to get the higher part of the product]
3784	*/
3785	int s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3786	{
3787	mp_int t;
3788	int res, pa, pb, ix, iy;
3789	mp_digit u;
3790	mp_word r;
3791	mp_digit tmpx, tmpt, tmpy;
3792
3793	/* can we use the fast multiplier? */
3794	#ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
3795	if (((a->used + b->used + 1) < MP_WARRAY)
3796	&& MIN (a->used, b->used) <
3797	(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
3798	return fast_s_mp_mul_high_digs (a, b, c, digs);
3799	}
3800	#endif
3801
3802	if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
3803	return res;
3804	}
3805	t.used = a->used + b->used + 1;
3806
3807	pa = a->used;
3808	pb = b->used;
3809	for (ix = 0; ix < pa && a->dp; ix++) {
3810	/* clear the carry */
3811	u = 0;
3812
3813	/* left hand side of A[ix] * B[iy] */
3814	tmpx = a->dp[ix];
3815
3816	/* alias to the address of where the digits will be stored */
3817	tmpt = &(t.dp[digs]);
3818
3819	/* alias for where to read the right hand side from */
3820	tmpy = b->dp + (digs - ix);
3821
3822	for (iy = digs - ix; iy < pb; iy++) {
3823	/* calculate the double precision result */
3824	r = ((mp_word)*tmpt) +
3825	((mp_word)tmpx) * ((mp_word)*tmpy++) +
3826	((mp_word) u);
3827
3828	/* get the lower part */
3829	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3830
3831	/* carry the carry */
3832	u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
3833	}
3834	*tmpt = u;
3835	}
3836	mp_clamp (&t);
3837	mp_exch (&t, c);
3838	mp_clear (&t);
3839	return MP_OKAY;
3840	}
3841
3842
3843	/* this is a modified version of fast_s_mul_digs that only produces
3844	* output digits above digs. See the comments for fast_s_mul_digs
3845	* to see how it works.
3846	*
3847	* This is used in the Barrett reduction since for one of the multiplications
3848	* only the higher digits were needed. This essentially halves the work.
3849	*
3850	* Based on Algorithm 14.12 on pp.595 of HAC.
3851	*/
3852	int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3853	{
3854	int olduse, res, pa, ix, iz;
3855	#ifdef WOLFSSL_SMALL_STACK
3856	mp_digit* W; /* uses dynamic memory and slower */
3857	#else
3858	mp_digit W[MP_WARRAY];
3859	#endif
3860	mp_word _W;
3861
3862	if (a->dp == NULL) { /* JRB, avoid reading uninitialized values */
3863	return MP_VAL;
3864	}
3865
3866	/* grow the destination as required */
3867	pa = a->used + b->used;
3868	if (c->alloc < pa) {
3869	if ((res = mp_grow (c, pa)) != MP_OKAY) {
3870	return res;
3871	}
3872	}
3873
3874	if (pa > MP_WARRAY)
3875	return MP_RANGE; /* TAO range check */
3876
3877	#ifdef WOLFSSL_SMALL_STACK
3878	W = (mp_digit)XMALLOC(sizeof(mp_digit) MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
3879	if (W == NULL)
3880	return MP_MEM;
3881	#endif
3882
3883	/* number of output digits to produce */
3884	pa = a->used + b->used;
3885	_W = 0;
3886	for (ix = digs; ix < pa; ix++) { /* JRB, have a->dp check at top of function*/
3887	int tx, ty, iy;
3888	mp_digit tmpx, tmpy;
3889
3890	/* get offsets into the two bignums */
3891	ty = MIN(b->used-1, ix);
3892	tx = ix - ty;
3893
3894	/* setup temp aliases */
3895	tmpx = a->dp + tx;
3896	tmpy = b->dp + ty;
3897
3898	/* this is the number of times the loop will iterate, essentially its
3899	while (tx++ < a->used && ty-- >= 0) { ... }
3900	*/
3901	iy = MIN(a->used-tx, ty+1);
3902
3903	/* execute loop */
3904	for (iz = 0; iz < iy; iz++) {
3905	_W += ((mp_word)tmpx++)((mp_word)*tmpy--);
3906	}
3907
3908	/* store term */
3909	W[ix] = (mp_digit)(((mp_digit)_W) & MP_MASK);
3910
3911	/* make next carry */
3912	_W = _W >> ((mp_word)DIGIT_BIT);
3913	}
3914
3915	/* setup dest */
3916	olduse = c->used;
3917	c->used = pa;
3918
3919	{
3920	mp_digit *tmpc;
3921
3922	tmpc = c->dp + digs;
3923	for (ix = digs; ix < pa; ix++) { /* TAO, <= could potentially overwrite */
3924	/* now extract the previous digit [below the carry] */
3925	*tmpc++ = W[ix];
3926	}
3927
3928	/* clear unused digits [that existed in the old copy of c] */
3929	for (; ix < olduse; ix++) {
3930	*tmpc++ = 0;
3931	}
3932	}
3933	mp_clamp (c);
3934
3935	#ifdef WOLFSSL_SMALL_STACK
3936	XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
3937	#endif
3938
3939	return MP_OKAY;
3940	}
3941
3942
3943	#ifndef MP_SET_CHUNK_BITS
3944	#define MP_SET_CHUNK_BITS 4
3945	#endif
3946	int mp_set_int (mp_int * a, unsigned long b)
3947	{
3948	int x, res;
3949
3950	/* use direct mp_set if b is less than mp_digit max */
3951	if (b < MP_DIGIT_MAX) {
3952	return mp_set (a, (mp_digit)b);
3953	}
3954
3955	mp_zero (a);
3956
3957	/* set chunk bits at a time */
3958	for (x = 0; x < (int)(sizeof(b) * 8) / MP_SET_CHUNK_BITS; x++) {
3959	/* shift the number up chunk bits */
3960	if ((res = mp_mul_2d (a, MP_SET_CHUNK_BITS, a)) != MP_OKAY) {
3961	return res;
3962	}
3963
3964	/* OR in the top bits of the source */
3965	a->dp[0] \|= (b >> ((sizeof(b) * 8) - MP_SET_CHUNK_BITS)) &
3966	((1 << MP_SET_CHUNK_BITS) - 1);
3967
3968	/* shift the source up to the next chunk bits */
3969	b <<= MP_SET_CHUNK_BITS;
3970
3971	/* ensure that digits are not clamped off */
3972	a->used += 1;
3973	}
3974	mp_clamp (a);
3975	return MP_OKAY;
3976	}
3977
3978
3979	#if defined(WOLFSSL_KEY_GEN) \|\| defined(HAVE_ECC)
3980
3981	/* c = a * a (mod b) */
3982	int mp_sqrmod (mp_int * a, mp_int * b, mp_int * c)
3983	{
3984	int res;
3985	mp_int t;
3986
3987	if ((res = mp_init (&t)) != MP_OKAY) {
3988	return res;
3989	}
3990
3991	if ((res = mp_sqr (a, &t)) != MP_OKAY) {
3992	mp_clear (&t);
3993	return res;
3994	}
3995	res = mp_mod (&t, b, c);
3996	mp_clear (&t);
3997	return res;
3998	}
3999
4000	#endif
4001
4002
4003	#if defined(HAVE_ECC) \|\| !defined(NO_PWDBASED) \|\| defined(WOLFSSL_SNIFFER) \|\| \
4004	defined(WOLFSSL_HAVE_WOLFSCEP) \|\| defined(WOLFSSL_KEY_GEN) \|\| \
4005	defined(OPENSSL_EXTRA) \|\| defined(WC_RSA_BLINDING)
4006
4007	/* single digit addition */
4008	int mp_add_d (mp_int* a, mp_digit b, mp_int* c)
4009	{
4010	int res, ix, oldused;
4011	mp_digit tmpa, tmpc, mu;
4012
4013	/* grow c as required */
4014	if (c->alloc < a->used + 1) {
4015	if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
4016	return res;
4017	}
4018	}
4019
4020	/* if a is negative and \|a\| >= b, call c = \|a\| - b */
4021	if (a->sign == MP_NEG && (a->used > 1 \|\| a->dp[0] >= b)) {
4022	/* temporarily fix sign of a */
4023	a->sign = MP_ZPOS;
4024
4025	/* c = \|a\| - b */
4026	res = mp_sub_d(a, b, c);
4027
4028	/* fix sign */
4029	a->sign = c->sign = MP_NEG;
4030
4031	/* clamp */
4032	mp_clamp(c);
4033
4034	return res;
4035	}
4036
4037	/* old number of used digits in c */
4038	oldused = c->used;
4039
4040	/* sign always positive */
4041	c->sign = MP_ZPOS;
4042
4043	/* source alias */
4044	tmpa = a->dp;
4045
4046	/* destination alias */
4047	tmpc = c->dp;
4048
4049	/* if a is positive */
4050	if (a->sign == MP_ZPOS) {
4051	/* add digit, after this we're propagating
4052	* the carry.
4053	*/
4054	tmpc = tmpa++ + b;
4055	mu = *tmpc >> DIGIT_BIT;
4056	*tmpc++ &= MP_MASK;
4057
4058	/* now handle rest of the digits */
4059	for (ix = 1; ix < a->used; ix++) {
4060	tmpc = tmpa++ + mu;
4061	mu = *tmpc >> DIGIT_BIT;
4062	*tmpc++ &= MP_MASK;
4063	}
4064	/* set final carry */
4065	if (ix < c->alloc) {
4066	ix++;
4067	*tmpc++ = mu;
4068	}
4069
4070	/* setup size */
4071	c->used = a->used + 1;
4072	} else {
4073	/* a was negative and \|a\| < b */
4074	c->used = 1;
4075
4076	/* the result is a single digit */
4077	if (a->used == 1) {
4078	*tmpc++ = b - a->dp[0];
4079	} else {
4080	*tmpc++ = b;
4081	}
4082
4083	/* setup count so the clearing of oldused
4084	* can fall through correctly
4085	*/
4086	ix = 1;
4087	}
4088
4089	/* now zero to oldused */
4090	while (ix++ < oldused) {
4091	*tmpc++ = 0;
4092	}
4093	mp_clamp(c);
4094
4095	return MP_OKAY;
4096	}
4097
4098
4099	/* single digit subtraction */
4100	int mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
4101	{
4102	mp_digit tmpa, tmpc, mu;
4103	int res, ix, oldused;
4104
4105	/* grow c as required */
4106	if (c->alloc < a->used + 1) {
4107	if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
4108	return res;
4109	}
4110	}
4111
4112	/* if a is negative just do an unsigned
4113	* addition [with fudged signs]
4114	*/
4115	if (a->sign == MP_NEG) {
4116	a->sign = MP_ZPOS;
4117	res = mp_add_d(a, b, c);
4118	a->sign = c->sign = MP_NEG;
4119
4120	/* clamp */
4121	mp_clamp(c);
4122
4123	return res;
4124	}
4125
4126	/* setup regs */
4127	oldused = c->used;
4128	tmpa = a->dp;
4129	tmpc = c->dp;
4130
4131	/* if a <= b simply fix the single digit */
4132	if ((a->used == 1 && a->dp[0] <= b) \|\| a->used == 0) {
4133	if (a->used == 1) {
4134	tmpc++ = b - tmpa;
4135	} else {
4136	*tmpc++ = b;
4137	}
4138	ix = 1;
4139
4140	/* negative/1digit */
4141	c->sign = MP_NEG;
4142	c->used = 1;
4143	} else {
4144	/* positive/size */
4145	c->sign = MP_ZPOS;
4146	c->used = a->used;
4147
4148	/* subtract first digit */
4149	tmpc = tmpa++ - b;
4150	mu = tmpc >> (sizeof(mp_digit) CHAR_BIT - 1);
4151	*tmpc++ &= MP_MASK;
4152
4153	/* handle rest of the digits */
4154	for (ix = 1; ix < a->used; ix++) {
4155	tmpc = tmpa++ - mu;
4156	mu = tmpc >> (sizeof(mp_digit) CHAR_BIT - 1);
4157	*tmpc++ &= MP_MASK;
4158	}
4159	}
4160
4161	/* zero excess digits */
4162	while (ix++ < oldused) {
4163	*tmpc++ = 0;
4164	}
4165	mp_clamp(c);
4166	return MP_OKAY;
4167	}
4168
4169	#endif /* defined(HAVE_ECC) \|\| !defined(NO_PWDBASED) */
4170
4171
4172	#if defined(WOLFSSL_KEY_GEN) \|\| defined(HAVE_COMP_KEY) \|\| defined(HAVE_ECC)
4173
4174	static const int lnz[16] = {
4175	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
4176	};
4177
4178	/* Counts the number of lsbs which are zero before the first zero bit */
4179	int mp_cnt_lsb(mp_int *a)
4180	{
4181	int x;
4182	mp_digit q = 0, qq;
4183
4184	/* easy out */
4185	if (mp_iszero(a) == MP_YES) {
4186	return 0;
4187	}
4188
4189	/* scan lower digits until non-zero */
4190	for (x = 0; x < a->used && a->dp[x] == 0; x++) {}
4191	if (a->dp)
4192	q = a->dp[x];
4193	x *= DIGIT_BIT;
4194
4195	/* now scan this digit until a 1 is found */
4196	if ((q & 1) == 0) {
4197	do {
4198	qq = q & 15;
4199	x += lnz[qq];
4200	q >>= 4;
4201	} while (qq == 0);
4202	}
4203	return x;
4204	}
4205
4206
4207
4208
4209	static int s_is_power_of_two(mp_digit b, int *p)
4210	{
4211	int x;
4212
4213	/* fast return if no power of two */
4214	if ((b==0) \|\| (b & (b-1))) {
4215	return 0;
4216	}
4217
4218	for (x = 0; x < DIGIT_BIT; x++) {
4219	if (b == (((mp_digit)1)<<x)) {
4220	*p = x;
4221	return 1;
4222	}
4223	}
4224	return 0;
4225	}
4226
4227	/* single digit division (based on routine from MPI) */
4228	static int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
4229	{
4230	mp_int q;
4231	mp_word w;
4232	mp_digit t;
4233	int res = MP_OKAY, ix;
4234
4235	/* cannot divide by zero */
4236	if (b == 0) {
4237	return MP_VAL;
4238	}
4239
4240	/* quick outs */
4241	if (b == 1 \|\| mp_iszero(a) == MP_YES) {
4242	if (d != NULL) {
4243	*d = 0;
4244	}
4245	if (c != NULL) {
4246	return mp_copy(a, c);
4247	}
4248	return MP_OKAY;
4249	}
4250
4251	/* power of two ? */
4252	if (s_is_power_of_two(b, &ix) == 1) {
4253	if (d != NULL) {
4254	*d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
4255	}
4256	if (c != NULL) {
4257	return mp_div_2d(a, ix, c, NULL);
4258	}
4259	return MP_OKAY;
4260	}
4261
4262	#ifdef BN_MP_DIV_3_C
4263	/* three? */
4264	if (b == 3) {
4265	return mp_div_3(a, c, d);
4266	}
4267	#endif
4268
4269	/* no easy answer [c'est la vie]. Just division */
4270	if (c != NULL) {
4271	if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
4272	return res;
4273	}
4274
4275	q.used = a->used;
4276	q.sign = a->sign;
4277	}
4278	else {
4279	if ((res = mp_init(&q)) != MP_OKAY) {
4280	return res;
4281	}
4282	}
4283
4284
4285	w = 0;
4286	for (ix = a->used - 1; ix >= 0; ix--) {
4287	w = (w << ((mp_word)DIGIT_BIT)) \| ((mp_word)a->dp[ix]);
4288
4289	if (w >= b) {
4290	t = (mp_digit)(w / b);
4291	w -= ((mp_word)t) * ((mp_word)b);
4292	} else {
4293	t = 0;
4294	}
4295	if (c != NULL)
4296	q.dp[ix] = (mp_digit)t;
4297	}
4298
4299	if (d != NULL) {
4300	*d = (mp_digit)w;
4301	}
4302
4303	if (c != NULL) {
4304	mp_clamp(&q);
4305	mp_exch(&q, c);
4306	}
4307	mp_clear(&q);
4308
4309	return res;
4310	}
4311
4312
4313	int mp_mod_d (mp_int * a, mp_digit b, mp_digit * c)
4314	{
4315	return mp_div_d(a, b, NULL, c);
4316	}
4317
4318	#endif /* defined(WOLFSSL_KEY_GEN)\|\|defined(HAVE_COMP_KEY)\|\|defined(HAVE_ECC) */
4319
4320	#ifdef WOLFSSL_KEY_GEN
4321
4322	const mp_digit ltm_prime_tab[PRIME_SIZE] = {
4323	0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
4324	0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
4325	0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
4326	0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F,
4327	#ifndef MP_8BIT
4328	0x0083,
4329	0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
4330	0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
4331	0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
4332	0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
4333
4334	0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
4335	0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
4336	0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
4337	0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
4338	0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
4339	0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
4340	0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
4341	0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
4342
4343	0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
4344	0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
4345	0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
4346	0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
4347	0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
4348	0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
4349	0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
4350	0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
4351
4352	0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
4353	0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
4354	0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
4355	0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
4356	0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
4357	0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
4358	0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
4359	0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
4360	#endif
4361	};
4362
4363
4364	/* Miller-Rabin test of "a" to the base of "b" as described in
4365	* HAC pp. 139 Algorithm 4.24
4366	*
4367	* Sets result to 0 if definitely composite or 1 if probably prime.
4368	* Randomly the chance of error is no more than 1/4 and often
4369	* very much lower.
4370	*/
4371	static int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
4372	{
4373	mp_int n1, y, r;
4374	int s, j, err;
4375
4376	/* default */
4377	*result = MP_NO;
4378
4379	/* ensure b > 1 */
4380	if (mp_cmp_d(b, 1) != MP_GT) {
4381	return MP_VAL;
4382	}
4383
4384	/* get n1 = a - 1 */
4385	if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
4386	return err;
4387	}
4388	if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
4389	goto LBL_N1;
4390	}
4391
4392	/* set 2*s r = n1 */
4393	if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
4394	goto LBL_N1;
4395	}
4396
4397	/* count the number of least significant bits
4398	* which are zero
4399	*/
4400	s = mp_cnt_lsb(&r);
4401
4402	/* now divide n - 1 by 2*s /
4403	if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
4404	goto LBL_R;
4405	}
4406
4407	/* compute y = b*r mod a /
4408	if ((err = mp_init (&y)) != MP_OKAY) {
4409	goto LBL_R;
4410	}
4411	if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
4412	goto LBL_Y;
4413	}
4414
4415	/* if y != 1 and y != n1 do */
4416	if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
4417	j = 1;
4418	/* while j <= s-1 and y != n1 */
4419	while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
4420	if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
4421	goto LBL_Y;
4422	}
4423
4424	/* if y == 1 then composite */
4425	if (mp_cmp_d (&y, 1) == MP_EQ) {
4426	goto LBL_Y;
4427	}
4428
4429	++j;
4430	}
4431
4432	/* if y != n1 then composite */
4433	if (mp_cmp (&y, &n1) != MP_EQ) {
4434	goto LBL_Y;
4435	}
4436	}
4437
4438	/* probably prime now */
4439	*result = MP_YES;
4440	LBL_Y:mp_clear (&y);
4441	LBL_R:mp_clear (&r);
4442	LBL_N1:mp_clear (&n1);
4443	return err;
4444	}
4445
4446
4447	/* determines if an integers is divisible by one
4448	* of the first PRIME_SIZE primes or not
4449	*
4450	* sets result to 0 if not, 1 if yes
4451	*/
4452	static int mp_prime_is_divisible (mp_int * a, int *result)
4453	{
4454	int err, ix;
4455	mp_digit res;
4456
4457	/* default to not */
4458	*result = MP_NO;
4459
4460	for (ix = 0; ix < PRIME_SIZE; ix++) {
4461	/* what is a mod LBL_prime_tab[ix] */
4462	if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) {
4463	return err;
4464	}
4465
4466	/* is the residue zero? */
4467	if (res == 0) {
4468	*result = MP_YES;
4469	return MP_OKAY;
4470	}
4471	}
4472
4473	return MP_OKAY;
4474	}
4475
4476	static const int USE_BBS = 1;
4477
4478	int mp_rand_prime(mp_int* N, int len, WC_RNG* rng, void* heap)
4479	{
4480	int err, res, type;
4481	byte* buf;
4482
4483	if (N == NULL \|\| rng == NULL)
4484	return MP_VAL;
4485
4486	/* get type */
4487	if (len < 0) {
4488	type = USE_BBS;
4489	len = -len;
4490	} else {
4491	type = 0;
4492	}
4493
4494	/* allow sizes between 2 and 512 bytes for a prime size */
4495	if (len < 2 \|\| len > 512) {
4496	return MP_VAL;
4497	}
4498
4499	/* allocate buffer to work with */
4500	buf = (byte*)XMALLOC(len, heap, DYNAMIC_TYPE_RSA);
4501	if (buf == NULL) {
4502	return MP_MEM;
4503	}
4504	XMEMSET(buf, 0, len);
4505
4506	do {
4507	#ifdef SHOW_GEN
4508	printf(".");
4509	fflush(stdout);
4510	#endif
4511	/* generate value */
4512	err = wc_RNG_GenerateBlock(rng, buf, len);
4513	if (err != 0) {
4514	XFREE(buf, heap, DYNAMIC_TYPE_RSA);
4515	return err;
4516	}
4517
4518	/* munge bits */
4519	buf[0] \|= 0x80 \| 0x40;
4520	buf[len-1] \|= 0x01 \| ((type & USE_BBS) ? 0x02 : 0x00);
4521
4522	/* load value */
4523	if ((err = mp_read_unsigned_bin(N, buf, len)) != MP_OKAY) {
4524	XFREE(buf, heap, DYNAMIC_TYPE_RSA);
4525	return err;
4526	}
4527
4528	/* test */
4529	if ((err = mp_prime_is_prime(N, 8, &res)) != MP_OKAY) {
4530	XFREE(buf, heap, DYNAMIC_TYPE_RSA);
4531	return err;
4532	}
4533	} while (res == MP_NO);
4534
4535	XMEMSET(buf, 0, len);
4536	XFREE(buf, heap, DYNAMIC_TYPE_RSA);
4537
4538	return MP_OKAY;
4539	}
4540
4541	/*
4542	* Sets result to 1 if probably prime, 0 otherwise
4543	*/
4544	int mp_prime_is_prime (mp_int * a, int t, int *result)
4545	{
4546	mp_int b;
4547	int ix, err, res;
4548
4549	/* default to no */
4550	*result = MP_NO;
4551
4552	/* valid value of t? */
4553	if (t <= 0 \|\| t > PRIME_SIZE) {
4554	return MP_VAL;
4555	}
4556
4557	/* is the input equal to one of the primes in the table? */
4558	for (ix = 0; ix < PRIME_SIZE; ix++) {
4559	if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
4560	*result = 1;
4561	return MP_OKAY;
4562	}
4563	}
4564
4565	/* first perform trial division */
4566	if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
4567	return err;
4568	}
4569
4570	/* return if it was trivially divisible */
4571	if (res == MP_YES) {
4572	return MP_OKAY;
4573	}
4574
4575	/* now perform the miller-rabin rounds */
4576	if ((err = mp_init (&b)) != MP_OKAY) {
4577	return err;
4578	}
4579
4580	for (ix = 0; ix < t; ix++) {
4581	/* set the prime */
4582	if ((err = mp_set (&b, ltm_prime_tab[ix])) != MP_OKAY) {
4583	goto LBL_B;
4584	}
4585
4586	if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
4587	goto LBL_B;
4588	}
4589
4590	if (res == MP_NO) {
4591	goto LBL_B;
4592	}
4593	}
4594
4595	/* passed the test */
4596	*result = MP_YES;
4597	LBL_B:mp_clear (&b);
4598	return err;
4599	}
4600
4601
4602	/* computes least common multiple as \|ab\|/(a, b) /
4603	int mp_lcm (mp_int * a, mp_int * b, mp_int * c)
4604	{
4605	int res;
4606	mp_int t1, t2;
4607
4608
4609	if ((res = mp_init_multi (&t1, &t2, NULL, NULL, NULL, NULL)) != MP_OKAY) {
4610	return res;
4611	}
4612
4613	/* t1 = get the GCD of the two inputs */
4614	if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
4615	goto LBL_T;
4616	}
4617
4618	/* divide the smallest by the GCD */
4619	if (mp_cmp_mag(a, b) == MP_LT) {
4620	/* store quotient in t2 such that t2 * b is the LCM */
4621	if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
4622	goto LBL_T;
4623	}
4624	res = mp_mul(b, &t2, c);
4625	} else {
4626	/* store quotient in t2 such that t2 * a is the LCM */
4627	if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
4628	goto LBL_T;
4629	}
4630	res = mp_mul(a, &t2, c);
4631	}
4632
4633	/* fix the sign to positive */
4634	c->sign = MP_ZPOS;
4635
4636	LBL_T:
4637	mp_clear(&t1);
4638	mp_clear(&t2);
4639	return res;
4640	}
4641
4642
4643
4644	/* Greatest Common Divisor using the binary method */
4645	int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
4646	{
4647	mp_int u, v;
4648	int k, u_lsb, v_lsb, res;
4649
4650	/* either zero than gcd is the largest */
4651	if (mp_iszero (a) == MP_YES) {
4652	return mp_abs (b, c);
4653	}
4654	if (mp_iszero (b) == MP_YES) {
4655	return mp_abs (a, c);
4656	}
4657
4658	/* get copies of a and b we can modify */
4659	if ((res = mp_init_copy (&u, a)) != MP_OKAY) {
4660	return res;
4661	}
4662
4663	if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
4664	goto LBL_U;
4665	}
4666
4667	/* must be positive for the remainder of the algorithm */
4668	u.sign = v.sign = MP_ZPOS;
4669
4670	/* B1. Find the common power of two for u and v */
4671	u_lsb = mp_cnt_lsb(&u);
4672	v_lsb = mp_cnt_lsb(&v);
4673	k = MIN(u_lsb, v_lsb);
4674
4675	if (k > 0) {
4676	/* divide the power of two out */
4677	if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
4678	goto LBL_V;
4679	}
4680
4681	if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
4682	goto LBL_V;
4683	}
4684	}
4685
4686	/* divide any remaining factors of two out */
4687	if (u_lsb != k) {
4688	if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
4689	goto LBL_V;
4690	}
4691	}
4692
4693	if (v_lsb != k) {
4694	if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
4695	goto LBL_V;
4696	}
4697	}
4698
4699	while (mp_iszero(&v) == MP_NO) {
4700	/* make sure v is the largest */
4701	if (mp_cmp_mag(&u, &v) == MP_GT) {
4702	/* swap u and v to make sure v is >= u */
4703	mp_exch(&u, &v);
4704	}
4705
4706	/* subtract smallest from largest */
4707	if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
4708	goto LBL_V;
4709	}
4710
4711	/* Divide out all factors of two */
4712	if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
4713	goto LBL_V;
4714	}
4715	}
4716
4717	/* multiply by 2*k which we divided out at the beginning /
4718	if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
4719	goto LBL_V;
4720	}
4721	c->sign = MP_ZPOS;
4722	res = MP_OKAY;
4723	LBL_V:mp_clear (&u);
4724	LBL_U:mp_clear (&v);
4725	return res;
4726	}
4727
4728	#endif /* WOLFSSL_KEY_GEN */
4729
4730
4731	#if defined(HAVE_ECC) \|\| defined(WOLFSSL_KEY_GEN) \|\| defined(HAVE_COMP_KEY)
4732
4733	/* chars used in radix conversions */
4734	const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ\
4735	abcdefghijklmnopqrstuvwxyz+/";
4736	#endif
4737
4738	#ifdef HAVE_ECC
4739	/* read a string [ASCII] in a given radix */
4740	int mp_read_radix (mp_int * a, const char *str, int radix)
4741	{
4742	int y, res, neg;
4743	char ch;
4744
4745	/* zero the digit bignum */
4746	mp_zero(a);
4747
4748	/* make sure the radix is ok */
4749	if (radix < 2 \|\| radix > 64) {
4750	return MP_VAL;
4751	}
4752
4753	/* if the leading digit is a
4754	* minus set the sign to negative.
4755	*/
4756	if (*str == '-') {
4757	++str;
4758	neg = MP_NEG;
4759	} else {
4760	neg = MP_ZPOS;
4761	}
4762
4763	/* set the integer to the default of zero */
4764	mp_zero (a);
4765
4766	/* process each digit of the string */
4767	while (*str != '\0') {
4768	/* if the radix <= 36 the conversion is case insensitive
4769	* this allows numbers like 1AB and 1ab to represent the same value
4770	* [e.g. in hex]
4771	*/
4772	ch = (radix <= 36) ? (char)XTOUPPER((unsigned char)str) : str;
4773	for (y = 0; y < 64; y++) {
4774	if (ch == mp_s_rmap[y]) {
4775	break;
4776	}
4777	}
4778
4779	/* if the char was found in the map
4780	* and is less than the given radix add it
4781	* to the number, otherwise exit the loop.
4782	*/
4783	if (y < radix) {
4784	if ((res = mp_mul_d (a, (mp_digit) radix, a)) != MP_OKAY) {
4785	return res;
4786	}
4787	if ((res = mp_add_d (a, (mp_digit) y, a)) != MP_OKAY) {
4788	return res;
4789	}
4790	} else {
4791	break;
4792	}
4793	++str;
4794	}
4795
4796	/* if digit in isn't null term, then invalid character was found */
4797	if (*str != '\0') {
4798	mp_zero (a);
4799	return MP_VAL;
4800	}
4801
4802	/* set the sign only if a != 0 */
4803	if (mp_iszero(a) != MP_YES) {
4804	a->sign = neg;
4805	}
4806	return MP_OKAY;
4807	}
4808	#endif /* HAVE_ECC */
4809
4810	#if defined(WOLFSSL_KEY_GEN) \|\| defined(HAVE_COMP_KEY) \|\| \
4811	defined(WOLFSSL_DEBUG_MATH)
4812
4813	/* returns size of ASCII representation */
4814	int mp_radix_size (mp_int a, int radix, int size)
4815	{
4816	int res, digs;
4817	mp_int t;
4818	mp_digit d;
4819
4820	*size = 0;
4821
4822	/* special case for binary */
4823	if (radix == 2) {
4824	*size = mp_count_bits (a) + (a->sign == MP_NEG ? 1 : 0) + 1;
4825	return MP_OKAY;
4826	}
4827
4828	/* make sure the radix is in range */
4829	if (radix < 2 \|\| radix > 64) {
4830	return MP_VAL;
4831	}
4832
4833	if (mp_iszero(a) == MP_YES) {
4834	*size = 2;
4835	return MP_OKAY;
4836	}
4837
4838	/* digs is the digit count */
4839	digs = 0;
4840
4841	/* if it's negative add one for the sign */
4842	if (a->sign == MP_NEG) {
4843	++digs;
4844	}
4845
4846	/* init a copy of the input */
4847	if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
4848	return res;
4849	}
4850
4851	/* force temp to positive */
4852	t.sign = MP_ZPOS;
4853
4854	/* fetch out all of the digits */
4855	while (mp_iszero (&t) == MP_NO) {
4856	if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
4857	mp_clear (&t);
4858	return res;
4859	}
4860	++digs;
4861	}
4862	mp_clear (&t);
4863
4864	/* return digs + 1, the 1 is for the NULL byte that would be required. */
4865	*size = digs + 1;
4866	return MP_OKAY;
4867	}
4868
4869	/* stores a bignum as a ASCII string in a given radix (2..64) */
4870	int mp_toradix (mp_int a, char str, int radix)
4871	{
4872	int res, digs;
4873	mp_int t;
4874	mp_digit d;
4875	char *_s = str;
4876
4877	/* check range of the radix */
4878	if (radix < 2 \|\| radix > 64) {
4879	return MP_VAL;
4880	}
4881
4882	/* quick out if its zero */
4883	if (mp_iszero(a) == MP_YES) {
4884	*str++ = '0';
4885	*str = '\0';
4886	return MP_OKAY;
4887	}
4888
4889	if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
4890	return res;
4891	}
4892
4893	/* if it is negative output a - */
4894	if (t.sign == MP_NEG) {
4895	++_s;
4896	*str++ = '-';
4897	t.sign = MP_ZPOS;
4898	}
4899
4900	digs = 0;
4901	while (mp_iszero (&t) == MP_NO) {
4902	if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
4903	mp_clear (&t);
4904	return res;
4905	}
4906	*str++ = mp_s_rmap[d];
4907	++digs;
4908	}
4909
4910	/* reverse the digits of the string. In this case _s points
4911	* to the first digit [excluding the sign] of the number]
4912	*/
4913	bn_reverse ((unsigned char *)_s, digs);
4914
4915	/* append a NULL so the string is properly terminated */
4916	*str = '\0';
4917
4918	mp_clear (&t);
4919	return MP_OKAY;
4920	}
4921
4922	#ifdef WOLFSSL_DEBUG_MATH
4923	void mp_dump(const char* desc, mp_int* a, byte verbose)
4924	{
4925	char *buffer;
4926	int size = a->alloc;
4927
4928	buffer = (char)XMALLOC(size sizeof(mp_digit) * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
4929	if (buffer == NULL) {
4930	return;
4931	}
4932
4933	printf("%s: ptr=%p, used=%d, sign=%d, size=%d, mpd=%d\n",
4934	desc, a, a->used, a->sign, size, (int)sizeof(mp_digit));
4935
4936	mp_toradix(a, buffer, 16);
4937	printf(" %s\n ", buffer);
4938
4939	if (verbose) {
4940	int i;
4941	for(i=0; i<a->alloc * (int)sizeof(mp_digit); i++) {
4942	printf("%02x ", (((byte)a->dp) + i));
4943	}
4944	printf("\n");
4945	}
4946
4947	XFREE(buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
4948	}
4949	#endif /* WOLFSSL_DEBUG_MATH */
4950
4951	#endif /* defined(WOLFSSL_KEY_GEN) \|\| defined(HAVE_COMP_KEY) \|\| defined(WOLFSSL_DEBUG_MATH) */
4952
4953	#endif /* USE_FAST_MATH */
4954
4955	#endif /* NO_BIG_INT */
4956

Note: See TracBrowser for help on using the repository browser.

Download in other formats: