Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

integer.c@ 464

Last change on this file since 464 was 464, checked in by coas-nagasima, 3 years ago
WolfSSLとAzure IoT SDKを更新
Property svn:eol-style set to `native` Property svn:mime-type set to `text/x-csrc;charset=UTF-8`
File size: 118.4 KB

Line
1	/* integer.c
2	*
3	* Copyright (C) 2006-2020 wolfSSL Inc.
4	*
5	* This file is part of wolfSSL.
6	*
7	* wolfSSL is free software; you can redistribute it and/or modify
8	* it under the terms of the GNU General Public License as published by
9	* the Free Software Foundation; either version 2 of the License, or
10	* (at your option) any later version.
11	*
12	* wolfSSL is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15	* GNU General Public License for more details.
16	*
17	* You should have received a copy of the GNU General Public License
18	* along with this program; if not, write to the Free Software
19	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20	*/
21
22
23
24	/*
25	* Based on public domain LibTomMath 0.38 by Tom St Denis, tomstdenis@iahu.ca,
26	* http://math.libtomcrypt.com
27	*/
28
29
30	#ifdef HAVE_CONFIG_H
31	#include <config.h>
32	#endif
33
34	/* in case user set USE_FAST_MATH there */
35	#include <wolfssl/wolfcrypt/settings.h>
36
37	#ifdef NO_INLINE
38	#include <wolfssl/wolfcrypt/misc.h>
39	#else
40	#define WOLFSSL_MISC_INCLUDED
41	#include <wolfcrypt/src/misc.c>
42	#endif
43
44	#ifndef NO_BIG_INT
45
46	#ifndef USE_FAST_MATH
47
48	#ifndef WOLFSSL_SP_MATH
49
50	#include <wolfssl/wolfcrypt/integer.h>
51
52	#if defined(FREESCALE_LTC_TFM)
53	#include <wolfssl/wolfcrypt/port/nxp/ksdk_port.h>
54	#endif
55	#ifdef WOLFSSL_DEBUG_MATH
56	#include <stdio.h>
57	#endif
58
59	#ifdef SHOW_GEN
60	#ifndef NO_STDIO_FILESYSTEM
61	#include <stdio.h>
62	#endif
63	#endif
64
65	#if defined(WOLFSSL_HAVE_SP_RSA) \|\| defined(WOLFSSL_HAVE_SP_DH)
66	#ifdef __cplusplus
67	extern "C" {
68	#endif
69	WOLFSSL_LOCAL int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod,
70	mp_int* res);
71	WOLFSSL_LOCAL int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod,
72	mp_int* res);
73	WOLFSSL_LOCAL int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod,
74	mp_int* res);
75	WOLFSSL_LOCAL int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod,
76	mp_int* res);
77	WOLFSSL_LOCAL int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod,
78	mp_int* res);
79	#ifdef __cplusplus
80	} /* extern "C" */
81	#endif
82	#endif
83
84	/* reverse an array, used for radix code */
85	static void
86	bn_reverse (unsigned char *s, int len)
87	{
88	int ix, iy;
89	unsigned char t;
90
91	ix = 0;
92	iy = len - 1;
93	while (ix < iy) {
94	t = s[ix];
95	s[ix] = s[iy];
96	s[iy] = t;
97	++ix;
98	--iy;
99	}
100	}
101
102	/* math settings check */
103	word32 CheckRunTimeSettings(void)
104	{
105	return CTC_SETTINGS;
106	}
107
108
109	/* handle up to 6 inits */
110	int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e,
111	mp_int* f)
112	{
113	int res = MP_OKAY;
114
115	if (a) XMEMSET(a, 0, sizeof(mp_int));
116	if (b) XMEMSET(b, 0, sizeof(mp_int));
117	if (c) XMEMSET(c, 0, sizeof(mp_int));
118	if (d) XMEMSET(d, 0, sizeof(mp_int));
119	if (e) XMEMSET(e, 0, sizeof(mp_int));
120	if (f) XMEMSET(f, 0, sizeof(mp_int));
121
122	if (a && ((res = mp_init(a)) != MP_OKAY))
123	return res;
124
125	if (b && ((res = mp_init(b)) != MP_OKAY)) {
126	mp_clear(a);
127	return res;
128	}
129
130	if (c && ((res = mp_init(c)) != MP_OKAY)) {
131	mp_clear(a); mp_clear(b);
132	return res;
133	}
134
135	if (d && ((res = mp_init(d)) != MP_OKAY)) {
136	mp_clear(a); mp_clear(b); mp_clear(c);
137	return res;
138	}
139
140	if (e && ((res = mp_init(e)) != MP_OKAY)) {
141	mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d);
142	return res;
143	}
144
145	if (f && ((res = mp_init(f)) != MP_OKAY)) {
146	mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d); mp_clear(e);
147	return res;
148	}
149
150	return res;
151	}
152
153
154	/* init a new mp_int */
155	int mp_init (mp_int * a)
156	{
157	/* Safeguard against passing in a null pointer */
158	if (a == NULL)
159	return MP_VAL;
160
161	/* defer allocation until mp_grow */
162	a->dp = NULL;
163
164	/* set the used to zero, allocated digits to the default precision
165	* and sign to positive */
166	a->used = 0;
167	a->alloc = 0;
168	a->sign = MP_ZPOS;
169	#ifdef HAVE_WOLF_BIGINT
170	wc_bigint_init(&a->raw);
171	#endif
172
173	return MP_OKAY;
174	}
175
176
177	/* clear one (frees) */
178	void mp_clear (mp_int * a)
179	{
180	int i;
181
182	if (a == NULL)
183	return;
184
185	/* only do anything if a hasn't been freed previously */
186	if (a->dp != NULL) {
187	/* first zero the digits */
188	for (i = 0; i < a->used; i++) {
189	a->dp[i] = 0;
190	}
191
192	/* free ram */
193	mp_free(a);
194
195	/* reset members to make debugging easier */
196	a->alloc = a->used = 0;
197	a->sign = MP_ZPOS;
198	}
199	}
200
201	void mp_free (mp_int * a)
202	{
203	/* only do anything if a hasn't been freed previously */
204	if (a->dp != NULL) {
205	/* free ram */
206	XFREE(a->dp, 0, DYNAMIC_TYPE_BIGINT);
207	a->dp = NULL;
208	}
209
210	#ifdef HAVE_WOLF_BIGINT
211	wc_bigint_free(&a->raw);
212	#endif
213	}
214
215	void mp_forcezero(mp_int * a)
216	{
217	if (a == NULL)
218	return;
219
220	/* only do anything if a hasn't been freed previously */
221	if (a->dp != NULL) {
222	/* force zero the used digits */
223	ForceZero(a->dp, a->used * sizeof(mp_digit));
224	#ifdef HAVE_WOLF_BIGINT
225	wc_bigint_zero(&a->raw);
226	#endif
227	/* free ram */
228	mp_free(a);
229
230	/* reset members to make debugging easier */
231	a->alloc = a->used = 0;
232	a->sign = MP_ZPOS;
233	}
234
235	a->sign = MP_ZPOS;
236	a->used = 0;
237	}
238
239
240	/* get the size for an unsigned equivalent */
241	int mp_unsigned_bin_size (mp_int * a)
242	{
243	int size = mp_count_bits (a);
244	return (size / 8 + ((size & 7) != 0 ? 1 : 0));
245	}
246
247
248	/* returns the number of bits in an int */
249	int mp_count_bits (mp_int * a)
250	{
251	int r;
252	mp_digit q;
253
254	/* shortcut */
255	if (a->used == 0) {
256	return 0;
257	}
258
259	/* get number of digits and add that */
260	r = (a->used - 1) * DIGIT_BIT;
261
262	/* take the last digit and count the bits in it */
263	q = a->dp[a->used - 1];
264	while (q > ((mp_digit) 0)) {
265	++r;
266	q >>= ((mp_digit) 1);
267	}
268	return r;
269	}
270
271
272	int mp_leading_bit (mp_int * a)
273	{
274	int c = mp_count_bits(a);
275
276	if (c == 0) return 0;
277	return (c % 8) == 0;
278	}
279
280	int mp_to_unsigned_bin_at_pos(int x, mp_int t, unsigned char b)
281	{
282	int res = 0;
283	while (mp_iszero(t) == MP_NO) {
284	#ifndef MP_8BIT
285	b[x++] = (unsigned char) (t->dp[0] & 255);
286	#else
287	b[x++] = (unsigned char) (t->dp[0] \| ((t->dp[1] & 0x01) << 7));
288	#endif
289	if ((res = mp_div_2d (t, 8, t, NULL)) != MP_OKAY) {
290	return res;
291	}
292	res = x;
293	}
294	return res;
295	}
296
297	/* store in unsigned [big endian] format */
298	int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
299	{
300	int x, res;
301	mp_int t;
302
303	if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
304	return res;
305	}
306
307	x = mp_to_unsigned_bin_at_pos(0, &t, b);
308	if (x < 0) {
309	mp_clear(&t);
310	return x;
311	}
312
313	bn_reverse (b, x);
314	mp_clear (&t);
315	return res;
316	}
317
318	int mp_to_unsigned_bin_len(mp_int * a, unsigned char *b, int c)
319	{
320	int i, len;
321
322	len = mp_unsigned_bin_size(a);
323
324	if (len > c) {
325	return MP_VAL;
326	}
327
328	/* pad front w/ zeros to match length */
329	for (i = 0; i < c - len; i++) {
330	b[i] = 0x00;
331	}
332	return mp_to_unsigned_bin(a, b + i);
333	}
334
335	/* creates "a" then copies b into it */
336	int mp_init_copy (mp_int * a, mp_int * b)
337	{
338	int res;
339
340	if ((res = mp_init_size (a, b->used)) != MP_OKAY) {
341	return res;
342	}
343
344	if((res = mp_copy (b, a)) != MP_OKAY) {
345	mp_clear(a);
346	}
347
348	return res;
349	}
350
351
352	/* copy, b = a */
353	int mp_copy (mp_int * a, mp_int * b)
354	{
355	int res, n;
356
357	/* Safeguard against passing in a null pointer */
358	if (a == NULL \|\| b == NULL)
359	return MP_VAL;
360
361	/* if dst == src do nothing */
362	if (a == b) {
363	return MP_OKAY;
364	}
365
366	/* grow dest */
367	if (b->alloc < a->used \|\| b->alloc == 0) {
368	if ((res = mp_grow (b, a->used)) != MP_OKAY) {
369	return res;
370	}
371	}
372
373	/* zero b and copy the parameters over */
374	{
375	mp_digit tmpa, tmpb;
376
377	/* pointer aliases */
378
379	/* source */
380	tmpa = a->dp;
381
382	/* destination */
383	tmpb = b->dp;
384
385	/* copy all the digits */
386	for (n = 0; n < a->used; n++) {
387	tmpb++ = tmpa++;
388	}
389
390	/* clear high digits */
391	for (; n < b->used && b->dp; n++) {
392	*tmpb++ = 0;
393	}
394	}
395
396	/* copy used count and sign */
397	b->used = a->used;
398	b->sign = a->sign;
399	return MP_OKAY;
400	}
401
402
403	/* grow as required */
404	int mp_grow (mp_int * a, int size)
405	{
406	int i;
407	mp_digit *tmp;
408
409	/* if the alloc size is smaller alloc more ram */
410	if (a->alloc < size \|\| size == 0) {
411	/* ensure there are always at least MP_PREC digits extra on top */
412	size += (MP_PREC * 2) - (size % MP_PREC);
413
414	/* reallocate the array a->dp
415	*
416	* We store the return in a temporary variable
417	* in case the operation failed we don't want
418	* to overwrite the dp member of a.
419	*/
420	tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size, NULL,
421	DYNAMIC_TYPE_BIGINT);
422	if (tmp == NULL) {
423	/* reallocation failed but "a" is still valid [can be freed] */
424	return MP_MEM;
425	}
426
427	/* reallocation succeeded so set a->dp */
428	a->dp = tmp;
429
430	/* zero excess digits */
431	i = a->alloc;
432	a->alloc = size;
433	for (; i < a->alloc; i++) {
434	a->dp[i] = 0;
435	}
436	}
437	return MP_OKAY;
438	}
439
440
441	/* shift right by a certain bit count (store quotient in c, optional
442	remainder in d) */
443	int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
444	{
445	int D, res;
446	mp_int t;
447
448
449	/* if the shift count is <= 0 then we do no work */
450	if (b <= 0) {
451	res = mp_copy (a, c);
452	if (d != NULL) {
453	mp_zero (d);
454	}
455	return res;
456	}
457
458	if ((res = mp_init (&t)) != MP_OKAY) {
459	return res;
460	}
461
462	/* get the remainder */
463	if (d != NULL) {
464	if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
465	mp_clear (&t);
466	return res;
467	}
468	}
469
470	/* copy */
471	if ((res = mp_copy (a, c)) != MP_OKAY) {
472	mp_clear (&t);
473	return res;
474	}
475
476	/* shift by as many digits in the bit count */
477	if (b >= (int)DIGIT_BIT) {
478	mp_rshd (c, b / DIGIT_BIT);
479	}
480
481	/* shift any bit count < DIGIT_BIT */
482	D = (b % DIGIT_BIT);
483	if (D != 0) {
484	mp_rshb(c, D);
485	}
486	mp_clamp (c);
487	if (d != NULL) {
488	mp_exch (&t, d);
489	}
490	mp_clear (&t);
491	return MP_OKAY;
492	}
493
494
495	/* set to zero */
496	void mp_zero (mp_int * a)
497	{
498	int n;
499	mp_digit *tmp;
500
501	if (a == NULL)
502	return;
503
504	a->sign = MP_ZPOS;
505	a->used = 0;
506
507	tmp = a->dp;
508	for (n = 0; n < a->alloc; n++) {
509	*tmp++ = 0;
510	}
511	}
512
513
514	/* trim unused digits
515	*
516	* This is used to ensure that leading zero digits are
517	* trimmed and the leading "used" digit will be non-zero
518	* Typically very fast. Also fixes the sign if there
519	* are no more leading digits
520	*/
521	void mp_clamp (mp_int * a)
522	{
523	/* decrease used while the most significant digit is
524	* zero.
525	*/
526	while (a->used > 0 && a->dp[a->used - 1] == 0) {
527	--(a->used);
528	}
529
530	/* reset the sign flag if used == 0 */
531	if (a->used == 0) {
532	a->sign = MP_ZPOS;
533	}
534	}
535
536
537	/* swap the elements of two integers, for cases where you can't simply swap the
538	* mp_int pointers around
539	*/
540	void mp_exch (mp_int * a, mp_int * b)
541	{
542	mp_int t;
543
544	t = *a;
545	a = b;
546	*b = t;
547	}
548
549	int mp_cond_swap_ct (mp_int * a, mp_int * b, int c, int m)
550	{
551	(void)c;
552	if (m == 1)
553	mp_exch(a, b);
554	return MP_OKAY;
555	}
556
557
558	/* shift right a certain number of bits */
559	void mp_rshb (mp_int *c, int x)
560	{
561	mp_digit *tmpc, mask, shift;
562	mp_digit r, rr;
563	mp_digit D = x;
564
565	/* shifting by a negative number not supported, and shifting by
566	* zero changes nothing.
567	*/
568	if (x <= 0) return;
569
570	/* shift digits first if needed */
571	if (x >= DIGIT_BIT) {
572	mp_rshd(c, x / DIGIT_BIT);
573	/* recalculate number of bits to shift */
574	D = x % DIGIT_BIT;
575	/* check if any more shifting needed */
576	if (D == 0) return;
577	}
578
579	/* zero shifted is always zero */
580	if (mp_iszero(c)) return;
581
582	/* mask */
583	mask = (((mp_digit)1) << D) - 1;
584
585	/* shift for lsb */
586	shift = DIGIT_BIT - D;
587
588	/* alias */
589	tmpc = c->dp + (c->used - 1);
590
591	/* carry */
592	r = 0;
593	for (x = c->used - 1; x >= 0; x--) {
594	/* get the lower bits of this word in a temp */
595	rr = *tmpc & mask;
596
597	/* shift the current word and mix in the carry bits from previous word */
598	tmpc = (tmpc >> D) \| (r << shift);
599	--tmpc;
600
601	/* set the carry to the carry bits of the current word found above */
602	r = rr;
603	}
604	mp_clamp(c);
605	}
606
607
608	/* shift right a certain amount of digits */
609	void mp_rshd (mp_int * a, int b)
610	{
611	int x;
612
613	/* if b <= 0 then ignore it */
614	if (b <= 0) {
615	return;
616	}
617
618	/* if b > used then simply zero it and return */
619	if (a->used <= b) {
620	mp_zero (a);
621	return;
622	}
623
624	{
625	mp_digit bottom, top;
626
627	/* shift the digits down */
628
629	/* bottom */
630	bottom = a->dp;
631
632	/* top [offset into digits] */
633	top = a->dp + b;
634
635	/* this is implemented as a sliding window where
636	* the window is b-digits long and digits from
637	* the top of the window are copied to the bottom
638	*
639	* e.g.
640
641	b-2 \| b-1 \| b0 \| b1 \| b2 \| ... \| bb \| ---->
642	/\ \| ---->
643	\-------------------/ ---->
644	*/
645	for (x = 0; x < (a->used - b); x++) {
646	bottom++ = top++;
647	}
648
649	/* zero the top digits */
650	for (; x < a->used; x++) {
651	*bottom++ = 0;
652	}
653	}
654
655	/* remove excess digits */
656	a->used -= b;
657	}
658
659
660	/* calc a value mod 2*b /
661	int mp_mod_2d (mp_int * a, int b, mp_int * c)
662	{
663	int x, res;
664
665	/* if b is <= 0 then zero the int */
666	if (b <= 0) {
667	mp_zero (c);
668	return MP_OKAY;
669	}
670
671	/* if the modulus is larger than the value than return */
672	if (b >= (int) (a->used * DIGIT_BIT)) {
673	res = mp_copy (a, c);
674	return res;
675	}
676
677	/* copy */
678	if ((res = mp_copy (a, c)) != MP_OKAY) {
679	return res;
680	}
681
682	/* zero digits above the last digit of the modulus */
683	for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
684	c->dp[x] = 0;
685	}
686	/* clear the digit that is not completely outside/inside the modulus */
687	x = DIGIT_BIT - (b % DIGIT_BIT);
688	if (x != DIGIT_BIT) {
689	c->dp[b / DIGIT_BIT] &= ~((mp_digit)0) >> (x + ((sizeof(mp_digit)*8) - DIGIT_BIT));
690	}
691	mp_clamp (c);
692	return MP_OKAY;
693	}
694
695
696	/* reads a unsigned char array, assumes the msb is stored first [big endian] */
697	int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
698	{
699	int res;
700	int digits_needed;
701
702	while (c > 0 && b[0] == 0) {
703	c--;
704	b++;
705	}
706
707	digits_needed = ((c * CHAR_BIT) + DIGIT_BIT - 1) / DIGIT_BIT;
708
709	/* make sure there are enough digits available */
710	if (a->alloc < digits_needed) {
711	if ((res = mp_grow(a, digits_needed)) != MP_OKAY) {
712	return res;
713	}
714	}
715
716	/* zero the int */
717	mp_zero (a);
718
719	/* read the bytes in */
720	while (c-- > 0) {
721	if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
722	return res;
723	}
724
725	#ifndef MP_8BIT
726	a->dp[0] \|= *b++;
727	if (a->used == 0)
728	a->used = 1;
729	#else
730	a->dp[0] = (*b & MP_MASK);
731	a->dp[1] \|= ((*b++ >> 7U) & 1);
732	if (a->used == 0)
733	a->used = 2;
734	#endif
735	}
736	mp_clamp (a);
737	return MP_OKAY;
738	}
739
740
741	/* shift left by a certain bit count */
742	int mp_mul_2d (mp_int * a, int b, mp_int * c)
743	{
744	mp_digit d;
745	int res;
746
747	/* copy */
748	if (a != c) {
749	if ((res = mp_copy (a, c)) != MP_OKAY) {
750	return res;
751	}
752	}
753
754	if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
755	if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
756	return res;
757	}
758	}
759
760	/* shift by as many digits in the bit count */
761	if (b >= (int)DIGIT_BIT) {
762	if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
763	return res;
764	}
765	}
766
767	/* shift any bit count < DIGIT_BIT */
768	d = (mp_digit) (b % DIGIT_BIT);
769	if (d != 0) {
770	mp_digit *tmpc, shift, mask, r, rr;
771	int x;
772
773	/* bitmask for carries */
774	mask = (((mp_digit)1) << d) - 1;
775
776	/* shift for msbs */
777	shift = DIGIT_BIT - d;
778
779	/* alias */
780	tmpc = c->dp;
781
782	/* carry */
783	r = 0;
784	for (x = 0; x < c->used; x++) {
785	/* get the higher bits of the current word */
786	rr = (*tmpc >> shift) & mask;
787
788	/* shift the current word and OR in the carry */
789	tmpc = (mp_digit)(((tmpc << d) \| r) & MP_MASK);
790	++tmpc;
791
792	/* set the carry to the carry bits of the current word */
793	r = rr;
794	}
795
796	/* set final carry */
797	if (r != 0) {
798	c->dp[(c->used)++] = r;
799	}
800	}
801	mp_clamp (c);
802	return MP_OKAY;
803	}
804
805
806	/* shift left a certain amount of digits */
807	int mp_lshd (mp_int * a, int b)
808	{
809	int x, res;
810
811	/* if its less than zero return */
812	if (b <= 0) {
813	return MP_OKAY;
814	}
815
816	/* grow to fit the new digits */
817	if (a->alloc < a->used + b) {
818	if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
819	return res;
820	}
821	}
822
823	{
824	mp_digit top, bottom;
825
826	/* increment the used by the shift amount then copy upwards */
827	a->used += b;
828
829	/* top */
830	top = a->dp + a->used - 1;
831
832	/* base */
833	bottom = a->dp + a->used - 1 - b;
834
835	/* much like mp_rshd this is implemented using a sliding window
836	* except the window goes the other way around. Copying from
837	* the bottom to the top. see bn_mp_rshd.c for more info.
838	*/
839	for (x = a->used - 1; x >= b; x--) {
840	top-- = bottom--;
841	}
842
843	/* zero the lower digits */
844	top = a->dp;
845	for (x = 0; x < b; x++) {
846	*top++ = 0;
847	}
848	}
849	return MP_OKAY;
850	}
851
852
853	/* this is a shell function that calls either the normal or Montgomery
854	* exptmod functions. Originally the call to the montgomery code was
855	* embedded in the normal function but that wasted a lot of stack space
856	* for nothing (since 99% of the time the Montgomery code would be called)
857	*/
858	#if defined(FREESCALE_LTC_TFM)
859	int wolfcrypt_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
860	#else
861	int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
862	#endif
863	{
864	int dr;
865
866	/* modulus P must be positive */
867	if (mp_iszero(P) \|\| P->sign == MP_NEG) {
868	return MP_VAL;
869	}
870	if (mp_isone(P)) {
871	mp_set(Y, 0);
872	return MP_OKAY;
873	}
874	if (mp_iszero(X)) {
875	mp_set(Y, 1);
876	return MP_OKAY;
877	}
878	if (mp_iszero(G)) {
879	mp_set(Y, 0);
880	return MP_OKAY;
881	}
882
883	/* if exponent X is negative we have to recurse */
884	if (X->sign == MP_NEG) {
885	#ifdef BN_MP_INVMOD_C
886	mp_int tmpG, tmpX;
887	int err;
888
889	/* first compute 1/G mod P */
890	if ((err = mp_init(&tmpG)) != MP_OKAY) {
891	return err;
892	}
893	if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
894	mp_clear(&tmpG);
895	return err;
896	}
897
898	/* now get \|X\| */
899	if ((err = mp_init(&tmpX)) != MP_OKAY) {
900	mp_clear(&tmpG);
901	return err;
902	}
903	if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
904	mp_clear(&tmpG);
905	mp_clear(&tmpX);
906	return err;
907	}
908
909	/* and now compute (1/G)\|X\| instead of GX [X < 0] */
910	err = mp_exptmod(&tmpG, &tmpX, P, Y);
911	mp_clear(&tmpG);
912	mp_clear(&tmpX);
913	return err;
914	#else
915	/* no invmod */
916	return MP_VAL;
917	#endif
918	}
919
920	#ifdef BN_MP_EXPTMOD_BASE_2
921	if (G->used == 1 && G->dp[0] == 2) {
922	return mp_exptmod_base_2(X, P, Y);
923	}
924	#endif
925
926	/* modified diminished radix reduction */
927	#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && \
928	defined(BN_S_MP_EXPTMOD_C)
929	if (mp_reduce_is_2k_l(P) == MP_YES) {
930	return s_mp_exptmod(G, X, P, Y, 1);
931	}
932	#endif
933
934	#ifdef BN_MP_DR_IS_MODULUS_C
935	/* is it a DR modulus? */
936	dr = mp_dr_is_modulus(P);
937	#else
938	/* default to no */
939	dr = 0;
940	#endif
941
942	(void)dr;
943
944	#ifdef BN_MP_REDUCE_IS_2K_C
945	/* if not, is it a unrestricted DR modulus? */
946	if (dr == 0) {
947	dr = mp_reduce_is_2k(P) << 1;
948	}
949	#endif
950
951	/* if the modulus is odd or dr != 0 use the montgomery method */
952	#ifdef BN_MP_EXPTMOD_FAST_C
953	if (mp_isodd (P) == MP_YES \|\| dr != 0) {
954	return mp_exptmod_fast (G, X, P, Y, dr);
955	} else {
956	#endif
957	#ifdef BN_S_MP_EXPTMOD_C
958	/* otherwise use the generic Barrett reduction technique */
959	return s_mp_exptmod (G, X, P, Y, 0);
960	#else
961	/* no exptmod for evens */
962	return MP_VAL;
963	#endif
964	#ifdef BN_MP_EXPTMOD_FAST_C
965	}
966	#endif
967	}
968
969	int mp_exptmod_ex (mp_int * G, mp_int * X, int digits, mp_int * P, mp_int * Y)
970	{
971	(void)digits;
972	return mp_exptmod(G, X, P, Y);
973	}
974
975	/* b = \|a\|
976	*
977	* Simple function copies the input and fixes the sign to positive
978	*/
979	int mp_abs (mp_int * a, mp_int * b)
980	{
981	int res;
982
983	/* copy a to b */
984	if (a != b) {
985	if ((res = mp_copy (a, b)) != MP_OKAY) {
986	return res;
987	}
988	}
989
990	/* force the sign of b to positive */
991	b->sign = MP_ZPOS;
992
993	return MP_OKAY;
994	}
995
996
997	/* hac 14.61, pp608 */
998	#if defined(FREESCALE_LTC_TFM)
999	int wolfcrypt_mp_invmod(mp_int * a, mp_int * b, mp_int * c)
1000	#else
1001	int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
1002	#endif
1003	{
1004	/* b cannot be negative or zero, and can not divide by 0 (1/a mod b) */
1005	if (b->sign == MP_NEG \|\| mp_iszero(b) == MP_YES \|\| mp_iszero(a) == MP_YES) {
1006	return MP_VAL;
1007	}
1008
1009	#ifdef BN_FAST_MP_INVMOD_C
1010	/* if the modulus is odd we can use a faster routine instead */
1011	if ((mp_isodd(b) == MP_YES) && (mp_cmp_d(b, 1) != MP_EQ)) {
1012	return fast_mp_invmod (a, b, c);
1013	}
1014	#endif
1015
1016	#ifdef BN_MP_INVMOD_SLOW_C
1017	return mp_invmod_slow(a, b, c);
1018	#else
1019	return MP_VAL;
1020	#endif
1021	}
1022
1023
1024	/* computes the modular inverse via binary extended euclidean algorithm,
1025	* that is c = 1/a mod b
1026	*
1027	* Based on slow invmod except this is optimized for the case where b is
1028	* odd as per HAC Note 14.64 on pp. 610
1029	*/
1030	int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
1031	{
1032	mp_int x, y, u, v, B, D;
1033	int res, neg, loop_check = 0;
1034
1035	/* 2. [modified] b must be odd */
1036	if (mp_iseven (b) == MP_YES) {
1037	return MP_VAL;
1038	}
1039
1040	/* init all our temps */
1041	if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D)) != MP_OKAY) {
1042	return res;
1043	}
1044
1045	/* x == modulus, y == value to invert */
1046	if ((res = mp_copy (b, &x)) != MP_OKAY) {
1047	goto LBL_ERR;
1048	}
1049
1050	/* we need y = \|a\| */
1051	if ((res = mp_mod (a, b, &y)) != MP_OKAY) {
1052	goto LBL_ERR;
1053	}
1054
1055	/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
1056	if ((res = mp_copy (&x, &u)) != MP_OKAY) {
1057	goto LBL_ERR;
1058	}
1059	if ((res = mp_copy (&y, &v)) != MP_OKAY) {
1060	goto LBL_ERR;
1061	}
1062	if ((res = mp_set (&D, 1)) != MP_OKAY) {
1063	goto LBL_ERR;
1064	}
1065
1066	top:
1067	/* 4. while u is even do */
1068	while (mp_iseven (&u) == MP_YES) {
1069	/* 4.1 u = u/2 */
1070	if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
1071	goto LBL_ERR;
1072	}
1073	/* 4.2 if B is odd then */
1074	if (mp_isodd (&B) == MP_YES) {
1075	if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
1076	goto LBL_ERR;
1077	}
1078	}
1079	/* B = B/2 */
1080	if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
1081	goto LBL_ERR;
1082	}
1083	}
1084
1085	/* 5. while v is even do */
1086	while (mp_iseven (&v) == MP_YES) {
1087	/* 5.1 v = v/2 */
1088	if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
1089	goto LBL_ERR;
1090	}
1091	/* 5.2 if D is odd then */
1092	if (mp_isodd (&D) == MP_YES) {
1093	/* D = (D-x)/2 */
1094	if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
1095	goto LBL_ERR;
1096	}
1097	}
1098	/* D = D/2 */
1099	if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
1100	goto LBL_ERR;
1101	}
1102	}
1103
1104	/* 6. if u >= v then */
1105	if (mp_cmp (&u, &v) != MP_LT) {
1106	/* u = u - v, B = B - D */
1107	if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
1108	goto LBL_ERR;
1109	}
1110
1111	if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
1112	goto LBL_ERR;
1113	}
1114	} else {
1115	/* v - v - u, D = D - B */
1116	if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
1117	goto LBL_ERR;
1118	}
1119
1120	if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
1121	goto LBL_ERR;
1122	}
1123	}
1124
1125	/* if not zero goto step 4 */
1126	if (mp_iszero (&u) == MP_NO) {
1127	if (++loop_check > MAX_INVMOD_SZ) {
1128	res = MP_VAL;
1129	goto LBL_ERR;
1130	}
1131	goto top;
1132	}
1133
1134	/* now a = C, b = D, gcd == gv /
1135
1136	/* if v != 1 then there is no inverse */
1137	if (mp_cmp_d (&v, 1) != MP_EQ) {
1138	res = MP_VAL;
1139	goto LBL_ERR;
1140	}
1141
1142	/* b is now the inverse */
1143	neg = a->sign;
1144	while (D.sign == MP_NEG) {
1145	if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
1146	goto LBL_ERR;
1147	}
1148	}
1149	/* too big */
1150	while (mp_cmp_mag(&D, b) != MP_LT) {
1151	if ((res = mp_sub(&D, b, &D)) != MP_OKAY) {
1152	goto LBL_ERR;
1153	}
1154	}
1155	mp_exch (&D, c);
1156	c->sign = neg;
1157	res = MP_OKAY;
1158
1159	LBL_ERR:mp_clear(&x);
1160	mp_clear(&y);
1161	mp_clear(&u);
1162	mp_clear(&v);
1163	mp_clear(&B);
1164	mp_clear(&D);
1165	return res;
1166	}
1167
1168
1169	/* hac 14.61, pp608 */
1170	int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
1171	{
1172	mp_int x, y, u, v, A, B, C, D;
1173	int res;
1174
1175	/* b cannot be negative */
1176	if (b->sign == MP_NEG \|\| mp_iszero(b) == MP_YES) {
1177	return MP_VAL;
1178	}
1179
1180	/* init temps */
1181	if ((res = mp_init_multi(&x, &y, &u, &v,
1182	&A, &B)) != MP_OKAY) {
1183	return res;
1184	}
1185
1186	/* init rest of tmps temps */
1187	if ((res = mp_init_multi(&C, &D, 0, 0, 0, 0)) != MP_OKAY) {
1188	mp_clear(&x);
1189	mp_clear(&y);
1190	mp_clear(&u);
1191	mp_clear(&v);
1192	mp_clear(&A);
1193	mp_clear(&B);
1194	return res;
1195	}
1196
1197	/* x = a, y = b */
1198	if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
1199	goto LBL_ERR;
1200	}
1201	if (mp_isone(&x)) {
1202	mp_set(c, 1);
1203	res = MP_OKAY;
1204	goto LBL_ERR;
1205	}
1206	if ((res = mp_copy (b, &y)) != MP_OKAY) {
1207	goto LBL_ERR;
1208	}
1209
1210	/* 2. [modified] if x,y are both even then return an error! */
1211	if (mp_iseven (&x) == MP_YES && mp_iseven (&y) == MP_YES) {
1212	res = MP_VAL;
1213	goto LBL_ERR;
1214	}
1215
1216	/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
1217	if ((res = mp_copy (&x, &u)) != MP_OKAY) {
1218	goto LBL_ERR;
1219	}
1220	if ((res = mp_copy (&y, &v)) != MP_OKAY) {
1221	goto LBL_ERR;
1222	}
1223	if ((res = mp_set (&A, 1)) != MP_OKAY) {
1224	goto LBL_ERR;
1225	}
1226	if ((res = mp_set (&D, 1)) != MP_OKAY) {
1227	goto LBL_ERR;
1228	}
1229
1230	top:
1231	/* 4. while u is even do */
1232	while (mp_iseven (&u) == MP_YES) {
1233	/* 4.1 u = u/2 */
1234	if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
1235	goto LBL_ERR;
1236	}
1237	/* 4.2 if A or B is odd then */
1238	if (mp_isodd (&A) == MP_YES \|\| mp_isodd (&B) == MP_YES) {
1239	/* A = (A+y)/2, B = (B-x)/2 */
1240	if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
1241	goto LBL_ERR;
1242	}
1243	if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
1244	goto LBL_ERR;
1245	}
1246	}
1247	/* A = A/2, B = B/2 */
1248	if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
1249	goto LBL_ERR;
1250	}
1251	if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
1252	goto LBL_ERR;
1253	}
1254	}
1255
1256	/* 5. while v is even do */
1257	while (mp_iseven (&v) == MP_YES) {
1258	/* 5.1 v = v/2 */
1259	if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
1260	goto LBL_ERR;
1261	}
1262	/* 5.2 if C or D is odd then */
1263	if (mp_isodd (&C) == MP_YES \|\| mp_isodd (&D) == MP_YES) {
1264	/* C = (C+y)/2, D = (D-x)/2 */
1265	if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
1266	goto LBL_ERR;
1267	}
1268	if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
1269	goto LBL_ERR;
1270	}
1271	}
1272	/* C = C/2, D = D/2 */
1273	if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
1274	goto LBL_ERR;
1275	}
1276	if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
1277	goto LBL_ERR;
1278	}
1279	}
1280
1281	/* 6. if u >= v then */
1282	if (mp_cmp (&u, &v) != MP_LT) {
1283	/* u = u - v, A = A - C, B = B - D */
1284	if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
1285	goto LBL_ERR;
1286	}
1287
1288	if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
1289	goto LBL_ERR;
1290	}
1291
1292	if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
1293	goto LBL_ERR;
1294	}
1295	} else {
1296	/* v - v - u, C = C - A, D = D - B */
1297	if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
1298	goto LBL_ERR;
1299	}
1300
1301	if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
1302	goto LBL_ERR;
1303	}
1304
1305	if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
1306	goto LBL_ERR;
1307	}
1308	}
1309
1310	/* if not zero goto step 4 */
1311	if (mp_iszero (&u) == MP_NO)
1312	goto top;
1313
1314	/* now a = C, b = D, gcd == gv /
1315
1316	/* if v != 1 then there is no inverse */
1317	if (mp_cmp_d (&v, 1) != MP_EQ) {
1318	res = MP_VAL;
1319	goto LBL_ERR;
1320	}
1321
1322	/* if its too low */
1323	while (mp_cmp_d(&C, 0) == MP_LT) {
1324	if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
1325	goto LBL_ERR;
1326	}
1327	}
1328
1329	/* too big */
1330	while (mp_cmp_mag(&C, b) != MP_LT) {
1331	if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
1332	goto LBL_ERR;
1333	}
1334	}
1335
1336	/* C is now the inverse */
1337	mp_exch (&C, c);
1338	res = MP_OKAY;
1339	LBL_ERR:mp_clear(&x);
1340	mp_clear(&y);
1341	mp_clear(&u);
1342	mp_clear(&v);
1343	mp_clear(&A);
1344	mp_clear(&B);
1345	mp_clear(&C);
1346	mp_clear(&D);
1347	return res;
1348	}
1349
1350
1351	/* compare magnitude of two ints (unsigned) */
1352	int mp_cmp_mag (mp_int * a, mp_int * b)
1353	{
1354	int n;
1355	mp_digit tmpa, tmpb;
1356
1357	/* compare based on # of non-zero digits */
1358	if (a->used > b->used) {
1359	return MP_GT;
1360	}
1361
1362	if (a->used < b->used) {
1363	return MP_LT;
1364	}
1365
1366	/* alias for a */
1367	tmpa = a->dp + (a->used - 1);
1368
1369	/* alias for b */
1370	tmpb = b->dp + (a->used - 1);
1371
1372	/* compare based on digits */
1373	for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
1374	if (tmpa > tmpb) {
1375	return MP_GT;
1376	}
1377
1378	if (tmpa < tmpb) {
1379	return MP_LT;
1380	}
1381	}
1382	return MP_EQ;
1383	}
1384
1385
1386	/* compare two ints (signed)*/
1387	int mp_cmp (mp_int * a, mp_int * b)
1388	{
1389	/* compare based on sign */
1390	if (a->sign != b->sign) {
1391	if (a->sign == MP_NEG) {
1392	return MP_LT;
1393	} else {
1394	return MP_GT;
1395	}
1396	}
1397
1398	/* compare digits */
1399	if (a->sign == MP_NEG) {
1400	/* if negative compare opposite direction */
1401	return mp_cmp_mag(b, a);
1402	} else {
1403	return mp_cmp_mag(a, b);
1404	}
1405	}
1406
1407
1408	/* compare a digit */
1409	int mp_cmp_d(mp_int * a, mp_digit b)
1410	{
1411	/* special case for zero*/
1412	if (a->used == 0 && b == 0)
1413	return MP_EQ;
1414
1415	/* compare based on sign */
1416	if ((b && a->used == 0) \|\| a->sign == MP_NEG) {
1417	return MP_LT;
1418	}
1419
1420	/* compare based on magnitude */
1421	if (a->used > 1) {
1422	return MP_GT;
1423	}
1424
1425	/* compare the only digit of a to b */
1426	if (a->dp[0] > b) {
1427	return MP_GT;
1428	} else if (a->dp[0] < b) {
1429	return MP_LT;
1430	} else {
1431	return MP_EQ;
1432	}
1433	}
1434
1435
1436	/* set to a digit */
1437	int mp_set (mp_int * a, mp_digit b)
1438	{
1439	int res;
1440	mp_zero (a);
1441	res = mp_grow (a, 1);
1442	if (res == MP_OKAY) {
1443	a->dp[0] = (mp_digit)(b & MP_MASK);
1444	a->used = (a->dp[0] != 0) ? 1 : 0;
1445	}
1446	return res;
1447	}
1448
1449	/* check if a bit is set */
1450	int mp_is_bit_set (mp_int *a, mp_digit b)
1451	{
1452	if ((mp_digit)a->used < b/DIGIT_BIT)
1453	return 0;
1454
1455	return (int)((a->dp[b/DIGIT_BIT] >> b%DIGIT_BIT) & (mp_digit)1);
1456	}
1457
1458	/* c = a mod b, 0 <= c < b */
1459	#if defined(FREESCALE_LTC_TFM)
1460	int wolfcrypt_mp_mod(mp_int * a, mp_int * b, mp_int * c)
1461	#else
1462	int mp_mod (mp_int * a, mp_int * b, mp_int * c)
1463	#endif
1464	{
1465	mp_int t;
1466	int res;
1467
1468	if ((res = mp_init_size (&t, b->used)) != MP_OKAY) {
1469	return res;
1470	}
1471
1472	if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
1473	mp_clear (&t);
1474	return res;
1475	}
1476
1477	if ((mp_iszero(&t) != MP_NO) \|\| (t.sign == b->sign)) {
1478	res = MP_OKAY;
1479	mp_exch (&t, c);
1480	} else {
1481	res = mp_add (b, &t, c);
1482	}
1483
1484	mp_clear (&t);
1485	return res;
1486	}
1487
1488
1489	/* slower bit-bang division... also smaller */
1490	int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
1491	{
1492	mp_int ta, tb, tq, q;
1493	int res, n, n2;
1494
1495	/* is divisor zero ? */
1496	if (mp_iszero (b) == MP_YES) {
1497	return MP_VAL;
1498	}
1499
1500	/* if a < b then q=0, r = a */
1501	if (mp_cmp_mag (a, b) == MP_LT) {
1502	if (d != NULL) {
1503	res = mp_copy (a, d);
1504	} else {
1505	res = MP_OKAY;
1506	}
1507	if (c != NULL) {
1508	mp_zero (c);
1509	}
1510	return res;
1511	}
1512
1513	/* init our temps */
1514	if ((res = mp_init_multi(&ta, &tb, &tq, &q, 0, 0)) != MP_OKAY) {
1515	return res;
1516	}
1517
1518	if ((res = mp_set(&tq, 1)) != MP_OKAY) {
1519	return res;
1520	}
1521	n = mp_count_bits(a) - mp_count_bits(b);
1522	if (((res = mp_abs(a, &ta)) != MP_OKAY) \|\|
1523	((res = mp_abs(b, &tb)) != MP_OKAY) \|\|
1524	((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) \|\|
1525	((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
1526	goto LBL_ERR;
1527	}
1528
1529	while (n-- >= 0) {
1530	if (mp_cmp(&tb, &ta) != MP_GT) {
1531	if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) \|\|
1532	((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
1533	goto LBL_ERR;
1534	}
1535	}
1536	if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) \|\|
1537	((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
1538	goto LBL_ERR;
1539	}
1540	}
1541
1542	/* now q == quotient and ta == remainder */
1543	n = a->sign;
1544	n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
1545	if (c != NULL) {
1546	mp_exch(c, &q);
1547	c->sign = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
1548	}
1549	if (d != NULL) {
1550	mp_exch(d, &ta);
1551	d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
1552	}
1553	LBL_ERR:
1554	mp_clear(&ta);
1555	mp_clear(&tb);
1556	mp_clear(&tq);
1557	mp_clear(&q);
1558	return res;
1559	}
1560
1561
1562	/* b = a/2 */
1563	int mp_div_2(mp_int * a, mp_int * b)
1564	{
1565	int x, res, oldused;
1566
1567	/* copy */
1568	if (b->alloc < a->used) {
1569	if ((res = mp_grow (b, a->used)) != MP_OKAY) {
1570	return res;
1571	}
1572	}
1573
1574	oldused = b->used;
1575	b->used = a->used;
1576	{
1577	mp_digit r, rr, tmpa, tmpb;
1578
1579	/* source alias */
1580	tmpa = a->dp + b->used - 1;
1581
1582	/* dest alias */
1583	tmpb = b->dp + b->used - 1;
1584
1585	/* carry */
1586	r = 0;
1587	for (x = b->used - 1; x >= 0; x--) {
1588	/* get the carry for the next iteration */
1589	rr = *tmpa & 1;
1590
1591	/* shift the current digit, add in carry and store */
1592	tmpb-- = (tmpa-- >> 1) \| (r << (DIGIT_BIT - 1));
1593
1594	/* forward carry to next iteration */
1595	r = rr;
1596	}
1597
1598	/* zero excess digits */
1599	tmpb = b->dp + b->used;
1600	for (x = b->used; x < oldused; x++) {
1601	*tmpb++ = 0;
1602	}
1603	}
1604	b->sign = a->sign;
1605	mp_clamp (b);
1606	return MP_OKAY;
1607	}
1608
1609	/* c = a / 2 (mod b) - constant time (a < b and positive) */
1610	int mp_div_2_mod_ct(mp_int a, mp_int b, mp_int *c)
1611	{
1612	int res;
1613
1614	if (mp_isodd(a)) {
1615	res = mp_add(a, b, c);
1616	if (res == MP_OKAY) {
1617	res = mp_div_2(c, c);
1618	}
1619	}
1620	else {
1621	res = mp_div_2(a, c);
1622	}
1623
1624	return res;
1625	}
1626
1627
1628	/* high level addition (handles signs) */
1629	int mp_add (mp_int * a, mp_int * b, mp_int * c)
1630	{
1631	int sa, sb, res;
1632
1633	/* get sign of both inputs */
1634	sa = a->sign;
1635	sb = b->sign;
1636
1637	/* handle two cases, not four */
1638	if (sa == sb) {
1639	/* both positive or both negative */
1640	/* add their magnitudes, copy the sign */
1641	c->sign = sa;
1642	res = s_mp_add (a, b, c);
1643	} else {
1644	/* one positive, the other negative */
1645	/* subtract the one with the greater magnitude from */
1646	/* the one of the lesser magnitude. The result gets */
1647	/* the sign of the one with the greater magnitude. */
1648	if (mp_cmp_mag (a, b) == MP_LT) {
1649	c->sign = sb;
1650	res = s_mp_sub (b, a, c);
1651	} else {
1652	c->sign = sa;
1653	res = s_mp_sub (a, b, c);
1654	}
1655	}
1656	return res;
1657	}
1658
1659
1660	/* low level addition, based on HAC pp.594, Algorithm 14.7 */
1661	int s_mp_add (mp_int * a, mp_int * b, mp_int * c)
1662	{
1663	mp_int *x;
1664	int olduse, res, min_ab, max_ab;
1665
1666	/* find sizes, we let \|a\| <= \|b\| which means we have to sort
1667	* them. "x" will point to the input with the most digits
1668	*/
1669	if (a->used > b->used) {
1670	min_ab = b->used;
1671	max_ab = a->used;
1672	x = a;
1673	} else {
1674	min_ab = a->used;
1675	max_ab = b->used;
1676	x = b;
1677	}
1678
1679	/* init result */
1680	if (c->alloc < max_ab + 1) {
1681	if ((res = mp_grow (c, max_ab + 1)) != MP_OKAY) {
1682	return res;
1683	}
1684	}
1685
1686	/* get old used digit count and set new one */
1687	olduse = c->used;
1688	c->used = max_ab + 1;
1689
1690	{
1691	mp_digit u, tmpa, tmpb, *tmpc;
1692	int i;
1693
1694	/* alias for digit pointers */
1695
1696	/* first input */
1697	tmpa = a->dp;
1698
1699	/* second input */
1700	tmpb = b->dp;
1701
1702	/* destination */
1703	tmpc = c->dp;
1704
1705	/* zero the carry */
1706	u = 0;
1707	for (i = 0; i < min_ab; i++) {
1708	/* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
1709	tmpc = tmpa++ + *tmpb++ + u;
1710
1711	/* U = carry bit of T[i] */
1712	u = *tmpc >> ((mp_digit)DIGIT_BIT);
1713
1714	/* take away carry bit from T[i] */
1715	*tmpc++ &= MP_MASK;
1716	}
1717
1718	/* now copy higher words if any, that is in A+B
1719	* if A or B has more digits add those in
1720	*/
1721	if (min_ab != max_ab) {
1722	for (; i < max_ab; i++) {
1723	/* T[i] = X[i] + U */
1724	*tmpc = x->dp[i] + u;
1725
1726	/* U = carry bit of T[i] */
1727	u = *tmpc >> ((mp_digit)DIGIT_BIT);
1728
1729	/* take away carry bit from T[i] */
1730	*tmpc++ &= MP_MASK;
1731	}
1732	}
1733
1734	/* add carry */
1735	*tmpc++ = u;
1736
1737	/* clear digits above olduse */
1738	for (i = c->used; i < olduse; i++) {
1739	*tmpc++ = 0;
1740	}
1741	}
1742
1743	mp_clamp (c);
1744	return MP_OKAY;
1745	}
1746
1747
1748	/* low level subtraction (assumes \|a\| > \|b\|), HAC pp.595 Algorithm 14.9 */
1749	int s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
1750	{
1751	int olduse, res, min_b, max_a;
1752
1753	/* find sizes */
1754	min_b = b->used;
1755	max_a = a->used;
1756
1757	/* init result */
1758	if (c->alloc < max_a) {
1759	if ((res = mp_grow (c, max_a)) != MP_OKAY) {
1760	return res;
1761	}
1762	}
1763
1764	/* sanity check on destination */
1765	if (c->dp == NULL)
1766	return MP_VAL;
1767
1768	olduse = c->used;
1769	c->used = max_a;
1770
1771	{
1772	mp_digit u, tmpa, tmpb, *tmpc;
1773	int i;
1774
1775	/* alias for digit pointers */
1776	tmpa = a->dp;
1777	tmpb = b->dp;
1778	tmpc = c->dp;
1779
1780	/* set carry to zero */
1781	u = 0;
1782	for (i = 0; i < min_b; i++) {
1783	/* T[i] = A[i] - B[i] - U */
1784	tmpc = tmpa++ - *tmpb++ - u;
1785
1786	/* U = carry bit of T[i]
1787	* Note this saves performing an AND operation since
1788	* if a carry does occur it will propagate all the way to the
1789	* MSB. As a result a single shift is enough to get the carry
1790	*/
1791	u = tmpc >> ((mp_digit)(CHAR_BIT sizeof (mp_digit) - 1));
1792
1793	/* Clear carry from T[i] */
1794	*tmpc++ &= MP_MASK;
1795	}
1796
1797	/* now copy higher words if any, e.g. if A has more digits than B */
1798	for (; i < max_a; i++) {
1799	/* T[i] = A[i] - U */
1800	tmpc = tmpa++ - u;
1801
1802	/* U = carry bit of T[i] */
1803	u = tmpc >> ((mp_digit)(CHAR_BIT sizeof (mp_digit) - 1));
1804
1805	/* Clear carry from T[i] */
1806	*tmpc++ &= MP_MASK;
1807	}
1808
1809	/* clear digits above used (since we may not have grown result above) */
1810	for (i = c->used; i < olduse; i++) {
1811	*tmpc++ = 0;
1812	}
1813	}
1814
1815	mp_clamp (c);
1816	return MP_OKAY;
1817	}
1818
1819
1820	/* high level subtraction (handles signs) */
1821	int mp_sub (mp_int * a, mp_int * b, mp_int * c)
1822	{
1823	int sa, sb, res;
1824
1825	sa = a->sign;
1826	sb = b->sign;
1827
1828	if (sa != sb) {
1829	/* subtract a negative from a positive, OR */
1830	/* subtract a positive from a negative. */
1831	/* In either case, ADD their magnitudes, */
1832	/* and use the sign of the first number. */
1833	c->sign = sa;
1834	res = s_mp_add (a, b, c);
1835	} else {
1836	/* subtract a positive from a positive, OR */
1837	/* subtract a negative from a negative. */
1838	/* First, take the difference between their */
1839	/* magnitudes, then... */
1840	if (mp_cmp_mag (a, b) != MP_LT) {
1841	/* Copy the sign from the first */
1842	c->sign = sa;
1843	/* The first has a larger or equal magnitude */
1844	res = s_mp_sub (a, b, c);
1845	} else {
1846	/* The result has the opposite sign from */
1847	/* the first number. */
1848	c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
1849	/* The second has a larger magnitude */
1850	res = s_mp_sub (b, a, c);
1851	}
1852	}
1853	return res;
1854	}
1855
1856
1857	/* determines if reduce_2k_l can be used */
1858	int mp_reduce_is_2k_l(mp_int *a)
1859	{
1860	int ix, iy;
1861
1862	if (a->used == 0) {
1863	return MP_NO;
1864	} else if (a->used == 1) {
1865	return MP_YES;
1866	} else if (a->used > 1) {
1867	/* if more than half of the digits are -1 we're sold */
1868	for (iy = ix = 0; ix < a->used; ix++) {
1869	if (a->dp[ix] == MP_MASK) {
1870	++iy;
1871	}
1872	}
1873	return (iy >= (a->used/2)) ? MP_YES : MP_NO;
1874
1875	}
1876	return MP_NO;
1877	}
1878
1879
1880	/* determines if mp_reduce_2k can be used */
1881	int mp_reduce_is_2k(mp_int *a)
1882	{
1883	int ix, iy, iw;
1884	mp_digit iz;
1885
1886	if (a->used == 0) {
1887	return MP_NO;
1888	} else if (a->used == 1) {
1889	return MP_YES;
1890	} else if (a->used > 1) {
1891	iy = mp_count_bits(a);
1892	iz = 1;
1893	iw = 1;
1894
1895	/* Test every bit from the second digit up, must be 1 */
1896	for (ix = DIGIT_BIT; ix < iy; ix++) {
1897	if ((a->dp[iw] & iz) == 0) {
1898	return MP_NO;
1899	}
1900	iz <<= 1;
1901	if (iz > (mp_digit)MP_MASK) {
1902	++iw;
1903	iz = 1;
1904	}
1905	}
1906	}
1907	return MP_YES;
1908	}
1909
1910
1911	/* determines if a number is a valid DR modulus */
1912	int mp_dr_is_modulus(mp_int *a)
1913	{
1914	int ix;
1915
1916	/* must be at least two digits */
1917	if (a->used < 2) {
1918	return 0;
1919	}
1920
1921	/* must be of the form b**k - a [a <= b] so all
1922	* but the first digit must be equal to -1 (mod b).
1923	*/
1924	for (ix = 1; ix < a->used; ix++) {
1925	if (a->dp[ix] != MP_MASK) {
1926	return 0;
1927	}
1928	}
1929	return 1;
1930	}
1931
1932
1933	/* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
1934	*
1935	* Uses a left-to-right k-ary sliding window to compute the modular
1936	* exponentiation.
1937	* The value of k changes based on the size of the exponent.
1938	*
1939	* Uses Montgomery or Diminished Radix reduction [whichever appropriate]
1940	*/
1941
1942	#ifdef MP_LOW_MEM
1943	#define TAB_SIZE 32
1944	#else
1945	#define TAB_SIZE 256
1946	#endif
1947
1948	int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
1949	int redmode)
1950	{
1951	mp_int res;
1952	mp_digit buf, mp;
1953	int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
1954	#ifdef WOLFSSL_SMALL_STACK
1955	mp_int* M;
1956	#else
1957	mp_int M[TAB_SIZE];
1958	#endif
1959	/* use a pointer to the reduction algorithm. This allows us to use
1960	* one of many reduction algorithms without modding the guts of
1961	* the code with if statements everywhere.
1962	*/
1963	int (redux)(mp_int,mp_int*,mp_digit) = NULL;
1964
1965	#ifdef WOLFSSL_SMALL_STACK
1966	M = (mp_int) XMALLOC(sizeof(mp_int) TAB_SIZE, NULL,
1967	DYNAMIC_TYPE_BIGINT);
1968	if (M == NULL)
1969	return MP_MEM;
1970	#endif
1971
1972	/* find window size */
1973	x = mp_count_bits (X);
1974	if (x <= 7) {
1975	winsize = 2;
1976	} else if (x <= 36) {
1977	winsize = 3;
1978	} else if (x <= 140) {
1979	winsize = 4;
1980	} else if (x <= 450) {
1981	winsize = 5;
1982	} else if (x <= 1303) {
1983	winsize = 6;
1984	} else if (x <= 3529) {
1985	winsize = 7;
1986	} else {
1987	winsize = 8;
1988	}
1989
1990	#ifdef MP_LOW_MEM
1991	if (winsize > 5) {
1992	winsize = 5;
1993	}
1994	#endif
1995
1996	/* init M array */
1997	/* init first cell */
1998	if ((err = mp_init_size(&M[1], P->alloc)) != MP_OKAY) {
1999	#ifdef WOLFSSL_SMALL_STACK
2000	XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2001	#endif
2002
2003	return err;
2004	}
2005
2006	/* now init the second half of the array */
2007	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
2008	if ((err = mp_init_size(&M[x], P->alloc)) != MP_OKAY) {
2009	for (y = 1<<(winsize-1); y < x; y++) {
2010	mp_clear (&M[y]);
2011	}
2012	mp_clear(&M[1]);
2013
2014	#ifdef WOLFSSL_SMALL_STACK
2015	XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2016	#endif
2017
2018	return err;
2019	}
2020	}
2021
2022	/* determine and setup reduction code */
2023	if (redmode == 0) {
2024	#ifdef BN_MP_MONTGOMERY_SETUP_C
2025	/* now setup montgomery */
2026	if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
2027	goto LBL_M;
2028	}
2029	#else
2030	err = MP_VAL;
2031	goto LBL_M;
2032	#endif
2033
2034	/* automatically pick the comba one if available (saves quite a few
2035	calls/ifs) */
2036	#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
2037	if (((P->used * 2 + 1) < (int)MP_WARRAY) &&
2038	P->used < (1L << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2039	redux = fast_mp_montgomery_reduce;
2040	} else
2041	#endif
2042	{
2043	#ifdef BN_MP_MONTGOMERY_REDUCE_C
2044	/* use slower baseline Montgomery method */
2045	redux = mp_montgomery_reduce;
2046	#endif
2047	}
2048	} else if (redmode == 1) {
2049	#if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
2050	/* setup DR reduction for moduli of the form B*k - b /
2051	mp_dr_setup(P, &mp);
2052	redux = mp_dr_reduce;
2053	#endif
2054	} else {
2055	#if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
2056	/* setup DR reduction for moduli of the form 2*k - b /
2057	if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
2058	goto LBL_M;
2059	}
2060	redux = mp_reduce_2k;
2061	#endif
2062	}
2063
2064	if (redux == NULL) {
2065	err = MP_VAL;
2066	goto LBL_M;
2067	}
2068
2069	/* setup result */
2070	if ((err = mp_init_size (&res, P->alloc)) != MP_OKAY) {
2071	goto LBL_M;
2072	}
2073
2074	/* create M table
2075	*
2076
2077	*
2078	* The first half of the table is not computed though accept for M[0] and M[1]
2079	*/
2080
2081	if (redmode == 0) {
2082	#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
2083	/* now we need R mod m */
2084	if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
2085	goto LBL_RES;
2086	}
2087
2088	/* now set M[1] to G * R mod m */
2089	if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
2090	goto LBL_RES;
2091	}
2092	#else
2093	err = MP_VAL;
2094	goto LBL_RES;
2095	#endif
2096	} else {
2097	if ((err = mp_set(&res, 1)) != MP_OKAY) {
2098	goto LBL_RES;
2099	}
2100	if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
2101	goto LBL_RES;
2102	}
2103	}
2104
2105	/* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times*/
2106	if ((err = mp_copy (&M[1], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
2107	goto LBL_RES;
2108	}
2109
2110	for (x = 0; x < (winsize - 1); x++) {
2111	if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))],
2112	&M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
2113	goto LBL_RES;
2114	}
2115	if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, mp)) != MP_OKAY) {
2116	goto LBL_RES;
2117	}
2118	}
2119
2120	/* create upper table */
2121	for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
2122	if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
2123	goto LBL_RES;
2124	}
2125	if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
2126	goto LBL_RES;
2127	}
2128	}
2129
2130	/* set initial mode and bit cnt */
2131	mode = 0;
2132	bitcnt = 1;
2133	buf = 0;
2134	digidx = X->used - 1;
2135	bitcpy = 0;
2136	bitbuf = 0;
2137
2138	for (;;) {
2139	/* grab next digit as required */
2140	if (--bitcnt == 0) {
2141	/* if digidx == -1 we are out of digits so break */
2142	if (digidx == -1) {
2143	break;
2144	}
2145	/* read next digit and reset bitcnt */
2146	buf = X->dp[digidx--];
2147	bitcnt = (int)DIGIT_BIT;
2148	}
2149
2150	/* grab the next msb from the exponent */
2151	y = (int)(buf >> (DIGIT_BIT - 1)) & 1;
2152	buf <<= (mp_digit)1;
2153
2154	/* if the bit is zero and mode == 0 then we ignore it
2155	* These represent the leading zero bits before the first 1 bit
2156	* in the exponent. Technically this opt is not required but it
2157	* does lower the # of trivial squaring/reductions used
2158	*/
2159	if (mode == 0 && y == 0) {
2160	continue;
2161	}
2162
2163	/* if the bit is zero and mode == 1 then we square */
2164	if (mode == 1 && y == 0) {
2165	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2166	goto LBL_RES;
2167	}
2168	if ((err = redux (&res, P, mp)) != MP_OKAY) {
2169	goto LBL_RES;
2170	}
2171	continue;
2172	}
2173
2174	/* else we add it to the window */
2175	bitbuf \|= (y << (winsize - ++bitcpy));
2176	mode = 2;
2177
2178	if (bitcpy == winsize) {
2179	/* ok window is filled so square as required and multiply */
2180	/* square first */
2181	for (x = 0; x < winsize; x++) {
2182	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2183	goto LBL_RES;
2184	}
2185	if ((err = redux (&res, P, mp)) != MP_OKAY) {
2186	goto LBL_RES;
2187	}
2188	}
2189
2190	/* then multiply */
2191	if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
2192	goto LBL_RES;
2193	}
2194	if ((err = redux (&res, P, mp)) != MP_OKAY) {
2195	goto LBL_RES;
2196	}
2197
2198	/* empty window and reset */
2199	bitcpy = 0;
2200	bitbuf = 0;
2201	mode = 1;
2202	}
2203	}
2204
2205	/* if bits remain then square/multiply */
2206	if (mode == 2 && bitcpy > 0) {
2207	/* square then multiply if the bit is set */
2208	for (x = 0; x < bitcpy; x++) {
2209	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2210	goto LBL_RES;
2211	}
2212	if ((err = redux (&res, P, mp)) != MP_OKAY) {
2213	goto LBL_RES;
2214	}
2215
2216	/* get next bit of the window */
2217	bitbuf <<= 1;
2218	if ((bitbuf & (1 << winsize)) != 0) {
2219	/* then multiply */
2220	if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
2221	goto LBL_RES;
2222	}
2223	if ((err = redux (&res, P, mp)) != MP_OKAY) {
2224	goto LBL_RES;
2225	}
2226	}
2227	}
2228	}
2229
2230	if (redmode == 0) {
2231	/* fixup result if Montgomery reduction is used
2232	* recall that any value in a Montgomery system is
2233	* actually multiplied by R mod n. So we have
2234	* to reduce one more time to cancel out the factor
2235	* of R.
2236	*/
2237	if ((err = redux(&res, P, mp)) != MP_OKAY) {
2238	goto LBL_RES;
2239	}
2240	}
2241
2242	/* swap res with Y */
2243	mp_exch (&res, Y);
2244	err = MP_OKAY;
2245	LBL_RES:mp_clear (&res);
2246	LBL_M:
2247	mp_clear(&M[1]);
2248	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
2249	mp_clear (&M[x]);
2250	}
2251
2252	#ifdef WOLFSSL_SMALL_STACK
2253	XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2254	#endif
2255
2256	return err;
2257	}
2258
2259	#ifdef BN_MP_EXPTMOD_BASE_2
2260	#if DIGIT_BIT < 16
2261	#define WINSIZE 3
2262	#elif DIGIT_BIT < 32
2263	#define WINSIZE 4
2264	#elif DIGIT_BIT < 64
2265	#define WINSIZE 5
2266	#elif DIGIT_BIT < 128
2267	#define WINSIZE 6
2268	#endif
2269	int mp_exptmod_base_2(mp_int * X, mp_int * P, mp_int * Y)
2270	{
2271	mp_digit buf, mp;
2272	int err = MP_OKAY, bitbuf, bitcpy, bitcnt, digidx, x, y;
2273	#ifdef WOLFSSL_SMALL_STACK
2274	mp_int *res = NULL;
2275	#else
2276	mp_int res[1];
2277	#endif
2278	int (redux)(mp_int,mp_int*,mp_digit) = NULL;
2279
2280	/* automatically pick the comba one if available (saves quite a few
2281	calls/ifs) */
2282	#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
2283	if (((P->used * 2 + 1) < (int)MP_WARRAY) &&
2284	P->used < (1L << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2285	redux = fast_mp_montgomery_reduce;
2286	} else
2287	#endif
2288	#ifdef BN_MP_MONTGOMERY_REDUCE_C
2289	{
2290	/* use slower baseline Montgomery method */
2291	redux = mp_montgomery_reduce;
2292	}
2293	#endif
2294
2295	if (redux == NULL) {
2296	return MP_VAL;
2297	}
2298
2299	#ifdef WOLFSSL_SMALL_STACK
2300	res = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_TMP_BUFFER);
2301	if (res == NULL) {
2302	return MP_MEM;
2303	}
2304	#endif
2305
2306	/* now setup montgomery */
2307	if ((err = mp_montgomery_setup(P, &mp)) != MP_OKAY) {
2308	goto LBL_M;
2309	}
2310
2311	/* setup result */
2312	if ((err = mp_init(res)) != MP_OKAY) {
2313	goto LBL_M;
2314	}
2315
2316	/* now we need R mod m */
2317	if ((err = mp_montgomery_calc_normalization(res, P)) != MP_OKAY) {
2318	goto LBL_RES;
2319	}
2320
2321	/* Get the top bits left over after taking WINSIZE bits starting at the
2322	* least-significant.
2323	*/
2324	digidx = X->used - 1;
2325	bitcpy = (X->used * DIGIT_BIT) % WINSIZE;
2326	if (bitcpy > 0) {
2327	bitcnt = (int)DIGIT_BIT - bitcpy;
2328	buf = X->dp[digidx--];
2329	bitbuf = (int)(buf >> bitcnt);
2330	/* Multiply montgomery representation of 1 by 2 ^ top */
2331	err = mp_mul_2d(res, bitbuf, res);
2332	if (err != MP_OKAY) {
2333	goto LBL_RES;
2334	}
2335	err = mp_mod(res, P, res);
2336	if (err != MP_OKAY) {
2337	goto LBL_RES;
2338	}
2339	/* Move out bits used */
2340	buf <<= bitcpy;
2341	bitcnt++;
2342	}
2343	else {
2344	bitcnt = 1;
2345	buf = 0;
2346	}
2347
2348	/* empty window and reset */
2349	bitbuf = 0;
2350	bitcpy = 0;
2351
2352	for (;;) {
2353	/* grab next digit as required */
2354	if (--bitcnt == 0) {
2355	/* if digidx == -1 we are out of digits so break */
2356	if (digidx == -1) {
2357	break;
2358	}
2359	/* read next digit and reset bitcnt */
2360	buf = X->dp[digidx--];
2361	bitcnt = (int)DIGIT_BIT;
2362	}
2363
2364	/* grab the next msb from the exponent */
2365	y = (int)(buf >> (DIGIT_BIT - 1)) & 1;
2366	buf <<= (mp_digit)1;
2367	/* add bit to the window */
2368	bitbuf \|= (y << (WINSIZE - ++bitcpy));
2369
2370	if (bitcpy == WINSIZE) {
2371	/* ok window is filled so square as required and multiply */
2372	/* square first */
2373	for (x = 0; x < WINSIZE; x++) {
2374	err = mp_sqr(res, res);
2375	if (err != MP_OKAY) {
2376	goto LBL_RES;
2377	}
2378	err = (*redux)(res, P, mp);
2379	if (err != MP_OKAY) {
2380	goto LBL_RES;
2381	}
2382	}
2383
2384	/* then multiply by 2^bitbuf */
2385	err = mp_mul_2d(res, bitbuf, res);
2386	if (err != MP_OKAY) {
2387	goto LBL_RES;
2388	}
2389	err = mp_mod(res, P, res);
2390	if (err != MP_OKAY) {
2391	goto LBL_RES;
2392	}
2393
2394	/* empty window and reset */
2395	bitcpy = 0;
2396	bitbuf = 0;
2397	}
2398	}
2399
2400	/* fixup result if Montgomery reduction is used
2401	* recall that any value in a Montgomery system is
2402	* actually multiplied by R mod n. So we have
2403	* to reduce one more time to cancel out the factor
2404	* of R.
2405	*/
2406	err = (*redux)(res, P, mp);
2407	if (err != MP_OKAY) {
2408	goto LBL_RES;
2409	}
2410
2411	/* swap res with Y */
2412	mp_copy(res, Y);
2413
2414	LBL_RES:mp_clear (res);
2415	LBL_M:
2416	#ifdef WOLFSSL_SMALL_STACK
2417	XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2418	#endif
2419	return err;
2420	}
2421
2422	#undef WINSIZE
2423	#endif /* BN_MP_EXPTMOD_BASE_2 */
2424
2425
2426	/* setups the montgomery reduction stuff */
2427	int mp_montgomery_setup (mp_int * n, mp_digit * rho)
2428	{
2429	mp_digit x, b;
2430
2431	/* fast inversion mod 2**k
2432	*
2433	* Based on the fact that
2434	*
2435	* XA = 1 (mod 2n) => (X(2-XA)) A = 1 (mod 22n)
2436	* => 2XA - XXA*A = 1
2437	* => 2*(1) - (1) = 1
2438	*/
2439	b = n->dp[0];
2440
2441	if ((b & 1) == 0) {
2442	return MP_VAL;
2443	}
2444
2445	x = (((b + 2) & 4) << 1) + b; /* here xa==1 mod 24 /
2446	x = 2 - b x; /* here xa==1 mod 28 /
2447	#if !defined(MP_8BIT)
2448	x = 2 - b x; /* here xa==1 mod 216 /
2449	#endif
2450	#if defined(MP_64BIT) \|\| !(defined(MP_8BIT) \|\| defined(MP_16BIT))
2451	x = 2 - b x; /* here xa==1 mod 232 /
2452	#endif
2453	#ifdef MP_64BIT
2454	x = 2 - b x; /* here xa==1 mod 264 /
2455	#endif
2456
2457	/* rho = -1/m mod b */
2458	/* TAO, switched mp_word casts to mp_digit to shut up compiler */
2459	*rho = (mp_digit)((((mp_digit)1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK);
2460
2461	return MP_OKAY;
2462	}
2463
2464
2465	/* computes xR**-1 == x (mod N) via Montgomery Reduction
2466	*
2467	* This is an optimized implementation of montgomery_reduce
2468	* which uses the comba method to quickly calculate the columns of the
2469	* reduction.
2470	*
2471	* Based on Algorithm 14.32 on pp.601 of HAC.
2472	*/
2473	int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
2474	{
2475	int ix, res, olduse;
2476	#ifdef WOLFSSL_SMALL_STACK
2477	mp_word* W; /* uses dynamic memory and slower */
2478	#else
2479	mp_word W[MP_WARRAY];
2480	#endif
2481
2482	/* get old used count */
2483	olduse = x->used;
2484
2485	/* grow a as required */
2486	if (x->alloc < n->used + 1) {
2487	if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
2488	return res;
2489	}
2490	}
2491
2492	#ifdef WOLFSSL_SMALL_STACK
2493	W = (mp_word)XMALLOC(sizeof(mp_word) MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
2494	if (W == NULL)
2495	return MP_MEM;
2496	#endif
2497
2498	XMEMSET(W, 0, (n->used * 2 + 1) * sizeof(mp_word));
2499
2500	/* first we have to get the digits of the input into
2501	* an array of double precision words W[...]
2502	*/
2503	{
2504	mp_word *_W;
2505	mp_digit *tmpx;
2506
2507	/* alias for the W[] array */
2508	_W = W;
2509
2510	/* alias for the digits of x*/
2511	tmpx = x->dp;
2512
2513	/* copy the digits of a into W[0..a->used-1] */
2514	for (ix = 0; ix < x->used; ix++) {
2515	_W++ = tmpx++;
2516	}
2517	}
2518
2519	/* now we proceed to zero successive digits
2520	* from the least significant upwards
2521	*/
2522	for (ix = 0; ix < n->used; ix++) {
2523	/* mu = ai * m' mod b
2524	*
2525	* We avoid a double precision multiplication (which isn't required)
2526	* by casting the value down to a mp_digit. Note this requires
2527	* that W[ix-1] have the carry cleared (see after the inner loop)
2528	*/
2529	mp_digit mu;
2530	mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
2531
2532	/* a = a + mu * m * b**i
2533	*
2534	* This is computed in place and on the fly. The multiplication
2535	* by b**i is handled by offsetting which columns the results
2536	* are added to.
2537	*
2538	* Note the comba method normally doesn't handle carries in the
2539	* inner loop In this case we fix the carry from the previous
2540	* column since the Montgomery reduction requires digits of the
2541	* result (so far) [see above] to work. This is
2542	* handled by fixing up one carry after the inner loop. The
2543	* carry fixups are done in order so after these loops the
2544	* first m->used words of W[] have the carries fixed
2545	*/
2546	{
2547	int iy;
2548	mp_digit *tmpn;
2549	mp_word *_W;
2550
2551	/* alias for the digits of the modulus */
2552	tmpn = n->dp;
2553
2554	/* Alias for the columns set by an offset of ix */
2555	_W = W + ix;
2556
2557	/* inner loop */
2558	for (iy = 0; iy < n->used; iy++) {
2559	_W++ += ((mp_word)mu) ((mp_word)*tmpn++);
2560	}
2561	}
2562
2563	/* now fix carry for next digit, W[ix+1] */
2564	W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
2565	}
2566
2567	/* now we have to propagate the carries and
2568	* shift the words downward [all those least
2569	* significant digits we zeroed].
2570	*/
2571	{
2572	mp_digit *tmpx;
2573	mp_word _W, _W1;
2574
2575	/* nox fix rest of carries */
2576
2577	/* alias for current word */
2578	_W1 = W + ix;
2579
2580	/* alias for next word, where the carry goes */
2581	_W = W + ++ix;
2582
2583	for (; ix <= n->used * 2 + 1; ix++) {
2584	_W++ += _W1++ >> ((mp_word) DIGIT_BIT);
2585	}
2586
2587	/* copy out, A = A/b**n
2588	*
2589	* The result is A/b**n but instead of converting from an
2590	* array of mp_word to mp_digit than calling mp_rshd
2591	* we just copy them in the right order
2592	*/
2593
2594	/* alias for destination word */
2595	tmpx = x->dp;
2596
2597	/* alias for shifted double precision result */
2598	_W = W + n->used;
2599
2600	for (ix = 0; ix < n->used + 1; ix++) {
2601	tmpx++ = (mp_digit)(_W++ & ((mp_word) MP_MASK));
2602	}
2603
2604	/* zero olduse digits, if the input a was larger than
2605	* m->used+1 we'll have to clear the digits
2606	*/
2607	for (; ix < olduse; ix++) {
2608	*tmpx++ = 0;
2609	}
2610	}
2611
2612	/* set the max used and clamp */
2613	x->used = n->used + 1;
2614	mp_clamp (x);
2615
2616	#ifdef WOLFSSL_SMALL_STACK
2617	XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
2618	#endif
2619
2620	/* if A >= m then A = A - m */
2621	if (mp_cmp_mag (x, n) != MP_LT) {
2622	return s_mp_sub (x, n, x);
2623	}
2624	return MP_OKAY;
2625	}
2626
2627
2628	/* computes xR*-1 == x (mod N) via Montgomery Reduction /
2629	int mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
2630	{
2631	int ix, res, digs;
2632	mp_digit mu;
2633
2634	/* can the fast reduction [comba] method be used?
2635	*
2636	* Note that unlike in mul you're safely allowed less
2637	* than the available columns [255 per default] since carries
2638	* are fixed up in the inner loop.
2639	*/
2640	digs = n->used * 2 + 1;
2641	if ((digs < (int)MP_WARRAY) &&
2642	n->used <
2643	(1L << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2644	return fast_mp_montgomery_reduce (x, n, rho);
2645	}
2646
2647	/* grow the input as required */
2648	if (x->alloc < digs) {
2649	if ((res = mp_grow (x, digs)) != MP_OKAY) {
2650	return res;
2651	}
2652	}
2653	x->used = digs;
2654
2655	for (ix = 0; ix < n->used; ix++) {
2656	/* mu = ai * rho mod b
2657	*
2658	* The value of rho must be precalculated via
2659	* montgomery_setup() such that
2660	* it equals -1/n0 mod b this allows the
2661	* following inner loop to reduce the
2662	* input one digit at a time
2663	*/
2664	mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
2665
2666	/* a = a + mu * m * b*i /
2667	{
2668	int iy;
2669	mp_digit tmpn, tmpx, u;
2670	mp_word r;
2671
2672	/* alias for digits of the modulus */
2673	tmpn = n->dp;
2674
2675	/* alias for the digits of x [the input] */
2676	tmpx = x->dp + ix;
2677
2678	/* set the carry to zero */
2679	u = 0;
2680
2681	/* Multiply and add in place */
2682	for (iy = 0; iy < n->used; iy++) {
2683	/* compute product and sum */
2684	r = ((mp_word)mu) * ((mp_word)*tmpn++) +
2685	((mp_word) u) + ((mp_word) * tmpx);
2686
2687	/* get carry */
2688	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
2689
2690	/* fix digit */
2691	*tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
2692	}
2693	/* At this point the ix'th digit of x should be zero */
2694
2695
2696	/* propagate carries upwards as required*/
2697	while (u) {
2698	*tmpx += u;
2699	u = *tmpx >> DIGIT_BIT;
2700	*tmpx++ &= MP_MASK;
2701	}
2702	}
2703	}
2704
2705	/* at this point the n.used'th least
2706	* significant digits of x are all zero
2707	* which means we can shift x to the
2708	* right by n.used digits and the
2709	* residue is unchanged.
2710	*/
2711
2712	/* x = x/b*n.used /
2713	mp_clamp(x);
2714	mp_rshd (x, n->used);
2715
2716	/* if x >= n then x = x - n */
2717	if (mp_cmp_mag (x, n) != MP_LT) {
2718	return s_mp_sub (x, n, x);
2719	}
2720
2721	return MP_OKAY;
2722	}
2723
2724
2725	/* determines the setup value */
2726	void mp_dr_setup(mp_int a, mp_digit d)
2727	{
2728	/* the casts are required if DIGIT_BIT is one less than
2729	* the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
2730	*/
2731	*d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
2732	((mp_word)a->dp[0]));
2733	}
2734
2735
2736	/* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
2737	*
2738	* Based on algorithm from the paper
2739	*
2740	* "Generating Efficient Primes for Discrete Log Cryptosystems"
2741	* Chae Hoon Lim, Pil Joong Lee,
2742	* POSTECH Information Research Laboratories
2743	*
2744	* The modulus must be of a special format [see manual]
2745	*
2746	* Has been modified to use algorithm 7.10 from the LTM book instead
2747	*
2748	* Input x must be in the range 0 <= x <= (n-1)**2
2749	*/
2750	int mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k)
2751	{
2752	int err, i, m;
2753	mp_word r;
2754	mp_digit mu, tmpx1, tmpx2;
2755
2756	/* m = digits in modulus */
2757	m = n->used;
2758
2759	/* ensure that "x" has at least 2m digits */
2760	if (x->alloc < m + m) {
2761	if ((err = mp_grow (x, m + m)) != MP_OKAY) {
2762	return err;
2763	}
2764	}
2765
2766	/* top of loop, this is where the code resumes if
2767	* another reduction pass is required.
2768	*/
2769	top:
2770	/* aliases for digits */
2771	/* alias for lower half of x */
2772	tmpx1 = x->dp;
2773
2774	/* alias for upper half of x, or x/B*m /
2775	tmpx2 = x->dp + m;
2776
2777	/* set carry to zero */
2778	mu = 0;
2779
2780	/* compute (x mod B*m) + k [x/B*m] inline and inplace /
2781	for (i = 0; i < m; i++) {
2782	r = ((mp_word)tmpx2++) ((mp_word)k) + *tmpx1 + mu;
2783	*tmpx1++ = (mp_digit)(r & MP_MASK);
2784	mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
2785	}
2786
2787	/* set final carry */
2788	*tmpx1++ = mu;
2789
2790	/* zero words above m */
2791	for (i = m + 1; i < x->used; i++) {
2792	*tmpx1++ = 0;
2793	}
2794
2795	/* clamp, sub and return */
2796	mp_clamp (x);
2797
2798	/* if x >= n then subtract and reduce again
2799	* Each successive "recursion" makes the input smaller and smaller.
2800	*/
2801	if (mp_cmp_mag (x, n) != MP_LT) {
2802	if ((err = s_mp_sub(x, n, x)) != MP_OKAY) {
2803	return err;
2804	}
2805	goto top;
2806	}
2807	return MP_OKAY;
2808	}
2809
2810
2811	/* reduces a modulo n where n is of the form 2*p - d /
2812	int mp_reduce_2k(mp_int a, mp_int n, mp_digit d)
2813	{
2814	mp_int q;
2815	int p, res;
2816
2817	if ((res = mp_init(&q)) != MP_OKAY) {
2818	return res;
2819	}
2820
2821	p = mp_count_bits(n);
2822	top:
2823	/* q = a/2p, a = a mod 2p */
2824	if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
2825	goto ERR;
2826	}
2827
2828	if (d != 1) {
2829	/* q = q * d */
2830	if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) {
2831	goto ERR;
2832	}
2833	}
2834
2835	/* a = a + q */
2836	if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
2837	goto ERR;
2838	}
2839
2840	if (mp_cmp_mag(a, n) != MP_LT) {
2841	if ((res = s_mp_sub(a, n, a)) != MP_OKAY) {
2842	goto ERR;
2843	}
2844	goto top;
2845	}
2846
2847	ERR:
2848	mp_clear(&q);
2849	return res;
2850	}
2851
2852
2853	/* determines the setup value */
2854	int mp_reduce_2k_setup(mp_int a, mp_digit d)
2855	{
2856	int res, p;
2857	mp_int tmp;
2858
2859	if ((res = mp_init(&tmp)) != MP_OKAY) {
2860	return res;
2861	}
2862
2863	p = mp_count_bits(a);
2864	if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
2865	mp_clear(&tmp);
2866	return res;
2867	}
2868
2869	if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
2870	mp_clear(&tmp);
2871	return res;
2872	}
2873
2874	*d = tmp.dp[0];
2875	mp_clear(&tmp);
2876	return MP_OKAY;
2877	}
2878
2879
2880	/* set the b bit of a */
2881	int mp_set_bit (mp_int * a, int b)
2882	{
2883	int i = b / DIGIT_BIT, res;
2884
2885	/*
2886	* Require:
2887	* bit index b >= 0
2888	* a->alloc == a->used == 0 if a->dp == NULL
2889	*/
2890	if (b < 0 \|\| (a->dp == NULL && (a->alloc != 0 \|\| a->used != 0)))
2891	return MP_VAL;
2892
2893	if (a->dp == NULL \|\| a->used < (int)(i + 1)) {
2894	/* grow a to accommodate the single bit */
2895	if ((res = mp_grow (a, i + 1)) != MP_OKAY) {
2896	return res;
2897	}
2898
2899	/* set the used count of where the bit will go */
2900	a->used = (int)(i + 1);
2901	}
2902
2903	/* put the single bit in its place */
2904	a->dp[i] \|= ((mp_digit)1) << (b % DIGIT_BIT);
2905
2906	return MP_OKAY;
2907	}
2908
2909	/* computes a = 2**b
2910	*
2911	* Simple algorithm which zeros the int, set the required bit
2912	*/
2913	int mp_2expt (mp_int * a, int b)
2914	{
2915	/* zero a as per default */
2916	mp_zero (a);
2917
2918	return mp_set_bit(a, b);
2919	}
2920
2921	/* multiply by a digit */
2922	int mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
2923	{
2924	mp_digit u, tmpa, tmpc;
2925	mp_word r;
2926	int ix, res, olduse;
2927
2928	/* make sure c is big enough to hold ab /
2929	if (c->alloc < a->used + 1) {
2930	if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
2931	return res;
2932	}
2933	}
2934
2935	/* get the original destinations used count */
2936	olduse = c->used;
2937
2938	/* set the sign */
2939	c->sign = a->sign;
2940
2941	/* alias for a->dp [source] */
2942	tmpa = a->dp;
2943
2944	/* alias for c->dp [dest] */
2945	tmpc = c->dp;
2946
2947	/* zero carry */
2948	u = 0;
2949
2950	/* compute columns */
2951	for (ix = 0; ix < a->used; ix++) {
2952	/* compute product and carry sum for this term */
2953	r = ((mp_word) u) + ((mp_word)tmpa++) ((mp_word)b);
2954
2955	/* mask off higher bits to get a single digit */
2956	*tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
2957
2958	/* send carry into next iteration */
2959	u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
2960	}
2961
2962	/* store final carry [if any] and increment ix offset */
2963	*tmpc++ = u;
2964	++ix;
2965
2966	/* now zero digits above the top */
2967	while (ix++ < olduse) {
2968	*tmpc++ = 0;
2969	}
2970
2971	/* set used count */
2972	c->used = a->used + 1;
2973	mp_clamp(c);
2974
2975	return MP_OKAY;
2976	}
2977
2978
2979	/* d = a * b (mod c) */
2980	#if defined(FREESCALE_LTC_TFM)
2981	int wolfcrypt_mp_mulmod(mp_int a, mp_int b, mp_int c, mp_int d)
2982	#else
2983	int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
2984	#endif
2985	{
2986	int res;
2987	mp_int t;
2988
2989	if ((res = mp_init_size (&t, c->used)) != MP_OKAY) {
2990	return res;
2991	}
2992
2993	res = mp_mul (a, b, &t);
2994	if (res == MP_OKAY) {
2995	res = mp_mod (&t, c, d);
2996	}
2997
2998	mp_clear (&t);
2999	return res;
3000	}
3001
3002
3003	/* d = a - b (mod c) */
3004	int mp_submod(mp_int* a, mp_int* b, mp_int* c, mp_int* d)
3005	{
3006	int res;
3007	mp_int t;
3008
3009	if ((res = mp_init (&t)) != MP_OKAY) {
3010	return res;
3011	}
3012
3013	res = mp_sub (a, b, &t);
3014	if (res == MP_OKAY) {
3015	res = mp_mod (&t, c, d);
3016	}
3017
3018	mp_clear (&t);
3019
3020	return res;
3021	}
3022
3023	/* d = a + b (mod c) */
3024	int mp_addmod(mp_int* a, mp_int* b, mp_int* c, mp_int* d)
3025	{
3026	int res;
3027	mp_int t;
3028
3029	if ((res = mp_init (&t)) != MP_OKAY) {
3030	return res;
3031	}
3032
3033	res = mp_add (a, b, &t);
3034	if (res == MP_OKAY) {
3035	res = mp_mod (&t, c, d);
3036	}
3037
3038	mp_clear (&t);
3039
3040	return res;
3041	}
3042
3043	/* d = a - b (mod c) - a < c and b < c and positive */
3044	int mp_submod_ct(mp_int* a, mp_int* b, mp_int* c, mp_int* d)
3045	{
3046	int res;
3047
3048	res = mp_sub(a, b, d);
3049	if (res == MP_OKAY && mp_isneg(d)) {
3050	res = mp_add(d, c, d);
3051	}
3052
3053	return res;
3054	}
3055
3056	/* d = a + b (mod c) - a < c and b < c and positive */
3057	int mp_addmod_ct(mp_int* a, mp_int* b, mp_int* c, mp_int* d)
3058	{
3059	int res;
3060
3061	res = mp_add(a, b, d);
3062	if (res == MP_OKAY && mp_cmp(d, c) != MP_LT) {
3063	res = mp_sub(d, c, d);
3064	}
3065
3066	return res;
3067	}
3068
3069	/* computes b = aa /
3070	int mp_sqr (mp_int * a, mp_int * b)
3071	{
3072	int res;
3073
3074	{
3075	#ifdef BN_FAST_S_MP_SQR_C
3076	/* can we use the fast comba multiplier? */
3077	if ((a->used * 2 + 1) < (int)MP_WARRAY &&
3078	a->used <
3079	(1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
3080	res = fast_s_mp_sqr (a, b);
3081	} else
3082	#endif
3083	#ifdef BN_S_MP_SQR_C
3084	res = s_mp_sqr (a, b);
3085	#else
3086	res = MP_VAL;
3087	#endif
3088	}
3089	b->sign = MP_ZPOS;
3090	return res;
3091	}
3092
3093
3094	/* high level multiplication (handles sign) */
3095	#if defined(FREESCALE_LTC_TFM)
3096	int wolfcrypt_mp_mul(mp_int a, mp_int b, mp_int *c)
3097	#else
3098	int mp_mul (mp_int * a, mp_int * b, mp_int * c)
3099	#endif
3100	{
3101	int res, neg;
3102	neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
3103
3104	{
3105	#ifdef BN_FAST_S_MP_MUL_DIGS_C
3106	/* can we use the fast multiplier?
3107	*
3108	* The fast multiplier can be used if the output will
3109	* have less than MP_WARRAY digits and the number of
3110	* digits won't affect carry propagation
3111	*/
3112	int digs = a->used + b->used + 1;
3113
3114	if ((digs < (int)MP_WARRAY) &&
3115	MIN(a->used, b->used) <=
3116	(1L << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
3117	res = fast_s_mp_mul_digs (a, b, c, digs);
3118	} else
3119	#endif
3120	#ifdef BN_S_MP_MUL_DIGS_C
3121	res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
3122	#else
3123	res = MP_VAL;
3124	#endif
3125
3126	}
3127	c->sign = (c->used > 0) ? neg : MP_ZPOS;
3128	return res;
3129	}
3130
3131
3132	/* b = a2 /
3133	int mp_mul_2(mp_int * a, mp_int * b)
3134	{
3135	int x, res, oldused;
3136
3137	/* grow to accommodate result */
3138	if (b->alloc < a->used + 1) {
3139	if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
3140	return res;
3141	}
3142	}
3143
3144	oldused = b->used;
3145	b->used = a->used;
3146
3147	{
3148	mp_digit r, rr, tmpa, tmpb;
3149
3150	/* alias for source */
3151	tmpa = a->dp;
3152
3153	/* alias for dest */
3154	tmpb = b->dp;
3155
3156	/* carry */
3157	r = 0;
3158	for (x = 0; x < a->used; x++) {
3159
3160	/* get what will be the next carry bit from the
3161	* MSB of the current digit
3162	*/
3163	rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
3164
3165	/* now shift up this digit, add in the carry [from the previous] */
3166	tmpb++ = (mp_digit)(((tmpa++ << ((mp_digit)1)) \| r) & MP_MASK);
3167
3168	/* copy the carry that would be from the source
3169	* digit into the next iteration
3170	*/
3171	r = rr;
3172	}
3173
3174	/* new leading digit? */
3175	if (r != 0) {
3176	/* add a MSB which is always 1 at this point */
3177	*tmpb = 1;
3178	++(b->used);
3179	}
3180
3181	/* now zero any excess digits on the destination
3182	* that we didn't write to
3183	*/
3184	tmpb = b->dp + b->used;
3185	for (x = b->used; x < oldused; x++) {
3186	*tmpb++ = 0;
3187	}
3188	}
3189	b->sign = a->sign;
3190	return MP_OKAY;
3191	}
3192
3193
3194	/* divide by three (based on routine from MPI and the GMP manual) */
3195	int mp_div_3 (mp_int * a, mp_int c, mp_digit d)
3196	{
3197	mp_int q;
3198	mp_word w, t;
3199	mp_digit b;
3200	int res, ix;
3201
3202	/* b = 2*DIGIT_BIT / 3 /
3203	b = (mp_digit) ( (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3) );
3204
3205	if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
3206	return res;
3207	}
3208
3209	q.used = a->used;
3210	q.sign = a->sign;
3211	w = 0;
3212	for (ix = a->used - 1; ix >= 0; ix--) {
3213	w = (w << ((mp_word)DIGIT_BIT)) \| ((mp_word)a->dp[ix]);
3214
3215	if (w >= 3) {
3216	/* multiply w by [1/3] */
3217	t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
3218
3219	/* now subtract 3 * [w/3] from w, to get the remainder */
3220	w -= t+t+t;
3221
3222	/* fixup the remainder as required since
3223	* the optimization is not exact.
3224	*/
3225	while (w >= 3) {
3226	t += 1;
3227	w -= 3;
3228	}
3229	} else {
3230	t = 0;
3231	}
3232	q.dp[ix] = (mp_digit)t;
3233	}
3234
3235	/* [optional] store the remainder */
3236	if (d != NULL) {
3237	*d = (mp_digit)w;
3238	}
3239
3240	/* [optional] store the quotient */
3241	if (c != NULL) {
3242	mp_clamp(&q);
3243	mp_exch(&q, c);
3244	}
3245	mp_clear(&q);
3246
3247	return res;
3248	}
3249
3250
3251	/* init an mp_init for a given size */
3252	int mp_init_size (mp_int * a, int size)
3253	{
3254	int x;
3255
3256	/* pad size so there are always extra digits */
3257	size += (MP_PREC * 2) - (size % MP_PREC);
3258
3259	/* alloc mem */
3260	a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size, NULL,
3261	DYNAMIC_TYPE_BIGINT);
3262	if (a->dp == NULL) {
3263	return MP_MEM;
3264	}
3265
3266	/* set the members */
3267	a->used = 0;
3268	a->alloc = size;
3269	a->sign = MP_ZPOS;
3270	#ifdef HAVE_WOLF_BIGINT
3271	wc_bigint_init(&a->raw);
3272	#endif
3273
3274	/* zero the digits */
3275	for (x = 0; x < size; x++) {
3276	a->dp[x] = 0;
3277	}
3278
3279	return MP_OKAY;
3280	}
3281
3282
3283	/* the jist of squaring...
3284	* you do like mult except the offset of the tmpx [one that
3285	* starts closer to zero] can't equal the offset of tmpy.
3286	* So basically you set up iy like before then you min it with
3287	* (ty-tx) so that it never happens. You double all those
3288	* you add in the inner loop
3289
3290	After that loop you do the squares and add them in.
3291	*/
3292
3293	int fast_s_mp_sqr (mp_int * a, mp_int * b)
3294	{
3295	int olduse, res, pa, ix, iz;
3296	#ifdef WOLFSSL_SMALL_STACK
3297	mp_digit* W; /* uses dynamic memory and slower */
3298	#else
3299	mp_digit W[MP_WARRAY];
3300	#endif
3301	mp_digit *tmpx;
3302	mp_word W1;
3303
3304	/* grow the destination as required */
3305	pa = a->used + a->used;
3306	if (b->alloc < pa) {
3307	if ((res = mp_grow (b, pa)) != MP_OKAY) {
3308	return res;
3309	}
3310	}
3311
3312	if (pa > (int)MP_WARRAY)
3313	return MP_RANGE; /* TAO range check */
3314
3315	#ifdef WOLFSSL_SMALL_STACK
3316	W = (mp_digit)XMALLOC(sizeof(mp_digit) MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
3317	if (W == NULL)
3318	return MP_MEM;
3319	#endif
3320
3321	/* number of output digits to produce */
3322	W1 = 0;
3323	for (ix = 0; ix < pa; ix++) {
3324	int tx, ty, iy;
3325	mp_word _W;
3326	mp_digit *tmpy;
3327
3328	/* clear counter */
3329	_W = 0;
3330
3331	/* get offsets into the two bignums */
3332	ty = MIN(a->used-1, ix);
3333	tx = ix - ty;
3334
3335	/* setup temp aliases */
3336	tmpx = a->dp + tx;
3337	tmpy = a->dp + ty;
3338
3339	/* this is the number of times the loop will iterate, essentially
3340	while (tx++ < a->used && ty-- >= 0) { ... }
3341	*/
3342	iy = MIN(a->used-tx, ty+1);
3343
3344	/* now for squaring tx can never equal ty
3345	* we halve the distance since they approach at a rate of 2x
3346	* and we have to round because odd cases need to be executed
3347	*/
3348	iy = MIN(iy, (ty-tx+1)>>1);
3349
3350	/* execute loop */
3351	for (iz = 0; iz < iy; iz++) {
3352	_W += ((mp_word)tmpx++)((mp_word)*tmpy--);
3353	}
3354
3355	/* double the inner product and add carry */
3356	_W = _W + _W + W1;
3357
3358	/* even columns have the square term in them */
3359	if ((ix&1) == 0) {
3360	_W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
3361	}
3362
3363	/* store it */
3364	W[ix] = (mp_digit)(_W & MP_MASK);
3365
3366	/* make next carry */
3367	W1 = _W >> ((mp_word)DIGIT_BIT);
3368	}
3369
3370	/* setup dest */
3371	olduse = b->used;
3372	b->used = a->used+a->used;
3373
3374	{
3375	mp_digit *tmpb;
3376	tmpb = b->dp;
3377	for (ix = 0; ix < pa; ix++) {
3378	*tmpb++ = (mp_digit)(W[ix] & MP_MASK);
3379	}
3380
3381	/* clear unused digits [that existed in the old copy of c] */
3382	for (; ix < olduse; ix++) {
3383	*tmpb++ = 0;
3384	}
3385	}
3386	mp_clamp (b);
3387
3388	#ifdef WOLFSSL_SMALL_STACK
3389	XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
3390	#endif
3391
3392	return MP_OKAY;
3393	}
3394
3395
3396	/* Fast (comba) multiplier
3397	*
3398	* This is the fast column-array [comba] multiplier. It is
3399	* designed to compute the columns of the product first
3400	* then handle the carries afterwards. This has the effect
3401	* of making the nested loops that compute the columns very
3402	* simple and schedulable on super-scalar processors.
3403	*
3404	* This has been modified to produce a variable number of
3405	* digits of output so if say only a half-product is required
3406	* you don't have to compute the upper half (a feature
3407	* required for fast Barrett reduction).
3408	*
3409	* Based on Algorithm 14.12 on pp.595 of HAC.
3410	*
3411	*/
3412	int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3413	{
3414	int olduse, res, pa, ix, iz;
3415	#ifdef WOLFSSL_SMALL_STACK
3416	mp_digit* W; /* uses dynamic memory and slower */
3417	#else
3418	mp_digit W[MP_WARRAY];
3419	#endif
3420	mp_word _W;
3421
3422	/* grow the destination as required */
3423	if (c->alloc < digs) {
3424	if ((res = mp_grow (c, digs)) != MP_OKAY) {
3425	return res;
3426	}
3427	}
3428
3429	/* number of output digits to produce */
3430	pa = MIN(digs, a->used + b->used);
3431	if (pa > (int)MP_WARRAY)
3432	return MP_RANGE; /* TAO range check */
3433
3434	#ifdef WOLFSSL_SMALL_STACK
3435	W = (mp_digit)XMALLOC(sizeof(mp_digit) MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
3436	if (W == NULL)
3437	return MP_MEM;
3438	#endif
3439
3440	/* clear the carry */
3441	_W = 0;
3442	for (ix = 0; ix < pa; ix++) {
3443	int tx, ty;
3444	int iy;
3445	mp_digit tmpx, tmpy;
3446
3447	/* get offsets into the two bignums */
3448	ty = MIN(b->used-1, ix);
3449	tx = ix - ty;
3450
3451	/* setup temp aliases */
3452	tmpx = a->dp + tx;
3453	tmpy = b->dp + ty;
3454
3455	/* this is the number of times the loop will iterate, essentially
3456	while (tx++ < a->used && ty-- >= 0) { ... }
3457	*/
3458	iy = MIN(a->used-tx, ty+1);
3459
3460	/* execute loop */
3461	for (iz = 0; iz < iy; ++iz) {
3462	_W += ((mp_word)tmpx++)((mp_word)*tmpy--);
3463
3464	}
3465
3466	/* store term */
3467	W[ix] = (mp_digit)(((mp_digit)_W) & MP_MASK);
3468
3469	/* make next carry */
3470	_W = _W >> ((mp_word)DIGIT_BIT);
3471	}
3472
3473	/* setup dest */
3474	olduse = c->used;
3475	c->used = pa;
3476
3477	{
3478	mp_digit *tmpc;
3479	tmpc = c->dp;
3480	for (ix = 0; ix < pa; ix++) { /* JRB, +1 could read uninitialized data */
3481	/* now extract the previous digit [below the carry] */
3482	*tmpc++ = W[ix];
3483	}
3484
3485	/* clear unused digits [that existed in the old copy of c] */
3486	for (; ix < olduse; ix++) {
3487	*tmpc++ = 0;
3488	}
3489	}
3490	mp_clamp (c);
3491
3492	#ifdef WOLFSSL_SMALL_STACK
3493	XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
3494	#endif
3495
3496	return MP_OKAY;
3497	}
3498
3499
3500	/* low level squaring, b = aa, HAC pp.596-597, Algorithm 14.16 /
3501	int s_mp_sqr (mp_int * a, mp_int * b)
3502	{
3503	mp_int t;
3504	int res, ix, iy, pa;
3505	mp_word r;
3506	mp_digit u, tmpx, *tmpt;
3507
3508	pa = a->used;
3509	if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
3510	return res;
3511	}
3512
3513	/* default used is maximum possible size */
3514	t.used = 2*pa + 1;
3515
3516	for (ix = 0; ix < pa; ix++) {
3517	/* first calculate the digit at 2ix /
3518	/* calculate double precision result */
3519	r = ((mp_word) t.dp[2*ix]) +
3520	((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
3521
3522	/* store lower part in result */
3523	t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
3524
3525	/* get the carry */
3526	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3527
3528	/* left hand side of A[ix] * A[iy] */
3529	tmpx = a->dp[ix];
3530
3531	/* alias for where to store the results */
3532	tmpt = t.dp + (2*ix + 1);
3533
3534	for (iy = ix + 1; iy < pa; iy++) {
3535	/* first calculate the product */
3536	r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
3537
3538	/* now calculate the double precision result, note we use
3539	* addition instead of *2 since it's easier to optimize
3540	*/
3541	r = ((mp_word) *tmpt) + r + r + ((mp_word) u);
3542
3543	/* store lower part */
3544	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3545
3546	/* get carry */
3547	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3548	}
3549	/* propagate upwards */
3550	while (u != ((mp_digit) 0)) {
3551	r = ((mp_word) *tmpt) + ((mp_word) u);
3552	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3553	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3554	}
3555	}
3556
3557	mp_clamp (&t);
3558	mp_exch (&t, b);
3559	mp_clear (&t);
3560	return MP_OKAY;
3561	}
3562
3563
3564	/* multiplies \|a\| * \|b\| and only computes up to digs digits of result
3565	* HAC pp. 595, Algorithm 14.12 Modified so you can control how
3566	* many digits of output are created.
3567	*/
3568	int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3569	{
3570	mp_int t;
3571	int res, pa, pb, ix, iy;
3572	mp_digit u;
3573	mp_word r;
3574	mp_digit tmpx, tmpt, tmpy;
3575
3576	/* can we use the fast multiplier? */
3577	if ((digs < (int)MP_WARRAY) &&
3578	MIN (a->used, b->used) <
3579	(1L << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
3580	return fast_s_mp_mul_digs (a, b, c, digs);
3581	}
3582
3583	if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
3584	return res;
3585	}
3586	t.used = digs;
3587
3588	/* compute the digits of the product directly */
3589	pa = a->used;
3590	for (ix = 0; ix < pa; ix++) {
3591	/* set the carry to zero */
3592	u = 0;
3593
3594	/* limit ourselves to making digs digits of output */
3595	pb = MIN (b->used, digs - ix);
3596
3597	/* setup some aliases */
3598	/* copy of the digit from a used within the nested loop */
3599	tmpx = a->dp[ix];
3600
3601	/* an alias for the destination shifted ix places */
3602	tmpt = t.dp + ix;
3603
3604	/* an alias for the digits of b */
3605	tmpy = b->dp;
3606
3607	/* compute the columns of the output and propagate the carry */
3608	for (iy = 0; iy < pb; iy++) {
3609	/* compute the column as a mp_word */
3610	r = ((mp_word)*tmpt) +
3611	((mp_word)tmpx) * ((mp_word)*tmpy++) +
3612	((mp_word) u);
3613
3614	/* the new column is the lower part of the result */
3615	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3616
3617	/* get the carry word from the result */
3618	u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
3619	}
3620	/* set carry if it is placed below digs */
3621	if (ix + iy < digs) {
3622	*tmpt = u;
3623	}
3624	}
3625
3626	mp_clamp (&t);
3627	mp_exch (&t, c);
3628
3629	mp_clear (&t);
3630	return MP_OKAY;
3631	}
3632
3633
3634	/*
3635	* shifts with subtractions when the result is greater than b.
3636	*
3637	* The method is slightly modified to shift B unconditionally up to just under
3638	* the leading bit of b. This saves a lot of multiple precision shifting.
3639	*/
3640	int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
3641	{
3642	int x, bits, res;
3643
3644	/* how many bits of last digit does b use */
3645	bits = mp_count_bits (b) % DIGIT_BIT;
3646
3647	if (b->used > 1) {
3648	if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1))
3649	!= MP_OKAY) {
3650	return res;
3651	}
3652	} else {
3653	if ((res = mp_set(a, 1)) != MP_OKAY) {
3654	return res;
3655	}
3656	bits = 1;
3657	}
3658
3659	/* now compute C = A * B mod b */
3660	for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
3661	if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
3662	return res;
3663	}
3664	if (mp_cmp_mag (a, b) != MP_LT) {
3665	if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
3666	return res;
3667	}
3668	}
3669	}
3670
3671	return MP_OKAY;
3672	}
3673
3674
3675	#ifdef MP_LOW_MEM
3676	#define TAB_SIZE 32
3677	#else
3678	#define TAB_SIZE 256
3679	#endif
3680
3681	int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
3682	{
3683	mp_int M[TAB_SIZE], res, mu;
3684	mp_digit buf;
3685	int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
3686	int (redux)(mp_int,mp_int,mp_int);
3687
3688	/* find window size */
3689	x = mp_count_bits (X);
3690	if (x <= 7) {
3691	winsize = 2;
3692	} else if (x <= 36) {
3693	winsize = 3;
3694	} else if (x <= 140) {
3695	winsize = 4;
3696	} else if (x <= 450) {
3697	winsize = 5;
3698	} else if (x <= 1303) {
3699	winsize = 6;
3700	} else if (x <= 3529) {
3701	winsize = 7;
3702	} else {
3703	winsize = 8;
3704	}
3705
3706	#ifdef MP_LOW_MEM
3707	if (winsize > 5) {
3708	winsize = 5;
3709	}
3710	#endif
3711
3712	/* init M array */
3713	/* init first cell */
3714	if ((err = mp_init(&M[1])) != MP_OKAY) {
3715	return err;
3716	}
3717
3718	/* now init the second half of the array */
3719	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3720	if ((err = mp_init(&M[x])) != MP_OKAY) {
3721	for (y = 1<<(winsize-1); y < x; y++) {
3722	mp_clear (&M[y]);
3723	}
3724	mp_clear(&M[1]);
3725	return err;
3726	}
3727	}
3728
3729	/* create mu, used for Barrett reduction */
3730	if ((err = mp_init (&mu)) != MP_OKAY) {
3731	goto LBL_M;
3732	}
3733
3734	if (redmode == 0) {
3735	if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
3736	goto LBL_MU;
3737	}
3738	redux = mp_reduce;
3739	} else {
3740	if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
3741	goto LBL_MU;
3742	}
3743	redux = mp_reduce_2k_l;
3744	}
3745
3746	/* create M table
3747	*
3748	* The M table contains powers of the base,
3749	* e.g. M[x] = G**x mod P
3750	*
3751	* The first half of the table is not
3752	* computed though accept for M[0] and M[1]
3753	*/
3754	if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
3755	goto LBL_MU;
3756	}
3757
3758	/* compute the value at M[1<<(winsize-1)] by squaring
3759	* M[1] (winsize-1) times
3760	*/
3761	if ((err = mp_copy (&M[1], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
3762	goto LBL_MU;
3763	}
3764
3765	for (x = 0; x < (winsize - 1); x++) {
3766	/* square it */
3767	if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))],
3768	&M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
3769	goto LBL_MU;
3770	}
3771
3772	/* reduce modulo P */
3773	if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, &mu)) != MP_OKAY) {
3774	goto LBL_MU;
3775	}
3776	}
3777
3778	/* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
3779	* for x = (2(winsize - 1) + 1) to (2winsize - 1)
3780	*/
3781	for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
3782	if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
3783	goto LBL_MU;
3784	}
3785	if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
3786	goto LBL_MU;
3787	}
3788	}
3789
3790	/* setup result */
3791	if ((err = mp_init (&res)) != MP_OKAY) {
3792	goto LBL_MU;
3793	}
3794	if ((err = mp_set (&res, 1)) != MP_OKAY) {
3795	goto LBL_MU;
3796	}
3797
3798	/* set initial mode and bit cnt */
3799	mode = 0;
3800	bitcnt = 1;
3801	buf = 0;
3802	digidx = X->used - 1;
3803	bitcpy = 0;
3804	bitbuf = 0;
3805
3806	for (;;) {
3807	/* grab next digit as required */
3808	if (--bitcnt == 0) {
3809	/* if digidx == -1 we are out of digits */
3810	if (digidx == -1) {
3811	break;
3812	}
3813	/* read next digit and reset the bitcnt */
3814	buf = X->dp[digidx--];
3815	bitcnt = (int) DIGIT_BIT;
3816	}
3817
3818	/* grab the next msb from the exponent */
3819	y = (int)(buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
3820	buf <<= (mp_digit)1;
3821
3822	/* if the bit is zero and mode == 0 then we ignore it
3823	* These represent the leading zero bits before the first 1 bit
3824	* in the exponent. Technically this opt is not required but it
3825	* does lower the # of trivial squaring/reductions used
3826	*/
3827	if (mode == 0 && y == 0) {
3828	continue;
3829	}
3830
3831	/* if the bit is zero and mode == 1 then we square */
3832	if (mode == 1 && y == 0) {
3833	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3834	goto LBL_RES;
3835	}
3836	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3837	goto LBL_RES;
3838	}
3839	continue;
3840	}
3841
3842	/* else we add it to the window */
3843	bitbuf \|= (y << (winsize - ++bitcpy));
3844	mode = 2;
3845
3846	if (bitcpy == winsize) {
3847	/* ok window is filled so square as required and multiply */
3848	/* square first */
3849	for (x = 0; x < winsize; x++) {
3850	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3851	goto LBL_RES;
3852	}
3853	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3854	goto LBL_RES;
3855	}
3856	}
3857
3858	/* then multiply */
3859	if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
3860	goto LBL_RES;
3861	}
3862	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3863	goto LBL_RES;
3864	}
3865
3866	/* empty window and reset */
3867	bitcpy = 0;
3868	bitbuf = 0;
3869	mode = 1;
3870	}
3871	}
3872
3873	/* if bits remain then square/multiply */
3874	if (mode == 2 && bitcpy > 0) {
3875	/* square then multiply if the bit is set */
3876	for (x = 0; x < bitcpy; x++) {
3877	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3878	goto LBL_RES;
3879	}
3880	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3881	goto LBL_RES;
3882	}
3883
3884	bitbuf <<= 1;
3885	if ((bitbuf & (1 << winsize)) != 0) {
3886	/* then multiply */
3887	if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
3888	goto LBL_RES;
3889	}
3890	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3891	goto LBL_RES;
3892	}
3893	}
3894	}
3895	}
3896
3897	mp_exch (&res, Y);
3898	err = MP_OKAY;
3899	LBL_RES:mp_clear (&res);
3900	LBL_MU:mp_clear (&mu);
3901	LBL_M:
3902	mp_clear(&M[1]);
3903	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3904	mp_clear (&M[x]);
3905	}
3906	return err;
3907	}
3908
3909
3910	/* pre-calculate the value required for Barrett reduction
3911	* For a given modulus "b" it calculates the value required in "a"
3912	*/
3913	int mp_reduce_setup (mp_int * a, mp_int * b)
3914	{
3915	int res;
3916
3917	if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
3918	return res;
3919	}
3920	return mp_div (a, b, a, NULL);
3921	}
3922
3923
3924	/* reduces x mod m, assumes 0 < x < m**2, mu is
3925	* precomputed via mp_reduce_setup.
3926	* From HAC pp.604 Algorithm 14.42
3927	*/
3928	int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
3929	{
3930	mp_int q;
3931	int res, um = m->used;
3932
3933	/* q = x */
3934	if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
3935	return res;
3936	}
3937
3938	/* q1 = x / b*(k-1) /
3939	mp_rshd (&q, um - 1);
3940
3941	/* according to HAC this optimization is ok */
3942	if (((mp_word) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
3943	if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
3944	goto CLEANUP;
3945	}
3946	} else {
3947	#ifdef BN_S_MP_MUL_HIGH_DIGS_C
3948	if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
3949	goto CLEANUP;
3950	}
3951	#elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
3952	if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
3953	goto CLEANUP;
3954	}
3955	#else
3956	{
3957	res = MP_VAL;
3958	goto CLEANUP;
3959	}
3960	#endif
3961	}
3962
3963	/* q3 = q2 / b*(k+1) /
3964	mp_rshd (&q, um + 1);
3965
3966	/* x = x mod b*(k+1), quick (no division) /
3967	if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
3968	goto CLEANUP;
3969	}
3970
3971	/* q = q * m mod b*(k+1), quick (no division) /
3972	if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
3973	goto CLEANUP;
3974	}
3975
3976	/* x = x - q */
3977	if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
3978	goto CLEANUP;
3979	}
3980
3981	/* If x < 0, add b*(k+1) to it /
3982	if (mp_cmp_d (x, 0) == MP_LT) {
3983	if ((res = mp_set (&q, 1)) != MP_OKAY)
3984	goto CLEANUP;
3985	if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
3986	goto CLEANUP;
3987	if ((res = mp_add (x, &q, x)) != MP_OKAY)
3988	goto CLEANUP;
3989	}
3990
3991	/* Back off if it's too big */
3992	while (mp_cmp (x, m) != MP_LT) {
3993	if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
3994	goto CLEANUP;
3995	}
3996	}
3997
3998	CLEANUP:
3999	mp_clear (&q);
4000
4001	return res;
4002	}
4003
4004
4005	/* reduces a modulo n where n is of the form 2**p - d
4006	This differs from reduce_2k since "d" can be larger
4007	than a single digit.
4008	*/
4009	int mp_reduce_2k_l(mp_int a, mp_int n, mp_int *d)
4010	{
4011	mp_int q;
4012	int p, res;
4013
4014	if ((res = mp_init(&q)) != MP_OKAY) {
4015	return res;
4016	}
4017
4018	p = mp_count_bits(n);
4019	top:
4020	/* q = a/2p, a = a mod 2p */
4021	if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
4022	goto ERR;
4023	}
4024
4025	/* q = q * d */
4026	if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
4027	goto ERR;
4028	}
4029
4030	/* a = a + q */
4031	if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
4032	goto ERR;
4033	}
4034
4035	if (mp_cmp_mag(a, n) != MP_LT) {
4036	if ((res = s_mp_sub(a, n, a)) != MP_OKAY) {
4037	goto ERR;
4038	}
4039	goto top;
4040	}
4041
4042	ERR:
4043	mp_clear(&q);
4044	return res;
4045	}
4046
4047
4048	/* determines the setup value */
4049	int mp_reduce_2k_setup_l(mp_int a, mp_int d)
4050	{
4051	int res;
4052	mp_int tmp;
4053
4054	if ((res = mp_init(&tmp)) != MP_OKAY) {
4055	return res;
4056	}
4057
4058	if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
4059	goto ERR;
4060	}
4061
4062	if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
4063	goto ERR;
4064	}
4065
4066	ERR:
4067	mp_clear(&tmp);
4068	return res;
4069	}
4070
4071
4072	/* multiplies \|a\| * \|b\| and does not compute the lower digs digits
4073	* [meant to get the higher part of the product]
4074	*/
4075	int s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
4076	{
4077	mp_int t;
4078	int res, pa, pb, ix, iy;
4079	mp_digit u;
4080	mp_word r;
4081	mp_digit tmpx, tmpt, tmpy;
4082
4083	/* can we use the fast multiplier? */
4084	#ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
4085	if (((a->used + b->used + 1) < (int)MP_WARRAY)
4086	&& MIN (a->used, b->used) <
4087	(1L << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
4088	return fast_s_mp_mul_high_digs (a, b, c, digs);
4089	}
4090	#endif
4091
4092	if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
4093	return res;
4094	}
4095	t.used = a->used + b->used + 1;
4096
4097	pa = a->used;
4098	pb = b->used;
4099	for (ix = 0; ix < pa && a->dp; ix++) {
4100	/* clear the carry */
4101	u = 0;
4102
4103	/* left hand side of A[ix] * B[iy] */
4104	tmpx = a->dp[ix];
4105
4106	/* alias to the address of where the digits will be stored */
4107	tmpt = &(t.dp[digs]);
4108
4109	/* alias for where to read the right hand side from */
4110	tmpy = b->dp + (digs - ix);
4111
4112	for (iy = digs - ix; iy < pb; iy++) {
4113	/* calculate the double precision result */
4114	r = ((mp_word)*tmpt) +
4115	((mp_word)tmpx) * ((mp_word)*tmpy++) +
4116	((mp_word) u);
4117
4118	/* get the lower part */
4119	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4120
4121	/* carry the carry */
4122	u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
4123	}
4124	*tmpt = u;
4125	}
4126	mp_clamp (&t);
4127	mp_exch (&t, c);
4128	mp_clear (&t);
4129	return MP_OKAY;
4130	}
4131
4132
4133	/* this is a modified version of fast_s_mul_digs that only produces
4134	* output digits above digs. See the comments for fast_s_mul_digs
4135	* to see how it works.
4136	*
4137	* This is used in the Barrett reduction since for one of the multiplications
4138	* only the higher digits were needed. This essentially halves the work.
4139	*
4140	* Based on Algorithm 14.12 on pp.595 of HAC.
4141	*/
4142	int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
4143	{
4144	int olduse, res, pa, ix, iz;
4145	#ifdef WOLFSSL_SMALL_STACK
4146	mp_digit* W; /* uses dynamic memory and slower */
4147	#else
4148	mp_digit W[MP_WARRAY];
4149	#endif
4150	mp_word _W;
4151
4152	if (a->dp == NULL) { /* JRB, avoid reading uninitialized values */
4153	return MP_VAL;
4154	}
4155
4156	/* grow the destination as required */
4157	pa = a->used + b->used;
4158	if (c->alloc < pa) {
4159	if ((res = mp_grow (c, pa)) != MP_OKAY) {
4160	return res;
4161	}
4162	}
4163
4164	if (pa > (int)MP_WARRAY)
4165	return MP_RANGE; /* TAO range check */
4166
4167	#ifdef WOLFSSL_SMALL_STACK
4168	W = (mp_digit)XMALLOC(sizeof(mp_digit) MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
4169	if (W == NULL)
4170	return MP_MEM;
4171	#endif
4172
4173	/* number of output digits to produce */
4174	pa = a->used + b->used;
4175	_W = 0;
4176	for (ix = digs; ix < pa; ix++) { /* JRB, have a->dp check at top of function*/
4177	int tx, ty, iy;
4178	mp_digit tmpx, tmpy;
4179
4180	/* get offsets into the two bignums */
4181	ty = MIN(b->used-1, ix);
4182	tx = ix - ty;
4183
4184	/* setup temp aliases */
4185	tmpx = a->dp + tx;
4186	tmpy = b->dp + ty;
4187
4188	/* this is the number of times the loop will iterate, essentially its
4189	while (tx++ < a->used && ty-- >= 0) { ... }
4190	*/
4191	iy = MIN(a->used-tx, ty+1);
4192
4193	/* execute loop */
4194	for (iz = 0; iz < iy; iz++) {
4195	_W += ((mp_word)tmpx++)((mp_word)*tmpy--);
4196	}
4197
4198	/* store term */
4199	W[ix] = (mp_digit)(((mp_digit)_W) & MP_MASK);
4200
4201	/* make next carry */
4202	_W = _W >> ((mp_word)DIGIT_BIT);
4203	}
4204
4205	/* setup dest */
4206	olduse = c->used;
4207	c->used = pa;
4208
4209	{
4210	mp_digit *tmpc;
4211
4212	tmpc = c->dp + digs;
4213	for (ix = digs; ix < pa; ix++) { /* TAO, <= could potentially overwrite */
4214	/* now extract the previous digit [below the carry] */
4215	*tmpc++ = W[ix];
4216	}
4217
4218	/* clear unused digits [that existed in the old copy of c] */
4219	for (; ix < olduse; ix++) {
4220	*tmpc++ = 0;
4221	}
4222	}
4223	mp_clamp (c);
4224
4225	#ifdef WOLFSSL_SMALL_STACK
4226	XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
4227	#endif
4228
4229	return MP_OKAY;
4230	}
4231
4232
4233	#ifndef MP_SET_CHUNK_BITS
4234	#define MP_SET_CHUNK_BITS 4
4235	#endif
4236	int mp_set_int (mp_int * a, unsigned long b)
4237	{
4238	int x, res;
4239
4240	/* use direct mp_set if b is less than mp_digit max */
4241	if (b < MP_DIGIT_MAX) {
4242	return mp_set (a, (mp_digit)b);
4243	}
4244
4245	mp_zero (a);
4246
4247	/* set chunk bits at a time */
4248	for (x = 0; x < (int)(sizeof(b) * 8) / MP_SET_CHUNK_BITS; x++) {
4249	/* shift the number up chunk bits */
4250	if ((res = mp_mul_2d (a, MP_SET_CHUNK_BITS, a)) != MP_OKAY) {
4251	return res;
4252	}
4253
4254	/* OR in the top bits of the source */
4255	a->dp[0] \|= (b >> ((sizeof(b) * 8) - MP_SET_CHUNK_BITS)) &
4256	((1 << MP_SET_CHUNK_BITS) - 1);
4257
4258	/* shift the source up to the next chunk bits */
4259	b <<= MP_SET_CHUNK_BITS;
4260
4261	/* ensure that digits are not clamped off */
4262	a->used += 1;
4263	}
4264	mp_clamp (a);
4265	return MP_OKAY;
4266	}
4267
4268
4269	#if defined(WOLFSSL_KEY_GEN) \|\| defined(HAVE_ECC) \|\| !defined(NO_RSA) \|\| \
4270	!defined(NO_DSA) \| !defined(NO_DH)
4271
4272	/* c = a * a (mod b) */
4273	int mp_sqrmod (mp_int * a, mp_int * b, mp_int * c)
4274	{
4275	int res;
4276	mp_int t;
4277
4278	if ((res = mp_init (&t)) != MP_OKAY) {
4279	return res;
4280	}
4281
4282	if ((res = mp_sqr (a, &t)) != MP_OKAY) {
4283	mp_clear (&t);
4284	return res;
4285	}
4286	res = mp_mod (&t, b, c);
4287	mp_clear (&t);
4288	return res;
4289	}
4290
4291	#endif
4292
4293
4294	#if defined(HAVE_ECC) \|\| !defined(NO_PWDBASED) \|\| defined(WOLFSSL_SNIFFER) \|\| \
4295	defined(WOLFSSL_HAVE_WOLFSCEP) \|\| defined(WOLFSSL_KEY_GEN) \|\| \
4296	defined(OPENSSL_EXTRA) \|\| defined(WC_RSA_BLINDING) \|\| \
4297	(!defined(NO_RSA) && !defined(NO_RSA_BOUNDS_CHECK))
4298
4299	/* single digit addition */
4300	int mp_add_d (mp_int* a, mp_digit b, mp_int* c)
4301	{
4302	int res, ix, oldused;
4303	mp_digit tmpa, tmpc, mu;
4304
4305	/* grow c as required */
4306	if (c->alloc < a->used + 1) {
4307	if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
4308	return res;
4309	}
4310	}
4311
4312	/* if a is negative and \|a\| >= b, call c = \|a\| - b */
4313	if (a->sign == MP_NEG && (a->used > 1 \|\| a->dp[0] >= b)) {
4314	/* temporarily fix sign of a */
4315	a->sign = MP_ZPOS;
4316
4317	/* c = \|a\| - b */
4318	res = mp_sub_d(a, b, c);
4319
4320	/* fix sign */
4321	a->sign = c->sign = MP_NEG;
4322
4323	/* clamp */
4324	mp_clamp(c);
4325
4326	return res;
4327	}
4328
4329	/* old number of used digits in c */
4330	oldused = c->used;
4331
4332	/* sign always positive */
4333	c->sign = MP_ZPOS;
4334
4335	/* source alias */
4336	tmpa = a->dp;
4337
4338	/* destination alias */
4339	tmpc = c->dp;
4340
4341	/* if a is positive */
4342	if (a->sign == MP_ZPOS) {
4343	/* add digit, after this we're propagating
4344	* the carry.
4345	*/
4346	tmpc = tmpa++ + b;
4347	mu = *tmpc >> DIGIT_BIT;
4348	*tmpc++ &= MP_MASK;
4349
4350	/* now handle rest of the digits */
4351	for (ix = 1; ix < a->used; ix++) {
4352	tmpc = tmpa++ + mu;
4353	mu = *tmpc >> DIGIT_BIT;
4354	*tmpc++ &= MP_MASK;
4355	}
4356	/* set final carry */
4357	if (ix < c->alloc) {
4358	ix++;
4359	*tmpc++ = mu;
4360	}
4361
4362	/* setup size */
4363	c->used = a->used + 1;
4364	} else {
4365	/* a was negative and \|a\| < b */
4366	c->used = 1;
4367
4368	/* the result is a single digit */
4369	if (a->used == 1) {
4370	*tmpc++ = b - a->dp[0];
4371	} else {
4372	*tmpc++ = b;
4373	}
4374
4375	/* setup count so the clearing of oldused
4376	* can fall through correctly
4377	*/
4378	ix = 1;
4379	}
4380
4381	/* now zero to oldused */
4382	while (ix++ < oldused) {
4383	*tmpc++ = 0;
4384	}
4385	mp_clamp(c);
4386
4387	return MP_OKAY;
4388	}
4389
4390
4391	/* single digit subtraction */
4392	int mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
4393	{
4394	mp_digit tmpa, tmpc, mu;
4395	int res, ix, oldused;
4396
4397	if (b > MP_MASK) return MP_VAL;
4398
4399	/* grow c as required */
4400	if (c->alloc < a->used + 1) {
4401	if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
4402	return res;
4403	}
4404	}
4405
4406	/* if a is negative just do an unsigned
4407	* addition [with fudged signs]
4408	*/
4409	if (a->sign == MP_NEG) {
4410	a->sign = MP_ZPOS;
4411	res = mp_add_d(a, b, c);
4412	a->sign = c->sign = MP_NEG;
4413
4414	/* clamp */
4415	mp_clamp(c);
4416
4417	return res;
4418	}
4419
4420	/* setup regs */
4421	oldused = c->used;
4422	tmpa = a->dp;
4423	tmpc = c->dp;
4424
4425	/* if a <= b simply fix the single digit */
4426	if ((a->used == 1 && a->dp[0] <= b) \|\| a->used == 0) {
4427	if (a->used == 1) {
4428	tmpc++ = b - tmpa;
4429	} else {
4430	*tmpc++ = b;
4431	}
4432	ix = 1;
4433
4434	/* negative/1digit */
4435	c->sign = MP_NEG;
4436	c->used = 1;
4437	} else {
4438	/* positive/size */
4439	c->sign = MP_ZPOS;
4440	c->used = a->used;
4441
4442	/* subtract first digit */
4443	tmpc = tmpa++ - b;
4444	mu = tmpc >> (sizeof(mp_digit) CHAR_BIT - 1);
4445	*tmpc++ &= MP_MASK;
4446
4447	/* handle rest of the digits */
4448	for (ix = 1; ix < a->used; ix++) {
4449	tmpc = tmpa++ - mu;
4450	mu = tmpc >> (sizeof(mp_digit) CHAR_BIT - 1);
4451	*tmpc++ &= MP_MASK;
4452	}
4453	}
4454
4455	/* zero excess digits */
4456	while (ix++ < oldused) {
4457	*tmpc++ = 0;
4458	}
4459	mp_clamp(c);
4460	return MP_OKAY;
4461	}
4462
4463	#endif /* defined(HAVE_ECC) \|\| !defined(NO_PWDBASED) */
4464
4465
4466	#if defined(WOLFSSL_KEY_GEN) \|\| defined(HAVE_COMP_KEY) \|\| defined(HAVE_ECC) \|\| \
4467	defined(DEBUG_WOLFSSL) \|\| !defined(NO_RSA) \|\| !defined(NO_DSA) \|\| \
4468	!defined(NO_DH)
4469
4470	static const int lnz[16] = {
4471	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
4472	};
4473
4474	/* Counts the number of lsbs which are zero before the first zero bit */
4475	int mp_cnt_lsb(mp_int *a)
4476	{
4477	int x;
4478	mp_digit q = 0, qq;
4479
4480	/* easy out */
4481	if (mp_iszero(a) == MP_YES) {
4482	return 0;
4483	}
4484
4485	/* scan lower digits until non-zero */
4486	for (x = 0; x < a->used && a->dp[x] == 0; x++) {}
4487	if (a->dp)
4488	q = a->dp[x];
4489	x *= DIGIT_BIT;
4490
4491	/* now scan this digit until a 1 is found */
4492	if ((q & 1) == 0) {
4493	do {
4494	qq = q & 15;
4495	x += lnz[qq];
4496	q >>= 4;
4497	} while (qq == 0);
4498	}
4499	return x;
4500	}
4501
4502
4503
4504
4505	static int s_is_power_of_two(mp_digit b, int *p)
4506	{
4507	int x;
4508
4509	/* fast return if no power of two */
4510	if ((b==0) \|\| (b & (b-1))) {
4511	return 0;
4512	}
4513
4514	for (x = 0; x < DIGIT_BIT; x++) {
4515	if (b == (((mp_digit)1)<<x)) {
4516	*p = x;
4517	return 1;
4518	}
4519	}
4520	return 0;
4521	}
4522
4523	/* single digit division (based on routine from MPI) */
4524	static int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
4525	{
4526	mp_int q;
4527	mp_word w;
4528	mp_digit t;
4529	int res = MP_OKAY, ix;
4530
4531	/* cannot divide by zero */
4532	if (b == 0) {
4533	return MP_VAL;
4534	}
4535
4536	/* quick outs */
4537	if (b == 1 \|\| mp_iszero(a) == MP_YES) {
4538	if (d != NULL) {
4539	*d = 0;
4540	}
4541	if (c != NULL) {
4542	return mp_copy(a, c);
4543	}
4544	return MP_OKAY;
4545	}
4546
4547	/* power of two ? */
4548	if (s_is_power_of_two(b, &ix) == 1) {
4549	if (d != NULL) {
4550	*d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
4551	}
4552	if (c != NULL) {
4553	return mp_div_2d(a, ix, c, NULL);
4554	}
4555	return MP_OKAY;
4556	}
4557
4558	#ifdef BN_MP_DIV_3_C
4559	/* three? */
4560	if (b == 3) {
4561	return mp_div_3(a, c, d);
4562	}
4563	#endif
4564
4565	/* no easy answer [c'est la vie]. Just division */
4566	if (c != NULL) {
4567	if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
4568	return res;
4569	}
4570
4571	q.used = a->used;
4572	q.sign = a->sign;
4573	}
4574	else {
4575	if ((res = mp_init(&q)) != MP_OKAY) {
4576	return res;
4577	}
4578	}
4579
4580
4581	w = 0;
4582	for (ix = a->used - 1; ix >= 0; ix--) {
4583	w = (w << ((mp_word)DIGIT_BIT)) \| ((mp_word)a->dp[ix]);
4584
4585	if (w >= b) {
4586	#ifdef WOLFSSL_LINUXKM
4587	t = (mp_digit)w;
4588	/* Linux kernel macro for in-place 64 bit integer division. */
4589	do_div(t, b);
4590	#else
4591	t = (mp_digit)(w / b);
4592	#endif
4593	w -= ((mp_word)t) * ((mp_word)b);
4594	} else {
4595	t = 0;
4596	}
4597	if (c != NULL)
4598	q.dp[ix] = (mp_digit)t;
4599	}
4600
4601	if (d != NULL) {
4602	*d = (mp_digit)w;
4603	}
4604
4605	if (c != NULL) {
4606	mp_clamp(&q);
4607	mp_exch(&q, c);
4608	}
4609	mp_clear(&q);
4610
4611	return res;
4612	}
4613
4614
4615	int mp_mod_d (mp_int * a, mp_digit b, mp_digit * c)
4616	{
4617	return mp_div_d(a, b, NULL, c);
4618	}
4619
4620	#endif /* WOLFSSL_KEY_GEN \|\| HAVE_COMP_KEY \|\| HAVE_ECC \|\| DEBUG_WOLFSSL */
4621
4622	#if defined(WOLFSSL_KEY_GEN) \|\| !defined(NO_DH) \|\| !defined(NO_DSA) \|\| !defined(NO_RSA)
4623
4624	const FLASH_QUALIFIER mp_digit ltm_prime_tab[PRIME_SIZE] = {
4625	0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
4626	0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
4627	0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
4628	0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F,
4629	#ifndef MP_8BIT
4630	0x0083,
4631	0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
4632	0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
4633	0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
4634	0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
4635
4636	0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
4637	0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
4638	0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
4639	0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
4640	0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
4641	0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
4642	0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
4643	0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
4644
4645	0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
4646	0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
4647	0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
4648	0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
4649	0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
4650	0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
4651	0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
4652	0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
4653
4654	0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
4655	0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
4656	0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
4657	0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
4658	0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
4659	0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
4660	0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
4661	0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
4662	#endif
4663	};
4664
4665
4666	/* Miller-Rabin test of "a" to the base of "b" as described in
4667	* HAC pp. 139 Algorithm 4.24
4668	*
4669	* Sets result to 0 if definitely composite or 1 if probably prime.
4670	* Randomly the chance of error is no more than 1/4 and often
4671	* very much lower.
4672	*/
4673	static int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
4674	{
4675	mp_int n1, y, r;
4676	int s, j, err;
4677
4678	/* default */
4679	*result = MP_NO;
4680
4681	/* ensure b > 1 */
4682	if (mp_cmp_d(b, 1) != MP_GT) {
4683	return MP_VAL;
4684	}
4685
4686	/* get n1 = a - 1 */
4687	if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
4688	return err;
4689	}
4690	if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
4691	goto LBL_N1;
4692	}
4693
4694	/* set 2*s r = n1 */
4695	if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
4696	goto LBL_N1;
4697	}
4698
4699	/* count the number of least significant bits
4700	* which are zero
4701	*/
4702	s = mp_cnt_lsb(&r);
4703
4704	/* now divide n - 1 by 2*s /
4705	if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
4706	goto LBL_R;
4707	}
4708
4709	/* compute y = b*r mod a /
4710	if ((err = mp_init (&y)) != MP_OKAY) {
4711	goto LBL_R;
4712	}
4713	#if defined(WOLFSSL_HAVE_SP_RSA) \|\| defined(WOLFSSL_HAVE_SP_DH)
4714	#ifndef WOLFSSL_SP_NO_2048
4715	if (mp_count_bits(a) == 1024 && mp_isodd(a))
4716	err = sp_ModExp_1024(b, &r, a, &y);
4717	else if (mp_count_bits(a) == 2048 && mp_isodd(a))
4718	err = sp_ModExp_2048(b, &r, a, &y);
4719	else
4720	#endif
4721	#ifndef WOLFSSL_SP_NO_3072
4722	if (mp_count_bits(a) == 1536 && mp_isodd(a))
4723	err = sp_ModExp_1536(b, &r, a, &y);
4724	else if (mp_count_bits(a) == 3072 && mp_isodd(a))
4725	err = sp_ModExp_3072(b, &r, a, &y);
4726	else
4727	#endif
4728	#ifdef WOLFSSL_SP_4096
4729	if (mp_count_bits(a) == 4096 && mp_isodd(a))
4730	err = sp_ModExp_4096(b, &r, a, &y);
4731	else
4732	#endif
4733	#endif
4734	err = mp_exptmod (b, &r, a, &y);
4735	if (err != MP_OKAY)
4736	goto LBL_Y;
4737
4738	/* if y != 1 and y != n1 do */
4739	if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
4740	j = 1;
4741	/* while j <= s-1 and y != n1 */
4742	while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
4743	if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
4744	goto LBL_Y;
4745	}
4746
4747	/* if y == 1 then composite */
4748	if (mp_cmp_d (&y, 1) == MP_EQ) {
4749	goto LBL_Y;
4750	}
4751
4752	++j;
4753	}
4754
4755	/* if y != n1 then composite */
4756	if (mp_cmp (&y, &n1) != MP_EQ) {
4757	goto LBL_Y;
4758	}
4759	}
4760
4761	/* probably prime now */
4762	*result = MP_YES;
4763	LBL_Y:mp_clear (&y);
4764	LBL_R:mp_clear (&r);
4765	LBL_N1:mp_clear (&n1);
4766	return err;
4767	}
4768
4769
4770	/* determines if an integers is divisible by one
4771	* of the first PRIME_SIZE primes or not
4772	*
4773	* sets result to 0 if not, 1 if yes
4774	*/
4775	static int mp_prime_is_divisible (mp_int * a, int *result)
4776	{
4777	int err, ix;
4778	mp_digit res;
4779
4780	/* default to not */
4781	*result = MP_NO;
4782
4783	for (ix = 0; ix < PRIME_SIZE; ix++) {
4784	/* what is a mod LBL_prime_tab[ix] */
4785	if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) {
4786	return err;
4787	}
4788
4789	/* is the residue zero? */
4790	if (res == 0) {
4791	*result = MP_YES;
4792	return MP_OKAY;
4793	}
4794	}
4795
4796	return MP_OKAY;
4797	}
4798
4799	/*
4800	* Sets result to 1 if probably prime, 0 otherwise
4801	*/
4802	int mp_prime_is_prime (mp_int * a, int t, int *result)
4803	{
4804	mp_int b;
4805	int ix, err, res;
4806
4807	/* default to no */
4808	*result = MP_NO;
4809
4810	/* valid value of t? */
4811	if (t <= 0 \|\| t > PRIME_SIZE) {
4812	return MP_VAL;
4813	}
4814
4815	if (mp_isone(a)) {
4816	*result = MP_NO;
4817	return MP_OKAY;
4818	}
4819
4820	/* is the input equal to one of the primes in the table? */
4821	for (ix = 0; ix < PRIME_SIZE; ix++) {
4822	if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
4823	*result = MP_YES;
4824	return MP_OKAY;
4825	}
4826	}
4827
4828	/* first perform trial division */
4829	if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
4830	return err;
4831	}
4832
4833	/* return if it was trivially divisible */
4834	if (res == MP_YES) {
4835	return MP_OKAY;
4836	}
4837
4838	/* now perform the miller-rabin rounds */
4839	if ((err = mp_init (&b)) != MP_OKAY) {
4840	return err;
4841	}
4842
4843	for (ix = 0; ix < t; ix++) {
4844	/* set the prime */
4845	if ((err = mp_set (&b, ltm_prime_tab[ix])) != MP_OKAY) {
4846	goto LBL_B;
4847	}
4848
4849	if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
4850	goto LBL_B;
4851	}
4852
4853	if (res == MP_NO) {
4854	goto LBL_B;
4855	}
4856	}
4857
4858	/* passed the test */
4859	*result = MP_YES;
4860	LBL_B:mp_clear (&b);
4861	return err;
4862	}
4863
4864
4865	/*
4866	* Sets result to 1 if probably prime, 0 otherwise
4867	*/
4868	int mp_prime_is_prime_ex (mp_int * a, int t, int result, WC_RNG rng)
4869	{
4870	mp_int b, c;
4871	int ix, err, res;
4872	byte* base = NULL;
4873	word32 baseSz = 0;
4874
4875	/* default to no */
4876	*result = MP_NO;
4877
4878	/* valid value of t? */
4879	if (t <= 0 \|\| t > PRIME_SIZE) {
4880	return MP_VAL;
4881	}
4882
4883	if (mp_isone(a)) {
4884	*result = MP_NO;
4885	return MP_OKAY;
4886	}
4887
4888	/* is the input equal to one of the primes in the table? */
4889	for (ix = 0; ix < PRIME_SIZE; ix++) {
4890	if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
4891	*result = MP_YES;
4892	return MP_OKAY;
4893	}
4894	}
4895
4896	/* first perform trial division */
4897	if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
4898	return err;
4899	}
4900
4901	/* return if it was trivially divisible */
4902	if (res == MP_YES) {
4903	return MP_OKAY;
4904	}
4905
4906	/* now perform the miller-rabin rounds */
4907	if ((err = mp_init (&b)) != MP_OKAY) {
4908	return err;
4909	}
4910	if ((err = mp_init (&c)) != MP_OKAY) {
4911	mp_clear(&b);
4912	return err;
4913	}
4914
4915	baseSz = mp_count_bits(a);
4916	baseSz = (baseSz / 8) + ((baseSz % 8) ? 1 : 0);
4917
4918	base = (byte*)XMALLOC(baseSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
4919	if (base == NULL) {
4920	err = MP_MEM;
4921	goto LBL_B;
4922	}
4923
4924	if ((err = mp_sub_d(a, 2, &c)) != MP_OKAY) {
4925	goto LBL_B;
4926	}
4927
4928	/* now do a miller rabin with up to t random numbers, this should
4929	* give a (1/4)^t chance of a false prime. */
4930	for (ix = 0; ix < t; ix++) {
4931	/* Set a test candidate. */
4932	if ((err = wc_RNG_GenerateBlock(rng, base, baseSz)) != 0) {
4933	goto LBL_B;
4934	}
4935
4936	if ((err = mp_read_unsigned_bin(&b, base, baseSz)) != MP_OKAY) {
4937	goto LBL_B;
4938	}
4939
4940	if (mp_cmp_d(&b, 2) != MP_GT \|\| mp_cmp(&b, &c) != MP_LT) {
4941	ix--;
4942	continue;
4943	}
4944
4945	if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
4946	goto LBL_B;
4947	}
4948
4949	if (res == MP_NO) {
4950	goto LBL_B;
4951	}
4952	}
4953
4954	/* passed the test */
4955	*result = MP_YES;
4956	LBL_B:mp_clear (&b);
4957	mp_clear (&c);
4958	XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER);
4959	return err;
4960	}
4961
4962	#endif /* WOLFSSL_KEY_GEN NO_DH NO_DSA NO_RSA */
4963
4964	#ifdef WOLFSSL_KEY_GEN
4965
4966	static const int USE_BBS = 1;
4967
4968	int mp_rand_prime(mp_int* N, int len, WC_RNG* rng, void* heap)
4969	{
4970	int err, res, type;
4971	byte* buf;
4972
4973	if (N == NULL \|\| rng == NULL)
4974	return MP_VAL;
4975
4976	/* get type */
4977	if (len < 0) {
4978	type = USE_BBS;
4979	len = -len;
4980	} else {
4981	type = 0;
4982	}
4983
4984	/* allow sizes between 2 and 512 bytes for a prime size */
4985	if (len < 2 \|\| len > 512) {
4986	return MP_VAL;
4987	}
4988
4989	/* allocate buffer to work with */
4990	buf = (byte*)XMALLOC(len, heap, DYNAMIC_TYPE_RSA);
4991	if (buf == NULL) {
4992	return MP_MEM;
4993	}
4994	XMEMSET(buf, 0, len);
4995
4996	do {
4997	#ifdef SHOW_GEN
4998	printf(".");
4999	fflush(stdout);
5000	#endif
5001	/* generate value */
5002	err = wc_RNG_GenerateBlock(rng, buf, len);
5003	if (err != 0) {
5004	XFREE(buf, heap, DYNAMIC_TYPE_RSA);
5005	return err;
5006	}
5007
5008	/* munge bits */
5009	buf[0] \|= 0x80 \| 0x40;
5010	buf[len-1] \|= 0x01 \| ((type & USE_BBS) ? 0x02 : 0x00);
5011
5012	/* load value */
5013	if ((err = mp_read_unsigned_bin(N, buf, len)) != MP_OKAY) {
5014	XFREE(buf, heap, DYNAMIC_TYPE_RSA);
5015	return err;
5016	}
5017
5018	/* test */
5019	/* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance
5020	* of a 1024-bit candidate being a false positive, when it is our
5021	* prime candidate. (Note 4.49 of Handbook of Applied Cryptography.)
5022	* Using 8 because we've always used 8. */
5023	if ((err = mp_prime_is_prime_ex(N, 8, &res, rng)) != MP_OKAY) {
5024	XFREE(buf, heap, DYNAMIC_TYPE_RSA);
5025	return err;
5026	}
5027	} while (res == MP_NO);
5028
5029	XMEMSET(buf, 0, len);
5030	XFREE(buf, heap, DYNAMIC_TYPE_RSA);
5031
5032	return MP_OKAY;
5033	}
5034
5035
5036	/* computes least common multiple as \|ab\|/(a, b) /
5037	int mp_lcm (mp_int * a, mp_int * b, mp_int * c)
5038	{
5039	int res;
5040	mp_int t1, t2;
5041
5042
5043	if ((res = mp_init_multi (&t1, &t2, NULL, NULL, NULL, NULL)) != MP_OKAY) {
5044	return res;
5045	}
5046
5047	/* t1 = get the GCD of the two inputs */
5048	if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
5049	goto LBL_T;
5050	}
5051
5052	/* divide the smallest by the GCD */
5053	if (mp_cmp_mag(a, b) == MP_LT) {
5054	/* store quotient in t2 such that t2 * b is the LCM */
5055	if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
5056	goto LBL_T;
5057	}
5058	res = mp_mul(b, &t2, c);
5059	} else {
5060	/* store quotient in t2 such that t2 * a is the LCM */
5061	if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
5062	goto LBL_T;
5063	}
5064	res = mp_mul(a, &t2, c);
5065	}
5066
5067	/* fix the sign to positive */
5068	c->sign = MP_ZPOS;
5069
5070	LBL_T:
5071	mp_clear(&t1);
5072	mp_clear(&t2);
5073	return res;
5074	}
5075
5076
5077
5078	/* Greatest Common Divisor using the binary method */
5079	int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
5080	{
5081	mp_int u, v;
5082	int k, u_lsb, v_lsb, res;
5083
5084	/* either zero than gcd is the largest */
5085	if (mp_iszero (a) == MP_YES) {
5086	return mp_abs (b, c);
5087	}
5088	if (mp_iszero (b) == MP_YES) {
5089	return mp_abs (a, c);
5090	}
5091
5092	/* get copies of a and b we can modify */
5093	if ((res = mp_init_copy (&u, a)) != MP_OKAY) {
5094	return res;
5095	}
5096
5097	if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
5098	goto LBL_U;
5099	}
5100
5101	/* must be positive for the remainder of the algorithm */
5102	u.sign = v.sign = MP_ZPOS;
5103
5104	/* B1. Find the common power of two for u and v */
5105	u_lsb = mp_cnt_lsb(&u);
5106	v_lsb = mp_cnt_lsb(&v);
5107	k = MIN(u_lsb, v_lsb);
5108
5109	if (k > 0) {
5110	/* divide the power of two out */
5111	if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
5112	goto LBL_V;
5113	}
5114
5115	if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
5116	goto LBL_V;
5117	}
5118	}
5119
5120	/* divide any remaining factors of two out */
5121	if (u_lsb != k) {
5122	if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
5123	goto LBL_V;
5124	}
5125	}
5126
5127	if (v_lsb != k) {
5128	if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
5129	goto LBL_V;
5130	}
5131	}
5132
5133	while (mp_iszero(&v) == MP_NO) {
5134	/* make sure v is the largest */
5135	if (mp_cmp_mag(&u, &v) == MP_GT) {
5136	/* swap u and v to make sure v is >= u */
5137	mp_exch(&u, &v);
5138	}
5139
5140	/* subtract smallest from largest */
5141	if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
5142	goto LBL_V;
5143	}
5144
5145	/* Divide out all factors of two */
5146	if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
5147	goto LBL_V;
5148	}
5149	}
5150
5151	/* multiply by 2*k which we divided out at the beginning /
5152	if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
5153	goto LBL_V;
5154	}
5155	c->sign = MP_ZPOS;
5156	res = MP_OKAY;
5157	LBL_V:mp_clear (&v);
5158	LBL_U:mp_clear (&u);
5159	return res;
5160	}
5161
5162	#endif /* WOLFSSL_KEY_GEN */
5163
5164
5165	#if !defined(NO_DSA) \|\| defined(HAVE_ECC) \|\| defined(WOLFSSL_KEY_GEN) \|\| \
5166	defined(HAVE_COMP_KEY) \|\| defined(WOLFSSL_DEBUG_MATH) \|\| \
5167	defined(DEBUG_WOLFSSL) \|\| defined(OPENSSL_EXTRA)
5168
5169	/* chars used in radix conversions */
5170	const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
5171	"abcdefghijklmnopqrstuvwxyz+/";
5172	#endif
5173
5174	#if !defined(NO_DSA) \|\| defined(HAVE_ECC)
5175	/* read a string [ASCII] in a given radix */
5176	int mp_read_radix (mp_int * a, const char *str, int radix)
5177	{
5178	int y, res, neg;
5179	char ch;
5180
5181	/* zero the digit bignum */
5182	mp_zero(a);
5183
5184	/* make sure the radix is ok */
5185	if (radix < MP_RADIX_BIN \|\| radix > MP_RADIX_MAX) {
5186	return MP_VAL;
5187	}
5188
5189	/* if the leading digit is a
5190	* minus set the sign to negative.
5191	*/
5192	if (*str == '-') {
5193	++str;
5194	neg = MP_NEG;
5195	} else {
5196	neg = MP_ZPOS;
5197	}
5198
5199	/* set the integer to the default of zero */
5200	mp_zero (a);
5201
5202	/* process each digit of the string */
5203	while (*str != '\0') {
5204	/* if the radix <= 36 the conversion is case insensitive
5205	* this allows numbers like 1AB and 1ab to represent the same value
5206	* [e.g. in hex]
5207	*/
5208	ch = (radix <= 36) ? (char)XTOUPPER((unsigned char)str) : str;
5209	for (y = 0; y < 64; y++) {
5210	if (ch == mp_s_rmap[y]) {
5211	break;
5212	}
5213	}
5214
5215	/* if the char was found in the map
5216	* and is less than the given radix add it
5217	* to the number, otherwise exit the loop.
5218	*/
5219	if (y < radix) {
5220	if ((res = mp_mul_d (a, (mp_digit) radix, a)) != MP_OKAY) {
5221	return res;
5222	}
5223	if ((res = mp_add_d (a, (mp_digit) y, a)) != MP_OKAY) {
5224	return res;
5225	}
5226	} else {
5227	break;
5228	}
5229	++str;
5230	}
5231
5232	/* if digit in isn't null term, then invalid character was found */
5233	if (*str != '\0') {
5234	mp_zero (a);
5235	return MP_VAL;
5236	}
5237
5238	/* set the sign only if a != 0 */
5239	if (mp_iszero(a) != MP_YES) {
5240	a->sign = neg;
5241	}
5242	return MP_OKAY;
5243	}
5244	#endif /* !defined(NO_DSA) \|\| defined(HAVE_ECC) */
5245
5246	#ifdef WC_MP_TO_RADIX
5247
5248	/* returns size of ASCII representation */
5249	int mp_radix_size (mp_int a, int radix, int size)
5250	{
5251	int res, digs;
5252	mp_int t;
5253	mp_digit d;
5254
5255	*size = 0;
5256
5257	/* special case for binary */
5258	if (radix == MP_RADIX_BIN) {
5259	*size = mp_count_bits(a);
5260	if (*size == 0)
5261	*size = 1;
5262	size += (a->sign == MP_NEG ? 1 : 0) + 1; / "-" sign + null term */
5263	return MP_OKAY;
5264	}
5265
5266	/* make sure the radix is in range */
5267	if (radix < MP_RADIX_BIN \|\| radix > MP_RADIX_MAX) {
5268	return MP_VAL;
5269	}
5270
5271	if (mp_iszero(a) == MP_YES) {
5272	#ifndef WC_DISABLE_RADIX_ZERO_PAD
5273	if (radix == 16)
5274	*size = 3;
5275	else
5276	#endif
5277	*size = 2;
5278	return MP_OKAY;
5279	}
5280
5281	/* digs is the digit count */
5282	digs = 0;
5283
5284	/* init a copy of the input */
5285	if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
5286	return res;
5287	}
5288
5289	/* force temp to positive */
5290	t.sign = MP_ZPOS;
5291
5292	/* fetch out all of the digits */
5293	while (mp_iszero (&t) == MP_NO) {
5294	if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
5295	mp_clear (&t);
5296	return res;
5297	}
5298	++digs;
5299	}
5300	mp_clear (&t);
5301
5302	#ifndef WC_DISABLE_RADIX_ZERO_PAD
5303	/* For hexadecimal output, add zero padding when number of digits is odd */
5304	if ((digs & 1) && (radix == 16)) {
5305	++digs;
5306	}
5307	#endif
5308
5309	/* if it's negative add one for the sign */
5310	if (a->sign == MP_NEG) {
5311	++digs;
5312	}
5313
5314	/* return digs + 1, the 1 is for the NULL byte that would be required. */
5315	*size = digs + 1;
5316	return MP_OKAY;
5317	}
5318
5319	/* stores a bignum as a ASCII string in a given radix (2..64) */
5320	int mp_toradix (mp_int a, char str, int radix)
5321	{
5322	int res, digs;
5323	mp_int t;
5324	mp_digit d;
5325	char *_s = str;
5326
5327	/* check range of the radix */
5328	if (radix < MP_RADIX_BIN \|\| radix > MP_RADIX_MAX) {
5329	return MP_VAL;
5330	}
5331
5332	/* quick out if its zero */
5333	if (mp_iszero(a) == MP_YES) {
5334	#ifndef WC_DISABLE_RADIX_ZERO_PAD
5335	if (radix == 16) {
5336	*str++ = '0';
5337	}
5338	#endif
5339	*str++ = '0';
5340	*str = '\0';
5341	return MP_OKAY;
5342	}
5343
5344	if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
5345	return res;
5346	}
5347
5348	/* if it is negative output a - */
5349	if (t.sign == MP_NEG) {
5350	++_s;
5351	*str++ = '-';
5352	t.sign = MP_ZPOS;
5353	}
5354
5355	digs = 0;
5356	while (mp_iszero (&t) == MP_NO) {
5357	if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
5358	mp_clear (&t);
5359	return res;
5360	}
5361	*str++ = mp_s_rmap[d];
5362	++digs;
5363	}
5364	#ifndef WC_DISABLE_RADIX_ZERO_PAD
5365	/* For hexadecimal output, add zero padding when number of digits is odd */
5366	if ((digs & 1) && (radix == 16)) {
5367	*str++ = mp_s_rmap[0];
5368	++digs;
5369	}
5370	#endif
5371	/* reverse the digits of the string. In this case _s points
5372	* to the first digit [excluding the sign] of the number]
5373	*/
5374	bn_reverse ((unsigned char *)_s, digs);
5375
5376	/* append a NULL so the string is properly terminated */
5377	*str = '\0';
5378
5379	mp_clear (&t);
5380	return MP_OKAY;
5381	}
5382
5383	#ifdef WOLFSSL_DEBUG_MATH
5384	void mp_dump(const char* desc, mp_int* a, byte verbose)
5385	{
5386	char *buffer;
5387	int size = a->alloc;
5388
5389	buffer = (char)XMALLOC(size sizeof(mp_digit) * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
5390	if (buffer == NULL) {
5391	return;
5392	}
5393
5394	printf("%s: ptr=%p, used=%d, sign=%d, size=%d, mpd=%d\n",
5395	desc, a, a->used, a->sign, size, (int)sizeof(mp_digit));
5396
5397	mp_tohex(a, buffer);
5398	printf(" %s\n ", buffer);
5399
5400	if (verbose) {
5401	int i;
5402	for(i=0; i<a->alloc * (int)sizeof(mp_digit); i++) {
5403	printf("%02x ", (((byte)a->dp) + i));
5404	}
5405	printf("\n");
5406	}
5407
5408	XFREE(buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
5409	}
5410	#endif /* WOLFSSL_DEBUG_MATH */
5411
5412	#endif /* WC_MP_TO_RADIX */
5413
5414	#endif /* WOLFSSL_SP_MATH */
5415
5416	#endif /* USE_FAST_MATH */
5417
5418	#endif /* NO_BIG_INT */

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: azure_iot_hub_f767zi/trunk/wolfssl-4.7.0/wolfcrypt/src/integer.c@ 464

Download in other formats: