Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: UsbWattMeter/trunk/wolfssl-3.7.0/wolfcrypt/src/integer.c@ 164

Last change on this file since 164 was 164, checked in by coas-nagasima, 8 years ago
TOPPERS/ECNLサンプルアプリ「USB充電器電力計」を追加
Property svn:eol-style set to `native` Property svn:keywords set to `Id` Property svn:mime-type set to `text/x-csrc`
File size: 102.0 KB

Line
1	/* integer.c
2	*
3	* Copyright (C) 2006-2015 wolfSSL Inc.
4	*
5	* This file is part of wolfSSL. (formerly known as CyaSSL)
6	*
7	* wolfSSL is free software; you can redistribute it and/or modify
8	* it under the terms of the GNU General Public License as published by
9	* the Free Software Foundation; either version 2 of the License, or
10	* (at your option) any later version.
11	*
12	* wolfSSL is distributed in the hope that it will be useful,
13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15	* GNU General Public License for more details.
16	*
17	* You should have received a copy of the GNU General Public License
18	* along with this program; if not, write to the Free Software
19	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
20	*/
21
22
23	/*
24	* Based on public domain LibTomMath 0.38 by Tom St Denis, tomstdenis@iahu.ca,
25	* http://math.libtomcrypt.com
26	*/
27
28
29	#ifdef HAVE_CONFIG_H
30	#include <config.h>
31	#endif
32
33	/* in case user set USE_FAST_MATH there */
34	#include <wolfssl/wolfcrypt/settings.h>
35
36	#ifndef NO_BIG_INT
37
38	#ifndef USE_FAST_MATH
39
40	#include <wolfssl/wolfcrypt/integer.h>
41
42	#ifndef NO_WOLFSSL_SMALL_STACK
43	#ifndef WOLFSSL_SMALL_STACK
44	#define WOLFSSL_SMALL_STACK
45	#endif
46	#endif
47
48	#ifdef SHOW_GEN
49	#ifdef FREESCALE_MQX
50	#if MQX_USE_IO_OLD
51	#include <fio.h>
52	#else
53	#include <nio.h>
54	#endif
55	#else
56	#include <stdio.h>
57	#endif
58	#endif
59
60	/* reverse an array, used for radix code */
61	static void
62	bn_reverse (unsigned char *s, int len)
63	{
64	int ix, iy;
65	unsigned char t;
66
67	ix = 0;
68	iy = len - 1;
69	while (ix < iy) {
70	t = s[ix];
71	s[ix] = s[iy];
72	s[iy] = t;
73	++ix;
74	--iy;
75	}
76	}
77
78	/* math settings check */
79	word32 CheckRunTimeSettings(void)
80	{
81	return CTC_SETTINGS;
82	}
83
84
85	/* handle up to 6 inits */
86	int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e,
87	mp_int* f)
88	{
89	int res = MP_OKAY;
90
91	if (a && ((res = mp_init(a)) != MP_OKAY))
92	return res;
93
94	if (b && ((res = mp_init(b)) != MP_OKAY)) {
95	mp_clear(a);
96	return res;
97	}
98
99	if (c && ((res = mp_init(c)) != MP_OKAY)) {
100	mp_clear(a); mp_clear(b);
101	return res;
102	}
103
104	if (d && ((res = mp_init(d)) != MP_OKAY)) {
105	mp_clear(a); mp_clear(b); mp_clear(c);
106	return res;
107	}
108
109	if (e && ((res = mp_init(e)) != MP_OKAY)) {
110	mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d);
111	return res;
112	}
113
114	if (f && ((res = mp_init(f)) != MP_OKAY)) {
115	mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d); mp_clear(e);
116	return res;
117	}
118
119	return res;
120	}
121
122
123	/* init a new mp_int */
124	int mp_init (mp_int * a)
125	{
126	int i;
127
128	/* allocate memory required and clear it */
129	a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC, 0,
130	DYNAMIC_TYPE_BIGINT);
131	if (a->dp == NULL) {
132	return MP_MEM;
133	}
134
135	/* set the digits to zero */
136	for (i = 0; i < MP_PREC; i++) {
137	a->dp[i] = 0;
138	}
139
140	/* set the used to zero, allocated digits to the default precision
141	* and sign to positive */
142	a->used = 0;
143	a->alloc = MP_PREC;
144	a->sign = MP_ZPOS;
145
146	return MP_OKAY;
147	}
148
149
150	/* clear one (frees) */
151	void
152	mp_clear (mp_int * a)
153	{
154	int i;
155
156	if (a == NULL)
157	return;
158
159	/* only do anything if a hasn't been freed previously */
160	if (a->dp != NULL) {
161	/* first zero the digits */
162	for (i = 0; i < a->used; i++) {
163	a->dp[i] = 0;
164	}
165
166	/* free ram */
167	XFREE(a->dp, 0, DYNAMIC_TYPE_BIGINT);
168
169	/* reset members to make debugging easier */
170	a->dp = NULL;
171	a->alloc = a->used = 0;
172	a->sign = MP_ZPOS;
173	}
174	}
175
176
177	/* get the size for an unsigned equivalent */
178	int mp_unsigned_bin_size (mp_int * a)
179	{
180	int size = mp_count_bits (a);
181	return (size / 8 + ((size & 7) != 0 ? 1 : 0));
182	}
183
184
185	/* returns the number of bits in an int */
186	int
187	mp_count_bits (mp_int * a)
188	{
189	int r;
190	mp_digit q;
191
192	/* shortcut */
193	if (a->used == 0) {
194	return 0;
195	}
196
197	/* get number of digits and add that */
198	r = (a->used - 1) * DIGIT_BIT;
199
200	/* take the last digit and count the bits in it */
201	q = a->dp[a->used - 1];
202	while (q > ((mp_digit) 0)) {
203	++r;
204	q >>= ((mp_digit) 1);
205	}
206	return r;
207	}
208
209
210	int mp_leading_bit (mp_int * a)
211	{
212	int bit = 0;
213	mp_int t;
214
215	if (mp_init_copy(&t, a) != MP_OKAY)
216	return 0;
217
218	while (mp_iszero(&t) == 0) {
219	#ifndef MP_8BIT
220	bit = (t.dp[0] & 0x80) != 0;
221	#else
222	bit = (t.dp[0] \| ((t.dp[1] & 0x01) << 7)) & 0x80 != 0;
223	#endif
224	if (mp_div_2d (&t, 8, &t, NULL) != MP_OKAY)
225	break;
226	}
227	mp_clear(&t);
228	return bit;
229	}
230
231
232	/* store in unsigned [big endian] format */
233	int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
234	{
235	int x, res;
236	mp_int t;
237
238	if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
239	return res;
240	}
241
242	x = 0;
243	while (mp_iszero (&t) == 0) {
244	#ifndef MP_8BIT
245	b[x++] = (unsigned char) (t.dp[0] & 255);
246	#else
247	b[x++] = (unsigned char) (t.dp[0] \| ((t.dp[1] & 0x01) << 7));
248	#endif
249	if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
250	mp_clear (&t);
251	return res;
252	}
253	}
254	bn_reverse (b, x);
255	mp_clear (&t);
256	return MP_OKAY;
257	}
258
259
260	/* creates "a" then copies b into it */
261	int mp_init_copy (mp_int * a, mp_int * b)
262	{
263	int res;
264
265	if ((res = mp_init (a)) != MP_OKAY) {
266	return res;
267	}
268	return mp_copy (b, a);
269	}
270
271
272	/* copy, b = a */
273	int
274	mp_copy (mp_int * a, mp_int * b)
275	{
276	int res, n;
277
278	/* if dst == src do nothing */
279	if (a == b) {
280	return MP_OKAY;
281	}
282
283	/* grow dest */
284	if (b->alloc < a->used) {
285	if ((res = mp_grow (b, a->used)) != MP_OKAY) {
286	return res;
287	}
288	}
289
290	/* zero b and copy the parameters over */
291	{
292	register mp_digit tmpa, tmpb;
293
294	/* pointer aliases */
295
296	/* source */
297	tmpa = a->dp;
298
299	/* destination */
300	tmpb = b->dp;
301
302	/* copy all the digits */
303	for (n = 0; n < a->used; n++) {
304	tmpb++ = tmpa++;
305	}
306
307	/* clear high digits */
308	for (; n < b->used; n++) {
309	*tmpb++ = 0;
310	}
311	}
312
313	/* copy used count and sign */
314	b->used = a->used;
315	b->sign = a->sign;
316	return MP_OKAY;
317	}
318
319
320	/* grow as required */
321	int mp_grow (mp_int * a, int size)
322	{
323	int i;
324	mp_digit *tmp;
325
326	/* if the alloc size is smaller alloc more ram */
327	if (a->alloc < size) {
328	/* ensure there are always at least MP_PREC digits extra on top */
329	size += (MP_PREC * 2) - (size % MP_PREC);
330
331	/* reallocate the array a->dp
332	*
333	* We store the return in a temporary variable
334	* in case the operation failed we don't want
335	* to overwrite the dp member of a.
336	*/
337	tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size, 0,
338	DYNAMIC_TYPE_BIGINT);
339	if (tmp == NULL) {
340	/* reallocation failed but "a" is still valid [can be freed] */
341	return MP_MEM;
342	}
343
344	/* reallocation succeeded so set a->dp */
345	a->dp = tmp;
346
347	/* zero excess digits */
348	i = a->alloc;
349	a->alloc = size;
350	for (; i < a->alloc; i++) {
351	a->dp[i] = 0;
352	}
353	}
354	return MP_OKAY;
355	}
356
357
358	/* shift right by a certain bit count (store quotient in c, optional
359	remainder in d) */
360	int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
361	{
362	int D, res;
363	mp_int t;
364
365
366	/* if the shift count is <= 0 then we do no work */
367	if (b <= 0) {
368	res = mp_copy (a, c);
369	if (d != NULL) {
370	mp_zero (d);
371	}
372	return res;
373	}
374
375	if ((res = mp_init (&t)) != MP_OKAY) {
376	return res;
377	}
378
379	/* get the remainder */
380	if (d != NULL) {
381	if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
382	mp_clear (&t);
383	return res;
384	}
385	}
386
387	/* copy */
388	if ((res = mp_copy (a, c)) != MP_OKAY) {
389	mp_clear (&t);
390	return res;
391	}
392
393	/* shift by as many digits in the bit count */
394	if (b >= (int)DIGIT_BIT) {
395	mp_rshd (c, b / DIGIT_BIT);
396	}
397
398	/* shift any bit count < DIGIT_BIT */
399	D = (b % DIGIT_BIT);
400	if (D != 0) {
401	mp_rshb(c, D);
402	}
403	mp_clamp (c);
404	if (d != NULL) {
405	mp_exch (&t, d);
406	}
407	mp_clear (&t);
408	return MP_OKAY;
409	}
410
411
412	/* set to zero */
413	void mp_zero (mp_int * a)
414	{
415	int n;
416	mp_digit *tmp;
417
418	a->sign = MP_ZPOS;
419	a->used = 0;
420
421	tmp = a->dp;
422	for (n = 0; n < a->alloc; n++) {
423	*tmp++ = 0;
424	}
425	}
426
427
428	/* trim unused digits
429	*
430	* This is used to ensure that leading zero digits are
431	* trimed and the leading "used" digit will be non-zero
432	* Typically very fast. Also fixes the sign if there
433	* are no more leading digits
434	*/
435	void
436	mp_clamp (mp_int * a)
437	{
438	/* decrease used while the most significant digit is
439	* zero.
440	*/
441	while (a->used > 0 && a->dp[a->used - 1] == 0) {
442	--(a->used);
443	}
444
445	/* reset the sign flag if used == 0 */
446	if (a->used == 0) {
447	a->sign = MP_ZPOS;
448	}
449	}
450
451
452	/* swap the elements of two integers, for cases where you can't simply swap the
453	* mp_int pointers around
454	*/
455	void
456	mp_exch (mp_int * a, mp_int * b)
457	{
458	mp_int t;
459
460	t = *a;
461	a = b;
462	*b = t;
463	}
464
465
466	/* shift right a certain number of bits */
467	void mp_rshb (mp_int *c, int x)
468	{
469	register mp_digit *tmpc, mask, shift;
470	mp_digit r, rr;
471	mp_digit D = x;
472
473	/* mask */
474	mask = (((mp_digit)1) << D) - 1;
475
476	/* shift for lsb */
477	shift = DIGIT_BIT - D;
478
479	/* alias */
480	tmpc = c->dp + (c->used - 1);
481
482	/* carry */
483	r = 0;
484	for (x = c->used - 1; x >= 0; x--) {
485	/* get the lower bits of this word in a temp */
486	rr = *tmpc & mask;
487
488	/* shift the current word and mix in the carry bits from previous word */
489	tmpc = (tmpc >> D) \| (r << shift);
490	--tmpc;
491
492	/* set the carry to the carry bits of the current word found above */
493	r = rr;
494	}
495	}
496
497
498	/* shift right a certain amount of digits */
499	void mp_rshd (mp_int * a, int b)
500	{
501	int x;
502
503	/* if b <= 0 then ignore it */
504	if (b <= 0) {
505	return;
506	}
507
508	/* if b > used then simply zero it and return */
509	if (a->used <= b) {
510	mp_zero (a);
511	return;
512	}
513
514	{
515	register mp_digit bottom, top;
516
517	/* shift the digits down */
518
519	/* bottom */
520	bottom = a->dp;
521
522	/* top [offset into digits] */
523	top = a->dp + b;
524
525	/* this is implemented as a sliding window where
526	* the window is b-digits long and digits from
527	* the top of the window are copied to the bottom
528	*
529	* e.g.
530
531	b-2 \| b-1 \| b0 \| b1 \| b2 \| ... \| bb \| ---->
532	/\ \| ---->
533	\-------------------/ ---->
534	*/
535	for (x = 0; x < (a->used - b); x++) {
536	bottom++ = top++;
537	}
538
539	/* zero the top digits */
540	for (; x < a->used; x++) {
541	*bottom++ = 0;
542	}
543	}
544
545	/* remove excess digits */
546	a->used -= b;
547	}
548
549
550	/* calc a value mod 2*b /
551	int
552	mp_mod_2d (mp_int * a, int b, mp_int * c)
553	{
554	int x, res;
555
556	/* if b is <= 0 then zero the int */
557	if (b <= 0) {
558	mp_zero (c);
559	return MP_OKAY;
560	}
561
562	/* if the modulus is larger than the value than return */
563	if (b >= (int) (a->used * DIGIT_BIT)) {
564	res = mp_copy (a, c);
565	return res;
566	}
567
568	/* copy */
569	if ((res = mp_copy (a, c)) != MP_OKAY) {
570	return res;
571	}
572
573	/* zero digits above the last digit of the modulus */
574	for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
575	c->dp[x] = 0;
576	}
577	/* clear the digit that is not completely outside/inside the modulus */
578	c->dp[b / DIGIT_BIT] &= (mp_digit) ((((mp_digit) 1) <<
579	(((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
580	mp_clamp (c);
581	return MP_OKAY;
582	}
583
584
585	/* reads a unsigned char array, assumes the msb is stored first [big endian] */
586	int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
587	{
588	int res;
589
590	/* make sure there are at least two digits */
591	if (a->alloc < 2) {
592	if ((res = mp_grow(a, 2)) != MP_OKAY) {
593	return res;
594	}
595	}
596
597	/* zero the int */
598	mp_zero (a);
599
600	/* read the bytes in */
601	while (c-- > 0) {
602	if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
603	return res;
604	}
605
606	#ifndef MP_8BIT
607	a->dp[0] \|= *b++;
608	a->used += 1;
609	#else
610	a->dp[0] = (*b & MP_MASK);
611	a->dp[1] \|= ((*b++ >> 7U) & 1);
612	a->used += 2;
613	#endif
614	}
615	mp_clamp (a);
616	return MP_OKAY;
617	}
618
619
620	/* shift left by a certain bit count */
621	int mp_mul_2d (mp_int * a, int b, mp_int * c)
622	{
623	mp_digit d;
624	int res;
625
626	/* copy */
627	if (a != c) {
628	if ((res = mp_copy (a, c)) != MP_OKAY) {
629	return res;
630	}
631	}
632
633	if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
634	if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
635	return res;
636	}
637	}
638
639	/* shift by as many digits in the bit count */
640	if (b >= (int)DIGIT_BIT) {
641	if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
642	return res;
643	}
644	}
645
646	/* shift any bit count < DIGIT_BIT */
647	d = (mp_digit) (b % DIGIT_BIT);
648	if (d != 0) {
649	register mp_digit *tmpc, shift, mask, r, rr;
650	register int x;
651
652	/* bitmask for carries */
653	mask = (((mp_digit)1) << d) - 1;
654
655	/* shift for msbs */
656	shift = DIGIT_BIT - d;
657
658	/* alias */
659	tmpc = c->dp;
660
661	/* carry */
662	r = 0;
663	for (x = 0; x < c->used; x++) {
664	/* get the higher bits of the current word */
665	rr = (*tmpc >> shift) & mask;
666
667	/* shift the current word and OR in the carry */
668	tmpc = ((tmpc << d) \| r) & MP_MASK;
669	++tmpc;
670
671	/* set the carry to the carry bits of the current word */
672	r = rr;
673	}
674
675	/* set final carry */
676	if (r != 0) {
677	c->dp[(c->used)++] = r;
678	}
679	}
680	mp_clamp (c);
681	return MP_OKAY;
682	}
683
684
685	/* shift left a certain amount of digits */
686	int mp_lshd (mp_int * a, int b)
687	{
688	int x, res;
689
690	/* if its less than zero return */
691	if (b <= 0) {
692	return MP_OKAY;
693	}
694
695	/* grow to fit the new digits */
696	if (a->alloc < a->used + b) {
697	if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
698	return res;
699	}
700	}
701
702	{
703	register mp_digit top, bottom;
704
705	/* increment the used by the shift amount then copy upwards */
706	a->used += b;
707
708	/* top */
709	top = a->dp + a->used - 1;
710
711	/* base */
712	bottom = a->dp + a->used - 1 - b;
713
714	/* much like mp_rshd this is implemented using a sliding window
715	* except the window goes the otherway around. Copying from
716	* the bottom to the top. see bn_mp_rshd.c for more info.
717	*/
718	for (x = a->used - 1; x >= b; x--) {
719	top-- = bottom--;
720	}
721
722	/* zero the lower digits */
723	top = a->dp;
724	for (x = 0; x < b; x++) {
725	*top++ = 0;
726	}
727	}
728	return MP_OKAY;
729	}
730
731
732	/* this is a shell function that calls either the normal or Montgomery
733	* exptmod functions. Originally the call to the montgomery code was
734	* embedded in the normal function but that wasted alot of stack space
735	* for nothing (since 99% of the time the Montgomery code would be called)
736	*/
737	int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
738	{
739	int dr;
740
741	/* modulus P must be positive */
742	if (P->sign == MP_NEG) {
743	return MP_VAL;
744	}
745
746	/* if exponent X is negative we have to recurse */
747	if (X->sign == MP_NEG) {
748	#ifdef BN_MP_INVMOD_C
749	mp_int tmpG, tmpX;
750	int err;
751
752	/* first compute 1/G mod P */
753	if ((err = mp_init(&tmpG)) != MP_OKAY) {
754	return err;
755	}
756	if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
757	mp_clear(&tmpG);
758	return err;
759	}
760
761	/* now get \|X\| */
762	if ((err = mp_init(&tmpX)) != MP_OKAY) {
763	mp_clear(&tmpG);
764	return err;
765	}
766	if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
767	mp_clear(&tmpG);
768	mp_clear(&tmpX);
769	return err;
770	}
771
772	/* and now compute (1/G)\|X\| instead of GX [X < 0] */
773	err = mp_exptmod(&tmpG, &tmpX, P, Y);
774	mp_clear(&tmpG);
775	mp_clear(&tmpX);
776	return err;
777	#else
778	/* no invmod */
779	return MP_VAL;
780	#endif
781	}
782
783	/* modified diminished radix reduction */
784	#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && \
785	defined(BN_S_MP_EXPTMOD_C)
786	if (mp_reduce_is_2k_l(P) == MP_YES) {
787	return s_mp_exptmod(G, X, P, Y, 1);
788	}
789	#endif
790
791	#ifdef BN_MP_DR_IS_MODULUS_C
792	/* is it a DR modulus? */
793	dr = mp_dr_is_modulus(P);
794	#else
795	/* default to no */
796	dr = 0;
797	#endif
798
799	#ifdef BN_MP_REDUCE_IS_2K_C
800	/* if not, is it a unrestricted DR modulus? */
801	if (dr == 0) {
802	dr = mp_reduce_is_2k(P) << 1;
803	}
804	#endif
805
806	/* if the modulus is odd or dr != 0 use the montgomery method */
807	#ifdef BN_MP_EXPTMOD_FAST_C
808	if (mp_isodd (P) == 1 \|\| dr != 0) {
809	return mp_exptmod_fast (G, X, P, Y, dr);
810	} else {
811	#endif
812	#ifdef BN_S_MP_EXPTMOD_C
813	/* otherwise use the generic Barrett reduction technique */
814	return s_mp_exptmod (G, X, P, Y, 0);
815	#else
816	/* no exptmod for evens */
817	return MP_VAL;
818	#endif
819	#ifdef BN_MP_EXPTMOD_FAST_C
820	}
821	#endif
822	}
823
824
825	/* b = \|a\|
826	*
827	* Simple function copies the input and fixes the sign to positive
828	*/
829	int
830	mp_abs (mp_int * a, mp_int * b)
831	{
832	int res;
833
834	/* copy a to b */
835	if (a != b) {
836	if ((res = mp_copy (a, b)) != MP_OKAY) {
837	return res;
838	}
839	}
840
841	/* force the sign of b to positive */
842	b->sign = MP_ZPOS;
843
844	return MP_OKAY;
845	}
846
847
848	/* hac 14.61, pp608 */
849	int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
850	{
851	/* b cannot be negative */
852	if (b->sign == MP_NEG \|\| mp_iszero(b) == 1) {
853	return MP_VAL;
854	}
855
856	#ifdef BN_FAST_MP_INVMOD_C
857	/* if the modulus is odd we can use a faster routine instead */
858	if (mp_isodd (b) == 1) {
859	return fast_mp_invmod (a, b, c);
860	}
861	#endif
862
863	#ifdef BN_MP_INVMOD_SLOW_C
864	return mp_invmod_slow(a, b, c);
865	#endif
866	}
867
868
869	/* computes the modular inverse via binary extended euclidean algorithm,
870	* that is c = 1/a mod b
871	*
872	* Based on slow invmod except this is optimized for the case where b is
873	* odd as per HAC Note 14.64 on pp. 610
874	*/
875	int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
876	{
877	mp_int x, y, u, v, B, D;
878	int res, neg, loop_check = 0;
879
880	/* 2. [modified] b must be odd */
881	if (mp_iseven (b) == 1) {
882	return MP_VAL;
883	}
884
885	/* init all our temps */
886	if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D)) != MP_OKAY) {
887	return res;
888	}
889
890	/* x == modulus, y == value to invert */
891	if ((res = mp_copy (b, &x)) != MP_OKAY) {
892	goto LBL_ERR;
893	}
894
895	/* we need y = \|a\| */
896	if ((res = mp_mod (a, b, &y)) != MP_OKAY) {
897	goto LBL_ERR;
898	}
899
900	/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
901	if ((res = mp_copy (&x, &u)) != MP_OKAY) {
902	goto LBL_ERR;
903	}
904	if ((res = mp_copy (&y, &v)) != MP_OKAY) {
905	goto LBL_ERR;
906	}
907	mp_set (&D, 1);
908
909	top:
910	/* 4. while u is even do */
911	while (mp_iseven (&u) == 1) {
912	/* 4.1 u = u/2 */
913	if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
914	goto LBL_ERR;
915	}
916	/* 4.2 if B is odd then */
917	if (mp_isodd (&B) == 1) {
918	if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
919	goto LBL_ERR;
920	}
921	}
922	/* B = B/2 */
923	if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
924	goto LBL_ERR;
925	}
926	}
927
928	/* 5. while v is even do */
929	while (mp_iseven (&v) == 1) {
930	/* 5.1 v = v/2 */
931	if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
932	goto LBL_ERR;
933	}
934	/* 5.2 if D is odd then */
935	if (mp_isodd (&D) == 1) {
936	/* D = (D-x)/2 */
937	if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
938	goto LBL_ERR;
939	}
940	}
941	/* D = D/2 */
942	if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
943	goto LBL_ERR;
944	}
945	}
946
947	/* 6. if u >= v then */
948	if (mp_cmp (&u, &v) != MP_LT) {
949	/* u = u - v, B = B - D */
950	if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
951	goto LBL_ERR;
952	}
953
954	if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
955	goto LBL_ERR;
956	}
957	} else {
958	/* v - v - u, D = D - B */
959	if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
960	goto LBL_ERR;
961	}
962
963	if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
964	goto LBL_ERR;
965	}
966	}
967
968	/* if not zero goto step 4 */
969	if (mp_iszero (&u) == 0) {
970	if (++loop_check > 4096) {
971	res = MP_VAL;
972	goto LBL_ERR;
973	}
974	goto top;
975	}
976
977	/* now a = C, b = D, gcd == gv /
978
979	/* if v != 1 then there is no inverse */
980	if (mp_cmp_d (&v, 1) != MP_EQ) {
981	res = MP_VAL;
982	goto LBL_ERR;
983	}
984
985	/* b is now the inverse */
986	neg = a->sign;
987	while (D.sign == MP_NEG) {
988	if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
989	goto LBL_ERR;
990	}
991	}
992	/* too big */
993	while (mp_cmp_mag(&D, b) != MP_LT) {
994	if ((res = mp_sub(&D, b, &D)) != MP_OKAY) {
995	goto LBL_ERR;
996	}
997	}
998	mp_exch (&D, c);
999	c->sign = neg;
1000	res = MP_OKAY;
1001
1002	LBL_ERR:mp_clear(&x);
1003	mp_clear(&y);
1004	mp_clear(&u);
1005	mp_clear(&v);
1006	mp_clear(&B);
1007	mp_clear(&D);
1008	return res;
1009	}
1010
1011
1012	/* hac 14.61, pp608 */
1013	int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
1014	{
1015	mp_int x, y, u, v, A, B, C, D;
1016	int res;
1017
1018	/* b cannot be negative */
1019	if (b->sign == MP_NEG \|\| mp_iszero(b) == 1) {
1020	return MP_VAL;
1021	}
1022
1023	/* init temps */
1024	if ((res = mp_init_multi(&x, &y, &u, &v,
1025	&A, &B)) != MP_OKAY) {
1026	return res;
1027	}
1028
1029	/* init rest of tmps temps */
1030	if ((res = mp_init_multi(&C, &D, 0, 0, 0, 0)) != MP_OKAY) {
1031	return res;
1032	}
1033
1034	/* x = a, y = b */
1035	if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
1036	goto LBL_ERR;
1037	}
1038	if ((res = mp_copy (b, &y)) != MP_OKAY) {
1039	goto LBL_ERR;
1040	}
1041
1042	/* 2. [modified] if x,y are both even then return an error! */
1043	if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
1044	res = MP_VAL;
1045	goto LBL_ERR;
1046	}
1047
1048	/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
1049	if ((res = mp_copy (&x, &u)) != MP_OKAY) {
1050	goto LBL_ERR;
1051	}
1052	if ((res = mp_copy (&y, &v)) != MP_OKAY) {
1053	goto LBL_ERR;
1054	}
1055	mp_set (&A, 1);
1056	mp_set (&D, 1);
1057
1058	top:
1059	/* 4. while u is even do */
1060	while (mp_iseven (&u) == 1) {
1061	/* 4.1 u = u/2 */
1062	if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
1063	goto LBL_ERR;
1064	}
1065	/* 4.2 if A or B is odd then */
1066	if (mp_isodd (&A) == 1 \|\| mp_isodd (&B) == 1) {
1067	/* A = (A+y)/2, B = (B-x)/2 */
1068	if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
1069	goto LBL_ERR;
1070	}
1071	if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
1072	goto LBL_ERR;
1073	}
1074	}
1075	/* A = A/2, B = B/2 */
1076	if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
1077	goto LBL_ERR;
1078	}
1079	if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
1080	goto LBL_ERR;
1081	}
1082	}
1083
1084	/* 5. while v is even do */
1085	while (mp_iseven (&v) == 1) {
1086	/* 5.1 v = v/2 */
1087	if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
1088	goto LBL_ERR;
1089	}
1090	/* 5.2 if C or D is odd then */
1091	if (mp_isodd (&C) == 1 \|\| mp_isodd (&D) == 1) {
1092	/* C = (C+y)/2, D = (D-x)/2 */
1093	if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
1094	goto LBL_ERR;
1095	}
1096	if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
1097	goto LBL_ERR;
1098	}
1099	}
1100	/* C = C/2, D = D/2 */
1101	if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
1102	goto LBL_ERR;
1103	}
1104	if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
1105	goto LBL_ERR;
1106	}
1107	}
1108
1109	/* 6. if u >= v then */
1110	if (mp_cmp (&u, &v) != MP_LT) {
1111	/* u = u - v, A = A - C, B = B - D */
1112	if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
1113	goto LBL_ERR;
1114	}
1115
1116	if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
1117	goto LBL_ERR;
1118	}
1119
1120	if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
1121	goto LBL_ERR;
1122	}
1123	} else {
1124	/* v - v - u, C = C - A, D = D - B */
1125	if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
1126	goto LBL_ERR;
1127	}
1128
1129	if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
1130	goto LBL_ERR;
1131	}
1132
1133	if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
1134	goto LBL_ERR;
1135	}
1136	}
1137
1138	/* if not zero goto step 4 */
1139	if (mp_iszero (&u) == 0)
1140	goto top;
1141
1142	/* now a = C, b = D, gcd == gv /
1143
1144	/* if v != 1 then there is no inverse */
1145	if (mp_cmp_d (&v, 1) != MP_EQ) {
1146	res = MP_VAL;
1147	goto LBL_ERR;
1148	}
1149
1150	/* if its too low */
1151	while (mp_cmp_d(&C, 0) == MP_LT) {
1152	if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
1153	goto LBL_ERR;
1154	}
1155	}
1156
1157	/* too big */
1158	while (mp_cmp_mag(&C, b) != MP_LT) {
1159	if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
1160	goto LBL_ERR;
1161	}
1162	}
1163
1164	/* C is now the inverse */
1165	mp_exch (&C, c);
1166	res = MP_OKAY;
1167	LBL_ERR:mp_clear(&x);
1168	mp_clear(&y);
1169	mp_clear(&u);
1170	mp_clear(&v);
1171	mp_clear(&A);
1172	mp_clear(&B);
1173	mp_clear(&C);
1174	mp_clear(&D);
1175	return res;
1176	}
1177
1178
1179	/* compare maginitude of two ints (unsigned) */
1180	int mp_cmp_mag (mp_int * a, mp_int * b)
1181	{
1182	int n;
1183	mp_digit tmpa, tmpb;
1184
1185	/* compare based on # of non-zero digits */
1186	if (a->used > b->used) {
1187	return MP_GT;
1188	}
1189
1190	if (a->used < b->used) {
1191	return MP_LT;
1192	}
1193
1194	/* alias for a */
1195	tmpa = a->dp + (a->used - 1);
1196
1197	/* alias for b */
1198	tmpb = b->dp + (a->used - 1);
1199
1200	/* compare based on digits */
1201	for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
1202	if (tmpa > tmpb) {
1203	return MP_GT;
1204	}
1205
1206	if (tmpa < tmpb) {
1207	return MP_LT;
1208	}
1209	}
1210	return MP_EQ;
1211	}
1212
1213
1214	/* compare two ints (signed)*/
1215	int
1216	mp_cmp (mp_int * a, mp_int * b)
1217	{
1218	/* compare based on sign */
1219	if (a->sign != b->sign) {
1220	if (a->sign == MP_NEG) {
1221	return MP_LT;
1222	} else {
1223	return MP_GT;
1224	}
1225	}
1226
1227	/* compare digits */
1228	if (a->sign == MP_NEG) {
1229	/* if negative compare opposite direction */
1230	return mp_cmp_mag(b, a);
1231	} else {
1232	return mp_cmp_mag(a, b);
1233	}
1234	}
1235
1236
1237	/* compare a digit */
1238	int mp_cmp_d(mp_int * a, mp_digit b)
1239	{
1240	/* compare based on sign */
1241	if (a->sign == MP_NEG) {
1242	return MP_LT;
1243	}
1244
1245	/* compare based on magnitude */
1246	if (a->used > 1) {
1247	return MP_GT;
1248	}
1249
1250	/* compare the only digit of a to b */
1251	if (a->dp[0] > b) {
1252	return MP_GT;
1253	} else if (a->dp[0] < b) {
1254	return MP_LT;
1255	} else {
1256	return MP_EQ;
1257	}
1258	}
1259
1260
1261	/* set to a digit */
1262	void mp_set (mp_int * a, mp_digit b)
1263	{
1264	mp_zero (a);
1265	a->dp[0] = b & MP_MASK;
1266	a->used = (a->dp[0] != 0) ? 1 : 0;
1267	}
1268
1269	/* chek if a bit is set */
1270	int mp_is_bit_set (mp_int *a, mp_digit b)
1271	{
1272	if ((mp_digit)a->used < b/DIGIT_BIT)
1273	return 0;
1274
1275	return (int)((a->dp[b/DIGIT_BIT] >> b%DIGIT_BIT) & (mp_digit)1);
1276	}
1277
1278	/* c = a mod b, 0 <= c < b */
1279	int
1280	mp_mod (mp_int * a, mp_int * b, mp_int * c)
1281	{
1282	mp_int t;
1283	int res;
1284
1285	if ((res = mp_init (&t)) != MP_OKAY) {
1286	return res;
1287	}
1288
1289	if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
1290	mp_clear (&t);
1291	return res;
1292	}
1293
1294	if (t.sign != b->sign) {
1295	res = mp_add (b, &t, c);
1296	} else {
1297	res = MP_OKAY;
1298	mp_exch (&t, c);
1299	}
1300
1301	mp_clear (&t);
1302	return res;
1303	}
1304
1305
1306	/* slower bit-bang division... also smaller */
1307	int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
1308	{
1309	mp_int ta, tb, tq, q;
1310	int res, n, n2;
1311
1312	/* is divisor zero ? */
1313	if (mp_iszero (b) == 1) {
1314	return MP_VAL;
1315	}
1316
1317	/* if a < b then q=0, r = a */
1318	if (mp_cmp_mag (a, b) == MP_LT) {
1319	if (d != NULL) {
1320	res = mp_copy (a, d);
1321	} else {
1322	res = MP_OKAY;
1323	}
1324	if (c != NULL) {
1325	mp_zero (c);
1326	}
1327	return res;
1328	}
1329
1330	/* init our temps */
1331	if ((res = mp_init_multi(&ta, &tb, &tq, &q, 0, 0)) != MP_OKAY) {
1332	return res;
1333	}
1334
1335
1336	mp_set(&tq, 1);
1337	n = mp_count_bits(a) - mp_count_bits(b);
1338	if (((res = mp_abs(a, &ta)) != MP_OKAY) \|\|
1339	((res = mp_abs(b, &tb)) != MP_OKAY) \|\|
1340	((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) \|\|
1341	((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
1342	goto LBL_ERR;
1343	}
1344
1345	while (n-- >= 0) {
1346	if (mp_cmp(&tb, &ta) != MP_GT) {
1347	if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) \|\|
1348	((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
1349	goto LBL_ERR;
1350	}
1351	}
1352	if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) \|\|
1353	((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
1354	goto LBL_ERR;
1355	}
1356	}
1357
1358	/* now q == quotient and ta == remainder */
1359	n = a->sign;
1360	n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
1361	if (c != NULL) {
1362	mp_exch(c, &q);
1363	c->sign = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
1364	}
1365	if (d != NULL) {
1366	mp_exch(d, &ta);
1367	d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
1368	}
1369	LBL_ERR:
1370	mp_clear(&ta);
1371	mp_clear(&tb);
1372	mp_clear(&tq);
1373	mp_clear(&q);
1374	return res;
1375	}
1376
1377
1378	/* b = a/2 */
1379	int mp_div_2(mp_int * a, mp_int * b)
1380	{
1381	int x, res, oldused;
1382
1383	/* copy */
1384	if (b->alloc < a->used) {
1385	if ((res = mp_grow (b, a->used)) != MP_OKAY) {
1386	return res;
1387	}
1388	}
1389
1390	oldused = b->used;
1391	b->used = a->used;
1392	{
1393	register mp_digit r, rr, tmpa, tmpb;
1394
1395	/* source alias */
1396	tmpa = a->dp + b->used - 1;
1397
1398	/* dest alias */
1399	tmpb = b->dp + b->used - 1;
1400
1401	/* carry */
1402	r = 0;
1403	for (x = b->used - 1; x >= 0; x--) {
1404	/* get the carry for the next iteration */
1405	rr = *tmpa & 1;
1406
1407	/* shift the current digit, add in carry and store */
1408	tmpb-- = (tmpa-- >> 1) \| (r << (DIGIT_BIT - 1));
1409
1410	/* forward carry to next iteration */
1411	r = rr;
1412	}
1413
1414	/* zero excess digits */
1415	tmpb = b->dp + b->used;
1416	for (x = b->used; x < oldused; x++) {
1417	*tmpb++ = 0;
1418	}
1419	}
1420	b->sign = a->sign;
1421	mp_clamp (b);
1422	return MP_OKAY;
1423	}
1424
1425
1426	/* high level addition (handles signs) */
1427	int mp_add (mp_int * a, mp_int * b, mp_int * c)
1428	{
1429	int sa, sb, res;
1430
1431	/* get sign of both inputs */
1432	sa = a->sign;
1433	sb = b->sign;
1434
1435	/* handle two cases, not four */
1436	if (sa == sb) {
1437	/* both positive or both negative */
1438	/* add their magnitudes, copy the sign */
1439	c->sign = sa;
1440	res = s_mp_add (a, b, c);
1441	} else {
1442	/* one positive, the other negative */
1443	/* subtract the one with the greater magnitude from */
1444	/* the one of the lesser magnitude. The result gets */
1445	/* the sign of the one with the greater magnitude. */
1446	if (mp_cmp_mag (a, b) == MP_LT) {
1447	c->sign = sb;
1448	res = s_mp_sub (b, a, c);
1449	} else {
1450	c->sign = sa;
1451	res = s_mp_sub (a, b, c);
1452	}
1453	}
1454	return res;
1455	}
1456
1457
1458	/* low level addition, based on HAC pp.594, Algorithm 14.7 */
1459	int
1460	s_mp_add (mp_int * a, mp_int * b, mp_int * c)
1461	{
1462	mp_int *x;
1463	int olduse, res, min, max;
1464
1465	/* find sizes, we let \|a\| <= \|b\| which means we have to sort
1466	* them. "x" will point to the input with the most digits
1467	*/
1468	if (a->used > b->used) {
1469	min = b->used;
1470	max = a->used;
1471	x = a;
1472	} else {
1473	min = a->used;
1474	max = b->used;
1475	x = b;
1476	}
1477
1478	/* init result */
1479	if (c->alloc < max + 1) {
1480	if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
1481	return res;
1482	}
1483	}
1484
1485	/* get old used digit count and set new one */
1486	olduse = c->used;
1487	c->used = max + 1;
1488
1489	{
1490	register mp_digit u, tmpa, tmpb, *tmpc;
1491	register int i;
1492
1493	/* alias for digit pointers */
1494
1495	/* first input */
1496	tmpa = a->dp;
1497
1498	/* second input */
1499	tmpb = b->dp;
1500
1501	/* destination */
1502	tmpc = c->dp;
1503
1504	/* zero the carry */
1505	u = 0;
1506	for (i = 0; i < min; i++) {
1507	/* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
1508	tmpc = tmpa++ + *tmpb++ + u;
1509
1510	/* U = carry bit of T[i] */
1511	u = *tmpc >> ((mp_digit)DIGIT_BIT);
1512
1513	/* take away carry bit from T[i] */
1514	*tmpc++ &= MP_MASK;
1515	}
1516
1517	/* now copy higher words if any, that is in A+B
1518	* if A or B has more digits add those in
1519	*/
1520	if (min != max) {
1521	for (; i < max; i++) {
1522	/* T[i] = X[i] + U */
1523	*tmpc = x->dp[i] + u;
1524
1525	/* U = carry bit of T[i] */
1526	u = *tmpc >> ((mp_digit)DIGIT_BIT);
1527
1528	/* take away carry bit from T[i] */
1529	*tmpc++ &= MP_MASK;
1530	}
1531	}
1532
1533	/* add carry */
1534	*tmpc++ = u;
1535
1536	/* clear digits above oldused */
1537	for (i = c->used; i < olduse; i++) {
1538	*tmpc++ = 0;
1539	}
1540	}
1541
1542	mp_clamp (c);
1543	return MP_OKAY;
1544	}
1545
1546
1547	/* low level subtraction (assumes \|a\| > \|b\|), HAC pp.595 Algorithm 14.9 */
1548	int
1549	s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
1550	{
1551	int olduse, res, min, max;
1552
1553	/* find sizes */
1554	min = b->used;
1555	max = a->used;
1556
1557	/* init result */
1558	if (c->alloc < max) {
1559	if ((res = mp_grow (c, max)) != MP_OKAY) {
1560	return res;
1561	}
1562	}
1563	olduse = c->used;
1564	c->used = max;
1565
1566	{
1567	register mp_digit u, tmpa, tmpb, *tmpc;
1568	register int i;
1569
1570	/* alias for digit pointers */
1571	tmpa = a->dp;
1572	tmpb = b->dp;
1573	tmpc = c->dp;
1574
1575	/* set carry to zero */
1576	u = 0;
1577	for (i = 0; i < min; i++) {
1578	/* T[i] = A[i] - B[i] - U */
1579	tmpc = tmpa++ - *tmpb++ - u;
1580
1581	/* U = carry bit of T[i]
1582	* Note this saves performing an AND operation since
1583	* if a carry does occur it will propagate all the way to the
1584	* MSB. As a result a single shift is enough to get the carry
1585	*/
1586	u = tmpc >> ((mp_digit)(CHAR_BIT sizeof (mp_digit) - 1));
1587
1588	/* Clear carry from T[i] */
1589	*tmpc++ &= MP_MASK;
1590	}
1591
1592	/* now copy higher words if any, e.g. if A has more digits than B */
1593	for (; i < max; i++) {
1594	/* T[i] = A[i] - U */
1595	tmpc = tmpa++ - u;
1596
1597	/* U = carry bit of T[i] */
1598	u = tmpc >> ((mp_digit)(CHAR_BIT sizeof (mp_digit) - 1));
1599
1600	/* Clear carry from T[i] */
1601	*tmpc++ &= MP_MASK;
1602	}
1603
1604	/* clear digits above used (since we may not have grown result above) */
1605	for (i = c->used; i < olduse; i++) {
1606	*tmpc++ = 0;
1607	}
1608	}
1609
1610	mp_clamp (c);
1611	return MP_OKAY;
1612	}
1613
1614
1615	/* high level subtraction (handles signs) */
1616	int
1617	mp_sub (mp_int * a, mp_int * b, mp_int * c)
1618	{
1619	int sa, sb, res;
1620
1621	sa = a->sign;
1622	sb = b->sign;
1623
1624	if (sa != sb) {
1625	/* subtract a negative from a positive, OR */
1626	/* subtract a positive from a negative. */
1627	/* In either case, ADD their magnitudes, */
1628	/* and use the sign of the first number. */
1629	c->sign = sa;
1630	res = s_mp_add (a, b, c);
1631	} else {
1632	/* subtract a positive from a positive, OR */
1633	/* subtract a negative from a negative. */
1634	/* First, take the difference between their */
1635	/* magnitudes, then... */
1636	if (mp_cmp_mag (a, b) != MP_LT) {
1637	/* Copy the sign from the first */
1638	c->sign = sa;
1639	/* The first has a larger or equal magnitude */
1640	res = s_mp_sub (a, b, c);
1641	} else {
1642	/* The result has the opposite sign from */
1643	/* the first number. */
1644	c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
1645	/* The second has a larger magnitude */
1646	res = s_mp_sub (b, a, c);
1647	}
1648	}
1649	return res;
1650	}
1651
1652
1653	/* determines if reduce_2k_l can be used */
1654	int mp_reduce_is_2k_l(mp_int *a)
1655	{
1656	int ix, iy;
1657
1658	if (a->used == 0) {
1659	return MP_NO;
1660	} else if (a->used == 1) {
1661	return MP_YES;
1662	} else if (a->used > 1) {
1663	/* if more than half of the digits are -1 we're sold */
1664	for (iy = ix = 0; ix < a->used; ix++) {
1665	if (a->dp[ix] == MP_MASK) {
1666	++iy;
1667	}
1668	}
1669	return (iy >= (a->used/2)) ? MP_YES : MP_NO;
1670
1671	}
1672	return MP_NO;
1673	}
1674
1675
1676	/* determines if mp_reduce_2k can be used */
1677	int mp_reduce_is_2k(mp_int *a)
1678	{
1679	int ix, iy, iw;
1680	mp_digit iz;
1681
1682	if (a->used == 0) {
1683	return MP_NO;
1684	} else if (a->used == 1) {
1685	return MP_YES;
1686	} else if (a->used > 1) {
1687	iy = mp_count_bits(a);
1688	iz = 1;
1689	iw = 1;
1690
1691	/* Test every bit from the second digit up, must be 1 */
1692	for (ix = DIGIT_BIT; ix < iy; ix++) {
1693	if ((a->dp[iw] & iz) == 0) {
1694	return MP_NO;
1695	}
1696	iz <<= 1;
1697	if (iz > (mp_digit)MP_MASK) {
1698	++iw;
1699	iz = 1;
1700	}
1701	}
1702	}
1703	return MP_YES;
1704	}
1705
1706
1707	/* determines if a number is a valid DR modulus */
1708	int mp_dr_is_modulus(mp_int *a)
1709	{
1710	int ix;
1711
1712	/* must be at least two digits */
1713	if (a->used < 2) {
1714	return 0;
1715	}
1716
1717	/* must be of the form b**k - a [a <= b] so all
1718	* but the first digit must be equal to -1 (mod b).
1719	*/
1720	for (ix = 1; ix < a->used; ix++) {
1721	if (a->dp[ix] != MP_MASK) {
1722	return 0;
1723	}
1724	}
1725	return 1;
1726	}
1727
1728
1729	/* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
1730	*
1731	* Uses a left-to-right k-ary sliding window to compute the modular
1732	* exponentiation.
1733	* The value of k changes based on the size of the exponent.
1734	*
1735	* Uses Montgomery or Diminished Radix reduction [whichever appropriate]
1736	*/
1737
1738	#ifdef MP_LOW_MEM
1739	#define TAB_SIZE 32
1740	#else
1741	#define TAB_SIZE 256
1742	#endif
1743
1744	int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
1745	int redmode)
1746	{
1747	mp_int res;
1748	mp_digit buf, mp;
1749	int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
1750	#ifdef WOLFSSL_SMALL_STACK
1751	mp_int* M = NULL;
1752	#else
1753	mp_int M[TAB_SIZE];
1754	#endif
1755	/* use a pointer to the reduction algorithm. This allows us to use
1756	* one of many reduction algorithms without modding the guts of
1757	* the code with if statements everywhere.
1758	*/
1759	int (redux)(mp_int,mp_int*,mp_digit);
1760
1761	#ifdef WOLFSSL_SMALL_STACK
1762	M = (mp_int) XMALLOC(sizeof(mp_int) TAB_SIZE, NULL,
1763	DYNAMIC_TYPE_TMP_BUFFER);
1764	if (M == NULL)
1765	return MP_MEM;
1766	#endif
1767
1768	/* find window size */
1769	x = mp_count_bits (X);
1770	if (x <= 7) {
1771	winsize = 2;
1772	} else if (x <= 36) {
1773	winsize = 3;
1774	} else if (x <= 140) {
1775	winsize = 4;
1776	} else if (x <= 450) {
1777	winsize = 5;
1778	} else if (x <= 1303) {
1779	winsize = 6;
1780	} else if (x <= 3529) {
1781	winsize = 7;
1782	} else {
1783	winsize = 8;
1784	}
1785
1786	#ifdef MP_LOW_MEM
1787	if (winsize > 5) {
1788	winsize = 5;
1789	}
1790	#endif
1791
1792	/* init M array */
1793	/* init first cell */
1794	if ((err = mp_init(&M[1])) != MP_OKAY) {
1795	#ifdef WOLFSSL_SMALL_STACK
1796	XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
1797	#endif
1798
1799	return err;
1800	}
1801
1802	/* now init the second half of the array */
1803	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
1804	if ((err = mp_init(&M[x])) != MP_OKAY) {
1805	for (y = 1<<(winsize-1); y < x; y++) {
1806	mp_clear (&M[y]);
1807	}
1808	mp_clear(&M[1]);
1809
1810	#ifdef WOLFSSL_SMALL_STACK
1811	XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
1812	#endif
1813
1814	return err;
1815	}
1816	}
1817
1818	/* determine and setup reduction code */
1819	if (redmode == 0) {
1820	#ifdef BN_MP_MONTGOMERY_SETUP_C
1821	/* now setup montgomery */
1822	if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
1823	goto LBL_M;
1824	}
1825	#else
1826	err = MP_VAL;
1827	goto LBL_M;
1828	#endif
1829
1830	/* automatically pick the comba one if available (saves quite a few
1831	calls/ifs) */
1832	#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
1833	if (((P->used * 2 + 1) < MP_WARRAY) &&
1834	P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
1835	redux = fast_mp_montgomery_reduce;
1836	} else
1837	#endif
1838	{
1839	#ifdef BN_MP_MONTGOMERY_REDUCE_C
1840	/* use slower baseline Montgomery method */
1841	redux = mp_montgomery_reduce;
1842	#else
1843	err = MP_VAL;
1844	goto LBL_M;
1845	#endif
1846	}
1847	} else if (redmode == 1) {
1848	#if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
1849	/* setup DR reduction for moduli of the form B*k - b /
1850	mp_dr_setup(P, &mp);
1851	redux = mp_dr_reduce;
1852	#else
1853	err = MP_VAL;
1854	goto LBL_M;
1855	#endif
1856	} else {
1857	#if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
1858	/* setup DR reduction for moduli of the form 2*k - b /
1859	if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
1860	goto LBL_M;
1861	}
1862	redux = mp_reduce_2k;
1863	#else
1864	err = MP_VAL;
1865	goto LBL_M;
1866	#endif
1867	}
1868
1869	/* setup result */
1870	if ((err = mp_init (&res)) != MP_OKAY) {
1871	goto LBL_M;
1872	}
1873
1874	/* create M table
1875	*
1876
1877	*
1878	* The first half of the table is not computed though accept for M[0] and M[1]
1879	*/
1880
1881	if (redmode == 0) {
1882	#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
1883	/* now we need R mod m */
1884	if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
1885	goto LBL_RES;
1886	}
1887	#else
1888	err = MP_VAL;
1889	goto LBL_RES;
1890	#endif
1891
1892	/* now set M[1] to G * R mod m */
1893	if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
1894	goto LBL_RES;
1895	}
1896	} else {
1897	mp_set(&res, 1);
1898	if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
1899	goto LBL_RES;
1900	}
1901	}
1902
1903	/* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times*/
1904	if ((err = mp_copy (&M[1], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
1905	goto LBL_RES;
1906	}
1907
1908	for (x = 0; x < (winsize - 1); x++) {
1909	if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))],
1910	&M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
1911	goto LBL_RES;
1912	}
1913	if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, mp)) != MP_OKAY) {
1914	goto LBL_RES;
1915	}
1916	}
1917
1918	/* create upper table */
1919	for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
1920	if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
1921	goto LBL_RES;
1922	}
1923	if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
1924	goto LBL_RES;
1925	}
1926	}
1927
1928	/* set initial mode and bit cnt */
1929	mode = 0;
1930	bitcnt = 1;
1931	buf = 0;
1932	digidx = X->used - 1;
1933	bitcpy = 0;
1934	bitbuf = 0;
1935
1936	for (;;) {
1937	/* grab next digit as required */
1938	if (--bitcnt == 0) {
1939	/* if digidx == -1 we are out of digits so break */
1940	if (digidx == -1) {
1941	break;
1942	}
1943	/* read next digit and reset bitcnt */
1944	buf = X->dp[digidx--];
1945	bitcnt = (int)DIGIT_BIT;
1946	}
1947
1948	/* grab the next msb from the exponent */
1949	y = (int)(buf >> (DIGIT_BIT - 1)) & 1;
1950	buf <<= (mp_digit)1;
1951
1952	/* if the bit is zero and mode == 0 then we ignore it
1953	* These represent the leading zero bits before the first 1 bit
1954	* in the exponent. Technically this opt is not required but it
1955	* does lower the # of trivial squaring/reductions used
1956	*/
1957	if (mode == 0 && y == 0) {
1958	continue;
1959	}
1960
1961	/* if the bit is zero and mode == 1 then we square */
1962	if (mode == 1 && y == 0) {
1963	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
1964	goto LBL_RES;
1965	}
1966	if ((err = redux (&res, P, mp)) != MP_OKAY) {
1967	goto LBL_RES;
1968	}
1969	continue;
1970	}
1971
1972	/* else we add it to the window */
1973	bitbuf \|= (y << (winsize - ++bitcpy));
1974	mode = 2;
1975
1976	if (bitcpy == winsize) {
1977	/* ok window is filled so square as required and multiply */
1978	/* square first */
1979	for (x = 0; x < winsize; x++) {
1980	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
1981	goto LBL_RES;
1982	}
1983	if ((err = redux (&res, P, mp)) != MP_OKAY) {
1984	goto LBL_RES;
1985	}
1986	}
1987
1988	/* then multiply */
1989	if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
1990	goto LBL_RES;
1991	}
1992	if ((err = redux (&res, P, mp)) != MP_OKAY) {
1993	goto LBL_RES;
1994	}
1995
1996	/* empty window and reset */
1997	bitcpy = 0;
1998	bitbuf = 0;
1999	mode = 1;
2000	}
2001	}
2002
2003	/* if bits remain then square/multiply */
2004	if (mode == 2 && bitcpy > 0) {
2005	/* square then multiply if the bit is set */
2006	for (x = 0; x < bitcpy; x++) {
2007	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2008	goto LBL_RES;
2009	}
2010	if ((err = redux (&res, P, mp)) != MP_OKAY) {
2011	goto LBL_RES;
2012	}
2013
2014	/* get next bit of the window */
2015	bitbuf <<= 1;
2016	if ((bitbuf & (1 << winsize)) != 0) {
2017	/* then multiply */
2018	if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
2019	goto LBL_RES;
2020	}
2021	if ((err = redux (&res, P, mp)) != MP_OKAY) {
2022	goto LBL_RES;
2023	}
2024	}
2025	}
2026	}
2027
2028	if (redmode == 0) {
2029	/* fixup result if Montgomery reduction is used
2030	* recall that any value in a Montgomery system is
2031	* actually multiplied by R mod n. So we have
2032	* to reduce one more time to cancel out the factor
2033	* of R.
2034	*/
2035	if ((err = redux(&res, P, mp)) != MP_OKAY) {
2036	goto LBL_RES;
2037	}
2038	}
2039
2040	/* swap res with Y */
2041	mp_exch (&res, Y);
2042	err = MP_OKAY;
2043	LBL_RES:mp_clear (&res);
2044	LBL_M:
2045	mp_clear(&M[1]);
2046	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
2047	mp_clear (&M[x]);
2048	}
2049
2050	#ifdef WOLFSSL_SMALL_STACK
2051	XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2052	#endif
2053
2054	return err;
2055	}
2056
2057
2058	/* setups the montgomery reduction stuff */
2059	int
2060	mp_montgomery_setup (mp_int * n, mp_digit * rho)
2061	{
2062	mp_digit x, b;
2063
2064	/* fast inversion mod 2**k
2065	*
2066	* Based on the fact that
2067	*
2068	* XA = 1 (mod 2n) => (X(2-XA)) A = 1 (mod 22n)
2069	* => 2XA - XXA*A = 1
2070	* => 2*(1) - (1) = 1
2071	*/
2072	b = n->dp[0];
2073
2074	if ((b & 1) == 0) {
2075	return MP_VAL;
2076	}
2077
2078	x = (((b + 2) & 4) << 1) + b; /* here xa==1 mod 24 /
2079	x = 2 - b x; /* here xa==1 mod 28 /
2080	#if !defined(MP_8BIT)
2081	x = 2 - b x; /* here xa==1 mod 216 /
2082	#endif
2083	#if defined(MP_64BIT) \|\| !(defined(MP_8BIT) \|\| defined(MP_16BIT))
2084	x = 2 - b x; /* here xa==1 mod 232 /
2085	#endif
2086	#ifdef MP_64BIT
2087	x = 2 - b x; /* here xa==1 mod 264 /
2088	#endif
2089
2090	/* rho = -1/m mod b */
2091	/* TAO, switched mp_word casts to mp_digit to shut up compiler */
2092	*rho = (((mp_digit)1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK;
2093
2094	return MP_OKAY;
2095	}
2096
2097
2098	/* computes xR**-1 == x (mod N) via Montgomery Reduction
2099	*
2100	* This is an optimized implementation of montgomery_reduce
2101	* which uses the comba method to quickly calculate the columns of the
2102	* reduction.
2103	*
2104	* Based on Algorithm 14.32 on pp.601 of HAC.
2105	*/
2106	int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
2107	{
2108	int ix, res, olduse;
2109	#ifdef WOLFSSL_SMALL_STACK
2110	mp_word* W; /* uses dynamic memory and slower */
2111	#else
2112	mp_word W[MP_WARRAY];
2113	#endif
2114
2115	/* get old used count */
2116	olduse = x->used;
2117
2118	/* grow a as required */
2119	if (x->alloc < n->used + 1) {
2120	if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
2121	return res;
2122	}
2123	}
2124
2125	#ifdef WOLFSSL_SMALL_STACK
2126	W = (mp_word)XMALLOC(sizeof(mp_word) MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
2127	if (W == NULL)
2128	return MP_MEM;
2129	#endif
2130
2131	/* first we have to get the digits of the input into
2132	* an array of double precision words W[...]
2133	*/
2134	{
2135	register mp_word *_W;
2136	register mp_digit *tmpx;
2137
2138	/* alias for the W[] array */
2139	_W = W;
2140
2141	/* alias for the digits of x*/
2142	tmpx = x->dp;
2143
2144	/* copy the digits of a into W[0..a->used-1] */
2145	for (ix = 0; ix < x->used; ix++) {
2146	_W++ = tmpx++;
2147	}
2148
2149	/* zero the high words of W[a->used..m->used2] /
2150	for (; ix < n->used * 2 + 1; ix++) {
2151	*_W++ = 0;
2152	}
2153	}
2154
2155	/* now we proceed to zero successive digits
2156	* from the least significant upwards
2157	*/
2158	for (ix = 0; ix < n->used; ix++) {
2159	/* mu = ai * m' mod b
2160	*
2161	* We avoid a double precision multiplication (which isn't required)
2162	* by casting the value down to a mp_digit. Note this requires
2163	* that W[ix-1] have the carry cleared (see after the inner loop)
2164	*/
2165	register mp_digit mu;
2166	mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
2167
2168	/* a = a + mu * m * b**i
2169	*
2170	* This is computed in place and on the fly. The multiplication
2171	* by b**i is handled by offseting which columns the results
2172	* are added to.
2173	*
2174	* Note the comba method normally doesn't handle carries in the
2175	* inner loop In this case we fix the carry from the previous
2176	* column since the Montgomery reduction requires digits of the
2177	* result (so far) [see above] to work. This is
2178	* handled by fixing up one carry after the inner loop. The
2179	* carry fixups are done in order so after these loops the
2180	* first m->used words of W[] have the carries fixed
2181	*/
2182	{
2183	register int iy;
2184	register mp_digit *tmpn;
2185	register mp_word *_W;
2186
2187	/* alias for the digits of the modulus */
2188	tmpn = n->dp;
2189
2190	/* Alias for the columns set by an offset of ix */
2191	_W = W + ix;
2192
2193	/* inner loop */
2194	for (iy = 0; iy < n->used; iy++) {
2195	_W++ += ((mp_word)mu) ((mp_word)*tmpn++);
2196	}
2197	}
2198
2199	/* now fix carry for next digit, W[ix+1] */
2200	W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
2201	}
2202
2203	/* now we have to propagate the carries and
2204	* shift the words downward [all those least
2205	* significant digits we zeroed].
2206	*/
2207	{
2208	register mp_digit *tmpx;
2209	register mp_word _W, _W1;
2210
2211	/* nox fix rest of carries */
2212
2213	/* alias for current word */
2214	_W1 = W + ix;
2215
2216	/* alias for next word, where the carry goes */
2217	_W = W + ++ix;
2218
2219	for (; ix <= n->used * 2 + 1; ix++) {
2220	_W++ += _W1++ >> ((mp_word) DIGIT_BIT);
2221	}
2222
2223	/* copy out, A = A/b**n
2224	*
2225	* The result is A/b**n but instead of converting from an
2226	* array of mp_word to mp_digit than calling mp_rshd
2227	* we just copy them in the right order
2228	*/
2229
2230	/* alias for destination word */
2231	tmpx = x->dp;
2232
2233	/* alias for shifted double precision result */
2234	_W = W + n->used;
2235
2236	for (ix = 0; ix < n->used + 1; ix++) {
2237	tmpx++ = (mp_digit)(_W++ & ((mp_word) MP_MASK));
2238	}
2239
2240	/* zero oldused digits, if the input a was larger than
2241	* m->used+1 we'll have to clear the digits
2242	*/
2243	for (; ix < olduse; ix++) {
2244	*tmpx++ = 0;
2245	}
2246	}
2247
2248	/* set the max used and clamp */
2249	x->used = n->used + 1;
2250	mp_clamp (x);
2251
2252	#ifdef WOLFSSL_SMALL_STACK
2253	XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
2254	#endif
2255
2256	/* if A >= m then A = A - m */
2257	if (mp_cmp_mag (x, n) != MP_LT) {
2258	return s_mp_sub (x, n, x);
2259	}
2260	return MP_OKAY;
2261	}
2262
2263
2264	/* computes xR*-1 == x (mod N) via Montgomery Reduction /
2265	int
2266	mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
2267	{
2268	int ix, res, digs;
2269	mp_digit mu;
2270
2271	/* can the fast reduction [comba] method be used?
2272	*
2273	* Note that unlike in mul you're safely allowed less
2274	* than the available columns [255 per default] since carries
2275	* are fixed up in the inner loop.
2276	*/
2277	digs = n->used * 2 + 1;
2278	if ((digs < MP_WARRAY) &&
2279	n->used <
2280	(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2281	return fast_mp_montgomery_reduce (x, n, rho);
2282	}
2283
2284	/* grow the input as required */
2285	if (x->alloc < digs) {
2286	if ((res = mp_grow (x, digs)) != MP_OKAY) {
2287	return res;
2288	}
2289	}
2290	x->used = digs;
2291
2292	for (ix = 0; ix < n->used; ix++) {
2293	/* mu = ai * rho mod b
2294	*
2295	* The value of rho must be precalculated via
2296	* montgomery_setup() such that
2297	* it equals -1/n0 mod b this allows the
2298	* following inner loop to reduce the
2299	* input one digit at a time
2300	*/
2301	mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
2302
2303	/* a = a + mu * m * b*i /
2304	{
2305	register int iy;
2306	register mp_digit tmpn, tmpx, u;
2307	register mp_word r;
2308
2309	/* alias for digits of the modulus */
2310	tmpn = n->dp;
2311
2312	/* alias for the digits of x [the input] */
2313	tmpx = x->dp + ix;
2314
2315	/* set the carry to zero */
2316	u = 0;
2317
2318	/* Multiply and add in place */
2319	for (iy = 0; iy < n->used; iy++) {
2320	/* compute product and sum */
2321	r = ((mp_word)mu) * ((mp_word)*tmpn++) +
2322	((mp_word) u) + ((mp_word) * tmpx);
2323
2324	/* get carry */
2325	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
2326
2327	/* fix digit */
2328	*tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
2329	}
2330	/* At this point the ix'th digit of x should be zero */
2331
2332
2333	/* propagate carries upwards as required*/
2334	while (u) {
2335	*tmpx += u;
2336	u = *tmpx >> DIGIT_BIT;
2337	*tmpx++ &= MP_MASK;
2338	}
2339	}
2340	}
2341
2342	/* at this point the n.used'th least
2343	* significant digits of x are all zero
2344	* which means we can shift x to the
2345	* right by n.used digits and the
2346	* residue is unchanged.
2347	*/
2348
2349	/* x = x/b*n.used /
2350	mp_clamp(x);
2351	mp_rshd (x, n->used);
2352
2353	/* if x >= n then x = x - n */
2354	if (mp_cmp_mag (x, n) != MP_LT) {
2355	return s_mp_sub (x, n, x);
2356	}
2357
2358	return MP_OKAY;
2359	}
2360
2361
2362	/* determines the setup value */
2363	void mp_dr_setup(mp_int a, mp_digit d)
2364	{
2365	/* the casts are required if DIGIT_BIT is one less than
2366	* the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
2367	*/
2368	*d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
2369	((mp_word)a->dp[0]));
2370	}
2371
2372
2373	/* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
2374	*
2375	* Based on algorithm from the paper
2376	*
2377	* "Generating Efficient Primes for Discrete Log Cryptosystems"
2378	* Chae Hoon Lim, Pil Joong Lee,
2379	* POSTECH Information Research Laboratories
2380	*
2381	* The modulus must be of a special format [see manual]
2382	*
2383	* Has been modified to use algorithm 7.10 from the LTM book instead
2384	*
2385	* Input x must be in the range 0 <= x <= (n-1)**2
2386	*/
2387	int
2388	mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k)
2389	{
2390	int err, i, m;
2391	mp_word r;
2392	mp_digit mu, tmpx1, tmpx2;
2393
2394	/* m = digits in modulus */
2395	m = n->used;
2396
2397	/* ensure that "x" has at least 2m digits */
2398	if (x->alloc < m + m) {
2399	if ((err = mp_grow (x, m + m)) != MP_OKAY) {
2400	return err;
2401	}
2402	}
2403
2404	/* top of loop, this is where the code resumes if
2405	* another reduction pass is required.
2406	*/
2407	top:
2408	/* aliases for digits */
2409	/* alias for lower half of x */
2410	tmpx1 = x->dp;
2411
2412	/* alias for upper half of x, or x/B*m /
2413	tmpx2 = x->dp + m;
2414
2415	/* set carry to zero */
2416	mu = 0;
2417
2418	/* compute (x mod B*m) + k [x/B*m] inline and inplace /
2419	for (i = 0; i < m; i++) {
2420	r = ((mp_word)tmpx2++) ((mp_word)k) + *tmpx1 + mu;
2421	*tmpx1++ = (mp_digit)(r & MP_MASK);
2422	mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
2423	}
2424
2425	/* set final carry */
2426	*tmpx1++ = mu;
2427
2428	/* zero words above m */
2429	for (i = m + 1; i < x->used; i++) {
2430	*tmpx1++ = 0;
2431	}
2432
2433	/* clamp, sub and return */
2434	mp_clamp (x);
2435
2436	/* if x >= n then subtract and reduce again
2437	* Each successive "recursion" makes the input smaller and smaller.
2438	*/
2439	if (mp_cmp_mag (x, n) != MP_LT) {
2440	s_mp_sub(x, n, x);
2441	goto top;
2442	}
2443	return MP_OKAY;
2444	}
2445
2446
2447	/* reduces a modulo n where n is of the form 2*p - d /
2448	int mp_reduce_2k(mp_int a, mp_int n, mp_digit d)
2449	{
2450	mp_int q;
2451	int p, res;
2452
2453	if ((res = mp_init(&q)) != MP_OKAY) {
2454	return res;
2455	}
2456
2457	p = mp_count_bits(n);
2458	top:
2459	/* q = a/2p, a = a mod 2p */
2460	if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
2461	goto ERR;
2462	}
2463
2464	if (d != 1) {
2465	/* q = q * d */
2466	if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) {
2467	goto ERR;
2468	}
2469	}
2470
2471	/* a = a + q */
2472	if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
2473	goto ERR;
2474	}
2475
2476	if (mp_cmp_mag(a, n) != MP_LT) {
2477	s_mp_sub(a, n, a);
2478	goto top;
2479	}
2480
2481	ERR:
2482	mp_clear(&q);
2483	return res;
2484	}
2485
2486
2487	/* determines the setup value */
2488	int mp_reduce_2k_setup(mp_int a, mp_digit d)
2489	{
2490	int res, p;
2491	mp_int tmp;
2492
2493	if ((res = mp_init(&tmp)) != MP_OKAY) {
2494	return res;
2495	}
2496
2497	p = mp_count_bits(a);
2498	if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
2499	mp_clear(&tmp);
2500	return res;
2501	}
2502
2503	if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
2504	mp_clear(&tmp);
2505	return res;
2506	}
2507
2508	*d = tmp.dp[0];
2509	mp_clear(&tmp);
2510	return MP_OKAY;
2511	}
2512
2513
2514	/* set the b bit of a */
2515	int
2516	mp_set_bit (mp_int * a, int b)
2517	{
2518	int i = b / DIGIT_BIT, res;
2519
2520	if (a->used < (int)(i + 1)) {
2521	/* grow a to accomodate the single bit */
2522	if ((res = mp_grow (a, i + 1)) != MP_OKAY) {
2523	return res;
2524	}
2525
2526	/* set the used count of where the bit will go */
2527	a->used = (int)(i + 1);
2528	}
2529
2530	/* put the single bit in its place */
2531	a->dp[i] \|= ((mp_digit)1) << (b % DIGIT_BIT);
2532
2533	return MP_OKAY;
2534	}
2535
2536	/* computes a = 2**b
2537	*
2538	* Simple algorithm which zeroes the int, set the required bit
2539	*/
2540	int
2541	mp_2expt (mp_int * a, int b)
2542	{
2543	/* zero a as per default */
2544	mp_zero (a);
2545
2546	return mp_set_bit(a, b);
2547	}
2548
2549	/* multiply by a digit */
2550	int
2551	mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
2552	{
2553	mp_digit u, tmpa, tmpc;
2554	mp_word r;
2555	int ix, res, olduse;
2556
2557	/* make sure c is big enough to hold ab /
2558	if (c->alloc < a->used + 1) {
2559	if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
2560	return res;
2561	}
2562	}
2563
2564	/* get the original destinations used count */
2565	olduse = c->used;
2566
2567	/* set the sign */
2568	c->sign = a->sign;
2569
2570	/* alias for a->dp [source] */
2571	tmpa = a->dp;
2572
2573	/* alias for c->dp [dest] */
2574	tmpc = c->dp;
2575
2576	/* zero carry */
2577	u = 0;
2578
2579	/* compute columns */
2580	for (ix = 0; ix < a->used; ix++) {
2581	/* compute product and carry sum for this term */
2582	r = ((mp_word) u) + ((mp_word)tmpa++) ((mp_word)b);
2583
2584	/* mask off higher bits to get a single digit */
2585	*tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
2586
2587	/* send carry into next iteration */
2588	u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
2589	}
2590
2591	/* store final carry [if any] and increment ix offset */
2592	*tmpc++ = u;
2593	++ix;
2594
2595	/* now zero digits above the top */
2596	while (ix++ < olduse) {
2597	*tmpc++ = 0;
2598	}
2599
2600	/* set used count */
2601	c->used = a->used + 1;
2602	mp_clamp(c);
2603
2604	return MP_OKAY;
2605	}
2606
2607
2608	/* d = a * b (mod c) */
2609	int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
2610	{
2611	int res;
2612	mp_int t;
2613
2614	if ((res = mp_init (&t)) != MP_OKAY) {
2615	return res;
2616	}
2617
2618	if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
2619	mp_clear (&t);
2620	return res;
2621	}
2622	res = mp_mod (&t, c, d);
2623	mp_clear (&t);
2624	return res;
2625	}
2626
2627
2628	/* computes b = aa /
2629	int
2630	mp_sqr (mp_int * a, mp_int * b)
2631	{
2632	int res;
2633
2634	{
2635	#ifdef BN_FAST_S_MP_SQR_C
2636	/* can we use the fast comba multiplier? */
2637	if ((a->used * 2 + 1) < MP_WARRAY &&
2638	a->used <
2639	(1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
2640	res = fast_s_mp_sqr (a, b);
2641	} else
2642	#endif
2643	#ifdef BN_S_MP_SQR_C
2644	res = s_mp_sqr (a, b);
2645	#else
2646	res = MP_VAL;
2647	#endif
2648	}
2649	b->sign = MP_ZPOS;
2650	return res;
2651	}
2652
2653
2654	/* high level multiplication (handles sign) */
2655	int mp_mul (mp_int * a, mp_int * b, mp_int * c)
2656	{
2657	int res, neg;
2658	neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
2659
2660	{
2661	/* can we use the fast multiplier?
2662	*
2663	* The fast multiplier can be used if the output will
2664	* have less than MP_WARRAY digits and the number of
2665	* digits won't affect carry propagation
2666	*/
2667	int digs = a->used + b->used + 1;
2668
2669	#ifdef BN_FAST_S_MP_MUL_DIGS_C
2670	if ((digs < MP_WARRAY) &&
2671	MIN(a->used, b->used) <=
2672	(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2673	res = fast_s_mp_mul_digs (a, b, c, digs);
2674	} else
2675	#endif
2676	#ifdef BN_S_MP_MUL_DIGS_C
2677	res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
2678	#else
2679	res = MP_VAL;
2680	#endif
2681
2682	}
2683	c->sign = (c->used > 0) ? neg : MP_ZPOS;
2684	return res;
2685	}
2686
2687
2688	/* b = a2 /
2689	int mp_mul_2(mp_int * a, mp_int * b)
2690	{
2691	int x, res, oldused;
2692
2693	/* grow to accomodate result */
2694	if (b->alloc < a->used + 1) {
2695	if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
2696	return res;
2697	}
2698	}
2699
2700	oldused = b->used;
2701	b->used = a->used;
2702
2703	{
2704	register mp_digit r, rr, tmpa, tmpb;
2705
2706	/* alias for source */
2707	tmpa = a->dp;
2708
2709	/* alias for dest */
2710	tmpb = b->dp;
2711
2712	/* carry */
2713	r = 0;
2714	for (x = 0; x < a->used; x++) {
2715
2716	/* get what will be the next carry bit from the
2717	* MSB of the current digit
2718	*/
2719	rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
2720
2721	/* now shift up this digit, add in the carry [from the previous] */
2722	tmpb++ = ((tmpa++ << ((mp_digit)1)) \| r) & MP_MASK;
2723
2724	/* copy the carry that would be from the source
2725	* digit into the next iteration
2726	*/
2727	r = rr;
2728	}
2729
2730	/* new leading digit? */
2731	if (r != 0) {
2732	/* add a MSB which is always 1 at this point */
2733	*tmpb = 1;
2734	++(b->used);
2735	}
2736
2737	/* now zero any excess digits on the destination
2738	* that we didn't write to
2739	*/
2740	tmpb = b->dp + b->used;
2741	for (x = b->used; x < oldused; x++) {
2742	*tmpb++ = 0;
2743	}
2744	}
2745	b->sign = a->sign;
2746	return MP_OKAY;
2747	}
2748
2749
2750	/* divide by three (based on routine from MPI and the GMP manual) */
2751	int
2752	mp_div_3 (mp_int * a, mp_int c, mp_digit d)
2753	{
2754	mp_int q;
2755	mp_word w, t;
2756	mp_digit b;
2757	int res, ix;
2758
2759	/* b = 2*DIGIT_BIT / 3 /
2760	b = (mp_digit) ( (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3) );
2761
2762	if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
2763	return res;
2764	}
2765
2766	q.used = a->used;
2767	q.sign = a->sign;
2768	w = 0;
2769	for (ix = a->used - 1; ix >= 0; ix--) {
2770	w = (w << ((mp_word)DIGIT_BIT)) \| ((mp_word)a->dp[ix]);
2771
2772	if (w >= 3) {
2773	/* multiply w by [1/3] */
2774	t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
2775
2776	/* now subtract 3 * [w/3] from w, to get the remainder */
2777	w -= t+t+t;
2778
2779	/* fixup the remainder as required since
2780	* the optimization is not exact.
2781	*/
2782	while (w >= 3) {
2783	t += 1;
2784	w -= 3;
2785	}
2786	} else {
2787	t = 0;
2788	}
2789	q.dp[ix] = (mp_digit)t;
2790	}
2791
2792	/* [optional] store the remainder */
2793	if (d != NULL) {
2794	*d = (mp_digit)w;
2795	}
2796
2797	/* [optional] store the quotient */
2798	if (c != NULL) {
2799	mp_clamp(&q);
2800	mp_exch(&q, c);
2801	}
2802	mp_clear(&q);
2803
2804	return res;
2805	}
2806
2807
2808	/* init an mp_init for a given size */
2809	int mp_init_size (mp_int * a, int size)
2810	{
2811	int x;
2812
2813	/* pad size so there are always extra digits */
2814	size += (MP_PREC * 2) - (size % MP_PREC);
2815
2816	/* alloc mem */
2817	a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size, 0,
2818	DYNAMIC_TYPE_BIGINT);
2819	if (a->dp == NULL) {
2820	return MP_MEM;
2821	}
2822
2823	/* set the members */
2824	a->used = 0;
2825	a->alloc = size;
2826	a->sign = MP_ZPOS;
2827
2828	/* zero the digits */
2829	for (x = 0; x < size; x++) {
2830	a->dp[x] = 0;
2831	}
2832
2833	return MP_OKAY;
2834	}
2835
2836
2837	/* the jist of squaring...
2838	* you do like mult except the offset of the tmpx [one that
2839	* starts closer to zero] can't equal the offset of tmpy.
2840	* So basically you set up iy like before then you min it with
2841	* (ty-tx) so that it never happens. You double all those
2842	* you add in the inner loop
2843
2844	After that loop you do the squares and add them in.
2845	*/
2846
2847	int fast_s_mp_sqr (mp_int * a, mp_int * b)
2848	{
2849	int olduse, res, pa, ix, iz;
2850	#ifdef WOLFSSL_SMALL_STACK
2851	mp_digit* W; /* uses dynamic memory and slower */
2852	#else
2853	mp_digit W[MP_WARRAY];
2854	#endif
2855	mp_digit *tmpx;
2856	mp_word W1;
2857
2858	/* grow the destination as required */
2859	pa = a->used + a->used;
2860	if (b->alloc < pa) {
2861	if ((res = mp_grow (b, pa)) != MP_OKAY) {
2862	return res;
2863	}
2864	}
2865
2866	if (pa > MP_WARRAY)
2867	return MP_RANGE; /* TAO range check */
2868
2869	#ifdef WOLFSSL_SMALL_STACK
2870	W = (mp_digit)XMALLOC(sizeof(mp_digit) MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
2871	if (W == NULL)
2872	return MP_MEM;
2873	#endif
2874
2875	/* number of output digits to produce */
2876	W1 = 0;
2877	for (ix = 0; ix < pa; ix++) {
2878	int tx, ty, iy;
2879	mp_word _W;
2880	mp_digit *tmpy;
2881
2882	/* clear counter */
2883	_W = 0;
2884
2885	/* get offsets into the two bignums */
2886	ty = MIN(a->used-1, ix);
2887	tx = ix - ty;
2888
2889	/* setup temp aliases */
2890	tmpx = a->dp + tx;
2891	tmpy = a->dp + ty;
2892
2893	/* this is the number of times the loop will iterrate, essentially
2894	while (tx++ < a->used && ty-- >= 0) { ... }
2895	*/
2896	iy = MIN(a->used-tx, ty+1);
2897
2898	/* now for squaring tx can never equal ty
2899	* we halve the distance since they approach at a rate of 2x
2900	* and we have to round because odd cases need to be executed
2901	*/
2902	iy = MIN(iy, (ty-tx+1)>>1);
2903
2904	/* execute loop */
2905	for (iz = 0; iz < iy; iz++) {
2906	_W += ((mp_word)tmpx++)((mp_word)*tmpy--);
2907	}
2908
2909	/* double the inner product and add carry */
2910	_W = _W + _W + W1;
2911
2912	/* even columns have the square term in them */
2913	if ((ix&1) == 0) {
2914	_W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
2915	}
2916
2917	/* store it */
2918	W[ix] = (mp_digit)(_W & MP_MASK);
2919
2920	/* make next carry */
2921	W1 = _W >> ((mp_word)DIGIT_BIT);
2922	}
2923
2924	/* setup dest */
2925	olduse = b->used;
2926	b->used = a->used+a->used;
2927
2928	{
2929	mp_digit *tmpb;
2930	tmpb = b->dp;
2931	for (ix = 0; ix < pa; ix++) {
2932	*tmpb++ = W[ix] & MP_MASK;
2933	}
2934
2935	/* clear unused digits [that existed in the old copy of c] */
2936	for (; ix < olduse; ix++) {
2937	*tmpb++ = 0;
2938	}
2939	}
2940	mp_clamp (b);
2941
2942	#ifdef WOLFSSL_SMALL_STACK
2943	XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
2944	#endif
2945
2946	return MP_OKAY;
2947	}
2948
2949
2950	/* Fast (comba) multiplier
2951	*
2952	* This is the fast column-array [comba] multiplier. It is
2953	* designed to compute the columns of the product first
2954	* then handle the carries afterwards. This has the effect
2955	* of making the nested loops that compute the columns very
2956	* simple and schedulable on super-scalar processors.
2957	*
2958	* This has been modified to produce a variable number of
2959	* digits of output so if say only a half-product is required
2960	* you don't have to compute the upper half (a feature
2961	* required for fast Barrett reduction).
2962	*
2963	* Based on Algorithm 14.12 on pp.595 of HAC.
2964	*
2965	*/
2966	int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
2967	{
2968	int olduse, res, pa, ix, iz;
2969	#ifdef WOLFSSL_SMALL_STACK
2970	mp_digit* W; /* uses dynamic memory and slower */
2971	#else
2972	mp_digit W[MP_WARRAY];
2973	#endif
2974	register mp_word _W;
2975
2976	/* grow the destination as required */
2977	if (c->alloc < digs) {
2978	if ((res = mp_grow (c, digs)) != MP_OKAY) {
2979	return res;
2980	}
2981	}
2982
2983	/* number of output digits to produce */
2984	pa = MIN(digs, a->used + b->used);
2985	if (pa > MP_WARRAY)
2986	return MP_RANGE; /* TAO range check */
2987
2988	#ifdef WOLFSSL_SMALL_STACK
2989	W = (mp_digit)XMALLOC(sizeof(mp_digit) MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
2990	if (W == NULL)
2991	return MP_MEM;
2992	#endif
2993
2994	/* clear the carry */
2995	_W = 0;
2996	for (ix = 0; ix < pa; ix++) {
2997	int tx, ty;
2998	int iy;
2999	mp_digit tmpx, tmpy;
3000
3001	/* get offsets into the two bignums */
3002	ty = MIN(b->used-1, ix);
3003	tx = ix - ty;
3004
3005	/* setup temp aliases */
3006	tmpx = a->dp + tx;
3007	tmpy = b->dp + ty;
3008
3009	/* this is the number of times the loop will iterrate, essentially
3010	while (tx++ < a->used && ty-- >= 0) { ... }
3011	*/
3012	iy = MIN(a->used-tx, ty+1);
3013
3014	/* execute loop */
3015	for (iz = 0; iz < iy; ++iz) {
3016	_W += ((mp_word)tmpx++)((mp_word)*tmpy--);
3017
3018	}
3019
3020	/* store term */
3021	W[ix] = ((mp_digit)_W) & MP_MASK;
3022
3023	/* make next carry */
3024	_W = _W >> ((mp_word)DIGIT_BIT);
3025	}
3026
3027	/* setup dest */
3028	olduse = c->used;
3029	c->used = pa;
3030
3031	{
3032	register mp_digit *tmpc;
3033	tmpc = c->dp;
3034	for (ix = 0; ix < pa+1; ix++) {
3035	/* now extract the previous digit [below the carry] */
3036	*tmpc++ = W[ix];
3037	}
3038
3039	/* clear unused digits [that existed in the old copy of c] */
3040	for (; ix < olduse; ix++) {
3041	*tmpc++ = 0;
3042	}
3043	}
3044	mp_clamp (c);
3045
3046	#ifdef WOLFSSL_SMALL_STACK
3047	XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
3048	#endif
3049
3050	return MP_OKAY;
3051	}
3052
3053
3054	/* low level squaring, b = aa, HAC pp.596-597, Algorithm 14.16 /
3055	int s_mp_sqr (mp_int * a, mp_int * b)
3056	{
3057	mp_int t;
3058	int res, ix, iy, pa;
3059	mp_word r;
3060	mp_digit u, tmpx, *tmpt;
3061
3062	pa = a->used;
3063	if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
3064	return res;
3065	}
3066
3067	/* default used is maximum possible size */
3068	t.used = 2*pa + 1;
3069
3070	for (ix = 0; ix < pa; ix++) {
3071	/* first calculate the digit at 2ix /
3072	/* calculate double precision result */
3073	r = ((mp_word) t.dp[2*ix]) +
3074	((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
3075
3076	/* store lower part in result */
3077	t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
3078
3079	/* get the carry */
3080	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3081
3082	/* left hand side of A[ix] * A[iy] */
3083	tmpx = a->dp[ix];
3084
3085	/* alias for where to store the results */
3086	tmpt = t.dp + (2*ix + 1);
3087
3088	for (iy = ix + 1; iy < pa; iy++) {
3089	/* first calculate the product */
3090	r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
3091
3092	/* now calculate the double precision result, note we use
3093	* addition instead of *2 since it's easier to optimize
3094	*/
3095	r = ((mp_word) *tmpt) + r + r + ((mp_word) u);
3096
3097	/* store lower part */
3098	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3099
3100	/* get carry */
3101	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3102	}
3103	/* propagate upwards */
3104	while (u != ((mp_digit) 0)) {
3105	r = ((mp_word) *tmpt) + ((mp_word) u);
3106	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3107	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3108	}
3109	}
3110
3111	mp_clamp (&t);
3112	mp_exch (&t, b);
3113	mp_clear (&t);
3114	return MP_OKAY;
3115	}
3116
3117
3118	/* multiplies \|a\| * \|b\| and only computes upto digs digits of result
3119	* HAC pp. 595, Algorithm 14.12 Modified so you can control how
3120	* many digits of output are created.
3121	*/
3122	int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3123	{
3124	mp_int t;
3125	int res, pa, pb, ix, iy;
3126	mp_digit u;
3127	mp_word r;
3128	mp_digit tmpx, tmpt, tmpy;
3129
3130	/* can we use the fast multiplier? */
3131	if (((digs) < MP_WARRAY) &&
3132	MIN (a->used, b->used) <
3133	(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
3134	return fast_s_mp_mul_digs (a, b, c, digs);
3135	}
3136
3137	if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
3138	return res;
3139	}
3140	t.used = digs;
3141
3142	/* compute the digits of the product directly */
3143	pa = a->used;
3144	for (ix = 0; ix < pa; ix++) {
3145	/* set the carry to zero */
3146	u = 0;
3147
3148	/* limit ourselves to making digs digits of output */
3149	pb = MIN (b->used, digs - ix);
3150
3151	/* setup some aliases */
3152	/* copy of the digit from a used within the nested loop */
3153	tmpx = a->dp[ix];
3154
3155	/* an alias for the destination shifted ix places */
3156	tmpt = t.dp + ix;
3157
3158	/* an alias for the digits of b */
3159	tmpy = b->dp;
3160
3161	/* compute the columns of the output and propagate the carry */
3162	for (iy = 0; iy < pb; iy++) {
3163	/* compute the column as a mp_word */
3164	r = ((mp_word)*tmpt) +
3165	((mp_word)tmpx) * ((mp_word)*tmpy++) +
3166	((mp_word) u);
3167
3168	/* the new column is the lower part of the result */
3169	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3170
3171	/* get the carry word from the result */
3172	u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
3173	}
3174	/* set carry if it is placed below digs */
3175	if (ix + iy < digs) {
3176	*tmpt = u;
3177	}
3178	}
3179
3180	mp_clamp (&t);
3181	mp_exch (&t, c);
3182
3183	mp_clear (&t);
3184	return MP_OKAY;
3185	}
3186
3187
3188	/*
3189	* shifts with subtractions when the result is greater than b.
3190	*
3191	* The method is slightly modified to shift B unconditionally upto just under
3192	* the leading bit of b. This saves alot of multiple precision shifting.
3193	*/
3194	int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
3195	{
3196	int x, bits, res;
3197
3198	/* how many bits of last digit does b use */
3199	bits = mp_count_bits (b) % DIGIT_BIT;
3200
3201	if (b->used > 1) {
3202	if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1))
3203	!= MP_OKAY) {
3204	return res;
3205	}
3206	} else {
3207	mp_set(a, 1);
3208	bits = 1;
3209	}
3210
3211
3212	/* now compute C = A * B mod b */
3213	for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
3214	if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
3215	return res;
3216	}
3217	if (mp_cmp_mag (a, b) != MP_LT) {
3218	if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
3219	return res;
3220	}
3221	}
3222	}
3223
3224	return MP_OKAY;
3225	}
3226
3227
3228	#ifdef MP_LOW_MEM
3229	#define TAB_SIZE 32
3230	#else
3231	#define TAB_SIZE 256
3232	#endif
3233
3234	int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
3235	{
3236	mp_int M[TAB_SIZE], res, mu;
3237	mp_digit buf;
3238	int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
3239	int (redux)(mp_int,mp_int,mp_int);
3240
3241	/* find window size */
3242	x = mp_count_bits (X);
3243	if (x <= 7) {
3244	winsize = 2;
3245	} else if (x <= 36) {
3246	winsize = 3;
3247	} else if (x <= 140) {
3248	winsize = 4;
3249	} else if (x <= 450) {
3250	winsize = 5;
3251	} else if (x <= 1303) {
3252	winsize = 6;
3253	} else if (x <= 3529) {
3254	winsize = 7;
3255	} else {
3256	winsize = 8;
3257	}
3258
3259	#ifdef MP_LOW_MEM
3260	if (winsize > 5) {
3261	winsize = 5;
3262	}
3263	#endif
3264
3265	/* init M array */
3266	/* init first cell */
3267	if ((err = mp_init(&M[1])) != MP_OKAY) {
3268	return err;
3269	}
3270
3271	/* now init the second half of the array */
3272	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3273	if ((err = mp_init(&M[x])) != MP_OKAY) {
3274	for (y = 1<<(winsize-1); y < x; y++) {
3275	mp_clear (&M[y]);
3276	}
3277	mp_clear(&M[1]);
3278	return err;
3279	}
3280	}
3281
3282	/* create mu, used for Barrett reduction */
3283	if ((err = mp_init (&mu)) != MP_OKAY) {
3284	goto LBL_M;
3285	}
3286
3287	if (redmode == 0) {
3288	if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
3289	goto LBL_MU;
3290	}
3291	redux = mp_reduce;
3292	} else {
3293	if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
3294	goto LBL_MU;
3295	}
3296	redux = mp_reduce_2k_l;
3297	}
3298
3299	/* create M table
3300	*
3301	* The M table contains powers of the base,
3302	* e.g. M[x] = G**x mod P
3303	*
3304	* The first half of the table is not
3305	* computed though accept for M[0] and M[1]
3306	*/
3307	if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
3308	goto LBL_MU;
3309	}
3310
3311	/* compute the value at M[1<<(winsize-1)] by squaring
3312	* M[1] (winsize-1) times
3313	*/
3314	if ((err = mp_copy (&M[1], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
3315	goto LBL_MU;
3316	}
3317
3318	for (x = 0; x < (winsize - 1); x++) {
3319	/* square it */
3320	if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))],
3321	&M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
3322	goto LBL_MU;
3323	}
3324
3325	/* reduce modulo P */
3326	if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, &mu)) != MP_OKAY) {
3327	goto LBL_MU;
3328	}
3329	}
3330
3331	/* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
3332	* for x = (2(winsize - 1) + 1) to (2winsize - 1)
3333	*/
3334	for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
3335	if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
3336	goto LBL_MU;
3337	}
3338	if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
3339	goto LBL_MU;
3340	}
3341	}
3342
3343	/* setup result */
3344	if ((err = mp_init (&res)) != MP_OKAY) {
3345	goto LBL_MU;
3346	}
3347	mp_set (&res, 1);
3348
3349	/* set initial mode and bit cnt */
3350	mode = 0;
3351	bitcnt = 1;
3352	buf = 0;
3353	digidx = X->used - 1;
3354	bitcpy = 0;
3355	bitbuf = 0;
3356
3357	for (;;) {
3358	/* grab next digit as required */
3359	if (--bitcnt == 0) {
3360	/* if digidx == -1 we are out of digits */
3361	if (digidx == -1) {
3362	break;
3363	}
3364	/* read next digit and reset the bitcnt */
3365	buf = X->dp[digidx--];
3366	bitcnt = (int) DIGIT_BIT;
3367	}
3368
3369	/* grab the next msb from the exponent */
3370	y = (int)(buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
3371	buf <<= (mp_digit)1;
3372
3373	/* if the bit is zero and mode == 0 then we ignore it
3374	* These represent the leading zero bits before the first 1 bit
3375	* in the exponent. Technically this opt is not required but it
3376	* does lower the # of trivial squaring/reductions used
3377	*/
3378	if (mode == 0 && y == 0) {
3379	continue;
3380	}
3381
3382	/* if the bit is zero and mode == 1 then we square */
3383	if (mode == 1 && y == 0) {
3384	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3385	goto LBL_RES;
3386	}
3387	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3388	goto LBL_RES;
3389	}
3390	continue;
3391	}
3392
3393	/* else we add it to the window */
3394	bitbuf \|= (y << (winsize - ++bitcpy));
3395	mode = 2;
3396
3397	if (bitcpy == winsize) {
3398	/* ok window is filled so square as required and multiply */
3399	/* square first */
3400	for (x = 0; x < winsize; x++) {
3401	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3402	goto LBL_RES;
3403	}
3404	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3405	goto LBL_RES;
3406	}
3407	}
3408
3409	/* then multiply */
3410	if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
3411	goto LBL_RES;
3412	}
3413	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3414	goto LBL_RES;
3415	}
3416
3417	/* empty window and reset */
3418	bitcpy = 0;
3419	bitbuf = 0;
3420	mode = 1;
3421	}
3422	}
3423
3424	/* if bits remain then square/multiply */
3425	if (mode == 2 && bitcpy > 0) {
3426	/* square then multiply if the bit is set */
3427	for (x = 0; x < bitcpy; x++) {
3428	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3429	goto LBL_RES;
3430	}
3431	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3432	goto LBL_RES;
3433	}
3434
3435	bitbuf <<= 1;
3436	if ((bitbuf & (1 << winsize)) != 0) {
3437	/* then multiply */
3438	if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
3439	goto LBL_RES;
3440	}
3441	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3442	goto LBL_RES;
3443	}
3444	}
3445	}
3446	}
3447
3448	mp_exch (&res, Y);
3449	err = MP_OKAY;
3450	LBL_RES:mp_clear (&res);
3451	LBL_MU:mp_clear (&mu);
3452	LBL_M:
3453	mp_clear(&M[1]);
3454	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3455	mp_clear (&M[x]);
3456	}
3457	return err;
3458	}
3459
3460
3461	/* pre-calculate the value required for Barrett reduction
3462	* For a given modulus "b" it calulates the value required in "a"
3463	*/
3464	int mp_reduce_setup (mp_int * a, mp_int * b)
3465	{
3466	int res;
3467
3468	if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
3469	return res;
3470	}
3471	return mp_div (a, b, a, NULL);
3472	}
3473
3474
3475	/* reduces x mod m, assumes 0 < x < m**2, mu is
3476	* precomputed via mp_reduce_setup.
3477	* From HAC pp.604 Algorithm 14.42
3478	*/
3479	int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
3480	{
3481	mp_int q;
3482	int res, um = m->used;
3483
3484	/* q = x */
3485	if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
3486	return res;
3487	}
3488
3489	/* q1 = x / b*(k-1) /
3490	mp_rshd (&q, um - 1);
3491
3492	/* according to HAC this optimization is ok */
3493	if (((mp_word) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
3494	if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
3495	goto CLEANUP;
3496	}
3497	} else {
3498	#ifdef BN_S_MP_MUL_HIGH_DIGS_C
3499	if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
3500	goto CLEANUP;
3501	}
3502	#elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
3503	if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
3504	goto CLEANUP;
3505	}
3506	#else
3507	{
3508	res = MP_VAL;
3509	goto CLEANUP;
3510	}
3511	#endif
3512	}
3513
3514	/* q3 = q2 / b*(k+1) /
3515	mp_rshd (&q, um + 1);
3516
3517	/* x = x mod b*(k+1), quick (no division) /
3518	if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
3519	goto CLEANUP;
3520	}
3521
3522	/* q = q * m mod b*(k+1), quick (no division) /
3523	if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
3524	goto CLEANUP;
3525	}
3526
3527	/* x = x - q */
3528	if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
3529	goto CLEANUP;
3530	}
3531
3532	/* If x < 0, add b*(k+1) to it /
3533	if (mp_cmp_d (x, 0) == MP_LT) {
3534	mp_set (&q, 1);
3535	if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
3536	goto CLEANUP;
3537	if ((res = mp_add (x, &q, x)) != MP_OKAY)
3538	goto CLEANUP;
3539	}
3540
3541	/* Back off if it's too big */
3542	while (mp_cmp (x, m) != MP_LT) {
3543	if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
3544	goto CLEANUP;
3545	}
3546	}
3547
3548	CLEANUP:
3549	mp_clear (&q);
3550
3551	return res;
3552	}
3553
3554
3555	/* reduces a modulo n where n is of the form 2**p - d
3556	This differs from reduce_2k since "d" can be larger
3557	than a single digit.
3558	*/
3559	int mp_reduce_2k_l(mp_int a, mp_int n, mp_int *d)
3560	{
3561	mp_int q;
3562	int p, res;
3563
3564	if ((res = mp_init(&q)) != MP_OKAY) {
3565	return res;
3566	}
3567
3568	p = mp_count_bits(n);
3569	top:
3570	/* q = a/2p, a = a mod 2p */
3571	if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
3572	goto ERR;
3573	}
3574
3575	/* q = q * d */
3576	if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
3577	goto ERR;
3578	}
3579
3580	/* a = a + q */
3581	if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
3582	goto ERR;
3583	}
3584
3585	if (mp_cmp_mag(a, n) != MP_LT) {
3586	s_mp_sub(a, n, a);
3587	goto top;
3588	}
3589
3590	ERR:
3591	mp_clear(&q);
3592	return res;
3593	}
3594
3595
3596	/* determines the setup value */
3597	int mp_reduce_2k_setup_l(mp_int a, mp_int d)
3598	{
3599	int res;
3600	mp_int tmp;
3601
3602	if ((res = mp_init(&tmp)) != MP_OKAY) {
3603	return res;
3604	}
3605
3606	if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
3607	goto ERR;
3608	}
3609
3610	if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
3611	goto ERR;
3612	}
3613
3614	ERR:
3615	mp_clear(&tmp);
3616	return res;
3617	}
3618
3619
3620	/* multiplies \|a\| * \|b\| and does not compute the lower digs digits
3621	* [meant to get the higher part of the product]
3622	*/
3623	int
3624	s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3625	{
3626	mp_int t;
3627	int res, pa, pb, ix, iy;
3628	mp_digit u;
3629	mp_word r;
3630	mp_digit tmpx, tmpt, tmpy;
3631
3632	/* can we use the fast multiplier? */
3633	#ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
3634	if (((a->used + b->used + 1) < MP_WARRAY)
3635	&& MIN (a->used, b->used) <
3636	(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
3637	return fast_s_mp_mul_high_digs (a, b, c, digs);
3638	}
3639	#endif
3640
3641	if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
3642	return res;
3643	}
3644	t.used = a->used + b->used + 1;
3645
3646	pa = a->used;
3647	pb = b->used;
3648	for (ix = 0; ix < pa; ix++) {
3649	/* clear the carry */
3650	u = 0;
3651
3652	/* left hand side of A[ix] * B[iy] */
3653	tmpx = a->dp[ix];
3654
3655	/* alias to the address of where the digits will be stored */
3656	tmpt = &(t.dp[digs]);
3657
3658	/* alias for where to read the right hand side from */
3659	tmpy = b->dp + (digs - ix);
3660
3661	for (iy = digs - ix; iy < pb; iy++) {
3662	/* calculate the double precision result */
3663	r = ((mp_word)*tmpt) +
3664	((mp_word)tmpx) * ((mp_word)*tmpy++) +
3665	((mp_word) u);
3666
3667	/* get the lower part */
3668	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3669
3670	/* carry the carry */
3671	u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
3672	}
3673	*tmpt = u;
3674	}
3675	mp_clamp (&t);
3676	mp_exch (&t, c);
3677	mp_clear (&t);
3678	return MP_OKAY;
3679	}
3680
3681
3682	/* this is a modified version of fast_s_mul_digs that only produces
3683	* output digits above digs. See the comments for fast_s_mul_digs
3684	* to see how it works.
3685	*
3686	* This is used in the Barrett reduction since for one of the multiplications
3687	* only the higher digits were needed. This essentially halves the work.
3688	*
3689	* Based on Algorithm 14.12 on pp.595 of HAC.
3690	*/
3691	int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3692	{
3693	int olduse, res, pa, ix, iz;
3694	#ifdef WOLFSSL_SMALL_STACK
3695	mp_digit* W; /* uses dynamic memory and slower */
3696	#else
3697	mp_digit W[MP_WARRAY];
3698	#endif
3699	mp_word _W;
3700
3701	/* grow the destination as required */
3702	pa = a->used + b->used;
3703	if (c->alloc < pa) {
3704	if ((res = mp_grow (c, pa)) != MP_OKAY) {
3705	return res;
3706	}
3707	}
3708
3709	if (pa > MP_WARRAY)
3710	return MP_RANGE; /* TAO range check */
3711
3712	#ifdef WOLFSSL_SMALL_STACK
3713	W = (mp_digit)XMALLOC(sizeof(mp_digit) MP_WARRAY, 0, DYNAMIC_TYPE_BIGINT);
3714	if (W == NULL)
3715	return MP_MEM;
3716	#endif
3717
3718	/* number of output digits to produce */
3719	pa = a->used + b->used;
3720	_W = 0;
3721	for (ix = digs; ix < pa; ix++) {
3722	int tx, ty, iy;
3723	mp_digit tmpx, tmpy;
3724
3725	/* get offsets into the two bignums */
3726	ty = MIN(b->used-1, ix);
3727	tx = ix - ty;
3728
3729	/* setup temp aliases */
3730	tmpx = a->dp + tx;
3731	tmpy = b->dp + ty;
3732
3733	/* this is the number of times the loop will iterrate, essentially its
3734	while (tx++ < a->used && ty-- >= 0) { ... }
3735	*/
3736	iy = MIN(a->used-tx, ty+1);
3737
3738	/* execute loop */
3739	for (iz = 0; iz < iy; iz++) {
3740	_W += ((mp_word)tmpx++)((mp_word)*tmpy--);
3741	}
3742
3743	/* store term */
3744	W[ix] = ((mp_digit)_W) & MP_MASK;
3745
3746	/* make next carry */
3747	_W = _W >> ((mp_word)DIGIT_BIT);
3748	}
3749
3750	/* setup dest */
3751	olduse = c->used;
3752	c->used = pa;
3753
3754	{
3755	register mp_digit *tmpc;
3756
3757	tmpc = c->dp + digs;
3758	for (ix = digs; ix <= pa; ix++) {
3759	/* now extract the previous digit [below the carry] */
3760	*tmpc++ = W[ix];
3761	}
3762
3763	/* clear unused digits [that existed in the old copy of c] */
3764	for (; ix < olduse; ix++) {
3765	*tmpc++ = 0;
3766	}
3767	}
3768	mp_clamp (c);
3769
3770	#ifdef WOLFSSL_SMALL_STACK
3771	XFREE(W, 0, DYNAMIC_TYPE_BIGINT);
3772	#endif
3773
3774	return MP_OKAY;
3775	}
3776
3777
3778	/* set a 32-bit const */
3779	int mp_set_int (mp_int * a, unsigned long b)
3780	{
3781	int x, res;
3782
3783	mp_zero (a);
3784
3785	/* set four bits at a time */
3786	for (x = 0; x < 8; x++) {
3787	/* shift the number up four bits */
3788	if ((res = mp_mul_2d (a, 4, a)) != MP_OKAY) {
3789	return res;
3790	}
3791
3792	/* OR in the top four bits of the source */
3793	a->dp[0] \|= (b >> 28) & 15;
3794
3795	/* shift the source up to the next four bits */
3796	b <<= 4;
3797
3798	/* ensure that digits are not clamped off */
3799	a->used += 1;
3800	}
3801	mp_clamp (a);
3802	return MP_OKAY;
3803	}
3804
3805
3806	#if defined(WOLFSSL_KEY_GEN) \|\| defined(HAVE_ECC)
3807
3808	/* c = a * a (mod b) */
3809	int mp_sqrmod (mp_int * a, mp_int * b, mp_int * c)
3810	{
3811	int res;
3812	mp_int t;
3813
3814	if ((res = mp_init (&t)) != MP_OKAY) {
3815	return res;
3816	}
3817
3818	if ((res = mp_sqr (a, &t)) != MP_OKAY) {
3819	mp_clear (&t);
3820	return res;
3821	}
3822	res = mp_mod (&t, b, c);
3823	mp_clear (&t);
3824	return res;
3825	}
3826
3827	#endif
3828
3829
3830	#if defined(HAVE_ECC) \|\| !defined(NO_PWDBASED) \|\| defined(WOLFSSL_SNIFFER) \|\| \
3831	defined(WOLFSSL_HAVE_WOLFSCEP) \|\| defined(WOLFSSL_KEY_GEN)
3832
3833	/* single digit addition */
3834	int mp_add_d (mp_int* a, mp_digit b, mp_int* c)
3835	{
3836	int res, ix, oldused;
3837	mp_digit tmpa, tmpc, mu;
3838
3839	/* grow c as required */
3840	if (c->alloc < a->used + 1) {
3841	if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
3842	return res;
3843	}
3844	}
3845
3846	/* if a is negative and \|a\| >= b, call c = \|a\| - b */
3847	if (a->sign == MP_NEG && (a->used > 1 \|\| a->dp[0] >= b)) {
3848	/* temporarily fix sign of a */
3849	a->sign = MP_ZPOS;
3850
3851	/* c = \|a\| - b */
3852	res = mp_sub_d(a, b, c);
3853
3854	/* fix sign */
3855	a->sign = c->sign = MP_NEG;
3856
3857	/* clamp */
3858	mp_clamp(c);
3859
3860	return res;
3861	}
3862
3863	/* old number of used digits in c */
3864	oldused = c->used;
3865
3866	/* sign always positive */
3867	c->sign = MP_ZPOS;
3868
3869	/* source alias */
3870	tmpa = a->dp;
3871
3872	/* destination alias */
3873	tmpc = c->dp;
3874
3875	/* if a is positive */
3876	if (a->sign == MP_ZPOS) {
3877	/* add digit, after this we're propagating
3878	* the carry.
3879	*/
3880	tmpc = tmpa++ + b;
3881	mu = *tmpc >> DIGIT_BIT;
3882	*tmpc++ &= MP_MASK;
3883
3884	/* now handle rest of the digits */
3885	for (ix = 1; ix < a->used; ix++) {
3886	tmpc = tmpa++ + mu;
3887	mu = *tmpc >> DIGIT_BIT;
3888	*tmpc++ &= MP_MASK;
3889	}
3890	/* set final carry */
3891	if (ix < c->alloc) {
3892	ix++;
3893	*tmpc++ = mu;
3894	}
3895
3896	/* setup size */
3897	c->used = a->used + 1;
3898	} else {
3899	/* a was negative and \|a\| < b */
3900	c->used = 1;
3901
3902	/* the result is a single digit */
3903	if (a->used == 1) {
3904	*tmpc++ = b - a->dp[0];
3905	} else {
3906	*tmpc++ = b;
3907	}
3908
3909	/* setup count so the clearing of oldused
3910	* can fall through correctly
3911	*/
3912	ix = 1;
3913	}
3914
3915	/* now zero to oldused */
3916	while (ix++ < oldused) {
3917	*tmpc++ = 0;
3918	}
3919	mp_clamp(c);
3920
3921	return MP_OKAY;
3922	}
3923
3924
3925	/* single digit subtraction */
3926	int mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
3927	{
3928	mp_digit tmpa, tmpc, mu;
3929	int res, ix, oldused;
3930
3931	/* grow c as required */
3932	if (c->alloc < a->used + 1) {
3933	if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
3934	return res;
3935	}
3936	}
3937
3938	/* if a is negative just do an unsigned
3939	* addition [with fudged signs]
3940	*/
3941	if (a->sign == MP_NEG) {
3942	a->sign = MP_ZPOS;
3943	res = mp_add_d(a, b, c);
3944	a->sign = c->sign = MP_NEG;
3945
3946	/* clamp */
3947	mp_clamp(c);
3948
3949	return res;
3950	}
3951
3952	/* setup regs */
3953	oldused = c->used;
3954	tmpa = a->dp;
3955	tmpc = c->dp;
3956
3957	/* if a <= b simply fix the single digit */
3958	if ((a->used == 1 && a->dp[0] <= b) \|\| a->used == 0) {
3959	if (a->used == 1) {
3960	tmpc++ = b - tmpa;
3961	} else {
3962	*tmpc++ = b;
3963	}
3964	ix = 1;
3965
3966	/* negative/1digit */
3967	c->sign = MP_NEG;
3968	c->used = 1;
3969	} else {
3970	/* positive/size */
3971	c->sign = MP_ZPOS;
3972	c->used = a->used;
3973
3974	/* subtract first digit */
3975	tmpc = tmpa++ - b;
3976	mu = tmpc >> (sizeof(mp_digit) CHAR_BIT - 1);
3977	*tmpc++ &= MP_MASK;
3978
3979	/* handle rest of the digits */
3980	for (ix = 1; ix < a->used; ix++) {
3981	tmpc = tmpa++ - mu;
3982	mu = tmpc >> (sizeof(mp_digit) CHAR_BIT - 1);
3983	*tmpc++ &= MP_MASK;
3984	}
3985	}
3986
3987	/* zero excess digits */
3988	while (ix++ < oldused) {
3989	*tmpc++ = 0;
3990	}
3991	mp_clamp(c);
3992	return MP_OKAY;
3993	}
3994
3995	#endif /* defined(HAVE_ECC) \|\| !defined(NO_PWDBASED) */
3996
3997
3998	#if defined(WOLFSSL_KEY_GEN) \|\| defined(HAVE_COMP_KEY) \|\| defined(HAVE_ECC)
3999
4000	static const int lnz[16] = {
4001	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
4002	};
4003
4004	/* Counts the number of lsbs which are zero before the first zero bit */
4005	int mp_cnt_lsb(mp_int *a)
4006	{
4007	int x;
4008	mp_digit q, qq;
4009
4010	/* easy out */
4011	if (mp_iszero(a) == 1) {
4012	return 0;
4013	}
4014
4015	/* scan lower digits until non-zero */
4016	for (x = 0; x < a->used && a->dp[x] == 0; x++);
4017	q = a->dp[x];
4018	x *= DIGIT_BIT;
4019
4020	/* now scan this digit until a 1 is found */
4021	if ((q & 1) == 0) {
4022	do {
4023	qq = q & 15;
4024	x += lnz[qq];
4025	q >>= 4;
4026	} while (qq == 0);
4027	}
4028	return x;
4029	}
4030
4031
4032
4033
4034	static int s_is_power_of_two(mp_digit b, int *p)
4035	{
4036	int x;
4037
4038	/* fast return if no power of two */
4039	if ((b==0) \|\| (b & (b-1))) {
4040	return 0;
4041	}
4042
4043	for (x = 0; x < DIGIT_BIT; x++) {
4044	if (b == (((mp_digit)1)<<x)) {
4045	*p = x;
4046	return 1;
4047	}
4048	}
4049	return 0;
4050	}
4051
4052	/* single digit division (based on routine from MPI) */
4053	static int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
4054	{
4055	mp_int q;
4056	mp_word w;
4057	mp_digit t;
4058	int res = MP_OKAY, ix;
4059
4060	/* cannot divide by zero */
4061	if (b == 0) {
4062	return MP_VAL;
4063	}
4064
4065	/* quick outs */
4066	if (b == 1 \|\| mp_iszero(a) == 1) {
4067	if (d != NULL) {
4068	*d = 0;
4069	}
4070	if (c != NULL) {
4071	return mp_copy(a, c);
4072	}
4073	return MP_OKAY;
4074	}
4075
4076	/* power of two ? */
4077	if (s_is_power_of_two(b, &ix) == 1) {
4078	if (d != NULL) {
4079	*d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
4080	}
4081	if (c != NULL) {
4082	return mp_div_2d(a, ix, c, NULL);
4083	}
4084	return MP_OKAY;
4085	}
4086
4087	#ifdef BN_MP_DIV_3_C
4088	/* three? */
4089	if (b == 3) {
4090	return mp_div_3(a, c, d);
4091	}
4092	#endif
4093
4094	/* no easy answer [c'est la vie]. Just division */
4095	if (c != NULL) {
4096	if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
4097	return res;
4098	}
4099
4100	q.used = a->used;
4101	q.sign = a->sign;
4102	}
4103
4104	w = 0;
4105	for (ix = a->used - 1; ix >= 0; ix--) {
4106	w = (w << ((mp_word)DIGIT_BIT)) \| ((mp_word)a->dp[ix]);
4107
4108	if (w >= b) {
4109	t = (mp_digit)(w / b);
4110	w -= ((mp_word)t) * ((mp_word)b);
4111	} else {
4112	t = 0;
4113	}
4114	if (c != NULL)
4115	q.dp[ix] = (mp_digit)t;
4116	}
4117
4118	if (d != NULL) {
4119	*d = (mp_digit)w;
4120	}
4121
4122	if (c != NULL) {
4123	mp_clamp(&q);
4124	mp_exch(&q, c);
4125	mp_clear(&q);
4126	}
4127
4128	return res;
4129	}
4130
4131
4132	int mp_mod_d (mp_int * a, mp_digit b, mp_digit * c)
4133	{
4134	return mp_div_d(a, b, NULL, c);
4135	}
4136
4137	#endif /* defined(WOLFSSL_KEY_GEN)\|\|defined(HAVE_COMP_KEY)\|\|defined(HAVE_ECC) */
4138
4139	#ifdef WOLFSSL_KEY_GEN
4140
4141	const mp_digit ltm_prime_tab[] = {
4142	0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
4143	0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
4144	0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
4145	0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F,
4146	#ifndef MP_8BIT
4147	0x0083,
4148	0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
4149	0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
4150	0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
4151	0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
4152
4153	0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
4154	0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
4155	0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
4156	0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
4157	0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
4158	0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
4159	0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
4160	0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
4161
4162	0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
4163	0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
4164	0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
4165	0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
4166	0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
4167	0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
4168	0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
4169	0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
4170
4171	0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
4172	0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
4173	0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
4174	0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
4175	0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
4176	0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
4177	0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
4178	0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
4179	#endif
4180	};
4181
4182
4183	/* Miller-Rabin test of "a" to the base of "b" as described in
4184	* HAC pp. 139 Algorithm 4.24
4185	*
4186	* Sets result to 0 if definitely composite or 1 if probably prime.
4187	* Randomly the chance of error is no more than 1/4 and often
4188	* very much lower.
4189	*/
4190	static int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
4191	{
4192	mp_int n1, y, r;
4193	int s, j, err;
4194
4195	/* default */
4196	*result = MP_NO;
4197
4198	/* ensure b > 1 */
4199	if (mp_cmp_d(b, 1) != MP_GT) {
4200	return MP_VAL;
4201	}
4202
4203	/* get n1 = a - 1 */
4204	if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
4205	return err;
4206	}
4207	if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
4208	goto LBL_N1;
4209	}
4210
4211	/* set 2*s r = n1 */
4212	if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
4213	goto LBL_N1;
4214	}
4215
4216	/* count the number of least significant bits
4217	* which are zero
4218	*/
4219	s = mp_cnt_lsb(&r);
4220
4221	/* now divide n - 1 by 2*s /
4222	if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
4223	goto LBL_R;
4224	}
4225
4226	/* compute y = b*r mod a /
4227	if ((err = mp_init (&y)) != MP_OKAY) {
4228	goto LBL_R;
4229	}
4230	if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
4231	goto LBL_Y;
4232	}
4233
4234	/* if y != 1 and y != n1 do */
4235	if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
4236	j = 1;
4237	/* while j <= s-1 and y != n1 */
4238	while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
4239	if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
4240	goto LBL_Y;
4241	}
4242
4243	/* if y == 1 then composite */
4244	if (mp_cmp_d (&y, 1) == MP_EQ) {
4245	goto LBL_Y;
4246	}
4247
4248	++j;
4249	}
4250
4251	/* if y != n1 then composite */
4252	if (mp_cmp (&y, &n1) != MP_EQ) {
4253	goto LBL_Y;
4254	}
4255	}
4256
4257	/* probably prime now */
4258	*result = MP_YES;
4259	LBL_Y:mp_clear (&y);
4260	LBL_R:mp_clear (&r);
4261	LBL_N1:mp_clear (&n1);
4262	return err;
4263	}
4264
4265
4266	/* determines if an integers is divisible by one
4267	* of the first PRIME_SIZE primes or not
4268	*
4269	* sets result to 0 if not, 1 if yes
4270	*/
4271	static int mp_prime_is_divisible (mp_int * a, int *result)
4272	{
4273	int err, ix;
4274	mp_digit res;
4275
4276	/* default to not */
4277	*result = MP_NO;
4278
4279	for (ix = 0; ix < PRIME_SIZE; ix++) {
4280	/* what is a mod LBL_prime_tab[ix] */
4281	if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) {
4282	return err;
4283	}
4284
4285	/* is the residue zero? */
4286	if (res == 0) {
4287	*result = MP_YES;
4288	return MP_OKAY;
4289	}
4290	}
4291
4292	return MP_OKAY;
4293	}
4294
4295	static const int USE_BBS = 1;
4296
4297	int mp_rand_prime(mp_int* N, int len, WC_RNG* rng, void* heap)
4298	{
4299	int err, res, type;
4300	byte* buf;
4301
4302	if (N == NULL \|\| rng == NULL)
4303	return MP_VAL;
4304
4305	/* get type */
4306	if (len < 0) {
4307	type = USE_BBS;
4308	len = -len;
4309	} else {
4310	type = 0;
4311	}
4312
4313	/* allow sizes between 2 and 512 bytes for a prime size */
4314	if (len < 2 \|\| len > 512) {
4315	return MP_VAL;
4316	}
4317
4318	/* allocate buffer to work with */
4319	buf = (byte*)XMALLOC(len, heap, DYNAMIC_TYPE_RSA);
4320	if (buf == NULL) {
4321	return MP_MEM;
4322	}
4323	XMEMSET(buf, 0, len);
4324
4325	do {
4326	#ifdef SHOW_GEN
4327	printf(".");
4328	fflush(stdout);
4329	#endif
4330	/* generate value */
4331	err = wc_RNG_GenerateBlock(rng, buf, len);
4332	if (err != 0) {
4333	XFREE(buf, heap, DYNAMIC_TYPE_RSA);
4334	return err;
4335	}
4336
4337	/* munge bits */
4338	buf[0] \|= 0x80 \| 0x40;
4339	buf[len-1] \|= 0x01 \| ((type & USE_BBS) ? 0x02 : 0x00);
4340
4341	/* load value */
4342	if ((err = mp_read_unsigned_bin(N, buf, len)) != MP_OKAY) {
4343	XFREE(buf, heap, DYNAMIC_TYPE_RSA);
4344	return err;
4345	}
4346
4347	/* test */
4348	if ((err = mp_prime_is_prime(N, 8, &res)) != MP_OKAY) {
4349	XFREE(buf, heap, DYNAMIC_TYPE_RSA);
4350	return err;
4351	}
4352	} while (res == MP_NO);
4353
4354	XMEMSET(buf, 0, len);
4355	XFREE(buf, heap, DYNAMIC_TYPE_RSA);
4356
4357	return MP_OKAY;
4358	}
4359
4360	/*
4361	* Sets result to 1 if probably prime, 0 otherwise
4362	*/
4363	int mp_prime_is_prime (mp_int * a, int t, int *result)
4364	{
4365	mp_int b;
4366	int ix, err, res;
4367
4368	/* default to no */
4369	*result = MP_NO;
4370
4371	/* valid value of t? */
4372	if (t <= 0 \|\| t > PRIME_SIZE) {
4373	return MP_VAL;
4374	}
4375
4376	/* is the input equal to one of the primes in the table? */
4377	for (ix = 0; ix < PRIME_SIZE; ix++) {
4378	if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
4379	*result = 1;
4380	return MP_OKAY;
4381	}
4382	}
4383
4384	/* first perform trial division */
4385	if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
4386	return err;
4387	}
4388
4389	/* return if it was trivially divisible */
4390	if (res == MP_YES) {
4391	return MP_OKAY;
4392	}
4393
4394	/* now perform the miller-rabin rounds */
4395	if ((err = mp_init (&b)) != MP_OKAY) {
4396	return err;
4397	}
4398
4399	for (ix = 0; ix < t; ix++) {
4400	/* set the prime */
4401	mp_set (&b, ltm_prime_tab[ix]);
4402
4403	if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
4404	goto LBL_B;
4405	}
4406
4407	if (res == MP_NO) {
4408	goto LBL_B;
4409	}
4410	}
4411
4412	/* passed the test */
4413	*result = MP_YES;
4414	LBL_B:mp_clear (&b);
4415	return err;
4416	}
4417
4418
4419	/* computes least common multiple as \|ab\|/(a, b) /
4420	int mp_lcm (mp_int * a, mp_int * b, mp_int * c)
4421	{
4422	int res;
4423	mp_int t1, t2;
4424
4425
4426	if ((res = mp_init_multi (&t1, &t2, NULL, NULL, NULL, NULL)) != MP_OKAY) {
4427	return res;
4428	}
4429
4430	/* t1 = get the GCD of the two inputs */
4431	if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
4432	goto LBL_T;
4433	}
4434
4435	/* divide the smallest by the GCD */
4436	if (mp_cmp_mag(a, b) == MP_LT) {
4437	/* store quotient in t2 such that t2 * b is the LCM */
4438	if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
4439	goto LBL_T;
4440	}
4441	res = mp_mul(b, &t2, c);
4442	} else {
4443	/* store quotient in t2 such that t2 * a is the LCM */
4444	if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
4445	goto LBL_T;
4446	}
4447	res = mp_mul(a, &t2, c);
4448	}
4449
4450	/* fix the sign to positive */
4451	c->sign = MP_ZPOS;
4452
4453	LBL_T:
4454	mp_clear(&t1);
4455	mp_clear(&t2);
4456	return res;
4457	}
4458
4459
4460
4461	/* Greatest Common Divisor using the binary method */
4462	int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
4463	{
4464	mp_int u, v;
4465	int k, u_lsb, v_lsb, res;
4466
4467	/* either zero than gcd is the largest */
4468	if (mp_iszero (a) == MP_YES) {
4469	return mp_abs (b, c);
4470	}
4471	if (mp_iszero (b) == MP_YES) {
4472	return mp_abs (a, c);
4473	}
4474
4475	/* get copies of a and b we can modify */
4476	if ((res = mp_init_copy (&u, a)) != MP_OKAY) {
4477	return res;
4478	}
4479
4480	if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
4481	goto LBL_U;
4482	}
4483
4484	/* must be positive for the remainder of the algorithm */
4485	u.sign = v.sign = MP_ZPOS;
4486
4487	/* B1. Find the common power of two for u and v */
4488	u_lsb = mp_cnt_lsb(&u);
4489	v_lsb = mp_cnt_lsb(&v);
4490	k = MIN(u_lsb, v_lsb);
4491
4492	if (k > 0) {
4493	/* divide the power of two out */
4494	if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
4495	goto LBL_V;
4496	}
4497
4498	if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
4499	goto LBL_V;
4500	}
4501	}
4502
4503	/* divide any remaining factors of two out */
4504	if (u_lsb != k) {
4505	if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
4506	goto LBL_V;
4507	}
4508	}
4509
4510	if (v_lsb != k) {
4511	if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
4512	goto LBL_V;
4513	}
4514	}
4515
4516	while (mp_iszero(&v) == 0) {
4517	/* make sure v is the largest */
4518	if (mp_cmp_mag(&u, &v) == MP_GT) {
4519	/* swap u and v to make sure v is >= u */
4520	mp_exch(&u, &v);
4521	}
4522
4523	/* subtract smallest from largest */
4524	if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
4525	goto LBL_V;
4526	}
4527
4528	/* Divide out all factors of two */
4529	if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
4530	goto LBL_V;
4531	}
4532	}
4533
4534	/* multiply by 2*k which we divided out at the beginning /
4535	if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
4536	goto LBL_V;
4537	}
4538	c->sign = MP_ZPOS;
4539	res = MP_OKAY;
4540	LBL_V:mp_clear (&u);
4541	LBL_U:mp_clear (&v);
4542	return res;
4543	}
4544
4545	#endif /* WOLFSSL_KEY_GEN */
4546
4547
4548	#if defined(HAVE_ECC) \|\| defined(WOLFSSL_KEY_GEN) \|\| defined(HAVE_COMP_KEY)
4549
4550	/* chars used in radix conversions */
4551	const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ\
4552	abcdefghijklmnopqrstuvwxyz+/";
4553	#endif
4554
4555	#ifdef HAVE_ECC
4556	/* read a string [ASCII] in a given radix */
4557	int mp_read_radix (mp_int * a, const char *str, int radix)
4558	{
4559	int y, res, neg;
4560	char ch;
4561
4562	/* zero the digit bignum */
4563	mp_zero(a);
4564
4565	/* make sure the radix is ok */
4566	if (radix < 2 \|\| radix > 64) {
4567	return MP_VAL;
4568	}
4569
4570	/* if the leading digit is a
4571	* minus set the sign to negative.
4572	*/
4573	if (*str == '-') {
4574	++str;
4575	neg = MP_NEG;
4576	} else {
4577	neg = MP_ZPOS;
4578	}
4579
4580	/* set the integer to the default of zero */
4581	mp_zero (a);
4582
4583	/* process each digit of the string */
4584	while (*str) {
4585	/* if the radix < 36 the conversion is case insensitive
4586	* this allows numbers like 1AB and 1ab to represent the same value
4587	* [e.g. in hex]
4588	*/
4589	ch = (char) ((radix < 36) ? XTOUPPER((unsigned char)str) : str);
4590	for (y = 0; y < 64; y++) {
4591	if (ch == mp_s_rmap[y]) {
4592	break;
4593	}
4594	}
4595
4596	/* if the char was found in the map
4597	* and is less than the given radix add it
4598	* to the number, otherwise exit the loop.
4599	*/
4600	if (y < radix) {
4601	if ((res = mp_mul_d (a, (mp_digit) radix, a)) != MP_OKAY) {
4602	return res;
4603	}
4604	if ((res = mp_add_d (a, (mp_digit) y, a)) != MP_OKAY) {
4605	return res;
4606	}
4607	} else {
4608	break;
4609	}
4610	++str;
4611	}
4612
4613	/* set the sign only if a != 0 */
4614	if (mp_iszero(a) != 1) {
4615	a->sign = neg;
4616	}
4617	return MP_OKAY;
4618	}
4619	#endif /* HAVE_ECC */
4620
4621	#if defined(WOLFSSL_KEY_GEN) \|\| defined(HAVE_COMP_KEY)
4622
4623	/* returns size of ASCII representation */
4624	int mp_radix_size (mp_int a, int radix, int size)
4625	{
4626	int res, digs;
4627	mp_int t;
4628	mp_digit d;
4629
4630	*size = 0;
4631
4632	/* special case for binary */
4633	if (radix == 2) {
4634	*size = mp_count_bits (a) + (a->sign == MP_NEG ? 1 : 0) + 1;
4635	return MP_OKAY;
4636	}
4637
4638	/* make sure the radix is in range */
4639	if (radix < 2 \|\| radix > 64) {
4640	return MP_VAL;
4641	}
4642
4643	if (mp_iszero(a) == MP_YES) {
4644	*size = 2;
4645	return MP_OKAY;
4646	}
4647
4648	/* digs is the digit count */
4649	digs = 0;
4650
4651	/* if it's negative add one for the sign */
4652	if (a->sign == MP_NEG) {
4653	++digs;
4654	}
4655
4656	/* init a copy of the input */
4657	if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
4658	return res;
4659	}
4660
4661	/* force temp to positive */
4662	t.sign = MP_ZPOS;
4663
4664	/* fetch out all of the digits */
4665	while (mp_iszero (&t) == MP_NO) {
4666	if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
4667	mp_clear (&t);
4668	return res;
4669	}
4670	++digs;
4671	}
4672	mp_clear (&t);
4673
4674	/* return digs + 1, the 1 is for the NULL byte that would be required. */
4675	*size = digs + 1;
4676	return MP_OKAY;
4677	}
4678
4679	/* stores a bignum as a ASCII string in a given radix (2..64) */
4680	int mp_toradix (mp_int a, char str, int radix)
4681	{
4682	int res, digs;
4683	mp_int t;
4684	mp_digit d;
4685	char *_s = str;
4686
4687	/* check range of the radix */
4688	if (radix < 2 \|\| radix > 64) {
4689	return MP_VAL;
4690	}
4691
4692	/* quick out if its zero */
4693	if (mp_iszero(a) == 1) {
4694	*str++ = '0';
4695	*str = '\0';
4696	return MP_OKAY;
4697	}
4698
4699	if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
4700	return res;
4701	}
4702
4703	/* if it is negative output a - */
4704	if (t.sign == MP_NEG) {
4705	++_s;
4706	*str++ = '-';
4707	t.sign = MP_ZPOS;
4708	}
4709
4710	digs = 0;
4711	while (mp_iszero (&t) == 0) {
4712	if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
4713	mp_clear (&t);
4714	return res;
4715	}
4716	*str++ = mp_s_rmap[d];
4717	++digs;
4718	}
4719
4720	/* reverse the digits of the string. In this case _s points
4721	* to the first digit [exluding the sign] of the number]
4722	*/
4723	bn_reverse ((unsigned char *)_s, digs);
4724
4725	/* append a NULL so the string is properly terminated */
4726	*str = '\0';
4727
4728	mp_clear (&t);
4729	return MP_OKAY;
4730	}
4731
4732	#endif /* defined(WOLFSSL_KEY_GEN) \|\| defined(HAVE_COMP_KEY) */
4733
4734	#endif /* USE_FAST_MATH */
4735
4736	#endif /* NO_BIG_INT */
4737

Note: See TracBrowser for help on using the repository browser.

Download in other formats: