Context Navigation

integer.c@ 389

Last change on this file since 389 was 389, checked in by coas-nagasima, 5 years ago
ビルドが通るよう更新
Property svn:eol-style set to `native` Property svn:mime-type set to `text/x-csrc;charset=UTF-8`
File size: 110.5 KB

Rev	Line
[388]	1	/* integer.c
	2	*
	3	* Copyright (C) 2006-2017 wolfSSL Inc.
	4	*
	5	* This file is part of wolfSSL.
	6	*
	7	* wolfSSL is free software; you can redistribute it and/or modify
	8	* it under the terms of the GNU General Public License as published by
	9	* the Free Software Foundation; either version 2 of the License, or
	10	* (at your option) any later version.
	11	*
	12	* wolfSSL is distributed in the hope that it will be useful,
	13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	15	* GNU General Public License for more details.
	16	*
	17	* You should have received a copy of the GNU General Public License
	18	* along with this program; if not, write to the Free Software
	19	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
	20	*/
	21
	22
	23
	24	/*
	25	* Based on public domain LibTomMath 0.38 by Tom St Denis, tomstdenis@iahu.ca,
	26	* http://math.libtomcrypt.com
	27	*/
	28
	29
	30	#ifdef HAVE_CONFIG_H
	31	#include <config.h>
	32	#endif
	33
	34	/* in case user set USE_FAST_MATH there */
	35	#include <wolfssl/wolfcrypt/settings.h>
	36
	37	#ifdef NO_INLINE
	38	#include <wolfssl/wolfcrypt/misc.h>
	39	#else
	40	#define WOLFSSL_MISC_INCLUDED
	41	#include <wolfcrypt/src/misc.c>
	42	#endif
	43
	44	#ifndef NO_BIG_INT
	45
	46	#ifndef USE_FAST_MATH
	47
	48	#ifndef WOLFSSL_SP_MATH
	49
	50	#include <wolfssl/wolfcrypt/integer.h>
	51
	52	#if defined(FREESCALE_LTC_TFM)
	53	#include <wolfssl/wolfcrypt/port/nxp/ksdk_port.h>
	54	#endif
	55	#ifdef WOLFSSL_DEBUG_MATH
	56	#include <stdio.h>
	57	#endif
	58
	59	#ifndef NO_WOLFSSL_SMALL_STACK
	60	#ifndef WOLFSSL_SMALL_STACK
	61	#define WOLFSSL_SMALL_STACK
	62	#endif
	63	#endif
	64
	/* Headers pulled in only when SHOW_GEN is defined: fio.h or nio.h on
	 * Freescale MQX builds (selected by MQX_USE_IO_OLD), stdio.h elsewhere. */
	#ifdef SHOW_GEN
		#if defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
			#if MQX_USE_IO_OLD
				#include <fio.h>
			#else
				#include <nio.h>
			#endif
		#else
			#include <stdio.h>
		#endif
	#endif
	76
	/*
	 * Reverse the first len bytes of s in place (used for radix code).
	 *
	 * s   : buffer to reverse; a NULL buffer is ignored
	 * len : number of bytes to reverse; values below 2 leave s untouched
	 */
	static void
	bn_reverse (unsigned char *s, int len)
	{
		unsigned char *lo, *hi;
		unsigned char t;

		if (s == NULL || len < 2) {
			return;
		}

		/* swap the outermost pair and walk both ends towards the middle */
		lo = s;
		hi = s + (len - 1);
		while (lo < hi) {
			t = *lo;
			*lo++ = *hi;
			*hi-- = t;
		}
	}
	94
	95	/* math settings check */
	96	word32 CheckRunTimeSettings(void)
	97	{
	98	return CTC_SETTINGS;
	99	}
	100
	101
	102	/* handle up to 6 inits */
	103	int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e,
	104	mp_int* f)
	105	{
	106	int res = MP_OKAY;
	107
	108	if (a) XMEMSET(a, 0, sizeof(mp_int));
	109	if (b) XMEMSET(b, 0, sizeof(mp_int));
	110	if (c) XMEMSET(c, 0, sizeof(mp_int));
	111	if (d) XMEMSET(d, 0, sizeof(mp_int));
	112	if (e) XMEMSET(e, 0, sizeof(mp_int));
	113	if (f) XMEMSET(f, 0, sizeof(mp_int));
	114
	115	if (a && ((res = mp_init(a)) != MP_OKAY))
	116	return res;
	117
	118	if (b && ((res = mp_init(b)) != MP_OKAY)) {
	119	mp_clear(a);
	120	return res;
	121	}
	122
	123	if (c && ((res = mp_init(c)) != MP_OKAY)) {
	124	mp_clear(a); mp_clear(b);
	125	return res;
	126	}
	127
	128	if (d && ((res = mp_init(d)) != MP_OKAY)) {
	129	mp_clear(a); mp_clear(b); mp_clear(c);
	130	return res;
	131	}
	132
	133	if (e && ((res = mp_init(e)) != MP_OKAY)) {
	134	mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d);
	135	return res;
	136	}
	137
	138	if (f && ((res = mp_init(f)) != MP_OKAY)) {
	139	mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d); mp_clear(e);
	140	return res;
	141	}
	142
	143	return res;
	144	}
	145
	146
	147	/* init a new mp_int */
	148	int mp_init (mp_int * a)
	149	{
	150	/* Safeguard against passing in a null pointer */
	151	if (a == NULL)
	152	return MP_VAL;
	153
	154	/* defer allocation until mp_grow */
	155	a->dp = NULL;
	156
	157	/* set the used to zero, allocated digits to the default precision
	158	* and sign to positive */
	159	a->used = 0;
	160	a->alloc = 0;
	161	a->sign = MP_ZPOS;
	162	#ifdef HAVE_WOLF_BIGINT
	163	wc_bigint_init(&a->raw);
	164	#endif
	165
	166	return MP_OKAY;
	167	}
	168
	169
	170	/* clear one (frees) */
	171	void mp_clear (mp_int * a)
	172	{
	173	int i;
	174
	175	if (a == NULL)
	176	return;
	177
	178	/* only do anything if a hasn't been freed previously */
	179	if (a->dp != NULL) {
	180	/* first zero the digits */
	181	for (i = 0; i < a->used; i++) {
	182	a->dp[i] = 0;
	183	}
	184
	185	/* free ram */
	186	mp_free(a);
	187
	188	/* reset members to make debugging easier */
	189	a->alloc = a->used = 0;
	190	a->sign = MP_ZPOS;
	191	}
	192	}
	193
	194	void mp_free (mp_int * a)
	195	{
	196	/* only do anything if a hasn't been freed previously */
	197	if (a->dp != NULL) {
	198	/* free ram */
	199	XFREE(a->dp, 0, DYNAMIC_TYPE_BIGINT);
	200	a->dp = NULL;
	201	}
	202
	203	#ifdef HAVE_WOLF_BIGINT
	204	wc_bigint_free(&a->raw);
	205	#endif
	206	}
	207
	208	void mp_forcezero(mp_int * a)
	209	{
	210	if (a == NULL)
	211	return;
	212
	213	/* only do anything if a hasn't been freed previously */
	214	if (a->dp != NULL) {
	215	/* force zero the used digits */
	216	ForceZero(a->dp, a->used * sizeof(mp_digit));
	217	#ifdef HAVE_WOLF_BIGINT
	218	wc_bigint_zero(&a->raw);
	219	#endif
	220	/* free ram */
	221	mp_free(a);
	222
	223	/* reset members to make debugging easier */
	224	a->alloc = a->used = 0;
	225	a->sign = MP_ZPOS;
	226	}
	227
	228	a->sign = MP_ZPOS;
	229	a->used = 0;
	230	}
	231
	232
	233	/* get the size for an unsigned equivalent */
	234	int mp_unsigned_bin_size (mp_int * a)
	235	{
	236	int size = mp_count_bits (a);
	237	return (size / 8 + ((size & 7) != 0 ? 1 : 0));
	238	}
	239
	240
	241	/* returns the number of bits in an int */
	242	int mp_count_bits (mp_int * a)
	243	{
	244	int r;
	245	mp_digit q;
	246
	247	/* shortcut */
	248	if (a->used == 0) {
	249	return 0;
	250	}
	251
	252	/* get number of digits and add that */
	253	r = (a->used - 1) * DIGIT_BIT;
	254
	255	/* take the last digit and count the bits in it */
	256	q = a->dp[a->used - 1];
	257	while (q > ((mp_digit) 0)) {
	258	++r;
	259	q >>= ((mp_digit) 1);
	260	}
	261	return r;
	262	}
	263
	264
	265	int mp_leading_bit (mp_int * a)
	266	{
	267	int bit = 0;
	268	mp_int t;
	269
	270	if (mp_init_copy(&t, a) != MP_OKAY)
	271	return 0;
	272
	273	while (mp_iszero(&t) == MP_NO) {
	274	#ifndef MP_8BIT
	275	bit = (t.dp[0] & 0x80) != 0;
	276	#else
	277	bit = (t.dp[0] \| ((t.dp[1] & 0x01) << 7)) & 0x80 != 0;
	278	#endif
	279	if (mp_div_2d (&t, 8, &t, NULL) != MP_OKAY)
	280	break;
	281	}
	282	mp_clear(&t);
	283	return bit;
	284	}
	285
	286	int mp_to_unsigned_bin_at_pos(int x, mp_int t, unsigned char b)
	287	{
	288	int res = 0;
	289	while (mp_iszero(t) == MP_NO) {
	290	#ifndef MP_8BIT
	291	b[x++] = (unsigned char) (t->dp[0] & 255);
	292	#else
	293	b[x++] = (unsigned char) (t->dp[0] \| ((t->dp[1] & 0x01) << 7));
	294	#endif
	295	if ((res = mp_div_2d (t, 8, t, NULL)) != MP_OKAY) {
	296	return res;
	297	}
	298	res = x;
	299	}
	300	return res;
	301	}
	302
	303	/* store in unsigned [big endian] format */
	304	int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
	305	{
	306	int x, res;
	307	mp_int t;
	308
	309	if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
	310	return res;
	311	}
	312
	313	x = mp_to_unsigned_bin_at_pos(0, &t, b);
	314	if (x < 0) {
	315	mp_clear(&t);
	316	return x;
	317	}
	318
	319	bn_reverse (b, x);
	320	mp_clear (&t);
	321	return res;
	322	}
	323
	324	int mp_to_unsigned_bin_len(mp_int * a, unsigned char *b, int c)
	325	{
	326	int i, len;
	327
	328	len = mp_unsigned_bin_size(a);
	329
	330	/* pad front w/ zeros to match length */
	331	for (i = 0; i < c - len; i++)
	332	b[i] = 0x00;
	333	return mp_to_unsigned_bin(a, b + i);
	334	}
	335
	336	/* creates "a" then copies b into it */
	337	int mp_init_copy (mp_int * a, mp_int * b)
	338	{
	339	int res;
	340
	341	if ((res = mp_init_size (a, b->used)) != MP_OKAY) {
	342	return res;
	343	}
	344
	345	if((res = mp_copy (b, a)) != MP_OKAY) {
	346	mp_clear(a);
	347	}
	348
	349	return res;
	350	}
	351
	352
	353	/* copy, b = a */
	354	int mp_copy (mp_int * a, mp_int * b)
	355	{
	356	int res, n;
	357
	358	/* Safeguard against passing in a null pointer */
	359	if (a == NULL \|\| b == NULL)
	360	return MP_VAL;
	361
	362	/* if dst == src do nothing */
	363	if (a == b) {
	364	return MP_OKAY;
	365	}
	366
	367	/* grow dest */
	368	if (b->alloc < a->used \|\| b->alloc == 0) {
	369	if ((res = mp_grow (b, a->used)) != MP_OKAY) {
	370	return res;
	371	}
	372	}
	373
	374	/* zero b and copy the parameters over */
	375	{
	376	mp_digit tmpa, tmpb;
	377
	378	/* pointer aliases */
	379
	380	/* source */
	381	tmpa = a->dp;
	382
	383	/* destination */
	384	tmpb = b->dp;
	385
	386	/* copy all the digits */
	387	for (n = 0; n < a->used; n++) {
	388	tmpb++ = tmpa++;
	389	}
	390
	391	/* clear high digits */
	392	for (; n < b->used && b->dp; n++) {
	393	*tmpb++ = 0;
	394	}
	395	}
	396
	397	/* copy used count and sign */
	398	b->used = a->used;
	399	b->sign = a->sign;
	400	return MP_OKAY;
	401	}
	402
	403
	404	/* grow as required */
	405	int mp_grow (mp_int * a, int size)
	406	{
	407	int i;
	408	mp_digit *tmp;
	409
	410	/* if the alloc size is smaller alloc more ram */
	411	if (a->alloc < size \|\| size == 0) {
	412	/* ensure there are always at least MP_PREC digits extra on top */
	413	size += (MP_PREC * 2) - (size % MP_PREC);
	414
	415	/* reallocate the array a->dp
	416	*
	417	* We store the return in a temporary variable
	418	* in case the operation failed we don't want
	419	* to overwrite the dp member of a.
	420	*/
	421	tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size, NULL,
	422	DYNAMIC_TYPE_BIGINT);
	423	if (tmp == NULL) {
	424	/* reallocation failed but "a" is still valid [can be freed] */
	425	return MP_MEM;
	426	}
	427
	428	/* reallocation succeeded so set a->dp */
	429	a->dp = tmp;
	430
	431	/* zero excess digits */
	432	i = a->alloc;
	433	a->alloc = size;
	434	for (; i < a->alloc; i++) {
	435	a->dp[i] = 0;
	436	}
	437	}
	438	return MP_OKAY;
	439	}
	440
	441
	442	/* shift right by a certain bit count (store quotient in c, optional
	443	remainder in d) */
	444	int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
	445	{
	446	int D, res;
	447	mp_int t;
	448
	449
	450	/* if the shift count is <= 0 then we do no work */
	451	if (b <= 0) {
	452	res = mp_copy (a, c);
	453	if (d != NULL) {
	454	mp_zero (d);
	455	}
	456	return res;
	457	}
	458
	459	if ((res = mp_init (&t)) != MP_OKAY) {
	460	return res;
	461	}
	462
	463	/* get the remainder */
	464	if (d != NULL) {
	465	if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
	466	mp_clear (&t);
	467	return res;
	468	}
	469	}
	470
	471	/* copy */
	472	if ((res = mp_copy (a, c)) != MP_OKAY) {
	473	mp_clear (&t);
	474	return res;
	475	}
	476
	477	/* shift by as many digits in the bit count */
	478	if (b >= (int)DIGIT_BIT) {
	479	mp_rshd (c, b / DIGIT_BIT);
	480	}
	481
	482	/* shift any bit count < DIGIT_BIT */
	483	D = (b % DIGIT_BIT);
	484	if (D != 0) {
	485	mp_rshb(c, D);
	486	}
	487	mp_clamp (c);
	488	if (d != NULL) {
	489	mp_exch (&t, d);
	490	}
	491	mp_clear (&t);
	492	return MP_OKAY;
	493	}
	494
	495
	496	/* set to zero */
	497	void mp_zero (mp_int * a)
	498	{
	499	int n;
	500	mp_digit *tmp;
	501
	502	if (a == NULL)
	503	return;
	504
	505	a->sign = MP_ZPOS;
	506	a->used = 0;
	507
	508	tmp = a->dp;
	509	for (n = 0; n < a->alloc; n++) {
	510	*tmp++ = 0;
	511	}
	512	}
	513
	514
	515	/* trim unused digits
	516	*
	517	* This is used to ensure that leading zero digits are
	518	* trimmed and the leading "used" digit will be non-zero
	519	* Typically very fast. Also fixes the sign if there
	520	* are no more leading digits
	521	*/
	522	void mp_clamp (mp_int * a)
	523	{
	524	/* decrease used while the most significant digit is
	525	* zero.
	526	*/
	527	while (a->used > 0 && a->dp[a->used - 1] == 0) {
	528	--(a->used);
	529	}
	530
	531	/* reset the sign flag if used == 0 */
	532	if (a->used == 0) {
	533	a->sign = MP_ZPOS;
	534	}
	535	}
	536
	537
	538	/* swap the elements of two integers, for cases where you can't simply swap the
	539	* mp_int pointers around
	540	*/
	541	void mp_exch (mp_int * a, mp_int * b)
	542	{
	543	mp_int t;
	544
	545	t = *a;
	546	a = b;
	547	*b = t;
	548	}
	549
	550
	551	/* shift right a certain number of bits */
	552	void mp_rshb (mp_int *c, int x)
	553	{
	554	mp_digit *tmpc, mask, shift;
	555	mp_digit r, rr;
	556	mp_digit D = x;
	557
	558	/* mask */
	559	mask = (((mp_digit)1) << D) - 1;
	560
	561	/* shift for lsb */
	562	shift = DIGIT_BIT - D;
	563
	564	/* alias */
	565	tmpc = c->dp + (c->used - 1);
	566
	567	/* carry */
	568	r = 0;
	569	for (x = c->used - 1; x >= 0; x--) {
	570	/* get the lower bits of this word in a temp */
	571	rr = *tmpc & mask;
	572
	573	/* shift the current word and mix in the carry bits from previous word */
	574	tmpc = (tmpc >> D) \| (r << shift);
	575	--tmpc;
	576
	577	/* set the carry to the carry bits of the current word found above */
	578	r = rr;
	579	}
	580	mp_clamp(c);
	581	}
	582
	583
	584	/* shift right a certain amount of digits */
	585	void mp_rshd (mp_int * a, int b)
	586	{
	587	int x;
	588
	589	/* if b <= 0 then ignore it */
	590	if (b <= 0) {
	591	return;
	592	}
	593
	594	/* if b > used then simply zero it and return */
	595	if (a->used <= b) {
	596	mp_zero (a);
	597	return;
	598	}
	599
	600	{
	601	mp_digit bottom, top;
	602
	603	/* shift the digits down */
	604
	605	/* bottom */
	606	bottom = a->dp;
	607
	608	/* top [offset into digits] */
	609	top = a->dp + b;
	610
	611	/* this is implemented as a sliding window where
	612	* the window is b-digits long and digits from
	613	* the top of the window are copied to the bottom
	614	*
	615	* e.g.
	616
	617	b-2 \| b-1 \| b0 \| b1 \| b2 \| ... \| bb \| ---->
	618	/\ \| ---->
	619	\-------------------/ ---->
	620	*/
	621	for (x = 0; x < (a->used - b); x++) {
	622	bottom++ = top++;
	623	}
	624
	625	/* zero the top digits */
	626	for (; x < a->used; x++) {
	627	*bottom++ = 0;
	628	}
	629	}
	630
	631	/* remove excess digits */
	632	a->used -= b;
	633	}
	634
	635
	636	/* calc a value mod 2*b /
	637	int mp_mod_2d (mp_int * a, int b, mp_int * c)
	638	{
	639	int x, res;
	640
	641	/* if b is <= 0 then zero the int */
	642	if (b <= 0) {
	643	mp_zero (c);
	644	return MP_OKAY;
	645	}
	646
	647	/* if the modulus is larger than the value than return */
	648	if (b >= (int) (a->used * DIGIT_BIT)) {
	649	res = mp_copy (a, c);
	650	return res;
	651	}
	652
	653	/* copy */
	654	if ((res = mp_copy (a, c)) != MP_OKAY) {
	655	return res;
	656	}
	657
	658	/* zero digits above the last digit of the modulus */
	659	for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
	660	c->dp[x] = 0;
	661	}
	662	/* clear the digit that is not completely outside/inside the modulus */
	663	c->dp[b / DIGIT_BIT] &= (mp_digit) ((((mp_digit) 1) <<
	664	(((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
	665	mp_clamp (c);
	666	return MP_OKAY;
	667	}
	668
	669
	670	/* reads a unsigned char array, assumes the msb is stored first [big endian] */
	671	int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
	672	{
	673	int res;
	674
	675	/* make sure there are at least two digits */
	676	if (a->alloc < 2) {
	677	if ((res = mp_grow(a, 2)) != MP_OKAY) {
	678	return res;
	679	}
	680	}
	681
	682	/* zero the int */
	683	mp_zero (a);
	684
	685	/* read the bytes in */
	686	while (c-- > 0) {
	687	if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
	688	return res;
	689	}
	690
	691	#ifndef MP_8BIT
	692	a->dp[0] \|= *b++;
	693	a->used += 1;
	694	#else
	695	a->dp[0] = (*b & MP_MASK);
	696	a->dp[1] \|= ((*b++ >> 7U) & 1);
	697	a->used += 2;
	698	#endif
	699	}
	700	mp_clamp (a);
	701	return MP_OKAY;
	702	}
	703
	704
	705	/* shift left by a certain bit count */
	706	int mp_mul_2d (mp_int * a, int b, mp_int * c)
	707	{
	708	mp_digit d;
	709	int res;
	710
	711	/* copy */
	712	if (a != c) {
	713	if ((res = mp_copy (a, c)) != MP_OKAY) {
	714	return res;
	715	}
	716	}
	717
	718	if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
	719	if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
	720	return res;
	721	}
	722	}
	723
	724	/* shift by as many digits in the bit count */
	725	if (b >= (int)DIGIT_BIT) {
	726	if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
	727	return res;
	728	}
	729	}
	730
	731	/* shift any bit count < DIGIT_BIT */
	732	d = (mp_digit) (b % DIGIT_BIT);
	733	if (d != 0) {
	734	mp_digit *tmpc, shift, mask, r, rr;
	735	int x;
	736
	737	/* bitmask for carries */
	738	mask = (((mp_digit)1) << d) - 1;
	739
	740	/* shift for msbs */
	741	shift = DIGIT_BIT - d;
	742
	743	/* alias */
	744	tmpc = c->dp;
	745
	746	/* carry */
	747	r = 0;
	748	for (x = 0; x < c->used; x++) {
	749	/* get the higher bits of the current word */
	750	rr = (*tmpc >> shift) & mask;
	751
	752	/* shift the current word and OR in the carry */
	753	tmpc = (mp_digit)(((tmpc << d) \| r) & MP_MASK);
	754	++tmpc;
	755
	756	/* set the carry to the carry bits of the current word */
	757	r = rr;
	758	}
	759
	760	/* set final carry */
	761	if (r != 0) {
	762	c->dp[(c->used)++] = r;
	763	}
	764	}
	765	mp_clamp (c);
	766	return MP_OKAY;
	767	}
	768
	769
	770	/* shift left a certain amount of digits */
	771	int mp_lshd (mp_int * a, int b)
	772	{
	773	int x, res;
	774
	775	/* if its less than zero return */
	776	if (b <= 0) {
	777	return MP_OKAY;
	778	}
	779
	780	/* grow to fit the new digits */
	781	if (a->alloc < a->used + b) {
	782	if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
	783	return res;
	784	}
	785	}
	786
	787	{
	788	mp_digit top, bottom;
	789
	790	/* increment the used by the shift amount then copy upwards */
	791	a->used += b;
	792
	793	/* top */
	794	top = a->dp + a->used - 1;
	795
	796	/* base */
	797	bottom = a->dp + a->used - 1 - b;
	798
	799	/* much like mp_rshd this is implemented using a sliding window
	800	* except the window goes the other way around. Copying from
	801	* the bottom to the top. see bn_mp_rshd.c for more info.
	802	*/
	803	for (x = a->used - 1; x >= b; x--) {
	804	top-- = bottom--;
	805	}
	806
	807	/* zero the lower digits */
	808	top = a->dp;
	809	for (x = 0; x < b; x++) {
	810	*top++ = 0;
	811	}
	812	}
	813	return MP_OKAY;
	814	}
	815
	816
	817	/* this is a shell function that calls either the normal or Montgomery
	818	* exptmod functions. Originally the call to the montgomery code was
	819	* embedded in the normal function but that wasted a lot of stack space
	820	* for nothing (since 99% of the time the Montgomery code would be called)
	821	*/
	822	#if defined(FREESCALE_LTC_TFM)
	823	int wolfcrypt_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
	824	#else
	825	int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
	826	#endif
	827	{
	828	int dr;
	829
	830	/* modulus P must be positive */
	831	if (P->sign == MP_NEG) {
	832	return MP_VAL;
	833	}
	834
	835	/* if exponent X is negative we have to recurse */
	836	if (X->sign == MP_NEG) {
	837	#ifdef BN_MP_INVMOD_C
	838	mp_int tmpG, tmpX;
	839	int err;
	840
	841	/* first compute 1/G mod P */
	842	if ((err = mp_init(&tmpG)) != MP_OKAY) {
	843	return err;
	844	}
	845	if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
	846	mp_clear(&tmpG);
	847	return err;
	848	}
	849
	850	/* now get \|X\| */
	851	if ((err = mp_init(&tmpX)) != MP_OKAY) {
	852	mp_clear(&tmpG);
	853	return err;
	854	}
	855	if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
	856	mp_clear(&tmpG);
	857	mp_clear(&tmpX);
	858	return err;
	859	}
	860
	861	/* and now compute (1/G)\|X\| instead of GX [X < 0] */
	862	err = mp_exptmod(&tmpG, &tmpX, P, Y);
	863	mp_clear(&tmpG);
	864	mp_clear(&tmpX);
	865	return err;
	866	#else
	867	/* no invmod */
	868	return MP_VAL;
	869	#endif
	870	}
	871
	872	/* modified diminished radix reduction */
	873	#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && \
	874	defined(BN_S_MP_EXPTMOD_C)
	875	if (mp_reduce_is_2k_l(P) == MP_YES) {
	876	return s_mp_exptmod(G, X, P, Y, 1);
	877	}
	878	#endif
	879
	880	#ifdef BN_MP_DR_IS_MODULUS_C
	881	/* is it a DR modulus? */
	882	dr = mp_dr_is_modulus(P);
	883	#else
	884	/* default to no */
	885	dr = 0;
	886	#endif
	887
	888	#ifdef BN_MP_REDUCE_IS_2K_C
	889	/* if not, is it a unrestricted DR modulus? */
	890	if (dr == 0) {
	891	dr = mp_reduce_is_2k(P) << 1;
	892	}
	893	#endif
	894
	895	/* if the modulus is odd or dr != 0 use the montgomery method */
	896	#ifdef BN_MP_EXPTMOD_FAST_C
	897	if (mp_isodd (P) == MP_YES \|\| dr != 0) {
	898	return mp_exptmod_fast (G, X, P, Y, dr);
	899	} else {
	900	#endif
	901	#ifdef BN_S_MP_EXPTMOD_C
	902	/* otherwise use the generic Barrett reduction technique */
	903	return s_mp_exptmod (G, X, P, Y, 0);
	904	#else
	905	/* no exptmod for evens */
	906	return MP_VAL;
	907	#endif
	908	#ifdef BN_MP_EXPTMOD_FAST_C
	909	}
	910	#endif
	911	}
	912
	913
	914	/* b = \|a\|
	915	*
	916	* Simple function copies the input and fixes the sign to positive
	917	*/
	918	int mp_abs (mp_int * a, mp_int * b)
	919	{
	920	int res;
	921
	922	/* copy a to b */
	923	if (a != b) {
	924	if ((res = mp_copy (a, b)) != MP_OKAY) {
	925	return res;
	926	}
	927	}
	928
	929	/* force the sign of b to positive */
	930	b->sign = MP_ZPOS;
	931
	932	return MP_OKAY;
	933	}
	934
	935
	936	/* hac 14.61, pp608 */
	937	#if defined(FREESCALE_LTC_TFM)
	938	int wolfcrypt_mp_invmod(mp_int * a, mp_int * b, mp_int * c)
	939	#else
	940	int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
	941	#endif
	942	{
	943	/* b cannot be negative */
	944	if (b->sign == MP_NEG \|\| mp_iszero(b) == MP_YES) {
	945	return MP_VAL;
	946	}
	947
	948	#ifdef BN_FAST_MP_INVMOD_C
	949	/* if the modulus is odd we can use a faster routine instead */
	950	if ((mp_isodd(b) == MP_YES) && (mp_cmp_d(b, 1) != MP_EQ)) {
	951	return fast_mp_invmod (a, b, c);
	952	}
	953	#endif
	954
	955	#ifdef BN_MP_INVMOD_SLOW_C
	956	return mp_invmod_slow(a, b, c);
	957	#else
	958	return MP_VAL;
	959	#endif
	960	}
	961
	962
	963	/* computes the modular inverse via binary extended euclidean algorithm,
	964	* that is c = 1/a mod b
	965	*
	966	* Based on slow invmod except this is optimized for the case where b is
	967	* odd as per HAC Note 14.64 on pp. 610
	968	*/
	969	int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
	970	{
	971	mp_int x, y, u, v, B, D;
	972	int res, neg, loop_check = 0;
	973
	974	/* 2. [modified] b must be odd */
	975	if (mp_iseven (b) == MP_YES) {
	976	return MP_VAL;
	977	}
	978
	979	/* init all our temps */
	980	if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D)) != MP_OKAY) {
	981	return res;
	982	}
	983
	984	/* x == modulus, y == value to invert */
	985	if ((res = mp_copy (b, &x)) != MP_OKAY) {
	986	goto LBL_ERR;
	987	}
	988
	989	/* we need y = \|a\| */
	990	if ((res = mp_mod (a, b, &y)) != MP_OKAY) {
	991	goto LBL_ERR;
	992	}
	993
	994	/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
	995	if ((res = mp_copy (&x, &u)) != MP_OKAY) {
	996	goto LBL_ERR;
	997	}
	998	if ((res = mp_copy (&y, &v)) != MP_OKAY) {
	999	goto LBL_ERR;
	1000	}
	1001	if ((res = mp_set (&D, 1)) != MP_OKAY) {
	1002	goto LBL_ERR;
	1003	}
	1004
	1005	top:
	1006	/* 4. while u is even do */
	1007	while (mp_iseven (&u) == MP_YES) {
	1008	/* 4.1 u = u/2 */
	1009	if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
	1010	goto LBL_ERR;
	1011	}
	1012	/* 4.2 if B is odd then */
	1013	if (mp_isodd (&B) == MP_YES) {
	1014	if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
	1015	goto LBL_ERR;
	1016	}
	1017	}
	1018	/* B = B/2 */
	1019	if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
	1020	goto LBL_ERR;
	1021	}
	1022	}
	1023
	1024	/* 5. while v is even do */
	1025	while (mp_iseven (&v) == MP_YES) {
	1026	/* 5.1 v = v/2 */
	1027	if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
	1028	goto LBL_ERR;
	1029	}
	1030	/* 5.2 if D is odd then */
	1031	if (mp_isodd (&D) == MP_YES) {
	1032	/* D = (D-x)/2 */
	1033	if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
	1034	goto LBL_ERR;
	1035	}
	1036	}
	1037	/* D = D/2 */
	1038	if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
	1039	goto LBL_ERR;
	1040	}
	1041	}
	1042
	1043	/* 6. if u >= v then */
	1044	if (mp_cmp (&u, &v) != MP_LT) {
	1045	/* u = u - v, B = B - D */
	1046	if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
	1047	goto LBL_ERR;
	1048	}
	1049
	1050	if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
	1051	goto LBL_ERR;
	1052	}
	1053	} else {
	1054	/* v - v - u, D = D - B */
	1055	if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
	1056	goto LBL_ERR;
	1057	}
	1058
	1059	if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
	1060	goto LBL_ERR;
	1061	}
	1062	}
	1063
	1064	/* if not zero goto step 4 */
	1065	if (mp_iszero (&u) == MP_NO) {
	1066	if (++loop_check > MAX_INVMOD_SZ) {
	1067	res = MP_VAL;
	1068	goto LBL_ERR;
	1069	}
	1070	goto top;
	1071	}
	1072
	1073	/* now a = C, b = D, gcd == gv /
	1074
	1075	/* if v != 1 then there is no inverse */
	1076	if (mp_cmp_d (&v, 1) != MP_EQ) {
	1077	res = MP_VAL;
	1078	goto LBL_ERR;
	1079	}
	1080
	1081	/* b is now the inverse */
	1082	neg = a->sign;
	1083	while (D.sign == MP_NEG) {
	1084	if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
	1085	goto LBL_ERR;
	1086	}
	1087	}
	1088	/* too big */
	1089	while (mp_cmp_mag(&D, b) != MP_LT) {
	1090	if ((res = mp_sub(&D, b, &D)) != MP_OKAY) {
	1091	goto LBL_ERR;
	1092	}
	1093	}
	1094	mp_exch (&D, c);
	1095	c->sign = neg;
	1096	res = MP_OKAY;
	1097
	1098	LBL_ERR:mp_clear(&x);
	1099	mp_clear(&y);
	1100	mp_clear(&u);
	1101	mp_clear(&v);
	1102	mp_clear(&B);
	1103	mp_clear(&D);
	1104	return res;
	1105	}
	1106
	1107
	1108	/* hac 14.61, pp608 */
	1109	int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
	1110	{
	1111	mp_int x, y, u, v, A, B, C, D;
	1112	int res;
	1113
	1114	/* b cannot be negative */
	1115	if (b->sign == MP_NEG \|\| mp_iszero(b) == MP_YES) {
	1116	return MP_VAL;
	1117	}
	1118
	1119	/* init temps */
	1120	if ((res = mp_init_multi(&x, &y, &u, &v,
	1121	&A, &B)) != MP_OKAY) {
	1122	return res;
	1123	}
	1124
	1125	/* init rest of tmps temps */
	1126	if ((res = mp_init_multi(&C, &D, 0, 0, 0, 0)) != MP_OKAY) {
	1127	mp_clear(&x);
	1128	mp_clear(&y);
	1129	mp_clear(&u);
	1130	mp_clear(&v);
	1131	mp_clear(&A);
	1132	mp_clear(&B);
	1133	return res;
	1134	}
	1135
	1136	/* x = a, y = b */
	1137	if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
	1138	goto LBL_ERR;
	1139	}
	1140	if ((res = mp_copy (b, &y)) != MP_OKAY) {
	1141	goto LBL_ERR;
	1142	}
	1143
	1144	/* 2. [modified] if x,y are both even then return an error! */
	1145	if (mp_iseven (&x) == MP_YES && mp_iseven (&y) == MP_YES) {
	1146	res = MP_VAL;
	1147	goto LBL_ERR;
	1148	}
	1149
	1150	/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
	1151	if ((res = mp_copy (&x, &u)) != MP_OKAY) {
	1152	goto LBL_ERR;
	1153	}
	1154	if ((res = mp_copy (&y, &v)) != MP_OKAY) {
	1155	goto LBL_ERR;
	1156	}
	1157	if ((res = mp_set (&A, 1)) != MP_OKAY) {
	1158	goto LBL_ERR;
	1159	}
	1160	if ((res = mp_set (&D, 1)) != MP_OKAY) {
	1161	goto LBL_ERR;
	1162	}
	1163
	1164	top:
	1165	/* 4. while u is even do */
	1166	while (mp_iseven (&u) == MP_YES) {
	1167	/* 4.1 u = u/2 */
	1168	if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
	1169	goto LBL_ERR;
	1170	}
	1171	/* 4.2 if A or B is odd then */
	1172	if (mp_isodd (&A) == MP_YES \|\| mp_isodd (&B) == MP_YES) {
	1173	/* A = (A+y)/2, B = (B-x)/2 */
	1174	if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
	1175	goto LBL_ERR;
	1176	}
	1177	if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
	1178	goto LBL_ERR;
	1179	}
	1180	}
	1181	/* A = A/2, B = B/2 */
	1182	if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
	1183	goto LBL_ERR;
	1184	}
	1185	if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
	1186	goto LBL_ERR;
	1187	}
	1188	}
	1189
	1190	/* 5. while v is even do */
	1191	while (mp_iseven (&v) == MP_YES) {
	1192	/* 5.1 v = v/2 */
	1193	if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
	1194	goto LBL_ERR;
	1195	}
	1196	/* 5.2 if C or D is odd then */
	1197	if (mp_isodd (&C) == MP_YES \|\| mp_isodd (&D) == MP_YES) {
	1198	/* C = (C+y)/2, D = (D-x)/2 */
	1199	if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
	1200	goto LBL_ERR;
	1201	}
	1202	if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
	1203	goto LBL_ERR;
	1204	}
	1205	}
	1206	/* C = C/2, D = D/2 */
	1207	if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
	1208	goto LBL_ERR;
	1209	}
	1210	if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
	1211	goto LBL_ERR;
	1212	}
	1213	}
	1214
	1215	/* 6. if u >= v then */
	1216	if (mp_cmp (&u, &v) != MP_LT) {
	1217	/* u = u - v, A = A - C, B = B - D */
	1218	if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
	1219	goto LBL_ERR;
	1220	}
	1221
	1222	if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
	1223	goto LBL_ERR;
	1224	}
	1225
	1226	if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
	1227	goto LBL_ERR;
	1228	}
	1229	} else {
	1230	/* v - v - u, C = C - A, D = D - B */
	1231	if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
	1232	goto LBL_ERR;
	1233	}
	1234
	1235	if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
	1236	goto LBL_ERR;
	1237	}
	1238
	1239	if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
	1240	goto LBL_ERR;
	1241	}
	1242	}
	1243
	1244	/* if not zero goto step 4 */
	1245	if (mp_iszero (&u) == MP_NO)
	1246	goto top;
	1247
	1248	/* now a = C, b = D, gcd == gv /
	1249
	1250	/* if v != 1 then there is no inverse */
	1251	if (mp_cmp_d (&v, 1) != MP_EQ) {
	1252	res = MP_VAL;
	1253	goto LBL_ERR;
	1254	}
	1255
	1256	/* if its too low */
	1257	while (mp_cmp_d(&C, 0) == MP_LT) {
	1258	if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
	1259	goto LBL_ERR;
	1260	}
	1261	}
	1262
	1263	/* too big */
	1264	while (mp_cmp_mag(&C, b) != MP_LT) {
	1265	if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
	1266	goto LBL_ERR;
	1267	}
	1268	}
	1269
	1270	/* C is now the inverse */
	1271	mp_exch (&C, c);
	1272	res = MP_OKAY;
	1273	LBL_ERR:mp_clear(&x);
	1274	mp_clear(&y);
	1275	mp_clear(&u);
	1276	mp_clear(&v);
	1277	mp_clear(&A);
	1278	mp_clear(&B);
	1279	mp_clear(&C);
	1280	mp_clear(&D);
	1281	return res;
	1282	}
	1283
	1284
	1285	/* compare magnitude of two ints (unsigned) */
	1286	int mp_cmp_mag (mp_int * a, mp_int * b)
	1287	{
	1288	int n;
	1289	mp_digit tmpa, tmpb;
	1290
	1291	/* compare based on # of non-zero digits */
	1292	if (a->used > b->used) {
	1293	return MP_GT;
	1294	}
	1295
	1296	if (a->used < b->used) {
	1297	return MP_LT;
	1298	}
	1299
	1300	/* alias for a */
	1301	tmpa = a->dp + (a->used - 1);
	1302
	1303	/* alias for b */
	1304	tmpb = b->dp + (a->used - 1);
	1305
	1306	/* compare based on digits */
	1307	for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
	1308	if (tmpa > tmpb) {
	1309	return MP_GT;
	1310	}
	1311
	1312	if (tmpa < tmpb) {
	1313	return MP_LT;
	1314	}
	1315	}
	1316	return MP_EQ;
	1317	}
	1318
	1319
	1320	/* compare two ints (signed)*/
	1321	int mp_cmp (mp_int * a, mp_int * b)
	1322	{
	1323	/* compare based on sign */
	1324	if (a->sign != b->sign) {
	1325	if (a->sign == MP_NEG) {
	1326	return MP_LT;
	1327	} else {
	1328	return MP_GT;
	1329	}
	1330	}
	1331
	1332	/* compare digits */
	1333	if (a->sign == MP_NEG) {
	1334	/* if negative compare opposite direction */
	1335	return mp_cmp_mag(b, a);
	1336	} else {
	1337	return mp_cmp_mag(a, b);
	1338	}
	1339	}
	1340
	1341
	1342	/* compare a digit */
	1343	int mp_cmp_d(mp_int * a, mp_digit b)
	1344	{
	1345	/* special case for zero*/
	1346	if (a->used == 0 && b == 0)
	1347	return MP_EQ;
	1348
	1349	/* compare based on sign */
	1350	if ((b && a->used == 0) \|\| a->sign == MP_NEG) {
	1351	return MP_LT;
	1352	}
	1353
	1354	/* compare based on magnitude */
	1355	if (a->used > 1) {
	1356	return MP_GT;
	1357	}
	1358
	1359	/* compare the only digit of a to b */
	1360	if (a->dp[0] > b) {
	1361	return MP_GT;
	1362	} else if (a->dp[0] < b) {
	1363	return MP_LT;
	1364	} else {
	1365	return MP_EQ;
	1366	}
	1367	}
	1368
	1369
	1370	/* set to a digit */
	1371	int mp_set (mp_int * a, mp_digit b)
	1372	{
	1373	int res;
	1374	mp_zero (a);
	1375	res = mp_grow (a, 1);
	1376	if (res == MP_OKAY) {
	1377	a->dp[0] = (mp_digit)(b & MP_MASK);
	1378	a->used = (a->dp[0] != 0) ? 1 : 0;
	1379	}
	1380	return res;
	1381	}
	1382
	1383	/* chek if a bit is set */
	1384	int mp_is_bit_set (mp_int *a, mp_digit b)
	1385	{
	1386	if ((mp_digit)a->used < b/DIGIT_BIT)
	1387	return 0;
	1388
	1389	return (int)((a->dp[b/DIGIT_BIT] >> b%DIGIT_BIT) & (mp_digit)1);
	1390	}
	1391
	1392	/* c = a mod b, 0 <= c < b */
	1393	#if defined(FREESCALE_LTC_TFM)
	1394	int wolfcrypt_mp_mod(mp_int * a, mp_int * b, mp_int * c)
	1395	#else
	1396	int mp_mod (mp_int * a, mp_int * b, mp_int * c)
	1397	#endif
	1398	{
	1399	mp_int t;
	1400	int res;
	1401
	1402	if ((res = mp_init_size (&t, b->used)) != MP_OKAY) {
	1403	return res;
	1404	}
	1405
	1406	if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
	1407	mp_clear (&t);
	1408	return res;
	1409	}
	1410
	1411	if ((mp_iszero(&t) != MP_NO) \|\| (t.sign == b->sign)) {
	1412	res = MP_OKAY;
	1413	mp_exch (&t, c);
	1414	} else {
	1415	res = mp_add (b, &t, c);
	1416	}
	1417
	1418	mp_clear (&t);
	1419	return res;
	1420	}
	1421
	1422
	1423	/* slower bit-bang division... also smaller */
	1424	int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
	1425	{
	1426	mp_int ta, tb, tq, q;
	1427	int res, n, n2;
	1428
	1429	/* is divisor zero ? */
	1430	if (mp_iszero (b) == MP_YES) {
	1431	return MP_VAL;
	1432	}
	1433
	1434	/* if a < b then q=0, r = a */
	1435	if (mp_cmp_mag (a, b) == MP_LT) {
	1436	if (d != NULL) {
	1437	res = mp_copy (a, d);
	1438	} else {
	1439	res = MP_OKAY;
	1440	}
	1441	if (c != NULL) {
	1442	mp_zero (c);
	1443	}
	1444	return res;
	1445	}
	1446
	1447	/* init our temps */
	1448	if ((res = mp_init_multi(&ta, &tb, &tq, &q, 0, 0)) != MP_OKAY) {
	1449	return res;
	1450	}
	1451
	1452	if ((res = mp_set(&tq, 1)) != MP_OKAY) {
	1453	return res;
	1454	}
	1455	n = mp_count_bits(a) - mp_count_bits(b);
	1456	if (((res = mp_abs(a, &ta)) != MP_OKAY) \|\|
	1457	((res = mp_abs(b, &tb)) != MP_OKAY) \|\|
	1458	((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) \|\|
	1459	((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
	1460	goto LBL_ERR;
	1461	}
	1462
	1463	while (n-- >= 0) {
	1464	if (mp_cmp(&tb, &ta) != MP_GT) {
	1465	if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) \|\|
	1466	((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
	1467	goto LBL_ERR;
	1468	}
	1469	}
	1470	if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) \|\|
	1471	((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
	1472	goto LBL_ERR;
	1473	}
	1474	}
	1475
	1476	/* now q == quotient and ta == remainder */
	1477	n = a->sign;
	1478	n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
	1479	if (c != NULL) {
	1480	mp_exch(c, &q);
	1481	c->sign = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
	1482	}
	1483	if (d != NULL) {
	1484	mp_exch(d, &ta);
	1485	d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
	1486	}
	1487	LBL_ERR:
	1488	mp_clear(&ta);
	1489	mp_clear(&tb);
	1490	mp_clear(&tq);
	1491	mp_clear(&q);
	1492	return res;
	1493	}
	1494
	1495
	1496	/* b = a/2 */
	1497	int mp_div_2(mp_int * a, mp_int * b)
	1498	{
	1499	int x, res, oldused;
	1500
	1501	/* copy */
	1502	if (b->alloc < a->used) {
	1503	if ((res = mp_grow (b, a->used)) != MP_OKAY) {
	1504	return res;
	1505	}
	1506	}
	1507
	1508	oldused = b->used;
	1509	b->used = a->used;
	1510	{
	1511	mp_digit r, rr, tmpa, tmpb;
	1512
	1513	/* source alias */
	1514	tmpa = a->dp + b->used - 1;
	1515
	1516	/* dest alias */
	1517	tmpb = b->dp + b->used - 1;
	1518
	1519	/* carry */
	1520	r = 0;
	1521	for (x = b->used - 1; x >= 0; x--) {
	1522	/* get the carry for the next iteration */
	1523	rr = *tmpa & 1;
	1524
	1525	/* shift the current digit, add in carry and store */
	1526	tmpb-- = (tmpa-- >> 1) \| (r << (DIGIT_BIT - 1));
	1527
	1528	/* forward carry to next iteration */
	1529	r = rr;
	1530	}
	1531
	1532	/* zero excess digits */
	1533	tmpb = b->dp + b->used;
	1534	for (x = b->used; x < oldused; x++) {
	1535	*tmpb++ = 0;
	1536	}
	1537	}
	1538	b->sign = a->sign;
	1539	mp_clamp (b);
	1540	return MP_OKAY;
	1541	}
	1542
	1543
	1544	/* high level addition (handles signs) */
	1545	int mp_add (mp_int * a, mp_int * b, mp_int * c)
	1546	{
	1547	int sa, sb, res;
	1548
	1549	/* get sign of both inputs */
	1550	sa = a->sign;
	1551	sb = b->sign;
	1552
	1553	/* handle two cases, not four */
	1554	if (sa == sb) {
	1555	/* both positive or both negative */
	1556	/* add their magnitudes, copy the sign */
	1557	c->sign = sa;
	1558	res = s_mp_add (a, b, c);
	1559	} else {
	1560	/* one positive, the other negative */
	1561	/* subtract the one with the greater magnitude from */
	1562	/* the one of the lesser magnitude. The result gets */
	1563	/* the sign of the one with the greater magnitude. */
	1564	if (mp_cmp_mag (a, b) == MP_LT) {
	1565	c->sign = sb;
	1566	res = s_mp_sub (b, a, c);
	1567	} else {
	1568	c->sign = sa;
	1569	res = s_mp_sub (a, b, c);
	1570	}
	1571	}
	1572	return res;
	1573	}
	1574
	1575
	1576	/* low level addition, based on HAC pp.594, Algorithm 14.7 */
	1577	int s_mp_add (mp_int * a, mp_int * b, mp_int * c)
	1578	{
	1579	mp_int *x;
	1580	int olduse, res, min_ab, max_ab;
	1581
	1582	/* find sizes, we let \|a\| <= \|b\| which means we have to sort
	1583	* them. "x" will point to the input with the most digits
	1584	*/
	1585	if (a->used > b->used) {
	1586	min_ab = b->used;
	1587	max_ab = a->used;
	1588	x = a;
	1589	} else {
	1590	min_ab = a->used;
	1591	max_ab = b->used;
	1592	x = b;
	1593	}
	1594
	1595	/* init result */
	1596	if (c->alloc < max_ab + 1) {
	1597	if ((res = mp_grow (c, max_ab + 1)) != MP_OKAY) {
	1598	return res;
	1599	}
	1600	}
	1601
	1602	/* get old used digit count and set new one */
	1603	olduse = c->used;
	1604	c->used = max_ab + 1;
	1605
	1606	{
	1607	mp_digit u, tmpa, tmpb, *tmpc;
	1608	int i;
	1609
	1610	/* alias for digit pointers */
	1611
	1612	/* first input */
	1613	tmpa = a->dp;
	1614
	1615	/* second input */
	1616	tmpb = b->dp;
	1617
	1618	/* destination */
	1619	tmpc = c->dp;
	1620
	1621	/* zero the carry */
	1622	u = 0;
	1623	for (i = 0; i < min_ab; i++) {
	1624	/* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
	1625	tmpc = tmpa++ + *tmpb++ + u;
	1626
	1627	/* U = carry bit of T[i] */
	1628	u = *tmpc >> ((mp_digit)DIGIT_BIT);
	1629
	1630	/* take away carry bit from T[i] */
	1631	*tmpc++ &= MP_MASK;
	1632	}
	1633
	1634	/* now copy higher words if any, that is in A+B
	1635	* if A or B has more digits add those in
	1636	*/
	1637	if (min_ab != max_ab) {
	1638	for (; i < max_ab; i++) {
	1639	/* T[i] = X[i] + U */
	1640	*tmpc = x->dp[i] + u;
	1641
	1642	/* U = carry bit of T[i] */
	1643	u = *tmpc >> ((mp_digit)DIGIT_BIT);
	1644
	1645	/* take away carry bit from T[i] */
	1646	*tmpc++ &= MP_MASK;
	1647	}
	1648	}
	1649
	1650	/* add carry */
	1651	*tmpc++ = u;
	1652
	1653	/* clear digits above olduse */
	1654	for (i = c->used; i < olduse; i++) {
	1655	*tmpc++ = 0;
	1656	}
	1657	}
	1658
	1659	mp_clamp (c);
	1660	return MP_OKAY;
	1661	}
	1662
	1663
	1664	/* low level subtraction (assumes \|a\| > \|b\|), HAC pp.595 Algorithm 14.9 */
	1665	int s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
	1666	{
	1667	int olduse, res, min_b, max_a;
	1668
	1669	/* find sizes */
	1670	min_b = b->used;
	1671	max_a = a->used;
	1672
	1673	/* init result */
	1674	if (c->alloc < max_a) {
	1675	if ((res = mp_grow (c, max_a)) != MP_OKAY) {
	1676	return res;
	1677	}
	1678	}
	1679
	1680	/* sanity check on destination */
	1681	if (c->dp == NULL)
	1682	return MP_VAL;
	1683
	1684	olduse = c->used;
	1685	c->used = max_a;
	1686
	1687	{
	1688	mp_digit u, tmpa, tmpb, *tmpc;
	1689	int i;
	1690
	1691	/* alias for digit pointers */
	1692	tmpa = a->dp;
	1693	tmpb = b->dp;
	1694	tmpc = c->dp;
	1695
	1696	/* set carry to zero */
	1697	u = 0;
	1698	for (i = 0; i < min_b; i++) {
	1699	/* T[i] = A[i] - B[i] - U */
	1700	tmpc = tmpa++ - *tmpb++ - u;
	1701
	1702	/* U = carry bit of T[i]
	1703	* Note this saves performing an AND operation since
	1704	* if a carry does occur it will propagate all the way to the
	1705	* MSB. As a result a single shift is enough to get the carry
	1706	*/
	1707	u = tmpc >> ((mp_digit)(CHAR_BIT sizeof (mp_digit) - 1));
	1708
	1709	/* Clear carry from T[i] */
	1710	*tmpc++ &= MP_MASK;
	1711	}
	1712
	1713	/* now copy higher words if any, e.g. if A has more digits than B */
	1714	for (; i < max_a; i++) {
	1715	/* T[i] = A[i] - U */
	1716	tmpc = tmpa++ - u;
	1717
	1718	/* U = carry bit of T[i] */
	1719	u = tmpc >> ((mp_digit)(CHAR_BIT sizeof (mp_digit) - 1));
	1720
	1721	/* Clear carry from T[i] */
	1722	*tmpc++ &= MP_MASK;
	1723	}
	1724
	1725	/* clear digits above used (since we may not have grown result above) */
	1726	for (i = c->used; i < olduse; i++) {
	1727	*tmpc++ = 0;
	1728	}
	1729	}
	1730
	1731	mp_clamp (c);
	1732	return MP_OKAY;
	1733	}
	1734
	1735
	1736	/* high level subtraction (handles signs) */
	1737	int mp_sub (mp_int * a, mp_int * b, mp_int * c)
	1738	{
	1739	int sa, sb, res;
	1740
	1741	sa = a->sign;
	1742	sb = b->sign;
	1743
	1744	if (sa != sb) {
	1745	/* subtract a negative from a positive, OR */
	1746	/* subtract a positive from a negative. */
	1747	/* In either case, ADD their magnitudes, */
	1748	/* and use the sign of the first number. */
	1749	c->sign = sa;
	1750	res = s_mp_add (a, b, c);
	1751	} else {
	1752	/* subtract a positive from a positive, OR */
	1753	/* subtract a negative from a negative. */
	1754	/* First, take the difference between their */
	1755	/* magnitudes, then... */
	1756	if (mp_cmp_mag (a, b) != MP_LT) {
	1757	/* Copy the sign from the first */
	1758	c->sign = sa;
	1759	/* The first has a larger or equal magnitude */
	1760	res = s_mp_sub (a, b, c);
	1761	} else {
	1762	/* The result has the opposite sign from */
	1763	/* the first number. */
	1764	c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
	1765	/* The second has a larger magnitude */
	1766	res = s_mp_sub (b, a, c);
	1767	}
	1768	}
	1769	return res;
	1770	}
	1771
	1772
	1773	/* determines if reduce_2k_l can be used */
	1774	int mp_reduce_is_2k_l(mp_int *a)
	1775	{
	1776	int ix, iy;
	1777
	1778	if (a->used == 0) {
	1779	return MP_NO;
	1780	} else if (a->used == 1) {
	1781	return MP_YES;
	1782	} else if (a->used > 1) {
	1783	/* if more than half of the digits are -1 we're sold */
	1784	for (iy = ix = 0; ix < a->used; ix++) {
	1785	if (a->dp[ix] == MP_MASK) {
	1786	++iy;
	1787	}
	1788	}
	1789	return (iy >= (a->used/2)) ? MP_YES : MP_NO;
	1790
	1791	}
	1792	return MP_NO;
	1793	}
	1794
	1795
	1796	/* determines if mp_reduce_2k can be used */
	1797	int mp_reduce_is_2k(mp_int *a)
	1798	{
	1799	int ix, iy, iw;
	1800	mp_digit iz;
	1801
	1802	if (a->used == 0) {
	1803	return MP_NO;
	1804	} else if (a->used == 1) {
	1805	return MP_YES;
	1806	} else if (a->used > 1) {
	1807	iy = mp_count_bits(a);
	1808	iz = 1;
	1809	iw = 1;
	1810
	1811	/* Test every bit from the second digit up, must be 1 */
	1812	for (ix = DIGIT_BIT; ix < iy; ix++) {
	1813	if ((a->dp[iw] & iz) == 0) {
	1814	return MP_NO;
	1815	}
	1816	iz <<= 1;
	1817	if (iz > (mp_digit)MP_MASK) {
	1818	++iw;
	1819	iz = 1;
	1820	}
	1821	}
	1822	}
	1823	return MP_YES;
	1824	}
	1825
	1826
	1827	/* determines if a number is a valid DR modulus */
	1828	int mp_dr_is_modulus(mp_int *a)
	1829	{
	1830	int ix;
	1831
	1832	/* must be at least two digits */
	1833	if (a->used < 2) {
	1834	return 0;
	1835	}
	1836
	1837	/* must be of the form b**k - a [a <= b] so all
	1838	* but the first digit must be equal to -1 (mod b).
	1839	*/
	1840	for (ix = 1; ix < a->used; ix++) {
	1841	if (a->dp[ix] != MP_MASK) {
	1842	return 0;
	1843	}
	1844	}
	1845	return 1;
	1846	}
	1847
	1848
	1849	/* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
	1850	*
	1851	* Uses a left-to-right k-ary sliding window to compute the modular
	1852	* exponentiation.
	1853	* The value of k changes based on the size of the exponent.
	1854	*
	1855	* Uses Montgomery or Diminished Radix reduction [whichever appropriate]
	1856	*/
	1857
	1858	#ifdef MP_LOW_MEM
	1859	#define TAB_SIZE 32
	1860	#else
	1861	#define TAB_SIZE 256
	1862	#endif
	1863
	1864	int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
	1865	int redmode)
	1866	{
	1867	mp_int res;
	1868	mp_digit buf, mp;
	1869	int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
	1870	#ifdef WOLFSSL_SMALL_STACK
	1871	mp_int* M = NULL;
	1872	#else
	1873	mp_int M[TAB_SIZE];
	1874	#endif
	1875	/* use a pointer to the reduction algorithm. This allows us to use
	1876	* one of many reduction algorithms without modding the guts of
	1877	* the code with if statements everywhere.
	1878	*/
	1879	int (redux)(mp_int,mp_int*,mp_digit);
	1880
	1881	#ifdef WOLFSSL_SMALL_STACK
	1882	M = (mp_int) XMALLOC(sizeof(mp_int) TAB_SIZE, NULL,
	1883	DYNAMIC_TYPE_TMP_BUFFER);
	1884	if (M == NULL)
	1885	return MP_MEM;
	1886	#endif
	1887
	1888	/* find window size */
	1889	x = mp_count_bits (X);
	1890	if (x <= 7) {
	1891	winsize = 2;
	1892	} else if (x <= 36) {
	1893	winsize = 3;
	1894	} else if (x <= 140) {
	1895	winsize = 4;
	1896	} else if (x <= 450) {
	1897	winsize = 5;
	1898	} else if (x <= 1303) {
	1899	winsize = 6;
	1900	} else if (x <= 3529) {
	1901	winsize = 7;
	1902	} else {
	1903	winsize = 8;
	1904	}
	1905
	1906	#ifdef MP_LOW_MEM
	1907	if (winsize > 5) {
	1908	winsize = 5;
	1909	}
	1910	#endif
	1911
	1912	/* init M array */
	1913	/* init first cell */
	1914	if ((err = mp_init_size(&M[1], P->alloc)) != MP_OKAY) {
	1915	#ifdef WOLFSSL_SMALL_STACK
	1916	XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
	1917	#endif
	1918
	1919	return err;
	1920	}
	1921
	1922	/* now init the second half of the array */
	1923	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
	1924	if ((err = mp_init_size(&M[x], P->alloc)) != MP_OKAY) {
	1925	for (y = 1<<(winsize-1); y < x; y++) {
	1926	mp_clear (&M[y]);
	1927	}
	1928	mp_clear(&M[1]);
	1929
	1930	#ifdef WOLFSSL_SMALL_STACK
	1931	XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
	1932	#endif
	1933
	1934	return err;
	1935	}
	1936	}
	1937
	1938	/* determine and setup reduction code */
	1939	if (redmode == 0) {
	1940	#ifdef BN_MP_MONTGOMERY_SETUP_C
	1941	/* now setup montgomery */
	1942	if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
	1943	goto LBL_M;
	1944	}
	1945	#else
	1946	err = MP_VAL;
	1947	goto LBL_M;
	1948	#endif
	1949
	1950	/* automatically pick the comba one if available (saves quite a few
	1951	calls/ifs) */
	1952	#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
	1953	if (((P->used * 2 + 1) < MP_WARRAY) &&
	1954	P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
	1955	redux = fast_mp_montgomery_reduce;
	1956	} else
	1957	#endif
	1958	{
	1959	#ifdef BN_MP_MONTGOMERY_REDUCE_C
	1960	/* use slower baseline Montgomery method */
	1961	redux = mp_montgomery_reduce;
	1962	#else
	1963	err = MP_VAL;
	1964	goto LBL_M;
	1965	#endif
	1966	}
	1967	} else if (redmode == 1) {
	1968	#if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
	1969	/* setup DR reduction for moduli of the form B*k - b /
	1970	mp_dr_setup(P, &mp);
	1971	redux = mp_dr_reduce;
	1972	#else
	1973	err = MP_VAL;
	1974	goto LBL_M;
	1975	#endif
	1976	} else {
	1977	#if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
	1978	/* setup DR reduction for moduli of the form 2*k - b /
	1979	if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
	1980	goto LBL_M;
	1981	}
	1982	redux = mp_reduce_2k;
	1983	#else
	1984	err = MP_VAL;
	1985	goto LBL_M;
	1986	#endif
	1987	}
	1988
	1989	/* setup result */
	1990	if ((err = mp_init_size (&res, P->alloc)) != MP_OKAY) {
	1991	goto LBL_M;
	1992	}
	1993
	1994	/* create M table
	1995	*
	1996
	1997	*
	1998	* The first half of the table is not computed though accept for M[0] and M[1]
	1999	*/
	2000
	2001	if (redmode == 0) {
	2002	#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
	2003	/* now we need R mod m */
	2004	if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
	2005	goto LBL_RES;
	2006	}
	2007
	2008	/* now set M[1] to G * R mod m */
	2009	if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
	2010	goto LBL_RES;
	2011	}
	2012	#else
	2013	err = MP_VAL;
	2014	goto LBL_RES;
	2015	#endif
	2016	} else {
	2017	if ((err = mp_set(&res, 1)) != MP_OKAY) {
	2018	goto LBL_RES;
	2019	}
	2020	if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
	2021	goto LBL_RES;
	2022	}
	2023	}
	2024
	2025	/* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times*/
	2026	if ((err = mp_copy (&M[1], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
	2027	goto LBL_RES;
	2028	}
	2029
	2030	for (x = 0; x < (winsize - 1); x++) {
	2031	if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))],
	2032	&M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
	2033	goto LBL_RES;
	2034	}
	2035	if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, mp)) != MP_OKAY) {
	2036	goto LBL_RES;
	2037	}
	2038	}
	2039
	2040	/* create upper table */
	2041	for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
	2042	if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
	2043	goto LBL_RES;
	2044	}
	2045	if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
	2046	goto LBL_RES;
	2047	}
	2048	}
	2049
	2050	/* set initial mode and bit cnt */
	2051	mode = 0;
	2052	bitcnt = 1;
	2053	buf = 0;
	2054	digidx = X->used - 1;
	2055	bitcpy = 0;
	2056	bitbuf = 0;
	2057
	2058	for (;;) {
	2059	/* grab next digit as required */
	2060	if (--bitcnt == 0) {
	2061	/* if digidx == -1 we are out of digits so break */
	2062	if (digidx == -1) {
	2063	break;
	2064	}
	2065	/* read next digit and reset bitcnt */
	2066	buf = X->dp[digidx--];
	2067	bitcnt = (int)DIGIT_BIT;
	2068	}
	2069
	2070	/* grab the next msb from the exponent */
	2071	y = (int)(buf >> (DIGIT_BIT - 1)) & 1;
	2072	buf <<= (mp_digit)1;
	2073
	2074	/* if the bit is zero and mode == 0 then we ignore it
	2075	* These represent the leading zero bits before the first 1 bit
	2076	* in the exponent. Technically this opt is not required but it
	2077	* does lower the # of trivial squaring/reductions used
	2078	*/
	2079	if (mode == 0 && y == 0) {
	2080	continue;
	2081	}
	2082
	2083	/* if the bit is zero and mode == 1 then we square */
	2084	if (mode == 1 && y == 0) {
	2085	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
	2086	goto LBL_RES;
	2087	}
	2088	if ((err = redux (&res, P, mp)) != MP_OKAY) {
	2089	goto LBL_RES;
	2090	}
	2091	continue;
	2092	}
	2093
	2094	/* else we add it to the window */
	2095	bitbuf \|= (y << (winsize - ++bitcpy));
	2096	mode = 2;
	2097
	2098	if (bitcpy == winsize) {
	2099	/* ok window is filled so square as required and multiply */
	2100	/* square first */
	2101	for (x = 0; x < winsize; x++) {
	2102	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
	2103	goto LBL_RES;
	2104	}
	2105	if ((err = redux (&res, P, mp)) != MP_OKAY) {
	2106	goto LBL_RES;
	2107	}
	2108	}
	2109
	2110	/* then multiply */
	2111	if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
	2112	goto LBL_RES;
	2113	}
	2114	if ((err = redux (&res, P, mp)) != MP_OKAY) {
	2115	goto LBL_RES;
	2116	}
	2117
	2118	/* empty window and reset */
	2119	bitcpy = 0;
	2120	bitbuf = 0;
	2121	mode = 1;
	2122	}
	2123	}
	2124
	2125	/* if bits remain then square/multiply */
	2126	if (mode == 2 && bitcpy > 0) {
	2127	/* square then multiply if the bit is set */
	2128	for (x = 0; x < bitcpy; x++) {
	2129	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
	2130	goto LBL_RES;
	2131	}
	2132	if ((err = redux (&res, P, mp)) != MP_OKAY) {
	2133	goto LBL_RES;
	2134	}
	2135
	2136	/* get next bit of the window */
	2137	bitbuf <<= 1;
	2138	if ((bitbuf & (1 << winsize)) != 0) {
	2139	/* then multiply */
	2140	if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
	2141	goto LBL_RES;
	2142	}
	2143	if ((err = redux (&res, P, mp)) != MP_OKAY) {
	2144	goto LBL_RES;
	2145	}
	2146	}
	2147	}
	2148	}
	2149
	2150	if (redmode == 0) {
	2151	/* fixup result if Montgomery reduction is used
	2152	* recall that any value in a Montgomery system is
	2153	* actually multiplied by R mod n. So we have
	2154	* to reduce one more time to cancel out the factor
	2155	* of R.
	2156	*/
	2157	if ((err = redux(&res, P, mp)) != MP_OKAY) {
	2158	goto LBL_RES;
	2159	}
	2160	}
	2161
	2162	/* swap res with Y */
	2163	mp_exch (&res, Y);
	2164	err = MP_OKAY;
	2165	LBL_RES:mp_clear (&res);
	2166	LBL_M:
	2167	mp_clear(&M[1]);
	2168	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
	2169	mp_clear (&M[x]);
	2170	}
	2171
	2172	#ifdef WOLFSSL_SMALL_STACK
	2173	XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
	2174	#endif
	2175
	2176	return err;
	2177	}
	2178
	2179
	2180	/* setups the montgomery reduction stuff */
	2181	int mp_montgomery_setup (mp_int * n, mp_digit * rho)
	2182	{
	2183	mp_digit x, b;
	2184
	2185	/* fast inversion mod 2**k
	2186	*
	2187	* Based on the fact that
	2188	*
	2189	* XA = 1 (mod 2n) => (X(2-XA)) A = 1 (mod 22n)
	2190	* => 2XA - XXA*A = 1
	2191	* => 2*(1) - (1) = 1
	2192	*/
	2193	b = n->dp[0];
	2194
	2195	if ((b & 1) == 0) {
	2196	return MP_VAL;
	2197	}
	2198
	2199	x = (((b + 2) & 4) << 1) + b; /* here xa==1 mod 24 /
	2200	x = 2 - b x; /* here xa==1 mod 28 /
	2201	#if !defined(MP_8BIT)
	2202	x = 2 - b x; /* here xa==1 mod 216 /
	2203	#endif
	2204	#if defined(MP_64BIT) \|\| !(defined(MP_8BIT) \|\| defined(MP_16BIT))
	2205	x = 2 - b x; /* here xa==1 mod 232 /
	2206	#endif
	2207	#ifdef MP_64BIT
	2208	x = 2 - b x; /* here xa==1 mod 264 /
	2209	#endif
	2210
	2211	/* rho = -1/m mod b */
	2212	/* TAO, switched mp_word casts to mp_digit to shut up compiler */
	2213	*rho = (mp_digit)((((mp_digit)1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK);
	2214
	2215	return MP_OKAY;
	2216	}
	2217
	2218
	2219	/* computes xR**-1 == x (mod N) via Montgomery Reduction
	2220	*
	2221	* This is an optimized implementation of montgomery_reduce
	2222	* which uses the comba method to quickly calculate the columns of the
	2223	* reduction.
	2224	*
	2225	* Based on Algorithm 14.32 on pp.601 of HAC.
	2226	*/
	2227	int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
	2228	{
	2229	int ix, res, olduse;
	2230	#ifdef WOLFSSL_SMALL_STACK
	2231	mp_word* W; /* uses dynamic memory and slower */
	2232	#else
	2233	mp_word W[MP_WARRAY];
	2234	#endif
	2235
	2236	/* get old used count */
	2237	olduse = x->used;
	2238
	2239	/* grow a as required */
	2240	if (x->alloc < n->used + 1) {
	2241	if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
	2242	return res;
	2243	}
	2244	}
	2245
	2246	#ifdef WOLFSSL_SMALL_STACK
	2247	W = (mp_word)XMALLOC(sizeof(mp_word) MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
	2248	if (W == NULL)
	2249	return MP_MEM;
	2250	#endif
	2251
	2252	/* first we have to get the digits of the input into
	2253	* an array of double precision words W[...]
	2254	*/
	2255	{
	2256	mp_word *_W;
	2257	mp_digit *tmpx;
	2258
	2259	/* alias for the W[] array */
	2260	_W = W;
	2261
	2262	/* alias for the digits of x*/
	2263	tmpx = x->dp;
	2264
	2265	/* copy the digits of a into W[0..a->used-1] */
	2266	for (ix = 0; ix < x->used; ix++) {
	2267	_W++ = tmpx++;
	2268	}
	2269
	2270	/* zero the high words of W[a->used..m->used2] /
	2271	for (; ix < n->used * 2 + 1; ix++) {
	2272	*_W++ = 0;
	2273	}
	2274	}
	2275
	2276	/* now we proceed to zero successive digits
	2277	* from the least significant upwards
	2278	*/
	2279	for (ix = 0; ix < n->used; ix++) {
	2280	/* mu = ai * m' mod b
	2281	*
	2282	* We avoid a double precision multiplication (which isn't required)
	2283	* by casting the value down to a mp_digit. Note this requires
	2284	* that W[ix-1] have the carry cleared (see after the inner loop)
	2285	*/
	2286	mp_digit mu;
	2287	mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
	2288
	2289	/* a = a + mu * m * b**i
	2290	*
	2291	* This is computed in place and on the fly. The multiplication
	2292	* by b**i is handled by offseting which columns the results
	2293	* are added to.
	2294	*
	2295	* Note the comba method normally doesn't handle carries in the
	2296	* inner loop In this case we fix the carry from the previous
	2297	* column since the Montgomery reduction requires digits of the
	2298	* result (so far) [see above] to work. This is
	2299	* handled by fixing up one carry after the inner loop. The
	2300	* carry fixups are done in order so after these loops the
	2301	* first m->used words of W[] have the carries fixed
	2302	*/
	2303	{
	2304	int iy;
	2305	mp_digit *tmpn;
	2306	mp_word *_W;
	2307
	2308	/* alias for the digits of the modulus */
	2309	tmpn = n->dp;
	2310
	2311	/* Alias for the columns set by an offset of ix */
	2312	_W = W + ix;
	2313
	2314	/* inner loop */
	2315	for (iy = 0; iy < n->used; iy++) {
	2316	_W++ += ((mp_word)mu) ((mp_word)*tmpn++);
	2317	}
	2318	}
	2319
	2320	/* now fix carry for next digit, W[ix+1] */
	2321	W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
	2322	}
	2323
	2324	/* now we have to propagate the carries and
	2325	* shift the words downward [all those least
	2326	* significant digits we zeroed].
	2327	*/
	2328	{
	2329	mp_digit *tmpx;
	2330	mp_word _W, _W1;
	2331
	2332	/* nox fix rest of carries */
	2333
	2334	/* alias for current word */
	2335	_W1 = W + ix;
	2336
	2337	/* alias for next word, where the carry goes */
	2338	_W = W + ++ix;
	2339
	2340	for (; ix <= n->used * 2 + 1; ix++) {
	2341	_W++ += _W1++ >> ((mp_word) DIGIT_BIT);
	2342	}
	2343
	2344	/* copy out, A = A/b**n
	2345	*
	2346	* The result is A/b**n but instead of converting from an
	2347	* array of mp_word to mp_digit than calling mp_rshd
	2348	* we just copy them in the right order
	2349	*/
	2350
	2351	/* alias for destination word */
	2352	tmpx = x->dp;
	2353
	2354	/* alias for shifted double precision result */
	2355	_W = W + n->used;
	2356
	2357	for (ix = 0; ix < n->used + 1; ix++) {
	2358	tmpx++ = (mp_digit)(_W++ & ((mp_word) MP_MASK));
	2359	}
	2360
	2361	/* zero olduse digits, if the input a was larger than
	2362	* m->used+1 we'll have to clear the digits
	2363	*/
	2364	for (; ix < olduse; ix++) {
	2365	*tmpx++ = 0;
	2366	}
	2367	}
	2368
	2369	/* set the max used and clamp */
	2370	x->used = n->used + 1;
	2371	mp_clamp (x);
	2372
	2373	#ifdef WOLFSSL_SMALL_STACK
	2374	XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
	2375	#endif
	2376
	2377	/* if A >= m then A = A - m */
	2378	if (mp_cmp_mag (x, n) != MP_LT) {
	2379	return s_mp_sub (x, n, x);
	2380	}
	2381	return MP_OKAY;
	2382	}
	2383
	2384
	2385	/* computes xR*-1 == x (mod N) via Montgomery Reduction /
	2386	int mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
	2387	{
	2388	int ix, res, digs;
	2389	mp_digit mu;
	2390
	2391	/* can the fast reduction [comba] method be used?
	2392	*
	2393	* Note that unlike in mul you're safely allowed less
	2394	* than the available columns [255 per default] since carries
	2395	* are fixed up in the inner loop.
	2396	*/
	2397	digs = n->used * 2 + 1;
	2398	if ((digs < MP_WARRAY) &&
	2399	n->used <
	2400	(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
	2401	return fast_mp_montgomery_reduce (x, n, rho);
	2402	}
	2403
	2404	/* grow the input as required */
	2405	if (x->alloc < digs) {
	2406	if ((res = mp_grow (x, digs)) != MP_OKAY) {
	2407	return res;
	2408	}
	2409	}
	2410	x->used = digs;
	2411
	2412	for (ix = 0; ix < n->used; ix++) {
	2413	/* mu = ai * rho mod b
	2414	*
	2415	* The value of rho must be precalculated via
	2416	* montgomery_setup() such that
	2417	* it equals -1/n0 mod b this allows the
	2418	* following inner loop to reduce the
	2419	* input one digit at a time
	2420	*/
	2421	mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
	2422
	2423	/* a = a + mu * m * b*i /
	2424	{
	2425	int iy;
	2426	mp_digit tmpn, tmpx, u;
	2427	mp_word r;
	2428
	2429	/* alias for digits of the modulus */
	2430	tmpn = n->dp;
	2431
	2432	/* alias for the digits of x [the input] */
	2433	tmpx = x->dp + ix;
	2434
	2435	/* set the carry to zero */
	2436	u = 0;
	2437
	2438	/* Multiply and add in place */
	2439	for (iy = 0; iy < n->used; iy++) {
	2440	/* compute product and sum */
	2441	r = ((mp_word)mu) * ((mp_word)*tmpn++) +
	2442	((mp_word) u) + ((mp_word) * tmpx);
	2443
	2444	/* get carry */
	2445	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
	2446
	2447	/* fix digit */
	2448	*tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
	2449	}
	2450	/* At this point the ix'th digit of x should be zero */
	2451
	2452
	2453	/* propagate carries upwards as required*/
	2454	while (u) {
	2455	*tmpx += u;
	2456	u = *tmpx >> DIGIT_BIT;
	2457	*tmpx++ &= MP_MASK;
	2458	}
	2459	}
	2460	}
	2461
	2462	/* at this point the n.used'th least
	2463	* significant digits of x are all zero
	2464	* which means we can shift x to the
	2465	* right by n.used digits and the
	2466	* residue is unchanged.
	2467	*/
	2468
	2469	/* x = x/b*n.used /
	2470	mp_clamp(x);
	2471	mp_rshd (x, n->used);
	2472
	2473	/* if x >= n then x = x - n */
	2474	if (mp_cmp_mag (x, n) != MP_LT) {
	2475	return s_mp_sub (x, n, x);
	2476	}
	2477
	2478	return MP_OKAY;
	2479	}
	2480
	2481
	2482	/* determines the setup value */
	2483	void mp_dr_setup(mp_int a, mp_digit d)
	2484	{
	2485	/* the casts are required if DIGIT_BIT is one less than
	2486	* the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
	2487	*/
	2488	*d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
	2489	((mp_word)a->dp[0]));
	2490	}
	2491
	2492
	2493	/* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
	2494	*
	2495	* Based on algorithm from the paper
	2496	*
	2497	* "Generating Efficient Primes for Discrete Log Cryptosystems"
	2498	* Chae Hoon Lim, Pil Joong Lee,
	2499	* POSTECH Information Research Laboratories
	2500	*
	2501	* The modulus must be of a special format [see manual]
	2502	*
	2503	* Has been modified to use algorithm 7.10 from the LTM book instead
	2504	*
	2505	* Input x must be in the range 0 <= x <= (n-1)**2
	2506	*/
	2507	int mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k)
	2508	{
	2509	int err, i, m;
	2510	mp_word r;
	2511	mp_digit mu, tmpx1, tmpx2;
	2512
	2513	/* m = digits in modulus */
	2514	m = n->used;
	2515
	2516	/* ensure that "x" has at least 2m digits */
	2517	if (x->alloc < m + m) {
	2518	if ((err = mp_grow (x, m + m)) != MP_OKAY) {
	2519	return err;
	2520	}
	2521	}
	2522
	2523	/* top of loop, this is where the code resumes if
	2524	* another reduction pass is required.
	2525	*/
	2526	top:
	2527	/* aliases for digits */
	2528	/* alias for lower half of x */
	2529	tmpx1 = x->dp;
	2530
	2531	/* alias for upper half of x, or x/B*m /
	2532	tmpx2 = x->dp + m;
	2533
	2534	/* set carry to zero */
	2535	mu = 0;
	2536
	2537	/* compute (x mod B*m) + k [x/B*m] inline and inplace /
	2538	for (i = 0; i < m; i++) {
	2539	r = ((mp_word)tmpx2++) ((mp_word)k) + *tmpx1 + mu;
	2540	*tmpx1++ = (mp_digit)(r & MP_MASK);
	2541	mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
	2542	}
	2543
	2544	/* set final carry */
	2545	*tmpx1++ = mu;
	2546
	2547	/* zero words above m */
	2548	for (i = m + 1; i < x->used; i++) {
	2549	*tmpx1++ = 0;
	2550	}
	2551
	2552	/* clamp, sub and return */
	2553	mp_clamp (x);
	2554
	2555	/* if x >= n then subtract and reduce again
	2556	* Each successive "recursion" makes the input smaller and smaller.
	2557	*/
	2558	if (mp_cmp_mag (x, n) != MP_LT) {
	2559	if ((err = s_mp_sub(x, n, x)) != MP_OKAY) {
	2560	return err;
	2561	}
	2562	goto top;
	2563	}
	2564	return MP_OKAY;
	2565	}
	2566
	2567
	2568	/* reduces a modulo n where n is of the form 2*p - d /
	2569	int mp_reduce_2k(mp_int a, mp_int n, mp_digit d)
	2570	{
	2571	mp_int q;
	2572	int p, res;
	2573
	2574	if ((res = mp_init(&q)) != MP_OKAY) {
	2575	return res;
	2576	}
	2577
	2578	p = mp_count_bits(n);
	2579	top:
	2580	/* q = a/2p, a = a mod 2p */
	2581	if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
	2582	goto ERR;
	2583	}
	2584
	2585	if (d != 1) {
	2586	/* q = q * d */
	2587	if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) {
	2588	goto ERR;
	2589	}
	2590	}
	2591
	2592	/* a = a + q */
	2593	if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
	2594	goto ERR;
	2595	}
	2596
	2597	if (mp_cmp_mag(a, n) != MP_LT) {
	2598	if ((res = s_mp_sub(a, n, a)) != MP_OKAY) {
	2599	goto ERR;
	2600	}
	2601	goto top;
	2602	}
	2603
	2604	ERR:
	2605	mp_clear(&q);
	2606	return res;
	2607	}
	2608
	2609
	2610	/* determines the setup value */
	2611	int mp_reduce_2k_setup(mp_int a, mp_digit d)
	2612	{
	2613	int res, p;
	2614	mp_int tmp;
	2615
	2616	if ((res = mp_init(&tmp)) != MP_OKAY) {
	2617	return res;
	2618	}
	2619
	2620	p = mp_count_bits(a);
	2621	if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
	2622	mp_clear(&tmp);
	2623	return res;
	2624	}
	2625
	2626	if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
	2627	mp_clear(&tmp);
	2628	return res;
	2629	}
	2630
	2631	*d = tmp.dp[0];
	2632	mp_clear(&tmp);
	2633	return MP_OKAY;
	2634	}
	2635
	2636
	2637	/* set the b bit of a */
	2638	int mp_set_bit (mp_int * a, int b)
	2639	{
	2640	int i = b / DIGIT_BIT, res;
	2641
	2642	if (a->used < (int)(i + 1)) {
	2643	/* grow a to accommodate the single bit */
	2644	if ((res = mp_grow (a, i + 1)) != MP_OKAY) {
	2645	return res;
	2646	}
	2647
	2648	/* set the used count of where the bit will go */
	2649	a->used = (int)(i + 1);
	2650	}
	2651
	2652	/* put the single bit in its place */
	2653	a->dp[i] \|= ((mp_digit)1) << (b % DIGIT_BIT);
	2654
	2655	return MP_OKAY;
	2656	}
	2657
	2658	/* computes a = 2**b
	2659	*
	2660	* Simple algorithm which zeros the int, set the required bit
	2661	*/
	2662	int mp_2expt (mp_int * a, int b)
	2663	{
	2664	/* zero a as per default */
	2665	mp_zero (a);
	2666
	2667	return mp_set_bit(a, b);
	2668	}
	2669
	2670	/* multiply by a digit */
	2671	int mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
	2672	{
	2673	mp_digit u, tmpa, tmpc;
	2674	mp_word r;
	2675	int ix, res, olduse;
	2676
	2677	/* make sure c is big enough to hold ab /
	2678	if (c->alloc < a->used + 1) {
	2679	if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
	2680	return res;
	2681	}
	2682	}
	2683
	2684	/* get the original destinations used count */
	2685	olduse = c->used;
	2686
	2687	/* set the sign */
	2688	c->sign = a->sign;
	2689
	2690	/* alias for a->dp [source] */
	2691	tmpa = a->dp;
	2692
	2693	/* alias for c->dp [dest] */
	2694	tmpc = c->dp;
	2695
	2696	/* zero carry */
	2697	u = 0;
	2698
	2699	/* compute columns */
	2700	for (ix = 0; ix < a->used; ix++) {
	2701	/* compute product and carry sum for this term */
	2702	r = ((mp_word) u) + ((mp_word)tmpa++) ((mp_word)b);
	2703
	2704	/* mask off higher bits to get a single digit */
	2705	*tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
	2706
	2707	/* send carry into next iteration */
	2708	u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
	2709	}
	2710
	2711	/* store final carry [if any] and increment ix offset */
	2712	*tmpc++ = u;
	2713	++ix;
	2714
	2715	/* now zero digits above the top */
	2716	while (ix++ < olduse) {
	2717	*tmpc++ = 0;
	2718	}
	2719
	2720	/* set used count */
	2721	c->used = a->used + 1;
	2722	mp_clamp(c);
	2723
	2724	return MP_OKAY;
	2725	}
	2726
	2727
	2728	/* d = a * b (mod c) */
	2729	#if defined(FREESCALE_LTC_TFM)
	2730	int wolfcrypt_mp_mulmod(mp_int a, mp_int b, mp_int c, mp_int d)
	2731	#else
	2732	int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
	2733	#endif
	2734	{
	2735	int res;
	2736	mp_int t;
	2737
	2738	if ((res = mp_init_size (&t, c->used)) != MP_OKAY) {
	2739	return res;
	2740	}
	2741
	2742	res = mp_mul (a, b, &t);
	2743	if (res == MP_OKAY) {
	2744	res = mp_mod (&t, c, d);
	2745	}
	2746
	2747	mp_clear (&t);
	2748	return res;
	2749	}
	2750
	2751
	2752	/* d = a - b (mod c) */
	2753	int mp_submod(mp_int* a, mp_int* b, mp_int* c, mp_int* d)
	2754	{
	2755	int res;
	2756	mp_int t;
	2757
	2758	if ((res = mp_init (&t)) != MP_OKAY) {
	2759	return res;
	2760	}
	2761
	2762	res = mp_sub (a, b, &t);
	2763	if (res == MP_OKAY) {
	2764	res = mp_mod (&t, c, d);
	2765	}
	2766
	2767	mp_clear (&t);
	2768
	2769	return res;
	2770	}
	2771
	2772	/* d = a + b (mod c) */
	2773	int mp_addmod(mp_int* a, mp_int* b, mp_int* c, mp_int* d)
	2774	{
	2775	int res;
	2776	mp_int t;
	2777
	2778	if ((res = mp_init (&t)) != MP_OKAY) {
	2779	return res;
	2780	}
	2781
	2782	res = mp_add (a, b, &t);
	2783	if (res == MP_OKAY) {
	2784	res = mp_mod (&t, c, d);
	2785	}
	2786
	2787	mp_clear (&t);
	2788
	2789	return res;
	2790	}
	2791
	2792	/* computes b = aa /
	2793	int mp_sqr (mp_int * a, mp_int * b)
	2794	{
	2795	int res;
	2796
	2797	{
	2798	#ifdef BN_FAST_S_MP_SQR_C
	2799	/* can we use the fast comba multiplier? */
	2800	if ((a->used * 2 + 1) < MP_WARRAY &&
	2801	a->used <
	2802	(1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
	2803	res = fast_s_mp_sqr (a, b);
	2804	} else
	2805	#endif
	2806	#ifdef BN_S_MP_SQR_C
	2807	res = s_mp_sqr (a, b);
	2808	#else
	2809	res = MP_VAL;
	2810	#endif
	2811	}
	2812	b->sign = MP_ZPOS;
	2813	return res;
	2814	}
	2815
	2816
	2817	/* high level multiplication (handles sign) */
	2818	#if defined(FREESCALE_LTC_TFM)
	2819	int wolfcrypt_mp_mul(mp_int a, mp_int b, mp_int *c)
	2820	#else
	2821	int mp_mul (mp_int * a, mp_int * b, mp_int * c)
	2822	#endif
	2823	{
	2824	int res, neg;
	2825	neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
	2826
	2827	{
	2828	/* can we use the fast multiplier?
	2829	*
	2830	* The fast multiplier can be used if the output will
	2831	* have less than MP_WARRAY digits and the number of
	2832	* digits won't affect carry propagation
	2833	*/
	2834	int digs = a->used + b->used + 1;
	2835
	2836	#ifdef BN_FAST_S_MP_MUL_DIGS_C
	2837	if ((digs < MP_WARRAY) &&
	2838	MIN(a->used, b->used) <=
	2839	(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
	2840	res = fast_s_mp_mul_digs (a, b, c, digs);
	2841	} else
	2842	#endif
	2843	#ifdef BN_S_MP_MUL_DIGS_C
	2844	res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
	2845	#else
	2846	res = MP_VAL;
	2847	#endif
	2848
	2849	}
	2850	c->sign = (c->used > 0) ? neg : MP_ZPOS;
	2851	return res;
	2852	}
	2853
	2854
	2855	/* b = a2 /
	2856	int mp_mul_2(mp_int * a, mp_int * b)
	2857	{
	2858	int x, res, oldused;
	2859
	2860	/* grow to accommodate result */
	2861	if (b->alloc < a->used + 1) {
	2862	if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
	2863	return res;
	2864	}
	2865	}
	2866
	2867	oldused = b->used;
	2868	b->used = a->used;
	2869
	2870	{
	2871	mp_digit r, rr, tmpa, tmpb;
	2872
	2873	/* alias for source */
	2874	tmpa = a->dp;
	2875
	2876	/* alias for dest */
	2877	tmpb = b->dp;
	2878
	2879	/* carry */
	2880	r = 0;
	2881	for (x = 0; x < a->used; x++) {
	2882
	2883	/* get what will be the next carry bit from the
	2884	* MSB of the current digit
	2885	*/
	2886	rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
	2887
	2888	/* now shift up this digit, add in the carry [from the previous] */
	2889	tmpb++ = (mp_digit)(((tmpa++ << ((mp_digit)1)) \| r) & MP_MASK);
	2890
	2891	/* copy the carry that would be from the source
	2892	* digit into the next iteration
	2893	*/
	2894	r = rr;
	2895	}
	2896
	2897	/* new leading digit? */
	2898	if (r != 0) {
	2899	/* add a MSB which is always 1 at this point */
	2900	*tmpb = 1;
	2901	++(b->used);
	2902	}
	2903
	2904	/* now zero any excess digits on the destination
	2905	* that we didn't write to
	2906	*/
	2907	tmpb = b->dp + b->used;
	2908	for (x = b->used; x < oldused; x++) {
	2909	*tmpb++ = 0;
	2910	}
	2911	}
	2912	b->sign = a->sign;
	2913	return MP_OKAY;
	2914	}
	2915
	2916
	2917	/* divide by three (based on routine from MPI and the GMP manual) */
	2918	int mp_div_3 (mp_int * a, mp_int c, mp_digit d)
	2919	{
	2920	mp_int q;
	2921	mp_word w, t;
	2922	mp_digit b;
	2923	int res, ix;
	2924
	2925	/* b = 2*DIGIT_BIT / 3 /
	2926	b = (mp_digit) ( (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3) );
	2927
	2928	if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
	2929	return res;
	2930	}
	2931
	2932	q.used = a->used;
	2933	q.sign = a->sign;
	2934	w = 0;
	2935	for (ix = a->used - 1; ix >= 0; ix--) {
	2936	w = (w << ((mp_word)DIGIT_BIT)) \| ((mp_word)a->dp[ix]);
	2937
	2938	if (w >= 3) {
	2939	/* multiply w by [1/3] */
	2940	t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
	2941
	2942	/* now subtract 3 * [w/3] from w, to get the remainder */
	2943	w -= t+t+t;
	2944
	2945	/* fixup the remainder as required since
	2946	* the optimization is not exact.
	2947	*/
	2948	while (w >= 3) {
	2949	t += 1;
	2950	w -= 3;
	2951	}
	2952	} else {
	2953	t = 0;
	2954	}
	2955	q.dp[ix] = (mp_digit)t;
	2956	}
	2957
	2958	/* [optional] store the remainder */
	2959	if (d != NULL) {
	2960	*d = (mp_digit)w;
	2961	}
	2962
	2963	/* [optional] store the quotient */
	2964	if (c != NULL) {
	2965	mp_clamp(&q);
	2966	mp_exch(&q, c);
	2967	}
	2968	mp_clear(&q);
	2969
	2970	return res;
	2971	}
	2972
	2973
	2974	/* init an mp_init for a given size */
	2975	int mp_init_size (mp_int * a, int size)
	2976	{
	2977	int x;
	2978
	2979	/* pad size so there are always extra digits */
	2980	size += (MP_PREC * 2) - (size % MP_PREC);
	2981
	2982	/* alloc mem */
	2983	a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size, NULL,
	2984	DYNAMIC_TYPE_BIGINT);
	2985	if (a->dp == NULL) {
	2986	return MP_MEM;
	2987	}
	2988
	2989	/* set the members */
	2990	a->used = 0;
	2991	a->alloc = size;
	2992	a->sign = MP_ZPOS;
	2993	#ifdef HAVE_WOLF_BIGINT
	2994	wc_bigint_init(&a->raw);
	2995	#endif
	2996
	2997	/* zero the digits */
	2998	for (x = 0; x < size; x++) {
	2999	a->dp[x] = 0;
	3000	}
	3001
	3002	return MP_OKAY;
	3003	}
	3004
	3005
	3006	/* the jist of squaring...
	3007	* you do like mult except the offset of the tmpx [one that
	3008	* starts closer to zero] can't equal the offset of tmpy.
	3009	* So basically you set up iy like before then you min it with
	3010	* (ty-tx) so that it never happens. You double all those
	3011	* you add in the inner loop
	3012
	3013	After that loop you do the squares and add them in.
	3014	*/
	3015
	3016	int fast_s_mp_sqr (mp_int * a, mp_int * b)
	3017	{
	3018	int olduse, res, pa, ix, iz;
	3019	#ifdef WOLFSSL_SMALL_STACK
	3020	mp_digit* W; /* uses dynamic memory and slower */
	3021	#else
	3022	mp_digit W[MP_WARRAY];
	3023	#endif
	3024	mp_digit *tmpx;
	3025	mp_word W1;
	3026
	3027	/* grow the destination as required */
	3028	pa = a->used + a->used;
	3029	if (b->alloc < pa) {
	3030	if ((res = mp_grow (b, pa)) != MP_OKAY) {
	3031	return res;
	3032	}
	3033	}
	3034
	3035	if (pa > MP_WARRAY)
	3036	return MP_RANGE; /* TAO range check */
	3037
	3038	#ifdef WOLFSSL_SMALL_STACK
	3039	W = (mp_digit)XMALLOC(sizeof(mp_digit) MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
	3040	if (W == NULL)
	3041	return MP_MEM;
	3042	#endif
	3043
	3044	/* number of output digits to produce */
	3045	W1 = 0;
	3046	for (ix = 0; ix < pa; ix++) {
	3047	int tx, ty, iy;
	3048	mp_word _W;
	3049	mp_digit *tmpy;
	3050
	3051	/* clear counter */
	3052	_W = 0;
	3053
	3054	/* get offsets into the two bignums */
	3055	ty = MIN(a->used-1, ix);
	3056	tx = ix - ty;
	3057
	3058	/* setup temp aliases */
	3059	tmpx = a->dp + tx;
	3060	tmpy = a->dp + ty;
	3061
	3062	/* this is the number of times the loop will iterate, essentially
	3063	while (tx++ < a->used && ty-- >= 0) { ... }
	3064	*/
	3065	iy = MIN(a->used-tx, ty+1);
	3066
	3067	/* now for squaring tx can never equal ty
	3068	* we halve the distance since they approach at a rate of 2x
	3069	* and we have to round because odd cases need to be executed
	3070	*/
	3071	iy = MIN(iy, (ty-tx+1)>>1);
	3072
	3073	/* execute loop */
	3074	for (iz = 0; iz < iy; iz++) {
	3075	_W += ((mp_word)tmpx++)((mp_word)*tmpy--);
	3076	}
	3077
	3078	/* double the inner product and add carry */
	3079	_W = _W + _W + W1;
	3080
	3081	/* even columns have the square term in them */
	3082	if ((ix&1) == 0) {
	3083	_W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
	3084	}
	3085
	3086	/* store it */
	3087	W[ix] = (mp_digit)(_W & MP_MASK);
	3088
	3089	/* make next carry */
	3090	W1 = _W >> ((mp_word)DIGIT_BIT);
	3091	}
	3092
	3093	/* setup dest */
	3094	olduse = b->used;
	3095	b->used = a->used+a->used;
	3096
	3097	{
	3098	mp_digit *tmpb;
	3099	tmpb = b->dp;
	3100	for (ix = 0; ix < pa; ix++) {
	3101	*tmpb++ = (mp_digit)(W[ix] & MP_MASK);
	3102	}
	3103
	3104	/* clear unused digits [that existed in the old copy of c] */
	3105	for (; ix < olduse; ix++) {
	3106	*tmpb++ = 0;
	3107	}
	3108	}
	3109	mp_clamp (b);
	3110
	3111	#ifdef WOLFSSL_SMALL_STACK
	3112	XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
	3113	#endif
	3114
	3115	return MP_OKAY;
	3116	}
	3117
	3118
	3119	/* Fast (comba) multiplier
	3120	*
	3121	* This is the fast column-array [comba] multiplier. It is
	3122	* designed to compute the columns of the product first
	3123	* then handle the carries afterwards. This has the effect
	3124	* of making the nested loops that compute the columns very
	3125	* simple and schedulable on super-scalar processors.
	3126	*
	3127	* This has been modified to produce a variable number of
	3128	* digits of output so if say only a half-product is required
	3129	* you don't have to compute the upper half (a feature
	3130	* required for fast Barrett reduction).
	3131	*
	3132	* Based on Algorithm 14.12 on pp.595 of HAC.
	3133	*
	3134	*/
	3135	int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
	3136	{
	3137	int olduse, res, pa, ix, iz;
	3138	#ifdef WOLFSSL_SMALL_STACK
	3139	mp_digit* W; /* uses dynamic memory and slower */
	3140	#else
	3141	mp_digit W[MP_WARRAY];
	3142	#endif
	3143	mp_word _W;
	3144
	3145	/* grow the destination as required */
	3146	if (c->alloc < digs) {
	3147	if ((res = mp_grow (c, digs)) != MP_OKAY) {
	3148	return res;
	3149	}
	3150	}
	3151
	3152	/* number of output digits to produce */
	3153	pa = MIN(digs, a->used + b->used);
	3154	if (pa > MP_WARRAY)
	3155	return MP_RANGE; /* TAO range check */
	3156
	3157	#ifdef WOLFSSL_SMALL_STACK
	3158	W = (mp_digit)XMALLOC(sizeof(mp_digit) MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
	3159	if (W == NULL)
	3160	return MP_MEM;
	3161	#endif
	3162
	3163	/* clear the carry */
	3164	_W = 0;
	3165	for (ix = 0; ix < pa; ix++) {
	3166	int tx, ty;
	3167	int iy;
	3168	mp_digit tmpx, tmpy;
	3169
	3170	/* get offsets into the two bignums */
	3171	ty = MIN(b->used-1, ix);
	3172	tx = ix - ty;
	3173
	3174	/* setup temp aliases */
	3175	tmpx = a->dp + tx;
	3176	tmpy = b->dp + ty;
	3177
	3178	/* this is the number of times the loop will iterate, essentially
	3179	while (tx++ < a->used && ty-- >= 0) { ... }
	3180	*/
	3181	iy = MIN(a->used-tx, ty+1);
	3182
	3183	/* execute loop */
	3184	for (iz = 0; iz < iy; ++iz) {
	3185	_W += ((mp_word)tmpx++)((mp_word)*tmpy--);
	3186
	3187	}
	3188
	3189	/* store term */
	3190	W[ix] = (mp_digit)(((mp_digit)_W) & MP_MASK);
	3191
	3192	/* make next carry */
	3193	_W = _W >> ((mp_word)DIGIT_BIT);
	3194	}
	3195
	3196	/* setup dest */
	3197	olduse = c->used;
	3198	c->used = pa;
	3199
	3200	{
	3201	mp_digit *tmpc;
	3202	tmpc = c->dp;
	3203	for (ix = 0; ix < pa; ix++) { /* JRB, +1 could read uninitialized data */
	3204	/* now extract the previous digit [below the carry] */
	3205	*tmpc++ = W[ix];
	3206	}
	3207
	3208	/* clear unused digits [that existed in the old copy of c] */
	3209	for (; ix < olduse; ix++) {
	3210	*tmpc++ = 0;
	3211	}
	3212	}
	3213	mp_clamp (c);
	3214
	3215	#ifdef WOLFSSL_SMALL_STACK
	3216	XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
	3217	#endif
	3218
	3219	return MP_OKAY;
	3220	}
	3221
	3222
	3223	/* low level squaring, b = aa, HAC pp.596-597, Algorithm 14.16 /
	3224	int s_mp_sqr (mp_int * a, mp_int * b)
	3225	{
	3226	mp_int t;
	3227	int res, ix, iy, pa;
	3228	mp_word r;
	3229	mp_digit u, tmpx, *tmpt;
	3230
	3231	pa = a->used;
	3232	if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
	3233	return res;
	3234	}
	3235
	3236	/* default used is maximum possible size */
	3237	t.used = 2*pa + 1;
	3238
	3239	for (ix = 0; ix < pa; ix++) {
	3240	/* first calculate the digit at 2ix /
	3241	/* calculate double precision result */
	3242	r = ((mp_word) t.dp[2*ix]) +
	3243	((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
	3244
	3245	/* store lower part in result */
	3246	t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
	3247
	3248	/* get the carry */
	3249	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
	3250
	3251	/* left hand side of A[ix] * A[iy] */
	3252	tmpx = a->dp[ix];
	3253
	3254	/* alias for where to store the results */
	3255	tmpt = t.dp + (2*ix + 1);
	3256
	3257	for (iy = ix + 1; iy < pa; iy++) {
	3258	/* first calculate the product */
	3259	r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
	3260
	3261	/* now calculate the double precision result, note we use
	3262	* addition instead of *2 since it's easier to optimize
	3263	*/
	3264	r = ((mp_word) *tmpt) + r + r + ((mp_word) u);
	3265
	3266	/* store lower part */
	3267	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
	3268
	3269	/* get carry */
	3270	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
	3271	}
	3272	/* propagate upwards */
	3273	while (u != ((mp_digit) 0)) {
	3274	r = ((mp_word) *tmpt) + ((mp_word) u);
	3275	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
	3276	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
	3277	}
	3278	}
	3279
	3280	mp_clamp (&t);
	3281	mp_exch (&t, b);
	3282	mp_clear (&t);
	3283	return MP_OKAY;
	3284	}
	3285
	3286
	3287	/* multiplies \|a\| * \|b\| and only computes up to digs digits of result
	3288	* HAC pp. 595, Algorithm 14.12 Modified so you can control how
	3289	* many digits of output are created.
	3290	*/
	3291	int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
	3292	{
	3293	mp_int t;
	3294	int res, pa, pb, ix, iy;
	3295	mp_digit u;
	3296	mp_word r;
	3297	mp_digit tmpx, tmpt, tmpy;
	3298
	3299	/* can we use the fast multiplier? */
	3300	if (((digs) < MP_WARRAY) &&
	3301	MIN (a->used, b->used) <
	3302	(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
	3303	return fast_s_mp_mul_digs (a, b, c, digs);
	3304	}
	3305
	3306	if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
	3307	return res;
	3308	}
	3309	t.used = digs;
	3310
	3311	/* compute the digits of the product directly */
	3312	pa = a->used;
	3313	for (ix = 0; ix < pa; ix++) {
	3314	/* set the carry to zero */
	3315	u = 0;
	3316
	3317	/* limit ourselves to making digs digits of output */
	3318	pb = MIN (b->used, digs - ix);
	3319
	3320	/* setup some aliases */
	3321	/* copy of the digit from a used within the nested loop */
	3322	tmpx = a->dp[ix];
	3323
	3324	/* an alias for the destination shifted ix places */
	3325	tmpt = t.dp + ix;
	3326
	3327	/* an alias for the digits of b */
	3328	tmpy = b->dp;
	3329
	3330	/* compute the columns of the output and propagate the carry */
	3331	for (iy = 0; iy < pb; iy++) {
	3332	/* compute the column as a mp_word */
	3333	r = ((mp_word)*tmpt) +
	3334	((mp_word)tmpx) * ((mp_word)*tmpy++) +
	3335	((mp_word) u);
	3336
	3337	/* the new column is the lower part of the result */
	3338	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
	3339
	3340	/* get the carry word from the result */
	3341	u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
	3342	}
	3343	/* set carry if it is placed below digs */
	3344	if (ix + iy < digs) {
	3345	*tmpt = u;
	3346	}
	3347	}
	3348
	3349	mp_clamp (&t);
	3350	mp_exch (&t, c);
	3351
	3352	mp_clear (&t);
	3353	return MP_OKAY;
	3354	}
	3355
	3356
	3357	/*
	3358	* shifts with subtractions when the result is greater than b.
	3359	*
	3360	* The method is slightly modified to shift B unconditionally up to just under
	3361	* the leading bit of b. This saves a lot of multiple precision shifting.
	3362	*/
	3363	int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
	3364	{
	3365	int x, bits, res;
	3366
	3367	/* how many bits of last digit does b use */
	3368	bits = mp_count_bits (b) % DIGIT_BIT;
	3369
	3370	if (b->used > 1) {
	3371	if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1))
	3372	!= MP_OKAY) {
	3373	return res;
	3374	}
	3375	} else {
	3376	if ((res = mp_set(a, 1)) != MP_OKAY) {
	3377	return res;
	3378	}
	3379	bits = 1;
	3380	}
	3381
	3382	/* now compute C = A * B mod b */
	3383	for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
	3384	if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
	3385	return res;
	3386	}
	3387	if (mp_cmp_mag (a, b) != MP_LT) {
	3388	if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
	3389	return res;
	3390	}
	3391	}
	3392	}
	3393
	3394	return MP_OKAY;
	3395	}
	3396
	3397
	3398	#ifdef MP_LOW_MEM
	3399	#define TAB_SIZE 32
	3400	#else
	3401	#define TAB_SIZE 256
	3402	#endif
	3403
	3404	int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
	3405	{
	3406	mp_int M[TAB_SIZE], res, mu;
	3407	mp_digit buf;
	3408	int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
	3409	int (redux)(mp_int,mp_int,mp_int);
	3410
	3411	/* find window size */
	3412	x = mp_count_bits (X);
	3413	if (x <= 7) {
	3414	winsize = 2;
	3415	} else if (x <= 36) {
	3416	winsize = 3;
	3417	} else if (x <= 140) {
	3418	winsize = 4;
	3419	} else if (x <= 450) {
	3420	winsize = 5;
	3421	} else if (x <= 1303) {
	3422	winsize = 6;
	3423	} else if (x <= 3529) {
	3424	winsize = 7;
	3425	} else {
	3426	winsize = 8;
	3427	}
	3428
	3429	#ifdef MP_LOW_MEM
	3430	if (winsize > 5) {
	3431	winsize = 5;
	3432	}
	3433	#endif
	3434
	3435	/* init M array */
	3436	/* init first cell */
	3437	if ((err = mp_init(&M[1])) != MP_OKAY) {
	3438	return err;
	3439	}
	3440
	3441	/* now init the second half of the array */
	3442	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
	3443	if ((err = mp_init(&M[x])) != MP_OKAY) {
	3444	for (y = 1<<(winsize-1); y < x; y++) {
	3445	mp_clear (&M[y]);
	3446	}
	3447	mp_clear(&M[1]);
	3448	return err;
	3449	}
	3450	}
	3451
	3452	/* create mu, used for Barrett reduction */
	3453	if ((err = mp_init (&mu)) != MP_OKAY) {
	3454	goto LBL_M;
	3455	}
	3456
	3457	if (redmode == 0) {
	3458	if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
	3459	goto LBL_MU;
	3460	}
	3461	redux = mp_reduce;
	3462	} else {
	3463	if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
	3464	goto LBL_MU;
	3465	}
	3466	redux = mp_reduce_2k_l;
	3467	}
	3468
	3469	/* create M table
	3470	*
	3471	* The M table contains powers of the base,
	3472	* e.g. M[x] = G**x mod P
	3473	*
	3474	* The first half of the table is not
	3475	* computed though accept for M[0] and M[1]
	3476	*/
	3477	if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
	3478	goto LBL_MU;
	3479	}
	3480
	3481	/* compute the value at M[1<<(winsize-1)] by squaring
	3482	* M[1] (winsize-1) times
	3483	*/
	3484	if ((err = mp_copy (&M[1], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
	3485	goto LBL_MU;
	3486	}
	3487
	3488	for (x = 0; x < (winsize - 1); x++) {
	3489	/* square it */
	3490	if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))],
	3491	&M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
	3492	goto LBL_MU;
	3493	}
	3494
	3495	/* reduce modulo P */
	3496	if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, &mu)) != MP_OKAY) {
	3497	goto LBL_MU;
	3498	}
	3499	}
	3500
	3501	/* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
	3502	* for x = (2(winsize - 1) + 1) to (2winsize - 1)
	3503	*/
	3504	for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
	3505	if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
	3506	goto LBL_MU;
	3507	}
	3508	if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
	3509	goto LBL_MU;
	3510	}
	3511	}
	3512
	3513	/* setup result */
	3514	if ((err = mp_init (&res)) != MP_OKAY) {
	3515	goto LBL_MU;
	3516	}
	3517	if ((err = mp_set (&res, 1)) != MP_OKAY) {
	3518	goto LBL_MU;
	3519	}
	3520
	3521	/* set initial mode and bit cnt */
	3522	mode = 0;
	3523	bitcnt = 1;
	3524	buf = 0;
	3525	digidx = X->used - 1;
	3526	bitcpy = 0;
	3527	bitbuf = 0;
	3528
	3529	for (;;) {
	3530	/* grab next digit as required */
	3531	if (--bitcnt == 0) {
	3532	/* if digidx == -1 we are out of digits */
	3533	if (digidx == -1) {
	3534	break;
	3535	}
	3536	/* read next digit and reset the bitcnt */
	3537	buf = X->dp[digidx--];
	3538	bitcnt = (int) DIGIT_BIT;
	3539	}
	3540
	3541	/* grab the next msb from the exponent */
	3542	y = (int)(buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
	3543	buf <<= (mp_digit)1;
	3544
	3545	/* if the bit is zero and mode == 0 then we ignore it
	3546	* These represent the leading zero bits before the first 1 bit
	3547	* in the exponent. Technically this opt is not required but it
	3548	* does lower the # of trivial squaring/reductions used
	3549	*/
	3550	if (mode == 0 && y == 0) {
	3551	continue;
	3552	}
	3553
	3554	/* if the bit is zero and mode == 1 then we square */
	3555	if (mode == 1 && y == 0) {
	3556	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
	3557	goto LBL_RES;
	3558	}
	3559	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
	3560	goto LBL_RES;
	3561	}
	3562	continue;
	3563	}
	3564
	3565	/* else we add it to the window */
	3566	bitbuf \|= (y << (winsize - ++bitcpy));
	3567	mode = 2;
	3568
	3569	if (bitcpy == winsize) {
	3570	/* ok window is filled so square as required and multiply */
	3571	/* square first */
	3572	for (x = 0; x < winsize; x++) {
	3573	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
	3574	goto LBL_RES;
	3575	}
	3576	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
	3577	goto LBL_RES;
	3578	}
	3579	}
	3580
	3581	/* then multiply */
	3582	if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
	3583	goto LBL_RES;
	3584	}
	3585	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
	3586	goto LBL_RES;
	3587	}
	3588
	3589	/* empty window and reset */
	3590	bitcpy = 0;
	3591	bitbuf = 0;
	3592	mode = 1;
	3593	}
	3594	}
	3595
	3596	/* if bits remain then square/multiply */
	3597	if (mode == 2 && bitcpy > 0) {
	3598	/* square then multiply if the bit is set */
	3599	for (x = 0; x < bitcpy; x++) {
	3600	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
	3601	goto LBL_RES;
	3602	}
	3603	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
	3604	goto LBL_RES;
	3605	}
	3606
	3607	bitbuf <<= 1;
	3608	if ((bitbuf & (1 << winsize)) != 0) {
	3609	/* then multiply */
	3610	if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
	3611	goto LBL_RES;
	3612	}
	3613	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
	3614	goto LBL_RES;
	3615	}
	3616	}
	3617	}
	3618	}
	3619
	3620	mp_exch (&res, Y);
	3621	err = MP_OKAY;
	3622	LBL_RES:mp_clear (&res);
	3623	LBL_MU:mp_clear (&mu);
	3624	LBL_M:
	3625	mp_clear(&M[1]);
	3626	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
	3627	mp_clear (&M[x]);
	3628	}
	3629	return err;
	3630	}
	3631
	3632
	3633	/* pre-calculate the value required for Barrett reduction
	3634	* For a given modulus "b" it calculates the value required in "a"
	3635	*/
	3636	int mp_reduce_setup (mp_int * a, mp_int * b)
	3637	{
	3638	int res;
	3639
	3640	if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
	3641	return res;
	3642	}
	3643	return mp_div (a, b, a, NULL);
	3644	}
	3645
	3646
	3647	/* reduces x mod m, assumes 0 < x < m**2, mu is
	3648	* precomputed via mp_reduce_setup.
	3649	* From HAC pp.604 Algorithm 14.42
	3650	*/
	3651	int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
	3652	{
	3653	mp_int q;
	3654	int res, um = m->used;
	3655
	3656	/* q = x */
	3657	if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
	3658	return res;
	3659	}
	3660
	3661	/* q1 = x / b*(k-1) /
	3662	mp_rshd (&q, um - 1);
	3663
	3664	/* according to HAC this optimization is ok */
	3665	if (((mp_word) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
	3666	if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
	3667	goto CLEANUP;
	3668	}
	3669	} else {
	3670	#ifdef BN_S_MP_MUL_HIGH_DIGS_C
	3671	if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
	3672	goto CLEANUP;
	3673	}
	3674	#elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
	3675	if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
	3676	goto CLEANUP;
	3677	}
	3678	#else
	3679	{
	3680	res = MP_VAL;
	3681	goto CLEANUP;
	3682	}
	3683	#endif
	3684	}
	3685
	3686	/* q3 = q2 / b*(k+1) /
	3687	mp_rshd (&q, um + 1);
	3688
	3689	/* x = x mod b*(k+1), quick (no division) /
	3690	if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
	3691	goto CLEANUP;
	3692	}
	3693
	3694	/* q = q * m mod b*(k+1), quick (no division) /
	3695	if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
	3696	goto CLEANUP;
	3697	}
	3698
	3699	/* x = x - q */
	3700	if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
	3701	goto CLEANUP;
	3702	}
	3703
	3704	/* If x < 0, add b*(k+1) to it /
	3705	if (mp_cmp_d (x, 0) == MP_LT) {
	3706	if ((res = mp_set (&q, 1)) != MP_OKAY)
	3707	goto CLEANUP;
	3708	if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
	3709	goto CLEANUP;
	3710	if ((res = mp_add (x, &q, x)) != MP_OKAY)
	3711	goto CLEANUP;
	3712	}
	3713
	3714	/* Back off if it's too big */
	3715	while (mp_cmp (x, m) != MP_LT) {
	3716	if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
	3717	goto CLEANUP;
	3718	}
	3719	}
	3720
	3721	CLEANUP:
	3722	mp_clear (&q);
	3723
	3724	return res;
	3725	}
	3726
	3727
	3728	/* reduces a modulo n where n is of the form 2**p - d
	3729	This differs from reduce_2k since "d" can be larger
	3730	than a single digit.
	3731	*/
	3732	int mp_reduce_2k_l(mp_int a, mp_int n, mp_int *d)
	3733	{
	3734	mp_int q;
	3735	int p, res;
	3736
	3737	if ((res = mp_init(&q)) != MP_OKAY) {
	3738	return res;
	3739	}
	3740
	3741	p = mp_count_bits(n);
	3742	top:
	3743	/* q = a/2p, a = a mod 2p */
	3744	if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
	3745	goto ERR;
	3746	}
	3747
	3748	/* q = q * d */
	3749	if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
	3750	goto ERR;
	3751	}
	3752
	3753	/* a = a + q */
	3754	if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
	3755	goto ERR;
	3756	}
	3757
	3758	if (mp_cmp_mag(a, n) != MP_LT) {
	3759	if ((res = s_mp_sub(a, n, a)) != MP_OKAY) {
	3760	goto ERR;
	3761	}
	3762	goto top;
	3763	}
	3764
	3765	ERR:
	3766	mp_clear(&q);
	3767	return res;
	3768	}
	3769
	3770
	3771	/* determines the setup value */
	3772	int mp_reduce_2k_setup_l(mp_int a, mp_int d)
	3773	{
	3774	int res;
	3775	mp_int tmp;
	3776
	3777	if ((res = mp_init(&tmp)) != MP_OKAY) {
	3778	return res;
	3779	}
	3780
	3781	if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
	3782	goto ERR;
	3783	}
	3784
	3785	if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
	3786	goto ERR;
	3787	}
	3788
	3789	ERR:
	3790	mp_clear(&tmp);
	3791	return res;
	3792	}
	3793
	3794
	3795	/* multiplies \|a\| * \|b\| and does not compute the lower digs digits
	3796	* [meant to get the higher part of the product]
	3797	*/
	3798	int s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
	3799	{
	3800	mp_int t;
	3801	int res, pa, pb, ix, iy;
	3802	mp_digit u;
	3803	mp_word r;
	3804	mp_digit tmpx, tmpt, tmpy;
	3805
	3806	/* can we use the fast multiplier? */
	3807	#ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
	3808	if (((a->used + b->used + 1) < MP_WARRAY)
	3809	&& MIN (a->used, b->used) <
	3810	(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
	3811	return fast_s_mp_mul_high_digs (a, b, c, digs);
	3812	}
	3813	#endif
	3814
	3815	if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
	3816	return res;
	3817	}
	3818	t.used = a->used + b->used + 1;
	3819
	3820	pa = a->used;
	3821	pb = b->used;
	3822	for (ix = 0; ix < pa && a->dp; ix++) {
	3823	/* clear the carry */
	3824	u = 0;
	3825
	3826	/* left hand side of A[ix] * B[iy] */
	3827	tmpx = a->dp[ix];
	3828
	3829	/* alias to the address of where the digits will be stored */
	3830	tmpt = &(t.dp[digs]);
	3831
	3832	/* alias for where to read the right hand side from */
	3833	tmpy = b->dp + (digs - ix);
	3834
	3835	for (iy = digs - ix; iy < pb; iy++) {
	3836	/* calculate the double precision result */
	3837	r = ((mp_word)*tmpt) +
	3838	((mp_word)tmpx) * ((mp_word)*tmpy++) +
	3839	((mp_word) u);
	3840
	3841	/* get the lower part */
	3842	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
	3843
	3844	/* carry the carry */
	3845	u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
	3846	}
	3847	*tmpt = u;
	3848	}
	3849	mp_clamp (&t);
	3850	mp_exch (&t, c);
	3851	mp_clear (&t);
	3852	return MP_OKAY;
	3853	}
	3854
	3855
	3856	/* this is a modified version of fast_s_mul_digs that only produces
	3857	* output digits above digs. See the comments for fast_s_mul_digs
	3858	* to see how it works.
	3859	*
	3860	* This is used in the Barrett reduction since for one of the multiplications
	3861	* only the higher digits were needed. This essentially halves the work.
	3862	*
	3863	* Based on Algorithm 14.12 on pp.595 of HAC.
	3864	*/
	3865	int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
	3866	{
	3867	int olduse, res, pa, ix, iz;
	3868	#ifdef WOLFSSL_SMALL_STACK
	3869	mp_digit* W; /* uses dynamic memory and slower */
	3870	#else
	3871	mp_digit W[MP_WARRAY];
	3872	#endif
	3873	mp_word _W;
	3874
	3875	if (a->dp == NULL) { /* JRB, avoid reading uninitialized values */
	3876	return MP_VAL;
	3877	}
	3878
	3879	/* grow the destination as required */
	3880	pa = a->used + b->used;
	3881	if (c->alloc < pa) {
	3882	if ((res = mp_grow (c, pa)) != MP_OKAY) {
	3883	return res;
	3884	}
	3885	}
	3886
	3887	if (pa > MP_WARRAY)
	3888	return MP_RANGE; /* TAO range check */
	3889
	3890	#ifdef WOLFSSL_SMALL_STACK
	3891	W = (mp_digit)XMALLOC(sizeof(mp_digit) MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
	3892	if (W == NULL)
	3893	return MP_MEM;
	3894	#endif
	3895
	3896	/* number of output digits to produce */
	3897	pa = a->used + b->used;
	3898	_W = 0;
	3899	for (ix = digs; ix < pa; ix++) { /* JRB, have a->dp check at top of function*/
	3900	int tx, ty, iy;
	3901	mp_digit tmpx, tmpy;
	3902
	3903	/* get offsets into the two bignums */
	3904	ty = MIN(b->used-1, ix);
	3905	tx = ix - ty;
	3906
	3907	/* setup temp aliases */
	3908	tmpx = a->dp + tx;
	3909	tmpy = b->dp + ty;
	3910
	3911	/* this is the number of times the loop will iterate, essentially its
	3912	while (tx++ < a->used && ty-- >= 0) { ... }
	3913	*/
	3914	iy = MIN(a->used-tx, ty+1);
	3915
	3916	/* execute loop */
	3917	for (iz = 0; iz < iy; iz++) {
	3918	_W += ((mp_word)tmpx++)((mp_word)*tmpy--);
	3919	}
	3920
	3921	/* store term */
	3922	W[ix] = (mp_digit)(((mp_digit)_W) & MP_MASK);
	3923
	3924	/* make next carry */
	3925	_W = _W >> ((mp_word)DIGIT_BIT);
	3926	}
	3927
	3928	/* setup dest */
	3929	olduse = c->used;
	3930	c->used = pa;
	3931
	3932	{
	3933	mp_digit *tmpc;
	3934
	3935	tmpc = c->dp + digs;
	3936	for (ix = digs; ix < pa; ix++) { /* TAO, <= could potentially overwrite */
	3937	/* now extract the previous digit [below the carry] */
	3938	*tmpc++ = W[ix];
	3939	}
	3940
	3941	/* clear unused digits [that existed in the old copy of c] */
	3942	for (; ix < olduse; ix++) {
	3943	*tmpc++ = 0;
	3944	}
	3945	}
	3946	mp_clamp (c);
	3947
	3948	#ifdef WOLFSSL_SMALL_STACK
	3949	XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
	3950	#endif
	3951
	3952	return MP_OKAY;
	3953	}
	3954
	3955
	3956	#ifndef MP_SET_CHUNK_BITS
	3957	#define MP_SET_CHUNK_BITS 4
	3958	#endif
	3959	int mp_set_int (mp_int * a, unsigned long b)
	3960	{
	3961	int x, res;
	3962
	3963	/* use direct mp_set if b is less than mp_digit max */
	3964	if (b < MP_DIGIT_MAX) {
	3965	return mp_set (a, (mp_digit)b);
	3966	}
	3967
	3968	mp_zero (a);
	3969
	3970	/* set chunk bits at a time */
	3971	for (x = 0; x < (int)(sizeof(b) * 8) / MP_SET_CHUNK_BITS; x++) {
	3972	/* shift the number up chunk bits */
	3973	if ((res = mp_mul_2d (a, MP_SET_CHUNK_BITS, a)) != MP_OKAY) {
	3974	return res;
	3975	}
	3976
	3977	/* OR in the top bits of the source */
	3978	a->dp[0] \|= (b >> ((sizeof(b) * 8) - MP_SET_CHUNK_BITS)) &
	3979	((1 << MP_SET_CHUNK_BITS) - 1);
	3980
	3981	/* shift the source up to the next chunk bits */
	3982	b <<= MP_SET_CHUNK_BITS;
	3983
	3984	/* ensure that digits are not clamped off */
	3985	a->used += 1;
	3986	}
	3987	mp_clamp (a);
	3988	return MP_OKAY;
	3989	}
	3990
	3991
	3992	#if defined(WOLFSSL_KEY_GEN) \|\| defined(HAVE_ECC) \|\| !defined(NO_RSA) \|\| \
	3993	!defined(NO_DSA) \| !defined(NO_DH)
	3994
	3995	/* c = a * a (mod b) */
	3996	int mp_sqrmod (mp_int * a, mp_int * b, mp_int * c)
	3997	{
	3998	int res;
	3999	mp_int t;
	4000
	4001	if ((res = mp_init (&t)) != MP_OKAY) {
	4002	return res;
	4003	}
	4004
	4005	if ((res = mp_sqr (a, &t)) != MP_OKAY) {
	4006	mp_clear (&t);
	4007	return res;
	4008	}
	4009	res = mp_mod (&t, b, c);
	4010	mp_clear (&t);
	4011	return res;
	4012	}
	4013
	4014	#endif
	4015
	4016
	4017	#if defined(HAVE_ECC) \|\| !defined(NO_PWDBASED) \|\| defined(WOLFSSL_SNIFFER) \|\| \
	4018	defined(WOLFSSL_HAVE_WOLFSCEP) \|\| defined(WOLFSSL_KEY_GEN) \|\| \
	4019	defined(OPENSSL_EXTRA) \|\| defined(WC_RSA_BLINDING) \|\| \
	4020	(!defined(NO_RSA) && !defined(NO_RSA_BOUNDS_CHECK))
	4021
	4022	/* single digit addition */
	4023	int mp_add_d (mp_int* a, mp_digit b, mp_int* c)
	4024	{
	4025	int res, ix, oldused;
	4026	mp_digit tmpa, tmpc, mu;
	4027
	4028	/* grow c as required */
	4029	if (c->alloc < a->used + 1) {
	4030	if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
	4031	return res;
	4032	}
	4033	}
	4034
	4035	/* if a is negative and \|a\| >= b, call c = \|a\| - b */
	4036	if (a->sign == MP_NEG && (a->used > 1 \|\| a->dp[0] >= b)) {
	4037	/* temporarily fix sign of a */
	4038	a->sign = MP_ZPOS;
	4039
	4040	/* c = \|a\| - b */
	4041	res = mp_sub_d(a, b, c);
	4042
	4043	/* fix sign */
	4044	a->sign = c->sign = MP_NEG;
	4045
	4046	/* clamp */
	4047	mp_clamp(c);
	4048
	4049	return res;
	4050	}
	4051
	4052	/* old number of used digits in c */
	4053	oldused = c->used;
	4054
	4055	/* sign always positive */
	4056	c->sign = MP_ZPOS;
	4057
	4058	/* source alias */
	4059	tmpa = a->dp;
	4060
	4061	/* destination alias */
	4062	tmpc = c->dp;
	4063
	4064	/* if a is positive */
	4065	if (a->sign == MP_ZPOS) {
	4066	/* add digit, after this we're propagating
	4067	* the carry.
	4068	*/
	4069	tmpc = tmpa++ + b;
	4070	mu = *tmpc >> DIGIT_BIT;
	4071	*tmpc++ &= MP_MASK;
	4072
	4073	/* now handle rest of the digits */
	4074	for (ix = 1; ix < a->used; ix++) {
	4075	tmpc = tmpa++ + mu;
	4076	mu = *tmpc >> DIGIT_BIT;
	4077	*tmpc++ &= MP_MASK;
	4078	}
	4079	/* set final carry */
	4080	if (ix < c->alloc) {
	4081	ix++;
	4082	*tmpc++ = mu;
	4083	}
	4084
	4085	/* setup size */
	4086	c->used = a->used + 1;
	4087	} else {
	4088	/* a was negative and \|a\| < b */
	4089	c->used = 1;
	4090
	4091	/* the result is a single digit */
	4092	if (a->used == 1) {
	4093	*tmpc++ = b - a->dp[0];
	4094	} else {
	4095	*tmpc++ = b;
	4096	}
	4097
	4098	/* setup count so the clearing of oldused
	4099	* can fall through correctly
	4100	*/
	4101	ix = 1;
	4102	}
	4103
	4104	/* now zero to oldused */
	4105	while (ix++ < oldused) {
	4106	*tmpc++ = 0;
	4107	}
	4108	mp_clamp(c);
	4109
	4110	return MP_OKAY;
	4111	}
	4112
	4113
	4114	/* single digit subtraction */
	4115	int mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
	4116	{
	4117	mp_digit tmpa, tmpc, mu;
	4118	int res, ix, oldused;
	4119
	4120	/* grow c as required */
	4121	if (c->alloc < a->used + 1) {
	4122	if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
	4123	return res;
	4124	}
	4125	}
	4126
	4127	/* if a is negative just do an unsigned
	4128	* addition [with fudged signs]
	4129	*/
	4130	if (a->sign == MP_NEG) {
	4131	a->sign = MP_ZPOS;
	4132	res = mp_add_d(a, b, c);
	4133	a->sign = c->sign = MP_NEG;
	4134
	4135	/* clamp */
	4136	mp_clamp(c);
	4137
	4138	return res;
	4139	}
	4140
	4141	/* setup regs */
	4142	oldused = c->used;
	4143	tmpa = a->dp;
	4144	tmpc = c->dp;
	4145
	4146	/* if a <= b simply fix the single digit */
	4147	if ((a->used == 1 && a->dp[0] <= b) \|\| a->used == 0) {
	4148	if (a->used == 1) {
	4149	tmpc++ = b - tmpa;
	4150	} else {
	4151	*tmpc++ = b;
	4152	}
	4153	ix = 1;
	4154
	4155	/* negative/1digit */
	4156	c->sign = MP_NEG;
	4157	c->used = 1;
	4158	} else {
	4159	/* positive/size */
	4160	c->sign = MP_ZPOS;
	4161	c->used = a->used;
	4162
	4163	/* subtract first digit */
	4164	tmpc = tmpa++ - b;
	4165	mu = tmpc >> (sizeof(mp_digit) CHAR_BIT - 1);
	4166	*tmpc++ &= MP_MASK;
	4167
	4168	/* handle rest of the digits */
	4169	for (ix = 1; ix < a->used; ix++) {
	4170	tmpc = tmpa++ - mu;
	4171	mu = tmpc >> (sizeof(mp_digit) CHAR_BIT - 1);
	4172	*tmpc++ &= MP_MASK;
	4173	}
	4174	}
	4175
	4176	/* zero excess digits */
	4177	while (ix++ < oldused) {
	4178	*tmpc++ = 0;
	4179	}
	4180	mp_clamp(c);
	4181	return MP_OKAY;
	4182	}
	4183
	4184	#endif /* defined(HAVE_ECC) \|\| !defined(NO_PWDBASED) */
	4185
	4186
	4187	#if defined(WOLFSSL_KEY_GEN) \|\| defined(HAVE_COMP_KEY) \|\| defined(HAVE_ECC) \|\| \
	4188	defined(DEBUG_WOLFSSL) \|\| !defined(NO_RSA) \|\| !defined(NO_DSA) \|\| \
	4189	!defined(NO_DH)
	4190
	4191	static const int lnz[16] = {
	4192	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
	4193	};
	4194
	4195	/* Counts the number of lsbs which are zero before the first zero bit */
	4196	int mp_cnt_lsb(mp_int *a)
	4197	{
	4198	int x;
	4199	mp_digit q = 0, qq;
	4200
	4201	/* easy out */
	4202	if (mp_iszero(a) == MP_YES) {
	4203	return 0;
	4204	}
	4205
	4206	/* scan lower digits until non-zero */
	4207	for (x = 0; x < a->used && a->dp[x] == 0; x++) {}
	4208	if (a->dp)
	4209	q = a->dp[x];
	4210	x *= DIGIT_BIT;
	4211
	4212	/* now scan this digit until a 1 is found */
	4213	if ((q & 1) == 0) {
	4214	do {
	4215	qq = q & 15;
	4216	x += lnz[qq];
	4217	q >>= 4;
	4218	} while (qq == 0);
	4219	}
	4220	return x;
	4221	}
	4222
	4223
	4224
	4225
	4226	static int s_is_power_of_two(mp_digit b, int *p)
	4227	{
	4228	int x;
	4229
	4230	/* fast return if no power of two */
	4231	if ((b==0) \|\| (b & (b-1))) {
	4232	return 0;
	4233	}
	4234
	4235	for (x = 0; x < DIGIT_BIT; x++) {
	4236	if (b == (((mp_digit)1)<<x)) {
	4237	*p = x;
	4238	return 1;
	4239	}
	4240	}
	4241	return 0;
	4242	}
	4243
	4244	/* single digit division (based on routine from MPI) */
	4245	static int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
	4246	{
	4247	mp_int q;
	4248	mp_word w;
	4249	mp_digit t;
	4250	int res = MP_OKAY, ix;
	4251
	4252	/* cannot divide by zero */
	4253	if (b == 0) {
	4254	return MP_VAL;
	4255	}
	4256
	4257	/* quick outs */
	4258	if (b == 1 \|\| mp_iszero(a) == MP_YES) {
	4259	if (d != NULL) {
	4260	*d = 0;
	4261	}
	4262	if (c != NULL) {
	4263	return mp_copy(a, c);
	4264	}
	4265	return MP_OKAY;
	4266	}
	4267
	4268	/* power of two ? */
	4269	if (s_is_power_of_two(b, &ix) == 1) {
	4270	if (d != NULL) {
	4271	*d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
	4272	}
	4273	if (c != NULL) {
	4274	return mp_div_2d(a, ix, c, NULL);
	4275	}
	4276	return MP_OKAY;
	4277	}
	4278
	4279	#ifdef BN_MP_DIV_3_C
	4280	/* three? */
	4281	if (b == 3) {
	4282	return mp_div_3(a, c, d);
	4283	}
	4284	#endif
	4285
	4286	/* no easy answer [c'est la vie]. Just division */
	4287	if (c != NULL) {
	4288	if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
	4289	return res;
	4290	}
	4291
	4292	q.used = a->used;
	4293	q.sign = a->sign;
	4294	}
	4295	else {
	4296	if ((res = mp_init(&q)) != MP_OKAY) {
	4297	return res;
	4298	}
	4299	}
	4300
	4301
	4302	w = 0;
	4303	for (ix = a->used - 1; ix >= 0; ix--) {
	4304	w = (w << ((mp_word)DIGIT_BIT)) \| ((mp_word)a->dp[ix]);
	4305
	4306	if (w >= b) {
	4307	t = (mp_digit)(w / b);
	4308	w -= ((mp_word)t) * ((mp_word)b);
	4309	} else {
	4310	t = 0;
	4311	}
	4312	if (c != NULL)
	4313	q.dp[ix] = (mp_digit)t;
	4314	}
	4315
	4316	if (d != NULL) {
	4317	*d = (mp_digit)w;
	4318	}
	4319
	4320	if (c != NULL) {
	4321	mp_clamp(&q);
	4322	mp_exch(&q, c);
	4323	}
	4324	mp_clear(&q);
	4325
	4326	return res;
	4327	}
	4328
	4329
	4330	int mp_mod_d (mp_int * a, mp_digit b, mp_digit * c)
	4331	{
	4332	return mp_div_d(a, b, NULL, c);
	4333	}
	4334
#endif /* WOLFSSL_KEY_GEN || HAVE_COMP_KEY || HAVE_ECC || DEBUG_WOLFSSL */
	4336
	4337	#if defined(WOLFSSL_KEY_GEN) \|\| !defined(NO_DH) \|\| !defined(NO_DSA) \|\| !defined(NO_RSA)
	4338
	4339	const mp_digit ltm_prime_tab[PRIME_SIZE] = {
	4340	0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
	4341	0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
	4342	0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
	4343	0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F,
	4344	#ifndef MP_8BIT
	4345	0x0083,
	4346	0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
	4347	0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
	4348	0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
	4349	0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
	4350
	4351	0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
	4352	0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
	4353	0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
	4354	0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
	4355	0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
	4356	0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
	4357	0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
	4358	0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
	4359
	4360	0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
	4361	0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
	4362	0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
	4363	0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
	4364	0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
	4365	0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
	4366	0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
	4367	0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
	4368
	4369	0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
	4370	0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
	4371	0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
	4372	0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
	4373	0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
	4374	0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
	4375	0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
	4376	0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
	4377	#endif
	4378	};
	4379
	4380
	4381	/* Miller-Rabin test of "a" to the base of "b" as described in
	4382	* HAC pp. 139 Algorithm 4.24
	4383	*
	4384	* Sets result to 0 if definitely composite or 1 if probably prime.
	4385	* Randomly the chance of error is no more than 1/4 and often
	4386	* very much lower.
	4387	*/
	4388	static int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
	4389	{
	4390	mp_int n1, y, r;
	4391	int s, j, err;
	4392
	4393	/* default */
	4394	*result = MP_NO;
	4395
	4396	/* ensure b > 1 */
	4397	if (mp_cmp_d(b, 1) != MP_GT) {
	4398	return MP_VAL;
	4399	}
	4400
	4401	/* get n1 = a - 1 */
	4402	if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
	4403	return err;
	4404	}
	4405	if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
	4406	goto LBL_N1;
	4407	}
	4408
	4409	/* set 2*s r = n1 */
	4410	if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
	4411	goto LBL_N1;
	4412	}
	4413
	4414	/* count the number of least significant bits
	4415	* which are zero
	4416	*/
	4417	s = mp_cnt_lsb(&r);
	4418
	4419	/* now divide n - 1 by 2*s /
	4420	if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
	4421	goto LBL_R;
	4422	}
	4423
	4424	/* compute y = b*r mod a /
	4425	if ((err = mp_init (&y)) != MP_OKAY) {
	4426	goto LBL_R;
	4427	}
	4428	if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
	4429	goto LBL_Y;
	4430	}
	4431
	4432	/* if y != 1 and y != n1 do */
	4433	if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
	4434	j = 1;
	4435	/* while j <= s-1 and y != n1 */
	4436	while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
	4437	if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
	4438	goto LBL_Y;
	4439	}
	4440
	4441	/* if y == 1 then composite */
	4442	if (mp_cmp_d (&y, 1) == MP_EQ) {
	4443	goto LBL_Y;
	4444	}
	4445
	4446	++j;
	4447	}
	4448
	4449	/* if y != n1 then composite */
	4450	if (mp_cmp (&y, &n1) != MP_EQ) {
	4451	goto LBL_Y;
	4452	}
	4453	}
	4454
	4455	/* probably prime now */
	4456	*result = MP_YES;
	4457	LBL_Y:mp_clear (&y);
	4458	LBL_R:mp_clear (&r);
	4459	LBL_N1:mp_clear (&n1);
	4460	return err;
	4461	}
	4462
	4463
	4464	/* determines if an integers is divisible by one
	4465	* of the first PRIME_SIZE primes or not
	4466	*
	4467	* sets result to 0 if not, 1 if yes
	4468	*/
	4469	static int mp_prime_is_divisible (mp_int * a, int *result)
	4470	{
	4471	int err, ix;
	4472	mp_digit res;
	4473
	4474	/* default to not */
	4475	*result = MP_NO;
	4476
	4477	for (ix = 0; ix < PRIME_SIZE; ix++) {
	4478	/* what is a mod LBL_prime_tab[ix] */
	4479	if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) {
	4480	return err;
	4481	}
	4482
	4483	/* is the residue zero? */
	4484	if (res == 0) {
	4485	*result = MP_YES;
	4486	return MP_OKAY;
	4487	}
	4488	}
	4489
	4490	return MP_OKAY;
	4491	}
	4492
	4493	/*
	4494	* Sets result to 1 if probably prime, 0 otherwise
	4495	*/
	4496	int mp_prime_is_prime (mp_int * a, int t, int *result)
	4497	{
	4498	mp_int b;
	4499	int ix, err, res;
	4500
	4501	/* default to no */
	4502	*result = MP_NO;
	4503
	4504	/* valid value of t? */
	4505	if (t <= 0 \|\| t > PRIME_SIZE) {
	4506	return MP_VAL;
	4507	}
	4508
	4509	/* is the input equal to one of the primes in the table? */
	4510	for (ix = 0; ix < PRIME_SIZE; ix++) {
	4511	if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
	4512	*result = 1;
	4513	return MP_OKAY;
	4514	}
	4515	}
	4516
	4517	/* first perform trial division */
	4518	if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
	4519	return err;
	4520	}
	4521
	4522	/* return if it was trivially divisible */
	4523	if (res == MP_YES) {
	4524	return MP_OKAY;
	4525	}
	4526
	4527	/* now perform the miller-rabin rounds */
	4528	if ((err = mp_init (&b)) != MP_OKAY) {
	4529	return err;
	4530	}
	4531
	4532	for (ix = 0; ix < t; ix++) {
	4533	/* set the prime */
	4534	if ((err = mp_set (&b, ltm_prime_tab[ix])) != MP_OKAY) {
	4535	goto LBL_B;
	4536	}
	4537
	4538	if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
	4539	goto LBL_B;
	4540	}
	4541
	4542	if (res == MP_NO) {
	4543	goto LBL_B;
	4544	}
	4545	}
	4546
	4547	/* passed the test */
	4548	*result = MP_YES;
	4549	LBL_B:mp_clear (&b);
	4550	return err;
	4551	}
	4552
	4553
	4554	/*
	4555	* Sets result to 1 if probably prime, 0 otherwise
	4556	*/
	4557	int mp_prime_is_prime_ex (mp_int * a, int t, int result, WC_RNG rng)
	4558	{
	4559	mp_int b, c;
	4560	int ix, err, res;
	4561	byte* base = NULL;
	4562	word32 baseSz = 0;
	4563
	4564	/* default to no */
	4565	*result = MP_NO;
	4566
	4567	/* valid value of t? */
	4568	if (t <= 0 \|\| t > PRIME_SIZE) {
	4569	return MP_VAL;
	4570	}
	4571
	4572	/* is the input equal to one of the primes in the table? */
	4573	for (ix = 0; ix < PRIME_SIZE; ix++) {
	4574	if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
	4575	*result = MP_YES;
	4576	return MP_OKAY;
	4577	}
	4578	}
	4579
	4580	/* first perform trial division */
	4581	if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
	4582	return err;
	4583	}
	4584
	4585	/* return if it was trivially divisible */
	4586	if (res == MP_YES) {
	4587	return MP_OKAY;
	4588	}
	4589
	4590	/* now perform the miller-rabin rounds */
	4591	if ((err = mp_init (&b)) != MP_OKAY) {
	4592	return err;
	4593	}
	4594	if ((err = mp_init (&c)) != MP_OKAY) {
	4595	mp_clear(&b);
	4596	return err;
	4597	}
	4598
	4599	baseSz = mp_count_bits(a);
	4600	baseSz = (baseSz / 8) + ((baseSz % 8) ? 1 : 0);
	4601
	4602	base = (byte*)XMALLOC(baseSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
	4603	if (base == NULL) {
	4604	err = MP_MEM;
	4605	goto LBL_B;
	4606	}
	4607
	4608	if ((err = mp_sub_d(a, 2, &c)) != MP_OKAY) {
	4609	goto LBL_B;
	4610	}
	4611
	4612	/* now do a miller rabin with up to t random numbers, this should
	4613	* give a (1/4)^t chance of a false prime. */
	4614	for (ix = 0; ix < t; ix++) {
	4615	/* Set a test candidate. */
	4616	if ((err = wc_RNG_GenerateBlock(rng, base, baseSz)) != 0) {
	4617	goto LBL_B;
	4618	}
	4619
	4620	if ((err = mp_read_unsigned_bin(&b, base, baseSz)) != MP_OKAY) {
	4621	goto LBL_B;
	4622	}
	4623
	4624	if (mp_cmp_d(&b, 2) != MP_GT \|\| mp_cmp(&b, &c) != MP_LT)
	4625	continue;
	4626
	4627	if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
	4628	goto LBL_B;
	4629	}
	4630
	4631	if (res == MP_NO) {
	4632	goto LBL_B;
	4633	}
	4634	}
	4635
	4636	/* passed the test */
	4637	*result = MP_YES;
	4638	LBL_B:mp_clear (&b);
	4639	mp_clear (&c);
	4640	XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER);
	4641	return err;
	4642	}
	4643
	4644	#endif /* WOLFSSL_KEY_GEN NO_DH NO_DSA NO_RSA */
	4645
	4646	#ifdef WOLFSSL_KEY_GEN
	4647
	4648	static const int USE_BBS = 1;
	4649
	4650	int mp_rand_prime(mp_int* N, int len, WC_RNG* rng, void* heap)
	4651	{
	4652	int err, res, type;
	4653	byte* buf;
	4654
	4655	if (N == NULL \|\| rng == NULL)
	4656	return MP_VAL;
	4657
	4658	/* get type */
	4659	if (len < 0) {
	4660	type = USE_BBS;
	4661	len = -len;
	4662	} else {
	4663	type = 0;
	4664	}
	4665
	4666	/* allow sizes between 2 and 512 bytes for a prime size */
	4667	if (len < 2 \|\| len > 512) {
	4668	return MP_VAL;
	4669	}
	4670
	4671	/* allocate buffer to work with */
	4672	buf = (byte*)XMALLOC(len, heap, DYNAMIC_TYPE_RSA);
	4673	if (buf == NULL) {
	4674	return MP_MEM;
	4675	}
	4676	XMEMSET(buf, 0, len);
	4677
	4678	do {
	4679	#ifdef SHOW_GEN
	4680	printf(".");
	4681	fflush(stdout);
	4682	#endif
	4683	/* generate value */
	4684	err = wc_RNG_GenerateBlock(rng, buf, len);
	4685	if (err != 0) {
	4686	XFREE(buf, heap, DYNAMIC_TYPE_RSA);
	4687	return err;
	4688	}
	4689
	4690	/* munge bits */
	4691	buf[0] \|= 0x80 \| 0x40;
	4692	buf[len-1] \|= 0x01 \| ((type & USE_BBS) ? 0x02 : 0x00);
	4693
	4694	/* load value */
	4695	if ((err = mp_read_unsigned_bin(N, buf, len)) != MP_OKAY) {
	4696	XFREE(buf, heap, DYNAMIC_TYPE_RSA);
	4697	return err;
	4698	}
	4699
	4700	/* test */
	4701	/* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance
	4702	* of a 1024-bit candidate being a false positive, when it is our
	4703	* prime candidate. (Note 4.49 of Handbook of Applied Cryptography.)
	4704	* Using 8 because we've always used 8. */
	4705	if ((err = mp_prime_is_prime_ex(N, 8, &res, rng)) != MP_OKAY) {
	4706	XFREE(buf, heap, DYNAMIC_TYPE_RSA);
	4707	return err;
	4708	}
	4709	} while (res == MP_NO);
	4710
	4711	XMEMSET(buf, 0, len);
	4712	XFREE(buf, heap, DYNAMIC_TYPE_RSA);
	4713
	4714	return MP_OKAY;
	4715	}
	4716
	4717
	4718	/* computes least common multiple as \|ab\|/(a, b) /
	4719	int mp_lcm (mp_int * a, mp_int * b, mp_int * c)
	4720	{
	4721	int res;
	4722	mp_int t1, t2;
	4723
	4724
	4725	if ((res = mp_init_multi (&t1, &t2, NULL, NULL, NULL, NULL)) != MP_OKAY) {
	4726	return res;
	4727	}
	4728
	4729	/* t1 = get the GCD of the two inputs */
	4730	if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
	4731	goto LBL_T;
	4732	}
	4733
	4734	/* divide the smallest by the GCD */
	4735	if (mp_cmp_mag(a, b) == MP_LT) {
	4736	/* store quotient in t2 such that t2 * b is the LCM */
	4737	if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
	4738	goto LBL_T;
	4739	}
	4740	res = mp_mul(b, &t2, c);
	4741	} else {
	4742	/* store quotient in t2 such that t2 * a is the LCM */
	4743	if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
	4744	goto LBL_T;
	4745	}
	4746	res = mp_mul(a, &t2, c);
	4747	}
	4748
	4749	/* fix the sign to positive */
	4750	c->sign = MP_ZPOS;
	4751
	4752	LBL_T:
	4753	mp_clear(&t1);
	4754	mp_clear(&t2);
	4755	return res;
	4756	}
	4757
	4758
	4759
	4760	/* Greatest Common Divisor using the binary method */
	4761	int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
	4762	{
	4763	mp_int u, v;
	4764	int k, u_lsb, v_lsb, res;
	4765
	4766	/* either zero than gcd is the largest */
	4767	if (mp_iszero (a) == MP_YES) {
	4768	return mp_abs (b, c);
	4769	}
	4770	if (mp_iszero (b) == MP_YES) {
	4771	return mp_abs (a, c);
	4772	}
	4773
	4774	/* get copies of a and b we can modify */
	4775	if ((res = mp_init_copy (&u, a)) != MP_OKAY) {
	4776	return res;
	4777	}
	4778
	4779	if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
	4780	goto LBL_U;
	4781	}
	4782
	4783	/* must be positive for the remainder of the algorithm */
	4784	u.sign = v.sign = MP_ZPOS;
	4785
	4786	/* B1. Find the common power of two for u and v */
	4787	u_lsb = mp_cnt_lsb(&u);
	4788	v_lsb = mp_cnt_lsb(&v);
	4789	k = MIN(u_lsb, v_lsb);
	4790
	4791	if (k > 0) {
	4792	/* divide the power of two out */
	4793	if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
	4794	goto LBL_V;
	4795	}
	4796
	4797	if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
	4798	goto LBL_V;
	4799	}
	4800	}
	4801
	4802	/* divide any remaining factors of two out */
	4803	if (u_lsb != k) {
	4804	if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
	4805	goto LBL_V;
	4806	}
	4807	}
	4808
	4809	if (v_lsb != k) {
	4810	if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
	4811	goto LBL_V;
	4812	}
	4813	}
	4814
	4815	while (mp_iszero(&v) == MP_NO) {
	4816	/* make sure v is the largest */
	4817	if (mp_cmp_mag(&u, &v) == MP_GT) {
	4818	/* swap u and v to make sure v is >= u */
	4819	mp_exch(&u, &v);
	4820	}
	4821
	4822	/* subtract smallest from largest */
	4823	if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
	4824	goto LBL_V;
	4825	}
	4826
	4827	/* Divide out all factors of two */
	4828	if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
	4829	goto LBL_V;
	4830	}
	4831	}
	4832
	4833	/* multiply by 2*k which we divided out at the beginning /
	4834	if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
	4835	goto LBL_V;
	4836	}
	4837	c->sign = MP_ZPOS;
	4838	res = MP_OKAY;
	4839	LBL_V:mp_clear (&u);
	4840	LBL_U:mp_clear (&v);
	4841	return res;
	4842	}
	4843
	4844	#endif /* WOLFSSL_KEY_GEN */
	4845
	4846
	4847	#if !defined(NO_DSA) \|\| defined(HAVE_ECC) \|\| defined(WOLFSSL_KEY_GEN) \|\| \
	4848	defined(HAVE_COMP_KEY) \|\| defined(WOLFSSL_DEBUG_MATH) \|\| \
	4849	defined(DEBUG_WOLFSSL) \|\| defined(OPENSSL_EXTRA)
	4850
	4851	/* chars used in radix conversions */
	4852	const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ\
	4853	abcdefghijklmnopqrstuvwxyz+/";
	4854	#endif
	4855
	4856	#if !defined(NO_DSA) \|\| defined(HAVE_ECC)
	4857	/* read a string [ASCII] in a given radix */
	4858	int mp_read_radix (mp_int * a, const char *str, int radix)
	4859	{
	4860	int y, res, neg;
	4861	char ch;
	4862
	4863	/* zero the digit bignum */
	4864	mp_zero(a);
	4865
	4866	/* make sure the radix is ok */
	4867	if (radix < MP_RADIX_BIN \|\| radix > MP_RADIX_MAX) {
	4868	return MP_VAL;
	4869	}
	4870
	4871	/* if the leading digit is a
	4872	* minus set the sign to negative.
	4873	*/
	4874	if (*str == '-') {
	4875	++str;
	4876	neg = MP_NEG;
	4877	} else {
	4878	neg = MP_ZPOS;
	4879	}
	4880
	4881	/* set the integer to the default of zero */
	4882	mp_zero (a);
	4883
	4884	/* process each digit of the string */
	4885	while (*str != '\0') {
	4886	/* if the radix <= 36 the conversion is case insensitive
	4887	* this allows numbers like 1AB and 1ab to represent the same value
	4888	* [e.g. in hex]
	4889	*/
	4890	ch = (radix <= 36) ? (char)XTOUPPER((unsigned char)str) : str;
	4891	for (y = 0; y < 64; y++) {
	4892	if (ch == mp_s_rmap[y]) {
	4893	break;
	4894	}
	4895	}
	4896
	4897	/* if the char was found in the map
	4898	* and is less than the given radix add it
	4899	* to the number, otherwise exit the loop.
	4900	*/
	4901	if (y < radix) {
	4902	if ((res = mp_mul_d (a, (mp_digit) radix, a)) != MP_OKAY) {
	4903	return res;
	4904	}
	4905	if ((res = mp_add_d (a, (mp_digit) y, a)) != MP_OKAY) {
	4906	return res;
	4907	}
	4908	} else {
	4909	break;
	4910	}
	4911	++str;
	4912	}
	4913
	4914	/* if digit in isn't null term, then invalid character was found */
	4915	if (*str != '\0') {
	4916	mp_zero (a);
	4917	return MP_VAL;
	4918	}
	4919
	4920	/* set the sign only if a != 0 */
	4921	if (mp_iszero(a) != MP_YES) {
	4922	a->sign = neg;
	4923	}
	4924	return MP_OKAY;
	4925	}
	4926	#endif /* !defined(NO_DSA) \|\| defined(HAVE_ECC) */
	4927
	4928	#ifdef WC_MP_TO_RADIX
	4929
	4930	/* returns size of ASCII representation */
	4931	int mp_radix_size (mp_int a, int radix, int size)
	4932	{
	4933	int res, digs;
	4934	mp_int t;
	4935	mp_digit d;
	4936
	4937	*size = 0;
	4938
	4939	/* special case for binary */
	4940	if (radix == MP_RADIX_BIN) {
	4941	*size = mp_count_bits (a) + (a->sign == MP_NEG ? 1 : 0) + 1;
	4942	return MP_OKAY;
	4943	}
	4944
	4945	/* make sure the radix is in range */
	4946	if (radix < MP_RADIX_BIN \|\| radix > MP_RADIX_MAX) {
	4947	return MP_VAL;
	4948	}
	4949
	4950	if (mp_iszero(a) == MP_YES) {
	4951	*size = 2;
	4952	return MP_OKAY;
	4953	}
	4954
	4955	/* digs is the digit count */
	4956	digs = 0;
	4957
	4958	/* if it's negative add one for the sign */
	4959	if (a->sign == MP_NEG) {
	4960	++digs;
	4961	}
	4962
	4963	/* init a copy of the input */
	4964	if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
	4965	return res;
	4966	}
	4967
	4968	/* force temp to positive */
	4969	t.sign = MP_ZPOS;
	4970
	4971	/* fetch out all of the digits */
	4972	while (mp_iszero (&t) == MP_NO) {
	4973	if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
	4974	mp_clear (&t);
	4975	return res;
	4976	}
	4977	++digs;
	4978	}
	4979	mp_clear (&t);
	4980
	4981	/* return digs + 1, the 1 is for the NULL byte that would be required. */
	4982	*size = digs + 1;
	4983	return MP_OKAY;
	4984	}
	4985
	4986	/* stores a bignum as a ASCII string in a given radix (2..64) */
	4987	int mp_toradix (mp_int a, char str, int radix)
	4988	{
	4989	int res, digs;
	4990	mp_int t;
	4991	mp_digit d;
	4992	char *_s = str;
	4993
	4994	/* check range of the radix */
	4995	if (radix < MP_RADIX_BIN \|\| radix > MP_RADIX_MAX) {
	4996	return MP_VAL;
	4997	}
	4998
	4999	/* quick out if its zero */
	5000	if (mp_iszero(a) == MP_YES) {
	5001	*str++ = '0';
	5002	*str = '\0';
	5003	return MP_OKAY;
	5004	}
	5005
	5006	if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
	5007	return res;
	5008	}
	5009
	5010	/* if it is negative output a - */
	5011	if (t.sign == MP_NEG) {
	5012	++_s;
	5013	*str++ = '-';
	5014	t.sign = MP_ZPOS;
	5015	}
	5016
	5017	digs = 0;
	5018	while (mp_iszero (&t) == MP_NO) {
	5019	if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
	5020	mp_clear (&t);
	5021	return res;
	5022	}
	5023	*str++ = mp_s_rmap[d];
	5024	++digs;
	5025	}
	5026
	5027	/* reverse the digits of the string. In this case _s points
	5028	* to the first digit [excluding the sign] of the number]
	5029	*/
	5030	bn_reverse ((unsigned char *)_s, digs);
	5031
	5032	/* append a NULL so the string is properly terminated */
	5033	*str = '\0';
	5034
	5035	mp_clear (&t);
	5036	return MP_OKAY;
	5037	}
	5038
	5039	#ifdef WOLFSSL_DEBUG_MATH
	5040	void mp_dump(const char* desc, mp_int* a, byte verbose)
	5041	{
	5042	char *buffer;
	5043	int size = a->alloc;
	5044
	5045	buffer = (char)XMALLOC(size sizeof(mp_digit) * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
	5046	if (buffer == NULL) {
	5047	return;
	5048	}
	5049
	5050	printf("%s: ptr=%p, used=%d, sign=%d, size=%d, mpd=%d\n",
	5051	desc, a, a->used, a->sign, size, (int)sizeof(mp_digit));
	5052
	5053	mp_tohex(a, buffer);
	5054	printf(" %s\n ", buffer);
	5055
	5056	if (verbose) {
	5057	int i;
	5058	for(i=0; i<a->alloc * (int)sizeof(mp_digit); i++) {
	5059	printf("%02x ", (((byte)a->dp) + i));
	5060	}
	5061	printf("\n");
	5062	}
	5063
	5064	XFREE(buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
	5065	}
	5066	#endif /* WOLFSSL_DEBUG_MATH */
	5067
	5068	#endif /* WC_MP_TO_RADIX */
	5069
	5070	#endif /* WOLFSSL_SP_MATH */
	5071
	5072	#endif /* USE_FAST_MATH */
	5073
	5074	#endif /* NO_BIG_INT */
	5075

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: azure_iot_hub/trunk/wolfssl-3.15.7/wolfcrypt/src/integer.c@ 389

Download in other formats: