Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

integer.c@ 464

Last change on this file since 464 was 464, checked in by coas-nagasima, 3 years ago
WolfSSLとAzure IoT SDKを更新
Property svn:eol-style set to `native` Property svn:mime-type set to `text/x-csrc;charset=UTF-8`
File size: 118.4 KB

Rev	Line
[457]	1	/* integer.c
	2	*
	3	* Copyright (C) 2006-2020 wolfSSL Inc.
	4	*
	5	* This file is part of wolfSSL.
	6	*
	7	* wolfSSL is free software; you can redistribute it and/or modify
	8	* it under the terms of the GNU General Public License as published by
	9	* the Free Software Foundation; either version 2 of the License, or
	10	* (at your option) any later version.
	11	*
	12	* wolfSSL is distributed in the hope that it will be useful,
	13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	15	* GNU General Public License for more details.
	16	*
	17	* You should have received a copy of the GNU General Public License
	18	* along with this program; if not, write to the Free Software
	19	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
	20	*/
	21
	22
	23
	24	/*
	25	* Based on public domain LibTomMath 0.38 by Tom St Denis, tomstdenis@iahu.ca,
	26	* http://math.libtomcrypt.com
	27	*/
	28
	29
	30	#ifdef HAVE_CONFIG_H
	31	#include <config.h>
	32	#endif
	33
	34	/* in case user set USE_FAST_MATH there */
	35	#include <wolfssl/wolfcrypt/settings.h>
	36
	37	#ifdef NO_INLINE
	38	#include <wolfssl/wolfcrypt/misc.h>
	39	#else
	40	#define WOLFSSL_MISC_INCLUDED
	41	#include <wolfcrypt/src/misc.c>
	42	#endif
	43
	44	#ifndef NO_BIG_INT
	45
	46	#ifndef USE_FAST_MATH
	47
	48	#ifndef WOLFSSL_SP_MATH
	49
	50	#include <wolfssl/wolfcrypt/integer.h>
	51
	52	#if defined(FREESCALE_LTC_TFM)
	53	#include <wolfssl/wolfcrypt/port/nxp/ksdk_port.h>
	54	#endif
	55	#ifdef WOLFSSL_DEBUG_MATH
	56	#include <stdio.h>
	57	#endif
	58
	59	#ifdef SHOW_GEN
	60	#ifndef NO_STDIO_FILESYSTEM
	61	#include <stdio.h>
	62	#endif
	63	#endif
	64
	65	#if defined(WOLFSSL_HAVE_SP_RSA) \|\| defined(WOLFSSL_HAVE_SP_DH)
	66	#ifdef __cplusplus
	67	extern "C" {
	68	#endif
	69	WOLFSSL_LOCAL int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod,
	70	mp_int* res);
	71	WOLFSSL_LOCAL int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod,
	72	mp_int* res);
	73	WOLFSSL_LOCAL int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod,
	74	mp_int* res);
	75	WOLFSSL_LOCAL int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod,
	76	mp_int* res);
	77	WOLFSSL_LOCAL int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod,
	78	mp_int* res);
	79	#ifdef __cplusplus
	80	} /* extern "C" */
	81	#endif
	82	#endif
	83
	84	/* reverse an array, used for radix code */
	85	static void
	86	bn_reverse (unsigned char *s, int len)
	87	{
	88	int ix, iy;
	89	unsigned char t;
	90
	91	ix = 0;
	92	iy = len - 1;
	93	while (ix < iy) {
	94	t = s[ix];
	95	s[ix] = s[iy];
	96	s[iy] = t;
	97	++ix;
	98	--iy;
	99	}
	100	}
	101
	102	/* math settings check */
	103	word32 CheckRunTimeSettings(void)
	104	{
	105	return CTC_SETTINGS;
	106	}
	107
	108
	109	/* handle up to 6 inits */
	110	int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e,
	111	mp_int* f)
	112	{
	113	int res = MP_OKAY;
	114
	115	if (a) XMEMSET(a, 0, sizeof(mp_int));
	116	if (b) XMEMSET(b, 0, sizeof(mp_int));
	117	if (c) XMEMSET(c, 0, sizeof(mp_int));
	118	if (d) XMEMSET(d, 0, sizeof(mp_int));
	119	if (e) XMEMSET(e, 0, sizeof(mp_int));
	120	if (f) XMEMSET(f, 0, sizeof(mp_int));
	121
	122	if (a && ((res = mp_init(a)) != MP_OKAY))
	123	return res;
	124
	125	if (b && ((res = mp_init(b)) != MP_OKAY)) {
	126	mp_clear(a);
	127	return res;
	128	}
	129
	130	if (c && ((res = mp_init(c)) != MP_OKAY)) {
	131	mp_clear(a); mp_clear(b);
	132	return res;
	133	}
	134
	135	if (d && ((res = mp_init(d)) != MP_OKAY)) {
	136	mp_clear(a); mp_clear(b); mp_clear(c);
	137	return res;
	138	}
	139
	140	if (e && ((res = mp_init(e)) != MP_OKAY)) {
	141	mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d);
	142	return res;
	143	}
	144
	145	if (f && ((res = mp_init(f)) != MP_OKAY)) {
	146	mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d); mp_clear(e);
	147	return res;
	148	}
	149
	150	return res;
	151	}
	152
	153
	154	/* init a new mp_int */
	155	int mp_init (mp_int * a)
	156	{
	157	/* Safeguard against passing in a null pointer */
	158	if (a == NULL)
	159	return MP_VAL;
	160
	161	/* defer allocation until mp_grow */
	162	a->dp = NULL;
	163
	164	/* set the used to zero, allocated digits to the default precision
	165	* and sign to positive */
	166	a->used = 0;
	167	a->alloc = 0;
	168	a->sign = MP_ZPOS;
	169	#ifdef HAVE_WOLF_BIGINT
	170	wc_bigint_init(&a->raw);
	171	#endif
	172
	173	return MP_OKAY;
	174	}
	175
	176
	177	/* clear one (frees) */
	178	void mp_clear (mp_int * a)
	179	{
	180	int i;
	181
	182	if (a == NULL)
	183	return;
	184
	185	/* only do anything if a hasn't been freed previously */
	186	if (a->dp != NULL) {
	187	/* first zero the digits */
	188	for (i = 0; i < a->used; i++) {
	189	a->dp[i] = 0;
	190	}
	191
	192	/* free ram */
	193	mp_free(a);
	194
	195	/* reset members to make debugging easier */
	196	a->alloc = a->used = 0;
	197	a->sign = MP_ZPOS;
	198	}
	199	}
	200
	201	void mp_free (mp_int * a)
	202	{
	203	/* only do anything if a hasn't been freed previously */
	204	if (a->dp != NULL) {
	205	/* free ram */
	206	XFREE(a->dp, 0, DYNAMIC_TYPE_BIGINT);
	207	a->dp = NULL;
	208	}
	209
	210	#ifdef HAVE_WOLF_BIGINT
	211	wc_bigint_free(&a->raw);
	212	#endif
	213	}
	214
	215	void mp_forcezero(mp_int * a)
	216	{
	217	if (a == NULL)
	218	return;
	219
	220	/* only do anything if a hasn't been freed previously */
	221	if (a->dp != NULL) {
	222	/* force zero the used digits */
	223	ForceZero(a->dp, a->used * sizeof(mp_digit));
	224	#ifdef HAVE_WOLF_BIGINT
	225	wc_bigint_zero(&a->raw);
	226	#endif
	227	/* free ram */
	228	mp_free(a);
	229
	230	/* reset members to make debugging easier */
	231	a->alloc = a->used = 0;
	232	a->sign = MP_ZPOS;
	233	}
	234
	235	a->sign = MP_ZPOS;
	236	a->used = 0;
	237	}
	238
	239
	240	/* get the size for an unsigned equivalent */
	241	int mp_unsigned_bin_size (mp_int * a)
	242	{
	243	int size = mp_count_bits (a);
	244	return (size / 8 + ((size & 7) != 0 ? 1 : 0));
	245	}
	246
	247
	248	/* returns the number of bits in an int */
	249	int mp_count_bits (mp_int * a)
	250	{
	251	int r;
	252	mp_digit q;
	253
	254	/* shortcut */
	255	if (a->used == 0) {
	256	return 0;
	257	}
	258
	259	/* get number of digits and add that */
	260	r = (a->used - 1) * DIGIT_BIT;
	261
	262	/* take the last digit and count the bits in it */
	263	q = a->dp[a->used - 1];
	264	while (q > ((mp_digit) 0)) {
	265	++r;
	266	q >>= ((mp_digit) 1);
	267	}
	268	return r;
	269	}
	270
	271
	272	int mp_leading_bit (mp_int * a)
	273	{
[464]	274	int c = mp_count_bits(a);
[457]	275
[464]	276	if (c == 0) return 0;
	277	return (c % 8) == 0;
[457]	278	}
	279
	280	int mp_to_unsigned_bin_at_pos(int x, mp_int t, unsigned char b)
	281	{
	282	int res = 0;
	283	while (mp_iszero(t) == MP_NO) {
	284	#ifndef MP_8BIT
	285	b[x++] = (unsigned char) (t->dp[0] & 255);
	286	#else
	287	b[x++] = (unsigned char) (t->dp[0] \| ((t->dp[1] & 0x01) << 7));
	288	#endif
	289	if ((res = mp_div_2d (t, 8, t, NULL)) != MP_OKAY) {
	290	return res;
	291	}
	292	res = x;
	293	}
	294	return res;
	295	}
	296
	297	/* store in unsigned [big endian] format */
	298	int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
	299	{
	300	int x, res;
	301	mp_int t;
	302
	303	if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
	304	return res;
	305	}
	306
	307	x = mp_to_unsigned_bin_at_pos(0, &t, b);
	308	if (x < 0) {
	309	mp_clear(&t);
	310	return x;
	311	}
	312
	313	bn_reverse (b, x);
	314	mp_clear (&t);
	315	return res;
	316	}
	317
	318	int mp_to_unsigned_bin_len(mp_int * a, unsigned char *b, int c)
	319	{
	320	int i, len;
	321
	322	len = mp_unsigned_bin_size(a);
	323
[464]	324	if (len > c) {
	325	return MP_VAL;
	326	}
	327
[457]	328	/* pad front w/ zeros to match length */
[464]	329	for (i = 0; i < c - len; i++) {
	330	b[i] = 0x00;
	331	}
[457]	332	return mp_to_unsigned_bin(a, b + i);
	333	}
	334
	335	/* creates "a" then copies b into it */
	336	int mp_init_copy (mp_int * a, mp_int * b)
	337	{
	338	int res;
	339
	340	if ((res = mp_init_size (a, b->used)) != MP_OKAY) {
	341	return res;
	342	}
	343
	344	if((res = mp_copy (b, a)) != MP_OKAY) {
	345	mp_clear(a);
	346	}
	347
	348	return res;
	349	}
	350
	351
	352	/* copy, b = a */
	353	int mp_copy (mp_int * a, mp_int * b)
	354	{
	355	int res, n;
	356
	357	/* Safeguard against passing in a null pointer */
	358	if (a == NULL \|\| b == NULL)
	359	return MP_VAL;
	360
	361	/* if dst == src do nothing */
	362	if (a == b) {
	363	return MP_OKAY;
	364	}
	365
	366	/* grow dest */
	367	if (b->alloc < a->used \|\| b->alloc == 0) {
	368	if ((res = mp_grow (b, a->used)) != MP_OKAY) {
	369	return res;
	370	}
	371	}
	372
	373	/* zero b and copy the parameters over */
	374	{
	375	mp_digit tmpa, tmpb;
	376
	377	/* pointer aliases */
	378
	379	/* source */
	380	tmpa = a->dp;
	381
	382	/* destination */
	383	tmpb = b->dp;
	384
	385	/* copy all the digits */
	386	for (n = 0; n < a->used; n++) {
	387	tmpb++ = tmpa++;
	388	}
	389
	390	/* clear high digits */
	391	for (; n < b->used && b->dp; n++) {
	392	*tmpb++ = 0;
	393	}
	394	}
	395
	396	/* copy used count and sign */
	397	b->used = a->used;
	398	b->sign = a->sign;
	399	return MP_OKAY;
	400	}
	401
	402
	403	/* grow as required */
	404	int mp_grow (mp_int * a, int size)
	405	{
	406	int i;
	407	mp_digit *tmp;
	408
	409	/* if the alloc size is smaller alloc more ram */
	410	if (a->alloc < size \|\| size == 0) {
	411	/* ensure there are always at least MP_PREC digits extra on top */
	412	size += (MP_PREC * 2) - (size % MP_PREC);
	413
	414	/* reallocate the array a->dp
	415	*
	416	* We store the return in a temporary variable
	417	* in case the operation failed we don't want
	418	* to overwrite the dp member of a.
	419	*/
	420	tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size, NULL,
	421	DYNAMIC_TYPE_BIGINT);
	422	if (tmp == NULL) {
	423	/* reallocation failed but "a" is still valid [can be freed] */
	424	return MP_MEM;
	425	}
	426
	427	/* reallocation succeeded so set a->dp */
	428	a->dp = tmp;
	429
	430	/* zero excess digits */
	431	i = a->alloc;
	432	a->alloc = size;
	433	for (; i < a->alloc; i++) {
	434	a->dp[i] = 0;
	435	}
	436	}
	437	return MP_OKAY;
	438	}
	439
	440
	441	/* shift right by a certain bit count (store quotient in c, optional
	442	remainder in d) */
	443	int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
	444	{
	445	int D, res;
	446	mp_int t;
	447
	448
	449	/* if the shift count is <= 0 then we do no work */
	450	if (b <= 0) {
	451	res = mp_copy (a, c);
	452	if (d != NULL) {
	453	mp_zero (d);
	454	}
	455	return res;
	456	}
	457
	458	if ((res = mp_init (&t)) != MP_OKAY) {
	459	return res;
	460	}
	461
	462	/* get the remainder */
	463	if (d != NULL) {
	464	if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
	465	mp_clear (&t);
	466	return res;
	467	}
	468	}
	469
	470	/* copy */
	471	if ((res = mp_copy (a, c)) != MP_OKAY) {
	472	mp_clear (&t);
	473	return res;
	474	}
	475
	476	/* shift by as many digits in the bit count */
	477	if (b >= (int)DIGIT_BIT) {
	478	mp_rshd (c, b / DIGIT_BIT);
	479	}
	480
	481	/* shift any bit count < DIGIT_BIT */
	482	D = (b % DIGIT_BIT);
	483	if (D != 0) {
	484	mp_rshb(c, D);
	485	}
	486	mp_clamp (c);
	487	if (d != NULL) {
	488	mp_exch (&t, d);
	489	}
	490	mp_clear (&t);
	491	return MP_OKAY;
	492	}
	493
	494
	495	/* set to zero */
	496	void mp_zero (mp_int * a)
	497	{
	498	int n;
	499	mp_digit *tmp;
	500
	501	if (a == NULL)
	502	return;
	503
	504	a->sign = MP_ZPOS;
	505	a->used = 0;
	506
	507	tmp = a->dp;
	508	for (n = 0; n < a->alloc; n++) {
	509	*tmp++ = 0;
	510	}
	511	}
	512
	513
	514	/* trim unused digits
	515	*
	516	* This is used to ensure that leading zero digits are
	517	* trimmed and the leading "used" digit will be non-zero
	518	* Typically very fast. Also fixes the sign if there
	519	* are no more leading digits
	520	*/
	521	void mp_clamp (mp_int * a)
	522	{
	523	/* decrease used while the most significant digit is
	524	* zero.
	525	*/
	526	while (a->used > 0 && a->dp[a->used - 1] == 0) {
	527	--(a->used);
	528	}
	529
	530	/* reset the sign flag if used == 0 */
	531	if (a->used == 0) {
	532	a->sign = MP_ZPOS;
	533	}
	534	}
	535
	536
	537	/* swap the elements of two integers, for cases where you can't simply swap the
	538	* mp_int pointers around
	539	*/
	540	void mp_exch (mp_int * a, mp_int * b)
	541	{
	542	mp_int t;
	543
	544	t = *a;
	545	a = b;
	546	*b = t;
	547	}
	548
[464]	549	int mp_cond_swap_ct (mp_int * a, mp_int * b, int c, int m)
	550	{
	551	(void)c;
	552	if (m == 1)
	553	mp_exch(a, b);
	554	return MP_OKAY;
	555	}
[457]	556
[464]	557
[457]	558	/* shift right a certain number of bits */
	559	void mp_rshb (mp_int *c, int x)
	560	{
	561	mp_digit *tmpc, mask, shift;
	562	mp_digit r, rr;
	563	mp_digit D = x;
	564
[464]	565	/* shifting by a negative number not supported, and shifting by
	566	* zero changes nothing.
	567	*/
	568	if (x <= 0) return;
	569
	570	/* shift digits first if needed */
	571	if (x >= DIGIT_BIT) {
	572	mp_rshd(c, x / DIGIT_BIT);
	573	/* recalculate number of bits to shift */
	574	D = x % DIGIT_BIT;
	575	/* check if any more shifting needed */
	576	if (D == 0) return;
	577	}
	578
	579	/* zero shifted is always zero */
[457]	580	if (mp_iszero(c)) return;
	581
	582	/* mask */
	583	mask = (((mp_digit)1) << D) - 1;
	584
	585	/* shift for lsb */
	586	shift = DIGIT_BIT - D;
	587
	588	/* alias */
	589	tmpc = c->dp + (c->used - 1);
	590
	591	/* carry */
	592	r = 0;
	593	for (x = c->used - 1; x >= 0; x--) {
	594	/* get the lower bits of this word in a temp */
	595	rr = *tmpc & mask;
	596
	597	/* shift the current word and mix in the carry bits from previous word */
	598	tmpc = (tmpc >> D) \| (r << shift);
	599	--tmpc;
	600
	601	/* set the carry to the carry bits of the current word found above */
	602	r = rr;
	603	}
	604	mp_clamp(c);
	605	}
	606
	607
	608	/* shift right a certain amount of digits */
	609	void mp_rshd (mp_int * a, int b)
	610	{
	611	int x;
	612
	613	/* if b <= 0 then ignore it */
	614	if (b <= 0) {
	615	return;
	616	}
	617
	618	/* if b > used then simply zero it and return */
	619	if (a->used <= b) {
	620	mp_zero (a);
	621	return;
	622	}
	623
	624	{
	625	mp_digit bottom, top;
	626
	627	/* shift the digits down */
	628
	629	/* bottom */
	630	bottom = a->dp;
	631
	632	/* top [offset into digits] */
	633	top = a->dp + b;
	634
	635	/* this is implemented as a sliding window where
	636	* the window is b-digits long and digits from
	637	* the top of the window are copied to the bottom
	638	*
	639	* e.g.
	640
	641	b-2 \| b-1 \| b0 \| b1 \| b2 \| ... \| bb \| ---->
	642	/\ \| ---->
	643	\-------------------/ ---->
	644	*/
	645	for (x = 0; x < (a->used - b); x++) {
	646	bottom++ = top++;
	647	}
	648
	649	/* zero the top digits */
	650	for (; x < a->used; x++) {
	651	*bottom++ = 0;
	652	}
	653	}
	654
	655	/* remove excess digits */
	656	a->used -= b;
	657	}
	658
	659
	660	/* calc a value mod 2*b /
	661	int mp_mod_2d (mp_int * a, int b, mp_int * c)
	662	{
	663	int x, res;
	664
	665	/* if b is <= 0 then zero the int */
	666	if (b <= 0) {
	667	mp_zero (c);
	668	return MP_OKAY;
	669	}
	670
	671	/* if the modulus is larger than the value than return */
	672	if (b >= (int) (a->used * DIGIT_BIT)) {
	673	res = mp_copy (a, c);
	674	return res;
	675	}
	676
	677	/* copy */
	678	if ((res = mp_copy (a, c)) != MP_OKAY) {
	679	return res;
	680	}
	681
	682	/* zero digits above the last digit of the modulus */
	683	for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
	684	c->dp[x] = 0;
	685	}
	686	/* clear the digit that is not completely outside/inside the modulus */
[464]	687	x = DIGIT_BIT - (b % DIGIT_BIT);
	688	if (x != DIGIT_BIT) {
	689	c->dp[b / DIGIT_BIT] &= ~((mp_digit)0) >> (x + ((sizeof(mp_digit)*8) - DIGIT_BIT));
	690	}
[457]	691	mp_clamp (c);
	692	return MP_OKAY;
	693	}
	694
	695
	696	/* reads a unsigned char array, assumes the msb is stored first [big endian] */
	697	int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
	698	{
	699	int res;
[464]	700	int digits_needed;
[457]	701
[464]	702	while (c > 0 && b[0] == 0) {
	703	c--;
	704	b++;
	705	}
	706
	707	digits_needed = ((c * CHAR_BIT) + DIGIT_BIT - 1) / DIGIT_BIT;
	708
	709	/* make sure there are enough digits available */
	710	if (a->alloc < digits_needed) {
	711	if ((res = mp_grow(a, digits_needed)) != MP_OKAY) {
[457]	712	return res;
	713	}
	714	}
	715
	716	/* zero the int */
	717	mp_zero (a);
	718
	719	/* read the bytes in */
	720	while (c-- > 0) {
	721	if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
	722	return res;
	723	}
	724
	725	#ifndef MP_8BIT
	726	a->dp[0] \|= *b++;
[464]	727	if (a->used == 0)
	728	a->used = 1;
[457]	729	#else
	730	a->dp[0] = (*b & MP_MASK);
	731	a->dp[1] \|= ((*b++ >> 7U) & 1);
[464]	732	if (a->used == 0)
	733	a->used = 2;
[457]	734	#endif
	735	}
	736	mp_clamp (a);
	737	return MP_OKAY;
	738	}
	739
	740
	741	/* shift left by a certain bit count */
	742	int mp_mul_2d (mp_int * a, int b, mp_int * c)
	743	{
	744	mp_digit d;
	745	int res;
	746
	747	/* copy */
	748	if (a != c) {
	749	if ((res = mp_copy (a, c)) != MP_OKAY) {
	750	return res;
	751	}
	752	}
	753
	754	if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
	755	if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
	756	return res;
	757	}
	758	}
	759
	760	/* shift by as many digits in the bit count */
	761	if (b >= (int)DIGIT_BIT) {
	762	if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
	763	return res;
	764	}
	765	}
	766
	767	/* shift any bit count < DIGIT_BIT */
	768	d = (mp_digit) (b % DIGIT_BIT);
	769	if (d != 0) {
	770	mp_digit *tmpc, shift, mask, r, rr;
	771	int x;
	772
	773	/* bitmask for carries */
	774	mask = (((mp_digit)1) << d) - 1;
	775
	776	/* shift for msbs */
	777	shift = DIGIT_BIT - d;
	778
	779	/* alias */
	780	tmpc = c->dp;
	781
	782	/* carry */
	783	r = 0;
	784	for (x = 0; x < c->used; x++) {
	785	/* get the higher bits of the current word */
	786	rr = (*tmpc >> shift) & mask;
	787
	788	/* shift the current word and OR in the carry */
	789	tmpc = (mp_digit)(((tmpc << d) \| r) & MP_MASK);
	790	++tmpc;
	791
	792	/* set the carry to the carry bits of the current word */
	793	r = rr;
	794	}
	795
	796	/* set final carry */
	797	if (r != 0) {
	798	c->dp[(c->used)++] = r;
	799	}
	800	}
	801	mp_clamp (c);
	802	return MP_OKAY;
	803	}
	804
	805
	806	/* shift left a certain amount of digits */
	807	int mp_lshd (mp_int * a, int b)
	808	{
	809	int x, res;
	810
	811	/* if its less than zero return */
	812	if (b <= 0) {
	813	return MP_OKAY;
	814	}
	815
	816	/* grow to fit the new digits */
	817	if (a->alloc < a->used + b) {
	818	if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
	819	return res;
	820	}
	821	}
	822
	823	{
	824	mp_digit top, bottom;
	825
	826	/* increment the used by the shift amount then copy upwards */
	827	a->used += b;
	828
	829	/* top */
	830	top = a->dp + a->used - 1;
	831
	832	/* base */
	833	bottom = a->dp + a->used - 1 - b;
	834
	835	/* much like mp_rshd this is implemented using a sliding window
	836	* except the window goes the other way around. Copying from
	837	* the bottom to the top. see bn_mp_rshd.c for more info.
	838	*/
	839	for (x = a->used - 1; x >= b; x--) {
	840	top-- = bottom--;
	841	}
	842
	843	/* zero the lower digits */
	844	top = a->dp;
	845	for (x = 0; x < b; x++) {
	846	*top++ = 0;
	847	}
	848	}
	849	return MP_OKAY;
	850	}
	851
	852
	853	/* this is a shell function that calls either the normal or Montgomery
	854	* exptmod functions. Originally the call to the montgomery code was
	855	* embedded in the normal function but that wasted a lot of stack space
	856	* for nothing (since 99% of the time the Montgomery code would be called)
	857	*/
	858	#if defined(FREESCALE_LTC_TFM)
	859	int wolfcrypt_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
	860	#else
	861	int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
	862	#endif
	863	{
	864	int dr;
	865
	866	/* modulus P must be positive */
	867	if (mp_iszero(P) \|\| P->sign == MP_NEG) {
	868	return MP_VAL;
	869	}
	870	if (mp_isone(P)) {
	871	mp_set(Y, 0);
	872	return MP_OKAY;
	873	}
	874	if (mp_iszero(X)) {
	875	mp_set(Y, 1);
	876	return MP_OKAY;
	877	}
	878	if (mp_iszero(G)) {
	879	mp_set(Y, 0);
	880	return MP_OKAY;
	881	}
	882
	883	/* if exponent X is negative we have to recurse */
	884	if (X->sign == MP_NEG) {
	885	#ifdef BN_MP_INVMOD_C
	886	mp_int tmpG, tmpX;
	887	int err;
	888
	889	/* first compute 1/G mod P */
	890	if ((err = mp_init(&tmpG)) != MP_OKAY) {
	891	return err;
	892	}
	893	if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
	894	mp_clear(&tmpG);
	895	return err;
	896	}
	897
	898	/* now get \|X\| */
	899	if ((err = mp_init(&tmpX)) != MP_OKAY) {
	900	mp_clear(&tmpG);
	901	return err;
	902	}
	903	if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
	904	mp_clear(&tmpG);
	905	mp_clear(&tmpX);
	906	return err;
	907	}
	908
	909	/* and now compute (1/G)\|X\| instead of GX [X < 0] */
	910	err = mp_exptmod(&tmpG, &tmpX, P, Y);
	911	mp_clear(&tmpG);
	912	mp_clear(&tmpX);
	913	return err;
	914	#else
	915	/* no invmod */
	916	return MP_VAL;
	917	#endif
	918	}
	919
	920	#ifdef BN_MP_EXPTMOD_BASE_2
	921	if (G->used == 1 && G->dp[0] == 2) {
	922	return mp_exptmod_base_2(X, P, Y);
	923	}
	924	#endif
	925
	926	/* modified diminished radix reduction */
	927	#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && \
	928	defined(BN_S_MP_EXPTMOD_C)
	929	if (mp_reduce_is_2k_l(P) == MP_YES) {
	930	return s_mp_exptmod(G, X, P, Y, 1);
	931	}
	932	#endif
	933
	934	#ifdef BN_MP_DR_IS_MODULUS_C
	935	/* is it a DR modulus? */
	936	dr = mp_dr_is_modulus(P);
	937	#else
	938	/* default to no */
	939	dr = 0;
	940	#endif
	941
	942	(void)dr;
	943
	944	#ifdef BN_MP_REDUCE_IS_2K_C
	945	/* if not, is it a unrestricted DR modulus? */
	946	if (dr == 0) {
	947	dr = mp_reduce_is_2k(P) << 1;
	948	}
	949	#endif
	950
	951	/* if the modulus is odd or dr != 0 use the montgomery method */
	952	#ifdef BN_MP_EXPTMOD_FAST_C
	953	if (mp_isodd (P) == MP_YES \|\| dr != 0) {
	954	return mp_exptmod_fast (G, X, P, Y, dr);
	955	} else {
	956	#endif
	957	#ifdef BN_S_MP_EXPTMOD_C
	958	/* otherwise use the generic Barrett reduction technique */
	959	return s_mp_exptmod (G, X, P, Y, 0);
	960	#else
	961	/* no exptmod for evens */
	962	return MP_VAL;
	963	#endif
	964	#ifdef BN_MP_EXPTMOD_FAST_C
	965	}
	966	#endif
	967	}
	968
	969	int mp_exptmod_ex (mp_int * G, mp_int * X, int digits, mp_int * P, mp_int * Y)
	970	{
	971	(void)digits;
	972	return mp_exptmod(G, X, P, Y);
	973	}
	974
	975	/* b = \|a\|
	976	*
	977	* Simple function copies the input and fixes the sign to positive
	978	*/
	979	int mp_abs (mp_int * a, mp_int * b)
	980	{
	981	int res;
	982
	983	/* copy a to b */
	984	if (a != b) {
	985	if ((res = mp_copy (a, b)) != MP_OKAY) {
	986	return res;
	987	}
	988	}
	989
	990	/* force the sign of b to positive */
	991	b->sign = MP_ZPOS;
	992
	993	return MP_OKAY;
	994	}
	995
	996
	997	/* hac 14.61, pp608 */
	998	#if defined(FREESCALE_LTC_TFM)
	999	int wolfcrypt_mp_invmod(mp_int * a, mp_int * b, mp_int * c)
	1000	#else
	1001	int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
	1002	#endif
	1003	{
	1004	/* b cannot be negative or zero, and can not divide by 0 (1/a mod b) */
	1005	if (b->sign == MP_NEG \|\| mp_iszero(b) == MP_YES \|\| mp_iszero(a) == MP_YES) {
	1006	return MP_VAL;
	1007	}
	1008
	1009	#ifdef BN_FAST_MP_INVMOD_C
	1010	/* if the modulus is odd we can use a faster routine instead */
	1011	if ((mp_isodd(b) == MP_YES) && (mp_cmp_d(b, 1) != MP_EQ)) {
	1012	return fast_mp_invmod (a, b, c);
	1013	}
	1014	#endif
	1015
	1016	#ifdef BN_MP_INVMOD_SLOW_C
	1017	return mp_invmod_slow(a, b, c);
	1018	#else
	1019	return MP_VAL;
	1020	#endif
	1021	}
	1022
	1023
	1024	/* computes the modular inverse via binary extended euclidean algorithm,
	1025	* that is c = 1/a mod b
	1026	*
	1027	* Based on slow invmod except this is optimized for the case where b is
	1028	* odd as per HAC Note 14.64 on pp. 610
	1029	*/
	1030	int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
	1031	{
	1032	mp_int x, y, u, v, B, D;
	1033	int res, neg, loop_check = 0;
	1034
	1035	/* 2. [modified] b must be odd */
	1036	if (mp_iseven (b) == MP_YES) {
	1037	return MP_VAL;
	1038	}
	1039
	1040	/* init all our temps */
	1041	if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D)) != MP_OKAY) {
	1042	return res;
	1043	}
	1044
	1045	/* x == modulus, y == value to invert */
	1046	if ((res = mp_copy (b, &x)) != MP_OKAY) {
	1047	goto LBL_ERR;
	1048	}
	1049
	1050	/* we need y = \|a\| */
	1051	if ((res = mp_mod (a, b, &y)) != MP_OKAY) {
	1052	goto LBL_ERR;
	1053	}
	1054
	1055	/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
	1056	if ((res = mp_copy (&x, &u)) != MP_OKAY) {
	1057	goto LBL_ERR;
	1058	}
	1059	if ((res = mp_copy (&y, &v)) != MP_OKAY) {
	1060	goto LBL_ERR;
	1061	}
	1062	if ((res = mp_set (&D, 1)) != MP_OKAY) {
	1063	goto LBL_ERR;
	1064	}
	1065
	1066	top:
	1067	/* 4. while u is even do */
	1068	while (mp_iseven (&u) == MP_YES) {
	1069	/* 4.1 u = u/2 */
	1070	if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
	1071	goto LBL_ERR;
	1072	}
	1073	/* 4.2 if B is odd then */
	1074	if (mp_isodd (&B) == MP_YES) {
	1075	if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
	1076	goto LBL_ERR;
	1077	}
	1078	}
	1079	/* B = B/2 */
	1080	if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
	1081	goto LBL_ERR;
	1082	}
	1083	}
	1084
	1085	/* 5. while v is even do */
	1086	while (mp_iseven (&v) == MP_YES) {
	1087	/* 5.1 v = v/2 */
	1088	if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
	1089	goto LBL_ERR;
	1090	}
	1091	/* 5.2 if D is odd then */
	1092	if (mp_isodd (&D) == MP_YES) {
	1093	/* D = (D-x)/2 */
	1094	if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
	1095	goto LBL_ERR;
	1096	}
	1097	}
	1098	/* D = D/2 */
	1099	if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
	1100	goto LBL_ERR;
	1101	}
	1102	}
	1103
	1104	/* 6. if u >= v then */
	1105	if (mp_cmp (&u, &v) != MP_LT) {
	1106	/* u = u - v, B = B - D */
	1107	if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
	1108	goto LBL_ERR;
	1109	}
	1110
	1111	if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
	1112	goto LBL_ERR;
	1113	}
	1114	} else {
	1115	/* v - v - u, D = D - B */
	1116	if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
	1117	goto LBL_ERR;
	1118	}
	1119
	1120	if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
	1121	goto LBL_ERR;
	1122	}
	1123	}
	1124
	1125	/* if not zero goto step 4 */
	1126	if (mp_iszero (&u) == MP_NO) {
	1127	if (++loop_check > MAX_INVMOD_SZ) {
	1128	res = MP_VAL;
	1129	goto LBL_ERR;
	1130	}
	1131	goto top;
	1132	}
	1133
	1134	/* now a = C, b = D, gcd == gv /
	1135
	1136	/* if v != 1 then there is no inverse */
	1137	if (mp_cmp_d (&v, 1) != MP_EQ) {
	1138	res = MP_VAL;
	1139	goto LBL_ERR;
	1140	}
	1141
	1142	/* b is now the inverse */
	1143	neg = a->sign;
	1144	while (D.sign == MP_NEG) {
	1145	if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
	1146	goto LBL_ERR;
	1147	}
	1148	}
	1149	/* too big */
	1150	while (mp_cmp_mag(&D, b) != MP_LT) {
	1151	if ((res = mp_sub(&D, b, &D)) != MP_OKAY) {
	1152	goto LBL_ERR;
	1153	}
	1154	}
	1155	mp_exch (&D, c);
	1156	c->sign = neg;
	1157	res = MP_OKAY;
	1158
	1159	LBL_ERR:mp_clear(&x);
	1160	mp_clear(&y);
	1161	mp_clear(&u);
	1162	mp_clear(&v);
	1163	mp_clear(&B);
	1164	mp_clear(&D);
	1165	return res;
	1166	}
	1167
	1168
	1169	/* hac 14.61, pp608 */
	1170	int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
	1171	{
	1172	mp_int x, y, u, v, A, B, C, D;
	1173	int res;
	1174
	1175	/* b cannot be negative */
	1176	if (b->sign == MP_NEG \|\| mp_iszero(b) == MP_YES) {
	1177	return MP_VAL;
	1178	}
	1179
	1180	/* init temps */
	1181	if ((res = mp_init_multi(&x, &y, &u, &v,
	1182	&A, &B)) != MP_OKAY) {
	1183	return res;
	1184	}
	1185
	1186	/* init rest of tmps temps */
	1187	if ((res = mp_init_multi(&C, &D, 0, 0, 0, 0)) != MP_OKAY) {
	1188	mp_clear(&x);
	1189	mp_clear(&y);
	1190	mp_clear(&u);
	1191	mp_clear(&v);
	1192	mp_clear(&A);
	1193	mp_clear(&B);
	1194	return res;
	1195	}
	1196
	1197	/* x = a, y = b */
	1198	if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
	1199	goto LBL_ERR;
	1200	}
	1201	if (mp_isone(&x)) {
	1202	mp_set(c, 1);
	1203	res = MP_OKAY;
	1204	goto LBL_ERR;
	1205	}
	1206	if ((res = mp_copy (b, &y)) != MP_OKAY) {
	1207	goto LBL_ERR;
	1208	}
	1209
	1210	/* 2. [modified] if x,y are both even then return an error! */
	1211	if (mp_iseven (&x) == MP_YES && mp_iseven (&y) == MP_YES) {
	1212	res = MP_VAL;
	1213	goto LBL_ERR;
	1214	}
	1215
	1216	/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
	1217	if ((res = mp_copy (&x, &u)) != MP_OKAY) {
	1218	goto LBL_ERR;
	1219	}
	1220	if ((res = mp_copy (&y, &v)) != MP_OKAY) {
	1221	goto LBL_ERR;
	1222	}
	1223	if ((res = mp_set (&A, 1)) != MP_OKAY) {
	1224	goto LBL_ERR;
	1225	}
	1226	if ((res = mp_set (&D, 1)) != MP_OKAY) {
	1227	goto LBL_ERR;
	1228	}
	1229
	1230	top:
	1231	/* 4. while u is even do */
	1232	while (mp_iseven (&u) == MP_YES) {
	1233	/* 4.1 u = u/2 */
	1234	if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
	1235	goto LBL_ERR;
	1236	}
	1237	/* 4.2 if A or B is odd then */
	1238	if (mp_isodd (&A) == MP_YES \|\| mp_isodd (&B) == MP_YES) {
	1239	/* A = (A+y)/2, B = (B-x)/2 */
	1240	if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
	1241	goto LBL_ERR;
	1242	}
	1243	if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
	1244	goto LBL_ERR;
	1245	}
	1246	}
	1247	/* A = A/2, B = B/2 */
	1248	if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
	1249	goto LBL_ERR;
	1250	}
	1251	if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
	1252	goto LBL_ERR;
	1253	}
	1254	}
	1255
	1256	/* 5. while v is even do */
	1257	while (mp_iseven (&v) == MP_YES) {
	1258	/* 5.1 v = v/2 */
	1259	if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
	1260	goto LBL_ERR;
	1261	}
	1262	/* 5.2 if C or D is odd then */
	1263	if (mp_isodd (&C) == MP_YES \|\| mp_isodd (&D) == MP_YES) {
	1264	/* C = (C+y)/2, D = (D-x)/2 */
	1265	if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
	1266	goto LBL_ERR;
	1267	}
	1268	if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
	1269	goto LBL_ERR;
	1270	}
	1271	}
	1272	/* C = C/2, D = D/2 */
	1273	if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
	1274	goto LBL_ERR;
	1275	}
	1276	if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
	1277	goto LBL_ERR;
	1278	}
	1279	}
	1280
	1281	/* 6. if u >= v then */
	1282	if (mp_cmp (&u, &v) != MP_LT) {
	1283	/* u = u - v, A = A - C, B = B - D */
	1284	if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
	1285	goto LBL_ERR;
	1286	}
	1287
	1288	if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
	1289	goto LBL_ERR;
	1290	}
	1291
	1292	if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
	1293	goto LBL_ERR;
	1294	}
	1295	} else {
	1296	/* v - v - u, C = C - A, D = D - B */
	1297	if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
	1298	goto LBL_ERR;
	1299	}
	1300
	1301	if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
	1302	goto LBL_ERR;
	1303	}
	1304
	1305	if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
	1306	goto LBL_ERR;
	1307	}
	1308	}
	1309
	1310	/* if not zero goto step 4 */
	1311	if (mp_iszero (&u) == MP_NO)
	1312	goto top;
	1313
	1314	/* now a = C, b = D, gcd == gv /
	1315
	1316	/* if v != 1 then there is no inverse */
	1317	if (mp_cmp_d (&v, 1) != MP_EQ) {
	1318	res = MP_VAL;
	1319	goto LBL_ERR;
	1320	}
	1321
	1322	/* if its too low */
	1323	while (mp_cmp_d(&C, 0) == MP_LT) {
	1324	if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
	1325	goto LBL_ERR;
	1326	}
	1327	}
	1328
	1329	/* too big */
	1330	while (mp_cmp_mag(&C, b) != MP_LT) {
	1331	if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
	1332	goto LBL_ERR;
	1333	}
	1334	}
	1335
	1336	/* C is now the inverse */
	1337	mp_exch (&C, c);
	1338	res = MP_OKAY;
	1339	LBL_ERR:mp_clear(&x);
	1340	mp_clear(&y);
	1341	mp_clear(&u);
	1342	mp_clear(&v);
	1343	mp_clear(&A);
	1344	mp_clear(&B);
	1345	mp_clear(&C);
	1346	mp_clear(&D);
	1347	return res;
	1348	}
	1349
	1350
	1351	/* compare magnitude of two ints (unsigned) */
	1352	int mp_cmp_mag (mp_int * a, mp_int * b)
	1353	{
	1354	int n;
	1355	mp_digit tmpa, tmpb;
	1356
	1357	/* compare based on # of non-zero digits */
	1358	if (a->used > b->used) {
	1359	return MP_GT;
	1360	}
	1361
	1362	if (a->used < b->used) {
	1363	return MP_LT;
	1364	}
	1365
	1366	/* alias for a */
	1367	tmpa = a->dp + (a->used - 1);
	1368
	1369	/* alias for b */
	1370	tmpb = b->dp + (a->used - 1);
	1371
	1372	/* compare based on digits */
	1373	for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
	1374	if (tmpa > tmpb) {
	1375	return MP_GT;
	1376	}
	1377
	1378	if (tmpa < tmpb) {
	1379	return MP_LT;
	1380	}
	1381	}
	1382	return MP_EQ;
	1383	}
	1384
	1385
	1386	/* compare two ints (signed)*/
	1387	int mp_cmp (mp_int * a, mp_int * b)
	1388	{
	1389	/* compare based on sign */
	1390	if (a->sign != b->sign) {
	1391	if (a->sign == MP_NEG) {
	1392	return MP_LT;
	1393	} else {
	1394	return MP_GT;
	1395	}
	1396	}
	1397
	1398	/* compare digits */
	1399	if (a->sign == MP_NEG) {
	1400	/* if negative compare opposite direction */
	1401	return mp_cmp_mag(b, a);
	1402	} else {
	1403	return mp_cmp_mag(a, b);
	1404	}
	1405	}
	1406
	1407
	1408	/* compare a digit */
	1409	int mp_cmp_d(mp_int * a, mp_digit b)
	1410	{
	1411	/* special case for zero*/
	1412	if (a->used == 0 && b == 0)
	1413	return MP_EQ;
	1414
	1415	/* compare based on sign */
	1416	if ((b && a->used == 0) \|\| a->sign == MP_NEG) {
	1417	return MP_LT;
	1418	}
	1419
	1420	/* compare based on magnitude */
	1421	if (a->used > 1) {
	1422	return MP_GT;
	1423	}
	1424
	1425	/* compare the only digit of a to b */
	1426	if (a->dp[0] > b) {
	1427	return MP_GT;
	1428	} else if (a->dp[0] < b) {
	1429	return MP_LT;
	1430	} else {
	1431	return MP_EQ;
	1432	}
	1433	}
	1434
	1435
	1436	/* set to a digit */
	1437	int mp_set (mp_int * a, mp_digit b)
	1438	{
	1439	int res;
	1440	mp_zero (a);
	1441	res = mp_grow (a, 1);
	1442	if (res == MP_OKAY) {
	1443	a->dp[0] = (mp_digit)(b & MP_MASK);
	1444	a->used = (a->dp[0] != 0) ? 1 : 0;
	1445	}
	1446	return res;
	1447	}
	1448
	1449	/* check if a bit is set */
	1450	int mp_is_bit_set (mp_int *a, mp_digit b)
	1451	{
	1452	if ((mp_digit)a->used < b/DIGIT_BIT)
	1453	return 0;
	1454
	1455	return (int)((a->dp[b/DIGIT_BIT] >> b%DIGIT_BIT) & (mp_digit)1);
	1456	}
	1457
	1458	/* c = a mod b, 0 <= c < b */
	1459	#if defined(FREESCALE_LTC_TFM)
	1460	int wolfcrypt_mp_mod(mp_int * a, mp_int * b, mp_int * c)
	1461	#else
	1462	int mp_mod (mp_int * a, mp_int * b, mp_int * c)
	1463	#endif
	1464	{
	1465	mp_int t;
	1466	int res;
	1467
	1468	if ((res = mp_init_size (&t, b->used)) != MP_OKAY) {
	1469	return res;
	1470	}
	1471
	1472	if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
	1473	mp_clear (&t);
	1474	return res;
	1475	}
	1476
	1477	if ((mp_iszero(&t) != MP_NO) \|\| (t.sign == b->sign)) {
	1478	res = MP_OKAY;
	1479	mp_exch (&t, c);
	1480	} else {
	1481	res = mp_add (b, &t, c);
	1482	}
	1483
	1484	mp_clear (&t);
	1485	return res;
	1486	}
	1487
	1488
	1489	/* slower bit-bang division... also smaller */
	1490	int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
	1491	{
	1492	mp_int ta, tb, tq, q;
	1493	int res, n, n2;
	1494
	1495	/* is divisor zero ? */
	1496	if (mp_iszero (b) == MP_YES) {
	1497	return MP_VAL;
	1498	}
	1499
	1500	/* if a < b then q=0, r = a */
	1501	if (mp_cmp_mag (a, b) == MP_LT) {
	1502	if (d != NULL) {
	1503	res = mp_copy (a, d);
	1504	} else {
	1505	res = MP_OKAY;
	1506	}
	1507	if (c != NULL) {
	1508	mp_zero (c);
	1509	}
	1510	return res;
	1511	}
	1512
	1513	/* init our temps */
	1514	if ((res = mp_init_multi(&ta, &tb, &tq, &q, 0, 0)) != MP_OKAY) {
	1515	return res;
	1516	}
	1517
	1518	if ((res = mp_set(&tq, 1)) != MP_OKAY) {
	1519	return res;
	1520	}
	1521	n = mp_count_bits(a) - mp_count_bits(b);
	1522	if (((res = mp_abs(a, &ta)) != MP_OKAY) \|\|
	1523	((res = mp_abs(b, &tb)) != MP_OKAY) \|\|
	1524	((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) \|\|
	1525	((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
	1526	goto LBL_ERR;
	1527	}
	1528
	1529	while (n-- >= 0) {
	1530	if (mp_cmp(&tb, &ta) != MP_GT) {
	1531	if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) \|\|
	1532	((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
	1533	goto LBL_ERR;
	1534	}
	1535	}
	1536	if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) \|\|
	1537	((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
	1538	goto LBL_ERR;
	1539	}
	1540	}
	1541
	1542	/* now q == quotient and ta == remainder */
	1543	n = a->sign;
	1544	n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
	1545	if (c != NULL) {
	1546	mp_exch(c, &q);
	1547	c->sign = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
	1548	}
	1549	if (d != NULL) {
	1550	mp_exch(d, &ta);
	1551	d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
	1552	}
	1553	LBL_ERR:
	1554	mp_clear(&ta);
	1555	mp_clear(&tb);
	1556	mp_clear(&tq);
	1557	mp_clear(&q);
	1558	return res;
	1559	}
	1560
	1561
	1562	/* b = a/2 */
	1563	int mp_div_2(mp_int * a, mp_int * b)
	1564	{
	1565	int x, res, oldused;
	1566
	1567	/* copy */
	1568	if (b->alloc < a->used) {
	1569	if ((res = mp_grow (b, a->used)) != MP_OKAY) {
	1570	return res;
	1571	}
	1572	}
	1573
	1574	oldused = b->used;
	1575	b->used = a->used;
	1576	{
	1577	mp_digit r, rr, tmpa, tmpb;
	1578
	1579	/* source alias */
	1580	tmpa = a->dp + b->used - 1;
	1581
	1582	/* dest alias */
	1583	tmpb = b->dp + b->used - 1;
	1584
	1585	/* carry */
	1586	r = 0;
	1587	for (x = b->used - 1; x >= 0; x--) {
	1588	/* get the carry for the next iteration */
	1589	rr = *tmpa & 1;
	1590
	1591	/* shift the current digit, add in carry and store */
	1592	tmpb-- = (tmpa-- >> 1) \| (r << (DIGIT_BIT - 1));
	1593
	1594	/* forward carry to next iteration */
	1595	r = rr;
	1596	}
	1597
	1598	/* zero excess digits */
	1599	tmpb = b->dp + b->used;
	1600	for (x = b->used; x < oldused; x++) {
	1601	*tmpb++ = 0;
	1602	}
	1603	}
	1604	b->sign = a->sign;
	1605	mp_clamp (b);
	1606	return MP_OKAY;
	1607	}
	1608
[464]	1609	/* c = a / 2 (mod b) - constant time (a < b and positive) */
	1610	int mp_div_2_mod_ct(mp_int a, mp_int b, mp_int *c)
	1611	{
	1612	int res;
[457]	1613
[464]	1614	if (mp_isodd(a)) {
	1615	res = mp_add(a, b, c);
	1616	if (res == MP_OKAY) {
	1617	res = mp_div_2(c, c);
	1618	}
	1619	}
	1620	else {
	1621	res = mp_div_2(a, c);
	1622	}
	1623
	1624	return res;
	1625	}
	1626
	1627
[457]	1628	/* high level addition (handles signs) */
	1629	int mp_add (mp_int * a, mp_int * b, mp_int * c)
	1630	{
	1631	int sa, sb, res;
	1632
	1633	/* get sign of both inputs */
	1634	sa = a->sign;
	1635	sb = b->sign;
	1636
	1637	/* handle two cases, not four */
	1638	if (sa == sb) {
	1639	/* both positive or both negative */
	1640	/* add their magnitudes, copy the sign */
	1641	c->sign = sa;
	1642	res = s_mp_add (a, b, c);
	1643	} else {
	1644	/* one positive, the other negative */
	1645	/* subtract the one with the greater magnitude from */
	1646	/* the one of the lesser magnitude. The result gets */
	1647	/* the sign of the one with the greater magnitude. */
	1648	if (mp_cmp_mag (a, b) == MP_LT) {
	1649	c->sign = sb;
	1650	res = s_mp_sub (b, a, c);
	1651	} else {
	1652	c->sign = sa;
	1653	res = s_mp_sub (a, b, c);
	1654	}
	1655	}
	1656	return res;
	1657	}
	1658
	1659
	1660	/* low level addition, based on HAC pp.594, Algorithm 14.7 */
	1661	int s_mp_add (mp_int * a, mp_int * b, mp_int * c)
	1662	{
	1663	mp_int *x;
	1664	int olduse, res, min_ab, max_ab;
	1665
	1666	/* find sizes, we let \|a\| <= \|b\| which means we have to sort
	1667	* them. "x" will point to the input with the most digits
	1668	*/
	1669	if (a->used > b->used) {
	1670	min_ab = b->used;
	1671	max_ab = a->used;
	1672	x = a;
	1673	} else {
	1674	min_ab = a->used;
	1675	max_ab = b->used;
	1676	x = b;
	1677	}
	1678
	1679	/* init result */
	1680	if (c->alloc < max_ab + 1) {
	1681	if ((res = mp_grow (c, max_ab + 1)) != MP_OKAY) {
	1682	return res;
	1683	}
	1684	}
	1685
	1686	/* get old used digit count and set new one */
	1687	olduse = c->used;
	1688	c->used = max_ab + 1;
	1689
	1690	{
	1691	mp_digit u, tmpa, tmpb, *tmpc;
	1692	int i;
	1693
	1694	/* alias for digit pointers */
	1695
	1696	/* first input */
	1697	tmpa = a->dp;
	1698
	1699	/* second input */
	1700	tmpb = b->dp;
	1701
	1702	/* destination */
	1703	tmpc = c->dp;
	1704
	1705	/* zero the carry */
	1706	u = 0;
	1707	for (i = 0; i < min_ab; i++) {
	1708	/* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
	1709	tmpc = tmpa++ + *tmpb++ + u;
	1710
	1711	/* U = carry bit of T[i] */
	1712	u = *tmpc >> ((mp_digit)DIGIT_BIT);
	1713
	1714	/* take away carry bit from T[i] */
	1715	*tmpc++ &= MP_MASK;
	1716	}
	1717
	1718	/* now copy higher words if any, that is in A+B
	1719	* if A or B has more digits add those in
	1720	*/
	1721	if (min_ab != max_ab) {
	1722	for (; i < max_ab; i++) {
	1723	/* T[i] = X[i] + U */
	1724	*tmpc = x->dp[i] + u;
	1725
	1726	/* U = carry bit of T[i] */
	1727	u = *tmpc >> ((mp_digit)DIGIT_BIT);
	1728
	1729	/* take away carry bit from T[i] */
	1730	*tmpc++ &= MP_MASK;
	1731	}
	1732	}
	1733
	1734	/* add carry */
	1735	*tmpc++ = u;
	1736
	1737	/* clear digits above olduse */
	1738	for (i = c->used; i < olduse; i++) {
	1739	*tmpc++ = 0;
	1740	}
	1741	}
	1742
	1743	mp_clamp (c);
	1744	return MP_OKAY;
	1745	}
	1746
	1747
	1748	/* low level subtraction (assumes \|a\| > \|b\|), HAC pp.595 Algorithm 14.9 */
	1749	int s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
	1750	{
	1751	int olduse, res, min_b, max_a;
	1752
	1753	/* find sizes */
	1754	min_b = b->used;
	1755	max_a = a->used;
	1756
	1757	/* init result */
	1758	if (c->alloc < max_a) {
	1759	if ((res = mp_grow (c, max_a)) != MP_OKAY) {
	1760	return res;
	1761	}
	1762	}
	1763
	1764	/* sanity check on destination */
	1765	if (c->dp == NULL)
	1766	return MP_VAL;
	1767
	1768	olduse = c->used;
	1769	c->used = max_a;
	1770
	1771	{
	1772	mp_digit u, tmpa, tmpb, *tmpc;
	1773	int i;
	1774
	1775	/* alias for digit pointers */
	1776	tmpa = a->dp;
	1777	tmpb = b->dp;
	1778	tmpc = c->dp;
	1779
	1780	/* set carry to zero */
	1781	u = 0;
	1782	for (i = 0; i < min_b; i++) {
	1783	/* T[i] = A[i] - B[i] - U */
	1784	tmpc = tmpa++ - *tmpb++ - u;
	1785
	1786	/* U = carry bit of T[i]
	1787	* Note this saves performing an AND operation since
	1788	* if a carry does occur it will propagate all the way to the
	1789	* MSB. As a result a single shift is enough to get the carry
	1790	*/
	1791	u = tmpc >> ((mp_digit)(CHAR_BIT sizeof (mp_digit) - 1));
	1792
	1793	/* Clear carry from T[i] */
	1794	*tmpc++ &= MP_MASK;
	1795	}
	1796
	1797	/* now copy higher words if any, e.g. if A has more digits than B */
	1798	for (; i < max_a; i++) {
	1799	/* T[i] = A[i] - U */
	1800	tmpc = tmpa++ - u;
	1801
	1802	/* U = carry bit of T[i] */
	1803	u = tmpc >> ((mp_digit)(CHAR_BIT sizeof (mp_digit) - 1));
	1804
	1805	/* Clear carry from T[i] */
	1806	*tmpc++ &= MP_MASK;
	1807	}
	1808
	1809	/* clear digits above used (since we may not have grown result above) */
	1810	for (i = c->used; i < olduse; i++) {
	1811	*tmpc++ = 0;
	1812	}
	1813	}
	1814
	1815	mp_clamp (c);
	1816	return MP_OKAY;
	1817	}
	1818
	1819
	1820	/* high level subtraction (handles signs) */
	1821	int mp_sub (mp_int * a, mp_int * b, mp_int * c)
	1822	{
	1823	int sa, sb, res;
	1824
	1825	sa = a->sign;
	1826	sb = b->sign;
	1827
	1828	if (sa != sb) {
	1829	/* subtract a negative from a positive, OR */
	1830	/* subtract a positive from a negative. */
	1831	/* In either case, ADD their magnitudes, */
	1832	/* and use the sign of the first number. */
	1833	c->sign = sa;
	1834	res = s_mp_add (a, b, c);
	1835	} else {
	1836	/* subtract a positive from a positive, OR */
	1837	/* subtract a negative from a negative. */
	1838	/* First, take the difference between their */
	1839	/* magnitudes, then... */
	1840	if (mp_cmp_mag (a, b) != MP_LT) {
	1841	/* Copy the sign from the first */
	1842	c->sign = sa;
	1843	/* The first has a larger or equal magnitude */
	1844	res = s_mp_sub (a, b, c);
	1845	} else {
	1846	/* The result has the opposite sign from */
	1847	/* the first number. */
	1848	c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
	1849	/* The second has a larger magnitude */
	1850	res = s_mp_sub (b, a, c);
	1851	}
	1852	}
	1853	return res;
	1854	}
	1855
	1856
	1857	/* determines if reduce_2k_l can be used */
	1858	int mp_reduce_is_2k_l(mp_int *a)
	1859	{
	1860	int ix, iy;
	1861
	1862	if (a->used == 0) {
	1863	return MP_NO;
	1864	} else if (a->used == 1) {
	1865	return MP_YES;
	1866	} else if (a->used > 1) {
	1867	/* if more than half of the digits are -1 we're sold */
	1868	for (iy = ix = 0; ix < a->used; ix++) {
	1869	if (a->dp[ix] == MP_MASK) {
	1870	++iy;
	1871	}
	1872	}
	1873	return (iy >= (a->used/2)) ? MP_YES : MP_NO;
	1874
	1875	}
	1876	return MP_NO;
	1877	}
	1878
	1879
	1880	/* determines if mp_reduce_2k can be used */
	1881	int mp_reduce_is_2k(mp_int *a)
	1882	{
	1883	int ix, iy, iw;
	1884	mp_digit iz;
	1885
	1886	if (a->used == 0) {
	1887	return MP_NO;
	1888	} else if (a->used == 1) {
	1889	return MP_YES;
	1890	} else if (a->used > 1) {
	1891	iy = mp_count_bits(a);
	1892	iz = 1;
	1893	iw = 1;
	1894
	1895	/* Test every bit from the second digit up, must be 1 */
	1896	for (ix = DIGIT_BIT; ix < iy; ix++) {
	1897	if ((a->dp[iw] & iz) == 0) {
	1898	return MP_NO;
	1899	}
	1900	iz <<= 1;
	1901	if (iz > (mp_digit)MP_MASK) {
	1902	++iw;
	1903	iz = 1;
	1904	}
	1905	}
	1906	}
	1907	return MP_YES;
	1908	}
	1909
	1910
	1911	/* determines if a number is a valid DR modulus */
	1912	int mp_dr_is_modulus(mp_int *a)
	1913	{
	1914	int ix;
	1915
	1916	/* must be at least two digits */
	1917	if (a->used < 2) {
	1918	return 0;
	1919	}
	1920
	1921	/* must be of the form b**k - a [a <= b] so all
	1922	* but the first digit must be equal to -1 (mod b).
	1923	*/
	1924	for (ix = 1; ix < a->used; ix++) {
	1925	if (a->dp[ix] != MP_MASK) {
	1926	return 0;
	1927	}
	1928	}
	1929	return 1;
	1930	}
	1931
	1932
	1933	/* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
	1934	*
	1935	* Uses a left-to-right k-ary sliding window to compute the modular
	1936	* exponentiation.
	1937	* The value of k changes based on the size of the exponent.
	1938	*
	1939	* Uses Montgomery or Diminished Radix reduction [whichever appropriate]
	1940	*/
	1941
	1942	#ifdef MP_LOW_MEM
	1943	#define TAB_SIZE 32
	1944	#else
	1945	#define TAB_SIZE 256
	1946	#endif
	1947
	1948	int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
	1949	int redmode)
	1950	{
	1951	mp_int res;
	1952	mp_digit buf, mp;
	1953	int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
	1954	#ifdef WOLFSSL_SMALL_STACK
	1955	mp_int* M;
	1956	#else
	1957	mp_int M[TAB_SIZE];
	1958	#endif
	1959	/* use a pointer to the reduction algorithm. This allows us to use
	1960	* one of many reduction algorithms without modding the guts of
	1961	* the code with if statements everywhere.
	1962	*/
	1963	int (redux)(mp_int,mp_int*,mp_digit) = NULL;
	1964
	1965	#ifdef WOLFSSL_SMALL_STACK
	1966	M = (mp_int) XMALLOC(sizeof(mp_int) TAB_SIZE, NULL,
	1967	DYNAMIC_TYPE_BIGINT);
	1968	if (M == NULL)
	1969	return MP_MEM;
	1970	#endif
	1971
	1972	/* find window size */
	1973	x = mp_count_bits (X);
	1974	if (x <= 7) {
	1975	winsize = 2;
	1976	} else if (x <= 36) {
	1977	winsize = 3;
	1978	} else if (x <= 140) {
	1979	winsize = 4;
	1980	} else if (x <= 450) {
	1981	winsize = 5;
	1982	} else if (x <= 1303) {
	1983	winsize = 6;
	1984	} else if (x <= 3529) {
	1985	winsize = 7;
	1986	} else {
	1987	winsize = 8;
	1988	}
	1989
	1990	#ifdef MP_LOW_MEM
	1991	if (winsize > 5) {
	1992	winsize = 5;
	1993	}
	1994	#endif
	1995
	1996	/* init M array */
	1997	/* init first cell */
	1998	if ((err = mp_init_size(&M[1], P->alloc)) != MP_OKAY) {
	1999	#ifdef WOLFSSL_SMALL_STACK
	2000	XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
	2001	#endif
	2002
	2003	return err;
	2004	}
	2005
	2006	/* now init the second half of the array */
	2007	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
	2008	if ((err = mp_init_size(&M[x], P->alloc)) != MP_OKAY) {
	2009	for (y = 1<<(winsize-1); y < x; y++) {
	2010	mp_clear (&M[y]);
	2011	}
	2012	mp_clear(&M[1]);
	2013
	2014	#ifdef WOLFSSL_SMALL_STACK
	2015	XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
	2016	#endif
	2017
	2018	return err;
	2019	}
	2020	}
	2021
	2022	/* determine and setup reduction code */
	2023	if (redmode == 0) {
	2024	#ifdef BN_MP_MONTGOMERY_SETUP_C
	2025	/* now setup montgomery */
	2026	if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
	2027	goto LBL_M;
	2028	}
	2029	#else
	2030	err = MP_VAL;
	2031	goto LBL_M;
	2032	#endif
	2033
	2034	/* automatically pick the comba one if available (saves quite a few
	2035	calls/ifs) */
	2036	#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
	2037	if (((P->used * 2 + 1) < (int)MP_WARRAY) &&
[464]	2038	P->used < (1L << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
[457]	2039	redux = fast_mp_montgomery_reduce;
	2040	} else
	2041	#endif
	2042	{
	2043	#ifdef BN_MP_MONTGOMERY_REDUCE_C
	2044	/* use slower baseline Montgomery method */
	2045	redux = mp_montgomery_reduce;
	2046	#endif
	2047	}
	2048	} else if (redmode == 1) {
	2049	#if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
	2050	/* setup DR reduction for moduli of the form B*k - b /
	2051	mp_dr_setup(P, &mp);
	2052	redux = mp_dr_reduce;
	2053	#endif
	2054	} else {
	2055	#if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
	2056	/* setup DR reduction for moduli of the form 2*k - b /
	2057	if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
	2058	goto LBL_M;
	2059	}
	2060	redux = mp_reduce_2k;
	2061	#endif
	2062	}
	2063
	2064	if (redux == NULL) {
	2065	err = MP_VAL;
	2066	goto LBL_M;
	2067	}
	2068
	2069	/* setup result */
	2070	if ((err = mp_init_size (&res, P->alloc)) != MP_OKAY) {
	2071	goto LBL_M;
	2072	}
	2073
	2074	/* create M table
	2075	*
	2076
	2077	*
	2078	* The first half of the table is not computed though accept for M[0] and M[1]
	2079	*/
	2080
	2081	if (redmode == 0) {
	2082	#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
	2083	/* now we need R mod m */
	2084	if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
	2085	goto LBL_RES;
	2086	}
	2087
	2088	/* now set M[1] to G * R mod m */
	2089	if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
	2090	goto LBL_RES;
	2091	}
	2092	#else
	2093	err = MP_VAL;
	2094	goto LBL_RES;
	2095	#endif
	2096	} else {
	2097	if ((err = mp_set(&res, 1)) != MP_OKAY) {
	2098	goto LBL_RES;
	2099	}
	2100	if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
	2101	goto LBL_RES;
	2102	}
	2103	}
	2104
	2105	/* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times*/
	2106	if ((err = mp_copy (&M[1], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
	2107	goto LBL_RES;
	2108	}
	2109
	2110	for (x = 0; x < (winsize - 1); x++) {
	2111	if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))],
	2112	&M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
	2113	goto LBL_RES;
	2114	}
	2115	if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, mp)) != MP_OKAY) {
	2116	goto LBL_RES;
	2117	}
	2118	}
	2119
	2120	/* create upper table */
	2121	for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
	2122	if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
	2123	goto LBL_RES;
	2124	}
	2125	if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
	2126	goto LBL_RES;
	2127	}
	2128	}
	2129
	2130	/* set initial mode and bit cnt */
	2131	mode = 0;
	2132	bitcnt = 1;
	2133	buf = 0;
	2134	digidx = X->used - 1;
	2135	bitcpy = 0;
	2136	bitbuf = 0;
	2137
	2138	for (;;) {
	2139	/* grab next digit as required */
	2140	if (--bitcnt == 0) {
	2141	/* if digidx == -1 we are out of digits so break */
	2142	if (digidx == -1) {
	2143	break;
	2144	}
	2145	/* read next digit and reset bitcnt */
	2146	buf = X->dp[digidx--];
	2147	bitcnt = (int)DIGIT_BIT;
	2148	}
	2149
	2150	/* grab the next msb from the exponent */
	2151	y = (int)(buf >> (DIGIT_BIT - 1)) & 1;
	2152	buf <<= (mp_digit)1;
	2153
	2154	/* if the bit is zero and mode == 0 then we ignore it
	2155	* These represent the leading zero bits before the first 1 bit
	2156	* in the exponent. Technically this opt is not required but it
	2157	* does lower the # of trivial squaring/reductions used
	2158	*/
	2159	if (mode == 0 && y == 0) {
	2160	continue;
	2161	}
	2162
	2163	/* if the bit is zero and mode == 1 then we square */
	2164	if (mode == 1 && y == 0) {
	2165	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
	2166	goto LBL_RES;
	2167	}
	2168	if ((err = redux (&res, P, mp)) != MP_OKAY) {
	2169	goto LBL_RES;
	2170	}
	2171	continue;
	2172	}
	2173
	2174	/* else we add it to the window */
	2175	bitbuf \|= (y << (winsize - ++bitcpy));
	2176	mode = 2;
	2177
	2178	if (bitcpy == winsize) {
	2179	/* ok window is filled so square as required and multiply */
	2180	/* square first */
	2181	for (x = 0; x < winsize; x++) {
	2182	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
	2183	goto LBL_RES;
	2184	}
	2185	if ((err = redux (&res, P, mp)) != MP_OKAY) {
	2186	goto LBL_RES;
	2187	}
	2188	}
	2189
	2190	/* then multiply */
	2191	if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
	2192	goto LBL_RES;
	2193	}
	2194	if ((err = redux (&res, P, mp)) != MP_OKAY) {
	2195	goto LBL_RES;
	2196	}
	2197
	2198	/* empty window and reset */
	2199	bitcpy = 0;
	2200	bitbuf = 0;
	2201	mode = 1;
	2202	}
	2203	}
	2204
	2205	/* if bits remain then square/multiply */
	2206	if (mode == 2 && bitcpy > 0) {
	2207	/* square then multiply if the bit is set */
	2208	for (x = 0; x < bitcpy; x++) {
	2209	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
	2210	goto LBL_RES;
	2211	}
	2212	if ((err = redux (&res, P, mp)) != MP_OKAY) {
	2213	goto LBL_RES;
	2214	}
	2215
	2216	/* get next bit of the window */
	2217	bitbuf <<= 1;
	2218	if ((bitbuf & (1 << winsize)) != 0) {
	2219	/* then multiply */
	2220	if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
	2221	goto LBL_RES;
	2222	}
	2223	if ((err = redux (&res, P, mp)) != MP_OKAY) {
	2224	goto LBL_RES;
	2225	}
	2226	}
	2227	}
	2228	}
	2229
	2230	if (redmode == 0) {
	2231	/* fixup result if Montgomery reduction is used
	2232	* recall that any value in a Montgomery system is
	2233	* actually multiplied by R mod n. So we have
	2234	* to reduce one more time to cancel out the factor
	2235	* of R.
	2236	*/
	2237	if ((err = redux(&res, P, mp)) != MP_OKAY) {
	2238	goto LBL_RES;
	2239	}
	2240	}
	2241
	2242	/* swap res with Y */
	2243	mp_exch (&res, Y);
	2244	err = MP_OKAY;
	2245	LBL_RES:mp_clear (&res);
	2246	LBL_M:
	2247	mp_clear(&M[1]);
	2248	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
	2249	mp_clear (&M[x]);
	2250	}
	2251
	2252	#ifdef WOLFSSL_SMALL_STACK
	2253	XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
	2254	#endif
	2255
	2256	return err;
	2257	}
	2258
	2259	#ifdef BN_MP_EXPTMOD_BASE_2
	2260	#if DIGIT_BIT < 16
	2261	#define WINSIZE 3
	2262	#elif DIGIT_BIT < 32
	2263	#define WINSIZE 4
	2264	#elif DIGIT_BIT < 64
	2265	#define WINSIZE 5
	2266	#elif DIGIT_BIT < 128
	2267	#define WINSIZE 6
	2268	#endif
	2269	int mp_exptmod_base_2(mp_int * X, mp_int * P, mp_int * Y)
	2270	{
	2271	mp_digit buf, mp;
	2272	int err = MP_OKAY, bitbuf, bitcpy, bitcnt, digidx, x, y;
	2273	#ifdef WOLFSSL_SMALL_STACK
	2274	mp_int *res = NULL;
	2275	#else
	2276	mp_int res[1];
	2277	#endif
	2278	int (redux)(mp_int,mp_int*,mp_digit) = NULL;
	2279
	2280	/* automatically pick the comba one if available (saves quite a few
	2281	calls/ifs) */
	2282	#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
	2283	if (((P->used * 2 + 1) < (int)MP_WARRAY) &&
[464]	2284	P->used < (1L << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
[457]	2285	redux = fast_mp_montgomery_reduce;
	2286	} else
	2287	#endif
[464]	2288	#ifdef BN_MP_MONTGOMERY_REDUCE_C
[457]	2289	{
	2290	/* use slower baseline Montgomery method */
	2291	redux = mp_montgomery_reduce;
[464]	2292	}
[457]	2293	#endif
[464]	2294
	2295	if (redux == NULL) {
	2296	return MP_VAL;
[457]	2297	}
	2298
	2299	#ifdef WOLFSSL_SMALL_STACK
	2300	res = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_TMP_BUFFER);
	2301	if (res == NULL) {
	2302	return MP_MEM;
	2303	}
	2304	#endif
	2305
	2306	/* now setup montgomery */
	2307	if ((err = mp_montgomery_setup(P, &mp)) != MP_OKAY) {
	2308	goto LBL_M;
	2309	}
	2310
	2311	/* setup result */
	2312	if ((err = mp_init(res)) != MP_OKAY) {
	2313	goto LBL_M;
	2314	}
	2315
	2316	/* now we need R mod m */
	2317	if ((err = mp_montgomery_calc_normalization(res, P)) != MP_OKAY) {
	2318	goto LBL_RES;
	2319	}
	2320
	2321	/* Get the top bits left over after taking WINSIZE bits starting at the
	2322	* least-significant.
	2323	*/
	2324	digidx = X->used - 1;
	2325	bitcpy = (X->used * DIGIT_BIT) % WINSIZE;
	2326	if (bitcpy > 0) {
	2327	bitcnt = (int)DIGIT_BIT - bitcpy;
	2328	buf = X->dp[digidx--];
	2329	bitbuf = (int)(buf >> bitcnt);
	2330	/* Multiply montgomery representation of 1 by 2 ^ top */
	2331	err = mp_mul_2d(res, bitbuf, res);
	2332	if (err != MP_OKAY) {
	2333	goto LBL_RES;
	2334	}
	2335	err = mp_mod(res, P, res);
	2336	if (err != MP_OKAY) {
	2337	goto LBL_RES;
	2338	}
	2339	/* Move out bits used */
	2340	buf <<= bitcpy;
	2341	bitcnt++;
	2342	}
	2343	else {
	2344	bitcnt = 1;
	2345	buf = 0;
	2346	}
	2347
	2348	/* empty window and reset */
	2349	bitbuf = 0;
	2350	bitcpy = 0;
	2351
	2352	for (;;) {
	2353	/* grab next digit as required */
	2354	if (--bitcnt == 0) {
	2355	/* if digidx == -1 we are out of digits so break */
	2356	if (digidx == -1) {
	2357	break;
	2358	}
	2359	/* read next digit and reset bitcnt */
	2360	buf = X->dp[digidx--];
	2361	bitcnt = (int)DIGIT_BIT;
	2362	}
	2363
	2364	/* grab the next msb from the exponent */
	2365	y = (int)(buf >> (DIGIT_BIT - 1)) & 1;
	2366	buf <<= (mp_digit)1;
	2367	/* add bit to the window */
	2368	bitbuf \|= (y << (WINSIZE - ++bitcpy));
	2369
	2370	if (bitcpy == WINSIZE) {
	2371	/* ok window is filled so square as required and multiply */
	2372	/* square first */
	2373	for (x = 0; x < WINSIZE; x++) {
	2374	err = mp_sqr(res, res);
	2375	if (err != MP_OKAY) {
	2376	goto LBL_RES;
	2377	}
	2378	err = (*redux)(res, P, mp);
	2379	if (err != MP_OKAY) {
	2380	goto LBL_RES;
	2381	}
	2382	}
	2383
	2384	/* then multiply by 2^bitbuf */
	2385	err = mp_mul_2d(res, bitbuf, res);
	2386	if (err != MP_OKAY) {
	2387	goto LBL_RES;
	2388	}
	2389	err = mp_mod(res, P, res);
	2390	if (err != MP_OKAY) {
	2391	goto LBL_RES;
	2392	}
	2393
	2394	/* empty window and reset */
	2395	bitcpy = 0;
	2396	bitbuf = 0;
	2397	}
	2398	}
	2399
	2400	/* fixup result if Montgomery reduction is used
	2401	* recall that any value in a Montgomery system is
	2402	* actually multiplied by R mod n. So we have
	2403	* to reduce one more time to cancel out the factor
	2404	* of R.
	2405	*/
	2406	err = (*redux)(res, P, mp);
	2407	if (err != MP_OKAY) {
	2408	goto LBL_RES;
	2409	}
	2410
	2411	/* swap res with Y */
	2412	mp_copy(res, Y);
	2413
	2414	LBL_RES:mp_clear (res);
	2415	LBL_M:
	2416	#ifdef WOLFSSL_SMALL_STACK
	2417	XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
	2418	#endif
	2419	return err;
	2420	}
	2421
	2422	#undef WINSIZE
	2423	#endif /* BN_MP_EXPTMOD_BASE_2 */
	2424
	2425
	2426	/* setups the montgomery reduction stuff */
	2427	int mp_montgomery_setup (mp_int * n, mp_digit * rho)
	2428	{
	2429	mp_digit x, b;
	2430
	2431	/* fast inversion mod 2**k
	2432	*
	2433	* Based on the fact that
	2434	*
	2435	* XA = 1 (mod 2n) => (X(2-XA)) A = 1 (mod 22n)
	2436	* => 2XA - XXA*A = 1
	2437	* => 2*(1) - (1) = 1
	2438	*/
	2439	b = n->dp[0];
	2440
	2441	if ((b & 1) == 0) {
	2442	return MP_VAL;
	2443	}
	2444
	2445	x = (((b + 2) & 4) << 1) + b; /* here xa==1 mod 24 /
	2446	x = 2 - b x; /* here xa==1 mod 28 /
	2447	#if !defined(MP_8BIT)
	2448	x = 2 - b x; /* here xa==1 mod 216 /
	2449	#endif
	2450	#if defined(MP_64BIT) \|\| !(defined(MP_8BIT) \|\| defined(MP_16BIT))
	2451	x = 2 - b x; /* here xa==1 mod 232 /
	2452	#endif
	2453	#ifdef MP_64BIT
	2454	x = 2 - b x; /* here xa==1 mod 264 /
	2455	#endif
	2456
	2457	/* rho = -1/m mod b */
	2458	/* TAO, switched mp_word casts to mp_digit to shut up compiler */
	2459	*rho = (mp_digit)((((mp_digit)1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK);
	2460
	2461	return MP_OKAY;
	2462	}
	2463
	2464
	2465	/* computes xR**-1 == x (mod N) via Montgomery Reduction
	2466	*
	2467	* This is an optimized implementation of montgomery_reduce
	2468	* which uses the comba method to quickly calculate the columns of the
	2469	* reduction.
	2470	*
	2471	* Based on Algorithm 14.32 on pp.601 of HAC.
	2472	*/
	2473	int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
	2474	{
	2475	int ix, res, olduse;
	2476	#ifdef WOLFSSL_SMALL_STACK
	2477	mp_word* W; /* uses dynamic memory and slower */
	2478	#else
	2479	mp_word W[MP_WARRAY];
	2480	#endif
	2481
	2482	/* get old used count */
	2483	olduse = x->used;
	2484
	2485	/* grow a as required */
	2486	if (x->alloc < n->used + 1) {
	2487	if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
	2488	return res;
	2489	}
	2490	}
	2491
	2492	#ifdef WOLFSSL_SMALL_STACK
	2493	W = (mp_word)XMALLOC(sizeof(mp_word) MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
	2494	if (W == NULL)
	2495	return MP_MEM;
	2496	#endif
	2497
[464]	2498	XMEMSET(W, 0, (n->used * 2 + 1) * sizeof(mp_word));
	2499
[457]	2500	/* first we have to get the digits of the input into
	2501	* an array of double precision words W[...]
	2502	*/
	2503	{
	2504	mp_word *_W;
	2505	mp_digit *tmpx;
	2506
	2507	/* alias for the W[] array */
	2508	_W = W;
	2509
	2510	/* alias for the digits of x*/
	2511	tmpx = x->dp;
	2512
	2513	/* copy the digits of a into W[0..a->used-1] */
	2514	for (ix = 0; ix < x->used; ix++) {
	2515	_W++ = tmpx++;
	2516	}
	2517	}
	2518
	2519	/* now we proceed to zero successive digits
	2520	* from the least significant upwards
	2521	*/
	2522	for (ix = 0; ix < n->used; ix++) {
	2523	/* mu = ai * m' mod b
	2524	*
	2525	* We avoid a double precision multiplication (which isn't required)
	2526	* by casting the value down to a mp_digit. Note this requires
	2527	* that W[ix-1] have the carry cleared (see after the inner loop)
	2528	*/
	2529	mp_digit mu;
	2530	mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
	2531
	2532	/* a = a + mu * m * b**i
	2533	*
	2534	* This is computed in place and on the fly. The multiplication
	2535	* by b**i is handled by offsetting which columns the results
	2536	* are added to.
	2537	*
	2538	* Note the comba method normally doesn't handle carries in the
	2539	* inner loop In this case we fix the carry from the previous
	2540	* column since the Montgomery reduction requires digits of the
	2541	* result (so far) [see above] to work. This is
	2542	* handled by fixing up one carry after the inner loop. The
	2543	* carry fixups are done in order so after these loops the
	2544	* first m->used words of W[] have the carries fixed
	2545	*/
	2546	{
	2547	int iy;
	2548	mp_digit *tmpn;
	2549	mp_word *_W;
	2550
	2551	/* alias for the digits of the modulus */
	2552	tmpn = n->dp;
	2553
	2554	/* Alias for the columns set by an offset of ix */
	2555	_W = W + ix;
	2556
	2557	/* inner loop */
	2558	for (iy = 0; iy < n->used; iy++) {
	2559	_W++ += ((mp_word)mu) ((mp_word)*tmpn++);
	2560	}
	2561	}
	2562
	2563	/* now fix carry for next digit, W[ix+1] */
	2564	W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
	2565	}
	2566
	2567	/* now we have to propagate the carries and
	2568	* shift the words downward [all those least
	2569	* significant digits we zeroed].
	2570	*/
	2571	{
	2572	mp_digit *tmpx;
	2573	mp_word _W, _W1;
	2574
	2575	/* nox fix rest of carries */
	2576
	2577	/* alias for current word */
	2578	_W1 = W + ix;
	2579
	2580	/* alias for next word, where the carry goes */
	2581	_W = W + ++ix;
	2582
	2583	for (; ix <= n->used * 2 + 1; ix++) {
	2584	_W++ += _W1++ >> ((mp_word) DIGIT_BIT);
	2585	}
	2586
	2587	/* copy out, A = A/b**n
	2588	*
	2589	* The result is A/b**n but instead of converting from an
	2590	* array of mp_word to mp_digit than calling mp_rshd
	2591	* we just copy them in the right order
	2592	*/
	2593
	2594	/* alias for destination word */
	2595	tmpx = x->dp;
	2596
	2597	/* alias for shifted double precision result */
	2598	_W = W + n->used;
	2599
	2600	for (ix = 0; ix < n->used + 1; ix++) {
	2601	tmpx++ = (mp_digit)(_W++ & ((mp_word) MP_MASK));
	2602	}
	2603
	2604	/* zero olduse digits, if the input a was larger than
	2605	* m->used+1 we'll have to clear the digits
	2606	*/
	2607	for (; ix < olduse; ix++) {
	2608	*tmpx++ = 0;
	2609	}
	2610	}
	2611
	2612	/* set the max used and clamp */
	2613	x->used = n->used + 1;
	2614	mp_clamp (x);
	2615
	2616	#ifdef WOLFSSL_SMALL_STACK
	2617	XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
	2618	#endif
	2619
	2620	/* if A >= m then A = A - m */
	2621	if (mp_cmp_mag (x, n) != MP_LT) {
	2622	return s_mp_sub (x, n, x);
	2623	}
	2624	return MP_OKAY;
	2625	}
	2626
	2627
	2628	/* computes xR*-1 == x (mod N) via Montgomery Reduction /
	2629	int mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
	2630	{
	2631	int ix, res, digs;
	2632	mp_digit mu;
	2633
	2634	/* can the fast reduction [comba] method be used?
	2635	*
	2636	* Note that unlike in mul you're safely allowed less
	2637	* than the available columns [255 per default] since carries
	2638	* are fixed up in the inner loop.
	2639	*/
	2640	digs = n->used * 2 + 1;
	2641	if ((digs < (int)MP_WARRAY) &&
	2642	n->used <
[464]	2643	(1L << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
[457]	2644	return fast_mp_montgomery_reduce (x, n, rho);
	2645	}
	2646
	2647	/* grow the input as required */
	2648	if (x->alloc < digs) {
	2649	if ((res = mp_grow (x, digs)) != MP_OKAY) {
	2650	return res;
	2651	}
	2652	}
	2653	x->used = digs;
	2654
	2655	for (ix = 0; ix < n->used; ix++) {
	2656	/* mu = ai * rho mod b
	2657	*
	2658	* The value of rho must be precalculated via
	2659	* montgomery_setup() such that
	2660	* it equals -1/n0 mod b this allows the
	2661	* following inner loop to reduce the
	2662	* input one digit at a time
	2663	*/
	2664	mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
	2665
	2666	/* a = a + mu * m * b*i /
	2667	{
	2668	int iy;
	2669	mp_digit tmpn, tmpx, u;
	2670	mp_word r;
	2671
	2672	/* alias for digits of the modulus */
	2673	tmpn = n->dp;
	2674
	2675	/* alias for the digits of x [the input] */
	2676	tmpx = x->dp + ix;
	2677
	2678	/* set the carry to zero */
	2679	u = 0;
	2680
	2681	/* Multiply and add in place */
	2682	for (iy = 0; iy < n->used; iy++) {
	2683	/* compute product and sum */
	2684	r = ((mp_word)mu) * ((mp_word)*tmpn++) +
	2685	((mp_word) u) + ((mp_word) * tmpx);
	2686
	2687	/* get carry */
	2688	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
	2689
	2690	/* fix digit */
	2691	*tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
	2692	}
	2693	/* At this point the ix'th digit of x should be zero */
	2694
	2695
	2696	/* propagate carries upwards as required*/
	2697	while (u) {
	2698	*tmpx += u;
	2699	u = *tmpx >> DIGIT_BIT;
	2700	*tmpx++ &= MP_MASK;
	2701	}
	2702	}
	2703	}
	2704
	2705	/* at this point the n.used'th least
	2706	* significant digits of x are all zero
	2707	* which means we can shift x to the
	2708	* right by n.used digits and the
	2709	* residue is unchanged.
	2710	*/
	2711
	2712	/* x = x/b*n.used /
	2713	mp_clamp(x);
	2714	mp_rshd (x, n->used);
	2715
	2716	/* if x >= n then x = x - n */
	2717	if (mp_cmp_mag (x, n) != MP_LT) {
	2718	return s_mp_sub (x, n, x);
	2719	}
	2720
	2721	return MP_OKAY;
	2722	}
	2723
	2724
	2725	/* determines the setup value */
	2726	void mp_dr_setup(mp_int a, mp_digit d)
	2727	{
	2728	/* the casts are required if DIGIT_BIT is one less than
	2729	* the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
	2730	*/
	2731	*d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
	2732	((mp_word)a->dp[0]));
	2733	}
	2734
	2735
	2736	/* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
	2737	*
	2738	* Based on algorithm from the paper
	2739	*
	2740	* "Generating Efficient Primes for Discrete Log Cryptosystems"
	2741	* Chae Hoon Lim, Pil Joong Lee,
	2742	* POSTECH Information Research Laboratories
	2743	*
	2744	* The modulus must be of a special format [see manual]
	2745	*
	2746	* Has been modified to use algorithm 7.10 from the LTM book instead
	2747	*
	2748	* Input x must be in the range 0 <= x <= (n-1)**2
	2749	*/
	2750	int mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k)
	2751	{
	2752	int err, i, m;
	2753	mp_word r;
	2754	mp_digit mu, tmpx1, tmpx2;
	2755
	2756	/* m = digits in modulus */
	2757	m = n->used;
	2758
	2759	/* ensure that "x" has at least 2m digits */
	2760	if (x->alloc < m + m) {
	2761	if ((err = mp_grow (x, m + m)) != MP_OKAY) {
	2762	return err;
	2763	}
	2764	}
	2765
	2766	/* top of loop, this is where the code resumes if
	2767	* another reduction pass is required.
	2768	*/
	2769	top:
	2770	/* aliases for digits */
	2771	/* alias for lower half of x */
	2772	tmpx1 = x->dp;
	2773
	2774	/* alias for upper half of x, or x/B*m /
	2775	tmpx2 = x->dp + m;
	2776
	2777	/* set carry to zero */
	2778	mu = 0;
	2779
	2780	/* compute (x mod B*m) + k [x/B*m] inline and inplace /
	2781	for (i = 0; i < m; i++) {
	2782	r = ((mp_word)tmpx2++) ((mp_word)k) + *tmpx1 + mu;
	2783	*tmpx1++ = (mp_digit)(r & MP_MASK);
	2784	mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
	2785	}
	2786
	2787	/* set final carry */
	2788	*tmpx1++ = mu;
	2789
	2790	/* zero words above m */
	2791	for (i = m + 1; i < x->used; i++) {
	2792	*tmpx1++ = 0;
	2793	}
	2794
	2795	/* clamp, sub and return */
	2796	mp_clamp (x);
	2797
	2798	/* if x >= n then subtract and reduce again
	2799	* Each successive "recursion" makes the input smaller and smaller.
	2800	*/
	2801	if (mp_cmp_mag (x, n) != MP_LT) {
	2802	if ((err = s_mp_sub(x, n, x)) != MP_OKAY) {
	2803	return err;
	2804	}
	2805	goto top;
	2806	}
	2807	return MP_OKAY;
	2808	}
	2809
	2810
	2811	/* reduces a modulo n where n is of the form 2*p - d /
	2812	int mp_reduce_2k(mp_int a, mp_int n, mp_digit d)
	2813	{
	2814	mp_int q;
	2815	int p, res;
	2816
	2817	if ((res = mp_init(&q)) != MP_OKAY) {
	2818	return res;
	2819	}
	2820
	2821	p = mp_count_bits(n);
	2822	top:
	2823	/* q = a/2p, a = a mod 2p */
	2824	if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
	2825	goto ERR;
	2826	}
	2827
	2828	if (d != 1) {
	2829	/* q = q * d */
	2830	if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) {
	2831	goto ERR;
	2832	}
	2833	}
	2834
	2835	/* a = a + q */
	2836	if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
	2837	goto ERR;
	2838	}
	2839
	2840	if (mp_cmp_mag(a, n) != MP_LT) {
	2841	if ((res = s_mp_sub(a, n, a)) != MP_OKAY) {
	2842	goto ERR;
	2843	}
	2844	goto top;
	2845	}
	2846
	2847	ERR:
	2848	mp_clear(&q);
	2849	return res;
	2850	}
	2851
	2852
	2853	/* determines the setup value */
	2854	int mp_reduce_2k_setup(mp_int a, mp_digit d)
	2855	{
	2856	int res, p;
	2857	mp_int tmp;
	2858
	2859	if ((res = mp_init(&tmp)) != MP_OKAY) {
	2860	return res;
	2861	}
	2862
	2863	p = mp_count_bits(a);
	2864	if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
	2865	mp_clear(&tmp);
	2866	return res;
	2867	}
	2868
	2869	if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
	2870	mp_clear(&tmp);
	2871	return res;
	2872	}
	2873
	2874	*d = tmp.dp[0];
	2875	mp_clear(&tmp);
	2876	return MP_OKAY;
	2877	}
	2878
	2879
	2880	/* set the b bit of a */
	2881	int mp_set_bit (mp_int * a, int b)
	2882	{
	2883	int i = b / DIGIT_BIT, res;
	2884
	2885	/*
	2886	* Require:
	2887	* bit index b >= 0
	2888	* a->alloc == a->used == 0 if a->dp == NULL
	2889	*/
	2890	if (b < 0 \|\| (a->dp == NULL && (a->alloc != 0 \|\| a->used != 0)))
	2891	return MP_VAL;
	2892
	2893	if (a->dp == NULL \|\| a->used < (int)(i + 1)) {
	2894	/* grow a to accommodate the single bit */
	2895	if ((res = mp_grow (a, i + 1)) != MP_OKAY) {
	2896	return res;
	2897	}
	2898
	2899	/* set the used count of where the bit will go */
	2900	a->used = (int)(i + 1);
	2901	}
	2902
	2903	/* put the single bit in its place */
	2904	a->dp[i] \|= ((mp_digit)1) << (b % DIGIT_BIT);
	2905
	2906	return MP_OKAY;
	2907	}
	2908
	2909	/* computes a = 2**b
	2910	*
	2911	* Simple algorithm which zeros the int, set the required bit
	2912	*/
	2913	int mp_2expt (mp_int * a, int b)
	2914	{
	2915	/* zero a as per default */
	2916	mp_zero (a);
	2917
	2918	return mp_set_bit(a, b);
	2919	}
	2920
	2921	/* multiply by a digit */
	2922	int mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
	2923	{
	2924	mp_digit u, tmpa, tmpc;
	2925	mp_word r;
	2926	int ix, res, olduse;
	2927
	2928	/* make sure c is big enough to hold ab /
	2929	if (c->alloc < a->used + 1) {
	2930	if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
	2931	return res;
	2932	}
	2933	}
	2934
	2935	/* get the original destinations used count */
	2936	olduse = c->used;
	2937
	2938	/* set the sign */
	2939	c->sign = a->sign;
	2940
	2941	/* alias for a->dp [source] */
	2942	tmpa = a->dp;
	2943
	2944	/* alias for c->dp [dest] */
	2945	tmpc = c->dp;
	2946
	2947	/* zero carry */
	2948	u = 0;
	2949
	2950	/* compute columns */
	2951	for (ix = 0; ix < a->used; ix++) {
	2952	/* compute product and carry sum for this term */
	2953	r = ((mp_word) u) + ((mp_word)tmpa++) ((mp_word)b);
	2954
	2955	/* mask off higher bits to get a single digit */
	2956	*tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
	2957
	2958	/* send carry into next iteration */
	2959	u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
	2960	}
	2961
	2962	/* store final carry [if any] and increment ix offset */
	2963	*tmpc++ = u;
	2964	++ix;
	2965
	2966	/* now zero digits above the top */
	2967	while (ix++ < olduse) {
	2968	*tmpc++ = 0;
	2969	}
	2970
	2971	/* set used count */
	2972	c->used = a->used + 1;
	2973	mp_clamp(c);
	2974
	2975	return MP_OKAY;
	2976	}
	2977
	2978
	2979	/* d = a * b (mod c) */
	2980	#if defined(FREESCALE_LTC_TFM)
	2981	int wolfcrypt_mp_mulmod(mp_int a, mp_int b, mp_int c, mp_int d)
	2982	#else
	2983	int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
	2984	#endif
	2985	{
	2986	int res;
	2987	mp_int t;
	2988
	2989	if ((res = mp_init_size (&t, c->used)) != MP_OKAY) {
	2990	return res;
	2991	}
	2992
	2993	res = mp_mul (a, b, &t);
	2994	if (res == MP_OKAY) {
	2995	res = mp_mod (&t, c, d);
	2996	}
	2997
	2998	mp_clear (&t);
	2999	return res;
	3000	}
	3001
	3002
	3003	/* d = a - b (mod c) */
	3004	int mp_submod(mp_int* a, mp_int* b, mp_int* c, mp_int* d)
	3005	{
	3006	int res;
	3007	mp_int t;
	3008
	3009	if ((res = mp_init (&t)) != MP_OKAY) {
	3010	return res;
	3011	}
	3012
	3013	res = mp_sub (a, b, &t);
	3014	if (res == MP_OKAY) {
	3015	res = mp_mod (&t, c, d);
	3016	}
	3017
	3018	mp_clear (&t);
	3019
	3020	return res;
	3021	}
	3022
	3023	/* d = a + b (mod c) */
	3024	int mp_addmod(mp_int* a, mp_int* b, mp_int* c, mp_int* d)
	3025	{
	3026	int res;
	3027	mp_int t;
	3028
	3029	if ((res = mp_init (&t)) != MP_OKAY) {
	3030	return res;
	3031	}
	3032
	3033	res = mp_add (a, b, &t);
	3034	if (res == MP_OKAY) {
	3035	res = mp_mod (&t, c, d);
	3036	}
	3037
	3038	mp_clear (&t);
	3039
	3040	return res;
	3041	}
	3042
[464]	3043	/* d = a - b (mod c) - a < c and b < c and positive */
	3044	int mp_submod_ct(mp_int* a, mp_int* b, mp_int* c, mp_int* d)
	3045	{
	3046	int res;
	3047
	3048	res = mp_sub(a, b, d);
	3049	if (res == MP_OKAY && mp_isneg(d)) {
	3050	res = mp_add(d, c, d);
	3051	}
	3052
	3053	return res;
	3054	}
	3055
	3056	/* d = a + b (mod c) - a < c and b < c and positive */
	3057	int mp_addmod_ct(mp_int* a, mp_int* b, mp_int* c, mp_int* d)
	3058	{
	3059	int res;
	3060
	3061	res = mp_add(a, b, d);
	3062	if (res == MP_OKAY && mp_cmp(d, c) != MP_LT) {
	3063	res = mp_sub(d, c, d);
	3064	}
	3065
	3066	return res;
	3067	}
	3068
[457]	3069	/* computes b = aa /
	3070	int mp_sqr (mp_int * a, mp_int * b)
	3071	{
	3072	int res;
	3073
	3074	{
	3075	#ifdef BN_FAST_S_MP_SQR_C
	3076	/* can we use the fast comba multiplier? */
	3077	if ((a->used * 2 + 1) < (int)MP_WARRAY &&
	3078	a->used <
	3079	(1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
	3080	res = fast_s_mp_sqr (a, b);
	3081	} else
	3082	#endif
	3083	#ifdef BN_S_MP_SQR_C
	3084	res = s_mp_sqr (a, b);
	3085	#else
	3086	res = MP_VAL;
	3087	#endif
	3088	}
	3089	b->sign = MP_ZPOS;
	3090	return res;
	3091	}
	3092
	3093
	3094	/* high level multiplication (handles sign) */
	3095	#if defined(FREESCALE_LTC_TFM)
	3096	int wolfcrypt_mp_mul(mp_int a, mp_int b, mp_int *c)
	3097	#else
	3098	int mp_mul (mp_int * a, mp_int * b, mp_int * c)
	3099	#endif
	3100	{
	3101	int res, neg;
	3102	neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
	3103
	3104	{
	3105	#ifdef BN_FAST_S_MP_MUL_DIGS_C
	3106	/* can we use the fast multiplier?
	3107	*
	3108	* The fast multiplier can be used if the output will
	3109	* have less than MP_WARRAY digits and the number of
	3110	* digits won't affect carry propagation
	3111	*/
	3112	int digs = a->used + b->used + 1;
	3113
	3114	if ((digs < (int)MP_WARRAY) &&
	3115	MIN(a->used, b->used) <=
[464]	3116	(1L << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
[457]	3117	res = fast_s_mp_mul_digs (a, b, c, digs);
	3118	} else
	3119	#endif
	3120	#ifdef BN_S_MP_MUL_DIGS_C
	3121	res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
	3122	#else
	3123	res = MP_VAL;
	3124	#endif
	3125
	3126	}
	3127	c->sign = (c->used > 0) ? neg : MP_ZPOS;
	3128	return res;
	3129	}
	3130
	3131
	3132	/* b = a2 /
	3133	int mp_mul_2(mp_int * a, mp_int * b)
	3134	{
	3135	int x, res, oldused;
	3136
	3137	/* grow to accommodate result */
	3138	if (b->alloc < a->used + 1) {
	3139	if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
	3140	return res;
	3141	}
	3142	}
	3143
	3144	oldused = b->used;
	3145	b->used = a->used;
	3146
	3147	{
	3148	mp_digit r, rr, tmpa, tmpb;
	3149
	3150	/* alias for source */
	3151	tmpa = a->dp;
	3152
	3153	/* alias for dest */
	3154	tmpb = b->dp;
	3155
	3156	/* carry */
	3157	r = 0;
	3158	for (x = 0; x < a->used; x++) {
	3159
	3160	/* get what will be the next carry bit from the
	3161	* MSB of the current digit
	3162	*/
	3163	rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
	3164
	3165	/* now shift up this digit, add in the carry [from the previous] */
	3166	tmpb++ = (mp_digit)(((tmpa++ << ((mp_digit)1)) \| r) & MP_MASK);
	3167
	3168	/* copy the carry that would be from the source
	3169	* digit into the next iteration
	3170	*/
	3171	r = rr;
	3172	}
	3173
	3174	/* new leading digit? */
	3175	if (r != 0) {
	3176	/* add a MSB which is always 1 at this point */
	3177	*tmpb = 1;
	3178	++(b->used);
	3179	}
	3180
	3181	/* now zero any excess digits on the destination
	3182	* that we didn't write to
	3183	*/
	3184	tmpb = b->dp + b->used;
	3185	for (x = b->used; x < oldused; x++) {
	3186	*tmpb++ = 0;
	3187	}
	3188	}
	3189	b->sign = a->sign;
	3190	return MP_OKAY;
	3191	}
	3192
	3193
	3194	/* divide by three (based on routine from MPI and the GMP manual) */
	3195	int mp_div_3 (mp_int * a, mp_int c, mp_digit d)
	3196	{
	3197	mp_int q;
	3198	mp_word w, t;
	3199	mp_digit b;
	3200	int res, ix;
	3201
	3202	/* b = 2*DIGIT_BIT / 3 /
	3203	b = (mp_digit) ( (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3) );
	3204
	3205	if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
	3206	return res;
	3207	}
	3208
	3209	q.used = a->used;
	3210	q.sign = a->sign;
	3211	w = 0;
	3212	for (ix = a->used - 1; ix >= 0; ix--) {
	3213	w = (w << ((mp_word)DIGIT_BIT)) \| ((mp_word)a->dp[ix]);
	3214
	3215	if (w >= 3) {
	3216	/* multiply w by [1/3] */
	3217	t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
	3218
	3219	/* now subtract 3 * [w/3] from w, to get the remainder */
	3220	w -= t+t+t;
	3221
	3222	/* fixup the remainder as required since
	3223	* the optimization is not exact.
	3224	*/
	3225	while (w >= 3) {
	3226	t += 1;
	3227	w -= 3;
	3228	}
	3229	} else {
	3230	t = 0;
	3231	}
	3232	q.dp[ix] = (mp_digit)t;
	3233	}
	3234
	3235	/* [optional] store the remainder */
	3236	if (d != NULL) {
	3237	*d = (mp_digit)w;
	3238	}
	3239
	3240	/* [optional] store the quotient */
	3241	if (c != NULL) {
	3242	mp_clamp(&q);
	3243	mp_exch(&q, c);
	3244	}
	3245	mp_clear(&q);
	3246
	3247	return res;
	3248	}
	3249
	3250
	3251	/* init an mp_init for a given size */
	3252	int mp_init_size (mp_int * a, int size)
	3253	{
	3254	int x;
	3255
	3256	/* pad size so there are always extra digits */
	3257	size += (MP_PREC * 2) - (size % MP_PREC);
	3258
	3259	/* alloc mem */
	3260	a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size, NULL,
	3261	DYNAMIC_TYPE_BIGINT);
	3262	if (a->dp == NULL) {
	3263	return MP_MEM;
	3264	}
	3265
	3266	/* set the members */
	3267	a->used = 0;
	3268	a->alloc = size;
	3269	a->sign = MP_ZPOS;
	3270	#ifdef HAVE_WOLF_BIGINT
	3271	wc_bigint_init(&a->raw);
	3272	#endif
	3273
	3274	/* zero the digits */
	3275	for (x = 0; x < size; x++) {
	3276	a->dp[x] = 0;
	3277	}
	3278
	3279	return MP_OKAY;
	3280	}
	3281
	3282
	3283	/* the jist of squaring...
	3284	* you do like mult except the offset of the tmpx [one that
	3285	* starts closer to zero] can't equal the offset of tmpy.
	3286	* So basically you set up iy like before then you min it with
	3287	* (ty-tx) so that it never happens. You double all those
	3288	* you add in the inner loop
	3289
	3290	After that loop you do the squares and add them in.
	3291	*/
	3292
	3293	int fast_s_mp_sqr (mp_int * a, mp_int * b)
	3294	{
	3295	int olduse, res, pa, ix, iz;
	3296	#ifdef WOLFSSL_SMALL_STACK
	3297	mp_digit* W; /* uses dynamic memory and slower */
	3298	#else
	3299	mp_digit W[MP_WARRAY];
	3300	#endif
	3301	mp_digit *tmpx;
	3302	mp_word W1;
	3303
	3304	/* grow the destination as required */
	3305	pa = a->used + a->used;
	3306	if (b->alloc < pa) {
	3307	if ((res = mp_grow (b, pa)) != MP_OKAY) {
	3308	return res;
	3309	}
	3310	}
	3311
	3312	if (pa > (int)MP_WARRAY)
	3313	return MP_RANGE; /* TAO range check */
	3314
	3315	#ifdef WOLFSSL_SMALL_STACK
	3316	W = (mp_digit)XMALLOC(sizeof(mp_digit) MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
	3317	if (W == NULL)
	3318	return MP_MEM;
	3319	#endif
	3320
	3321	/* number of output digits to produce */
	3322	W1 = 0;
	3323	for (ix = 0; ix < pa; ix++) {
	3324	int tx, ty, iy;
	3325	mp_word _W;
	3326	mp_digit *tmpy;
	3327
	3328	/* clear counter */
	3329	_W = 0;
	3330
	3331	/* get offsets into the two bignums */
	3332	ty = MIN(a->used-1, ix);
	3333	tx = ix - ty;
	3334
	3335	/* setup temp aliases */
	3336	tmpx = a->dp + tx;
	3337	tmpy = a->dp + ty;
	3338
	3339	/* this is the number of times the loop will iterate, essentially
	3340	while (tx++ < a->used && ty-- >= 0) { ... }
	3341	*/
	3342	iy = MIN(a->used-tx, ty+1);
	3343
	3344	/* now for squaring tx can never equal ty
	3345	* we halve the distance since they approach at a rate of 2x
	3346	* and we have to round because odd cases need to be executed
	3347	*/
	3348	iy = MIN(iy, (ty-tx+1)>>1);
	3349
	3350	/* execute loop */
	3351	for (iz = 0; iz < iy; iz++) {
	3352	_W += ((mp_word)tmpx++)((mp_word)*tmpy--);
	3353	}
	3354
	3355	/* double the inner product and add carry */
	3356	_W = _W + _W + W1;
	3357
	3358	/* even columns have the square term in them */
	3359	if ((ix&1) == 0) {
	3360	_W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
	3361	}
	3362
	3363	/* store it */
	3364	W[ix] = (mp_digit)(_W & MP_MASK);
	3365
	3366	/* make next carry */
	3367	W1 = _W >> ((mp_word)DIGIT_BIT);
	3368	}
	3369
	3370	/* setup dest */
	3371	olduse = b->used;
	3372	b->used = a->used+a->used;
	3373
	3374	{
	3375	mp_digit *tmpb;
	3376	tmpb = b->dp;
	3377	for (ix = 0; ix < pa; ix++) {
	3378	*tmpb++ = (mp_digit)(W[ix] & MP_MASK);
	3379	}
	3380
	3381	/* clear unused digits [that existed in the old copy of c] */
	3382	for (; ix < olduse; ix++) {
	3383	*tmpb++ = 0;
	3384	}
	3385	}
	3386	mp_clamp (b);
	3387
	3388	#ifdef WOLFSSL_SMALL_STACK
	3389	XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
	3390	#endif
	3391
	3392	return MP_OKAY;
	3393	}
	3394
	3395
	3396	/* Fast (comba) multiplier
	3397	*
	3398	* This is the fast column-array [comba] multiplier. It is
	3399	* designed to compute the columns of the product first
	3400	* then handle the carries afterwards. This has the effect
	3401	* of making the nested loops that compute the columns very
	3402	* simple and schedulable on super-scalar processors.
	3403	*
	3404	* This has been modified to produce a variable number of
	3405	* digits of output so if say only a half-product is required
	3406	* you don't have to compute the upper half (a feature
	3407	* required for fast Barrett reduction).
	3408	*
	3409	* Based on Algorithm 14.12 on pp.595 of HAC.
	3410	*
	3411	*/
	3412	int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
	3413	{
	3414	int olduse, res, pa, ix, iz;
	3415	#ifdef WOLFSSL_SMALL_STACK
	3416	mp_digit* W; /* uses dynamic memory and slower */
	3417	#else
	3418	mp_digit W[MP_WARRAY];
	3419	#endif
	3420	mp_word _W;
	3421
	3422	/* grow the destination as required */
	3423	if (c->alloc < digs) {
	3424	if ((res = mp_grow (c, digs)) != MP_OKAY) {
	3425	return res;
	3426	}
	3427	}
	3428
	3429	/* number of output digits to produce */
	3430	pa = MIN(digs, a->used + b->used);
	3431	if (pa > (int)MP_WARRAY)
	3432	return MP_RANGE; /* TAO range check */
	3433
	3434	#ifdef WOLFSSL_SMALL_STACK
	3435	W = (mp_digit)XMALLOC(sizeof(mp_digit) MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
	3436	if (W == NULL)
	3437	return MP_MEM;
	3438	#endif
	3439
	3440	/* clear the carry */
	3441	_W = 0;
	3442	for (ix = 0; ix < pa; ix++) {
	3443	int tx, ty;
	3444	int iy;
	3445	mp_digit tmpx, tmpy;
	3446
	3447	/* get offsets into the two bignums */
	3448	ty = MIN(b->used-1, ix);
	3449	tx = ix - ty;
	3450
	3451	/* setup temp aliases */
	3452	tmpx = a->dp + tx;
	3453	tmpy = b->dp + ty;
	3454
	3455	/* this is the number of times the loop will iterate, essentially
	3456	while (tx++ < a->used && ty-- >= 0) { ... }
	3457	*/
	3458	iy = MIN(a->used-tx, ty+1);
	3459
	3460	/* execute loop */
	3461	for (iz = 0; iz < iy; ++iz) {
	3462	_W += ((mp_word)tmpx++)((mp_word)*tmpy--);
	3463
	3464	}
	3465
	3466	/* store term */
	3467	W[ix] = (mp_digit)(((mp_digit)_W) & MP_MASK);
	3468
	3469	/* make next carry */
	3470	_W = _W >> ((mp_word)DIGIT_BIT);
	3471	}
	3472
	3473	/* setup dest */
	3474	olduse = c->used;
	3475	c->used = pa;
	3476
	3477	{
	3478	mp_digit *tmpc;
	3479	tmpc = c->dp;
	3480	for (ix = 0; ix < pa; ix++) { /* JRB, +1 could read uninitialized data */
	3481	/* now extract the previous digit [below the carry] */
	3482	*tmpc++ = W[ix];
	3483	}
	3484
	3485	/* clear unused digits [that existed in the old copy of c] */
	3486	for (; ix < olduse; ix++) {
	3487	*tmpc++ = 0;
	3488	}
	3489	}
	3490	mp_clamp (c);
	3491
	3492	#ifdef WOLFSSL_SMALL_STACK
	3493	XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
	3494	#endif
	3495
	3496	return MP_OKAY;
	3497	}
	3498
	3499
	3500	/* low level squaring, b = aa, HAC pp.596-597, Algorithm 14.16 /
	3501	int s_mp_sqr (mp_int * a, mp_int * b)
	3502	{
	3503	mp_int t;
	3504	int res, ix, iy, pa;
	3505	mp_word r;
	3506	mp_digit u, tmpx, *tmpt;
	3507
	3508	pa = a->used;
	3509	if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
	3510	return res;
	3511	}
	3512
	3513	/* default used is maximum possible size */
	3514	t.used = 2*pa + 1;
	3515
	3516	for (ix = 0; ix < pa; ix++) {
	3517	/* first calculate the digit at 2ix /
	3518	/* calculate double precision result */
	3519	r = ((mp_word) t.dp[2*ix]) +
	3520	((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
	3521
	3522	/* store lower part in result */
	3523	t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
	3524
	3525	/* get the carry */
	3526	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
	3527
	3528	/* left hand side of A[ix] * A[iy] */
	3529	tmpx = a->dp[ix];
	3530
	3531	/* alias for where to store the results */
	3532	tmpt = t.dp + (2*ix + 1);
	3533
	3534	for (iy = ix + 1; iy < pa; iy++) {
	3535	/* first calculate the product */
	3536	r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
	3537
	3538	/* now calculate the double precision result, note we use
	3539	* addition instead of *2 since it's easier to optimize
	3540	*/
	3541	r = ((mp_word) *tmpt) + r + r + ((mp_word) u);
	3542
	3543	/* store lower part */
	3544	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
	3545
	3546	/* get carry */
	3547	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
	3548	}
	3549	/* propagate upwards */
	3550	while (u != ((mp_digit) 0)) {
	3551	r = ((mp_word) *tmpt) + ((mp_word) u);
	3552	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
	3553	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
	3554	}
	3555	}
	3556
	3557	mp_clamp (&t);
	3558	mp_exch (&t, b);
	3559	mp_clear (&t);
	3560	return MP_OKAY;
	3561	}
	3562
	3563
	3564	/* multiplies \|a\| * \|b\| and only computes up to digs digits of result
	3565	* HAC pp. 595, Algorithm 14.12 Modified so you can control how
	3566	* many digits of output are created.
	3567	*/
	3568	int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
	3569	{
	3570	mp_int t;
	3571	int res, pa, pb, ix, iy;
	3572	mp_digit u;
	3573	mp_word r;
	3574	mp_digit tmpx, tmpt, tmpy;
	3575
	3576	/* can we use the fast multiplier? */
	3577	if ((digs < (int)MP_WARRAY) &&
	3578	MIN (a->used, b->used) <
[464]	3579	(1L << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
[457]	3580	return fast_s_mp_mul_digs (a, b, c, digs);
	3581	}
	3582
	3583	if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
	3584	return res;
	3585	}
	3586	t.used = digs;
	3587
	3588	/* compute the digits of the product directly */
	3589	pa = a->used;
	3590	for (ix = 0; ix < pa; ix++) {
	3591	/* set the carry to zero */
	3592	u = 0;
	3593
	3594	/* limit ourselves to making digs digits of output */
	3595	pb = MIN (b->used, digs - ix);
	3596
	3597	/* setup some aliases */
	3598	/* copy of the digit from a used within the nested loop */
	3599	tmpx = a->dp[ix];
	3600
	3601	/* an alias for the destination shifted ix places */
	3602	tmpt = t.dp + ix;
	3603
	3604	/* an alias for the digits of b */
	3605	tmpy = b->dp;
	3606
	3607	/* compute the columns of the output and propagate the carry */
	3608	for (iy = 0; iy < pb; iy++) {
	3609	/* compute the column as a mp_word */
	3610	r = ((mp_word)*tmpt) +
	3611	((mp_word)tmpx) * ((mp_word)*tmpy++) +
	3612	((mp_word) u);
	3613
	3614	/* the new column is the lower part of the result */
	3615	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
	3616
	3617	/* get the carry word from the result */
	3618	u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
	3619	}
	3620	/* set carry if it is placed below digs */
	3621	if (ix + iy < digs) {
	3622	*tmpt = u;
	3623	}
	3624	}
	3625
	3626	mp_clamp (&t);
	3627	mp_exch (&t, c);
	3628
	3629	mp_clear (&t);
	3630	return MP_OKAY;
	3631	}
	3632
	3633
	3634	/*
	3635	* shifts with subtractions when the result is greater than b.
	3636	*
	3637	* The method is slightly modified to shift B unconditionally up to just under
	3638	* the leading bit of b. This saves a lot of multiple precision shifting.
	3639	*/
	3640	int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
	3641	{
	3642	int x, bits, res;
	3643
	3644	/* how many bits of last digit does b use */
	3645	bits = mp_count_bits (b) % DIGIT_BIT;
	3646
	3647	if (b->used > 1) {
	3648	if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1))
	3649	!= MP_OKAY) {
	3650	return res;
	3651	}
	3652	} else {
	3653	if ((res = mp_set(a, 1)) != MP_OKAY) {
	3654	return res;
	3655	}
	3656	bits = 1;
	3657	}
	3658
	3659	/* now compute C = A * B mod b */
	3660	for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
	3661	if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
	3662	return res;
	3663	}
	3664	if (mp_cmp_mag (a, b) != MP_LT) {
	3665	if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
	3666	return res;
	3667	}
	3668	}
	3669	}
	3670
	3671	return MP_OKAY;
	3672	}
	3673
	3674
	3675	#ifdef MP_LOW_MEM
	3676	#define TAB_SIZE 32
	3677	#else
	3678	#define TAB_SIZE 256
	3679	#endif
	3680
	3681	int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
	3682	{
	3683	mp_int M[TAB_SIZE], res, mu;
	3684	mp_digit buf;
	3685	int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
	3686	int (redux)(mp_int,mp_int,mp_int);
	3687
	3688	/* find window size */
	3689	x = mp_count_bits (X);
	3690	if (x <= 7) {
	3691	winsize = 2;
	3692	} else if (x <= 36) {
	3693	winsize = 3;
	3694	} else if (x <= 140) {
	3695	winsize = 4;
	3696	} else if (x <= 450) {
	3697	winsize = 5;
	3698	} else if (x <= 1303) {
	3699	winsize = 6;
	3700	} else if (x <= 3529) {
	3701	winsize = 7;
	3702	} else {
	3703	winsize = 8;
	3704	}
	3705
	3706	#ifdef MP_LOW_MEM
	3707	if (winsize > 5) {
	3708	winsize = 5;
	3709	}
	3710	#endif
	3711
	3712	/* init M array */
	3713	/* init first cell */
	3714	if ((err = mp_init(&M[1])) != MP_OKAY) {
	3715	return err;
	3716	}
	3717
	3718	/* now init the second half of the array */
	3719	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
	3720	if ((err = mp_init(&M[x])) != MP_OKAY) {
	3721	for (y = 1<<(winsize-1); y < x; y++) {
	3722	mp_clear (&M[y]);
	3723	}
	3724	mp_clear(&M[1]);
	3725	return err;
	3726	}
	3727	}
	3728
	3729	/* create mu, used for Barrett reduction */
	3730	if ((err = mp_init (&mu)) != MP_OKAY) {
	3731	goto LBL_M;
	3732	}
	3733
	3734	if (redmode == 0) {
	3735	if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
	3736	goto LBL_MU;
	3737	}
	3738	redux = mp_reduce;
	3739	} else {
	3740	if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
	3741	goto LBL_MU;
	3742	}
	3743	redux = mp_reduce_2k_l;
	3744	}
	3745
	3746	/* create M table
	3747	*
	3748	* The M table contains powers of the base,
	3749	* e.g. M[x] = G**x mod P
	3750	*
	3751	* The first half of the table is not
	3752	* computed though accept for M[0] and M[1]
	3753	*/
	3754	if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
	3755	goto LBL_MU;
	3756	}
	3757
	3758	/* compute the value at M[1<<(winsize-1)] by squaring
	3759	* M[1] (winsize-1) times
	3760	*/
	3761	if ((err = mp_copy (&M[1], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
	3762	goto LBL_MU;
	3763	}
	3764
	3765	for (x = 0; x < (winsize - 1); x++) {
	3766	/* square it */
	3767	if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))],
	3768	&M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
	3769	goto LBL_MU;
	3770	}
	3771
	3772	/* reduce modulo P */
	3773	if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, &mu)) != MP_OKAY) {
	3774	goto LBL_MU;
	3775	}
	3776	}
	3777
	3778	/* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
	3779	* for x = (2(winsize - 1) + 1) to (2winsize - 1)
	3780	*/
	3781	for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
	3782	if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
	3783	goto LBL_MU;
	3784	}
	3785	if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
	3786	goto LBL_MU;
	3787	}
	3788	}
	3789
	3790	/* setup result */
	3791	if ((err = mp_init (&res)) != MP_OKAY) {
	3792	goto LBL_MU;
	3793	}
	3794	if ((err = mp_set (&res, 1)) != MP_OKAY) {
	3795	goto LBL_MU;
	3796	}
	3797
	3798	/* set initial mode and bit cnt */
	3799	mode = 0;
	3800	bitcnt = 1;
	3801	buf = 0;
	3802	digidx = X->used - 1;
	3803	bitcpy = 0;
	3804	bitbuf = 0;
	3805
	3806	for (;;) {
	3807	/* grab next digit as required */
	3808	if (--bitcnt == 0) {
	3809	/* if digidx == -1 we are out of digits */
	3810	if (digidx == -1) {
	3811	break;
	3812	}
	3813	/* read next digit and reset the bitcnt */
	3814	buf = X->dp[digidx--];
	3815	bitcnt = (int) DIGIT_BIT;
	3816	}
	3817
	3818	/* grab the next msb from the exponent */
	3819	y = (int)(buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
	3820	buf <<= (mp_digit)1;
	3821
	3822	/* if the bit is zero and mode == 0 then we ignore it
	3823	* These represent the leading zero bits before the first 1 bit
	3824	* in the exponent. Technically this opt is not required but it
	3825	* does lower the # of trivial squaring/reductions used
	3826	*/
	3827	if (mode == 0 && y == 0) {
	3828	continue;
	3829	}
	3830
	3831	/* if the bit is zero and mode == 1 then we square */
	3832	if (mode == 1 && y == 0) {
	3833	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
	3834	goto LBL_RES;
	3835	}
	3836	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
	3837	goto LBL_RES;
	3838	}
	3839	continue;
	3840	}
	3841
	3842	/* else we add it to the window */
	3843	bitbuf \|= (y << (winsize - ++bitcpy));
	3844	mode = 2;
	3845
	3846	if (bitcpy == winsize) {
	3847	/* ok window is filled so square as required and multiply */
	3848	/* square first */
	3849	for (x = 0; x < winsize; x++) {
	3850	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
	3851	goto LBL_RES;
	3852	}
	3853	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
	3854	goto LBL_RES;
	3855	}
	3856	}
	3857
	3858	/* then multiply */
	3859	if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
	3860	goto LBL_RES;
	3861	}
	3862	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
	3863	goto LBL_RES;
	3864	}
	3865
	3866	/* empty window and reset */
	3867	bitcpy = 0;
	3868	bitbuf = 0;
	3869	mode = 1;
	3870	}
	3871	}
	3872
	3873	/* if bits remain then square/multiply */
	3874	if (mode == 2 && bitcpy > 0) {
	3875	/* square then multiply if the bit is set */
	3876	for (x = 0; x < bitcpy; x++) {
	3877	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
	3878	goto LBL_RES;
	3879	}
	3880	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
	3881	goto LBL_RES;
	3882	}
	3883
	3884	bitbuf <<= 1;
	3885	if ((bitbuf & (1 << winsize)) != 0) {
	3886	/* then multiply */
	3887	if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
	3888	goto LBL_RES;
	3889	}
	3890	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
	3891	goto LBL_RES;
	3892	}
	3893	}
	3894	}
	3895	}
	3896
	3897	mp_exch (&res, Y);
	3898	err = MP_OKAY;
	3899	LBL_RES:mp_clear (&res);
	3900	LBL_MU:mp_clear (&mu);
	3901	LBL_M:
	3902	mp_clear(&M[1]);
	3903	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
	3904	mp_clear (&M[x]);
	3905	}
	3906	return err;
	3907	}
	3908
	3909
	3910	/* pre-calculate the value required for Barrett reduction
	3911	* For a given modulus "b" it calculates the value required in "a"
	3912	*/
	3913	int mp_reduce_setup (mp_int * a, mp_int * b)
	3914	{
	3915	int res;
	3916
	3917	if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
	3918	return res;
	3919	}
	3920	return mp_div (a, b, a, NULL);
	3921	}
	3922
	3923
	3924	/* reduces x mod m, assumes 0 < x < m**2, mu is
	3925	* precomputed via mp_reduce_setup.
	3926	* From HAC pp.604 Algorithm 14.42
	3927	*/
	3928	int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
	3929	{
	3930	mp_int q;
	3931	int res, um = m->used;
	3932
	3933	/* q = x */
	3934	if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
	3935	return res;
	3936	}
	3937
	3938	/* q1 = x / b*(k-1) /
	3939	mp_rshd (&q, um - 1);
	3940
	3941	/* according to HAC this optimization is ok */
	3942	if (((mp_word) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
	3943	if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
	3944	goto CLEANUP;
	3945	}
	3946	} else {
	3947	#ifdef BN_S_MP_MUL_HIGH_DIGS_C
	3948	if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
	3949	goto CLEANUP;
	3950	}
	3951	#elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
	3952	if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
	3953	goto CLEANUP;
	3954	}
	3955	#else
	3956	{
	3957	res = MP_VAL;
	3958	goto CLEANUP;
	3959	}
	3960	#endif
	3961	}
	3962
	3963	/* q3 = q2 / b*(k+1) /
	3964	mp_rshd (&q, um + 1);
	3965
	3966	/* x = x mod b*(k+1), quick (no division) /
	3967	if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
	3968	goto CLEANUP;
	3969	}
	3970
	3971	/* q = q * m mod b*(k+1), quick (no division) /
	3972	if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
	3973	goto CLEANUP;
	3974	}
	3975
	3976	/* x = x - q */
	3977	if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
	3978	goto CLEANUP;
	3979	}
	3980
	3981	/* If x < 0, add b*(k+1) to it /
	3982	if (mp_cmp_d (x, 0) == MP_LT) {
	3983	if ((res = mp_set (&q, 1)) != MP_OKAY)
	3984	goto CLEANUP;
	3985	if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
	3986	goto CLEANUP;
	3987	if ((res = mp_add (x, &q, x)) != MP_OKAY)
	3988	goto CLEANUP;
	3989	}
	3990
	3991	/* Back off if it's too big */
	3992	while (mp_cmp (x, m) != MP_LT) {
	3993	if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
	3994	goto CLEANUP;
	3995	}
	3996	}
	3997
	3998	CLEANUP:
	3999	mp_clear (&q);
	4000
	4001	return res;
	4002	}
	4003
	4004
	4005	/* reduces a modulo n where n is of the form 2**p - d
	4006	This differs from reduce_2k since "d" can be larger
	4007	than a single digit.
	4008	*/
	4009	int mp_reduce_2k_l(mp_int a, mp_int n, mp_int *d)
	4010	{
	4011	mp_int q;
	4012	int p, res;
	4013
	4014	if ((res = mp_init(&q)) != MP_OKAY) {
	4015	return res;
	4016	}
	4017
	4018	p = mp_count_bits(n);
	4019	top:
	4020	/* q = a/2p, a = a mod 2p */
	4021	if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
	4022	goto ERR;
	4023	}
	4024
	4025	/* q = q * d */
	4026	if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
	4027	goto ERR;
	4028	}
	4029
	4030	/* a = a + q */
	4031	if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
	4032	goto ERR;
	4033	}
	4034
	4035	if (mp_cmp_mag(a, n) != MP_LT) {
	4036	if ((res = s_mp_sub(a, n, a)) != MP_OKAY) {
	4037	goto ERR;
	4038	}
	4039	goto top;
	4040	}
	4041
	4042	ERR:
	4043	mp_clear(&q);
	4044	return res;
	4045	}
	4046
	4047
	4048	/* determines the setup value */
	4049	int mp_reduce_2k_setup_l(mp_int a, mp_int d)
	4050	{
	4051	int res;
	4052	mp_int tmp;
	4053
	4054	if ((res = mp_init(&tmp)) != MP_OKAY) {
	4055	return res;
	4056	}
	4057
	4058	if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
	4059	goto ERR;
	4060	}
	4061
	4062	if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
	4063	goto ERR;
	4064	}
	4065
	4066	ERR:
	4067	mp_clear(&tmp);
	4068	return res;
	4069	}
	4070
	4071
	4072	/* multiplies \|a\| * \|b\| and does not compute the lower digs digits
	4073	* [meant to get the higher part of the product]
	4074	*/
	4075	int s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
	4076	{
	4077	mp_int t;
	4078	int res, pa, pb, ix, iy;
	4079	mp_digit u;
	4080	mp_word r;
	4081	mp_digit tmpx, tmpt, tmpy;
	4082
	4083	/* can we use the fast multiplier? */
	4084	#ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
	4085	if (((a->used + b->used + 1) < (int)MP_WARRAY)
	4086	&& MIN (a->used, b->used) <
[464]	4087	(1L << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
[457]	4088	return fast_s_mp_mul_high_digs (a, b, c, digs);
	4089	}
	4090	#endif
	4091
	4092	if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
	4093	return res;
	4094	}
	4095	t.used = a->used + b->used + 1;
	4096
	4097	pa = a->used;
	4098	pb = b->used;
	4099	for (ix = 0; ix < pa && a->dp; ix++) {
	4100	/* clear the carry */
	4101	u = 0;
	4102
	4103	/* left hand side of A[ix] * B[iy] */
	4104	tmpx = a->dp[ix];
	4105
	4106	/* alias to the address of where the digits will be stored */
	4107	tmpt = &(t.dp[digs]);
	4108
	4109	/* alias for where to read the right hand side from */
	4110	tmpy = b->dp + (digs - ix);
	4111
	4112	for (iy = digs - ix; iy < pb; iy++) {
	4113	/* calculate the double precision result */
	4114	r = ((mp_word)*tmpt) +
	4115	((mp_word)tmpx) * ((mp_word)*tmpy++) +
	4116	((mp_word) u);
	4117
	4118	/* get the lower part */
	4119	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
	4120
	4121	/* carry the carry */
	4122	u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
	4123	}
	4124	*tmpt = u;
	4125	}
	4126	mp_clamp (&t);
	4127	mp_exch (&t, c);
	4128	mp_clear (&t);
	4129	return MP_OKAY;
	4130	}
	4131
	4132
	4133	/* this is a modified version of fast_s_mul_digs that only produces
	4134	* output digits above digs. See the comments for fast_s_mul_digs
	4135	* to see how it works.
	4136	*
	4137	* This is used in the Barrett reduction since for one of the multiplications
	4138	* only the higher digits were needed. This essentially halves the work.
	4139	*
	4140	* Based on Algorithm 14.12 on pp.595 of HAC.
	4141	*/
	4142	int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
	4143	{
	4144	int olduse, res, pa, ix, iz;
	4145	#ifdef WOLFSSL_SMALL_STACK
	4146	mp_digit* W; /* uses dynamic memory and slower */
	4147	#else
	4148	mp_digit W[MP_WARRAY];
	4149	#endif
	4150	mp_word _W;
	4151
	4152	if (a->dp == NULL) { /* JRB, avoid reading uninitialized values */
	4153	return MP_VAL;
	4154	}
	4155
	4156	/* grow the destination as required */
	4157	pa = a->used + b->used;
	4158	if (c->alloc < pa) {
	4159	if ((res = mp_grow (c, pa)) != MP_OKAY) {
	4160	return res;
	4161	}
	4162	}
	4163
	4164	if (pa > (int)MP_WARRAY)
	4165	return MP_RANGE; /* TAO range check */
	4166
	4167	#ifdef WOLFSSL_SMALL_STACK
	4168	W = (mp_digit)XMALLOC(sizeof(mp_digit) MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
	4169	if (W == NULL)
	4170	return MP_MEM;
	4171	#endif
	4172
	4173	/* number of output digits to produce */
	4174	pa = a->used + b->used;
	4175	_W = 0;
	4176	for (ix = digs; ix < pa; ix++) { /* JRB, have a->dp check at top of function*/
	4177	int tx, ty, iy;
	4178	mp_digit tmpx, tmpy;
	4179
	4180	/* get offsets into the two bignums */
	4181	ty = MIN(b->used-1, ix);
	4182	tx = ix - ty;
	4183
	4184	/* setup temp aliases */
	4185	tmpx = a->dp + tx;
	4186	tmpy = b->dp + ty;
	4187
	4188	/* this is the number of times the loop will iterate, essentially its
	4189	while (tx++ < a->used && ty-- >= 0) { ... }
	4190	*/
	4191	iy = MIN(a->used-tx, ty+1);
	4192
	4193	/* execute loop */
	4194	for (iz = 0; iz < iy; iz++) {
	4195	_W += ((mp_word)tmpx++)((mp_word)*tmpy--);
	4196	}
	4197
	4198	/* store term */
	4199	W[ix] = (mp_digit)(((mp_digit)_W) & MP_MASK);
	4200
	4201	/* make next carry */
	4202	_W = _W >> ((mp_word)DIGIT_BIT);
	4203	}
	4204
	4205	/* setup dest */
	4206	olduse = c->used;
	4207	c->used = pa;
	4208
	4209	{
	4210	mp_digit *tmpc;
	4211
	4212	tmpc = c->dp + digs;
	4213	for (ix = digs; ix < pa; ix++) { /* TAO, <= could potentially overwrite */
	4214	/* now extract the previous digit [below the carry] */
	4215	*tmpc++ = W[ix];
	4216	}
	4217
	4218	/* clear unused digits [that existed in the old copy of c] */
	4219	for (; ix < olduse; ix++) {
	4220	*tmpc++ = 0;
	4221	}
	4222	}
	4223	mp_clamp (c);
	4224
	4225	#ifdef WOLFSSL_SMALL_STACK
	4226	XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
	4227	#endif
	4228
	4229	return MP_OKAY;
	4230	}
	4231
	4232
	4233	#ifndef MP_SET_CHUNK_BITS
	4234	#define MP_SET_CHUNK_BITS 4
	4235	#endif
	4236	int mp_set_int (mp_int * a, unsigned long b)
	4237	{
	4238	int x, res;
	4239
	4240	/* use direct mp_set if b is less than mp_digit max */
	4241	if (b < MP_DIGIT_MAX) {
	4242	return mp_set (a, (mp_digit)b);
	4243	}
	4244
	4245	mp_zero (a);
	4246
	4247	/* set chunk bits at a time */
	4248	for (x = 0; x < (int)(sizeof(b) * 8) / MP_SET_CHUNK_BITS; x++) {
	4249	/* shift the number up chunk bits */
	4250	if ((res = mp_mul_2d (a, MP_SET_CHUNK_BITS, a)) != MP_OKAY) {
	4251	return res;
	4252	}
	4253
	4254	/* OR in the top bits of the source */
	4255	a->dp[0] \|= (b >> ((sizeof(b) * 8) - MP_SET_CHUNK_BITS)) &
	4256	((1 << MP_SET_CHUNK_BITS) - 1);
	4257
	4258	/* shift the source up to the next chunk bits */
	4259	b <<= MP_SET_CHUNK_BITS;
	4260
	4261	/* ensure that digits are not clamped off */
	4262	a->used += 1;
	4263	}
	4264	mp_clamp (a);
	4265	return MP_OKAY;
	4266	}
	4267
	4268
	4269	#if defined(WOLFSSL_KEY_GEN) \|\| defined(HAVE_ECC) \|\| !defined(NO_RSA) \|\| \
	4270	!defined(NO_DSA) \| !defined(NO_DH)
	4271
	4272	/* c = a * a (mod b) */
	4273	int mp_sqrmod (mp_int * a, mp_int * b, mp_int * c)
	4274	{
	4275	int res;
	4276	mp_int t;
	4277
	4278	if ((res = mp_init (&t)) != MP_OKAY) {
	4279	return res;
	4280	}
	4281
	4282	if ((res = mp_sqr (a, &t)) != MP_OKAY) {
	4283	mp_clear (&t);
	4284	return res;
	4285	}
	4286	res = mp_mod (&t, b, c);
	4287	mp_clear (&t);
	4288	return res;
	4289	}
	4290
	4291	#endif
	4292
	4293
	4294	#if defined(HAVE_ECC) \|\| !defined(NO_PWDBASED) \|\| defined(WOLFSSL_SNIFFER) \|\| \
	4295	defined(WOLFSSL_HAVE_WOLFSCEP) \|\| defined(WOLFSSL_KEY_GEN) \|\| \
	4296	defined(OPENSSL_EXTRA) \|\| defined(WC_RSA_BLINDING) \|\| \
	4297	(!defined(NO_RSA) && !defined(NO_RSA_BOUNDS_CHECK))
	4298
	4299	/* single digit addition */
	4300	int mp_add_d (mp_int* a, mp_digit b, mp_int* c)
	4301	{
	4302	int res, ix, oldused;
	4303	mp_digit tmpa, tmpc, mu;
	4304
	4305	/* grow c as required */
	4306	if (c->alloc < a->used + 1) {
	4307	if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
	4308	return res;
	4309	}
	4310	}
	4311
	4312	/* if a is negative and \|a\| >= b, call c = \|a\| - b */
	4313	if (a->sign == MP_NEG && (a->used > 1 \|\| a->dp[0] >= b)) {
	4314	/* temporarily fix sign of a */
	4315	a->sign = MP_ZPOS;
	4316
	4317	/* c = \|a\| - b */
	4318	res = mp_sub_d(a, b, c);
	4319
	4320	/* fix sign */
	4321	a->sign = c->sign = MP_NEG;
	4322
	4323	/* clamp */
	4324	mp_clamp(c);
	4325
	4326	return res;
	4327	}
	4328
	4329	/* old number of used digits in c */
	4330	oldused = c->used;
	4331
	4332	/* sign always positive */
	4333	c->sign = MP_ZPOS;
	4334
	4335	/* source alias */
	4336	tmpa = a->dp;
	4337
	4338	/* destination alias */
	4339	tmpc = c->dp;
	4340
	4341	/* if a is positive */
	4342	if (a->sign == MP_ZPOS) {
	4343	/* add digit, after this we're propagating
	4344	* the carry.
	4345	*/
	4346	tmpc = tmpa++ + b;
	4347	mu = *tmpc >> DIGIT_BIT;
	4348	*tmpc++ &= MP_MASK;
	4349
	4350	/* now handle rest of the digits */
	4351	for (ix = 1; ix < a->used; ix++) {
	4352	tmpc = tmpa++ + mu;
	4353	mu = *tmpc >> DIGIT_BIT;
	4354	*tmpc++ &= MP_MASK;
	4355	}
	4356	/* set final carry */
	4357	if (ix < c->alloc) {
	4358	ix++;
	4359	*tmpc++ = mu;
	4360	}
	4361
	4362	/* setup size */
	4363	c->used = a->used + 1;
	4364	} else {
	4365	/* a was negative and \|a\| < b */
	4366	c->used = 1;
	4367
	4368	/* the result is a single digit */
	4369	if (a->used == 1) {
	4370	*tmpc++ = b - a->dp[0];
	4371	} else {
	4372	*tmpc++ = b;
	4373	}
	4374
	4375	/* setup count so the clearing of oldused
	4376	* can fall through correctly
	4377	*/
	4378	ix = 1;
	4379	}
	4380
	4381	/* now zero to oldused */
	4382	while (ix++ < oldused) {
	4383	*tmpc++ = 0;
	4384	}
	4385	mp_clamp(c);
	4386
	4387	return MP_OKAY;
	4388	}
	4389
	4390
	4391	/* single digit subtraction */
	4392	int mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
	4393	{
	4394	mp_digit tmpa, tmpc, mu;
	4395	int res, ix, oldused;
	4396
[464]	4397	if (b > MP_MASK) return MP_VAL;
	4398
[457]	4399	/* grow c as required */
	4400	if (c->alloc < a->used + 1) {
	4401	if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
	4402	return res;
	4403	}
	4404	}
	4405
	4406	/* if a is negative just do an unsigned
	4407	* addition [with fudged signs]
	4408	*/
	4409	if (a->sign == MP_NEG) {
	4410	a->sign = MP_ZPOS;
	4411	res = mp_add_d(a, b, c);
	4412	a->sign = c->sign = MP_NEG;
	4413
	4414	/* clamp */
	4415	mp_clamp(c);
	4416
	4417	return res;
	4418	}
	4419
	4420	/* setup regs */
	4421	oldused = c->used;
	4422	tmpa = a->dp;
	4423	tmpc = c->dp;
	4424
	4425	/* if a <= b simply fix the single digit */
	4426	if ((a->used == 1 && a->dp[0] <= b) \|\| a->used == 0) {
	4427	if (a->used == 1) {
	4428	tmpc++ = b - tmpa;
	4429	} else {
	4430	*tmpc++ = b;
	4431	}
	4432	ix = 1;
	4433
	4434	/* negative/1digit */
	4435	c->sign = MP_NEG;
	4436	c->used = 1;
	4437	} else {
	4438	/* positive/size */
	4439	c->sign = MP_ZPOS;
	4440	c->used = a->used;
	4441
	4442	/* subtract first digit */
	4443	tmpc = tmpa++ - b;
	4444	mu = tmpc >> (sizeof(mp_digit) CHAR_BIT - 1);
	4445	*tmpc++ &= MP_MASK;
	4446
	4447	/* handle rest of the digits */
	4448	for (ix = 1; ix < a->used; ix++) {
	4449	tmpc = tmpa++ - mu;
	4450	mu = tmpc >> (sizeof(mp_digit) CHAR_BIT - 1);
	4451	*tmpc++ &= MP_MASK;
	4452	}
	4453	}
	4454
	4455	/* zero excess digits */
	4456	while (ix++ < oldused) {
	4457	*tmpc++ = 0;
	4458	}
	4459	mp_clamp(c);
	4460	return MP_OKAY;
	4461	}
	4462
	4463	#endif /* defined(HAVE_ECC) \|\| !defined(NO_PWDBASED) */
	4464
	4465
	4466	#if defined(WOLFSSL_KEY_GEN) \|\| defined(HAVE_COMP_KEY) \|\| defined(HAVE_ECC) \|\| \
	4467	defined(DEBUG_WOLFSSL) \|\| !defined(NO_RSA) \|\| !defined(NO_DSA) \|\| \
	4468	!defined(NO_DH)
	4469
	4470	static const int lnz[16] = {
	4471	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
	4472	};
	4473
	4474	/* Counts the number of lsbs which are zero before the first zero bit */
	4475	int mp_cnt_lsb(mp_int *a)
	4476	{
	4477	int x;
	4478	mp_digit q = 0, qq;
	4479
	4480	/* easy out */
	4481	if (mp_iszero(a) == MP_YES) {
	4482	return 0;
	4483	}
	4484
	4485	/* scan lower digits until non-zero */
	4486	for (x = 0; x < a->used && a->dp[x] == 0; x++) {}
	4487	if (a->dp)
	4488	q = a->dp[x];
	4489	x *= DIGIT_BIT;
	4490
	4491	/* now scan this digit until a 1 is found */
	4492	if ((q & 1) == 0) {
	4493	do {
	4494	qq = q & 15;
	4495	x += lnz[qq];
	4496	q >>= 4;
	4497	} while (qq == 0);
	4498	}
	4499	return x;
	4500	}
	4501
	4502
	4503
	4504
	4505	static int s_is_power_of_two(mp_digit b, int *p)
	4506	{
	4507	int x;
	4508
	4509	/* fast return if no power of two */
	4510	if ((b==0) \|\| (b & (b-1))) {
	4511	return 0;
	4512	}
	4513
	4514	for (x = 0; x < DIGIT_BIT; x++) {
	4515	if (b == (((mp_digit)1)<<x)) {
	4516	*p = x;
	4517	return 1;
	4518	}
	4519	}
	4520	return 0;
	4521	}
	4522
	4523	/* single digit division (based on routine from MPI) */
	4524	static int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
	4525	{
	4526	mp_int q;
	4527	mp_word w;
	4528	mp_digit t;
	4529	int res = MP_OKAY, ix;
	4530
	4531	/* cannot divide by zero */
	4532	if (b == 0) {
	4533	return MP_VAL;
	4534	}
	4535
	4536	/* quick outs */
	4537	if (b == 1 \|\| mp_iszero(a) == MP_YES) {
	4538	if (d != NULL) {
	4539	*d = 0;
	4540	}
	4541	if (c != NULL) {
	4542	return mp_copy(a, c);
	4543	}
	4544	return MP_OKAY;
	4545	}
	4546
	4547	/* power of two ? */
	4548	if (s_is_power_of_two(b, &ix) == 1) {
	4549	if (d != NULL) {
	4550	*d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
	4551	}
	4552	if (c != NULL) {
	4553	return mp_div_2d(a, ix, c, NULL);
	4554	}
	4555	return MP_OKAY;
	4556	}
	4557
	4558	#ifdef BN_MP_DIV_3_C
	4559	/* three? */
	4560	if (b == 3) {
	4561	return mp_div_3(a, c, d);
	4562	}
	4563	#endif
	4564
	4565	/* no easy answer [c'est la vie]. Just division */
	4566	if (c != NULL) {
	4567	if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
	4568	return res;
	4569	}
	4570
	4571	q.used = a->used;
	4572	q.sign = a->sign;
	4573	}
	4574	else {
	4575	if ((res = mp_init(&q)) != MP_OKAY) {
	4576	return res;
	4577	}
	4578	}
	4579
	4580
	4581	w = 0;
	4582	for (ix = a->used - 1; ix >= 0; ix--) {
	4583	w = (w << ((mp_word)DIGIT_BIT)) \| ((mp_word)a->dp[ix]);
	4584
	4585	if (w >= b) {
[464]	4586	#ifdef WOLFSSL_LINUXKM
	4587	t = (mp_digit)w;
	4588	/* Linux kernel macro for in-place 64 bit integer division. */
	4589	do_div(t, b);
	4590	#else
[457]	4591	t = (mp_digit)(w / b);
[464]	4592	#endif
[457]	4593	w -= ((mp_word)t) * ((mp_word)b);
	4594	} else {
	4595	t = 0;
	4596	}
	4597	if (c != NULL)
	4598	q.dp[ix] = (mp_digit)t;
	4599	}
	4600
	4601	if (d != NULL) {
	4602	*d = (mp_digit)w;
	4603	}
	4604
	4605	if (c != NULL) {
	4606	mp_clamp(&q);
	4607	mp_exch(&q, c);
	4608	}
	4609	mp_clear(&q);
	4610
	4611	return res;
	4612	}
	4613
	4614
	4615	int mp_mod_d (mp_int * a, mp_digit b, mp_digit * c)
	4616	{
	4617	return mp_div_d(a, b, NULL, c);
	4618	}
	4619
	4620	#endif /* WOLFSSL_KEY_GEN \|\| HAVE_COMP_KEY \|\| HAVE_ECC \|\| DEBUG_WOLFSSL */
	4621
	4622	#if defined(WOLFSSL_KEY_GEN) \|\| !defined(NO_DH) \|\| !defined(NO_DSA) \|\| !defined(NO_RSA)
	4623
[464]	4624	const FLASH_QUALIFIER mp_digit ltm_prime_tab[PRIME_SIZE] = {
[457]	4625	0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
	4626	0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
	4627	0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
	4628	0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F,
	4629	#ifndef MP_8BIT
	4630	0x0083,
	4631	0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
	4632	0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
	4633	0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
	4634	0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
	4635
	4636	0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
	4637	0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
	4638	0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
	4639	0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
	4640	0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
	4641	0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
	4642	0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
	4643	0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
	4644
	4645	0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
	4646	0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
	4647	0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
	4648	0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
	4649	0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
	4650	0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
	4651	0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
	4652	0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
	4653
	4654	0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
	4655	0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
	4656	0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
	4657	0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
	4658	0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
	4659	0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
	4660	0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
	4661	0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
	4662	#endif
	4663	};
	4664
	4665
	4666	/* Miller-Rabin test of "a" to the base of "b" as described in
	4667	* HAC pp. 139 Algorithm 4.24
	4668	*
	4669	* Sets result to 0 if definitely composite or 1 if probably prime.
	4670	* Randomly the chance of error is no more than 1/4 and often
	4671	* very much lower.
	4672	*/
	4673	static int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
	4674	{
	4675	mp_int n1, y, r;
	4676	int s, j, err;
	4677
	4678	/* default */
	4679	*result = MP_NO;
	4680
	4681	/* ensure b > 1 */
	4682	if (mp_cmp_d(b, 1) != MP_GT) {
	4683	return MP_VAL;
	4684	}
	4685
	4686	/* get n1 = a - 1 */
	4687	if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
	4688	return err;
	4689	}
	4690	if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
	4691	goto LBL_N1;
	4692	}
	4693
	4694	/* set 2*s r = n1 */
	4695	if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
	4696	goto LBL_N1;
	4697	}
	4698
	4699	/* count the number of least significant bits
	4700	* which are zero
	4701	*/
	4702	s = mp_cnt_lsb(&r);
	4703
	4704	/* now divide n - 1 by 2*s /
	4705	if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
	4706	goto LBL_R;
	4707	}
	4708
	4709	/* compute y = b*r mod a /
	4710	if ((err = mp_init (&y)) != MP_OKAY) {
	4711	goto LBL_R;
	4712	}
	4713	#if defined(WOLFSSL_HAVE_SP_RSA) \|\| defined(WOLFSSL_HAVE_SP_DH)
	4714	#ifndef WOLFSSL_SP_NO_2048
[464]	4715	if (mp_count_bits(a) == 1024 && mp_isodd(a))
[457]	4716	err = sp_ModExp_1024(b, &r, a, &y);
[464]	4717	else if (mp_count_bits(a) == 2048 && mp_isodd(a))
[457]	4718	err = sp_ModExp_2048(b, &r, a, &y);
	4719	else
	4720	#endif
	4721	#ifndef WOLFSSL_SP_NO_3072
[464]	4722	if (mp_count_bits(a) == 1536 && mp_isodd(a))
[457]	4723	err = sp_ModExp_1536(b, &r, a, &y);
[464]	4724	else if (mp_count_bits(a) == 3072 && mp_isodd(a))
[457]	4725	err = sp_ModExp_3072(b, &r, a, &y);
	4726	else
	4727	#endif
	4728	#ifdef WOLFSSL_SP_4096
[464]	4729	if (mp_count_bits(a) == 4096 && mp_isodd(a))
[457]	4730	err = sp_ModExp_4096(b, &r, a, &y);
	4731	else
	4732	#endif
	4733	#endif
	4734	err = mp_exptmod (b, &r, a, &y);
	4735	if (err != MP_OKAY)
	4736	goto LBL_Y;
	4737
	4738	/* if y != 1 and y != n1 do */
	4739	if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
	4740	j = 1;
	4741	/* while j <= s-1 and y != n1 */
	4742	while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
	4743	if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
	4744	goto LBL_Y;
	4745	}
	4746
	4747	/* if y == 1 then composite */
	4748	if (mp_cmp_d (&y, 1) == MP_EQ) {
	4749	goto LBL_Y;
	4750	}
	4751
	4752	++j;
	4753	}
	4754
	4755	/* if y != n1 then composite */
	4756	if (mp_cmp (&y, &n1) != MP_EQ) {
	4757	goto LBL_Y;
	4758	}
	4759	}
	4760
	4761	/* probably prime now */
	4762	*result = MP_YES;
	4763	LBL_Y:mp_clear (&y);
	4764	LBL_R:mp_clear (&r);
	4765	LBL_N1:mp_clear (&n1);
	4766	return err;
	4767	}
	4768
	4769
	4770	/* determines if an integers is divisible by one
	4771	* of the first PRIME_SIZE primes or not
	4772	*
	4773	* sets result to 0 if not, 1 if yes
	4774	*/
	4775	static int mp_prime_is_divisible (mp_int * a, int *result)
	4776	{
	4777	int err, ix;
	4778	mp_digit res;
	4779
	4780	/* default to not */
	4781	*result = MP_NO;
	4782
	4783	for (ix = 0; ix < PRIME_SIZE; ix++) {
	4784	/* what is a mod LBL_prime_tab[ix] */
	4785	if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) {
	4786	return err;
	4787	}
	4788
	4789	/* is the residue zero? */
	4790	if (res == 0) {
	4791	*result = MP_YES;
	4792	return MP_OKAY;
	4793	}
	4794	}
	4795
	4796	return MP_OKAY;
	4797	}
	4798
	4799	/*
	4800	* Sets result to 1 if probably prime, 0 otherwise
	4801	*/
	4802	int mp_prime_is_prime (mp_int * a, int t, int *result)
	4803	{
	4804	mp_int b;
	4805	int ix, err, res;
	4806
	4807	/* default to no */
	4808	*result = MP_NO;
	4809
	4810	/* valid value of t? */
	4811	if (t <= 0 \|\| t > PRIME_SIZE) {
	4812	return MP_VAL;
	4813	}
	4814
	4815	if (mp_isone(a)) {
	4816	*result = MP_NO;
	4817	return MP_OKAY;
	4818	}
	4819
	4820	/* is the input equal to one of the primes in the table? */
	4821	for (ix = 0; ix < PRIME_SIZE; ix++) {
	4822	if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
	4823	*result = MP_YES;
	4824	return MP_OKAY;
	4825	}
	4826	}
	4827
	4828	/* first perform trial division */
	4829	if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
	4830	return err;
	4831	}
	4832
	4833	/* return if it was trivially divisible */
	4834	if (res == MP_YES) {
	4835	return MP_OKAY;
	4836	}
	4837
	4838	/* now perform the miller-rabin rounds */
	4839	if ((err = mp_init (&b)) != MP_OKAY) {
	4840	return err;
	4841	}
	4842
	4843	for (ix = 0; ix < t; ix++) {
	4844	/* set the prime */
	4845	if ((err = mp_set (&b, ltm_prime_tab[ix])) != MP_OKAY) {
	4846	goto LBL_B;
	4847	}
	4848
	4849	if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
	4850	goto LBL_B;
	4851	}
	4852
	4853	if (res == MP_NO) {
	4854	goto LBL_B;
	4855	}
	4856	}
	4857
	4858	/* passed the test */
	4859	*result = MP_YES;
	4860	LBL_B:mp_clear (&b);
	4861	return err;
	4862	}
	4863
	4864
	4865	/*
	4866	* Sets result to 1 if probably prime, 0 otherwise
	4867	*/
	4868	int mp_prime_is_prime_ex (mp_int * a, int t, int result, WC_RNG rng)
	4869	{
	4870	mp_int b, c;
	4871	int ix, err, res;
	4872	byte* base = NULL;
	4873	word32 baseSz = 0;
	4874
	4875	/* default to no */
	4876	*result = MP_NO;
	4877
	4878	/* valid value of t? */
	4879	if (t <= 0 \|\| t > PRIME_SIZE) {
	4880	return MP_VAL;
	4881	}
	4882
	4883	if (mp_isone(a)) {
	4884	*result = MP_NO;
	4885	return MP_OKAY;
	4886	}
	4887
	4888	/* is the input equal to one of the primes in the table? */
	4889	for (ix = 0; ix < PRIME_SIZE; ix++) {
	4890	if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
	4891	*result = MP_YES;
	4892	return MP_OKAY;
	4893	}
	4894	}
	4895
	4896	/* first perform trial division */
	4897	if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
	4898	return err;
	4899	}
	4900
	4901	/* return if it was trivially divisible */
	4902	if (res == MP_YES) {
	4903	return MP_OKAY;
	4904	}
	4905
	4906	/* now perform the miller-rabin rounds */
	4907	if ((err = mp_init (&b)) != MP_OKAY) {
	4908	return err;
	4909	}
	4910	if ((err = mp_init (&c)) != MP_OKAY) {
	4911	mp_clear(&b);
	4912	return err;
	4913	}
	4914
	4915	baseSz = mp_count_bits(a);
	4916	baseSz = (baseSz / 8) + ((baseSz % 8) ? 1 : 0);
	4917
	4918	base = (byte*)XMALLOC(baseSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
	4919	if (base == NULL) {
	4920	err = MP_MEM;
	4921	goto LBL_B;
	4922	}
	4923
	4924	if ((err = mp_sub_d(a, 2, &c)) != MP_OKAY) {
	4925	goto LBL_B;
	4926	}
	4927
	4928	/* now do a miller rabin with up to t random numbers, this should
	4929	* give a (1/4)^t chance of a false prime. */
	4930	for (ix = 0; ix < t; ix++) {
	4931	/* Set a test candidate. */
	4932	if ((err = wc_RNG_GenerateBlock(rng, base, baseSz)) != 0) {
	4933	goto LBL_B;
	4934	}
	4935
	4936	if ((err = mp_read_unsigned_bin(&b, base, baseSz)) != MP_OKAY) {
	4937	goto LBL_B;
	4938	}
	4939
	4940	if (mp_cmp_d(&b, 2) != MP_GT \|\| mp_cmp(&b, &c) != MP_LT) {
	4941	ix--;
	4942	continue;
	4943	}
	4944
	4945	if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
	4946	goto LBL_B;
	4947	}
	4948
	4949	if (res == MP_NO) {
	4950	goto LBL_B;
	4951	}
	4952	}
	4953
	4954	/* passed the test */
	4955	*result = MP_YES;
	4956	LBL_B:mp_clear (&b);
	4957	mp_clear (&c);
	4958	XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER);
	4959	return err;
	4960	}
	4961
	4962	#endif /* WOLFSSL_KEY_GEN NO_DH NO_DSA NO_RSA */
	4963
	4964	#ifdef WOLFSSL_KEY_GEN
	4965
	4966	static const int USE_BBS = 1;
	4967
	4968	int mp_rand_prime(mp_int* N, int len, WC_RNG* rng, void* heap)
	4969	{
	4970	int err, res, type;
	4971	byte* buf;
	4972
	4973	if (N == NULL \|\| rng == NULL)
	4974	return MP_VAL;
	4975
	4976	/* get type */
	4977	if (len < 0) {
	4978	type = USE_BBS;
	4979	len = -len;
	4980	} else {
	4981	type = 0;
	4982	}
	4983
	4984	/* allow sizes between 2 and 512 bytes for a prime size */
	4985	if (len < 2 \|\| len > 512) {
	4986	return MP_VAL;
	4987	}
	4988
	4989	/* allocate buffer to work with */
	4990	buf = (byte*)XMALLOC(len, heap, DYNAMIC_TYPE_RSA);
	4991	if (buf == NULL) {
	4992	return MP_MEM;
	4993	}
	4994	XMEMSET(buf, 0, len);
	4995
	4996	do {
	4997	#ifdef SHOW_GEN
	4998	printf(".");
	4999	fflush(stdout);
	5000	#endif
	5001	/* generate value */
	5002	err = wc_RNG_GenerateBlock(rng, buf, len);
	5003	if (err != 0) {
	5004	XFREE(buf, heap, DYNAMIC_TYPE_RSA);
	5005	return err;
	5006	}
	5007
	5008	/* munge bits */
	5009	buf[0] \|= 0x80 \| 0x40;
	5010	buf[len-1] \|= 0x01 \| ((type & USE_BBS) ? 0x02 : 0x00);
	5011
	5012	/* load value */
	5013	if ((err = mp_read_unsigned_bin(N, buf, len)) != MP_OKAY) {
	5014	XFREE(buf, heap, DYNAMIC_TYPE_RSA);
	5015	return err;
	5016	}
	5017
	5018	/* test */
	5019	/* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance
	5020	* of a 1024-bit candidate being a false positive, when it is our
	5021	* prime candidate. (Note 4.49 of Handbook of Applied Cryptography.)
	5022	* Using 8 because we've always used 8. */
	5023	if ((err = mp_prime_is_prime_ex(N, 8, &res, rng)) != MP_OKAY) {
	5024	XFREE(buf, heap, DYNAMIC_TYPE_RSA);
	5025	return err;
	5026	}
	5027	} while (res == MP_NO);
	5028
	5029	XMEMSET(buf, 0, len);
	5030	XFREE(buf, heap, DYNAMIC_TYPE_RSA);
	5031
	5032	return MP_OKAY;
	5033	}
	5034
	5035
	5036	/* computes least common multiple as \|ab\|/(a, b) /
	5037	int mp_lcm (mp_int * a, mp_int * b, mp_int * c)
	5038	{
	5039	int res;
	5040	mp_int t1, t2;
	5041
	5042
	5043	if ((res = mp_init_multi (&t1, &t2, NULL, NULL, NULL, NULL)) != MP_OKAY) {
	5044	return res;
	5045	}
	5046
	5047	/* t1 = get the GCD of the two inputs */
	5048	if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
	5049	goto LBL_T;
	5050	}
	5051
	5052	/* divide the smallest by the GCD */
	5053	if (mp_cmp_mag(a, b) == MP_LT) {
	5054	/* store quotient in t2 such that t2 * b is the LCM */
	5055	if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
	5056	goto LBL_T;
	5057	}
	5058	res = mp_mul(b, &t2, c);
	5059	} else {
	5060	/* store quotient in t2 such that t2 * a is the LCM */
	5061	if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
	5062	goto LBL_T;
	5063	}
	5064	res = mp_mul(a, &t2, c);
	5065	}
	5066
	5067	/* fix the sign to positive */
	5068	c->sign = MP_ZPOS;
	5069
	5070	LBL_T:
	5071	mp_clear(&t1);
	5072	mp_clear(&t2);
	5073	return res;
	5074	}
	5075
	5076
	5077
	5078	/* Greatest Common Divisor using the binary method */
	5079	int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
	5080	{
	5081	mp_int u, v;
	5082	int k, u_lsb, v_lsb, res;
	5083
	5084	/* either zero than gcd is the largest */
	5085	if (mp_iszero (a) == MP_YES) {
	5086	return mp_abs (b, c);
	5087	}
	5088	if (mp_iszero (b) == MP_YES) {
	5089	return mp_abs (a, c);
	5090	}
	5091
	5092	/* get copies of a and b we can modify */
	5093	if ((res = mp_init_copy (&u, a)) != MP_OKAY) {
	5094	return res;
	5095	}
	5096
	5097	if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
	5098	goto LBL_U;
	5099	}
	5100
	5101	/* must be positive for the remainder of the algorithm */
	5102	u.sign = v.sign = MP_ZPOS;
	5103
	5104	/* B1. Find the common power of two for u and v */
	5105	u_lsb = mp_cnt_lsb(&u);
	5106	v_lsb = mp_cnt_lsb(&v);
	5107	k = MIN(u_lsb, v_lsb);
	5108
	5109	if (k > 0) {
	5110	/* divide the power of two out */
	5111	if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
	5112	goto LBL_V;
	5113	}
	5114
	5115	if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
	5116	goto LBL_V;
	5117	}
	5118	}
	5119
	5120	/* divide any remaining factors of two out */
	5121	if (u_lsb != k) {
	5122	if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
	5123	goto LBL_V;
	5124	}
	5125	}
	5126
	5127	if (v_lsb != k) {
	5128	if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
	5129	goto LBL_V;
	5130	}
	5131	}
	5132
	5133	while (mp_iszero(&v) == MP_NO) {
	5134	/* make sure v is the largest */
	5135	if (mp_cmp_mag(&u, &v) == MP_GT) {
	5136	/* swap u and v to make sure v is >= u */
	5137	mp_exch(&u, &v);
	5138	}
	5139
	5140	/* subtract smallest from largest */
	5141	if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
	5142	goto LBL_V;
	5143	}
	5144
	5145	/* Divide out all factors of two */
	5146	if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
	5147	goto LBL_V;
	5148	}
	5149	}
	5150
	5151	/* multiply by 2*k which we divided out at the beginning /
	5152	if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
	5153	goto LBL_V;
	5154	}
	5155	c->sign = MP_ZPOS;
	5156	res = MP_OKAY;
	5157	LBL_V:mp_clear (&v);
	5158	LBL_U:mp_clear (&u);
	5159	return res;
	5160	}
	5161
	5162	#endif /* WOLFSSL_KEY_GEN */
	5163
	5164
	5165	#if !defined(NO_DSA) \|\| defined(HAVE_ECC) \|\| defined(WOLFSSL_KEY_GEN) \|\| \
	5166	defined(HAVE_COMP_KEY) \|\| defined(WOLFSSL_DEBUG_MATH) \|\| \
	5167	defined(DEBUG_WOLFSSL) \|\| defined(OPENSSL_EXTRA)
	5168
	5169	/* chars used in radix conversions */
[464]	5170	const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	5171	"abcdefghijklmnopqrstuvwxyz+/";
[457]	5172	#endif
	5173
	5174	#if !defined(NO_DSA) \|\| defined(HAVE_ECC)
	5175	/* read a string [ASCII] in a given radix */
	5176	int mp_read_radix (mp_int * a, const char *str, int radix)
	5177	{
	5178	int y, res, neg;
	5179	char ch;
	5180
	5181	/* zero the digit bignum */
	5182	mp_zero(a);
	5183
	5184	/* make sure the radix is ok */
	5185	if (radix < MP_RADIX_BIN \|\| radix > MP_RADIX_MAX) {
	5186	return MP_VAL;
	5187	}
	5188
	5189	/* if the leading digit is a
	5190	* minus set the sign to negative.
	5191	*/
	5192	if (*str == '-') {
	5193	++str;
	5194	neg = MP_NEG;
	5195	} else {
	5196	neg = MP_ZPOS;
	5197	}
	5198
	5199	/* set the integer to the default of zero */
	5200	mp_zero (a);
	5201
	5202	/* process each digit of the string */
	5203	while (*str != '\0') {
	5204	/* if the radix <= 36 the conversion is case insensitive
	5205	* this allows numbers like 1AB and 1ab to represent the same value
	5206	* [e.g. in hex]
	5207	*/
	5208	ch = (radix <= 36) ? (char)XTOUPPER((unsigned char)str) : str;
	5209	for (y = 0; y < 64; y++) {
	5210	if (ch == mp_s_rmap[y]) {
	5211	break;
	5212	}
	5213	}
	5214
	5215	/* if the char was found in the map
	5216	* and is less than the given radix add it
	5217	* to the number, otherwise exit the loop.
	5218	*/
	5219	if (y < radix) {
	5220	if ((res = mp_mul_d (a, (mp_digit) radix, a)) != MP_OKAY) {
	5221	return res;
	5222	}
	5223	if ((res = mp_add_d (a, (mp_digit) y, a)) != MP_OKAY) {
	5224	return res;
	5225	}
	5226	} else {
	5227	break;
	5228	}
	5229	++str;
	5230	}
	5231
	5232	/* if digit in isn't null term, then invalid character was found */
	5233	if (*str != '\0') {
	5234	mp_zero (a);
	5235	return MP_VAL;
	5236	}
	5237
	5238	/* set the sign only if a != 0 */
	5239	if (mp_iszero(a) != MP_YES) {
	5240	a->sign = neg;
	5241	}
	5242	return MP_OKAY;
	5243	}
	5244	#endif /* !defined(NO_DSA) \|\| defined(HAVE_ECC) */
	5245
	5246	#ifdef WC_MP_TO_RADIX
	5247
	5248	/* returns size of ASCII representation */
	5249	int mp_radix_size (mp_int a, int radix, int size)
	5250	{
	5251	int res, digs;
	5252	mp_int t;
	5253	mp_digit d;
	5254
	5255	*size = 0;
	5256
	5257	/* special case for binary */
	5258	if (radix == MP_RADIX_BIN) {
[464]	5259	*size = mp_count_bits(a);
	5260	if (*size == 0)
	5261	*size = 1;
	5262	size += (a->sign == MP_NEG ? 1 : 0) + 1; / "-" sign + null term */
[457]	5263	return MP_OKAY;
	5264	}
	5265
	5266	/* make sure the radix is in range */
	5267	if (radix < MP_RADIX_BIN \|\| radix > MP_RADIX_MAX) {
	5268	return MP_VAL;
	5269	}
	5270
	5271	if (mp_iszero(a) == MP_YES) {
[464]	5272	#ifndef WC_DISABLE_RADIX_ZERO_PAD
	5273	if (radix == 16)
	5274	*size = 3;
	5275	else
	5276	#endif
	5277	*size = 2;
[457]	5278	return MP_OKAY;
	5279	}
	5280
	5281	/* digs is the digit count */
	5282	digs = 0;
	5283
	5284	/* init a copy of the input */
	5285	if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
	5286	return res;
	5287	}
	5288
	5289	/* force temp to positive */
	5290	t.sign = MP_ZPOS;
	5291
	5292	/* fetch out all of the digits */
	5293	while (mp_iszero (&t) == MP_NO) {
	5294	if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
	5295	mp_clear (&t);
	5296	return res;
	5297	}
	5298	++digs;
	5299	}
	5300	mp_clear (&t);
	5301
[464]	5302	#ifndef WC_DISABLE_RADIX_ZERO_PAD
	5303	/* For hexadecimal output, add zero padding when number of digits is odd */
	5304	if ((digs & 1) && (radix == 16)) {
	5305	++digs;
	5306	}
	5307	#endif
	5308
	5309	/* if it's negative add one for the sign */
	5310	if (a->sign == MP_NEG) {
	5311	++digs;
	5312	}
	5313
[457]	5314	/* return digs + 1, the 1 is for the NULL byte that would be required. */
	5315	*size = digs + 1;
	5316	return MP_OKAY;
	5317	}
	5318
	5319	/* stores a bignum as a ASCII string in a given radix (2..64) */
	5320	int mp_toradix (mp_int a, char str, int radix)
	5321	{
	5322	int res, digs;
	5323	mp_int t;
	5324	mp_digit d;
	5325	char *_s = str;
	5326
	5327	/* check range of the radix */
	5328	if (radix < MP_RADIX_BIN \|\| radix > MP_RADIX_MAX) {
	5329	return MP_VAL;
	5330	}
	5331
	5332	/* quick out if its zero */
	5333	if (mp_iszero(a) == MP_YES) {
[464]	5334	#ifndef WC_DISABLE_RADIX_ZERO_PAD
	5335	if (radix == 16) {
	5336	*str++ = '0';
	5337	}
	5338	#endif
[457]	5339	*str++ = '0';
	5340	*str = '\0';
	5341	return MP_OKAY;
	5342	}
	5343
	5344	if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
	5345	return res;
	5346	}
	5347
	5348	/* if it is negative output a - */
	5349	if (t.sign == MP_NEG) {
	5350	++_s;
	5351	*str++ = '-';
	5352	t.sign = MP_ZPOS;
	5353	}
	5354
	5355	digs = 0;
	5356	while (mp_iszero (&t) == MP_NO) {
	5357	if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
	5358	mp_clear (&t);
	5359	return res;
	5360	}
	5361	*str++ = mp_s_rmap[d];
	5362	++digs;
	5363	}
	5364	#ifndef WC_DISABLE_RADIX_ZERO_PAD
	5365	/* For hexadecimal output, add zero padding when number of digits is odd */
	5366	if ((digs & 1) && (radix == 16)) {
	5367	*str++ = mp_s_rmap[0];
	5368	++digs;
	5369	}
	5370	#endif
	5371	/* reverse the digits of the string. In this case _s points
	5372	* to the first digit [excluding the sign] of the number]
	5373	*/
	5374	bn_reverse ((unsigned char *)_s, digs);
	5375
	5376	/* append a NULL so the string is properly terminated */
	5377	*str = '\0';
	5378
	5379	mp_clear (&t);
	5380	return MP_OKAY;
	5381	}
	5382
	5383	#ifdef WOLFSSL_DEBUG_MATH
	5384	void mp_dump(const char* desc, mp_int* a, byte verbose)
	5385	{
	5386	char *buffer;
	5387	int size = a->alloc;
	5388
	5389	buffer = (char)XMALLOC(size sizeof(mp_digit) * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
	5390	if (buffer == NULL) {
	5391	return;
	5392	}
	5393
	5394	printf("%s: ptr=%p, used=%d, sign=%d, size=%d, mpd=%d\n",
	5395	desc, a, a->used, a->sign, size, (int)sizeof(mp_digit));
	5396
	5397	mp_tohex(a, buffer);
	5398	printf(" %s\n ", buffer);
	5399
	5400	if (verbose) {
	5401	int i;
	5402	for(i=0; i<a->alloc * (int)sizeof(mp_digit); i++) {
	5403	printf("%02x ", (((byte)a->dp) + i));
	5404	}
	5405	printf("\n");
	5406	}
	5407
	5408	XFREE(buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
	5409	}
	5410	#endif /* WOLFSSL_DEBUG_MATH */
	5411
	5412	#endif /* WC_MP_TO_RADIX */
	5413
	5414	#endif /* WOLFSSL_SP_MATH */
	5415
	5416	#endif /* USE_FAST_MATH */
	5417
	5418	#endif /* NO_BIG_INT */

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: azure_iot_hub_f767zi/trunk/wolfssl-4.7.0/wolfcrypt/src/integer.c@ 464

Download in other formats: