source: asp3_tinet_ecnl_arm/trunk/wolfssl-3.12.2/wolfcrypt/src/integer.c@ 372

Last change on this file since 372 was 372, checked in by coas-nagasima, 5 years ago

wolfsslを3.15.7にバージョンアップ

  • Property svn:eol-style set to native
  • Property svn:mime-type set to text/x-csrc;charset=UTF-8
File size: 110.5 KB
Line 
1/* integer.c
2 *
3 * Copyright (C) 2006-2017 wolfSSL Inc.
4 *
5 * This file is part of wolfSSL.
6 *
7 * wolfSSL is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * wolfSSL is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20 */
21
22
23
24/*
25 * Based on public domain LibTomMath 0.38 by Tom St Denis, tomstdenis@iahu.ca,
26 * http://math.libtomcrypt.com
27 */
28
29
30#ifdef HAVE_CONFIG_H
31 #include <config.h>
32#endif
33
34/* in case user set USE_FAST_MATH there */
35#include <wolfssl/wolfcrypt/settings.h>
36
37#ifdef NO_INLINE
38 #include <wolfssl/wolfcrypt/misc.h>
39#else
40 #define WOLFSSL_MISC_INCLUDED
41 #include <wolfcrypt/src/misc.c>
42#endif
43
44#ifndef NO_BIG_INT
45
46#ifndef USE_FAST_MATH
47
48#ifndef WOLFSSL_SP_MATH
49
50#include <wolfssl/wolfcrypt/integer.h>
51
52#if defined(FREESCALE_LTC_TFM)
53 #include <wolfssl/wolfcrypt/port/nxp/ksdk_port.h>
54#endif
55#ifdef WOLFSSL_DEBUG_MATH
56 #include <stdio.h>
57#endif
58
59#ifndef NO_WOLFSSL_SMALL_STACK
60 #ifndef WOLFSSL_SMALL_STACK
61 #define WOLFSSL_SMALL_STACK
62 #endif
63#endif
64
65#ifdef SHOW_GEN
66 #if defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
67 #if MQX_USE_IO_OLD
68 #include <fio.h>
69 #else
70 #include <nio.h>
71 #endif
72 #else
73 #include <stdio.h>
74 #endif
75#endif
76
77/* reverse an array, used for radix code */
78static void
79bn_reverse (unsigned char *s, int len)
80{
81 int ix, iy;
82 unsigned char t;
83
84 ix = 0;
85 iy = len - 1;
86 while (ix < iy) {
87 t = s[ix];
88 s[ix] = s[iy];
89 s[iy] = t;
90 ++ix;
91 --iy;
92 }
93}
94
95/* math settings check */
96word32 CheckRunTimeSettings(void)
97{
98 return CTC_SETTINGS;
99}
100
101
102/* handle up to 6 inits */
103int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e,
104 mp_int* f)
105{
106 int res = MP_OKAY;
107
108 if (a) XMEMSET(a, 0, sizeof(mp_int));
109 if (b) XMEMSET(b, 0, sizeof(mp_int));
110 if (c) XMEMSET(c, 0, sizeof(mp_int));
111 if (d) XMEMSET(d, 0, sizeof(mp_int));
112 if (e) XMEMSET(e, 0, sizeof(mp_int));
113 if (f) XMEMSET(f, 0, sizeof(mp_int));
114
115 if (a && ((res = mp_init(a)) != MP_OKAY))
116 return res;
117
118 if (b && ((res = mp_init(b)) != MP_OKAY)) {
119 mp_clear(a);
120 return res;
121 }
122
123 if (c && ((res = mp_init(c)) != MP_OKAY)) {
124 mp_clear(a); mp_clear(b);
125 return res;
126 }
127
128 if (d && ((res = mp_init(d)) != MP_OKAY)) {
129 mp_clear(a); mp_clear(b); mp_clear(c);
130 return res;
131 }
132
133 if (e && ((res = mp_init(e)) != MP_OKAY)) {
134 mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d);
135 return res;
136 }
137
138 if (f && ((res = mp_init(f)) != MP_OKAY)) {
139 mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d); mp_clear(e);
140 return res;
141 }
142
143 return res;
144}
145
146
147/* init a new mp_int */
148int mp_init (mp_int * a)
149{
150 /* Safeguard against passing in a null pointer */
151 if (a == NULL)
152 return MP_VAL;
153
154 /* defer allocation until mp_grow */
155 a->dp = NULL;
156
157 /* set the used to zero, allocated digits to the default precision
158 * and sign to positive */
159 a->used = 0;
160 a->alloc = 0;
161 a->sign = MP_ZPOS;
162#ifdef HAVE_WOLF_BIGINT
163 wc_bigint_init(&a->raw);
164#endif
165
166 return MP_OKAY;
167}
168
169
170/* clear one (frees) */
171void mp_clear (mp_int * a)
172{
173 int i;
174
175 if (a == NULL)
176 return;
177
178 /* only do anything if a hasn't been freed previously */
179 if (a->dp != NULL) {
180 /* first zero the digits */
181 for (i = 0; i < a->used; i++) {
182 a->dp[i] = 0;
183 }
184
185 /* free ram */
186 mp_free(a);
187
188 /* reset members to make debugging easier */
189 a->alloc = a->used = 0;
190 a->sign = MP_ZPOS;
191 }
192}
193
194void mp_free (mp_int * a)
195{
196 /* only do anything if a hasn't been freed previously */
197 if (a->dp != NULL) {
198 /* free ram */
199 XFREE(a->dp, 0, DYNAMIC_TYPE_BIGINT);
200 a->dp = NULL;
201 }
202
203#ifdef HAVE_WOLF_BIGINT
204 wc_bigint_free(&a->raw);
205#endif
206}
207
208void mp_forcezero(mp_int * a)
209{
210 if (a == NULL)
211 return;
212
213 /* only do anything if a hasn't been freed previously */
214 if (a->dp != NULL) {
215 /* force zero the used digits */
216 ForceZero(a->dp, a->used * sizeof(mp_digit));
217#ifdef HAVE_WOLF_BIGINT
218 wc_bigint_zero(&a->raw);
219#endif
220 /* free ram */
221 mp_free(a);
222
223 /* reset members to make debugging easier */
224 a->alloc = a->used = 0;
225 a->sign = MP_ZPOS;
226 }
227
228 a->sign = MP_ZPOS;
229 a->used = 0;
230}
231
232
233/* get the size for an unsigned equivalent */
234int mp_unsigned_bin_size (mp_int * a)
235{
236 int size = mp_count_bits (a);
237 return (size / 8 + ((size & 7) != 0 ? 1 : 0));
238}
239
240
241/* returns the number of bits in an int */
242int mp_count_bits (mp_int * a)
243{
244 int r;
245 mp_digit q;
246
247 /* shortcut */
248 if (a->used == 0) {
249 return 0;
250 }
251
252 /* get number of digits and add that */
253 r = (a->used - 1) * DIGIT_BIT;
254
255 /* take the last digit and count the bits in it */
256 q = a->dp[a->used - 1];
257 while (q > ((mp_digit) 0)) {
258 ++r;
259 q >>= ((mp_digit) 1);
260 }
261 return r;
262}
263
264
265int mp_leading_bit (mp_int * a)
266{
267 int bit = 0;
268 mp_int t;
269
270 if (mp_init_copy(&t, a) != MP_OKAY)
271 return 0;
272
273 while (mp_iszero(&t) == MP_NO) {
274#ifndef MP_8BIT
275 bit = (t.dp[0] & 0x80) != 0;
276#else
277 bit = (t.dp[0] | ((t.dp[1] & 0x01) << 7)) & 0x80 != 0;
278#endif
279 if (mp_div_2d (&t, 8, &t, NULL) != MP_OKAY)
280 break;
281 }
282 mp_clear(&t);
283 return bit;
284}
285
286int mp_to_unsigned_bin_at_pos(int x, mp_int *t, unsigned char *b)
287{
288 int res = 0;
289 while (mp_iszero(t) == MP_NO) {
290#ifndef MP_8BIT
291 b[x++] = (unsigned char) (t->dp[0] & 255);
292#else
293 b[x++] = (unsigned char) (t->dp[0] | ((t->dp[1] & 0x01) << 7));
294#endif
295 if ((res = mp_div_2d (t, 8, t, NULL)) != MP_OKAY) {
296 return res;
297 }
298 res = x;
299 }
300 return res;
301}
302
303/* store in unsigned [big endian] format */
304int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
305{
306 int x, res;
307 mp_int t;
308
309 if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
310 return res;
311 }
312
313 x = mp_to_unsigned_bin_at_pos(0, &t, b);
314 if (x < 0) {
315 mp_clear(&t);
316 return x;
317 }
318
319 bn_reverse (b, x);
320 mp_clear (&t);
321 return res;
322}
323
324int mp_to_unsigned_bin_len(mp_int * a, unsigned char *b, int c)
325{
326 int i, len;
327
328 len = mp_unsigned_bin_size(a);
329
330 /* pad front w/ zeros to match length */
331 for (i = 0; i < c - len; i++)
332 b[i] = 0x00;
333 return mp_to_unsigned_bin(a, b + i);
334}
335
336/* creates "a" then copies b into it */
337int mp_init_copy (mp_int * a, mp_int * b)
338{
339 int res;
340
341 if ((res = mp_init_size (a, b->used)) != MP_OKAY) {
342 return res;
343 }
344
345 if((res = mp_copy (b, a)) != MP_OKAY) {
346 mp_clear(a);
347 }
348
349 return res;
350}
351
352
353/* copy, b = a */
354int mp_copy (mp_int * a, mp_int * b)
355{
356 int res, n;
357
358 /* Safeguard against passing in a null pointer */
359 if (a == NULL || b == NULL)
360 return MP_VAL;
361
362 /* if dst == src do nothing */
363 if (a == b) {
364 return MP_OKAY;
365 }
366
367 /* grow dest */
368 if (b->alloc < a->used || b->alloc == 0) {
369 if ((res = mp_grow (b, a->used)) != MP_OKAY) {
370 return res;
371 }
372 }
373
374 /* zero b and copy the parameters over */
375 {
376 mp_digit *tmpa, *tmpb;
377
378 /* pointer aliases */
379
380 /* source */
381 tmpa = a->dp;
382
383 /* destination */
384 tmpb = b->dp;
385
386 /* copy all the digits */
387 for (n = 0; n < a->used; n++) {
388 *tmpb++ = *tmpa++;
389 }
390
391 /* clear high digits */
392 for (; n < b->used && b->dp; n++) {
393 *tmpb++ = 0;
394 }
395 }
396
397 /* copy used count and sign */
398 b->used = a->used;
399 b->sign = a->sign;
400 return MP_OKAY;
401}
402
403
404/* grow as required */
405int mp_grow (mp_int * a, int size)
406{
407 int i;
408 mp_digit *tmp;
409
410 /* if the alloc size is smaller alloc more ram */
411 if (a->alloc < size || size == 0) {
412 /* ensure there are always at least MP_PREC digits extra on top */
413 size += (MP_PREC * 2) - (size % MP_PREC);
414
415 /* reallocate the array a->dp
416 *
417 * We store the return in a temporary variable
418 * in case the operation failed we don't want
419 * to overwrite the dp member of a.
420 */
421 tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size, NULL,
422 DYNAMIC_TYPE_BIGINT);
423 if (tmp == NULL) {
424 /* reallocation failed but "a" is still valid [can be freed] */
425 return MP_MEM;
426 }
427
428 /* reallocation succeeded so set a->dp */
429 a->dp = tmp;
430
431 /* zero excess digits */
432 i = a->alloc;
433 a->alloc = size;
434 for (; i < a->alloc; i++) {
435 a->dp[i] = 0;
436 }
437 }
438 return MP_OKAY;
439}
440
441
442/* shift right by a certain bit count (store quotient in c, optional
443 remainder in d) */
444int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
445{
446 int D, res;
447 mp_int t;
448
449
450 /* if the shift count is <= 0 then we do no work */
451 if (b <= 0) {
452 res = mp_copy (a, c);
453 if (d != NULL) {
454 mp_zero (d);
455 }
456 return res;
457 }
458
459 if ((res = mp_init (&t)) != MP_OKAY) {
460 return res;
461 }
462
463 /* get the remainder */
464 if (d != NULL) {
465 if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
466 mp_clear (&t);
467 return res;
468 }
469 }
470
471 /* copy */
472 if ((res = mp_copy (a, c)) != MP_OKAY) {
473 mp_clear (&t);
474 return res;
475 }
476
477 /* shift by as many digits in the bit count */
478 if (b >= (int)DIGIT_BIT) {
479 mp_rshd (c, b / DIGIT_BIT);
480 }
481
482 /* shift any bit count < DIGIT_BIT */
483 D = (b % DIGIT_BIT);
484 if (D != 0) {
485 mp_rshb(c, D);
486 }
487 mp_clamp (c);
488 if (d != NULL) {
489 mp_exch (&t, d);
490 }
491 mp_clear (&t);
492 return MP_OKAY;
493}
494
495
496/* set to zero */
497void mp_zero (mp_int * a)
498{
499 int n;
500 mp_digit *tmp;
501
502 if (a == NULL)
503 return;
504
505 a->sign = MP_ZPOS;
506 a->used = 0;
507
508 tmp = a->dp;
509 for (n = 0; n < a->alloc; n++) {
510 *tmp++ = 0;
511 }
512}
513
514
515/* trim unused digits
516 *
517 * This is used to ensure that leading zero digits are
518 * trimmed and the leading "used" digit will be non-zero
519 * Typically very fast. Also fixes the sign if there
520 * are no more leading digits
521 */
522void mp_clamp (mp_int * a)
523{
524 /* decrease used while the most significant digit is
525 * zero.
526 */
527 while (a->used > 0 && a->dp[a->used - 1] == 0) {
528 --(a->used);
529 }
530
531 /* reset the sign flag if used == 0 */
532 if (a->used == 0) {
533 a->sign = MP_ZPOS;
534 }
535}
536
537
538/* swap the elements of two integers, for cases where you can't simply swap the
539 * mp_int pointers around
540 */
541void mp_exch (mp_int * a, mp_int * b)
542{
543 mp_int t;
544
545 t = *a;
546 *a = *b;
547 *b = t;
548}
549
550
551/* shift right a certain number of bits */
552void mp_rshb (mp_int *c, int x)
553{
554 mp_digit *tmpc, mask, shift;
555 mp_digit r, rr;
556 mp_digit D = x;
557
558 /* mask */
559 mask = (((mp_digit)1) << D) - 1;
560
561 /* shift for lsb */
562 shift = DIGIT_BIT - D;
563
564 /* alias */
565 tmpc = c->dp + (c->used - 1);
566
567 /* carry */
568 r = 0;
569 for (x = c->used - 1; x >= 0; x--) {
570 /* get the lower bits of this word in a temp */
571 rr = *tmpc & mask;
572
573 /* shift the current word and mix in the carry bits from previous word */
574 *tmpc = (*tmpc >> D) | (r << shift);
575 --tmpc;
576
577 /* set the carry to the carry bits of the current word found above */
578 r = rr;
579 }
580 mp_clamp(c);
581}
582
583
584/* shift right a certain amount of digits */
585void mp_rshd (mp_int * a, int b)
586{
587 int x;
588
589 /* if b <= 0 then ignore it */
590 if (b <= 0) {
591 return;
592 }
593
594 /* if b > used then simply zero it and return */
595 if (a->used <= b) {
596 mp_zero (a);
597 return;
598 }
599
600 {
601 mp_digit *bottom, *top;
602
603 /* shift the digits down */
604
605 /* bottom */
606 bottom = a->dp;
607
608 /* top [offset into digits] */
609 top = a->dp + b;
610
611 /* this is implemented as a sliding window where
612 * the window is b-digits long and digits from
613 * the top of the window are copied to the bottom
614 *
615 * e.g.
616
617 b-2 | b-1 | b0 | b1 | b2 | ... | bb | ---->
618 /\ | ---->
619 \-------------------/ ---->
620 */
621 for (x = 0; x < (a->used - b); x++) {
622 *bottom++ = *top++;
623 }
624
625 /* zero the top digits */
626 for (; x < a->used; x++) {
627 *bottom++ = 0;
628 }
629 }
630
631 /* remove excess digits */
632 a->used -= b;
633}
634
635
636/* calc a value mod 2**b */
637int mp_mod_2d (mp_int * a, int b, mp_int * c)
638{
639 int x, res;
640
641 /* if b is <= 0 then zero the int */
642 if (b <= 0) {
643 mp_zero (c);
644 return MP_OKAY;
645 }
646
647 /* if the modulus is larger than the value than return */
648 if (b >= (int) (a->used * DIGIT_BIT)) {
649 res = mp_copy (a, c);
650 return res;
651 }
652
653 /* copy */
654 if ((res = mp_copy (a, c)) != MP_OKAY) {
655 return res;
656 }
657
658 /* zero digits above the last digit of the modulus */
659 for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
660 c->dp[x] = 0;
661 }
662 /* clear the digit that is not completely outside/inside the modulus */
663 c->dp[b / DIGIT_BIT] &= (mp_digit) ((((mp_digit) 1) <<
664 (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
665 mp_clamp (c);
666 return MP_OKAY;
667}
668
669
670/* reads a unsigned char array, assumes the msb is stored first [big endian] */
671int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
672{
673 int res;
674
675 /* make sure there are at least two digits */
676 if (a->alloc < 2) {
677 if ((res = mp_grow(a, 2)) != MP_OKAY) {
678 return res;
679 }
680 }
681
682 /* zero the int */
683 mp_zero (a);
684
685 /* read the bytes in */
686 while (c-- > 0) {
687 if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
688 return res;
689 }
690
691#ifndef MP_8BIT
692 a->dp[0] |= *b++;
693 a->used += 1;
694#else
695 a->dp[0] = (*b & MP_MASK);
696 a->dp[1] |= ((*b++ >> 7U) & 1);
697 a->used += 2;
698#endif
699 }
700 mp_clamp (a);
701 return MP_OKAY;
702}
703
704
705/* shift left by a certain bit count */
706int mp_mul_2d (mp_int * a, int b, mp_int * c)
707{
708 mp_digit d;
709 int res;
710
711 /* copy */
712 if (a != c) {
713 if ((res = mp_copy (a, c)) != MP_OKAY) {
714 return res;
715 }
716 }
717
718 if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
719 if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
720 return res;
721 }
722 }
723
724 /* shift by as many digits in the bit count */
725 if (b >= (int)DIGIT_BIT) {
726 if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
727 return res;
728 }
729 }
730
731 /* shift any bit count < DIGIT_BIT */
732 d = (mp_digit) (b % DIGIT_BIT);
733 if (d != 0) {
734 mp_digit *tmpc, shift, mask, r, rr;
735 int x;
736
737 /* bitmask for carries */
738 mask = (((mp_digit)1) << d) - 1;
739
740 /* shift for msbs */
741 shift = DIGIT_BIT - d;
742
743 /* alias */
744 tmpc = c->dp;
745
746 /* carry */
747 r = 0;
748 for (x = 0; x < c->used; x++) {
749 /* get the higher bits of the current word */
750 rr = (*tmpc >> shift) & mask;
751
752 /* shift the current word and OR in the carry */
753 *tmpc = (mp_digit)(((*tmpc << d) | r) & MP_MASK);
754 ++tmpc;
755
756 /* set the carry to the carry bits of the current word */
757 r = rr;
758 }
759
760 /* set final carry */
761 if (r != 0) {
762 c->dp[(c->used)++] = r;
763 }
764 }
765 mp_clamp (c);
766 return MP_OKAY;
767}
768
769
770/* shift left a certain amount of digits */
771int mp_lshd (mp_int * a, int b)
772{
773 int x, res;
774
775 /* if its less than zero return */
776 if (b <= 0) {
777 return MP_OKAY;
778 }
779
780 /* grow to fit the new digits */
781 if (a->alloc < a->used + b) {
782 if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
783 return res;
784 }
785 }
786
787 {
788 mp_digit *top, *bottom;
789
790 /* increment the used by the shift amount then copy upwards */
791 a->used += b;
792
793 /* top */
794 top = a->dp + a->used - 1;
795
796 /* base */
797 bottom = a->dp + a->used - 1 - b;
798
799 /* much like mp_rshd this is implemented using a sliding window
800 * except the window goes the other way around. Copying from
801 * the bottom to the top. see bn_mp_rshd.c for more info.
802 */
803 for (x = a->used - 1; x >= b; x--) {
804 *top-- = *bottom--;
805 }
806
807 /* zero the lower digits */
808 top = a->dp;
809 for (x = 0; x < b; x++) {
810 *top++ = 0;
811 }
812 }
813 return MP_OKAY;
814}
815
816
817/* this is a shell function that calls either the normal or Montgomery
818 * exptmod functions. Originally the call to the montgomery code was
819 * embedded in the normal function but that wasted a lot of stack space
820 * for nothing (since 99% of the time the Montgomery code would be called)
821 */
822#if defined(FREESCALE_LTC_TFM)
823int wolfcrypt_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
824#else
825int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
826#endif
827{
828 int dr;
829
830 /* modulus P must be positive */
831 if (P->sign == MP_NEG) {
832 return MP_VAL;
833 }
834
835 /* if exponent X is negative we have to recurse */
836 if (X->sign == MP_NEG) {
837#ifdef BN_MP_INVMOD_C
838 mp_int tmpG, tmpX;
839 int err;
840
841 /* first compute 1/G mod P */
842 if ((err = mp_init(&tmpG)) != MP_OKAY) {
843 return err;
844 }
845 if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
846 mp_clear(&tmpG);
847 return err;
848 }
849
850 /* now get |X| */
851 if ((err = mp_init(&tmpX)) != MP_OKAY) {
852 mp_clear(&tmpG);
853 return err;
854 }
855 if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
856 mp_clear(&tmpG);
857 mp_clear(&tmpX);
858 return err;
859 }
860
861 /* and now compute (1/G)**|X| instead of G**X [X < 0] */
862 err = mp_exptmod(&tmpG, &tmpX, P, Y);
863 mp_clear(&tmpG);
864 mp_clear(&tmpX);
865 return err;
866#else
867 /* no invmod */
868 return MP_VAL;
869#endif
870 }
871
872/* modified diminished radix reduction */
873#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && \
874 defined(BN_S_MP_EXPTMOD_C)
875 if (mp_reduce_is_2k_l(P) == MP_YES) {
876 return s_mp_exptmod(G, X, P, Y, 1);
877 }
878#endif
879
880#ifdef BN_MP_DR_IS_MODULUS_C
881 /* is it a DR modulus? */
882 dr = mp_dr_is_modulus(P);
883#else
884 /* default to no */
885 dr = 0;
886#endif
887
888#ifdef BN_MP_REDUCE_IS_2K_C
889 /* if not, is it a unrestricted DR modulus? */
890 if (dr == 0) {
891 dr = mp_reduce_is_2k(P) << 1;
892 }
893#endif
894
895 /* if the modulus is odd or dr != 0 use the montgomery method */
896#ifdef BN_MP_EXPTMOD_FAST_C
897 if (mp_isodd (P) == MP_YES || dr != 0) {
898 return mp_exptmod_fast (G, X, P, Y, dr);
899 } else {
900#endif
901#ifdef BN_S_MP_EXPTMOD_C
902 /* otherwise use the generic Barrett reduction technique */
903 return s_mp_exptmod (G, X, P, Y, 0);
904#else
905 /* no exptmod for evens */
906 return MP_VAL;
907#endif
908#ifdef BN_MP_EXPTMOD_FAST_C
909 }
910#endif
911}
912
913
914/* b = |a|
915 *
916 * Simple function copies the input and fixes the sign to positive
917 */
918int mp_abs (mp_int * a, mp_int * b)
919{
920 int res;
921
922 /* copy a to b */
923 if (a != b) {
924 if ((res = mp_copy (a, b)) != MP_OKAY) {
925 return res;
926 }
927 }
928
929 /* force the sign of b to positive */
930 b->sign = MP_ZPOS;
931
932 return MP_OKAY;
933}
934
935
936/* hac 14.61, pp608 */
937#if defined(FREESCALE_LTC_TFM)
938int wolfcrypt_mp_invmod(mp_int * a, mp_int * b, mp_int * c)
939#else
940int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
941#endif
942{
943 /* b cannot be negative */
944 if (b->sign == MP_NEG || mp_iszero(b) == MP_YES) {
945 return MP_VAL;
946 }
947
948#ifdef BN_FAST_MP_INVMOD_C
949 /* if the modulus is odd we can use a faster routine instead */
950 if ((mp_isodd(b) == MP_YES) && (mp_cmp_d(b, 1) != MP_EQ)) {
951 return fast_mp_invmod (a, b, c);
952 }
953#endif
954
955#ifdef BN_MP_INVMOD_SLOW_C
956 return mp_invmod_slow(a, b, c);
957#else
958 return MP_VAL;
959#endif
960}
961
962
963/* computes the modular inverse via binary extended euclidean algorithm,
964 * that is c = 1/a mod b
965 *
966 * Based on slow invmod except this is optimized for the case where b is
967 * odd as per HAC Note 14.64 on pp. 610
968 */
969int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
970{
971 mp_int x, y, u, v, B, D;
972 int res, neg, loop_check = 0;
973
974 /* 2. [modified] b must be odd */
975 if (mp_iseven (b) == MP_YES) {
976 return MP_VAL;
977 }
978
979 /* init all our temps */
980 if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D)) != MP_OKAY) {
981 return res;
982 }
983
984 /* x == modulus, y == value to invert */
985 if ((res = mp_copy (b, &x)) != MP_OKAY) {
986 goto LBL_ERR;
987 }
988
989 /* we need y = |a| */
990 if ((res = mp_mod (a, b, &y)) != MP_OKAY) {
991 goto LBL_ERR;
992 }
993
994 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
995 if ((res = mp_copy (&x, &u)) != MP_OKAY) {
996 goto LBL_ERR;
997 }
998 if ((res = mp_copy (&y, &v)) != MP_OKAY) {
999 goto LBL_ERR;
1000 }
1001 if ((res = mp_set (&D, 1)) != MP_OKAY) {
1002 goto LBL_ERR;
1003 }
1004
1005top:
1006 /* 4. while u is even do */
1007 while (mp_iseven (&u) == MP_YES) {
1008 /* 4.1 u = u/2 */
1009 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
1010 goto LBL_ERR;
1011 }
1012 /* 4.2 if B is odd then */
1013 if (mp_isodd (&B) == MP_YES) {
1014 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
1015 goto LBL_ERR;
1016 }
1017 }
1018 /* B = B/2 */
1019 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
1020 goto LBL_ERR;
1021 }
1022 }
1023
1024 /* 5. while v is even do */
1025 while (mp_iseven (&v) == MP_YES) {
1026 /* 5.1 v = v/2 */
1027 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
1028 goto LBL_ERR;
1029 }
1030 /* 5.2 if D is odd then */
1031 if (mp_isodd (&D) == MP_YES) {
1032 /* D = (D-x)/2 */
1033 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
1034 goto LBL_ERR;
1035 }
1036 }
1037 /* D = D/2 */
1038 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
1039 goto LBL_ERR;
1040 }
1041 }
1042
1043 /* 6. if u >= v then */
1044 if (mp_cmp (&u, &v) != MP_LT) {
1045 /* u = u - v, B = B - D */
1046 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
1047 goto LBL_ERR;
1048 }
1049
1050 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
1051 goto LBL_ERR;
1052 }
1053 } else {
1054 /* v - v - u, D = D - B */
1055 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
1056 goto LBL_ERR;
1057 }
1058
1059 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
1060 goto LBL_ERR;
1061 }
1062 }
1063
1064 /* if not zero goto step 4 */
1065 if (mp_iszero (&u) == MP_NO) {
1066 if (++loop_check > MAX_INVMOD_SZ) {
1067 res = MP_VAL;
1068 goto LBL_ERR;
1069 }
1070 goto top;
1071 }
1072
1073 /* now a = C, b = D, gcd == g*v */
1074
1075 /* if v != 1 then there is no inverse */
1076 if (mp_cmp_d (&v, 1) != MP_EQ) {
1077 res = MP_VAL;
1078 goto LBL_ERR;
1079 }
1080
1081 /* b is now the inverse */
1082 neg = a->sign;
1083 while (D.sign == MP_NEG) {
1084 if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
1085 goto LBL_ERR;
1086 }
1087 }
1088 /* too big */
1089 while (mp_cmp_mag(&D, b) != MP_LT) {
1090 if ((res = mp_sub(&D, b, &D)) != MP_OKAY) {
1091 goto LBL_ERR;
1092 }
1093 }
1094 mp_exch (&D, c);
1095 c->sign = neg;
1096 res = MP_OKAY;
1097
1098LBL_ERR:mp_clear(&x);
1099 mp_clear(&y);
1100 mp_clear(&u);
1101 mp_clear(&v);
1102 mp_clear(&B);
1103 mp_clear(&D);
1104 return res;
1105}
1106
1107
1108/* hac 14.61, pp608 */
1109int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
1110{
1111 mp_int x, y, u, v, A, B, C, D;
1112 int res;
1113
1114 /* b cannot be negative */
1115 if (b->sign == MP_NEG || mp_iszero(b) == MP_YES) {
1116 return MP_VAL;
1117 }
1118
1119 /* init temps */
1120 if ((res = mp_init_multi(&x, &y, &u, &v,
1121 &A, &B)) != MP_OKAY) {
1122 return res;
1123 }
1124
1125 /* init rest of tmps temps */
1126 if ((res = mp_init_multi(&C, &D, 0, 0, 0, 0)) != MP_OKAY) {
1127 mp_clear(&x);
1128 mp_clear(&y);
1129 mp_clear(&u);
1130 mp_clear(&v);
1131 mp_clear(&A);
1132 mp_clear(&B);
1133 return res;
1134 }
1135
1136 /* x = a, y = b */
1137 if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
1138 goto LBL_ERR;
1139 }
1140 if ((res = mp_copy (b, &y)) != MP_OKAY) {
1141 goto LBL_ERR;
1142 }
1143
1144 /* 2. [modified] if x,y are both even then return an error! */
1145 if (mp_iseven (&x) == MP_YES && mp_iseven (&y) == MP_YES) {
1146 res = MP_VAL;
1147 goto LBL_ERR;
1148 }
1149
1150 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
1151 if ((res = mp_copy (&x, &u)) != MP_OKAY) {
1152 goto LBL_ERR;
1153 }
1154 if ((res = mp_copy (&y, &v)) != MP_OKAY) {
1155 goto LBL_ERR;
1156 }
1157 if ((res = mp_set (&A, 1)) != MP_OKAY) {
1158 goto LBL_ERR;
1159 }
1160 if ((res = mp_set (&D, 1)) != MP_OKAY) {
1161 goto LBL_ERR;
1162 }
1163
1164top:
1165 /* 4. while u is even do */
1166 while (mp_iseven (&u) == MP_YES) {
1167 /* 4.1 u = u/2 */
1168 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
1169 goto LBL_ERR;
1170 }
1171 /* 4.2 if A or B is odd then */
1172 if (mp_isodd (&A) == MP_YES || mp_isodd (&B) == MP_YES) {
1173 /* A = (A+y)/2, B = (B-x)/2 */
1174 if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
1175 goto LBL_ERR;
1176 }
1177 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
1178 goto LBL_ERR;
1179 }
1180 }
1181 /* A = A/2, B = B/2 */
1182 if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
1183 goto LBL_ERR;
1184 }
1185 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
1186 goto LBL_ERR;
1187 }
1188 }
1189
1190 /* 5. while v is even do */
1191 while (mp_iseven (&v) == MP_YES) {
1192 /* 5.1 v = v/2 */
1193 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
1194 goto LBL_ERR;
1195 }
1196 /* 5.2 if C or D is odd then */
1197 if (mp_isodd (&C) == MP_YES || mp_isodd (&D) == MP_YES) {
1198 /* C = (C+y)/2, D = (D-x)/2 */
1199 if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
1200 goto LBL_ERR;
1201 }
1202 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
1203 goto LBL_ERR;
1204 }
1205 }
1206 /* C = C/2, D = D/2 */
1207 if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
1208 goto LBL_ERR;
1209 }
1210 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
1211 goto LBL_ERR;
1212 }
1213 }
1214
1215 /* 6. if u >= v then */
1216 if (mp_cmp (&u, &v) != MP_LT) {
1217 /* u = u - v, A = A - C, B = B - D */
1218 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
1219 goto LBL_ERR;
1220 }
1221
1222 if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
1223 goto LBL_ERR;
1224 }
1225
1226 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
1227 goto LBL_ERR;
1228 }
1229 } else {
1230 /* v - v - u, C = C - A, D = D - B */
1231 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
1232 goto LBL_ERR;
1233 }
1234
1235 if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
1236 goto LBL_ERR;
1237 }
1238
1239 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
1240 goto LBL_ERR;
1241 }
1242 }
1243
1244 /* if not zero goto step 4 */
1245 if (mp_iszero (&u) == MP_NO)
1246 goto top;
1247
1248 /* now a = C, b = D, gcd == g*v */
1249
1250 /* if v != 1 then there is no inverse */
1251 if (mp_cmp_d (&v, 1) != MP_EQ) {
1252 res = MP_VAL;
1253 goto LBL_ERR;
1254 }
1255
1256 /* if its too low */
1257 while (mp_cmp_d(&C, 0) == MP_LT) {
1258 if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
1259 goto LBL_ERR;
1260 }
1261 }
1262
1263 /* too big */
1264 while (mp_cmp_mag(&C, b) != MP_LT) {
1265 if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
1266 goto LBL_ERR;
1267 }
1268 }
1269
1270 /* C is now the inverse */
1271 mp_exch (&C, c);
1272 res = MP_OKAY;
1273LBL_ERR:mp_clear(&x);
1274 mp_clear(&y);
1275 mp_clear(&u);
1276 mp_clear(&v);
1277 mp_clear(&A);
1278 mp_clear(&B);
1279 mp_clear(&C);
1280 mp_clear(&D);
1281 return res;
1282}
1283
1284
1285/* compare magnitude of two ints (unsigned) */
1286int mp_cmp_mag (mp_int * a, mp_int * b)
1287{
1288 int n;
1289 mp_digit *tmpa, *tmpb;
1290
1291 /* compare based on # of non-zero digits */
1292 if (a->used > b->used) {
1293 return MP_GT;
1294 }
1295
1296 if (a->used < b->used) {
1297 return MP_LT;
1298 }
1299
1300 /* alias for a */
1301 tmpa = a->dp + (a->used - 1);
1302
1303 /* alias for b */
1304 tmpb = b->dp + (a->used - 1);
1305
1306 /* compare based on digits */
1307 for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
1308 if (*tmpa > *tmpb) {
1309 return MP_GT;
1310 }
1311
1312 if (*tmpa < *tmpb) {
1313 return MP_LT;
1314 }
1315 }
1316 return MP_EQ;
1317}
1318
1319
1320/* compare two ints (signed)*/
1321int mp_cmp (mp_int * a, mp_int * b)
1322{
1323 /* compare based on sign */
1324 if (a->sign != b->sign) {
1325 if (a->sign == MP_NEG) {
1326 return MP_LT;
1327 } else {
1328 return MP_GT;
1329 }
1330 }
1331
1332 /* compare digits */
1333 if (a->sign == MP_NEG) {
1334 /* if negative compare opposite direction */
1335 return mp_cmp_mag(b, a);
1336 } else {
1337 return mp_cmp_mag(a, b);
1338 }
1339}
1340
1341
1342/* compare a digit */
1343int mp_cmp_d(mp_int * a, mp_digit b)
1344{
1345 /* special case for zero*/
1346 if (a->used == 0 && b == 0)
1347 return MP_EQ;
1348
1349 /* compare based on sign */
1350 if ((b && a->used == 0) || a->sign == MP_NEG) {
1351 return MP_LT;
1352 }
1353
1354 /* compare based on magnitude */
1355 if (a->used > 1) {
1356 return MP_GT;
1357 }
1358
1359 /* compare the only digit of a to b */
1360 if (a->dp[0] > b) {
1361 return MP_GT;
1362 } else if (a->dp[0] < b) {
1363 return MP_LT;
1364 } else {
1365 return MP_EQ;
1366 }
1367}
1368
1369
1370/* set to a digit */
1371int mp_set (mp_int * a, mp_digit b)
1372{
1373 int res;
1374 mp_zero (a);
1375 res = mp_grow (a, 1);
1376 if (res == MP_OKAY) {
1377 a->dp[0] = (mp_digit)(b & MP_MASK);
1378 a->used = (a->dp[0] != 0) ? 1 : 0;
1379 }
1380 return res;
1381}
1382
1383/* chek if a bit is set */
1384int mp_is_bit_set (mp_int *a, mp_digit b)
1385{
1386 if ((mp_digit)a->used < b/DIGIT_BIT)
1387 return 0;
1388
1389 return (int)((a->dp[b/DIGIT_BIT] >> b%DIGIT_BIT) & (mp_digit)1);
1390}
1391
1392/* c = a mod b, 0 <= c < b */
1393#if defined(FREESCALE_LTC_TFM)
1394int wolfcrypt_mp_mod(mp_int * a, mp_int * b, mp_int * c)
1395#else
1396int mp_mod (mp_int * a, mp_int * b, mp_int * c)
1397#endif
1398{
1399 mp_int t;
1400 int res;
1401
1402 if ((res = mp_init_size (&t, b->used)) != MP_OKAY) {
1403 return res;
1404 }
1405
1406 if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
1407 mp_clear (&t);
1408 return res;
1409 }
1410
1411 if ((mp_iszero(&t) != MP_NO) || (t.sign == b->sign)) {
1412 res = MP_OKAY;
1413 mp_exch (&t, c);
1414 } else {
1415 res = mp_add (b, &t, c);
1416 }
1417
1418 mp_clear (&t);
1419 return res;
1420}
1421
1422
1423/* slower bit-bang division... also smaller */
1424int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
1425{
1426 mp_int ta, tb, tq, q;
1427 int res, n, n2;
1428
1429 /* is divisor zero ? */
1430 if (mp_iszero (b) == MP_YES) {
1431 return MP_VAL;
1432 }
1433
1434 /* if a < b then q=0, r = a */
1435 if (mp_cmp_mag (a, b) == MP_LT) {
1436 if (d != NULL) {
1437 res = mp_copy (a, d);
1438 } else {
1439 res = MP_OKAY;
1440 }
1441 if (c != NULL) {
1442 mp_zero (c);
1443 }
1444 return res;
1445 }
1446
1447 /* init our temps */
1448 if ((res = mp_init_multi(&ta, &tb, &tq, &q, 0, 0)) != MP_OKAY) {
1449 return res;
1450 }
1451
1452 if ((res = mp_set(&tq, 1)) != MP_OKAY) {
1453 return res;
1454 }
1455 n = mp_count_bits(a) - mp_count_bits(b);
1456 if (((res = mp_abs(a, &ta)) != MP_OKAY) ||
1457 ((res = mp_abs(b, &tb)) != MP_OKAY) ||
1458 ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) ||
1459 ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
1460 goto LBL_ERR;
1461 }
1462
1463 while (n-- >= 0) {
1464 if (mp_cmp(&tb, &ta) != MP_GT) {
1465 if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) ||
1466 ((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
1467 goto LBL_ERR;
1468 }
1469 }
1470 if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) ||
1471 ((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
1472 goto LBL_ERR;
1473 }
1474 }
1475
1476 /* now q == quotient and ta == remainder */
1477 n = a->sign;
1478 n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
1479 if (c != NULL) {
1480 mp_exch(c, &q);
1481 c->sign = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
1482 }
1483 if (d != NULL) {
1484 mp_exch(d, &ta);
1485 d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
1486 }
1487LBL_ERR:
1488 mp_clear(&ta);
1489 mp_clear(&tb);
1490 mp_clear(&tq);
1491 mp_clear(&q);
1492 return res;
1493}
1494
1495
1496/* b = a/2 */
1497int mp_div_2(mp_int * a, mp_int * b)
1498{
1499 int x, res, oldused;
1500
1501 /* copy */
1502 if (b->alloc < a->used) {
1503 if ((res = mp_grow (b, a->used)) != MP_OKAY) {
1504 return res;
1505 }
1506 }
1507
1508 oldused = b->used;
1509 b->used = a->used;
1510 {
1511 mp_digit r, rr, *tmpa, *tmpb;
1512
1513 /* source alias */
1514 tmpa = a->dp + b->used - 1;
1515
1516 /* dest alias */
1517 tmpb = b->dp + b->used - 1;
1518
1519 /* carry */
1520 r = 0;
1521 for (x = b->used - 1; x >= 0; x--) {
1522 /* get the carry for the next iteration */
1523 rr = *tmpa & 1;
1524
1525 /* shift the current digit, add in carry and store */
1526 *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
1527
1528 /* forward carry to next iteration */
1529 r = rr;
1530 }
1531
1532 /* zero excess digits */
1533 tmpb = b->dp + b->used;
1534 for (x = b->used; x < oldused; x++) {
1535 *tmpb++ = 0;
1536 }
1537 }
1538 b->sign = a->sign;
1539 mp_clamp (b);
1540 return MP_OKAY;
1541}
1542
1543
1544/* high level addition (handles signs) */
1545int mp_add (mp_int * a, mp_int * b, mp_int * c)
1546{
1547 int sa, sb, res;
1548
1549 /* get sign of both inputs */
1550 sa = a->sign;
1551 sb = b->sign;
1552
1553 /* handle two cases, not four */
1554 if (sa == sb) {
1555 /* both positive or both negative */
1556 /* add their magnitudes, copy the sign */
1557 c->sign = sa;
1558 res = s_mp_add (a, b, c);
1559 } else {
1560 /* one positive, the other negative */
1561 /* subtract the one with the greater magnitude from */
1562 /* the one of the lesser magnitude. The result gets */
1563 /* the sign of the one with the greater magnitude. */
1564 if (mp_cmp_mag (a, b) == MP_LT) {
1565 c->sign = sb;
1566 res = s_mp_sub (b, a, c);
1567 } else {
1568 c->sign = sa;
1569 res = s_mp_sub (a, b, c);
1570 }
1571 }
1572 return res;
1573}
1574
1575
1576/* low level addition, based on HAC pp.594, Algorithm 14.7 */
1577int s_mp_add (mp_int * a, mp_int * b, mp_int * c)
1578{
1579 mp_int *x;
1580 int olduse, res, min_ab, max_ab;
1581
1582 /* find sizes, we let |a| <= |b| which means we have to sort
1583 * them. "x" will point to the input with the most digits
1584 */
1585 if (a->used > b->used) {
1586 min_ab = b->used;
1587 max_ab = a->used;
1588 x = a;
1589 } else {
1590 min_ab = a->used;
1591 max_ab = b->used;
1592 x = b;
1593 }
1594
1595 /* init result */
1596 if (c->alloc < max_ab + 1) {
1597 if ((res = mp_grow (c, max_ab + 1)) != MP_OKAY) {
1598 return res;
1599 }
1600 }
1601
1602 /* get old used digit count and set new one */
1603 olduse = c->used;
1604 c->used = max_ab + 1;
1605
1606 {
1607 mp_digit u, *tmpa, *tmpb, *tmpc;
1608 int i;
1609
1610 /* alias for digit pointers */
1611
1612 /* first input */
1613 tmpa = a->dp;
1614
1615 /* second input */
1616 tmpb = b->dp;
1617
1618 /* destination */
1619 tmpc = c->dp;
1620
1621 /* zero the carry */
1622 u = 0;
1623 for (i = 0; i < min_ab; i++) {
1624 /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
1625 *tmpc = *tmpa++ + *tmpb++ + u;
1626
1627 /* U = carry bit of T[i] */
1628 u = *tmpc >> ((mp_digit)DIGIT_BIT);
1629
1630 /* take away carry bit from T[i] */
1631 *tmpc++ &= MP_MASK;
1632 }
1633
1634 /* now copy higher words if any, that is in A+B
1635 * if A or B has more digits add those in
1636 */
1637 if (min_ab != max_ab) {
1638 for (; i < max_ab; i++) {
1639 /* T[i] = X[i] + U */
1640 *tmpc = x->dp[i] + u;
1641
1642 /* U = carry bit of T[i] */
1643 u = *tmpc >> ((mp_digit)DIGIT_BIT);
1644
1645 /* take away carry bit from T[i] */
1646 *tmpc++ &= MP_MASK;
1647 }
1648 }
1649
1650 /* add carry */
1651 *tmpc++ = u;
1652
1653 /* clear digits above olduse */
1654 for (i = c->used; i < olduse; i++) {
1655 *tmpc++ = 0;
1656 }
1657 }
1658
1659 mp_clamp (c);
1660 return MP_OKAY;
1661}
1662
1663
1664/* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
1665int s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
1666{
1667 int olduse, res, min_b, max_a;
1668
1669 /* find sizes */
1670 min_b = b->used;
1671 max_a = a->used;
1672
1673 /* init result */
1674 if (c->alloc < max_a) {
1675 if ((res = mp_grow (c, max_a)) != MP_OKAY) {
1676 return res;
1677 }
1678 }
1679
1680 /* sanity check on destination */
1681 if (c->dp == NULL)
1682 return MP_VAL;
1683
1684 olduse = c->used;
1685 c->used = max_a;
1686
1687 {
1688 mp_digit u, *tmpa, *tmpb, *tmpc;
1689 int i;
1690
1691 /* alias for digit pointers */
1692 tmpa = a->dp;
1693 tmpb = b->dp;
1694 tmpc = c->dp;
1695
1696 /* set carry to zero */
1697 u = 0;
1698 for (i = 0; i < min_b; i++) {
1699 /* T[i] = A[i] - B[i] - U */
1700 *tmpc = *tmpa++ - *tmpb++ - u;
1701
1702 /* U = carry bit of T[i]
1703 * Note this saves performing an AND operation since
1704 * if a carry does occur it will propagate all the way to the
1705 * MSB. As a result a single shift is enough to get the carry
1706 */
1707 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
1708
1709 /* Clear carry from T[i] */
1710 *tmpc++ &= MP_MASK;
1711 }
1712
1713 /* now copy higher words if any, e.g. if A has more digits than B */
1714 for (; i < max_a; i++) {
1715 /* T[i] = A[i] - U */
1716 *tmpc = *tmpa++ - u;
1717
1718 /* U = carry bit of T[i] */
1719 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
1720
1721 /* Clear carry from T[i] */
1722 *tmpc++ &= MP_MASK;
1723 }
1724
1725 /* clear digits above used (since we may not have grown result above) */
1726 for (i = c->used; i < olduse; i++) {
1727 *tmpc++ = 0;
1728 }
1729 }
1730
1731 mp_clamp (c);
1732 return MP_OKAY;
1733}
1734
1735
1736/* high level subtraction (handles signs) */
1737int mp_sub (mp_int * a, mp_int * b, mp_int * c)
1738{
1739 int sa, sb, res;
1740
1741 sa = a->sign;
1742 sb = b->sign;
1743
1744 if (sa != sb) {
1745 /* subtract a negative from a positive, OR */
1746 /* subtract a positive from a negative. */
1747 /* In either case, ADD their magnitudes, */
1748 /* and use the sign of the first number. */
1749 c->sign = sa;
1750 res = s_mp_add (a, b, c);
1751 } else {
1752 /* subtract a positive from a positive, OR */
1753 /* subtract a negative from a negative. */
1754 /* First, take the difference between their */
1755 /* magnitudes, then... */
1756 if (mp_cmp_mag (a, b) != MP_LT) {
1757 /* Copy the sign from the first */
1758 c->sign = sa;
1759 /* The first has a larger or equal magnitude */
1760 res = s_mp_sub (a, b, c);
1761 } else {
1762 /* The result has the *opposite* sign from */
1763 /* the first number. */
1764 c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
1765 /* The second has a larger magnitude */
1766 res = s_mp_sub (b, a, c);
1767 }
1768 }
1769 return res;
1770}
1771
1772
1773/* determines if reduce_2k_l can be used */
1774int mp_reduce_is_2k_l(mp_int *a)
1775{
1776 int ix, iy;
1777
1778 if (a->used == 0) {
1779 return MP_NO;
1780 } else if (a->used == 1) {
1781 return MP_YES;
1782 } else if (a->used > 1) {
1783 /* if more than half of the digits are -1 we're sold */
1784 for (iy = ix = 0; ix < a->used; ix++) {
1785 if (a->dp[ix] == MP_MASK) {
1786 ++iy;
1787 }
1788 }
1789 return (iy >= (a->used/2)) ? MP_YES : MP_NO;
1790
1791 }
1792 return MP_NO;
1793}
1794
1795
1796/* determines if mp_reduce_2k can be used */
1797int mp_reduce_is_2k(mp_int *a)
1798{
1799 int ix, iy, iw;
1800 mp_digit iz;
1801
1802 if (a->used == 0) {
1803 return MP_NO;
1804 } else if (a->used == 1) {
1805 return MP_YES;
1806 } else if (a->used > 1) {
1807 iy = mp_count_bits(a);
1808 iz = 1;
1809 iw = 1;
1810
1811 /* Test every bit from the second digit up, must be 1 */
1812 for (ix = DIGIT_BIT; ix < iy; ix++) {
1813 if ((a->dp[iw] & iz) == 0) {
1814 return MP_NO;
1815 }
1816 iz <<= 1;
1817 if (iz > (mp_digit)MP_MASK) {
1818 ++iw;
1819 iz = 1;
1820 }
1821 }
1822 }
1823 return MP_YES;
1824}
1825
1826
1827/* determines if a number is a valid DR modulus */
1828int mp_dr_is_modulus(mp_int *a)
1829{
1830 int ix;
1831
1832 /* must be at least two digits */
1833 if (a->used < 2) {
1834 return 0;
1835 }
1836
1837 /* must be of the form b**k - a [a <= b] so all
1838 * but the first digit must be equal to -1 (mod b).
1839 */
1840 for (ix = 1; ix < a->used; ix++) {
1841 if (a->dp[ix] != MP_MASK) {
1842 return 0;
1843 }
1844 }
1845 return 1;
1846}
1847
1848
1849/* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
1850 *
1851 * Uses a left-to-right k-ary sliding window to compute the modular
1852 * exponentiation.
1853 * The value of k changes based on the size of the exponent.
1854 *
1855 * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
1856 */
1857
1858#ifdef MP_LOW_MEM
1859 #define TAB_SIZE 32
1860#else
1861 #define TAB_SIZE 256
1862#endif
1863
1864int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
1865 int redmode)
1866{
1867 mp_int res;
1868 mp_digit buf, mp;
1869 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
1870#ifdef WOLFSSL_SMALL_STACK
1871 mp_int* M = NULL;
1872#else
1873 mp_int M[TAB_SIZE];
1874#endif
1875 /* use a pointer to the reduction algorithm. This allows us to use
1876 * one of many reduction algorithms without modding the guts of
1877 * the code with if statements everywhere.
1878 */
1879 int (*redux)(mp_int*,mp_int*,mp_digit);
1880
1881#ifdef WOLFSSL_SMALL_STACK
1882 M = (mp_int*) XMALLOC(sizeof(mp_int) * TAB_SIZE, NULL,
1883 DYNAMIC_TYPE_TMP_BUFFER);
1884 if (M == NULL)
1885 return MP_MEM;
1886#endif
1887
1888 /* find window size */
1889 x = mp_count_bits (X);
1890 if (x <= 7) {
1891 winsize = 2;
1892 } else if (x <= 36) {
1893 winsize = 3;
1894 } else if (x <= 140) {
1895 winsize = 4;
1896 } else if (x <= 450) {
1897 winsize = 5;
1898 } else if (x <= 1303) {
1899 winsize = 6;
1900 } else if (x <= 3529) {
1901 winsize = 7;
1902 } else {
1903 winsize = 8;
1904 }
1905
1906#ifdef MP_LOW_MEM
1907 if (winsize > 5) {
1908 winsize = 5;
1909 }
1910#endif
1911
1912 /* init M array */
1913 /* init first cell */
1914 if ((err = mp_init_size(&M[1], P->alloc)) != MP_OKAY) {
1915#ifdef WOLFSSL_SMALL_STACK
1916 XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
1917#endif
1918
1919 return err;
1920 }
1921
1922 /* now init the second half of the array */
1923 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
1924 if ((err = mp_init_size(&M[x], P->alloc)) != MP_OKAY) {
1925 for (y = 1<<(winsize-1); y < x; y++) {
1926 mp_clear (&M[y]);
1927 }
1928 mp_clear(&M[1]);
1929
1930#ifdef WOLFSSL_SMALL_STACK
1931 XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
1932#endif
1933
1934 return err;
1935 }
1936 }
1937
1938 /* determine and setup reduction code */
1939 if (redmode == 0) {
1940#ifdef BN_MP_MONTGOMERY_SETUP_C
1941 /* now setup montgomery */
1942 if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
1943 goto LBL_M;
1944 }
1945#else
1946 err = MP_VAL;
1947 goto LBL_M;
1948#endif
1949
1950 /* automatically pick the comba one if available (saves quite a few
1951 calls/ifs) */
1952#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
1953 if (((P->used * 2 + 1) < MP_WARRAY) &&
1954 P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
1955 redux = fast_mp_montgomery_reduce;
1956 } else
1957#endif
1958 {
1959#ifdef BN_MP_MONTGOMERY_REDUCE_C
1960 /* use slower baseline Montgomery method */
1961 redux = mp_montgomery_reduce;
1962#else
1963 err = MP_VAL;
1964 goto LBL_M;
1965#endif
1966 }
1967 } else if (redmode == 1) {
1968#if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
1969 /* setup DR reduction for moduli of the form B**k - b */
1970 mp_dr_setup(P, &mp);
1971 redux = mp_dr_reduce;
1972#else
1973 err = MP_VAL;
1974 goto LBL_M;
1975#endif
1976 } else {
1977#if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
1978 /* setup DR reduction for moduli of the form 2**k - b */
1979 if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
1980 goto LBL_M;
1981 }
1982 redux = mp_reduce_2k;
1983#else
1984 err = MP_VAL;
1985 goto LBL_M;
1986#endif
1987 }
1988
1989 /* setup result */
1990 if ((err = mp_init_size (&res, P->alloc)) != MP_OKAY) {
1991 goto LBL_M;
1992 }
1993
1994 /* create M table
1995 *
1996
1997 *
1998 * The first half of the table is not computed though accept for M[0] and M[1]
1999 */
2000
2001 if (redmode == 0) {
2002#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
2003 /* now we need R mod m */
2004 if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
2005 goto LBL_RES;
2006 }
2007
2008 /* now set M[1] to G * R mod m */
2009 if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
2010 goto LBL_RES;
2011 }
2012#else
2013 err = MP_VAL;
2014 goto LBL_RES;
2015#endif
2016 } else {
2017 if ((err = mp_set(&res, 1)) != MP_OKAY) {
2018 goto LBL_RES;
2019 }
2020 if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
2021 goto LBL_RES;
2022 }
2023 }
2024
2025 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times*/
2026 if ((err = mp_copy (&M[1], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
2027 goto LBL_RES;
2028 }
2029
2030 for (x = 0; x < (winsize - 1); x++) {
2031 if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))],
2032 &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
2033 goto LBL_RES;
2034 }
2035 if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, mp)) != MP_OKAY) {
2036 goto LBL_RES;
2037 }
2038 }
2039
2040 /* create upper table */
2041 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
2042 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
2043 goto LBL_RES;
2044 }
2045 if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
2046 goto LBL_RES;
2047 }
2048 }
2049
2050 /* set initial mode and bit cnt */
2051 mode = 0;
2052 bitcnt = 1;
2053 buf = 0;
2054 digidx = X->used - 1;
2055 bitcpy = 0;
2056 bitbuf = 0;
2057
2058 for (;;) {
2059 /* grab next digit as required */
2060 if (--bitcnt == 0) {
2061 /* if digidx == -1 we are out of digits so break */
2062 if (digidx == -1) {
2063 break;
2064 }
2065 /* read next digit and reset bitcnt */
2066 buf = X->dp[digidx--];
2067 bitcnt = (int)DIGIT_BIT;
2068 }
2069
2070 /* grab the next msb from the exponent */
2071 y = (int)(buf >> (DIGIT_BIT - 1)) & 1;
2072 buf <<= (mp_digit)1;
2073
2074 /* if the bit is zero and mode == 0 then we ignore it
2075 * These represent the leading zero bits before the first 1 bit
2076 * in the exponent. Technically this opt is not required but it
2077 * does lower the # of trivial squaring/reductions used
2078 */
2079 if (mode == 0 && y == 0) {
2080 continue;
2081 }
2082
2083 /* if the bit is zero and mode == 1 then we square */
2084 if (mode == 1 && y == 0) {
2085 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2086 goto LBL_RES;
2087 }
2088 if ((err = redux (&res, P, mp)) != MP_OKAY) {
2089 goto LBL_RES;
2090 }
2091 continue;
2092 }
2093
2094 /* else we add it to the window */
2095 bitbuf |= (y << (winsize - ++bitcpy));
2096 mode = 2;
2097
2098 if (bitcpy == winsize) {
2099 /* ok window is filled so square as required and multiply */
2100 /* square first */
2101 for (x = 0; x < winsize; x++) {
2102 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2103 goto LBL_RES;
2104 }
2105 if ((err = redux (&res, P, mp)) != MP_OKAY) {
2106 goto LBL_RES;
2107 }
2108 }
2109
2110 /* then multiply */
2111 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
2112 goto LBL_RES;
2113 }
2114 if ((err = redux (&res, P, mp)) != MP_OKAY) {
2115 goto LBL_RES;
2116 }
2117
2118 /* empty window and reset */
2119 bitcpy = 0;
2120 bitbuf = 0;
2121 mode = 1;
2122 }
2123 }
2124
2125 /* if bits remain then square/multiply */
2126 if (mode == 2 && bitcpy > 0) {
2127 /* square then multiply if the bit is set */
2128 for (x = 0; x < bitcpy; x++) {
2129 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2130 goto LBL_RES;
2131 }
2132 if ((err = redux (&res, P, mp)) != MP_OKAY) {
2133 goto LBL_RES;
2134 }
2135
2136 /* get next bit of the window */
2137 bitbuf <<= 1;
2138 if ((bitbuf & (1 << winsize)) != 0) {
2139 /* then multiply */
2140 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
2141 goto LBL_RES;
2142 }
2143 if ((err = redux (&res, P, mp)) != MP_OKAY) {
2144 goto LBL_RES;
2145 }
2146 }
2147 }
2148 }
2149
2150 if (redmode == 0) {
2151 /* fixup result if Montgomery reduction is used
2152 * recall that any value in a Montgomery system is
2153 * actually multiplied by R mod n. So we have
2154 * to reduce one more time to cancel out the factor
2155 * of R.
2156 */
2157 if ((err = redux(&res, P, mp)) != MP_OKAY) {
2158 goto LBL_RES;
2159 }
2160 }
2161
2162 /* swap res with Y */
2163 mp_exch (&res, Y);
2164 err = MP_OKAY;
2165LBL_RES:mp_clear (&res);
2166LBL_M:
2167 mp_clear(&M[1]);
2168 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
2169 mp_clear (&M[x]);
2170 }
2171
2172#ifdef WOLFSSL_SMALL_STACK
2173 XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2174#endif
2175
2176 return err;
2177}
2178
2179
2180/* setups the montgomery reduction stuff */
2181int mp_montgomery_setup (mp_int * n, mp_digit * rho)
2182{
2183 mp_digit x, b;
2184
2185/* fast inversion mod 2**k
2186 *
2187 * Based on the fact that
2188 *
2189 * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
2190 * => 2*X*A - X*X*A*A = 1
2191 * => 2*(1) - (1) = 1
2192 */
2193 b = n->dp[0];
2194
2195 if ((b & 1) == 0) {
2196 return MP_VAL;
2197 }
2198
2199 x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
2200 x *= 2 - b * x; /* here x*a==1 mod 2**8 */
2201#if !defined(MP_8BIT)
2202 x *= 2 - b * x; /* here x*a==1 mod 2**16 */
2203#endif
2204#if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT))
2205 x *= 2 - b * x; /* here x*a==1 mod 2**32 */
2206#endif
2207#ifdef MP_64BIT
2208 x *= 2 - b * x; /* here x*a==1 mod 2**64 */
2209#endif
2210
2211 /* rho = -1/m mod b */
2212 /* TAO, switched mp_word casts to mp_digit to shut up compiler */
2213 *rho = (mp_digit)((((mp_digit)1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK);
2214
2215 return MP_OKAY;
2216}
2217
2218
2219/* computes xR**-1 == x (mod N) via Montgomery Reduction
2220 *
2221 * This is an optimized implementation of montgomery_reduce
2222 * which uses the comba method to quickly calculate the columns of the
2223 * reduction.
2224 *
2225 * Based on Algorithm 14.32 on pp.601 of HAC.
2226*/
2227int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
2228{
2229 int ix, res, olduse;
2230#ifdef WOLFSSL_SMALL_STACK
2231 mp_word* W; /* uses dynamic memory and slower */
2232#else
2233 mp_word W[MP_WARRAY];
2234#endif
2235
2236 /* get old used count */
2237 olduse = x->used;
2238
2239 /* grow a as required */
2240 if (x->alloc < n->used + 1) {
2241 if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
2242 return res;
2243 }
2244 }
2245
2246#ifdef WOLFSSL_SMALL_STACK
2247 W = (mp_word*)XMALLOC(sizeof(mp_word) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
2248 if (W == NULL)
2249 return MP_MEM;
2250#endif
2251
2252 /* first we have to get the digits of the input into
2253 * an array of double precision words W[...]
2254 */
2255 {
2256 mp_word *_W;
2257 mp_digit *tmpx;
2258
2259 /* alias for the W[] array */
2260 _W = W;
2261
2262 /* alias for the digits of x*/
2263 tmpx = x->dp;
2264
2265 /* copy the digits of a into W[0..a->used-1] */
2266 for (ix = 0; ix < x->used; ix++) {
2267 *_W++ = *tmpx++;
2268 }
2269
2270 /* zero the high words of W[a->used..m->used*2] */
2271 for (; ix < n->used * 2 + 1; ix++) {
2272 *_W++ = 0;
2273 }
2274 }
2275
2276 /* now we proceed to zero successive digits
2277 * from the least significant upwards
2278 */
2279 for (ix = 0; ix < n->used; ix++) {
2280 /* mu = ai * m' mod b
2281 *
2282 * We avoid a double precision multiplication (which isn't required)
2283 * by casting the value down to a mp_digit. Note this requires
2284 * that W[ix-1] have the carry cleared (see after the inner loop)
2285 */
2286 mp_digit mu;
2287 mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
2288
2289 /* a = a + mu * m * b**i
2290 *
2291 * This is computed in place and on the fly. The multiplication
2292 * by b**i is handled by offseting which columns the results
2293 * are added to.
2294 *
2295 * Note the comba method normally doesn't handle carries in the
2296 * inner loop In this case we fix the carry from the previous
2297 * column since the Montgomery reduction requires digits of the
2298 * result (so far) [see above] to work. This is
2299 * handled by fixing up one carry after the inner loop. The
2300 * carry fixups are done in order so after these loops the
2301 * first m->used words of W[] have the carries fixed
2302 */
2303 {
2304 int iy;
2305 mp_digit *tmpn;
2306 mp_word *_W;
2307
2308 /* alias for the digits of the modulus */
2309 tmpn = n->dp;
2310
2311 /* Alias for the columns set by an offset of ix */
2312 _W = W + ix;
2313
2314 /* inner loop */
2315 for (iy = 0; iy < n->used; iy++) {
2316 *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
2317 }
2318 }
2319
2320 /* now fix carry for next digit, W[ix+1] */
2321 W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
2322 }
2323
2324 /* now we have to propagate the carries and
2325 * shift the words downward [all those least
2326 * significant digits we zeroed].
2327 */
2328 {
2329 mp_digit *tmpx;
2330 mp_word *_W, *_W1;
2331
2332 /* nox fix rest of carries */
2333
2334 /* alias for current word */
2335 _W1 = W + ix;
2336
2337 /* alias for next word, where the carry goes */
2338 _W = W + ++ix;
2339
2340 for (; ix <= n->used * 2 + 1; ix++) {
2341 *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
2342 }
2343
2344 /* copy out, A = A/b**n
2345 *
2346 * The result is A/b**n but instead of converting from an
2347 * array of mp_word to mp_digit than calling mp_rshd
2348 * we just copy them in the right order
2349 */
2350
2351 /* alias for destination word */
2352 tmpx = x->dp;
2353
2354 /* alias for shifted double precision result */
2355 _W = W + n->used;
2356
2357 for (ix = 0; ix < n->used + 1; ix++) {
2358 *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
2359 }
2360
2361 /* zero olduse digits, if the input a was larger than
2362 * m->used+1 we'll have to clear the digits
2363 */
2364 for (; ix < olduse; ix++) {
2365 *tmpx++ = 0;
2366 }
2367 }
2368
2369 /* set the max used and clamp */
2370 x->used = n->used + 1;
2371 mp_clamp (x);
2372
2373#ifdef WOLFSSL_SMALL_STACK
2374 XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
2375#endif
2376
2377 /* if A >= m then A = A - m */
2378 if (mp_cmp_mag (x, n) != MP_LT) {
2379 return s_mp_sub (x, n, x);
2380 }
2381 return MP_OKAY;
2382}
2383
2384
2385/* computes xR**-1 == x (mod N) via Montgomery Reduction */
2386int mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
2387{
2388 int ix, res, digs;
2389 mp_digit mu;
2390
2391 /* can the fast reduction [comba] method be used?
2392 *
2393 * Note that unlike in mul you're safely allowed *less*
2394 * than the available columns [255 per default] since carries
2395 * are fixed up in the inner loop.
2396 */
2397 digs = n->used * 2 + 1;
2398 if ((digs < MP_WARRAY) &&
2399 n->used <
2400 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2401 return fast_mp_montgomery_reduce (x, n, rho);
2402 }
2403
2404 /* grow the input as required */
2405 if (x->alloc < digs) {
2406 if ((res = mp_grow (x, digs)) != MP_OKAY) {
2407 return res;
2408 }
2409 }
2410 x->used = digs;
2411
2412 for (ix = 0; ix < n->used; ix++) {
2413 /* mu = ai * rho mod b
2414 *
2415 * The value of rho must be precalculated via
2416 * montgomery_setup() such that
2417 * it equals -1/n0 mod b this allows the
2418 * following inner loop to reduce the
2419 * input one digit at a time
2420 */
2421 mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
2422
2423 /* a = a + mu * m * b**i */
2424 {
2425 int iy;
2426 mp_digit *tmpn, *tmpx, u;
2427 mp_word r;
2428
2429 /* alias for digits of the modulus */
2430 tmpn = n->dp;
2431
2432 /* alias for the digits of x [the input] */
2433 tmpx = x->dp + ix;
2434
2435 /* set the carry to zero */
2436 u = 0;
2437
2438 /* Multiply and add in place */
2439 for (iy = 0; iy < n->used; iy++) {
2440 /* compute product and sum */
2441 r = ((mp_word)mu) * ((mp_word)*tmpn++) +
2442 ((mp_word) u) + ((mp_word) * tmpx);
2443
2444 /* get carry */
2445 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
2446
2447 /* fix digit */
2448 *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
2449 }
2450 /* At this point the ix'th digit of x should be zero */
2451
2452
2453 /* propagate carries upwards as required*/
2454 while (u) {
2455 *tmpx += u;
2456 u = *tmpx >> DIGIT_BIT;
2457 *tmpx++ &= MP_MASK;
2458 }
2459 }
2460 }
2461
2462 /* at this point the n.used'th least
2463 * significant digits of x are all zero
2464 * which means we can shift x to the
2465 * right by n.used digits and the
2466 * residue is unchanged.
2467 */
2468
2469 /* x = x/b**n.used */
2470 mp_clamp(x);
2471 mp_rshd (x, n->used);
2472
2473 /* if x >= n then x = x - n */
2474 if (mp_cmp_mag (x, n) != MP_LT) {
2475 return s_mp_sub (x, n, x);
2476 }
2477
2478 return MP_OKAY;
2479}
2480
2481
2482/* determines the setup value */
2483void mp_dr_setup(mp_int *a, mp_digit *d)
2484{
2485 /* the casts are required if DIGIT_BIT is one less than
2486 * the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
2487 */
2488 *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
2489 ((mp_word)a->dp[0]));
2490}
2491
2492
2493/* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
2494 *
2495 * Based on algorithm from the paper
2496 *
2497 * "Generating Efficient Primes for Discrete Log Cryptosystems"
2498 * Chae Hoon Lim, Pil Joong Lee,
2499 * POSTECH Information Research Laboratories
2500 *
2501 * The modulus must be of a special format [see manual]
2502 *
2503 * Has been modified to use algorithm 7.10 from the LTM book instead
2504 *
2505 * Input x must be in the range 0 <= x <= (n-1)**2
2506 */
2507int mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k)
2508{
2509 int err, i, m;
2510 mp_word r;
2511 mp_digit mu, *tmpx1, *tmpx2;
2512
2513 /* m = digits in modulus */
2514 m = n->used;
2515
2516 /* ensure that "x" has at least 2m digits */
2517 if (x->alloc < m + m) {
2518 if ((err = mp_grow (x, m + m)) != MP_OKAY) {
2519 return err;
2520 }
2521 }
2522
2523/* top of loop, this is where the code resumes if
2524 * another reduction pass is required.
2525 */
2526top:
2527 /* aliases for digits */
2528 /* alias for lower half of x */
2529 tmpx1 = x->dp;
2530
2531 /* alias for upper half of x, or x/B**m */
2532 tmpx2 = x->dp + m;
2533
2534 /* set carry to zero */
2535 mu = 0;
2536
2537 /* compute (x mod B**m) + k * [x/B**m] inline and inplace */
2538 for (i = 0; i < m; i++) {
2539 r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu;
2540 *tmpx1++ = (mp_digit)(r & MP_MASK);
2541 mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
2542 }
2543
2544 /* set final carry */
2545 *tmpx1++ = mu;
2546
2547 /* zero words above m */
2548 for (i = m + 1; i < x->used; i++) {
2549 *tmpx1++ = 0;
2550 }
2551
2552 /* clamp, sub and return */
2553 mp_clamp (x);
2554
2555 /* if x >= n then subtract and reduce again
2556 * Each successive "recursion" makes the input smaller and smaller.
2557 */
2558 if (mp_cmp_mag (x, n) != MP_LT) {
2559 if ((err = s_mp_sub(x, n, x)) != MP_OKAY) {
2560 return err;
2561 }
2562 goto top;
2563 }
2564 return MP_OKAY;
2565}
2566
2567
2568/* reduces a modulo n where n is of the form 2**p - d */
2569int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d)
2570{
2571 mp_int q;
2572 int p, res;
2573
2574 if ((res = mp_init(&q)) != MP_OKAY) {
2575 return res;
2576 }
2577
2578 p = mp_count_bits(n);
2579top:
2580 /* q = a/2**p, a = a mod 2**p */
2581 if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
2582 goto ERR;
2583 }
2584
2585 if (d != 1) {
2586 /* q = q * d */
2587 if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) {
2588 goto ERR;
2589 }
2590 }
2591
2592 /* a = a + q */
2593 if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
2594 goto ERR;
2595 }
2596
2597 if (mp_cmp_mag(a, n) != MP_LT) {
2598 if ((res = s_mp_sub(a, n, a)) != MP_OKAY) {
2599 goto ERR;
2600 }
2601 goto top;
2602 }
2603
2604ERR:
2605 mp_clear(&q);
2606 return res;
2607}
2608
2609
2610/* determines the setup value */
2611int mp_reduce_2k_setup(mp_int *a, mp_digit *d)
2612{
2613 int res, p;
2614 mp_int tmp;
2615
2616 if ((res = mp_init(&tmp)) != MP_OKAY) {
2617 return res;
2618 }
2619
2620 p = mp_count_bits(a);
2621 if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
2622 mp_clear(&tmp);
2623 return res;
2624 }
2625
2626 if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
2627 mp_clear(&tmp);
2628 return res;
2629 }
2630
2631 *d = tmp.dp[0];
2632 mp_clear(&tmp);
2633 return MP_OKAY;
2634}
2635
2636
2637/* set the b bit of a */
2638int mp_set_bit (mp_int * a, int b)
2639{
2640 int i = b / DIGIT_BIT, res;
2641
2642 if (a->used < (int)(i + 1)) {
2643 /* grow a to accommodate the single bit */
2644 if ((res = mp_grow (a, i + 1)) != MP_OKAY) {
2645 return res;
2646 }
2647
2648 /* set the used count of where the bit will go */
2649 a->used = (int)(i + 1);
2650 }
2651
2652 /* put the single bit in its place */
2653 a->dp[i] |= ((mp_digit)1) << (b % DIGIT_BIT);
2654
2655 return MP_OKAY;
2656}
2657
2658/* computes a = 2**b
2659 *
2660 * Simple algorithm which zeros the int, set the required bit
2661 */
2662int mp_2expt (mp_int * a, int b)
2663{
2664 /* zero a as per default */
2665 mp_zero (a);
2666
2667 return mp_set_bit(a, b);
2668}
2669
2670/* multiply by a digit */
2671int mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
2672{
2673 mp_digit u, *tmpa, *tmpc;
2674 mp_word r;
2675 int ix, res, olduse;
2676
2677 /* make sure c is big enough to hold a*b */
2678 if (c->alloc < a->used + 1) {
2679 if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
2680 return res;
2681 }
2682 }
2683
2684 /* get the original destinations used count */
2685 olduse = c->used;
2686
2687 /* set the sign */
2688 c->sign = a->sign;
2689
2690 /* alias for a->dp [source] */
2691 tmpa = a->dp;
2692
2693 /* alias for c->dp [dest] */
2694 tmpc = c->dp;
2695
2696 /* zero carry */
2697 u = 0;
2698
2699 /* compute columns */
2700 for (ix = 0; ix < a->used; ix++) {
2701 /* compute product and carry sum for this term */
2702 r = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b);
2703
2704 /* mask off higher bits to get a single digit */
2705 *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
2706
2707 /* send carry into next iteration */
2708 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
2709 }
2710
2711 /* store final carry [if any] and increment ix offset */
2712 *tmpc++ = u;
2713 ++ix;
2714
2715 /* now zero digits above the top */
2716 while (ix++ < olduse) {
2717 *tmpc++ = 0;
2718 }
2719
2720 /* set used count */
2721 c->used = a->used + 1;
2722 mp_clamp(c);
2723
2724 return MP_OKAY;
2725}
2726
2727
2728/* d = a * b (mod c) */
2729#if defined(FREESCALE_LTC_TFM)
2730int wolfcrypt_mp_mulmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
2731#else
2732int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
2733#endif
2734{
2735 int res;
2736 mp_int t;
2737
2738 if ((res = mp_init_size (&t, c->used)) != MP_OKAY) {
2739 return res;
2740 }
2741
2742 res = mp_mul (a, b, &t);
2743 if (res == MP_OKAY) {
2744 res = mp_mod (&t, c, d);
2745 }
2746
2747 mp_clear (&t);
2748 return res;
2749}
2750
2751
2752/* d = a - b (mod c) */
2753int mp_submod(mp_int* a, mp_int* b, mp_int* c, mp_int* d)
2754{
2755 int res;
2756 mp_int t;
2757
2758 if ((res = mp_init (&t)) != MP_OKAY) {
2759 return res;
2760 }
2761
2762 res = mp_sub (a, b, &t);
2763 if (res == MP_OKAY) {
2764 res = mp_mod (&t, c, d);
2765 }
2766
2767 mp_clear (&t);
2768
2769 return res;
2770}
2771
2772/* d = a + b (mod c) */
2773int mp_addmod(mp_int* a, mp_int* b, mp_int* c, mp_int* d)
2774{
2775 int res;
2776 mp_int t;
2777
2778 if ((res = mp_init (&t)) != MP_OKAY) {
2779 return res;
2780 }
2781
2782 res = mp_add (a, b, &t);
2783 if (res == MP_OKAY) {
2784 res = mp_mod (&t, c, d);
2785 }
2786
2787 mp_clear (&t);
2788
2789 return res;
2790}
2791
2792/* computes b = a*a */
2793int mp_sqr (mp_int * a, mp_int * b)
2794{
2795 int res;
2796
2797 {
2798#ifdef BN_FAST_S_MP_SQR_C
2799 /* can we use the fast comba multiplier? */
2800 if ((a->used * 2 + 1) < MP_WARRAY &&
2801 a->used <
2802 (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
2803 res = fast_s_mp_sqr (a, b);
2804 } else
2805#endif
2806#ifdef BN_S_MP_SQR_C
2807 res = s_mp_sqr (a, b);
2808#else
2809 res = MP_VAL;
2810#endif
2811 }
2812 b->sign = MP_ZPOS;
2813 return res;
2814}
2815
2816
2817/* high level multiplication (handles sign) */
2818#if defined(FREESCALE_LTC_TFM)
2819int wolfcrypt_mp_mul(mp_int *a, mp_int *b, mp_int *c)
2820#else
2821int mp_mul (mp_int * a, mp_int * b, mp_int * c)
2822#endif
2823{
2824 int res, neg;
2825 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
2826
2827 {
2828 /* can we use the fast multiplier?
2829 *
2830 * The fast multiplier can be used if the output will
2831 * have less than MP_WARRAY digits and the number of
2832 * digits won't affect carry propagation
2833 */
2834 int digs = a->used + b->used + 1;
2835
2836#ifdef BN_FAST_S_MP_MUL_DIGS_C
2837 if ((digs < MP_WARRAY) &&
2838 MIN(a->used, b->used) <=
2839 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2840 res = fast_s_mp_mul_digs (a, b, c, digs);
2841 } else
2842#endif
2843#ifdef BN_S_MP_MUL_DIGS_C
2844 res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
2845#else
2846 res = MP_VAL;
2847#endif
2848
2849 }
2850 c->sign = (c->used > 0) ? neg : MP_ZPOS;
2851 return res;
2852}
2853
2854
2855/* b = a*2 */
2856int mp_mul_2(mp_int * a, mp_int * b)
2857{
2858 int x, res, oldused;
2859
2860 /* grow to accommodate result */
2861 if (b->alloc < a->used + 1) {
2862 if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
2863 return res;
2864 }
2865 }
2866
2867 oldused = b->used;
2868 b->used = a->used;
2869
2870 {
2871 mp_digit r, rr, *tmpa, *tmpb;
2872
2873 /* alias for source */
2874 tmpa = a->dp;
2875
2876 /* alias for dest */
2877 tmpb = b->dp;
2878
2879 /* carry */
2880 r = 0;
2881 for (x = 0; x < a->used; x++) {
2882
2883 /* get what will be the *next* carry bit from the
2884 * MSB of the current digit
2885 */
2886 rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
2887
2888 /* now shift up this digit, add in the carry [from the previous] */
2889 *tmpb++ = (mp_digit)(((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK);
2890
2891 /* copy the carry that would be from the source
2892 * digit into the next iteration
2893 */
2894 r = rr;
2895 }
2896
2897 /* new leading digit? */
2898 if (r != 0) {
2899 /* add a MSB which is always 1 at this point */
2900 *tmpb = 1;
2901 ++(b->used);
2902 }
2903
2904 /* now zero any excess digits on the destination
2905 * that we didn't write to
2906 */
2907 tmpb = b->dp + b->used;
2908 for (x = b->used; x < oldused; x++) {
2909 *tmpb++ = 0;
2910 }
2911 }
2912 b->sign = a->sign;
2913 return MP_OKAY;
2914}
2915
2916
2917/* divide by three (based on routine from MPI and the GMP manual) */
2918int mp_div_3 (mp_int * a, mp_int *c, mp_digit * d)
2919{
2920 mp_int q;
2921 mp_word w, t;
2922 mp_digit b;
2923 int res, ix;
2924
2925 /* b = 2**DIGIT_BIT / 3 */
2926 b = (mp_digit) ( (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3) );
2927
2928 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
2929 return res;
2930 }
2931
2932 q.used = a->used;
2933 q.sign = a->sign;
2934 w = 0;
2935 for (ix = a->used - 1; ix >= 0; ix--) {
2936 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
2937
2938 if (w >= 3) {
2939 /* multiply w by [1/3] */
2940 t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
2941
2942 /* now subtract 3 * [w/3] from w, to get the remainder */
2943 w -= t+t+t;
2944
2945 /* fixup the remainder as required since
2946 * the optimization is not exact.
2947 */
2948 while (w >= 3) {
2949 t += 1;
2950 w -= 3;
2951 }
2952 } else {
2953 t = 0;
2954 }
2955 q.dp[ix] = (mp_digit)t;
2956 }
2957
2958 /* [optional] store the remainder */
2959 if (d != NULL) {
2960 *d = (mp_digit)w;
2961 }
2962
2963 /* [optional] store the quotient */
2964 if (c != NULL) {
2965 mp_clamp(&q);
2966 mp_exch(&q, c);
2967 }
2968 mp_clear(&q);
2969
2970 return res;
2971}
2972
2973
2974/* init an mp_init for a given size */
2975int mp_init_size (mp_int * a, int size)
2976{
2977 int x;
2978
2979 /* pad size so there are always extra digits */
2980 size += (MP_PREC * 2) - (size % MP_PREC);
2981
2982 /* alloc mem */
2983 a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size, NULL,
2984 DYNAMIC_TYPE_BIGINT);
2985 if (a->dp == NULL) {
2986 return MP_MEM;
2987 }
2988
2989 /* set the members */
2990 a->used = 0;
2991 a->alloc = size;
2992 a->sign = MP_ZPOS;
2993#ifdef HAVE_WOLF_BIGINT
2994 wc_bigint_init(&a->raw);
2995#endif
2996
2997 /* zero the digits */
2998 for (x = 0; x < size; x++) {
2999 a->dp[x] = 0;
3000 }
3001
3002 return MP_OKAY;
3003}
3004
3005
3006/* the jist of squaring...
3007 * you do like mult except the offset of the tmpx [one that
3008 * starts closer to zero] can't equal the offset of tmpy.
3009 * So basically you set up iy like before then you min it with
3010 * (ty-tx) so that it never happens. You double all those
3011 * you add in the inner loop
3012
3013After that loop you do the squares and add them in.
3014*/
3015
3016int fast_s_mp_sqr (mp_int * a, mp_int * b)
3017{
3018 int olduse, res, pa, ix, iz;
3019#ifdef WOLFSSL_SMALL_STACK
3020 mp_digit* W; /* uses dynamic memory and slower */
3021#else
3022 mp_digit W[MP_WARRAY];
3023#endif
3024 mp_digit *tmpx;
3025 mp_word W1;
3026
3027 /* grow the destination as required */
3028 pa = a->used + a->used;
3029 if (b->alloc < pa) {
3030 if ((res = mp_grow (b, pa)) != MP_OKAY) {
3031 return res;
3032 }
3033 }
3034
3035 if (pa > MP_WARRAY)
3036 return MP_RANGE; /* TAO range check */
3037
3038#ifdef WOLFSSL_SMALL_STACK
3039 W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
3040 if (W == NULL)
3041 return MP_MEM;
3042#endif
3043
3044 /* number of output digits to produce */
3045 W1 = 0;
3046 for (ix = 0; ix < pa; ix++) {
3047 int tx, ty, iy;
3048 mp_word _W;
3049 mp_digit *tmpy;
3050
3051 /* clear counter */
3052 _W = 0;
3053
3054 /* get offsets into the two bignums */
3055 ty = MIN(a->used-1, ix);
3056 tx = ix - ty;
3057
3058 /* setup temp aliases */
3059 tmpx = a->dp + tx;
3060 tmpy = a->dp + ty;
3061
3062 /* this is the number of times the loop will iterate, essentially
3063 while (tx++ < a->used && ty-- >= 0) { ... }
3064 */
3065 iy = MIN(a->used-tx, ty+1);
3066
3067 /* now for squaring tx can never equal ty
3068 * we halve the distance since they approach at a rate of 2x
3069 * and we have to round because odd cases need to be executed
3070 */
3071 iy = MIN(iy, (ty-tx+1)>>1);
3072
3073 /* execute loop */
3074 for (iz = 0; iz < iy; iz++) {
3075 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
3076 }
3077
3078 /* double the inner product and add carry */
3079 _W = _W + _W + W1;
3080
3081 /* even columns have the square term in them */
3082 if ((ix&1) == 0) {
3083 _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
3084 }
3085
3086 /* store it */
3087 W[ix] = (mp_digit)(_W & MP_MASK);
3088
3089 /* make next carry */
3090 W1 = _W >> ((mp_word)DIGIT_BIT);
3091 }
3092
3093 /* setup dest */
3094 olduse = b->used;
3095 b->used = a->used+a->used;
3096
3097 {
3098 mp_digit *tmpb;
3099 tmpb = b->dp;
3100 for (ix = 0; ix < pa; ix++) {
3101 *tmpb++ = (mp_digit)(W[ix] & MP_MASK);
3102 }
3103
3104 /* clear unused digits [that existed in the old copy of c] */
3105 for (; ix < olduse; ix++) {
3106 *tmpb++ = 0;
3107 }
3108 }
3109 mp_clamp (b);
3110
3111#ifdef WOLFSSL_SMALL_STACK
3112 XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
3113#endif
3114
3115 return MP_OKAY;
3116}
3117
3118
3119/* Fast (comba) multiplier
3120 *
3121 * This is the fast column-array [comba] multiplier. It is
3122 * designed to compute the columns of the product first
3123 * then handle the carries afterwards. This has the effect
3124 * of making the nested loops that compute the columns very
3125 * simple and schedulable on super-scalar processors.
3126 *
3127 * This has been modified to produce a variable number of
3128 * digits of output so if say only a half-product is required
3129 * you don't have to compute the upper half (a feature
3130 * required for fast Barrett reduction).
3131 *
3132 * Based on Algorithm 14.12 on pp.595 of HAC.
3133 *
3134 */
3135int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3136{
3137 int olduse, res, pa, ix, iz;
3138#ifdef WOLFSSL_SMALL_STACK
3139 mp_digit* W; /* uses dynamic memory and slower */
3140#else
3141 mp_digit W[MP_WARRAY];
3142#endif
3143 mp_word _W;
3144
3145 /* grow the destination as required */
3146 if (c->alloc < digs) {
3147 if ((res = mp_grow (c, digs)) != MP_OKAY) {
3148 return res;
3149 }
3150 }
3151
3152 /* number of output digits to produce */
3153 pa = MIN(digs, a->used + b->used);
3154 if (pa > MP_WARRAY)
3155 return MP_RANGE; /* TAO range check */
3156
3157#ifdef WOLFSSL_SMALL_STACK
3158 W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
3159 if (W == NULL)
3160 return MP_MEM;
3161#endif
3162
3163 /* clear the carry */
3164 _W = 0;
3165 for (ix = 0; ix < pa; ix++) {
3166 int tx, ty;
3167 int iy;
3168 mp_digit *tmpx, *tmpy;
3169
3170 /* get offsets into the two bignums */
3171 ty = MIN(b->used-1, ix);
3172 tx = ix - ty;
3173
3174 /* setup temp aliases */
3175 tmpx = a->dp + tx;
3176 tmpy = b->dp + ty;
3177
3178 /* this is the number of times the loop will iterate, essentially
3179 while (tx++ < a->used && ty-- >= 0) { ... }
3180 */
3181 iy = MIN(a->used-tx, ty+1);
3182
3183 /* execute loop */
3184 for (iz = 0; iz < iy; ++iz) {
3185 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
3186
3187 }
3188
3189 /* store term */
3190 W[ix] = (mp_digit)(((mp_digit)_W) & MP_MASK);
3191
3192 /* make next carry */
3193 _W = _W >> ((mp_word)DIGIT_BIT);
3194 }
3195
3196 /* setup dest */
3197 olduse = c->used;
3198 c->used = pa;
3199
3200 {
3201 mp_digit *tmpc;
3202 tmpc = c->dp;
3203 for (ix = 0; ix < pa; ix++) { /* JRB, +1 could read uninitialized data */
3204 /* now extract the previous digit [below the carry] */
3205 *tmpc++ = W[ix];
3206 }
3207
3208 /* clear unused digits [that existed in the old copy of c] */
3209 for (; ix < olduse; ix++) {
3210 *tmpc++ = 0;
3211 }
3212 }
3213 mp_clamp (c);
3214
3215#ifdef WOLFSSL_SMALL_STACK
3216 XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
3217#endif
3218
3219 return MP_OKAY;
3220}
3221
3222
3223/* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
3224int s_mp_sqr (mp_int * a, mp_int * b)
3225{
3226 mp_int t;
3227 int res, ix, iy, pa;
3228 mp_word r;
3229 mp_digit u, tmpx, *tmpt;
3230
3231 pa = a->used;
3232 if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
3233 return res;
3234 }
3235
3236 /* default used is maximum possible size */
3237 t.used = 2*pa + 1;
3238
3239 for (ix = 0; ix < pa; ix++) {
3240 /* first calculate the digit at 2*ix */
3241 /* calculate double precision result */
3242 r = ((mp_word) t.dp[2*ix]) +
3243 ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
3244
3245 /* store lower part in result */
3246 t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
3247
3248 /* get the carry */
3249 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3250
3251 /* left hand side of A[ix] * A[iy] */
3252 tmpx = a->dp[ix];
3253
3254 /* alias for where to store the results */
3255 tmpt = t.dp + (2*ix + 1);
3256
3257 for (iy = ix + 1; iy < pa; iy++) {
3258 /* first calculate the product */
3259 r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
3260
3261 /* now calculate the double precision result, note we use
3262 * addition instead of *2 since it's easier to optimize
3263 */
3264 r = ((mp_word) *tmpt) + r + r + ((mp_word) u);
3265
3266 /* store lower part */
3267 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3268
3269 /* get carry */
3270 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3271 }
3272 /* propagate upwards */
3273 while (u != ((mp_digit) 0)) {
3274 r = ((mp_word) *tmpt) + ((mp_word) u);
3275 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3276 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3277 }
3278 }
3279
3280 mp_clamp (&t);
3281 mp_exch (&t, b);
3282 mp_clear (&t);
3283 return MP_OKAY;
3284}
3285
3286
3287/* multiplies |a| * |b| and only computes up to digs digits of result
3288 * HAC pp. 595, Algorithm 14.12 Modified so you can control how
3289 * many digits of output are created.
3290 */
3291int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3292{
3293 mp_int t;
3294 int res, pa, pb, ix, iy;
3295 mp_digit u;
3296 mp_word r;
3297 mp_digit tmpx, *tmpt, *tmpy;
3298
3299 /* can we use the fast multiplier? */
3300 if (((digs) < MP_WARRAY) &&
3301 MIN (a->used, b->used) <
3302 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
3303 return fast_s_mp_mul_digs (a, b, c, digs);
3304 }
3305
3306 if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
3307 return res;
3308 }
3309 t.used = digs;
3310
3311 /* compute the digits of the product directly */
3312 pa = a->used;
3313 for (ix = 0; ix < pa; ix++) {
3314 /* set the carry to zero */
3315 u = 0;
3316
3317 /* limit ourselves to making digs digits of output */
3318 pb = MIN (b->used, digs - ix);
3319
3320 /* setup some aliases */
3321 /* copy of the digit from a used within the nested loop */
3322 tmpx = a->dp[ix];
3323
3324 /* an alias for the destination shifted ix places */
3325 tmpt = t.dp + ix;
3326
3327 /* an alias for the digits of b */
3328 tmpy = b->dp;
3329
3330 /* compute the columns of the output and propagate the carry */
3331 for (iy = 0; iy < pb; iy++) {
3332 /* compute the column as a mp_word */
3333 r = ((mp_word)*tmpt) +
3334 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
3335 ((mp_word) u);
3336
3337 /* the new column is the lower part of the result */
3338 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3339
3340 /* get the carry word from the result */
3341 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
3342 }
3343 /* set carry if it is placed below digs */
3344 if (ix + iy < digs) {
3345 *tmpt = u;
3346 }
3347 }
3348
3349 mp_clamp (&t);
3350 mp_exch (&t, c);
3351
3352 mp_clear (&t);
3353 return MP_OKAY;
3354}
3355
3356
3357/*
3358 * shifts with subtractions when the result is greater than b.
3359 *
3360 * The method is slightly modified to shift B unconditionally up to just under
3361 * the leading bit of b. This saves a lot of multiple precision shifting.
3362 */
3363int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
3364{
3365 int x, bits, res;
3366
3367 /* how many bits of last digit does b use */
3368 bits = mp_count_bits (b) % DIGIT_BIT;
3369
3370 if (b->used > 1) {
3371 if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1))
3372 != MP_OKAY) {
3373 return res;
3374 }
3375 } else {
3376 if ((res = mp_set(a, 1)) != MP_OKAY) {
3377 return res;
3378 }
3379 bits = 1;
3380 }
3381
3382 /* now compute C = A * B mod b */
3383 for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
3384 if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
3385 return res;
3386 }
3387 if (mp_cmp_mag (a, b) != MP_LT) {
3388 if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
3389 return res;
3390 }
3391 }
3392 }
3393
3394 return MP_OKAY;
3395}
3396
3397
3398#ifdef MP_LOW_MEM
3399 #define TAB_SIZE 32
3400#else
3401 #define TAB_SIZE 256
3402#endif
3403
3404int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
3405{
3406 mp_int M[TAB_SIZE], res, mu;
3407 mp_digit buf;
3408 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
3409 int (*redux)(mp_int*,mp_int*,mp_int*);
3410
3411 /* find window size */
3412 x = mp_count_bits (X);
3413 if (x <= 7) {
3414 winsize = 2;
3415 } else if (x <= 36) {
3416 winsize = 3;
3417 } else if (x <= 140) {
3418 winsize = 4;
3419 } else if (x <= 450) {
3420 winsize = 5;
3421 } else if (x <= 1303) {
3422 winsize = 6;
3423 } else if (x <= 3529) {
3424 winsize = 7;
3425 } else {
3426 winsize = 8;
3427 }
3428
3429#ifdef MP_LOW_MEM
3430 if (winsize > 5) {
3431 winsize = 5;
3432 }
3433#endif
3434
3435 /* init M array */
3436 /* init first cell */
3437 if ((err = mp_init(&M[1])) != MP_OKAY) {
3438 return err;
3439 }
3440
3441 /* now init the second half of the array */
3442 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3443 if ((err = mp_init(&M[x])) != MP_OKAY) {
3444 for (y = 1<<(winsize-1); y < x; y++) {
3445 mp_clear (&M[y]);
3446 }
3447 mp_clear(&M[1]);
3448 return err;
3449 }
3450 }
3451
3452 /* create mu, used for Barrett reduction */
3453 if ((err = mp_init (&mu)) != MP_OKAY) {
3454 goto LBL_M;
3455 }
3456
3457 if (redmode == 0) {
3458 if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
3459 goto LBL_MU;
3460 }
3461 redux = mp_reduce;
3462 } else {
3463 if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
3464 goto LBL_MU;
3465 }
3466 redux = mp_reduce_2k_l;
3467 }
3468
3469 /* create M table
3470 *
3471 * The M table contains powers of the base,
3472 * e.g. M[x] = G**x mod P
3473 *
3474 * The first half of the table is not
3475 * computed though accept for M[0] and M[1]
3476 */
3477 if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
3478 goto LBL_MU;
3479 }
3480
3481 /* compute the value at M[1<<(winsize-1)] by squaring
3482 * M[1] (winsize-1) times
3483 */
3484 if ((err = mp_copy (&M[1], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
3485 goto LBL_MU;
3486 }
3487
3488 for (x = 0; x < (winsize - 1); x++) {
3489 /* square it */
3490 if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))],
3491 &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
3492 goto LBL_MU;
3493 }
3494
3495 /* reduce modulo P */
3496 if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, &mu)) != MP_OKAY) {
3497 goto LBL_MU;
3498 }
3499 }
3500
3501 /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
3502 * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
3503 */
3504 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
3505 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
3506 goto LBL_MU;
3507 }
3508 if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
3509 goto LBL_MU;
3510 }
3511 }
3512
3513 /* setup result */
3514 if ((err = mp_init (&res)) != MP_OKAY) {
3515 goto LBL_MU;
3516 }
3517 if ((err = mp_set (&res, 1)) != MP_OKAY) {
3518 goto LBL_MU;
3519 }
3520
3521 /* set initial mode and bit cnt */
3522 mode = 0;
3523 bitcnt = 1;
3524 buf = 0;
3525 digidx = X->used - 1;
3526 bitcpy = 0;
3527 bitbuf = 0;
3528
3529 for (;;) {
3530 /* grab next digit as required */
3531 if (--bitcnt == 0) {
3532 /* if digidx == -1 we are out of digits */
3533 if (digidx == -1) {
3534 break;
3535 }
3536 /* read next digit and reset the bitcnt */
3537 buf = X->dp[digidx--];
3538 bitcnt = (int) DIGIT_BIT;
3539 }
3540
3541 /* grab the next msb from the exponent */
3542 y = (int)(buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
3543 buf <<= (mp_digit)1;
3544
3545 /* if the bit is zero and mode == 0 then we ignore it
3546 * These represent the leading zero bits before the first 1 bit
3547 * in the exponent. Technically this opt is not required but it
3548 * does lower the # of trivial squaring/reductions used
3549 */
3550 if (mode == 0 && y == 0) {
3551 continue;
3552 }
3553
3554 /* if the bit is zero and mode == 1 then we square */
3555 if (mode == 1 && y == 0) {
3556 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3557 goto LBL_RES;
3558 }
3559 if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3560 goto LBL_RES;
3561 }
3562 continue;
3563 }
3564
3565 /* else we add it to the window */
3566 bitbuf |= (y << (winsize - ++bitcpy));
3567 mode = 2;
3568
3569 if (bitcpy == winsize) {
3570 /* ok window is filled so square as required and multiply */
3571 /* square first */
3572 for (x = 0; x < winsize; x++) {
3573 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3574 goto LBL_RES;
3575 }
3576 if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3577 goto LBL_RES;
3578 }
3579 }
3580
3581 /* then multiply */
3582 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
3583 goto LBL_RES;
3584 }
3585 if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3586 goto LBL_RES;
3587 }
3588
3589 /* empty window and reset */
3590 bitcpy = 0;
3591 bitbuf = 0;
3592 mode = 1;
3593 }
3594 }
3595
3596 /* if bits remain then square/multiply */
3597 if (mode == 2 && bitcpy > 0) {
3598 /* square then multiply if the bit is set */
3599 for (x = 0; x < bitcpy; x++) {
3600 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3601 goto LBL_RES;
3602 }
3603 if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3604 goto LBL_RES;
3605 }
3606
3607 bitbuf <<= 1;
3608 if ((bitbuf & (1 << winsize)) != 0) {
3609 /* then multiply */
3610 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
3611 goto LBL_RES;
3612 }
3613 if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3614 goto LBL_RES;
3615 }
3616 }
3617 }
3618 }
3619
3620 mp_exch (&res, Y);
3621 err = MP_OKAY;
3622LBL_RES:mp_clear (&res);
3623LBL_MU:mp_clear (&mu);
3624LBL_M:
3625 mp_clear(&M[1]);
3626 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3627 mp_clear (&M[x]);
3628 }
3629 return err;
3630}
3631
3632
3633/* pre-calculate the value required for Barrett reduction
3634 * For a given modulus "b" it calculates the value required in "a"
3635 */
3636int mp_reduce_setup (mp_int * a, mp_int * b)
3637{
3638 int res;
3639
3640 if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
3641 return res;
3642 }
3643 return mp_div (a, b, a, NULL);
3644}
3645
3646
3647/* reduces x mod m, assumes 0 < x < m**2, mu is
3648 * precomputed via mp_reduce_setup.
3649 * From HAC pp.604 Algorithm 14.42
3650 */
3651int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
3652{
3653 mp_int q;
3654 int res, um = m->used;
3655
3656 /* q = x */
3657 if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
3658 return res;
3659 }
3660
3661 /* q1 = x / b**(k-1) */
3662 mp_rshd (&q, um - 1);
3663
3664 /* according to HAC this optimization is ok */
3665 if (((mp_word) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
3666 if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
3667 goto CLEANUP;
3668 }
3669 } else {
3670#ifdef BN_S_MP_MUL_HIGH_DIGS_C
3671 if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
3672 goto CLEANUP;
3673 }
3674#elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
3675 if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
3676 goto CLEANUP;
3677 }
3678#else
3679 {
3680 res = MP_VAL;
3681 goto CLEANUP;
3682 }
3683#endif
3684 }
3685
3686 /* q3 = q2 / b**(k+1) */
3687 mp_rshd (&q, um + 1);
3688
3689 /* x = x mod b**(k+1), quick (no division) */
3690 if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
3691 goto CLEANUP;
3692 }
3693
3694 /* q = q * m mod b**(k+1), quick (no division) */
3695 if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
3696 goto CLEANUP;
3697 }
3698
3699 /* x = x - q */
3700 if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
3701 goto CLEANUP;
3702 }
3703
3704 /* If x < 0, add b**(k+1) to it */
3705 if (mp_cmp_d (x, 0) == MP_LT) {
3706 if ((res = mp_set (&q, 1)) != MP_OKAY)
3707 goto CLEANUP;
3708 if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
3709 goto CLEANUP;
3710 if ((res = mp_add (x, &q, x)) != MP_OKAY)
3711 goto CLEANUP;
3712 }
3713
3714 /* Back off if it's too big */
3715 while (mp_cmp (x, m) != MP_LT) {
3716 if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
3717 goto CLEANUP;
3718 }
3719 }
3720
3721CLEANUP:
3722 mp_clear (&q);
3723
3724 return res;
3725}
3726
3727
3728/* reduces a modulo n where n is of the form 2**p - d
3729 This differs from reduce_2k since "d" can be larger
3730 than a single digit.
3731*/
3732int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d)
3733{
3734 mp_int q;
3735 int p, res;
3736
3737 if ((res = mp_init(&q)) != MP_OKAY) {
3738 return res;
3739 }
3740
3741 p = mp_count_bits(n);
3742top:
3743 /* q = a/2**p, a = a mod 2**p */
3744 if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
3745 goto ERR;
3746 }
3747
3748 /* q = q * d */
3749 if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
3750 goto ERR;
3751 }
3752
3753 /* a = a + q */
3754 if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
3755 goto ERR;
3756 }
3757
3758 if (mp_cmp_mag(a, n) != MP_LT) {
3759 if ((res = s_mp_sub(a, n, a)) != MP_OKAY) {
3760 goto ERR;
3761 }
3762 goto top;
3763 }
3764
3765ERR:
3766 mp_clear(&q);
3767 return res;
3768}
3769
3770
3771/* determines the setup value */
3772int mp_reduce_2k_setup_l(mp_int *a, mp_int *d)
3773{
3774 int res;
3775 mp_int tmp;
3776
3777 if ((res = mp_init(&tmp)) != MP_OKAY) {
3778 return res;
3779 }
3780
3781 if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
3782 goto ERR;
3783 }
3784
3785 if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
3786 goto ERR;
3787 }
3788
3789ERR:
3790 mp_clear(&tmp);
3791 return res;
3792}
3793
3794
3795/* multiplies |a| * |b| and does not compute the lower digs digits
3796 * [meant to get the higher part of the product]
3797 */
3798int s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3799{
3800 mp_int t;
3801 int res, pa, pb, ix, iy;
3802 mp_digit u;
3803 mp_word r;
3804 mp_digit tmpx, *tmpt, *tmpy;
3805
3806 /* can we use the fast multiplier? */
3807#ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
3808 if (((a->used + b->used + 1) < MP_WARRAY)
3809 && MIN (a->used, b->used) <
3810 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
3811 return fast_s_mp_mul_high_digs (a, b, c, digs);
3812 }
3813#endif
3814
3815 if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
3816 return res;
3817 }
3818 t.used = a->used + b->used + 1;
3819
3820 pa = a->used;
3821 pb = b->used;
3822 for (ix = 0; ix < pa && a->dp; ix++) {
3823 /* clear the carry */
3824 u = 0;
3825
3826 /* left hand side of A[ix] * B[iy] */
3827 tmpx = a->dp[ix];
3828
3829 /* alias to the address of where the digits will be stored */
3830 tmpt = &(t.dp[digs]);
3831
3832 /* alias for where to read the right hand side from */
3833 tmpy = b->dp + (digs - ix);
3834
3835 for (iy = digs - ix; iy < pb; iy++) {
3836 /* calculate the double precision result */
3837 r = ((mp_word)*tmpt) +
3838 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
3839 ((mp_word) u);
3840
3841 /* get the lower part */
3842 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3843
3844 /* carry the carry */
3845 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
3846 }
3847 *tmpt = u;
3848 }
3849 mp_clamp (&t);
3850 mp_exch (&t, c);
3851 mp_clear (&t);
3852 return MP_OKAY;
3853}
3854
3855
3856/* this is a modified version of fast_s_mul_digs that only produces
3857 * output digits *above* digs. See the comments for fast_s_mul_digs
3858 * to see how it works.
3859 *
3860 * This is used in the Barrett reduction since for one of the multiplications
3861 * only the higher digits were needed. This essentially halves the work.
3862 *
3863 * Based on Algorithm 14.12 on pp.595 of HAC.
3864 */
3865int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3866{
3867 int olduse, res, pa, ix, iz;
3868#ifdef WOLFSSL_SMALL_STACK
3869 mp_digit* W; /* uses dynamic memory and slower */
3870#else
3871 mp_digit W[MP_WARRAY];
3872#endif
3873 mp_word _W;
3874
3875 if (a->dp == NULL) { /* JRB, avoid reading uninitialized values */
3876 return MP_VAL;
3877 }
3878
3879 /* grow the destination as required */
3880 pa = a->used + b->used;
3881 if (c->alloc < pa) {
3882 if ((res = mp_grow (c, pa)) != MP_OKAY) {
3883 return res;
3884 }
3885 }
3886
3887 if (pa > MP_WARRAY)
3888 return MP_RANGE; /* TAO range check */
3889
3890#ifdef WOLFSSL_SMALL_STACK
3891 W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
3892 if (W == NULL)
3893 return MP_MEM;
3894#endif
3895
3896 /* number of output digits to produce */
3897 pa = a->used + b->used;
3898 _W = 0;
3899 for (ix = digs; ix < pa; ix++) { /* JRB, have a->dp check at top of function*/
3900 int tx, ty, iy;
3901 mp_digit *tmpx, *tmpy;
3902
3903 /* get offsets into the two bignums */
3904 ty = MIN(b->used-1, ix);
3905 tx = ix - ty;
3906
3907 /* setup temp aliases */
3908 tmpx = a->dp + tx;
3909 tmpy = b->dp + ty;
3910
3911 /* this is the number of times the loop will iterate, essentially its
3912 while (tx++ < a->used && ty-- >= 0) { ... }
3913 */
3914 iy = MIN(a->used-tx, ty+1);
3915
3916 /* execute loop */
3917 for (iz = 0; iz < iy; iz++) {
3918 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
3919 }
3920
3921 /* store term */
3922 W[ix] = (mp_digit)(((mp_digit)_W) & MP_MASK);
3923
3924 /* make next carry */
3925 _W = _W >> ((mp_word)DIGIT_BIT);
3926 }
3927
3928 /* setup dest */
3929 olduse = c->used;
3930 c->used = pa;
3931
3932 {
3933 mp_digit *tmpc;
3934
3935 tmpc = c->dp + digs;
3936 for (ix = digs; ix < pa; ix++) { /* TAO, <= could potentially overwrite */
3937 /* now extract the previous digit [below the carry] */
3938 *tmpc++ = W[ix];
3939 }
3940
3941 /* clear unused digits [that existed in the old copy of c] */
3942 for (; ix < olduse; ix++) {
3943 *tmpc++ = 0;
3944 }
3945 }
3946 mp_clamp (c);
3947
3948#ifdef WOLFSSL_SMALL_STACK
3949 XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
3950#endif
3951
3952 return MP_OKAY;
3953}
3954
3955
3956#ifndef MP_SET_CHUNK_BITS
3957 #define MP_SET_CHUNK_BITS 4
3958#endif
3959int mp_set_int (mp_int * a, unsigned long b)
3960{
3961 int x, res;
3962
3963 /* use direct mp_set if b is less than mp_digit max */
3964 if (b < MP_DIGIT_MAX) {
3965 return mp_set (a, (mp_digit)b);
3966 }
3967
3968 mp_zero (a);
3969
3970 /* set chunk bits at a time */
3971 for (x = 0; x < (int)(sizeof(b) * 8) / MP_SET_CHUNK_BITS; x++) {
3972 /* shift the number up chunk bits */
3973 if ((res = mp_mul_2d (a, MP_SET_CHUNK_BITS, a)) != MP_OKAY) {
3974 return res;
3975 }
3976
3977 /* OR in the top bits of the source */
3978 a->dp[0] |= (b >> ((sizeof(b) * 8) - MP_SET_CHUNK_BITS)) &
3979 ((1 << MP_SET_CHUNK_BITS) - 1);
3980
3981 /* shift the source up to the next chunk bits */
3982 b <<= MP_SET_CHUNK_BITS;
3983
3984 /* ensure that digits are not clamped off */
3985 a->used += 1;
3986 }
3987 mp_clamp (a);
3988 return MP_OKAY;
3989}
3990
3991
3992#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_ECC) || !defined(NO_RSA) || \
3993 !defined(NO_DSA) | !defined(NO_DH)
3994
3995/* c = a * a (mod b) */
3996int mp_sqrmod (mp_int * a, mp_int * b, mp_int * c)
3997{
3998 int res;
3999 mp_int t;
4000
4001 if ((res = mp_init (&t)) != MP_OKAY) {
4002 return res;
4003 }
4004
4005 if ((res = mp_sqr (a, &t)) != MP_OKAY) {
4006 mp_clear (&t);
4007 return res;
4008 }
4009 res = mp_mod (&t, b, c);
4010 mp_clear (&t);
4011 return res;
4012}
4013
4014#endif
4015
4016
4017#if defined(HAVE_ECC) || !defined(NO_PWDBASED) || defined(WOLFSSL_SNIFFER) || \
4018 defined(WOLFSSL_HAVE_WOLFSCEP) || defined(WOLFSSL_KEY_GEN) || \
4019 defined(OPENSSL_EXTRA) || defined(WC_RSA_BLINDING) || \
4020 (!defined(NO_RSA) && !defined(NO_RSA_BOUNDS_CHECK))
4021
4022/* single digit addition */
4023int mp_add_d (mp_int* a, mp_digit b, mp_int* c)
4024{
4025 int res, ix, oldused;
4026 mp_digit *tmpa, *tmpc, mu;
4027
4028 /* grow c as required */
4029 if (c->alloc < a->used + 1) {
4030 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
4031 return res;
4032 }
4033 }
4034
4035 /* if a is negative and |a| >= b, call c = |a| - b */
4036 if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) {
4037 /* temporarily fix sign of a */
4038 a->sign = MP_ZPOS;
4039
4040 /* c = |a| - b */
4041 res = mp_sub_d(a, b, c);
4042
4043 /* fix sign */
4044 a->sign = c->sign = MP_NEG;
4045
4046 /* clamp */
4047 mp_clamp(c);
4048
4049 return res;
4050 }
4051
4052 /* old number of used digits in c */
4053 oldused = c->used;
4054
4055 /* sign always positive */
4056 c->sign = MP_ZPOS;
4057
4058 /* source alias */
4059 tmpa = a->dp;
4060
4061 /* destination alias */
4062 tmpc = c->dp;
4063
4064 /* if a is positive */
4065 if (a->sign == MP_ZPOS) {
4066 /* add digit, after this we're propagating
4067 * the carry.
4068 */
4069 *tmpc = *tmpa++ + b;
4070 mu = *tmpc >> DIGIT_BIT;
4071 *tmpc++ &= MP_MASK;
4072
4073 /* now handle rest of the digits */
4074 for (ix = 1; ix < a->used; ix++) {
4075 *tmpc = *tmpa++ + mu;
4076 mu = *tmpc >> DIGIT_BIT;
4077 *tmpc++ &= MP_MASK;
4078 }
4079 /* set final carry */
4080 if (ix < c->alloc) {
4081 ix++;
4082 *tmpc++ = mu;
4083 }
4084
4085 /* setup size */
4086 c->used = a->used + 1;
4087 } else {
4088 /* a was negative and |a| < b */
4089 c->used = 1;
4090
4091 /* the result is a single digit */
4092 if (a->used == 1) {
4093 *tmpc++ = b - a->dp[0];
4094 } else {
4095 *tmpc++ = b;
4096 }
4097
4098 /* setup count so the clearing of oldused
4099 * can fall through correctly
4100 */
4101 ix = 1;
4102 }
4103
4104 /* now zero to oldused */
4105 while (ix++ < oldused) {
4106 *tmpc++ = 0;
4107 }
4108 mp_clamp(c);
4109
4110 return MP_OKAY;
4111}
4112
4113
4114/* single digit subtraction */
4115int mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
4116{
4117 mp_digit *tmpa, *tmpc, mu;
4118 int res, ix, oldused;
4119
4120 /* grow c as required */
4121 if (c->alloc < a->used + 1) {
4122 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
4123 return res;
4124 }
4125 }
4126
4127 /* if a is negative just do an unsigned
4128 * addition [with fudged signs]
4129 */
4130 if (a->sign == MP_NEG) {
4131 a->sign = MP_ZPOS;
4132 res = mp_add_d(a, b, c);
4133 a->sign = c->sign = MP_NEG;
4134
4135 /* clamp */
4136 mp_clamp(c);
4137
4138 return res;
4139 }
4140
4141 /* setup regs */
4142 oldused = c->used;
4143 tmpa = a->dp;
4144 tmpc = c->dp;
4145
4146 /* if a <= b simply fix the single digit */
4147 if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) {
4148 if (a->used == 1) {
4149 *tmpc++ = b - *tmpa;
4150 } else {
4151 *tmpc++ = b;
4152 }
4153 ix = 1;
4154
4155 /* negative/1digit */
4156 c->sign = MP_NEG;
4157 c->used = 1;
4158 } else {
4159 /* positive/size */
4160 c->sign = MP_ZPOS;
4161 c->used = a->used;
4162
4163 /* subtract first digit */
4164 *tmpc = *tmpa++ - b;
4165 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
4166 *tmpc++ &= MP_MASK;
4167
4168 /* handle rest of the digits */
4169 for (ix = 1; ix < a->used; ix++) {
4170 *tmpc = *tmpa++ - mu;
4171 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
4172 *tmpc++ &= MP_MASK;
4173 }
4174 }
4175
4176 /* zero excess digits */
4177 while (ix++ < oldused) {
4178 *tmpc++ = 0;
4179 }
4180 mp_clamp(c);
4181 return MP_OKAY;
4182}
4183
4184#endif /* defined(HAVE_ECC) || !defined(NO_PWDBASED) */
4185
4186
4187#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || defined(HAVE_ECC) || \
4188 defined(DEBUG_WOLFSSL) || !defined(NO_RSA) || !defined(NO_DSA) || \
4189 !defined(NO_DH)
4190
4191static const int lnz[16] = {
4192 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
4193};
4194
4195/* Counts the number of lsbs which are zero before the first zero bit */
4196int mp_cnt_lsb(mp_int *a)
4197{
4198 int x;
4199 mp_digit q = 0, qq;
4200
4201 /* easy out */
4202 if (mp_iszero(a) == MP_YES) {
4203 return 0;
4204 }
4205
4206 /* scan lower digits until non-zero */
4207 for (x = 0; x < a->used && a->dp[x] == 0; x++) {}
4208 if (a->dp)
4209 q = a->dp[x];
4210 x *= DIGIT_BIT;
4211
4212 /* now scan this digit until a 1 is found */
4213 if ((q & 1) == 0) {
4214 do {
4215 qq = q & 15;
4216 x += lnz[qq];
4217 q >>= 4;
4218 } while (qq == 0);
4219 }
4220 return x;
4221}
4222
4223
4224
4225
4226static int s_is_power_of_two(mp_digit b, int *p)
4227{
4228 int x;
4229
4230 /* fast return if no power of two */
4231 if ((b==0) || (b & (b-1))) {
4232 return 0;
4233 }
4234
4235 for (x = 0; x < DIGIT_BIT; x++) {
4236 if (b == (((mp_digit)1)<<x)) {
4237 *p = x;
4238 return 1;
4239 }
4240 }
4241 return 0;
4242}
4243
4244/* single digit division (based on routine from MPI) */
4245static int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
4246{
4247 mp_int q;
4248 mp_word w;
4249 mp_digit t;
4250 int res = MP_OKAY, ix;
4251
4252 /* cannot divide by zero */
4253 if (b == 0) {
4254 return MP_VAL;
4255 }
4256
4257 /* quick outs */
4258 if (b == 1 || mp_iszero(a) == MP_YES) {
4259 if (d != NULL) {
4260 *d = 0;
4261 }
4262 if (c != NULL) {
4263 return mp_copy(a, c);
4264 }
4265 return MP_OKAY;
4266 }
4267
4268 /* power of two ? */
4269 if (s_is_power_of_two(b, &ix) == 1) {
4270 if (d != NULL) {
4271 *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
4272 }
4273 if (c != NULL) {
4274 return mp_div_2d(a, ix, c, NULL);
4275 }
4276 return MP_OKAY;
4277 }
4278
4279#ifdef BN_MP_DIV_3_C
4280 /* three? */
4281 if (b == 3) {
4282 return mp_div_3(a, c, d);
4283 }
4284#endif
4285
4286 /* no easy answer [c'est la vie]. Just division */
4287 if (c != NULL) {
4288 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
4289 return res;
4290 }
4291
4292 q.used = a->used;
4293 q.sign = a->sign;
4294 }
4295 else {
4296 if ((res = mp_init(&q)) != MP_OKAY) {
4297 return res;
4298 }
4299 }
4300
4301
4302 w = 0;
4303 for (ix = a->used - 1; ix >= 0; ix--) {
4304 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
4305
4306 if (w >= b) {
4307 t = (mp_digit)(w / b);
4308 w -= ((mp_word)t) * ((mp_word)b);
4309 } else {
4310 t = 0;
4311 }
4312 if (c != NULL)
4313 q.dp[ix] = (mp_digit)t;
4314 }
4315
4316 if (d != NULL) {
4317 *d = (mp_digit)w;
4318 }
4319
4320 if (c != NULL) {
4321 mp_clamp(&q);
4322 mp_exch(&q, c);
4323 }
4324 mp_clear(&q);
4325
4326 return res;
4327}
4328
4329
4330int mp_mod_d (mp_int * a, mp_digit b, mp_digit * c)
4331{
4332 return mp_div_d(a, b, NULL, c);
4333}
4334
4335#endif /* WOLFSSL_KEY_GEN || HAVE_COMP_KEY || HAVE_ECC || DEBUG_WOLFSSL */
4336
4337#if defined(WOLFSSL_KEY_GEN) || !defined(NO_DH) || !defined(NO_DSA) || !defined(NO_RSA)
4338
4339const mp_digit ltm_prime_tab[PRIME_SIZE] = {
4340 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
4341 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
4342 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
4343 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F,
4344#ifndef MP_8BIT
4345 0x0083,
4346 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
4347 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
4348 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
4349 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
4350
4351 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
4352 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
4353 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
4354 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
4355 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
4356 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
4357 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
4358 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
4359
4360 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
4361 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
4362 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
4363 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
4364 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
4365 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
4366 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
4367 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
4368
4369 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
4370 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
4371 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
4372 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
4373 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
4374 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
4375 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
4376 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
4377#endif
4378};
4379
4380
4381/* Miller-Rabin test of "a" to the base of "b" as described in
4382 * HAC pp. 139 Algorithm 4.24
4383 *
4384 * Sets result to 0 if definitely composite or 1 if probably prime.
4385 * Randomly the chance of error is no more than 1/4 and often
4386 * very much lower.
4387 */
4388static int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
4389{
4390 mp_int n1, y, r;
4391 int s, j, err;
4392
4393 /* default */
4394 *result = MP_NO;
4395
4396 /* ensure b > 1 */
4397 if (mp_cmp_d(b, 1) != MP_GT) {
4398 return MP_VAL;
4399 }
4400
4401 /* get n1 = a - 1 */
4402 if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
4403 return err;
4404 }
4405 if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
4406 goto LBL_N1;
4407 }
4408
4409 /* set 2**s * r = n1 */
4410 if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
4411 goto LBL_N1;
4412 }
4413
4414 /* count the number of least significant bits
4415 * which are zero
4416 */
4417 s = mp_cnt_lsb(&r);
4418
4419 /* now divide n - 1 by 2**s */
4420 if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
4421 goto LBL_R;
4422 }
4423
4424 /* compute y = b**r mod a */
4425 if ((err = mp_init (&y)) != MP_OKAY) {
4426 goto LBL_R;
4427 }
4428 if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
4429 goto LBL_Y;
4430 }
4431
4432 /* if y != 1 and y != n1 do */
4433 if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
4434 j = 1;
4435 /* while j <= s-1 and y != n1 */
4436 while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
4437 if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
4438 goto LBL_Y;
4439 }
4440
4441 /* if y == 1 then composite */
4442 if (mp_cmp_d (&y, 1) == MP_EQ) {
4443 goto LBL_Y;
4444 }
4445
4446 ++j;
4447 }
4448
4449 /* if y != n1 then composite */
4450 if (mp_cmp (&y, &n1) != MP_EQ) {
4451 goto LBL_Y;
4452 }
4453 }
4454
4455 /* probably prime now */
4456 *result = MP_YES;
4457LBL_Y:mp_clear (&y);
4458LBL_R:mp_clear (&r);
4459LBL_N1:mp_clear (&n1);
4460 return err;
4461}
4462
4463
4464/* determines if an integers is divisible by one
4465 * of the first PRIME_SIZE primes or not
4466 *
4467 * sets result to 0 if not, 1 if yes
4468 */
4469static int mp_prime_is_divisible (mp_int * a, int *result)
4470{
4471 int err, ix;
4472 mp_digit res;
4473
4474 /* default to not */
4475 *result = MP_NO;
4476
4477 for (ix = 0; ix < PRIME_SIZE; ix++) {
4478 /* what is a mod LBL_prime_tab[ix] */
4479 if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) {
4480 return err;
4481 }
4482
4483 /* is the residue zero? */
4484 if (res == 0) {
4485 *result = MP_YES;
4486 return MP_OKAY;
4487 }
4488 }
4489
4490 return MP_OKAY;
4491}
4492
4493/*
4494 * Sets result to 1 if probably prime, 0 otherwise
4495 */
4496int mp_prime_is_prime (mp_int * a, int t, int *result)
4497{
4498 mp_int b;
4499 int ix, err, res;
4500
4501 /* default to no */
4502 *result = MP_NO;
4503
4504 /* valid value of t? */
4505 if (t <= 0 || t > PRIME_SIZE) {
4506 return MP_VAL;
4507 }
4508
4509 /* is the input equal to one of the primes in the table? */
4510 for (ix = 0; ix < PRIME_SIZE; ix++) {
4511 if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
4512 *result = 1;
4513 return MP_OKAY;
4514 }
4515 }
4516
4517 /* first perform trial division */
4518 if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
4519 return err;
4520 }
4521
4522 /* return if it was trivially divisible */
4523 if (res == MP_YES) {
4524 return MP_OKAY;
4525 }
4526
4527 /* now perform the miller-rabin rounds */
4528 if ((err = mp_init (&b)) != MP_OKAY) {
4529 return err;
4530 }
4531
4532 for (ix = 0; ix < t; ix++) {
4533 /* set the prime */
4534 if ((err = mp_set (&b, ltm_prime_tab[ix])) != MP_OKAY) {
4535 goto LBL_B;
4536 }
4537
4538 if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
4539 goto LBL_B;
4540 }
4541
4542 if (res == MP_NO) {
4543 goto LBL_B;
4544 }
4545 }
4546
4547 /* passed the test */
4548 *result = MP_YES;
4549LBL_B:mp_clear (&b);
4550 return err;
4551}
4552
4553
4554/*
4555 * Sets result to 1 if probably prime, 0 otherwise
4556 */
4557int mp_prime_is_prime_ex (mp_int * a, int t, int *result, WC_RNG *rng)
4558{
4559 mp_int b, c;
4560 int ix, err, res;
4561 byte* base = NULL;
4562 word32 baseSz = 0;
4563
4564 /* default to no */
4565 *result = MP_NO;
4566
4567 /* valid value of t? */
4568 if (t <= 0 || t > PRIME_SIZE) {
4569 return MP_VAL;
4570 }
4571
4572 /* is the input equal to one of the primes in the table? */
4573 for (ix = 0; ix < PRIME_SIZE; ix++) {
4574 if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
4575 *result = MP_YES;
4576 return MP_OKAY;
4577 }
4578 }
4579
4580 /* first perform trial division */
4581 if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
4582 return err;
4583 }
4584
4585 /* return if it was trivially divisible */
4586 if (res == MP_YES) {
4587 return MP_OKAY;
4588 }
4589
4590 /* now perform the miller-rabin rounds */
4591 if ((err = mp_init (&b)) != MP_OKAY) {
4592 return err;
4593 }
4594 if ((err = mp_init (&c)) != MP_OKAY) {
4595 mp_clear(&b);
4596 return err;
4597 }
4598
4599 baseSz = mp_count_bits(a);
4600 baseSz = (baseSz / 8) + ((baseSz % 8) ? 1 : 0);
4601
4602 base = (byte*)XMALLOC(baseSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
4603 if (base == NULL) {
4604 err = MP_MEM;
4605 goto LBL_B;
4606 }
4607
4608 if ((err = mp_sub_d(a, 2, &c)) != MP_OKAY) {
4609 goto LBL_B;
4610 }
4611
4612 /* now do a miller rabin with up to t random numbers, this should
4613 * give a (1/4)^t chance of a false prime. */
4614 for (ix = 0; ix < t; ix++) {
4615 /* Set a test candidate. */
4616 if ((err = wc_RNG_GenerateBlock(rng, base, baseSz)) != 0) {
4617 goto LBL_B;
4618 }
4619
4620 if ((err = mp_read_unsigned_bin(&b, base, baseSz)) != MP_OKAY) {
4621 goto LBL_B;
4622 }
4623
4624 if (mp_cmp_d(&b, 2) != MP_GT || mp_cmp(&b, &c) != MP_LT)
4625 continue;
4626
4627 if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
4628 goto LBL_B;
4629 }
4630
4631 if (res == MP_NO) {
4632 goto LBL_B;
4633 }
4634 }
4635
4636 /* passed the test */
4637 *result = MP_YES;
4638LBL_B:mp_clear (&b);
4639 mp_clear (&c);
4640 XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER);
4641 return err;
4642}
4643
4644#endif /* WOLFSSL_KEY_GEN NO_DH NO_DSA NO_RSA */
4645
4646#ifdef WOLFSSL_KEY_GEN
4647
4648static const int USE_BBS = 1;
4649
4650int mp_rand_prime(mp_int* N, int len, WC_RNG* rng, void* heap)
4651{
4652 int err, res, type;
4653 byte* buf;
4654
4655 if (N == NULL || rng == NULL)
4656 return MP_VAL;
4657
4658 /* get type */
4659 if (len < 0) {
4660 type = USE_BBS;
4661 len = -len;
4662 } else {
4663 type = 0;
4664 }
4665
4666 /* allow sizes between 2 and 512 bytes for a prime size */
4667 if (len < 2 || len > 512) {
4668 return MP_VAL;
4669 }
4670
4671 /* allocate buffer to work with */
4672 buf = (byte*)XMALLOC(len, heap, DYNAMIC_TYPE_RSA);
4673 if (buf == NULL) {
4674 return MP_MEM;
4675 }
4676 XMEMSET(buf, 0, len);
4677
4678 do {
4679#ifdef SHOW_GEN
4680 printf(".");
4681 fflush(stdout);
4682#endif
4683 /* generate value */
4684 err = wc_RNG_GenerateBlock(rng, buf, len);
4685 if (err != 0) {
4686 XFREE(buf, heap, DYNAMIC_TYPE_RSA);
4687 return err;
4688 }
4689
4690 /* munge bits */
4691 buf[0] |= 0x80 | 0x40;
4692 buf[len-1] |= 0x01 | ((type & USE_BBS) ? 0x02 : 0x00);
4693
4694 /* load value */
4695 if ((err = mp_read_unsigned_bin(N, buf, len)) != MP_OKAY) {
4696 XFREE(buf, heap, DYNAMIC_TYPE_RSA);
4697 return err;
4698 }
4699
4700 /* test */
4701 /* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance
4702 * of a 1024-bit candidate being a false positive, when it is our
4703 * prime candidate. (Note 4.49 of Handbook of Applied Cryptography.)
4704 * Using 8 because we've always used 8. */
4705 if ((err = mp_prime_is_prime_ex(N, 8, &res, rng)) != MP_OKAY) {
4706 XFREE(buf, heap, DYNAMIC_TYPE_RSA);
4707 return err;
4708 }
4709 } while (res == MP_NO);
4710
4711 XMEMSET(buf, 0, len);
4712 XFREE(buf, heap, DYNAMIC_TYPE_RSA);
4713
4714 return MP_OKAY;
4715}
4716
4717
4718/* computes least common multiple as |a*b|/(a, b) */
4719int mp_lcm (mp_int * a, mp_int * b, mp_int * c)
4720{
4721 int res;
4722 mp_int t1, t2;
4723
4724
4725 if ((res = mp_init_multi (&t1, &t2, NULL, NULL, NULL, NULL)) != MP_OKAY) {
4726 return res;
4727 }
4728
4729 /* t1 = get the GCD of the two inputs */
4730 if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
4731 goto LBL_T;
4732 }
4733
4734 /* divide the smallest by the GCD */
4735 if (mp_cmp_mag(a, b) == MP_LT) {
4736 /* store quotient in t2 such that t2 * b is the LCM */
4737 if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
4738 goto LBL_T;
4739 }
4740 res = mp_mul(b, &t2, c);
4741 } else {
4742 /* store quotient in t2 such that t2 * a is the LCM */
4743 if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
4744 goto LBL_T;
4745 }
4746 res = mp_mul(a, &t2, c);
4747 }
4748
4749 /* fix the sign to positive */
4750 c->sign = MP_ZPOS;
4751
4752LBL_T:
4753 mp_clear(&t1);
4754 mp_clear(&t2);
4755 return res;
4756}
4757
4758
4759
4760/* Greatest Common Divisor using the binary method */
4761int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
4762{
4763 mp_int u, v;
4764 int k, u_lsb, v_lsb, res;
4765
4766 /* either zero than gcd is the largest */
4767 if (mp_iszero (a) == MP_YES) {
4768 return mp_abs (b, c);
4769 }
4770 if (mp_iszero (b) == MP_YES) {
4771 return mp_abs (a, c);
4772 }
4773
4774 /* get copies of a and b we can modify */
4775 if ((res = mp_init_copy (&u, a)) != MP_OKAY) {
4776 return res;
4777 }
4778
4779 if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
4780 goto LBL_U;
4781 }
4782
4783 /* must be positive for the remainder of the algorithm */
4784 u.sign = v.sign = MP_ZPOS;
4785
4786 /* B1. Find the common power of two for u and v */
4787 u_lsb = mp_cnt_lsb(&u);
4788 v_lsb = mp_cnt_lsb(&v);
4789 k = MIN(u_lsb, v_lsb);
4790
4791 if (k > 0) {
4792 /* divide the power of two out */
4793 if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
4794 goto LBL_V;
4795 }
4796
4797 if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
4798 goto LBL_V;
4799 }
4800 }
4801
4802 /* divide any remaining factors of two out */
4803 if (u_lsb != k) {
4804 if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
4805 goto LBL_V;
4806 }
4807 }
4808
4809 if (v_lsb != k) {
4810 if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
4811 goto LBL_V;
4812 }
4813 }
4814
4815 while (mp_iszero(&v) == MP_NO) {
4816 /* make sure v is the largest */
4817 if (mp_cmp_mag(&u, &v) == MP_GT) {
4818 /* swap u and v to make sure v is >= u */
4819 mp_exch(&u, &v);
4820 }
4821
4822 /* subtract smallest from largest */
4823 if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
4824 goto LBL_V;
4825 }
4826
4827 /* Divide out all factors of two */
4828 if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
4829 goto LBL_V;
4830 }
4831 }
4832
4833 /* multiply by 2**k which we divided out at the beginning */
4834 if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
4835 goto LBL_V;
4836 }
4837 c->sign = MP_ZPOS;
4838 res = MP_OKAY;
4839LBL_V:mp_clear (&u);
4840LBL_U:mp_clear (&v);
4841 return res;
4842}
4843
4844#endif /* WOLFSSL_KEY_GEN */
4845
4846
4847#if !defined(NO_DSA) || defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || \
4848 defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) || \
4849 defined(DEBUG_WOLFSSL) || defined(OPENSSL_EXTRA)
4850
4851/* chars used in radix conversions */
4852const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ\
4853 abcdefghijklmnopqrstuvwxyz+/";
4854#endif
4855
4856#if !defined(NO_DSA) || defined(HAVE_ECC)
4857/* read a string [ASCII] in a given radix */
4858int mp_read_radix (mp_int * a, const char *str, int radix)
4859{
4860 int y, res, neg;
4861 char ch;
4862
4863 /* zero the digit bignum */
4864 mp_zero(a);
4865
4866 /* make sure the radix is ok */
4867 if (radix < MP_RADIX_BIN || radix > MP_RADIX_MAX) {
4868 return MP_VAL;
4869 }
4870
4871 /* if the leading digit is a
4872 * minus set the sign to negative.
4873 */
4874 if (*str == '-') {
4875 ++str;
4876 neg = MP_NEG;
4877 } else {
4878 neg = MP_ZPOS;
4879 }
4880
4881 /* set the integer to the default of zero */
4882 mp_zero (a);
4883
4884 /* process each digit of the string */
4885 while (*str != '\0') {
4886 /* if the radix <= 36 the conversion is case insensitive
4887 * this allows numbers like 1AB and 1ab to represent the same value
4888 * [e.g. in hex]
4889 */
4890 ch = (radix <= 36) ? (char)XTOUPPER((unsigned char)*str) : *str;
4891 for (y = 0; y < 64; y++) {
4892 if (ch == mp_s_rmap[y]) {
4893 break;
4894 }
4895 }
4896
4897 /* if the char was found in the map
4898 * and is less than the given radix add it
4899 * to the number, otherwise exit the loop.
4900 */
4901 if (y < radix) {
4902 if ((res = mp_mul_d (a, (mp_digit) radix, a)) != MP_OKAY) {
4903 return res;
4904 }
4905 if ((res = mp_add_d (a, (mp_digit) y, a)) != MP_OKAY) {
4906 return res;
4907 }
4908 } else {
4909 break;
4910 }
4911 ++str;
4912 }
4913
4914 /* if digit in isn't null term, then invalid character was found */
4915 if (*str != '\0') {
4916 mp_zero (a);
4917 return MP_VAL;
4918 }
4919
4920 /* set the sign only if a != 0 */
4921 if (mp_iszero(a) != MP_YES) {
4922 a->sign = neg;
4923 }
4924 return MP_OKAY;
4925}
4926#endif /* !defined(NO_DSA) || defined(HAVE_ECC) */
4927
4928#ifdef WC_MP_TO_RADIX
4929
4930/* returns size of ASCII representation */
4931int mp_radix_size (mp_int *a, int radix, int *size)
4932{
4933 int res, digs;
4934 mp_int t;
4935 mp_digit d;
4936
4937 *size = 0;
4938
4939 /* special case for binary */
4940 if (radix == MP_RADIX_BIN) {
4941 *size = mp_count_bits (a) + (a->sign == MP_NEG ? 1 : 0) + 1;
4942 return MP_OKAY;
4943 }
4944
4945 /* make sure the radix is in range */
4946 if (radix < MP_RADIX_BIN || radix > MP_RADIX_MAX) {
4947 return MP_VAL;
4948 }
4949
4950 if (mp_iszero(a) == MP_YES) {
4951 *size = 2;
4952 return MP_OKAY;
4953 }
4954
4955 /* digs is the digit count */
4956 digs = 0;
4957
4958 /* if it's negative add one for the sign */
4959 if (a->sign == MP_NEG) {
4960 ++digs;
4961 }
4962
4963 /* init a copy of the input */
4964 if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
4965 return res;
4966 }
4967
4968 /* force temp to positive */
4969 t.sign = MP_ZPOS;
4970
4971 /* fetch out all of the digits */
4972 while (mp_iszero (&t) == MP_NO) {
4973 if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
4974 mp_clear (&t);
4975 return res;
4976 }
4977 ++digs;
4978 }
4979 mp_clear (&t);
4980
4981 /* return digs + 1, the 1 is for the NULL byte that would be required. */
4982 *size = digs + 1;
4983 return MP_OKAY;
4984}
4985
4986/* stores a bignum as a ASCII string in a given radix (2..64) */
4987int mp_toradix (mp_int *a, char *str, int radix)
4988{
4989 int res, digs;
4990 mp_int t;
4991 mp_digit d;
4992 char *_s = str;
4993
4994 /* check range of the radix */
4995 if (radix < MP_RADIX_BIN || radix > MP_RADIX_MAX) {
4996 return MP_VAL;
4997 }
4998
4999 /* quick out if its zero */
5000 if (mp_iszero(a) == MP_YES) {
5001 *str++ = '0';
5002 *str = '\0';
5003 return MP_OKAY;
5004 }
5005
5006 if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
5007 return res;
5008 }
5009
5010 /* if it is negative output a - */
5011 if (t.sign == MP_NEG) {
5012 ++_s;
5013 *str++ = '-';
5014 t.sign = MP_ZPOS;
5015 }
5016
5017 digs = 0;
5018 while (mp_iszero (&t) == MP_NO) {
5019 if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
5020 mp_clear (&t);
5021 return res;
5022 }
5023 *str++ = mp_s_rmap[d];
5024 ++digs;
5025 }
5026
5027 /* reverse the digits of the string. In this case _s points
5028 * to the first digit [excluding the sign] of the number]
5029 */
5030 bn_reverse ((unsigned char *)_s, digs);
5031
5032 /* append a NULL so the string is properly terminated */
5033 *str = '\0';
5034
5035 mp_clear (&t);
5036 return MP_OKAY;
5037}
5038
5039#ifdef WOLFSSL_DEBUG_MATH
5040void mp_dump(const char* desc, mp_int* a, byte verbose)
5041{
5042 char *buffer;
5043 int size = a->alloc;
5044
5045 buffer = (char*)XMALLOC(size * sizeof(mp_digit) * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
5046 if (buffer == NULL) {
5047 return;
5048 }
5049
5050 printf("%s: ptr=%p, used=%d, sign=%d, size=%d, mpd=%d\n",
5051 desc, a, a->used, a->sign, size, (int)sizeof(mp_digit));
5052
5053 mp_tohex(a, buffer);
5054 printf(" %s\n ", buffer);
5055
5056 if (verbose) {
5057 int i;
5058 for(i=0; i<a->alloc * (int)sizeof(mp_digit); i++) {
5059 printf("%02x ", *(((byte*)a->dp) + i));
5060 }
5061 printf("\n");
5062 }
5063
5064 XFREE(buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
5065}
5066#endif /* WOLFSSL_DEBUG_MATH */
5067
5068#endif /* WC_MP_TO_RADIX */
5069
5070#endif /* WOLFSSL_SP_MATH */
5071
5072#endif /* USE_FAST_MATH */
5073
5074#endif /* NO_BIG_INT */
5075
Note: See TracBrowser for help on using the repository browser.