source: asp3_tinet_ecnl_arm/trunk/wolfssl-3.12.2/wolfcrypt/src/integer.c@ 352

Last change on this file since 352 was 352, checked in by coas-nagasima, 6 years ago

arm向けASP3版ECNLを追加

  • Property svn:eol-style set to native
  • Property svn:mime-type set to text/x-csrc;charset=UTF-8
File size: 107.5 KB
Line 
1/* integer.c
2 *
3 * Copyright (C) 2006-2017 wolfSSL Inc.
4 *
5 * This file is part of wolfSSL.
6 *
7 * wolfSSL is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * wolfSSL is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20 */
21
22
23
24/*
25 * Based on public domain LibTomMath 0.38 by Tom St Denis, tomstdenis@iahu.ca,
26 * http://math.libtomcrypt.com
27 */
28
29
30#ifdef HAVE_CONFIG_H
31 #include <config.h>
32#endif
33
34/* in case user set USE_FAST_MATH there */
35#include <wolfssl/wolfcrypt/settings.h>
36
37#ifdef NO_INLINE
38 #include <wolfssl/wolfcrypt/misc.h>
39#else
40 #define WOLFSSL_MISC_INCLUDED
41 #include <wolfcrypt/src/misc.c>
42#endif
43
44#ifndef NO_BIG_INT
45
46#ifndef USE_FAST_MATH
47
48#include <wolfssl/wolfcrypt/integer.h>
49
50#if defined(FREESCALE_LTC_TFM)
51 #include <wolfssl/wolfcrypt/port/nxp/ksdk_port.h>
52#endif
53#ifdef WOLFSSL_DEBUG_MATH
54 #include <stdio.h>
55#endif
56
57#ifndef NO_WOLFSSL_SMALL_STACK
58 #ifndef WOLFSSL_SMALL_STACK
59 #define WOLFSSL_SMALL_STACK
60 #endif
61#endif
62
63#ifdef SHOW_GEN
64 #if defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
65 #if MQX_USE_IO_OLD
66 #include <fio.h>
67 #else
68 #include <nio.h>
69 #endif
70 #else
71 #include <stdio.h>
72 #endif
73#endif
74
75/* reverse an array, used for radix code */
76static void
77bn_reverse (unsigned char *s, int len)
78{
79 int ix, iy;
80 unsigned char t;
81
82 ix = 0;
83 iy = len - 1;
84 while (ix < iy) {
85 t = s[ix];
86 s[ix] = s[iy];
87 s[iy] = t;
88 ++ix;
89 --iy;
90 }
91}
92
93/* math settings check */
94word32 CheckRunTimeSettings(void)
95{
96 return CTC_SETTINGS;
97}
98
99
100/* handle up to 6 inits */
101int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e,
102 mp_int* f)
103{
104 int res = MP_OKAY;
105
106 if (a) XMEMSET(a, 0, sizeof(mp_int));
107 if (b) XMEMSET(b, 0, sizeof(mp_int));
108 if (c) XMEMSET(c, 0, sizeof(mp_int));
109 if (d) XMEMSET(d, 0, sizeof(mp_int));
110 if (e) XMEMSET(e, 0, sizeof(mp_int));
111 if (f) XMEMSET(f, 0, sizeof(mp_int));
112
113 if (a && ((res = mp_init(a)) != MP_OKAY))
114 return res;
115
116 if (b && ((res = mp_init(b)) != MP_OKAY)) {
117 mp_clear(a);
118 return res;
119 }
120
121 if (c && ((res = mp_init(c)) != MP_OKAY)) {
122 mp_clear(a); mp_clear(b);
123 return res;
124 }
125
126 if (d && ((res = mp_init(d)) != MP_OKAY)) {
127 mp_clear(a); mp_clear(b); mp_clear(c);
128 return res;
129 }
130
131 if (e && ((res = mp_init(e)) != MP_OKAY)) {
132 mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d);
133 return res;
134 }
135
136 if (f && ((res = mp_init(f)) != MP_OKAY)) {
137 mp_clear(a); mp_clear(b); mp_clear(c); mp_clear(d); mp_clear(e);
138 return res;
139 }
140
141 return res;
142}
143
144
145/* init a new mp_int */
146int mp_init (mp_int * a)
147{
148 /* Safeguard against passing in a null pointer */
149 if (a == NULL)
150 return MP_VAL;
151
152 /* defer allocation until mp_grow */
153 a->dp = NULL;
154
155 /* set the used to zero, allocated digits to the default precision
156 * and sign to positive */
157 a->used = 0;
158 a->alloc = 0;
159 a->sign = MP_ZPOS;
160#ifdef HAVE_WOLF_BIGINT
161 wc_bigint_init(&a->raw);
162#endif
163
164 return MP_OKAY;
165}
166
167
168/* clear one (frees) */
169void mp_clear (mp_int * a)
170{
171 int i;
172
173 if (a == NULL)
174 return;
175
176 /* only do anything if a hasn't been freed previously */
177 if (a->dp != NULL) {
178 /* first zero the digits */
179 for (i = 0; i < a->used; i++) {
180 a->dp[i] = 0;
181 }
182
183 /* free ram */
184 mp_free(a);
185
186 /* reset members to make debugging easier */
187 a->alloc = a->used = 0;
188 a->sign = MP_ZPOS;
189 }
190}
191
192void mp_free (mp_int * a)
193{
194 /* only do anything if a hasn't been freed previously */
195 if (a->dp != NULL) {
196 /* free ram */
197 XFREE(a->dp, 0, DYNAMIC_TYPE_BIGINT);
198 a->dp = NULL;
199 }
200
201#ifdef HAVE_WOLF_BIGINT
202 wc_bigint_free(&a->raw);
203#endif
204}
205
206void mp_forcezero(mp_int * a)
207{
208 if (a == NULL)
209 return;
210
211 /* only do anything if a hasn't been freed previously */
212 if (a->dp != NULL) {
213 /* force zero the used digits */
214 ForceZero(a->dp, a->used * sizeof(mp_digit));
215#ifdef HAVE_WOLF_BIGINT
216 wc_bigint_zero(&a->raw);
217#endif
218 /* free ram */
219 mp_free(a);
220
221 /* reset members to make debugging easier */
222 a->alloc = a->used = 0;
223 a->sign = MP_ZPOS;
224 }
225
226 a->sign = MP_ZPOS;
227 a->used = 0;
228}
229
230
231/* get the size for an unsigned equivalent */
232int mp_unsigned_bin_size (mp_int * a)
233{
234 int size = mp_count_bits (a);
235 return (size / 8 + ((size & 7) != 0 ? 1 : 0));
236}
237
238
239/* returns the number of bits in an int */
240int mp_count_bits (mp_int * a)
241{
242 int r;
243 mp_digit q;
244
245 /* shortcut */
246 if (a->used == 0) {
247 return 0;
248 }
249
250 /* get number of digits and add that */
251 r = (a->used - 1) * DIGIT_BIT;
252
253 /* take the last digit and count the bits in it */
254 q = a->dp[a->used - 1];
255 while (q > ((mp_digit) 0)) {
256 ++r;
257 q >>= ((mp_digit) 1);
258 }
259 return r;
260}
261
262
263int mp_leading_bit (mp_int * a)
264{
265 int bit = 0;
266 mp_int t;
267
268 if (mp_init_copy(&t, a) != MP_OKAY)
269 return 0;
270
271 while (mp_iszero(&t) == MP_NO) {
272#ifndef MP_8BIT
273 bit = (t.dp[0] & 0x80) != 0;
274#else
275 bit = (t.dp[0] | ((t.dp[1] & 0x01) << 7)) & 0x80 != 0;
276#endif
277 if (mp_div_2d (&t, 8, &t, NULL) != MP_OKAY)
278 break;
279 }
280 mp_clear(&t);
281 return bit;
282}
283
284int mp_to_unsigned_bin_at_pos(int x, mp_int *t, unsigned char *b)
285{
286 int res = 0;
287 while (mp_iszero(t) == MP_NO) {
288#ifndef MP_8BIT
289 b[x++] = (unsigned char) (t->dp[0] & 255);
290#else
291 b[x++] = (unsigned char) (t->dp[0] | ((t->dp[1] & 0x01) << 7));
292#endif
293 if ((res = mp_div_2d (t, 8, t, NULL)) != MP_OKAY) {
294 return res;
295 }
296 res = x;
297 }
298 return res;
299}
300
301/* store in unsigned [big endian] format */
302int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
303{
304 int x, res;
305 mp_int t;
306
307 if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
308 return res;
309 }
310
311 x = mp_to_unsigned_bin_at_pos(0, &t, b);
312 if (x < 0) {
313 mp_clear(&t);
314 return x;
315 }
316
317 bn_reverse (b, x);
318 mp_clear (&t);
319 return res;
320}
321
322
323/* creates "a" then copies b into it */
324int mp_init_copy (mp_int * a, mp_int * b)
325{
326 int res;
327
328 if ((res = mp_init_size (a, b->used)) != MP_OKAY) {
329 return res;
330 }
331
332 if((res = mp_copy (b, a)) != MP_OKAY) {
333 mp_clear(a);
334 }
335
336 return res;
337}
338
339
340/* copy, b = a */
341int mp_copy (mp_int * a, mp_int * b)
342{
343 int res, n;
344
345 /* Safeguard against passing in a null pointer */
346 if (a == NULL || b == NULL)
347 return MP_VAL;
348
349 /* if dst == src do nothing */
350 if (a == b) {
351 return MP_OKAY;
352 }
353
354 /* grow dest */
355 if (b->alloc < a->used || b->alloc == 0) {
356 if ((res = mp_grow (b, a->used)) != MP_OKAY) {
357 return res;
358 }
359 }
360
361 /* zero b and copy the parameters over */
362 {
363 mp_digit *tmpa, *tmpb;
364
365 /* pointer aliases */
366
367 /* source */
368 tmpa = a->dp;
369
370 /* destination */
371 tmpb = b->dp;
372
373 /* copy all the digits */
374 for (n = 0; n < a->used; n++) {
375 *tmpb++ = *tmpa++;
376 }
377
378 /* clear high digits */
379 for (; n < b->used && b->dp; n++) {
380 *tmpb++ = 0;
381 }
382 }
383
384 /* copy used count and sign */
385 b->used = a->used;
386 b->sign = a->sign;
387 return MP_OKAY;
388}
389
390
391/* grow as required */
392int mp_grow (mp_int * a, int size)
393{
394 int i;
395 mp_digit *tmp;
396
397 /* if the alloc size is smaller alloc more ram */
398 if (a->alloc < size || size == 0) {
399 /* ensure there are always at least MP_PREC digits extra on top */
400 size += (MP_PREC * 2) - (size % MP_PREC);
401
402 /* reallocate the array a->dp
403 *
404 * We store the return in a temporary variable
405 * in case the operation failed we don't want
406 * to overwrite the dp member of a.
407 */
408 tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size, NULL,
409 DYNAMIC_TYPE_BIGINT);
410 if (tmp == NULL) {
411 /* reallocation failed but "a" is still valid [can be freed] */
412 return MP_MEM;
413 }
414
415 /* reallocation succeeded so set a->dp */
416 a->dp = tmp;
417
418 /* zero excess digits */
419 i = a->alloc;
420 a->alloc = size;
421 for (; i < a->alloc; i++) {
422 a->dp[i] = 0;
423 }
424 }
425 return MP_OKAY;
426}
427
428
429/* shift right by a certain bit count (store quotient in c, optional
430 remainder in d) */
431int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
432{
433 int D, res;
434 mp_int t;
435
436
437 /* if the shift count is <= 0 then we do no work */
438 if (b <= 0) {
439 res = mp_copy (a, c);
440 if (d != NULL) {
441 mp_zero (d);
442 }
443 return res;
444 }
445
446 if ((res = mp_init (&t)) != MP_OKAY) {
447 return res;
448 }
449
450 /* get the remainder */
451 if (d != NULL) {
452 if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
453 mp_clear (&t);
454 return res;
455 }
456 }
457
458 /* copy */
459 if ((res = mp_copy (a, c)) != MP_OKAY) {
460 mp_clear (&t);
461 return res;
462 }
463
464 /* shift by as many digits in the bit count */
465 if (b >= (int)DIGIT_BIT) {
466 mp_rshd (c, b / DIGIT_BIT);
467 }
468
469 /* shift any bit count < DIGIT_BIT */
470 D = (b % DIGIT_BIT);
471 if (D != 0) {
472 mp_rshb(c, D);
473 }
474 mp_clamp (c);
475 if (d != NULL) {
476 mp_exch (&t, d);
477 }
478 mp_clear (&t);
479 return MP_OKAY;
480}
481
482
483/* set to zero */
484void mp_zero (mp_int * a)
485{
486 int n;
487 mp_digit *tmp;
488
489 if (a == NULL)
490 return;
491
492 a->sign = MP_ZPOS;
493 a->used = 0;
494
495 tmp = a->dp;
496 for (n = 0; n < a->alloc; n++) {
497 *tmp++ = 0;
498 }
499}
500
501
502/* trim unused digits
503 *
504 * This is used to ensure that leading zero digits are
505 * trimmed and the leading "used" digit will be non-zero
506 * Typically very fast. Also fixes the sign if there
507 * are no more leading digits
508 */
509void mp_clamp (mp_int * a)
510{
511 /* decrease used while the most significant digit is
512 * zero.
513 */
514 while (a->used > 0 && a->dp[a->used - 1] == 0) {
515 --(a->used);
516 }
517
518 /* reset the sign flag if used == 0 */
519 if (a->used == 0) {
520 a->sign = MP_ZPOS;
521 }
522}
523
524
525/* swap the elements of two integers, for cases where you can't simply swap the
526 * mp_int pointers around
527 */
528void mp_exch (mp_int * a, mp_int * b)
529{
530 mp_int t;
531
532 t = *a;
533 *a = *b;
534 *b = t;
535}
536
537
538/* shift right a certain number of bits */
539void mp_rshb (mp_int *c, int x)
540{
541 mp_digit *tmpc, mask, shift;
542 mp_digit r, rr;
543 mp_digit D = x;
544
545 /* mask */
546 mask = (((mp_digit)1) << D) - 1;
547
548 /* shift for lsb */
549 shift = DIGIT_BIT - D;
550
551 /* alias */
552 tmpc = c->dp + (c->used - 1);
553
554 /* carry */
555 r = 0;
556 for (x = c->used - 1; x >= 0; x--) {
557 /* get the lower bits of this word in a temp */
558 rr = *tmpc & mask;
559
560 /* shift the current word and mix in the carry bits from previous word */
561 *tmpc = (*tmpc >> D) | (r << shift);
562 --tmpc;
563
564 /* set the carry to the carry bits of the current word found above */
565 r = rr;
566 }
567 mp_clamp(c);
568}
569
570
571/* shift right a certain amount of digits */
572void mp_rshd (mp_int * a, int b)
573{
574 int x;
575
576 /* if b <= 0 then ignore it */
577 if (b <= 0) {
578 return;
579 }
580
581 /* if b > used then simply zero it and return */
582 if (a->used <= b) {
583 mp_zero (a);
584 return;
585 }
586
587 {
588 mp_digit *bottom, *top;
589
590 /* shift the digits down */
591
592 /* bottom */
593 bottom = a->dp;
594
595 /* top [offset into digits] */
596 top = a->dp + b;
597
598 /* this is implemented as a sliding window where
599 * the window is b-digits long and digits from
600 * the top of the window are copied to the bottom
601 *
602 * e.g.
603
604 b-2 | b-1 | b0 | b1 | b2 | ... | bb | ---->
605 /\ | ---->
606 \-------------------/ ---->
607 */
608 for (x = 0; x < (a->used - b); x++) {
609 *bottom++ = *top++;
610 }
611
612 /* zero the top digits */
613 for (; x < a->used; x++) {
614 *bottom++ = 0;
615 }
616 }
617
618 /* remove excess digits */
619 a->used -= b;
620}
621
622
623/* calc a value mod 2**b */
624int mp_mod_2d (mp_int * a, int b, mp_int * c)
625{
626 int x, res;
627
628 /* if b is <= 0 then zero the int */
629 if (b <= 0) {
630 mp_zero (c);
631 return MP_OKAY;
632 }
633
634 /* if the modulus is larger than the value than return */
635 if (b >= (int) (a->used * DIGIT_BIT)) {
636 res = mp_copy (a, c);
637 return res;
638 }
639
640 /* copy */
641 if ((res = mp_copy (a, c)) != MP_OKAY) {
642 return res;
643 }
644
645 /* zero digits above the last digit of the modulus */
646 for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
647 c->dp[x] = 0;
648 }
649 /* clear the digit that is not completely outside/inside the modulus */
650 c->dp[b / DIGIT_BIT] &= (mp_digit) ((((mp_digit) 1) <<
651 (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
652 mp_clamp (c);
653 return MP_OKAY;
654}
655
656
657/* reads a unsigned char array, assumes the msb is stored first [big endian] */
658int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
659{
660 int res;
661
662 /* make sure there are at least two digits */
663 if (a->alloc < 2) {
664 if ((res = mp_grow(a, 2)) != MP_OKAY) {
665 return res;
666 }
667 }
668
669 /* zero the int */
670 mp_zero (a);
671
672 /* read the bytes in */
673 while (c-- > 0) {
674 if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
675 return res;
676 }
677
678#ifndef MP_8BIT
679 a->dp[0] |= *b++;
680 a->used += 1;
681#else
682 a->dp[0] = (*b & MP_MASK);
683 a->dp[1] |= ((*b++ >> 7U) & 1);
684 a->used += 2;
685#endif
686 }
687 mp_clamp (a);
688 return MP_OKAY;
689}
690
691
692/* shift left by a certain bit count */
693int mp_mul_2d (mp_int * a, int b, mp_int * c)
694{
695 mp_digit d;
696 int res;
697
698 /* copy */
699 if (a != c) {
700 if ((res = mp_copy (a, c)) != MP_OKAY) {
701 return res;
702 }
703 }
704
705 if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
706 if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
707 return res;
708 }
709 }
710
711 /* shift by as many digits in the bit count */
712 if (b >= (int)DIGIT_BIT) {
713 if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
714 return res;
715 }
716 }
717
718 /* shift any bit count < DIGIT_BIT */
719 d = (mp_digit) (b % DIGIT_BIT);
720 if (d != 0) {
721 mp_digit *tmpc, shift, mask, r, rr;
722 int x;
723
724 /* bitmask for carries */
725 mask = (((mp_digit)1) << d) - 1;
726
727 /* shift for msbs */
728 shift = DIGIT_BIT - d;
729
730 /* alias */
731 tmpc = c->dp;
732
733 /* carry */
734 r = 0;
735 for (x = 0; x < c->used; x++) {
736 /* get the higher bits of the current word */
737 rr = (*tmpc >> shift) & mask;
738
739 /* shift the current word and OR in the carry */
740 *tmpc = (mp_digit)(((*tmpc << d) | r) & MP_MASK);
741 ++tmpc;
742
743 /* set the carry to the carry bits of the current word */
744 r = rr;
745 }
746
747 /* set final carry */
748 if (r != 0) {
749 c->dp[(c->used)++] = r;
750 }
751 }
752 mp_clamp (c);
753 return MP_OKAY;
754}
755
756
757/* shift left a certain amount of digits */
758int mp_lshd (mp_int * a, int b)
759{
760 int x, res;
761
762 /* if its less than zero return */
763 if (b <= 0) {
764 return MP_OKAY;
765 }
766
767 /* grow to fit the new digits */
768 if (a->alloc < a->used + b) {
769 if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
770 return res;
771 }
772 }
773
774 {
775 mp_digit *top, *bottom;
776
777 /* increment the used by the shift amount then copy upwards */
778 a->used += b;
779
780 /* top */
781 top = a->dp + a->used - 1;
782
783 /* base */
784 bottom = a->dp + a->used - 1 - b;
785
786 /* much like mp_rshd this is implemented using a sliding window
787 * except the window goes the other way around. Copying from
788 * the bottom to the top. see bn_mp_rshd.c for more info.
789 */
790 for (x = a->used - 1; x >= b; x--) {
791 *top-- = *bottom--;
792 }
793
794 /* zero the lower digits */
795 top = a->dp;
796 for (x = 0; x < b; x++) {
797 *top++ = 0;
798 }
799 }
800 return MP_OKAY;
801}
802
803
804/* this is a shell function that calls either the normal or Montgomery
805 * exptmod functions. Originally the call to the montgomery code was
806 * embedded in the normal function but that wasted a lot of stack space
807 * for nothing (since 99% of the time the Montgomery code would be called)
808 */
809#if defined(FREESCALE_LTC_TFM)
810int wolfcrypt_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
811#else
812int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
813#endif
814{
815 int dr;
816
817 /* modulus P must be positive */
818 if (P->sign == MP_NEG) {
819 return MP_VAL;
820 }
821
822 /* if exponent X is negative we have to recurse */
823 if (X->sign == MP_NEG) {
824#ifdef BN_MP_INVMOD_C
825 mp_int tmpG, tmpX;
826 int err;
827
828 /* first compute 1/G mod P */
829 if ((err = mp_init(&tmpG)) != MP_OKAY) {
830 return err;
831 }
832 if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
833 mp_clear(&tmpG);
834 return err;
835 }
836
837 /* now get |X| */
838 if ((err = mp_init(&tmpX)) != MP_OKAY) {
839 mp_clear(&tmpG);
840 return err;
841 }
842 if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
843 mp_clear(&tmpG);
844 mp_clear(&tmpX);
845 return err;
846 }
847
848 /* and now compute (1/G)**|X| instead of G**X [X < 0] */
849 err = mp_exptmod(&tmpG, &tmpX, P, Y);
850 mp_clear(&tmpG);
851 mp_clear(&tmpX);
852 return err;
853#else
854 /* no invmod */
855 return MP_VAL;
856#endif
857 }
858
859/* modified diminished radix reduction */
860#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && \
861 defined(BN_S_MP_EXPTMOD_C)
862 if (mp_reduce_is_2k_l(P) == MP_YES) {
863 return s_mp_exptmod(G, X, P, Y, 1);
864 }
865#endif
866
867#ifdef BN_MP_DR_IS_MODULUS_C
868 /* is it a DR modulus? */
869 dr = mp_dr_is_modulus(P);
870#else
871 /* default to no */
872 dr = 0;
873#endif
874
875#ifdef BN_MP_REDUCE_IS_2K_C
876 /* if not, is it a unrestricted DR modulus? */
877 if (dr == 0) {
878 dr = mp_reduce_is_2k(P) << 1;
879 }
880#endif
881
882 /* if the modulus is odd or dr != 0 use the montgomery method */
883#ifdef BN_MP_EXPTMOD_FAST_C
884 if (mp_isodd (P) == MP_YES || dr != 0) {
885 return mp_exptmod_fast (G, X, P, Y, dr);
886 } else {
887#endif
888#ifdef BN_S_MP_EXPTMOD_C
889 /* otherwise use the generic Barrett reduction technique */
890 return s_mp_exptmod (G, X, P, Y, 0);
891#else
892 /* no exptmod for evens */
893 return MP_VAL;
894#endif
895#ifdef BN_MP_EXPTMOD_FAST_C
896 }
897#endif
898}
899
900
901/* b = |a|
902 *
903 * Simple function copies the input and fixes the sign to positive
904 */
905int mp_abs (mp_int * a, mp_int * b)
906{
907 int res;
908
909 /* copy a to b */
910 if (a != b) {
911 if ((res = mp_copy (a, b)) != MP_OKAY) {
912 return res;
913 }
914 }
915
916 /* force the sign of b to positive */
917 b->sign = MP_ZPOS;
918
919 return MP_OKAY;
920}
921
922
923/* hac 14.61, pp608 */
924#if defined(FREESCALE_LTC_TFM)
925int wolfcrypt_mp_invmod(mp_int * a, mp_int * b, mp_int * c)
926#else
927int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
928#endif
929{
930 /* b cannot be negative */
931 if (b->sign == MP_NEG || mp_iszero(b) == MP_YES) {
932 return MP_VAL;
933 }
934
935#ifdef BN_FAST_MP_INVMOD_C
936 /* if the modulus is odd we can use a faster routine instead */
937 if ((mp_isodd(b) == MP_YES) && (mp_cmp_d(b, 1) != MP_EQ)) {
938 return fast_mp_invmod (a, b, c);
939 }
940#endif
941
942#ifdef BN_MP_INVMOD_SLOW_C
943 return mp_invmod_slow(a, b, c);
944#else
945 return MP_VAL;
946#endif
947}
948
949
950/* computes the modular inverse via binary extended euclidean algorithm,
951 * that is c = 1/a mod b
952 *
953 * Based on slow invmod except this is optimized for the case where b is
954 * odd as per HAC Note 14.64 on pp. 610
955 */
956int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
957{
958 mp_int x, y, u, v, B, D;
959 int res, neg, loop_check = 0;
960
961 /* 2. [modified] b must be odd */
962 if (mp_iseven (b) == MP_YES) {
963 return MP_VAL;
964 }
965
966 /* init all our temps */
967 if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D)) != MP_OKAY) {
968 return res;
969 }
970
971 /* x == modulus, y == value to invert */
972 if ((res = mp_copy (b, &x)) != MP_OKAY) {
973 goto LBL_ERR;
974 }
975
976 /* we need y = |a| */
977 if ((res = mp_mod (a, b, &y)) != MP_OKAY) {
978 goto LBL_ERR;
979 }
980
981 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
982 if ((res = mp_copy (&x, &u)) != MP_OKAY) {
983 goto LBL_ERR;
984 }
985 if ((res = mp_copy (&y, &v)) != MP_OKAY) {
986 goto LBL_ERR;
987 }
988 if ((res = mp_set (&D, 1)) != MP_OKAY) {
989 goto LBL_ERR;
990 }
991
992top:
993 /* 4. while u is even do */
994 while (mp_iseven (&u) == MP_YES) {
995 /* 4.1 u = u/2 */
996 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
997 goto LBL_ERR;
998 }
999 /* 4.2 if B is odd then */
1000 if (mp_isodd (&B) == MP_YES) {
1001 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
1002 goto LBL_ERR;
1003 }
1004 }
1005 /* B = B/2 */
1006 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
1007 goto LBL_ERR;
1008 }
1009 }
1010
1011 /* 5. while v is even do */
1012 while (mp_iseven (&v) == MP_YES) {
1013 /* 5.1 v = v/2 */
1014 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
1015 goto LBL_ERR;
1016 }
1017 /* 5.2 if D is odd then */
1018 if (mp_isodd (&D) == MP_YES) {
1019 /* D = (D-x)/2 */
1020 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
1021 goto LBL_ERR;
1022 }
1023 }
1024 /* D = D/2 */
1025 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
1026 goto LBL_ERR;
1027 }
1028 }
1029
1030 /* 6. if u >= v then */
1031 if (mp_cmp (&u, &v) != MP_LT) {
1032 /* u = u - v, B = B - D */
1033 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
1034 goto LBL_ERR;
1035 }
1036
1037 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
1038 goto LBL_ERR;
1039 }
1040 } else {
1041 /* v - v - u, D = D - B */
1042 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
1043 goto LBL_ERR;
1044 }
1045
1046 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
1047 goto LBL_ERR;
1048 }
1049 }
1050
1051 /* if not zero goto step 4 */
1052 if (mp_iszero (&u) == MP_NO) {
1053 if (++loop_check > MAX_INVMOD_SZ) {
1054 res = MP_VAL;
1055 goto LBL_ERR;
1056 }
1057 goto top;
1058 }
1059
1060 /* now a = C, b = D, gcd == g*v */
1061
1062 /* if v != 1 then there is no inverse */
1063 if (mp_cmp_d (&v, 1) != MP_EQ) {
1064 res = MP_VAL;
1065 goto LBL_ERR;
1066 }
1067
1068 /* b is now the inverse */
1069 neg = a->sign;
1070 while (D.sign == MP_NEG) {
1071 if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
1072 goto LBL_ERR;
1073 }
1074 }
1075 /* too big */
1076 while (mp_cmp_mag(&D, b) != MP_LT) {
1077 if ((res = mp_sub(&D, b, &D)) != MP_OKAY) {
1078 goto LBL_ERR;
1079 }
1080 }
1081 mp_exch (&D, c);
1082 c->sign = neg;
1083 res = MP_OKAY;
1084
1085LBL_ERR:mp_clear(&x);
1086 mp_clear(&y);
1087 mp_clear(&u);
1088 mp_clear(&v);
1089 mp_clear(&B);
1090 mp_clear(&D);
1091 return res;
1092}
1093
1094
1095/* hac 14.61, pp608 */
1096int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
1097{
1098 mp_int x, y, u, v, A, B, C, D;
1099 int res;
1100
1101 /* b cannot be negative */
1102 if (b->sign == MP_NEG || mp_iszero(b) == MP_YES) {
1103 return MP_VAL;
1104 }
1105
1106 /* init temps */
1107 if ((res = mp_init_multi(&x, &y, &u, &v,
1108 &A, &B)) != MP_OKAY) {
1109 return res;
1110 }
1111
1112 /* init rest of tmps temps */
1113 if ((res = mp_init_multi(&C, &D, 0, 0, 0, 0)) != MP_OKAY) {
1114 mp_clear(&x);
1115 mp_clear(&y);
1116 mp_clear(&u);
1117 mp_clear(&v);
1118 mp_clear(&A);
1119 mp_clear(&B);
1120 return res;
1121 }
1122
1123 /* x = a, y = b */
1124 if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
1125 goto LBL_ERR;
1126 }
1127 if ((res = mp_copy (b, &y)) != MP_OKAY) {
1128 goto LBL_ERR;
1129 }
1130
1131 /* 2. [modified] if x,y are both even then return an error! */
1132 if (mp_iseven (&x) == MP_YES && mp_iseven (&y) == MP_YES) {
1133 res = MP_VAL;
1134 goto LBL_ERR;
1135 }
1136
1137 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
1138 if ((res = mp_copy (&x, &u)) != MP_OKAY) {
1139 goto LBL_ERR;
1140 }
1141 if ((res = mp_copy (&y, &v)) != MP_OKAY) {
1142 goto LBL_ERR;
1143 }
1144 if ((res = mp_set (&A, 1)) != MP_OKAY) {
1145 goto LBL_ERR;
1146 }
1147 if ((res = mp_set (&D, 1)) != MP_OKAY) {
1148 goto LBL_ERR;
1149 }
1150
1151top:
1152 /* 4. while u is even do */
1153 while (mp_iseven (&u) == MP_YES) {
1154 /* 4.1 u = u/2 */
1155 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
1156 goto LBL_ERR;
1157 }
1158 /* 4.2 if A or B is odd then */
1159 if (mp_isodd (&A) == MP_YES || mp_isodd (&B) == MP_YES) {
1160 /* A = (A+y)/2, B = (B-x)/2 */
1161 if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
1162 goto LBL_ERR;
1163 }
1164 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
1165 goto LBL_ERR;
1166 }
1167 }
1168 /* A = A/2, B = B/2 */
1169 if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
1170 goto LBL_ERR;
1171 }
1172 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
1173 goto LBL_ERR;
1174 }
1175 }
1176
1177 /* 5. while v is even do */
1178 while (mp_iseven (&v) == MP_YES) {
1179 /* 5.1 v = v/2 */
1180 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
1181 goto LBL_ERR;
1182 }
1183 /* 5.2 if C or D is odd then */
1184 if (mp_isodd (&C) == MP_YES || mp_isodd (&D) == MP_YES) {
1185 /* C = (C+y)/2, D = (D-x)/2 */
1186 if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
1187 goto LBL_ERR;
1188 }
1189 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
1190 goto LBL_ERR;
1191 }
1192 }
1193 /* C = C/2, D = D/2 */
1194 if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
1195 goto LBL_ERR;
1196 }
1197 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
1198 goto LBL_ERR;
1199 }
1200 }
1201
1202 /* 6. if u >= v then */
1203 if (mp_cmp (&u, &v) != MP_LT) {
1204 /* u = u - v, A = A - C, B = B - D */
1205 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
1206 goto LBL_ERR;
1207 }
1208
1209 if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
1210 goto LBL_ERR;
1211 }
1212
1213 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
1214 goto LBL_ERR;
1215 }
1216 } else {
1217 /* v - v - u, C = C - A, D = D - B */
1218 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
1219 goto LBL_ERR;
1220 }
1221
1222 if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
1223 goto LBL_ERR;
1224 }
1225
1226 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
1227 goto LBL_ERR;
1228 }
1229 }
1230
1231 /* if not zero goto step 4 */
1232 if (mp_iszero (&u) == MP_NO)
1233 goto top;
1234
1235 /* now a = C, b = D, gcd == g*v */
1236
1237 /* if v != 1 then there is no inverse */
1238 if (mp_cmp_d (&v, 1) != MP_EQ) {
1239 res = MP_VAL;
1240 goto LBL_ERR;
1241 }
1242
1243 /* if its too low */
1244 while (mp_cmp_d(&C, 0) == MP_LT) {
1245 if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
1246 goto LBL_ERR;
1247 }
1248 }
1249
1250 /* too big */
1251 while (mp_cmp_mag(&C, b) != MP_LT) {
1252 if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
1253 goto LBL_ERR;
1254 }
1255 }
1256
1257 /* C is now the inverse */
1258 mp_exch (&C, c);
1259 res = MP_OKAY;
1260LBL_ERR:mp_clear(&x);
1261 mp_clear(&y);
1262 mp_clear(&u);
1263 mp_clear(&v);
1264 mp_clear(&A);
1265 mp_clear(&B);
1266 mp_clear(&C);
1267 mp_clear(&D);
1268 return res;
1269}
1270
1271
1272/* compare magnitude of two ints (unsigned) */
1273int mp_cmp_mag (mp_int * a, mp_int * b)
1274{
1275 int n;
1276 mp_digit *tmpa, *tmpb;
1277
1278 /* compare based on # of non-zero digits */
1279 if (a->used > b->used) {
1280 return MP_GT;
1281 }
1282
1283 if (a->used < b->used) {
1284 return MP_LT;
1285 }
1286
1287 /* alias for a */
1288 tmpa = a->dp + (a->used - 1);
1289
1290 /* alias for b */
1291 tmpb = b->dp + (a->used - 1);
1292
1293 /* compare based on digits */
1294 for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
1295 if (*tmpa > *tmpb) {
1296 return MP_GT;
1297 }
1298
1299 if (*tmpa < *tmpb) {
1300 return MP_LT;
1301 }
1302 }
1303 return MP_EQ;
1304}
1305
1306
1307/* compare two ints (signed)*/
1308int mp_cmp (mp_int * a, mp_int * b)
1309{
1310 /* compare based on sign */
1311 if (a->sign != b->sign) {
1312 if (a->sign == MP_NEG) {
1313 return MP_LT;
1314 } else {
1315 return MP_GT;
1316 }
1317 }
1318
1319 /* compare digits */
1320 if (a->sign == MP_NEG) {
1321 /* if negative compare opposite direction */
1322 return mp_cmp_mag(b, a);
1323 } else {
1324 return mp_cmp_mag(a, b);
1325 }
1326}
1327
1328
1329/* compare a digit */
1330int mp_cmp_d(mp_int * a, mp_digit b)
1331{
1332 /* special case for zero*/
1333 if (a->used == 0 && b == 0)
1334 return MP_EQ;
1335
1336 /* compare based on sign */
1337 if ((b && a->used == 0) || a->sign == MP_NEG) {
1338 return MP_LT;
1339 }
1340
1341 /* compare based on magnitude */
1342 if (a->used > 1) {
1343 return MP_GT;
1344 }
1345
1346 /* compare the only digit of a to b */
1347 if (a->dp[0] > b) {
1348 return MP_GT;
1349 } else if (a->dp[0] < b) {
1350 return MP_LT;
1351 } else {
1352 return MP_EQ;
1353 }
1354}
1355
1356
1357/* set to a digit */
1358int mp_set (mp_int * a, mp_digit b)
1359{
1360 int res;
1361 mp_zero (a);
1362 res = mp_grow (a, 1);
1363 if (res == MP_OKAY) {
1364 a->dp[0] = (mp_digit)(b & MP_MASK);
1365 a->used = (a->dp[0] != 0) ? 1 : 0;
1366 }
1367 return res;
1368}
1369
1370/* chek if a bit is set */
1371int mp_is_bit_set (mp_int *a, mp_digit b)
1372{
1373 if ((mp_digit)a->used < b/DIGIT_BIT)
1374 return 0;
1375
1376 return (int)((a->dp[b/DIGIT_BIT] >> b%DIGIT_BIT) & (mp_digit)1);
1377}
1378
1379/* c = a mod b, 0 <= c < b */
1380#if defined(FREESCALE_LTC_TFM)
1381int wolfcrypt_mp_mod(mp_int * a, mp_int * b, mp_int * c)
1382#else
1383int mp_mod (mp_int * a, mp_int * b, mp_int * c)
1384#endif
1385{
1386 mp_int t;
1387 int res;
1388
1389 if ((res = mp_init_size (&t, b->used)) != MP_OKAY) {
1390 return res;
1391 }
1392
1393 if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
1394 mp_clear (&t);
1395 return res;
1396 }
1397
1398 if ((mp_iszero(&t) != MP_NO) || (t.sign == b->sign)) {
1399 res = MP_OKAY;
1400 mp_exch (&t, c);
1401 } else {
1402 res = mp_add (b, &t, c);
1403 }
1404
1405 mp_clear (&t);
1406 return res;
1407}
1408
1409
1410/* slower bit-bang division... also smaller */
1411int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
1412{
1413 mp_int ta, tb, tq, q;
1414 int res, n, n2;
1415
1416 /* is divisor zero ? */
1417 if (mp_iszero (b) == MP_YES) {
1418 return MP_VAL;
1419 }
1420
1421 /* if a < b then q=0, r = a */
1422 if (mp_cmp_mag (a, b) == MP_LT) {
1423 if (d != NULL) {
1424 res = mp_copy (a, d);
1425 } else {
1426 res = MP_OKAY;
1427 }
1428 if (c != NULL) {
1429 mp_zero (c);
1430 }
1431 return res;
1432 }
1433
1434 /* init our temps */
1435 if ((res = mp_init_multi(&ta, &tb, &tq, &q, 0, 0)) != MP_OKAY) {
1436 return res;
1437 }
1438
1439 if ((res = mp_set(&tq, 1)) != MP_OKAY) {
1440 return res;
1441 }
1442 n = mp_count_bits(a) - mp_count_bits(b);
1443 if (((res = mp_abs(a, &ta)) != MP_OKAY) ||
1444 ((res = mp_abs(b, &tb)) != MP_OKAY) ||
1445 ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) ||
1446 ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
1447 goto LBL_ERR;
1448 }
1449
1450 while (n-- >= 0) {
1451 if (mp_cmp(&tb, &ta) != MP_GT) {
1452 if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) ||
1453 ((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
1454 goto LBL_ERR;
1455 }
1456 }
1457 if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) ||
1458 ((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
1459 goto LBL_ERR;
1460 }
1461 }
1462
1463 /* now q == quotient and ta == remainder */
1464 n = a->sign;
1465 n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
1466 if (c != NULL) {
1467 mp_exch(c, &q);
1468 c->sign = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
1469 }
1470 if (d != NULL) {
1471 mp_exch(d, &ta);
1472 d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
1473 }
1474LBL_ERR:
1475 mp_clear(&ta);
1476 mp_clear(&tb);
1477 mp_clear(&tq);
1478 mp_clear(&q);
1479 return res;
1480}
1481
1482
1483/* b = a/2 */
1484int mp_div_2(mp_int * a, mp_int * b)
1485{
1486 int x, res, oldused;
1487
1488 /* copy */
1489 if (b->alloc < a->used) {
1490 if ((res = mp_grow (b, a->used)) != MP_OKAY) {
1491 return res;
1492 }
1493 }
1494
1495 oldused = b->used;
1496 b->used = a->used;
1497 {
1498 mp_digit r, rr, *tmpa, *tmpb;
1499
1500 /* source alias */
1501 tmpa = a->dp + b->used - 1;
1502
1503 /* dest alias */
1504 tmpb = b->dp + b->used - 1;
1505
1506 /* carry */
1507 r = 0;
1508 for (x = b->used - 1; x >= 0; x--) {
1509 /* get the carry for the next iteration */
1510 rr = *tmpa & 1;
1511
1512 /* shift the current digit, add in carry and store */
1513 *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
1514
1515 /* forward carry to next iteration */
1516 r = rr;
1517 }
1518
1519 /* zero excess digits */
1520 tmpb = b->dp + b->used;
1521 for (x = b->used; x < oldused; x++) {
1522 *tmpb++ = 0;
1523 }
1524 }
1525 b->sign = a->sign;
1526 mp_clamp (b);
1527 return MP_OKAY;
1528}
1529
1530
1531/* high level addition (handles signs) */
1532int mp_add (mp_int * a, mp_int * b, mp_int * c)
1533{
1534 int sa, sb, res;
1535
1536 /* get sign of both inputs */
1537 sa = a->sign;
1538 sb = b->sign;
1539
1540 /* handle two cases, not four */
1541 if (sa == sb) {
1542 /* both positive or both negative */
1543 /* add their magnitudes, copy the sign */
1544 c->sign = sa;
1545 res = s_mp_add (a, b, c);
1546 } else {
1547 /* one positive, the other negative */
1548 /* subtract the one with the greater magnitude from */
1549 /* the one of the lesser magnitude. The result gets */
1550 /* the sign of the one with the greater magnitude. */
1551 if (mp_cmp_mag (a, b) == MP_LT) {
1552 c->sign = sb;
1553 res = s_mp_sub (b, a, c);
1554 } else {
1555 c->sign = sa;
1556 res = s_mp_sub (a, b, c);
1557 }
1558 }
1559 return res;
1560}
1561
1562
1563/* low level addition, based on HAC pp.594, Algorithm 14.7 */
1564int s_mp_add (mp_int * a, mp_int * b, mp_int * c)
1565{
1566 mp_int *x;
1567 int olduse, res, min_ab, max_ab;
1568
1569 /* find sizes, we let |a| <= |b| which means we have to sort
1570 * them. "x" will point to the input with the most digits
1571 */
1572 if (a->used > b->used) {
1573 min_ab = b->used;
1574 max_ab = a->used;
1575 x = a;
1576 } else {
1577 min_ab = a->used;
1578 max_ab = b->used;
1579 x = b;
1580 }
1581
1582 /* init result */
1583 if (c->alloc < max_ab + 1) {
1584 if ((res = mp_grow (c, max_ab + 1)) != MP_OKAY) {
1585 return res;
1586 }
1587 }
1588
1589 /* get old used digit count and set new one */
1590 olduse = c->used;
1591 c->used = max_ab + 1;
1592
1593 {
1594 mp_digit u, *tmpa, *tmpb, *tmpc;
1595 int i;
1596
1597 /* alias for digit pointers */
1598
1599 /* first input */
1600 tmpa = a->dp;
1601
1602 /* second input */
1603 tmpb = b->dp;
1604
1605 /* destination */
1606 tmpc = c->dp;
1607
1608 /* zero the carry */
1609 u = 0;
1610 for (i = 0; i < min_ab; i++) {
1611 /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
1612 *tmpc = *tmpa++ + *tmpb++ + u;
1613
1614 /* U = carry bit of T[i] */
1615 u = *tmpc >> ((mp_digit)DIGIT_BIT);
1616
1617 /* take away carry bit from T[i] */
1618 *tmpc++ &= MP_MASK;
1619 }
1620
1621 /* now copy higher words if any, that is in A+B
1622 * if A or B has more digits add those in
1623 */
1624 if (min_ab != max_ab) {
1625 for (; i < max_ab; i++) {
1626 /* T[i] = X[i] + U */
1627 *tmpc = x->dp[i] + u;
1628
1629 /* U = carry bit of T[i] */
1630 u = *tmpc >> ((mp_digit)DIGIT_BIT);
1631
1632 /* take away carry bit from T[i] */
1633 *tmpc++ &= MP_MASK;
1634 }
1635 }
1636
1637 /* add carry */
1638 *tmpc++ = u;
1639
1640 /* clear digits above olduse */
1641 for (i = c->used; i < olduse; i++) {
1642 *tmpc++ = 0;
1643 }
1644 }
1645
1646 mp_clamp (c);
1647 return MP_OKAY;
1648}
1649
1650
1651/* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
1652int s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
1653{
1654 int olduse, res, min_b, max_a;
1655
1656 /* find sizes */
1657 min_b = b->used;
1658 max_a = a->used;
1659
1660 /* init result */
1661 if (c->alloc < max_a) {
1662 if ((res = mp_grow (c, max_a)) != MP_OKAY) {
1663 return res;
1664 }
1665 }
1666
1667 /* sanity check on destination */
1668 if (c->dp == NULL)
1669 return MP_VAL;
1670
1671 olduse = c->used;
1672 c->used = max_a;
1673
1674 {
1675 mp_digit u, *tmpa, *tmpb, *tmpc;
1676 int i;
1677
1678 /* alias for digit pointers */
1679 tmpa = a->dp;
1680 tmpb = b->dp;
1681 tmpc = c->dp;
1682
1683 /* set carry to zero */
1684 u = 0;
1685 for (i = 0; i < min_b; i++) {
1686 /* T[i] = A[i] - B[i] - U */
1687 *tmpc = *tmpa++ - *tmpb++ - u;
1688
1689 /* U = carry bit of T[i]
1690 * Note this saves performing an AND operation since
1691 * if a carry does occur it will propagate all the way to the
1692 * MSB. As a result a single shift is enough to get the carry
1693 */
1694 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
1695
1696 /* Clear carry from T[i] */
1697 *tmpc++ &= MP_MASK;
1698 }
1699
1700 /* now copy higher words if any, e.g. if A has more digits than B */
1701 for (; i < max_a; i++) {
1702 /* T[i] = A[i] - U */
1703 *tmpc = *tmpa++ - u;
1704
1705 /* U = carry bit of T[i] */
1706 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
1707
1708 /* Clear carry from T[i] */
1709 *tmpc++ &= MP_MASK;
1710 }
1711
1712 /* clear digits above used (since we may not have grown result above) */
1713 for (i = c->used; i < olduse; i++) {
1714 *tmpc++ = 0;
1715 }
1716 }
1717
1718 mp_clamp (c);
1719 return MP_OKAY;
1720}
1721
1722
1723/* high level subtraction (handles signs) */
1724int mp_sub (mp_int * a, mp_int * b, mp_int * c)
1725{
1726 int sa, sb, res;
1727
1728 sa = a->sign;
1729 sb = b->sign;
1730
1731 if (sa != sb) {
1732 /* subtract a negative from a positive, OR */
1733 /* subtract a positive from a negative. */
1734 /* In either case, ADD their magnitudes, */
1735 /* and use the sign of the first number. */
1736 c->sign = sa;
1737 res = s_mp_add (a, b, c);
1738 } else {
1739 /* subtract a positive from a positive, OR */
1740 /* subtract a negative from a negative. */
1741 /* First, take the difference between their */
1742 /* magnitudes, then... */
1743 if (mp_cmp_mag (a, b) != MP_LT) {
1744 /* Copy the sign from the first */
1745 c->sign = sa;
1746 /* The first has a larger or equal magnitude */
1747 res = s_mp_sub (a, b, c);
1748 } else {
1749 /* The result has the *opposite* sign from */
1750 /* the first number. */
1751 c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
1752 /* The second has a larger magnitude */
1753 res = s_mp_sub (b, a, c);
1754 }
1755 }
1756 return res;
1757}
1758
1759
1760/* determines if reduce_2k_l can be used */
1761int mp_reduce_is_2k_l(mp_int *a)
1762{
1763 int ix, iy;
1764
1765 if (a->used == 0) {
1766 return MP_NO;
1767 } else if (a->used == 1) {
1768 return MP_YES;
1769 } else if (a->used > 1) {
1770 /* if more than half of the digits are -1 we're sold */
1771 for (iy = ix = 0; ix < a->used; ix++) {
1772 if (a->dp[ix] == MP_MASK) {
1773 ++iy;
1774 }
1775 }
1776 return (iy >= (a->used/2)) ? MP_YES : MP_NO;
1777
1778 }
1779 return MP_NO;
1780}
1781
1782
1783/* determines if mp_reduce_2k can be used */
1784int mp_reduce_is_2k(mp_int *a)
1785{
1786 int ix, iy, iw;
1787 mp_digit iz;
1788
1789 if (a->used == 0) {
1790 return MP_NO;
1791 } else if (a->used == 1) {
1792 return MP_YES;
1793 } else if (a->used > 1) {
1794 iy = mp_count_bits(a);
1795 iz = 1;
1796 iw = 1;
1797
1798 /* Test every bit from the second digit up, must be 1 */
1799 for (ix = DIGIT_BIT; ix < iy; ix++) {
1800 if ((a->dp[iw] & iz) == 0) {
1801 return MP_NO;
1802 }
1803 iz <<= 1;
1804 if (iz > (mp_digit)MP_MASK) {
1805 ++iw;
1806 iz = 1;
1807 }
1808 }
1809 }
1810 return MP_YES;
1811}
1812
1813
1814/* determines if a number is a valid DR modulus */
1815int mp_dr_is_modulus(mp_int *a)
1816{
1817 int ix;
1818
1819 /* must be at least two digits */
1820 if (a->used < 2) {
1821 return 0;
1822 }
1823
1824 /* must be of the form b**k - a [a <= b] so all
1825 * but the first digit must be equal to -1 (mod b).
1826 */
1827 for (ix = 1; ix < a->used; ix++) {
1828 if (a->dp[ix] != MP_MASK) {
1829 return 0;
1830 }
1831 }
1832 return 1;
1833}
1834
1835
1836/* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
1837 *
1838 * Uses a left-to-right k-ary sliding window to compute the modular
1839 * exponentiation.
1840 * The value of k changes based on the size of the exponent.
1841 *
1842 * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
1843 */
1844
1845#ifdef MP_LOW_MEM
1846 #define TAB_SIZE 32
1847#else
1848 #define TAB_SIZE 256
1849#endif
1850
1851int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
1852 int redmode)
1853{
1854 mp_int res;
1855 mp_digit buf, mp;
1856 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
1857#ifdef WOLFSSL_SMALL_STACK
1858 mp_int* M = NULL;
1859#else
1860 mp_int M[TAB_SIZE];
1861#endif
1862 /* use a pointer to the reduction algorithm. This allows us to use
1863 * one of many reduction algorithms without modding the guts of
1864 * the code with if statements everywhere.
1865 */
1866 int (*redux)(mp_int*,mp_int*,mp_digit);
1867
1868#ifdef WOLFSSL_SMALL_STACK
1869 M = (mp_int*) XMALLOC(sizeof(mp_int) * TAB_SIZE, NULL,
1870 DYNAMIC_TYPE_TMP_BUFFER);
1871 if (M == NULL)
1872 return MP_MEM;
1873#endif
1874
1875 /* find window size */
1876 x = mp_count_bits (X);
1877 if (x <= 7) {
1878 winsize = 2;
1879 } else if (x <= 36) {
1880 winsize = 3;
1881 } else if (x <= 140) {
1882 winsize = 4;
1883 } else if (x <= 450) {
1884 winsize = 5;
1885 } else if (x <= 1303) {
1886 winsize = 6;
1887 } else if (x <= 3529) {
1888 winsize = 7;
1889 } else {
1890 winsize = 8;
1891 }
1892
1893#ifdef MP_LOW_MEM
1894 if (winsize > 5) {
1895 winsize = 5;
1896 }
1897#endif
1898
1899 /* init M array */
1900 /* init first cell */
1901 if ((err = mp_init_size(&M[1], P->alloc)) != MP_OKAY) {
1902#ifdef WOLFSSL_SMALL_STACK
1903 XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
1904#endif
1905
1906 return err;
1907 }
1908
1909 /* now init the second half of the array */
1910 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
1911 if ((err = mp_init_size(&M[x], P->alloc)) != MP_OKAY) {
1912 for (y = 1<<(winsize-1); y < x; y++) {
1913 mp_clear (&M[y]);
1914 }
1915 mp_clear(&M[1]);
1916
1917#ifdef WOLFSSL_SMALL_STACK
1918 XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
1919#endif
1920
1921 return err;
1922 }
1923 }
1924
1925 /* determine and setup reduction code */
1926 if (redmode == 0) {
1927#ifdef BN_MP_MONTGOMERY_SETUP_C
1928 /* now setup montgomery */
1929 if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
1930 goto LBL_M;
1931 }
1932#else
1933 err = MP_VAL;
1934 goto LBL_M;
1935#endif
1936
1937 /* automatically pick the comba one if available (saves quite a few
1938 calls/ifs) */
1939#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
1940 if (((P->used * 2 + 1) < MP_WARRAY) &&
1941 P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
1942 redux = fast_mp_montgomery_reduce;
1943 } else
1944#endif
1945 {
1946#ifdef BN_MP_MONTGOMERY_REDUCE_C
1947 /* use slower baseline Montgomery method */
1948 redux = mp_montgomery_reduce;
1949#else
1950 err = MP_VAL;
1951 goto LBL_M;
1952#endif
1953 }
1954 } else if (redmode == 1) {
1955#if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
1956 /* setup DR reduction for moduli of the form B**k - b */
1957 mp_dr_setup(P, &mp);
1958 redux = mp_dr_reduce;
1959#else
1960 err = MP_VAL;
1961 goto LBL_M;
1962#endif
1963 } else {
1964#if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
1965 /* setup DR reduction for moduli of the form 2**k - b */
1966 if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
1967 goto LBL_M;
1968 }
1969 redux = mp_reduce_2k;
1970#else
1971 err = MP_VAL;
1972 goto LBL_M;
1973#endif
1974 }
1975
1976 /* setup result */
1977 if ((err = mp_init_size (&res, P->alloc)) != MP_OKAY) {
1978 goto LBL_M;
1979 }
1980
1981 /* create M table
1982 *
1983
1984 *
1985 * The first half of the table is not computed though accept for M[0] and M[1]
1986 */
1987
1988 if (redmode == 0) {
1989#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
1990 /* now we need R mod m */
1991 if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
1992 goto LBL_RES;
1993 }
1994
1995 /* now set M[1] to G * R mod m */
1996 if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
1997 goto LBL_RES;
1998 }
1999#else
2000 err = MP_VAL;
2001 goto LBL_RES;
2002#endif
2003 } else {
2004 if ((err = mp_set(&res, 1)) != MP_OKAY) {
2005 goto LBL_RES;
2006 }
2007 if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
2008 goto LBL_RES;
2009 }
2010 }
2011
2012 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times*/
2013 if ((err = mp_copy (&M[1], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
2014 goto LBL_RES;
2015 }
2016
2017 for (x = 0; x < (winsize - 1); x++) {
2018 if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))],
2019 &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
2020 goto LBL_RES;
2021 }
2022 if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, mp)) != MP_OKAY) {
2023 goto LBL_RES;
2024 }
2025 }
2026
2027 /* create upper table */
2028 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
2029 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
2030 goto LBL_RES;
2031 }
2032 if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
2033 goto LBL_RES;
2034 }
2035 }
2036
2037 /* set initial mode and bit cnt */
2038 mode = 0;
2039 bitcnt = 1;
2040 buf = 0;
2041 digidx = X->used - 1;
2042 bitcpy = 0;
2043 bitbuf = 0;
2044
2045 for (;;) {
2046 /* grab next digit as required */
2047 if (--bitcnt == 0) {
2048 /* if digidx == -1 we are out of digits so break */
2049 if (digidx == -1) {
2050 break;
2051 }
2052 /* read next digit and reset bitcnt */
2053 buf = X->dp[digidx--];
2054 bitcnt = (int)DIGIT_BIT;
2055 }
2056
2057 /* grab the next msb from the exponent */
2058 y = (int)(buf >> (DIGIT_BIT - 1)) & 1;
2059 buf <<= (mp_digit)1;
2060
2061 /* if the bit is zero and mode == 0 then we ignore it
2062 * These represent the leading zero bits before the first 1 bit
2063 * in the exponent. Technically this opt is not required but it
2064 * does lower the # of trivial squaring/reductions used
2065 */
2066 if (mode == 0 && y == 0) {
2067 continue;
2068 }
2069
2070 /* if the bit is zero and mode == 1 then we square */
2071 if (mode == 1 && y == 0) {
2072 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2073 goto LBL_RES;
2074 }
2075 if ((err = redux (&res, P, mp)) != MP_OKAY) {
2076 goto LBL_RES;
2077 }
2078 continue;
2079 }
2080
2081 /* else we add it to the window */
2082 bitbuf |= (y << (winsize - ++bitcpy));
2083 mode = 2;
2084
2085 if (bitcpy == winsize) {
2086 /* ok window is filled so square as required and multiply */
2087 /* square first */
2088 for (x = 0; x < winsize; x++) {
2089 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2090 goto LBL_RES;
2091 }
2092 if ((err = redux (&res, P, mp)) != MP_OKAY) {
2093 goto LBL_RES;
2094 }
2095 }
2096
2097 /* then multiply */
2098 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
2099 goto LBL_RES;
2100 }
2101 if ((err = redux (&res, P, mp)) != MP_OKAY) {
2102 goto LBL_RES;
2103 }
2104
2105 /* empty window and reset */
2106 bitcpy = 0;
2107 bitbuf = 0;
2108 mode = 1;
2109 }
2110 }
2111
2112 /* if bits remain then square/multiply */
2113 if (mode == 2 && bitcpy > 0) {
2114 /* square then multiply if the bit is set */
2115 for (x = 0; x < bitcpy; x++) {
2116 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2117 goto LBL_RES;
2118 }
2119 if ((err = redux (&res, P, mp)) != MP_OKAY) {
2120 goto LBL_RES;
2121 }
2122
2123 /* get next bit of the window */
2124 bitbuf <<= 1;
2125 if ((bitbuf & (1 << winsize)) != 0) {
2126 /* then multiply */
2127 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
2128 goto LBL_RES;
2129 }
2130 if ((err = redux (&res, P, mp)) != MP_OKAY) {
2131 goto LBL_RES;
2132 }
2133 }
2134 }
2135 }
2136
2137 if (redmode == 0) {
2138 /* fixup result if Montgomery reduction is used
2139 * recall that any value in a Montgomery system is
2140 * actually multiplied by R mod n. So we have
2141 * to reduce one more time to cancel out the factor
2142 * of R.
2143 */
2144 if ((err = redux(&res, P, mp)) != MP_OKAY) {
2145 goto LBL_RES;
2146 }
2147 }
2148
2149 /* swap res with Y */
2150 mp_exch (&res, Y);
2151 err = MP_OKAY;
2152LBL_RES:mp_clear (&res);
2153LBL_M:
2154 mp_clear(&M[1]);
2155 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
2156 mp_clear (&M[x]);
2157 }
2158
2159#ifdef WOLFSSL_SMALL_STACK
2160 XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2161#endif
2162
2163 return err;
2164}
2165
2166
2167/* setups the montgomery reduction stuff */
2168int mp_montgomery_setup (mp_int * n, mp_digit * rho)
2169{
2170 mp_digit x, b;
2171
2172/* fast inversion mod 2**k
2173 *
2174 * Based on the fact that
2175 *
2176 * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
2177 * => 2*X*A - X*X*A*A = 1
2178 * => 2*(1) - (1) = 1
2179 */
2180 b = n->dp[0];
2181
2182 if ((b & 1) == 0) {
2183 return MP_VAL;
2184 }
2185
2186 x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
2187 x *= 2 - b * x; /* here x*a==1 mod 2**8 */
2188#if !defined(MP_8BIT)
2189 x *= 2 - b * x; /* here x*a==1 mod 2**16 */
2190#endif
2191#if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT))
2192 x *= 2 - b * x; /* here x*a==1 mod 2**32 */
2193#endif
2194#ifdef MP_64BIT
2195 x *= 2 - b * x; /* here x*a==1 mod 2**64 */
2196#endif
2197
2198 /* rho = -1/m mod b */
2199 /* TAO, switched mp_word casts to mp_digit to shut up compiler */
2200 *rho = (mp_digit)((((mp_digit)1 << ((mp_digit) DIGIT_BIT)) - x) & MP_MASK);
2201
2202 return MP_OKAY;
2203}
2204
2205
2206/* computes xR**-1 == x (mod N) via Montgomery Reduction
2207 *
2208 * This is an optimized implementation of montgomery_reduce
2209 * which uses the comba method to quickly calculate the columns of the
2210 * reduction.
2211 *
2212 * Based on Algorithm 14.32 on pp.601 of HAC.
2213*/
2214int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
2215{
2216 int ix, res, olduse;
2217#ifdef WOLFSSL_SMALL_STACK
2218 mp_word* W; /* uses dynamic memory and slower */
2219#else
2220 mp_word W[MP_WARRAY];
2221#endif
2222
2223 /* get old used count */
2224 olduse = x->used;
2225
2226 /* grow a as required */
2227 if (x->alloc < n->used + 1) {
2228 if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
2229 return res;
2230 }
2231 }
2232
2233#ifdef WOLFSSL_SMALL_STACK
2234 W = (mp_word*)XMALLOC(sizeof(mp_word) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
2235 if (W == NULL)
2236 return MP_MEM;
2237#endif
2238
2239 /* first we have to get the digits of the input into
2240 * an array of double precision words W[...]
2241 */
2242 {
2243 mp_word *_W;
2244 mp_digit *tmpx;
2245
2246 /* alias for the W[] array */
2247 _W = W;
2248
2249 /* alias for the digits of x*/
2250 tmpx = x->dp;
2251
2252 /* copy the digits of a into W[0..a->used-1] */
2253 for (ix = 0; ix < x->used; ix++) {
2254 *_W++ = *tmpx++;
2255 }
2256
2257 /* zero the high words of W[a->used..m->used*2] */
2258 for (; ix < n->used * 2 + 1; ix++) {
2259 *_W++ = 0;
2260 }
2261 }
2262
2263 /* now we proceed to zero successive digits
2264 * from the least significant upwards
2265 */
2266 for (ix = 0; ix < n->used; ix++) {
2267 /* mu = ai * m' mod b
2268 *
2269 * We avoid a double precision multiplication (which isn't required)
2270 * by casting the value down to a mp_digit. Note this requires
2271 * that W[ix-1] have the carry cleared (see after the inner loop)
2272 */
2273 mp_digit mu;
2274 mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
2275
2276 /* a = a + mu * m * b**i
2277 *
2278 * This is computed in place and on the fly. The multiplication
2279 * by b**i is handled by offseting which columns the results
2280 * are added to.
2281 *
2282 * Note the comba method normally doesn't handle carries in the
2283 * inner loop In this case we fix the carry from the previous
2284 * column since the Montgomery reduction requires digits of the
2285 * result (so far) [see above] to work. This is
2286 * handled by fixing up one carry after the inner loop. The
2287 * carry fixups are done in order so after these loops the
2288 * first m->used words of W[] have the carries fixed
2289 */
2290 {
2291 int iy;
2292 mp_digit *tmpn;
2293 mp_word *_W;
2294
2295 /* alias for the digits of the modulus */
2296 tmpn = n->dp;
2297
2298 /* Alias for the columns set by an offset of ix */
2299 _W = W + ix;
2300
2301 /* inner loop */
2302 for (iy = 0; iy < n->used; iy++) {
2303 *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
2304 }
2305 }
2306
2307 /* now fix carry for next digit, W[ix+1] */
2308 W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
2309 }
2310
2311 /* now we have to propagate the carries and
2312 * shift the words downward [all those least
2313 * significant digits we zeroed].
2314 */
2315 {
2316 mp_digit *tmpx;
2317 mp_word *_W, *_W1;
2318
2319 /* nox fix rest of carries */
2320
2321 /* alias for current word */
2322 _W1 = W + ix;
2323
2324 /* alias for next word, where the carry goes */
2325 _W = W + ++ix;
2326
2327 for (; ix <= n->used * 2 + 1; ix++) {
2328 *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
2329 }
2330
2331 /* copy out, A = A/b**n
2332 *
2333 * The result is A/b**n but instead of converting from an
2334 * array of mp_word to mp_digit than calling mp_rshd
2335 * we just copy them in the right order
2336 */
2337
2338 /* alias for destination word */
2339 tmpx = x->dp;
2340
2341 /* alias for shifted double precision result */
2342 _W = W + n->used;
2343
2344 for (ix = 0; ix < n->used + 1; ix++) {
2345 *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
2346 }
2347
2348 /* zero olduse digits, if the input a was larger than
2349 * m->used+1 we'll have to clear the digits
2350 */
2351 for (; ix < olduse; ix++) {
2352 *tmpx++ = 0;
2353 }
2354 }
2355
2356 /* set the max used and clamp */
2357 x->used = n->used + 1;
2358 mp_clamp (x);
2359
2360#ifdef WOLFSSL_SMALL_STACK
2361 XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
2362#endif
2363
2364 /* if A >= m then A = A - m */
2365 if (mp_cmp_mag (x, n) != MP_LT) {
2366 return s_mp_sub (x, n, x);
2367 }
2368 return MP_OKAY;
2369}
2370
2371
2372/* computes xR**-1 == x (mod N) via Montgomery Reduction */
2373int mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
2374{
2375 int ix, res, digs;
2376 mp_digit mu;
2377
2378 /* can the fast reduction [comba] method be used?
2379 *
2380 * Note that unlike in mul you're safely allowed *less*
2381 * than the available columns [255 per default] since carries
2382 * are fixed up in the inner loop.
2383 */
2384 digs = n->used * 2 + 1;
2385 if ((digs < MP_WARRAY) &&
2386 n->used <
2387 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2388 return fast_mp_montgomery_reduce (x, n, rho);
2389 }
2390
2391 /* grow the input as required */
2392 if (x->alloc < digs) {
2393 if ((res = mp_grow (x, digs)) != MP_OKAY) {
2394 return res;
2395 }
2396 }
2397 x->used = digs;
2398
2399 for (ix = 0; ix < n->used; ix++) {
2400 /* mu = ai * rho mod b
2401 *
2402 * The value of rho must be precalculated via
2403 * montgomery_setup() such that
2404 * it equals -1/n0 mod b this allows the
2405 * following inner loop to reduce the
2406 * input one digit at a time
2407 */
2408 mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
2409
2410 /* a = a + mu * m * b**i */
2411 {
2412 int iy;
2413 mp_digit *tmpn, *tmpx, u;
2414 mp_word r;
2415
2416 /* alias for digits of the modulus */
2417 tmpn = n->dp;
2418
2419 /* alias for the digits of x [the input] */
2420 tmpx = x->dp + ix;
2421
2422 /* set the carry to zero */
2423 u = 0;
2424
2425 /* Multiply and add in place */
2426 for (iy = 0; iy < n->used; iy++) {
2427 /* compute product and sum */
2428 r = ((mp_word)mu) * ((mp_word)*tmpn++) +
2429 ((mp_word) u) + ((mp_word) * tmpx);
2430
2431 /* get carry */
2432 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
2433
2434 /* fix digit */
2435 *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
2436 }
2437 /* At this point the ix'th digit of x should be zero */
2438
2439
2440 /* propagate carries upwards as required*/
2441 while (u) {
2442 *tmpx += u;
2443 u = *tmpx >> DIGIT_BIT;
2444 *tmpx++ &= MP_MASK;
2445 }
2446 }
2447 }
2448
2449 /* at this point the n.used'th least
2450 * significant digits of x are all zero
2451 * which means we can shift x to the
2452 * right by n.used digits and the
2453 * residue is unchanged.
2454 */
2455
2456 /* x = x/b**n.used */
2457 mp_clamp(x);
2458 mp_rshd (x, n->used);
2459
2460 /* if x >= n then x = x - n */
2461 if (mp_cmp_mag (x, n) != MP_LT) {
2462 return s_mp_sub (x, n, x);
2463 }
2464
2465 return MP_OKAY;
2466}
2467
2468
2469/* determines the setup value */
2470void mp_dr_setup(mp_int *a, mp_digit *d)
2471{
2472 /* the casts are required if DIGIT_BIT is one less than
2473 * the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
2474 */
2475 *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
2476 ((mp_word)a->dp[0]));
2477}
2478
2479
2480/* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
2481 *
2482 * Based on algorithm from the paper
2483 *
2484 * "Generating Efficient Primes for Discrete Log Cryptosystems"
2485 * Chae Hoon Lim, Pil Joong Lee,
2486 * POSTECH Information Research Laboratories
2487 *
2488 * The modulus must be of a special format [see manual]
2489 *
2490 * Has been modified to use algorithm 7.10 from the LTM book instead
2491 *
2492 * Input x must be in the range 0 <= x <= (n-1)**2
2493 */
2494int mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k)
2495{
2496 int err, i, m;
2497 mp_word r;
2498 mp_digit mu, *tmpx1, *tmpx2;
2499
2500 /* m = digits in modulus */
2501 m = n->used;
2502
2503 /* ensure that "x" has at least 2m digits */
2504 if (x->alloc < m + m) {
2505 if ((err = mp_grow (x, m + m)) != MP_OKAY) {
2506 return err;
2507 }
2508 }
2509
2510/* top of loop, this is where the code resumes if
2511 * another reduction pass is required.
2512 */
2513top:
2514 /* aliases for digits */
2515 /* alias for lower half of x */
2516 tmpx1 = x->dp;
2517
2518 /* alias for upper half of x, or x/B**m */
2519 tmpx2 = x->dp + m;
2520
2521 /* set carry to zero */
2522 mu = 0;
2523
2524 /* compute (x mod B**m) + k * [x/B**m] inline and inplace */
2525 for (i = 0; i < m; i++) {
2526 r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu;
2527 *tmpx1++ = (mp_digit)(r & MP_MASK);
2528 mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
2529 }
2530
2531 /* set final carry */
2532 *tmpx1++ = mu;
2533
2534 /* zero words above m */
2535 for (i = m + 1; i < x->used; i++) {
2536 *tmpx1++ = 0;
2537 }
2538
2539 /* clamp, sub and return */
2540 mp_clamp (x);
2541
2542 /* if x >= n then subtract and reduce again
2543 * Each successive "recursion" makes the input smaller and smaller.
2544 */
2545 if (mp_cmp_mag (x, n) != MP_LT) {
2546 if ((err = s_mp_sub(x, n, x)) != MP_OKAY) {
2547 return err;
2548 }
2549 goto top;
2550 }
2551 return MP_OKAY;
2552}
2553
2554
2555/* reduces a modulo n where n is of the form 2**p - d */
2556int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d)
2557{
2558 mp_int q;
2559 int p, res;
2560
2561 if ((res = mp_init(&q)) != MP_OKAY) {
2562 return res;
2563 }
2564
2565 p = mp_count_bits(n);
2566top:
2567 /* q = a/2**p, a = a mod 2**p */
2568 if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
2569 goto ERR;
2570 }
2571
2572 if (d != 1) {
2573 /* q = q * d */
2574 if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) {
2575 goto ERR;
2576 }
2577 }
2578
2579 /* a = a + q */
2580 if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
2581 goto ERR;
2582 }
2583
2584 if (mp_cmp_mag(a, n) != MP_LT) {
2585 if ((res = s_mp_sub(a, n, a)) != MP_OKAY) {
2586 goto ERR;
2587 }
2588 goto top;
2589 }
2590
2591ERR:
2592 mp_clear(&q);
2593 return res;
2594}
2595
2596
2597/* determines the setup value */
2598int mp_reduce_2k_setup(mp_int *a, mp_digit *d)
2599{
2600 int res, p;
2601 mp_int tmp;
2602
2603 if ((res = mp_init(&tmp)) != MP_OKAY) {
2604 return res;
2605 }
2606
2607 p = mp_count_bits(a);
2608 if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
2609 mp_clear(&tmp);
2610 return res;
2611 }
2612
2613 if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
2614 mp_clear(&tmp);
2615 return res;
2616 }
2617
2618 *d = tmp.dp[0];
2619 mp_clear(&tmp);
2620 return MP_OKAY;
2621}
2622
2623
2624/* set the b bit of a */
2625int mp_set_bit (mp_int * a, int b)
2626{
2627 int i = b / DIGIT_BIT, res;
2628
2629 if (a->used < (int)(i + 1)) {
2630 /* grow a to accommodate the single bit */
2631 if ((res = mp_grow (a, i + 1)) != MP_OKAY) {
2632 return res;
2633 }
2634
2635 /* set the used count of where the bit will go */
2636 a->used = (int)(i + 1);
2637 }
2638
2639 /* put the single bit in its place */
2640 a->dp[i] |= ((mp_digit)1) << (b % DIGIT_BIT);
2641
2642 return MP_OKAY;
2643}
2644
2645/* computes a = 2**b
2646 *
2647 * Simple algorithm which zeros the int, set the required bit
2648 */
2649int mp_2expt (mp_int * a, int b)
2650{
2651 /* zero a as per default */
2652 mp_zero (a);
2653
2654 return mp_set_bit(a, b);
2655}
2656
2657/* multiply by a digit */
2658int mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
2659{
2660 mp_digit u, *tmpa, *tmpc;
2661 mp_word r;
2662 int ix, res, olduse;
2663
2664 /* make sure c is big enough to hold a*b */
2665 if (c->alloc < a->used + 1) {
2666 if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
2667 return res;
2668 }
2669 }
2670
2671 /* get the original destinations used count */
2672 olduse = c->used;
2673
2674 /* set the sign */
2675 c->sign = a->sign;
2676
2677 /* alias for a->dp [source] */
2678 tmpa = a->dp;
2679
2680 /* alias for c->dp [dest] */
2681 tmpc = c->dp;
2682
2683 /* zero carry */
2684 u = 0;
2685
2686 /* compute columns */
2687 for (ix = 0; ix < a->used; ix++) {
2688 /* compute product and carry sum for this term */
2689 r = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b);
2690
2691 /* mask off higher bits to get a single digit */
2692 *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
2693
2694 /* send carry into next iteration */
2695 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
2696 }
2697
2698 /* store final carry [if any] and increment ix offset */
2699 *tmpc++ = u;
2700 ++ix;
2701
2702 /* now zero digits above the top */
2703 while (ix++ < olduse) {
2704 *tmpc++ = 0;
2705 }
2706
2707 /* set used count */
2708 c->used = a->used + 1;
2709 mp_clamp(c);
2710
2711 return MP_OKAY;
2712}
2713
2714
2715/* d = a * b (mod c) */
2716#if defined(FREESCALE_LTC_TFM)
2717int wolfcrypt_mp_mulmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
2718#else
2719int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
2720#endif
2721{
2722 int res;
2723 mp_int t;
2724
2725 if ((res = mp_init_size (&t, c->used)) != MP_OKAY) {
2726 return res;
2727 }
2728
2729 res = mp_mul (a, b, &t);
2730 if (res == MP_OKAY) {
2731 res = mp_mod (&t, c, d);
2732 }
2733
2734 mp_clear (&t);
2735 return res;
2736}
2737
2738
2739/* d = a - b (mod c) */
2740int mp_submod(mp_int* a, mp_int* b, mp_int* c, mp_int* d)
2741{
2742 int res;
2743 mp_int t;
2744
2745 if ((res = mp_init (&t)) != MP_OKAY) {
2746 return res;
2747 }
2748
2749 res = mp_sub (a, b, &t);
2750 if (res == MP_OKAY) {
2751 res = mp_mod (&t, c, d);
2752 }
2753
2754 mp_clear (&t);
2755
2756 return res;
2757}
2758
2759/* d = a + b (mod c) */
2760int mp_addmod(mp_int* a, mp_int* b, mp_int* c, mp_int* d)
2761{
2762 int res;
2763 mp_int t;
2764
2765 if ((res = mp_init (&t)) != MP_OKAY) {
2766 return res;
2767 }
2768
2769 res = mp_add (a, b, &t);
2770 if (res == MP_OKAY) {
2771 res = mp_mod (&t, c, d);
2772 }
2773
2774 mp_clear (&t);
2775
2776 return res;
2777}
2778
2779/* computes b = a*a */
2780int mp_sqr (mp_int * a, mp_int * b)
2781{
2782 int res;
2783
2784 {
2785#ifdef BN_FAST_S_MP_SQR_C
2786 /* can we use the fast comba multiplier? */
2787 if ((a->used * 2 + 1) < MP_WARRAY &&
2788 a->used <
2789 (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
2790 res = fast_s_mp_sqr (a, b);
2791 } else
2792#endif
2793#ifdef BN_S_MP_SQR_C
2794 res = s_mp_sqr (a, b);
2795#else
2796 res = MP_VAL;
2797#endif
2798 }
2799 b->sign = MP_ZPOS;
2800 return res;
2801}
2802
2803
2804/* high level multiplication (handles sign) */
2805#if defined(FREESCALE_LTC_TFM)
2806int wolfcrypt_mp_mul(mp_int *a, mp_int *b, mp_int *c)
2807#else
2808int mp_mul (mp_int * a, mp_int * b, mp_int * c)
2809#endif
2810{
2811 int res, neg;
2812 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
2813
2814 {
2815 /* can we use the fast multiplier?
2816 *
2817 * The fast multiplier can be used if the output will
2818 * have less than MP_WARRAY digits and the number of
2819 * digits won't affect carry propagation
2820 */
2821 int digs = a->used + b->used + 1;
2822
2823#ifdef BN_FAST_S_MP_MUL_DIGS_C
2824 if ((digs < MP_WARRAY) &&
2825 MIN(a->used, b->used) <=
2826 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2827 res = fast_s_mp_mul_digs (a, b, c, digs);
2828 } else
2829#endif
2830#ifdef BN_S_MP_MUL_DIGS_C
2831 res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
2832#else
2833 res = MP_VAL;
2834#endif
2835
2836 }
2837 c->sign = (c->used > 0) ? neg : MP_ZPOS;
2838 return res;
2839}
2840
2841
2842/* b = a*2 */
2843int mp_mul_2(mp_int * a, mp_int * b)
2844{
2845 int x, res, oldused;
2846
2847 /* grow to accommodate result */
2848 if (b->alloc < a->used + 1) {
2849 if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
2850 return res;
2851 }
2852 }
2853
2854 oldused = b->used;
2855 b->used = a->used;
2856
2857 {
2858 mp_digit r, rr, *tmpa, *tmpb;
2859
2860 /* alias for source */
2861 tmpa = a->dp;
2862
2863 /* alias for dest */
2864 tmpb = b->dp;
2865
2866 /* carry */
2867 r = 0;
2868 for (x = 0; x < a->used; x++) {
2869
2870 /* get what will be the *next* carry bit from the
2871 * MSB of the current digit
2872 */
2873 rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
2874
2875 /* now shift up this digit, add in the carry [from the previous] */
2876 *tmpb++ = (mp_digit)(((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK);
2877
2878 /* copy the carry that would be from the source
2879 * digit into the next iteration
2880 */
2881 r = rr;
2882 }
2883
2884 /* new leading digit? */
2885 if (r != 0) {
2886 /* add a MSB which is always 1 at this point */
2887 *tmpb = 1;
2888 ++(b->used);
2889 }
2890
2891 /* now zero any excess digits on the destination
2892 * that we didn't write to
2893 */
2894 tmpb = b->dp + b->used;
2895 for (x = b->used; x < oldused; x++) {
2896 *tmpb++ = 0;
2897 }
2898 }
2899 b->sign = a->sign;
2900 return MP_OKAY;
2901}
2902
2903
2904/* divide by three (based on routine from MPI and the GMP manual) */
2905int mp_div_3 (mp_int * a, mp_int *c, mp_digit * d)
2906{
2907 mp_int q;
2908 mp_word w, t;
2909 mp_digit b;
2910 int res, ix;
2911
2912 /* b = 2**DIGIT_BIT / 3 */
2913 b = (mp_digit) ( (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3) );
2914
2915 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
2916 return res;
2917 }
2918
2919 q.used = a->used;
2920 q.sign = a->sign;
2921 w = 0;
2922 for (ix = a->used - 1; ix >= 0; ix--) {
2923 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
2924
2925 if (w >= 3) {
2926 /* multiply w by [1/3] */
2927 t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
2928
2929 /* now subtract 3 * [w/3] from w, to get the remainder */
2930 w -= t+t+t;
2931
2932 /* fixup the remainder as required since
2933 * the optimization is not exact.
2934 */
2935 while (w >= 3) {
2936 t += 1;
2937 w -= 3;
2938 }
2939 } else {
2940 t = 0;
2941 }
2942 q.dp[ix] = (mp_digit)t;
2943 }
2944
2945 /* [optional] store the remainder */
2946 if (d != NULL) {
2947 *d = (mp_digit)w;
2948 }
2949
2950 /* [optional] store the quotient */
2951 if (c != NULL) {
2952 mp_clamp(&q);
2953 mp_exch(&q, c);
2954 }
2955 mp_clear(&q);
2956
2957 return res;
2958}
2959
2960
2961/* init an mp_init for a given size */
2962int mp_init_size (mp_int * a, int size)
2963{
2964 int x;
2965
2966 /* pad size so there are always extra digits */
2967 size += (MP_PREC * 2) - (size % MP_PREC);
2968
2969 /* alloc mem */
2970 a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size, NULL,
2971 DYNAMIC_TYPE_BIGINT);
2972 if (a->dp == NULL) {
2973 return MP_MEM;
2974 }
2975
2976 /* set the members */
2977 a->used = 0;
2978 a->alloc = size;
2979 a->sign = MP_ZPOS;
2980#ifdef HAVE_WOLF_BIGINT
2981 wc_bigint_init(&a->raw);
2982#endif
2983
2984 /* zero the digits */
2985 for (x = 0; x < size; x++) {
2986 a->dp[x] = 0;
2987 }
2988
2989 return MP_OKAY;
2990}
2991
2992
2993/* the jist of squaring...
2994 * you do like mult except the offset of the tmpx [one that
2995 * starts closer to zero] can't equal the offset of tmpy.
2996 * So basically you set up iy like before then you min it with
2997 * (ty-tx) so that it never happens. You double all those
2998 * you add in the inner loop
2999
3000After that loop you do the squares and add them in.
3001*/
3002
3003int fast_s_mp_sqr (mp_int * a, mp_int * b)
3004{
3005 int olduse, res, pa, ix, iz;
3006#ifdef WOLFSSL_SMALL_STACK
3007 mp_digit* W; /* uses dynamic memory and slower */
3008#else
3009 mp_digit W[MP_WARRAY];
3010#endif
3011 mp_digit *tmpx;
3012 mp_word W1;
3013
3014 /* grow the destination as required */
3015 pa = a->used + a->used;
3016 if (b->alloc < pa) {
3017 if ((res = mp_grow (b, pa)) != MP_OKAY) {
3018 return res;
3019 }
3020 }
3021
3022 if (pa > MP_WARRAY)
3023 return MP_RANGE; /* TAO range check */
3024
3025#ifdef WOLFSSL_SMALL_STACK
3026 W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
3027 if (W == NULL)
3028 return MP_MEM;
3029#endif
3030
3031 /* number of output digits to produce */
3032 W1 = 0;
3033 for (ix = 0; ix < pa; ix++) {
3034 int tx, ty, iy;
3035 mp_word _W;
3036 mp_digit *tmpy;
3037
3038 /* clear counter */
3039 _W = 0;
3040
3041 /* get offsets into the two bignums */
3042 ty = MIN(a->used-1, ix);
3043 tx = ix - ty;
3044
3045 /* setup temp aliases */
3046 tmpx = a->dp + tx;
3047 tmpy = a->dp + ty;
3048
3049 /* this is the number of times the loop will iterate, essentially
3050 while (tx++ < a->used && ty-- >= 0) { ... }
3051 */
3052 iy = MIN(a->used-tx, ty+1);
3053
3054 /* now for squaring tx can never equal ty
3055 * we halve the distance since they approach at a rate of 2x
3056 * and we have to round because odd cases need to be executed
3057 */
3058 iy = MIN(iy, (ty-tx+1)>>1);
3059
3060 /* execute loop */
3061 for (iz = 0; iz < iy; iz++) {
3062 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
3063 }
3064
3065 /* double the inner product and add carry */
3066 _W = _W + _W + W1;
3067
3068 /* even columns have the square term in them */
3069 if ((ix&1) == 0) {
3070 _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
3071 }
3072
3073 /* store it */
3074 W[ix] = (mp_digit)(_W & MP_MASK);
3075
3076 /* make next carry */
3077 W1 = _W >> ((mp_word)DIGIT_BIT);
3078 }
3079
3080 /* setup dest */
3081 olduse = b->used;
3082 b->used = a->used+a->used;
3083
3084 {
3085 mp_digit *tmpb;
3086 tmpb = b->dp;
3087 for (ix = 0; ix < pa; ix++) {
3088 *tmpb++ = (mp_digit)(W[ix] & MP_MASK);
3089 }
3090
3091 /* clear unused digits [that existed in the old copy of c] */
3092 for (; ix < olduse; ix++) {
3093 *tmpb++ = 0;
3094 }
3095 }
3096 mp_clamp (b);
3097
3098#ifdef WOLFSSL_SMALL_STACK
3099 XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
3100#endif
3101
3102 return MP_OKAY;
3103}
3104
3105
3106/* Fast (comba) multiplier
3107 *
3108 * This is the fast column-array [comba] multiplier. It is
3109 * designed to compute the columns of the product first
3110 * then handle the carries afterwards. This has the effect
3111 * of making the nested loops that compute the columns very
3112 * simple and schedulable on super-scalar processors.
3113 *
3114 * This has been modified to produce a variable number of
3115 * digits of output so if say only a half-product is required
3116 * you don't have to compute the upper half (a feature
3117 * required for fast Barrett reduction).
3118 *
3119 * Based on Algorithm 14.12 on pp.595 of HAC.
3120 *
3121 */
3122int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3123{
3124 int olduse, res, pa, ix, iz;
3125#ifdef WOLFSSL_SMALL_STACK
3126 mp_digit* W; /* uses dynamic memory and slower */
3127#else
3128 mp_digit W[MP_WARRAY];
3129#endif
3130 mp_word _W;
3131
3132 /* grow the destination as required */
3133 if (c->alloc < digs) {
3134 if ((res = mp_grow (c, digs)) != MP_OKAY) {
3135 return res;
3136 }
3137 }
3138
3139 /* number of output digits to produce */
3140 pa = MIN(digs, a->used + b->used);
3141 if (pa > MP_WARRAY)
3142 return MP_RANGE; /* TAO range check */
3143
3144#ifdef WOLFSSL_SMALL_STACK
3145 W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
3146 if (W == NULL)
3147 return MP_MEM;
3148#endif
3149
3150 /* clear the carry */
3151 _W = 0;
3152 for (ix = 0; ix < pa; ix++) {
3153 int tx, ty;
3154 int iy;
3155 mp_digit *tmpx, *tmpy;
3156
3157 /* get offsets into the two bignums */
3158 ty = MIN(b->used-1, ix);
3159 tx = ix - ty;
3160
3161 /* setup temp aliases */
3162 tmpx = a->dp + tx;
3163 tmpy = b->dp + ty;
3164
3165 /* this is the number of times the loop will iterate, essentially
3166 while (tx++ < a->used && ty-- >= 0) { ... }
3167 */
3168 iy = MIN(a->used-tx, ty+1);
3169
3170 /* execute loop */
3171 for (iz = 0; iz < iy; ++iz) {
3172 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
3173
3174 }
3175
3176 /* store term */
3177 W[ix] = (mp_digit)(((mp_digit)_W) & MP_MASK);
3178
3179 /* make next carry */
3180 _W = _W >> ((mp_word)DIGIT_BIT);
3181 }
3182
3183 /* setup dest */
3184 olduse = c->used;
3185 c->used = pa;
3186
3187 {
3188 mp_digit *tmpc;
3189 tmpc = c->dp;
3190 for (ix = 0; ix < pa; ix++) { /* JRB, +1 could read uninitialized data */
3191 /* now extract the previous digit [below the carry] */
3192 *tmpc++ = W[ix];
3193 }
3194
3195 /* clear unused digits [that existed in the old copy of c] */
3196 for (; ix < olduse; ix++) {
3197 *tmpc++ = 0;
3198 }
3199 }
3200 mp_clamp (c);
3201
3202#ifdef WOLFSSL_SMALL_STACK
3203 XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
3204#endif
3205
3206 return MP_OKAY;
3207}
3208
3209
3210/* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
3211int s_mp_sqr (mp_int * a, mp_int * b)
3212{
3213 mp_int t;
3214 int res, ix, iy, pa;
3215 mp_word r;
3216 mp_digit u, tmpx, *tmpt;
3217
3218 pa = a->used;
3219 if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
3220 return res;
3221 }
3222
3223 /* default used is maximum possible size */
3224 t.used = 2*pa + 1;
3225
3226 for (ix = 0; ix < pa; ix++) {
3227 /* first calculate the digit at 2*ix */
3228 /* calculate double precision result */
3229 r = ((mp_word) t.dp[2*ix]) +
3230 ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
3231
3232 /* store lower part in result */
3233 t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
3234
3235 /* get the carry */
3236 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3237
3238 /* left hand side of A[ix] * A[iy] */
3239 tmpx = a->dp[ix];
3240
3241 /* alias for where to store the results */
3242 tmpt = t.dp + (2*ix + 1);
3243
3244 for (iy = ix + 1; iy < pa; iy++) {
3245 /* first calculate the product */
3246 r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
3247
3248 /* now calculate the double precision result, note we use
3249 * addition instead of *2 since it's easier to optimize
3250 */
3251 r = ((mp_word) *tmpt) + r + r + ((mp_word) u);
3252
3253 /* store lower part */
3254 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3255
3256 /* get carry */
3257 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3258 }
3259 /* propagate upwards */
3260 while (u != ((mp_digit) 0)) {
3261 r = ((mp_word) *tmpt) + ((mp_word) u);
3262 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3263 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3264 }
3265 }
3266
3267 mp_clamp (&t);
3268 mp_exch (&t, b);
3269 mp_clear (&t);
3270 return MP_OKAY;
3271}
3272
3273
3274/* multiplies |a| * |b| and only computes up to digs digits of result
3275 * HAC pp. 595, Algorithm 14.12 Modified so you can control how
3276 * many digits of output are created.
3277 */
3278int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3279{
3280 mp_int t;
3281 int res, pa, pb, ix, iy;
3282 mp_digit u;
3283 mp_word r;
3284 mp_digit tmpx, *tmpt, *tmpy;
3285
3286 /* can we use the fast multiplier? */
3287 if (((digs) < MP_WARRAY) &&
3288 MIN (a->used, b->used) <
3289 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
3290 return fast_s_mp_mul_digs (a, b, c, digs);
3291 }
3292
3293 if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
3294 return res;
3295 }
3296 t.used = digs;
3297
3298 /* compute the digits of the product directly */
3299 pa = a->used;
3300 for (ix = 0; ix < pa; ix++) {
3301 /* set the carry to zero */
3302 u = 0;
3303
3304 /* limit ourselves to making digs digits of output */
3305 pb = MIN (b->used, digs - ix);
3306
3307 /* setup some aliases */
3308 /* copy of the digit from a used within the nested loop */
3309 tmpx = a->dp[ix];
3310
3311 /* an alias for the destination shifted ix places */
3312 tmpt = t.dp + ix;
3313
3314 /* an alias for the digits of b */
3315 tmpy = b->dp;
3316
3317 /* compute the columns of the output and propagate the carry */
3318 for (iy = 0; iy < pb; iy++) {
3319 /* compute the column as a mp_word */
3320 r = ((mp_word)*tmpt) +
3321 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
3322 ((mp_word) u);
3323
3324 /* the new column is the lower part of the result */
3325 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3326
3327 /* get the carry word from the result */
3328 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
3329 }
3330 /* set carry if it is placed below digs */
3331 if (ix + iy < digs) {
3332 *tmpt = u;
3333 }
3334 }
3335
3336 mp_clamp (&t);
3337 mp_exch (&t, c);
3338
3339 mp_clear (&t);
3340 return MP_OKAY;
3341}
3342
3343
3344/*
3345 * shifts with subtractions when the result is greater than b.
3346 *
3347 * The method is slightly modified to shift B unconditionally up to just under
3348 * the leading bit of b. This saves a lot of multiple precision shifting.
3349 */
3350int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
3351{
3352 int x, bits, res;
3353
3354 /* how many bits of last digit does b use */
3355 bits = mp_count_bits (b) % DIGIT_BIT;
3356
3357 if (b->used > 1) {
3358 if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1))
3359 != MP_OKAY) {
3360 return res;
3361 }
3362 } else {
3363 if ((res = mp_set(a, 1)) != MP_OKAY) {
3364 return res;
3365 }
3366 bits = 1;
3367 }
3368
3369 /* now compute C = A * B mod b */
3370 for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
3371 if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
3372 return res;
3373 }
3374 if (mp_cmp_mag (a, b) != MP_LT) {
3375 if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
3376 return res;
3377 }
3378 }
3379 }
3380
3381 return MP_OKAY;
3382}
3383
3384
3385#ifdef MP_LOW_MEM
3386 #define TAB_SIZE 32
3387#else
3388 #define TAB_SIZE 256
3389#endif
3390
3391int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
3392{
3393 mp_int M[TAB_SIZE], res, mu;
3394 mp_digit buf;
3395 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
3396 int (*redux)(mp_int*,mp_int*,mp_int*);
3397
3398 /* find window size */
3399 x = mp_count_bits (X);
3400 if (x <= 7) {
3401 winsize = 2;
3402 } else if (x <= 36) {
3403 winsize = 3;
3404 } else if (x <= 140) {
3405 winsize = 4;
3406 } else if (x <= 450) {
3407 winsize = 5;
3408 } else if (x <= 1303) {
3409 winsize = 6;
3410 } else if (x <= 3529) {
3411 winsize = 7;
3412 } else {
3413 winsize = 8;
3414 }
3415
3416#ifdef MP_LOW_MEM
3417 if (winsize > 5) {
3418 winsize = 5;
3419 }
3420#endif
3421
3422 /* init M array */
3423 /* init first cell */
3424 if ((err = mp_init(&M[1])) != MP_OKAY) {
3425 return err;
3426 }
3427
3428 /* now init the second half of the array */
3429 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3430 if ((err = mp_init(&M[x])) != MP_OKAY) {
3431 for (y = 1<<(winsize-1); y < x; y++) {
3432 mp_clear (&M[y]);
3433 }
3434 mp_clear(&M[1]);
3435 return err;
3436 }
3437 }
3438
3439 /* create mu, used for Barrett reduction */
3440 if ((err = mp_init (&mu)) != MP_OKAY) {
3441 goto LBL_M;
3442 }
3443
3444 if (redmode == 0) {
3445 if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
3446 goto LBL_MU;
3447 }
3448 redux = mp_reduce;
3449 } else {
3450 if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
3451 goto LBL_MU;
3452 }
3453 redux = mp_reduce_2k_l;
3454 }
3455
3456 /* create M table
3457 *
3458 * The M table contains powers of the base,
3459 * e.g. M[x] = G**x mod P
3460 *
3461 * The first half of the table is not
3462 * computed though accept for M[0] and M[1]
3463 */
3464 if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
3465 goto LBL_MU;
3466 }
3467
3468 /* compute the value at M[1<<(winsize-1)] by squaring
3469 * M[1] (winsize-1) times
3470 */
3471 if ((err = mp_copy (&M[1], &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
3472 goto LBL_MU;
3473 }
3474
3475 for (x = 0; x < (winsize - 1); x++) {
3476 /* square it */
3477 if ((err = mp_sqr (&M[(mp_digit)(1 << (winsize - 1))],
3478 &M[(mp_digit)(1 << (winsize - 1))])) != MP_OKAY) {
3479 goto LBL_MU;
3480 }
3481
3482 /* reduce modulo P */
3483 if ((err = redux (&M[(mp_digit)(1 << (winsize - 1))], P, &mu)) != MP_OKAY) {
3484 goto LBL_MU;
3485 }
3486 }
3487
3488 /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
3489 * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
3490 */
3491 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
3492 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
3493 goto LBL_MU;
3494 }
3495 if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
3496 goto LBL_MU;
3497 }
3498 }
3499
3500 /* setup result */
3501 if ((err = mp_init (&res)) != MP_OKAY) {
3502 goto LBL_MU;
3503 }
3504 if ((err = mp_set (&res, 1)) != MP_OKAY) {
3505 goto LBL_MU;
3506 }
3507
3508 /* set initial mode and bit cnt */
3509 mode = 0;
3510 bitcnt = 1;
3511 buf = 0;
3512 digidx = X->used - 1;
3513 bitcpy = 0;
3514 bitbuf = 0;
3515
3516 for (;;) {
3517 /* grab next digit as required */
3518 if (--bitcnt == 0) {
3519 /* if digidx == -1 we are out of digits */
3520 if (digidx == -1) {
3521 break;
3522 }
3523 /* read next digit and reset the bitcnt */
3524 buf = X->dp[digidx--];
3525 bitcnt = (int) DIGIT_BIT;
3526 }
3527
3528 /* grab the next msb from the exponent */
3529 y = (int)(buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
3530 buf <<= (mp_digit)1;
3531
3532 /* if the bit is zero and mode == 0 then we ignore it
3533 * These represent the leading zero bits before the first 1 bit
3534 * in the exponent. Technically this opt is not required but it
3535 * does lower the # of trivial squaring/reductions used
3536 */
3537 if (mode == 0 && y == 0) {
3538 continue;
3539 }
3540
3541 /* if the bit is zero and mode == 1 then we square */
3542 if (mode == 1 && y == 0) {
3543 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3544 goto LBL_RES;
3545 }
3546 if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3547 goto LBL_RES;
3548 }
3549 continue;
3550 }
3551
3552 /* else we add it to the window */
3553 bitbuf |= (y << (winsize - ++bitcpy));
3554 mode = 2;
3555
3556 if (bitcpy == winsize) {
3557 /* ok window is filled so square as required and multiply */
3558 /* square first */
3559 for (x = 0; x < winsize; x++) {
3560 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3561 goto LBL_RES;
3562 }
3563 if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3564 goto LBL_RES;
3565 }
3566 }
3567
3568 /* then multiply */
3569 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
3570 goto LBL_RES;
3571 }
3572 if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3573 goto LBL_RES;
3574 }
3575
3576 /* empty window and reset */
3577 bitcpy = 0;
3578 bitbuf = 0;
3579 mode = 1;
3580 }
3581 }
3582
3583 /* if bits remain then square/multiply */
3584 if (mode == 2 && bitcpy > 0) {
3585 /* square then multiply if the bit is set */
3586 for (x = 0; x < bitcpy; x++) {
3587 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
3588 goto LBL_RES;
3589 }
3590 if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3591 goto LBL_RES;
3592 }
3593
3594 bitbuf <<= 1;
3595 if ((bitbuf & (1 << winsize)) != 0) {
3596 /* then multiply */
3597 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
3598 goto LBL_RES;
3599 }
3600 if ((err = redux (&res, P, &mu)) != MP_OKAY) {
3601 goto LBL_RES;
3602 }
3603 }
3604 }
3605 }
3606
3607 mp_exch (&res, Y);
3608 err = MP_OKAY;
3609LBL_RES:mp_clear (&res);
3610LBL_MU:mp_clear (&mu);
3611LBL_M:
3612 mp_clear(&M[1]);
3613 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3614 mp_clear (&M[x]);
3615 }
3616 return err;
3617}
3618
3619
3620/* pre-calculate the value required for Barrett reduction
3621 * For a given modulus "b" it calculates the value required in "a"
3622 */
3623int mp_reduce_setup (mp_int * a, mp_int * b)
3624{
3625 int res;
3626
3627 if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
3628 return res;
3629 }
3630 return mp_div (a, b, a, NULL);
3631}
3632
3633
3634/* reduces x mod m, assumes 0 < x < m**2, mu is
3635 * precomputed via mp_reduce_setup.
3636 * From HAC pp.604 Algorithm 14.42
3637 */
3638int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
3639{
3640 mp_int q;
3641 int res, um = m->used;
3642
3643 /* q = x */
3644 if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
3645 return res;
3646 }
3647
3648 /* q1 = x / b**(k-1) */
3649 mp_rshd (&q, um - 1);
3650
3651 /* according to HAC this optimization is ok */
3652 if (((mp_word) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
3653 if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
3654 goto CLEANUP;
3655 }
3656 } else {
3657#ifdef BN_S_MP_MUL_HIGH_DIGS_C
3658 if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
3659 goto CLEANUP;
3660 }
3661#elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
3662 if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
3663 goto CLEANUP;
3664 }
3665#else
3666 {
3667 res = MP_VAL;
3668 goto CLEANUP;
3669 }
3670#endif
3671 }
3672
3673 /* q3 = q2 / b**(k+1) */
3674 mp_rshd (&q, um + 1);
3675
3676 /* x = x mod b**(k+1), quick (no division) */
3677 if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
3678 goto CLEANUP;
3679 }
3680
3681 /* q = q * m mod b**(k+1), quick (no division) */
3682 if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
3683 goto CLEANUP;
3684 }
3685
3686 /* x = x - q */
3687 if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
3688 goto CLEANUP;
3689 }
3690
3691 /* If x < 0, add b**(k+1) to it */
3692 if (mp_cmp_d (x, 0) == MP_LT) {
3693 if ((res = mp_set (&q, 1)) != MP_OKAY)
3694 goto CLEANUP;
3695 if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
3696 goto CLEANUP;
3697 if ((res = mp_add (x, &q, x)) != MP_OKAY)
3698 goto CLEANUP;
3699 }
3700
3701 /* Back off if it's too big */
3702 while (mp_cmp (x, m) != MP_LT) {
3703 if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
3704 goto CLEANUP;
3705 }
3706 }
3707
3708CLEANUP:
3709 mp_clear (&q);
3710
3711 return res;
3712}
3713
3714
3715/* reduces a modulo n where n is of the form 2**p - d
3716 This differs from reduce_2k since "d" can be larger
3717 than a single digit.
3718*/
3719int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d)
3720{
3721 mp_int q;
3722 int p, res;
3723
3724 if ((res = mp_init(&q)) != MP_OKAY) {
3725 return res;
3726 }
3727
3728 p = mp_count_bits(n);
3729top:
3730 /* q = a/2**p, a = a mod 2**p */
3731 if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
3732 goto ERR;
3733 }
3734
3735 /* q = q * d */
3736 if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
3737 goto ERR;
3738 }
3739
3740 /* a = a + q */
3741 if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
3742 goto ERR;
3743 }
3744
3745 if (mp_cmp_mag(a, n) != MP_LT) {
3746 if ((res = s_mp_sub(a, n, a)) != MP_OKAY) {
3747 goto ERR;
3748 }
3749 goto top;
3750 }
3751
3752ERR:
3753 mp_clear(&q);
3754 return res;
3755}
3756
3757
3758/* determines the setup value */
3759int mp_reduce_2k_setup_l(mp_int *a, mp_int *d)
3760{
3761 int res;
3762 mp_int tmp;
3763
3764 if ((res = mp_init(&tmp)) != MP_OKAY) {
3765 return res;
3766 }
3767
3768 if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
3769 goto ERR;
3770 }
3771
3772 if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
3773 goto ERR;
3774 }
3775
3776ERR:
3777 mp_clear(&tmp);
3778 return res;
3779}
3780
3781
3782/* multiplies |a| * |b| and does not compute the lower digs digits
3783 * [meant to get the higher part of the product]
3784 */
3785int s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3786{
3787 mp_int t;
3788 int res, pa, pb, ix, iy;
3789 mp_digit u;
3790 mp_word r;
3791 mp_digit tmpx, *tmpt, *tmpy;
3792
3793 /* can we use the fast multiplier? */
3794#ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
3795 if (((a->used + b->used + 1) < MP_WARRAY)
3796 && MIN (a->used, b->used) <
3797 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
3798 return fast_s_mp_mul_high_digs (a, b, c, digs);
3799 }
3800#endif
3801
3802 if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
3803 return res;
3804 }
3805 t.used = a->used + b->used + 1;
3806
3807 pa = a->used;
3808 pb = b->used;
3809 for (ix = 0; ix < pa && a->dp; ix++) {
3810 /* clear the carry */
3811 u = 0;
3812
3813 /* left hand side of A[ix] * B[iy] */
3814 tmpx = a->dp[ix];
3815
3816 /* alias to the address of where the digits will be stored */
3817 tmpt = &(t.dp[digs]);
3818
3819 /* alias for where to read the right hand side from */
3820 tmpy = b->dp + (digs - ix);
3821
3822 for (iy = digs - ix; iy < pb; iy++) {
3823 /* calculate the double precision result */
3824 r = ((mp_word)*tmpt) +
3825 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
3826 ((mp_word) u);
3827
3828 /* get the lower part */
3829 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3830
3831 /* carry the carry */
3832 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
3833 }
3834 *tmpt = u;
3835 }
3836 mp_clamp (&t);
3837 mp_exch (&t, c);
3838 mp_clear (&t);
3839 return MP_OKAY;
3840}
3841
3842
3843/* this is a modified version of fast_s_mul_digs that only produces
3844 * output digits *above* digs. See the comments for fast_s_mul_digs
3845 * to see how it works.
3846 *
3847 * This is used in the Barrett reduction since for one of the multiplications
3848 * only the higher digits were needed. This essentially halves the work.
3849 *
3850 * Based on Algorithm 14.12 on pp.595 of HAC.
3851 */
3852int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
3853{
3854 int olduse, res, pa, ix, iz;
3855#ifdef WOLFSSL_SMALL_STACK
3856 mp_digit* W; /* uses dynamic memory and slower */
3857#else
3858 mp_digit W[MP_WARRAY];
3859#endif
3860 mp_word _W;
3861
3862 if (a->dp == NULL) { /* JRB, avoid reading uninitialized values */
3863 return MP_VAL;
3864 }
3865
3866 /* grow the destination as required */
3867 pa = a->used + b->used;
3868 if (c->alloc < pa) {
3869 if ((res = mp_grow (c, pa)) != MP_OKAY) {
3870 return res;
3871 }
3872 }
3873
3874 if (pa > MP_WARRAY)
3875 return MP_RANGE; /* TAO range check */
3876
3877#ifdef WOLFSSL_SMALL_STACK
3878 W = (mp_digit*)XMALLOC(sizeof(mp_digit) * MP_WARRAY, NULL, DYNAMIC_TYPE_BIGINT);
3879 if (W == NULL)
3880 return MP_MEM;
3881#endif
3882
3883 /* number of output digits to produce */
3884 pa = a->used + b->used;
3885 _W = 0;
3886 for (ix = digs; ix < pa; ix++) { /* JRB, have a->dp check at top of function*/
3887 int tx, ty, iy;
3888 mp_digit *tmpx, *tmpy;
3889
3890 /* get offsets into the two bignums */
3891 ty = MIN(b->used-1, ix);
3892 tx = ix - ty;
3893
3894 /* setup temp aliases */
3895 tmpx = a->dp + tx;
3896 tmpy = b->dp + ty;
3897
3898 /* this is the number of times the loop will iterate, essentially its
3899 while (tx++ < a->used && ty-- >= 0) { ... }
3900 */
3901 iy = MIN(a->used-tx, ty+1);
3902
3903 /* execute loop */
3904 for (iz = 0; iz < iy; iz++) {
3905 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
3906 }
3907
3908 /* store term */
3909 W[ix] = (mp_digit)(((mp_digit)_W) & MP_MASK);
3910
3911 /* make next carry */
3912 _W = _W >> ((mp_word)DIGIT_BIT);
3913 }
3914
3915 /* setup dest */
3916 olduse = c->used;
3917 c->used = pa;
3918
3919 {
3920 mp_digit *tmpc;
3921
3922 tmpc = c->dp + digs;
3923 for (ix = digs; ix < pa; ix++) { /* TAO, <= could potentially overwrite */
3924 /* now extract the previous digit [below the carry] */
3925 *tmpc++ = W[ix];
3926 }
3927
3928 /* clear unused digits [that existed in the old copy of c] */
3929 for (; ix < olduse; ix++) {
3930 *tmpc++ = 0;
3931 }
3932 }
3933 mp_clamp (c);
3934
3935#ifdef WOLFSSL_SMALL_STACK
3936 XFREE(W, NULL, DYNAMIC_TYPE_BIGINT);
3937#endif
3938
3939 return MP_OKAY;
3940}
3941
3942
3943#ifndef MP_SET_CHUNK_BITS
3944 #define MP_SET_CHUNK_BITS 4
3945#endif
3946int mp_set_int (mp_int * a, unsigned long b)
3947{
3948 int x, res;
3949
3950 /* use direct mp_set if b is less than mp_digit max */
3951 if (b < MP_DIGIT_MAX) {
3952 return mp_set (a, (mp_digit)b);
3953 }
3954
3955 mp_zero (a);
3956
3957 /* set chunk bits at a time */
3958 for (x = 0; x < (int)(sizeof(b) * 8) / MP_SET_CHUNK_BITS; x++) {
3959 /* shift the number up chunk bits */
3960 if ((res = mp_mul_2d (a, MP_SET_CHUNK_BITS, a)) != MP_OKAY) {
3961 return res;
3962 }
3963
3964 /* OR in the top bits of the source */
3965 a->dp[0] |= (b >> ((sizeof(b) * 8) - MP_SET_CHUNK_BITS)) &
3966 ((1 << MP_SET_CHUNK_BITS) - 1);
3967
3968 /* shift the source up to the next chunk bits */
3969 b <<= MP_SET_CHUNK_BITS;
3970
3971 /* ensure that digits are not clamped off */
3972 a->used += 1;
3973 }
3974 mp_clamp (a);
3975 return MP_OKAY;
3976}
3977
3978
3979#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_ECC)
3980
3981/* c = a * a (mod b) */
3982int mp_sqrmod (mp_int * a, mp_int * b, mp_int * c)
3983{
3984 int res;
3985 mp_int t;
3986
3987 if ((res = mp_init (&t)) != MP_OKAY) {
3988 return res;
3989 }
3990
3991 if ((res = mp_sqr (a, &t)) != MP_OKAY) {
3992 mp_clear (&t);
3993 return res;
3994 }
3995 res = mp_mod (&t, b, c);
3996 mp_clear (&t);
3997 return res;
3998}
3999
4000#endif
4001
4002
4003#if defined(HAVE_ECC) || !defined(NO_PWDBASED) || defined(WOLFSSL_SNIFFER) || \
4004 defined(WOLFSSL_HAVE_WOLFSCEP) || defined(WOLFSSL_KEY_GEN) || \
4005 defined(OPENSSL_EXTRA) || defined(WC_RSA_BLINDING)
4006
4007/* single digit addition */
4008int mp_add_d (mp_int* a, mp_digit b, mp_int* c)
4009{
4010 int res, ix, oldused;
4011 mp_digit *tmpa, *tmpc, mu;
4012
4013 /* grow c as required */
4014 if (c->alloc < a->used + 1) {
4015 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
4016 return res;
4017 }
4018 }
4019
4020 /* if a is negative and |a| >= b, call c = |a| - b */
4021 if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) {
4022 /* temporarily fix sign of a */
4023 a->sign = MP_ZPOS;
4024
4025 /* c = |a| - b */
4026 res = mp_sub_d(a, b, c);
4027
4028 /* fix sign */
4029 a->sign = c->sign = MP_NEG;
4030
4031 /* clamp */
4032 mp_clamp(c);
4033
4034 return res;
4035 }
4036
4037 /* old number of used digits in c */
4038 oldused = c->used;
4039
4040 /* sign always positive */
4041 c->sign = MP_ZPOS;
4042
4043 /* source alias */
4044 tmpa = a->dp;
4045
4046 /* destination alias */
4047 tmpc = c->dp;
4048
4049 /* if a is positive */
4050 if (a->sign == MP_ZPOS) {
4051 /* add digit, after this we're propagating
4052 * the carry.
4053 */
4054 *tmpc = *tmpa++ + b;
4055 mu = *tmpc >> DIGIT_BIT;
4056 *tmpc++ &= MP_MASK;
4057
4058 /* now handle rest of the digits */
4059 for (ix = 1; ix < a->used; ix++) {
4060 *tmpc = *tmpa++ + mu;
4061 mu = *tmpc >> DIGIT_BIT;
4062 *tmpc++ &= MP_MASK;
4063 }
4064 /* set final carry */
4065 if (ix < c->alloc) {
4066 ix++;
4067 *tmpc++ = mu;
4068 }
4069
4070 /* setup size */
4071 c->used = a->used + 1;
4072 } else {
4073 /* a was negative and |a| < b */
4074 c->used = 1;
4075
4076 /* the result is a single digit */
4077 if (a->used == 1) {
4078 *tmpc++ = b - a->dp[0];
4079 } else {
4080 *tmpc++ = b;
4081 }
4082
4083 /* setup count so the clearing of oldused
4084 * can fall through correctly
4085 */
4086 ix = 1;
4087 }
4088
4089 /* now zero to oldused */
4090 while (ix++ < oldused) {
4091 *tmpc++ = 0;
4092 }
4093 mp_clamp(c);
4094
4095 return MP_OKAY;
4096}
4097
4098
4099/* single digit subtraction */
4100int mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
4101{
4102 mp_digit *tmpa, *tmpc, mu;
4103 int res, ix, oldused;
4104
4105 /* grow c as required */
4106 if (c->alloc < a->used + 1) {
4107 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
4108 return res;
4109 }
4110 }
4111
4112 /* if a is negative just do an unsigned
4113 * addition [with fudged signs]
4114 */
4115 if (a->sign == MP_NEG) {
4116 a->sign = MP_ZPOS;
4117 res = mp_add_d(a, b, c);
4118 a->sign = c->sign = MP_NEG;
4119
4120 /* clamp */
4121 mp_clamp(c);
4122
4123 return res;
4124 }
4125
4126 /* setup regs */
4127 oldused = c->used;
4128 tmpa = a->dp;
4129 tmpc = c->dp;
4130
4131 /* if a <= b simply fix the single digit */
4132 if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) {
4133 if (a->used == 1) {
4134 *tmpc++ = b - *tmpa;
4135 } else {
4136 *tmpc++ = b;
4137 }
4138 ix = 1;
4139
4140 /* negative/1digit */
4141 c->sign = MP_NEG;
4142 c->used = 1;
4143 } else {
4144 /* positive/size */
4145 c->sign = MP_ZPOS;
4146 c->used = a->used;
4147
4148 /* subtract first digit */
4149 *tmpc = *tmpa++ - b;
4150 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
4151 *tmpc++ &= MP_MASK;
4152
4153 /* handle rest of the digits */
4154 for (ix = 1; ix < a->used; ix++) {
4155 *tmpc = *tmpa++ - mu;
4156 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
4157 *tmpc++ &= MP_MASK;
4158 }
4159 }
4160
4161 /* zero excess digits */
4162 while (ix++ < oldused) {
4163 *tmpc++ = 0;
4164 }
4165 mp_clamp(c);
4166 return MP_OKAY;
4167}
4168
4169#endif /* defined(HAVE_ECC) || !defined(NO_PWDBASED) */
4170
4171
4172#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || defined(HAVE_ECC)
4173
4174static const int lnz[16] = {
4175 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
4176};
4177
4178/* Counts the number of lsbs which are zero before the first zero bit */
4179int mp_cnt_lsb(mp_int *a)
4180{
4181 int x;
4182 mp_digit q = 0, qq;
4183
4184 /* easy out */
4185 if (mp_iszero(a) == MP_YES) {
4186 return 0;
4187 }
4188
4189 /* scan lower digits until non-zero */
4190 for (x = 0; x < a->used && a->dp[x] == 0; x++) {}
4191 if (a->dp)
4192 q = a->dp[x];
4193 x *= DIGIT_BIT;
4194
4195 /* now scan this digit until a 1 is found */
4196 if ((q & 1) == 0) {
4197 do {
4198 qq = q & 15;
4199 x += lnz[qq];
4200 q >>= 4;
4201 } while (qq == 0);
4202 }
4203 return x;
4204}
4205
4206
4207
4208
4209static int s_is_power_of_two(mp_digit b, int *p)
4210{
4211 int x;
4212
4213 /* fast return if no power of two */
4214 if ((b==0) || (b & (b-1))) {
4215 return 0;
4216 }
4217
4218 for (x = 0; x < DIGIT_BIT; x++) {
4219 if (b == (((mp_digit)1)<<x)) {
4220 *p = x;
4221 return 1;
4222 }
4223 }
4224 return 0;
4225}
4226
4227/* single digit division (based on routine from MPI) */
4228static int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
4229{
4230 mp_int q;
4231 mp_word w;
4232 mp_digit t;
4233 int res = MP_OKAY, ix;
4234
4235 /* cannot divide by zero */
4236 if (b == 0) {
4237 return MP_VAL;
4238 }
4239
4240 /* quick outs */
4241 if (b == 1 || mp_iszero(a) == MP_YES) {
4242 if (d != NULL) {
4243 *d = 0;
4244 }
4245 if (c != NULL) {
4246 return mp_copy(a, c);
4247 }
4248 return MP_OKAY;
4249 }
4250
4251 /* power of two ? */
4252 if (s_is_power_of_two(b, &ix) == 1) {
4253 if (d != NULL) {
4254 *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
4255 }
4256 if (c != NULL) {
4257 return mp_div_2d(a, ix, c, NULL);
4258 }
4259 return MP_OKAY;
4260 }
4261
4262#ifdef BN_MP_DIV_3_C
4263 /* three? */
4264 if (b == 3) {
4265 return mp_div_3(a, c, d);
4266 }
4267#endif
4268
4269 /* no easy answer [c'est la vie]. Just division */
4270 if (c != NULL) {
4271 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
4272 return res;
4273 }
4274
4275 q.used = a->used;
4276 q.sign = a->sign;
4277 }
4278 else {
4279 if ((res = mp_init(&q)) != MP_OKAY) {
4280 return res;
4281 }
4282 }
4283
4284
4285 w = 0;
4286 for (ix = a->used - 1; ix >= 0; ix--) {
4287 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
4288
4289 if (w >= b) {
4290 t = (mp_digit)(w / b);
4291 w -= ((mp_word)t) * ((mp_word)b);
4292 } else {
4293 t = 0;
4294 }
4295 if (c != NULL)
4296 q.dp[ix] = (mp_digit)t;
4297 }
4298
4299 if (d != NULL) {
4300 *d = (mp_digit)w;
4301 }
4302
4303 if (c != NULL) {
4304 mp_clamp(&q);
4305 mp_exch(&q, c);
4306 }
4307 mp_clear(&q);
4308
4309 return res;
4310}
4311
4312
4313int mp_mod_d (mp_int * a, mp_digit b, mp_digit * c)
4314{
4315 return mp_div_d(a, b, NULL, c);
4316}
4317
4318#endif /* defined(WOLFSSL_KEY_GEN)||defined(HAVE_COMP_KEY)||defined(HAVE_ECC) */
4319
4320#ifdef WOLFSSL_KEY_GEN
4321
4322const mp_digit ltm_prime_tab[PRIME_SIZE] = {
4323 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
4324 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
4325 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
4326 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F,
4327#ifndef MP_8BIT
4328 0x0083,
4329 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
4330 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
4331 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
4332 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
4333
4334 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
4335 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
4336 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
4337 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
4338 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
4339 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
4340 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
4341 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
4342
4343 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
4344 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
4345 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
4346 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
4347 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
4348 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
4349 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
4350 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
4351
4352 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
4353 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
4354 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
4355 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
4356 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
4357 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
4358 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
4359 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
4360#endif
4361};
4362
4363
4364/* Miller-Rabin test of "a" to the base of "b" as described in
4365 * HAC pp. 139 Algorithm 4.24
4366 *
4367 * Sets result to 0 if definitely composite or 1 if probably prime.
4368 * Randomly the chance of error is no more than 1/4 and often
4369 * very much lower.
4370 */
4371static int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
4372{
4373 mp_int n1, y, r;
4374 int s, j, err;
4375
4376 /* default */
4377 *result = MP_NO;
4378
4379 /* ensure b > 1 */
4380 if (mp_cmp_d(b, 1) != MP_GT) {
4381 return MP_VAL;
4382 }
4383
4384 /* get n1 = a - 1 */
4385 if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
4386 return err;
4387 }
4388 if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
4389 goto LBL_N1;
4390 }
4391
4392 /* set 2**s * r = n1 */
4393 if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
4394 goto LBL_N1;
4395 }
4396
4397 /* count the number of least significant bits
4398 * which are zero
4399 */
4400 s = mp_cnt_lsb(&r);
4401
4402 /* now divide n - 1 by 2**s */
4403 if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
4404 goto LBL_R;
4405 }
4406
4407 /* compute y = b**r mod a */
4408 if ((err = mp_init (&y)) != MP_OKAY) {
4409 goto LBL_R;
4410 }
4411 if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
4412 goto LBL_Y;
4413 }
4414
4415 /* if y != 1 and y != n1 do */
4416 if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
4417 j = 1;
4418 /* while j <= s-1 and y != n1 */
4419 while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
4420 if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
4421 goto LBL_Y;
4422 }
4423
4424 /* if y == 1 then composite */
4425 if (mp_cmp_d (&y, 1) == MP_EQ) {
4426 goto LBL_Y;
4427 }
4428
4429 ++j;
4430 }
4431
4432 /* if y != n1 then composite */
4433 if (mp_cmp (&y, &n1) != MP_EQ) {
4434 goto LBL_Y;
4435 }
4436 }
4437
4438 /* probably prime now */
4439 *result = MP_YES;
4440LBL_Y:mp_clear (&y);
4441LBL_R:mp_clear (&r);
4442LBL_N1:mp_clear (&n1);
4443 return err;
4444}
4445
4446
4447/* determines if an integers is divisible by one
4448 * of the first PRIME_SIZE primes or not
4449 *
4450 * sets result to 0 if not, 1 if yes
4451 */
4452static int mp_prime_is_divisible (mp_int * a, int *result)
4453{
4454 int err, ix;
4455 mp_digit res;
4456
4457 /* default to not */
4458 *result = MP_NO;
4459
4460 for (ix = 0; ix < PRIME_SIZE; ix++) {
4461 /* what is a mod LBL_prime_tab[ix] */
4462 if ((err = mp_mod_d (a, ltm_prime_tab[ix], &res)) != MP_OKAY) {
4463 return err;
4464 }
4465
4466 /* is the residue zero? */
4467 if (res == 0) {
4468 *result = MP_YES;
4469 return MP_OKAY;
4470 }
4471 }
4472
4473 return MP_OKAY;
4474}
4475
4476static const int USE_BBS = 1;
4477
4478int mp_rand_prime(mp_int* N, int len, WC_RNG* rng, void* heap)
4479{
4480 int err, res, type;
4481 byte* buf;
4482
4483 if (N == NULL || rng == NULL)
4484 return MP_VAL;
4485
4486 /* get type */
4487 if (len < 0) {
4488 type = USE_BBS;
4489 len = -len;
4490 } else {
4491 type = 0;
4492 }
4493
4494 /* allow sizes between 2 and 512 bytes for a prime size */
4495 if (len < 2 || len > 512) {
4496 return MP_VAL;
4497 }
4498
4499 /* allocate buffer to work with */
4500 buf = (byte*)XMALLOC(len, heap, DYNAMIC_TYPE_RSA);
4501 if (buf == NULL) {
4502 return MP_MEM;
4503 }
4504 XMEMSET(buf, 0, len);
4505
4506 do {
4507#ifdef SHOW_GEN
4508 printf(".");
4509 fflush(stdout);
4510#endif
4511 /* generate value */
4512 err = wc_RNG_GenerateBlock(rng, buf, len);
4513 if (err != 0) {
4514 XFREE(buf, heap, DYNAMIC_TYPE_RSA);
4515 return err;
4516 }
4517
4518 /* munge bits */
4519 buf[0] |= 0x80 | 0x40;
4520 buf[len-1] |= 0x01 | ((type & USE_BBS) ? 0x02 : 0x00);
4521
4522 /* load value */
4523 if ((err = mp_read_unsigned_bin(N, buf, len)) != MP_OKAY) {
4524 XFREE(buf, heap, DYNAMIC_TYPE_RSA);
4525 return err;
4526 }
4527
4528 /* test */
4529 if ((err = mp_prime_is_prime(N, 8, &res)) != MP_OKAY) {
4530 XFREE(buf, heap, DYNAMIC_TYPE_RSA);
4531 return err;
4532 }
4533 } while (res == MP_NO);
4534
4535 XMEMSET(buf, 0, len);
4536 XFREE(buf, heap, DYNAMIC_TYPE_RSA);
4537
4538 return MP_OKAY;
4539}
4540
4541/*
4542 * Sets result to 1 if probably prime, 0 otherwise
4543 */
4544int mp_prime_is_prime (mp_int * a, int t, int *result)
4545{
4546 mp_int b;
4547 int ix, err, res;
4548
4549 /* default to no */
4550 *result = MP_NO;
4551
4552 /* valid value of t? */
4553 if (t <= 0 || t > PRIME_SIZE) {
4554 return MP_VAL;
4555 }
4556
4557 /* is the input equal to one of the primes in the table? */
4558 for (ix = 0; ix < PRIME_SIZE; ix++) {
4559 if (mp_cmp_d(a, ltm_prime_tab[ix]) == MP_EQ) {
4560 *result = 1;
4561 return MP_OKAY;
4562 }
4563 }
4564
4565 /* first perform trial division */
4566 if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
4567 return err;
4568 }
4569
4570 /* return if it was trivially divisible */
4571 if (res == MP_YES) {
4572 return MP_OKAY;
4573 }
4574
4575 /* now perform the miller-rabin rounds */
4576 if ((err = mp_init (&b)) != MP_OKAY) {
4577 return err;
4578 }
4579
4580 for (ix = 0; ix < t; ix++) {
4581 /* set the prime */
4582 if ((err = mp_set (&b, ltm_prime_tab[ix])) != MP_OKAY) {
4583 goto LBL_B;
4584 }
4585
4586 if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
4587 goto LBL_B;
4588 }
4589
4590 if (res == MP_NO) {
4591 goto LBL_B;
4592 }
4593 }
4594
4595 /* passed the test */
4596 *result = MP_YES;
4597LBL_B:mp_clear (&b);
4598 return err;
4599}
4600
4601
4602/* computes least common multiple as |a*b|/(a, b) */
4603int mp_lcm (mp_int * a, mp_int * b, mp_int * c)
4604{
4605 int res;
4606 mp_int t1, t2;
4607
4608
4609 if ((res = mp_init_multi (&t1, &t2, NULL, NULL, NULL, NULL)) != MP_OKAY) {
4610 return res;
4611 }
4612
4613 /* t1 = get the GCD of the two inputs */
4614 if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
4615 goto LBL_T;
4616 }
4617
4618 /* divide the smallest by the GCD */
4619 if (mp_cmp_mag(a, b) == MP_LT) {
4620 /* store quotient in t2 such that t2 * b is the LCM */
4621 if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
4622 goto LBL_T;
4623 }
4624 res = mp_mul(b, &t2, c);
4625 } else {
4626 /* store quotient in t2 such that t2 * a is the LCM */
4627 if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
4628 goto LBL_T;
4629 }
4630 res = mp_mul(a, &t2, c);
4631 }
4632
4633 /* fix the sign to positive */
4634 c->sign = MP_ZPOS;
4635
4636LBL_T:
4637 mp_clear(&t1);
4638 mp_clear(&t2);
4639 return res;
4640}
4641
4642
4643
4644/* Greatest Common Divisor using the binary method */
4645int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
4646{
4647 mp_int u, v;
4648 int k, u_lsb, v_lsb, res;
4649
4650 /* either zero than gcd is the largest */
4651 if (mp_iszero (a) == MP_YES) {
4652 return mp_abs (b, c);
4653 }
4654 if (mp_iszero (b) == MP_YES) {
4655 return mp_abs (a, c);
4656 }
4657
4658 /* get copies of a and b we can modify */
4659 if ((res = mp_init_copy (&u, a)) != MP_OKAY) {
4660 return res;
4661 }
4662
4663 if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
4664 goto LBL_U;
4665 }
4666
4667 /* must be positive for the remainder of the algorithm */
4668 u.sign = v.sign = MP_ZPOS;
4669
4670 /* B1. Find the common power of two for u and v */
4671 u_lsb = mp_cnt_lsb(&u);
4672 v_lsb = mp_cnt_lsb(&v);
4673 k = MIN(u_lsb, v_lsb);
4674
4675 if (k > 0) {
4676 /* divide the power of two out */
4677 if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
4678 goto LBL_V;
4679 }
4680
4681 if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
4682 goto LBL_V;
4683 }
4684 }
4685
4686 /* divide any remaining factors of two out */
4687 if (u_lsb != k) {
4688 if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
4689 goto LBL_V;
4690 }
4691 }
4692
4693 if (v_lsb != k) {
4694 if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
4695 goto LBL_V;
4696 }
4697 }
4698
4699 while (mp_iszero(&v) == MP_NO) {
4700 /* make sure v is the largest */
4701 if (mp_cmp_mag(&u, &v) == MP_GT) {
4702 /* swap u and v to make sure v is >= u */
4703 mp_exch(&u, &v);
4704 }
4705
4706 /* subtract smallest from largest */
4707 if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
4708 goto LBL_V;
4709 }
4710
4711 /* Divide out all factors of two */
4712 if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
4713 goto LBL_V;
4714 }
4715 }
4716
4717 /* multiply by 2**k which we divided out at the beginning */
4718 if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
4719 goto LBL_V;
4720 }
4721 c->sign = MP_ZPOS;
4722 res = MP_OKAY;
4723LBL_V:mp_clear (&u);
4724LBL_U:mp_clear (&v);
4725 return res;
4726}
4727
4728#endif /* WOLFSSL_KEY_GEN */
4729
4730
4731#if defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY)
4732
4733/* chars used in radix conversions */
4734const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ\
4735 abcdefghijklmnopqrstuvwxyz+/";
4736#endif
4737
4738#ifdef HAVE_ECC
4739/* read a string [ASCII] in a given radix */
4740int mp_read_radix (mp_int * a, const char *str, int radix)
4741{
4742 int y, res, neg;
4743 char ch;
4744
4745 /* zero the digit bignum */
4746 mp_zero(a);
4747
4748 /* make sure the radix is ok */
4749 if (radix < 2 || radix > 64) {
4750 return MP_VAL;
4751 }
4752
4753 /* if the leading digit is a
4754 * minus set the sign to negative.
4755 */
4756 if (*str == '-') {
4757 ++str;
4758 neg = MP_NEG;
4759 } else {
4760 neg = MP_ZPOS;
4761 }
4762
4763 /* set the integer to the default of zero */
4764 mp_zero (a);
4765
4766 /* process each digit of the string */
4767 while (*str != '\0') {
4768 /* if the radix <= 36 the conversion is case insensitive
4769 * this allows numbers like 1AB and 1ab to represent the same value
4770 * [e.g. in hex]
4771 */
4772 ch = (radix <= 36) ? (char)XTOUPPER((unsigned char)*str) : *str;
4773 for (y = 0; y < 64; y++) {
4774 if (ch == mp_s_rmap[y]) {
4775 break;
4776 }
4777 }
4778
4779 /* if the char was found in the map
4780 * and is less than the given radix add it
4781 * to the number, otherwise exit the loop.
4782 */
4783 if (y < radix) {
4784 if ((res = mp_mul_d (a, (mp_digit) radix, a)) != MP_OKAY) {
4785 return res;
4786 }
4787 if ((res = mp_add_d (a, (mp_digit) y, a)) != MP_OKAY) {
4788 return res;
4789 }
4790 } else {
4791 break;
4792 }
4793 ++str;
4794 }
4795
4796 /* if digit in isn't null term, then invalid character was found */
4797 if (*str != '\0') {
4798 mp_zero (a);
4799 return MP_VAL;
4800 }
4801
4802 /* set the sign only if a != 0 */
4803 if (mp_iszero(a) != MP_YES) {
4804 a->sign = neg;
4805 }
4806 return MP_OKAY;
4807}
4808#endif /* HAVE_ECC */
4809
4810#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
4811 defined(WOLFSSL_DEBUG_MATH)
4812
4813/* returns size of ASCII representation */
4814int mp_radix_size (mp_int *a, int radix, int *size)
4815{
4816 int res, digs;
4817 mp_int t;
4818 mp_digit d;
4819
4820 *size = 0;
4821
4822 /* special case for binary */
4823 if (radix == 2) {
4824 *size = mp_count_bits (a) + (a->sign == MP_NEG ? 1 : 0) + 1;
4825 return MP_OKAY;
4826 }
4827
4828 /* make sure the radix is in range */
4829 if (radix < 2 || radix > 64) {
4830 return MP_VAL;
4831 }
4832
4833 if (mp_iszero(a) == MP_YES) {
4834 *size = 2;
4835 return MP_OKAY;
4836 }
4837
4838 /* digs is the digit count */
4839 digs = 0;
4840
4841 /* if it's negative add one for the sign */
4842 if (a->sign == MP_NEG) {
4843 ++digs;
4844 }
4845
4846 /* init a copy of the input */
4847 if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
4848 return res;
4849 }
4850
4851 /* force temp to positive */
4852 t.sign = MP_ZPOS;
4853
4854 /* fetch out all of the digits */
4855 while (mp_iszero (&t) == MP_NO) {
4856 if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
4857 mp_clear (&t);
4858 return res;
4859 }
4860 ++digs;
4861 }
4862 mp_clear (&t);
4863
4864 /* return digs + 1, the 1 is for the NULL byte that would be required. */
4865 *size = digs + 1;
4866 return MP_OKAY;
4867}
4868
4869/* stores a bignum as a ASCII string in a given radix (2..64) */
4870int mp_toradix (mp_int *a, char *str, int radix)
4871{
4872 int res, digs;
4873 mp_int t;
4874 mp_digit d;
4875 char *_s = str;
4876
4877 /* check range of the radix */
4878 if (radix < 2 || radix > 64) {
4879 return MP_VAL;
4880 }
4881
4882 /* quick out if its zero */
4883 if (mp_iszero(a) == MP_YES) {
4884 *str++ = '0';
4885 *str = '\0';
4886 return MP_OKAY;
4887 }
4888
4889 if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
4890 return res;
4891 }
4892
4893 /* if it is negative output a - */
4894 if (t.sign == MP_NEG) {
4895 ++_s;
4896 *str++ = '-';
4897 t.sign = MP_ZPOS;
4898 }
4899
4900 digs = 0;
4901 while (mp_iszero (&t) == MP_NO) {
4902 if ((res = mp_div_d (&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
4903 mp_clear (&t);
4904 return res;
4905 }
4906 *str++ = mp_s_rmap[d];
4907 ++digs;
4908 }
4909
4910 /* reverse the digits of the string. In this case _s points
4911 * to the first digit [excluding the sign] of the number]
4912 */
4913 bn_reverse ((unsigned char *)_s, digs);
4914
4915 /* append a NULL so the string is properly terminated */
4916 *str = '\0';
4917
4918 mp_clear (&t);
4919 return MP_OKAY;
4920}
4921
4922#ifdef WOLFSSL_DEBUG_MATH
4923void mp_dump(const char* desc, mp_int* a, byte verbose)
4924{
4925 char *buffer;
4926 int size = a->alloc;
4927
4928 buffer = (char*)XMALLOC(size * sizeof(mp_digit) * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
4929 if (buffer == NULL) {
4930 return;
4931 }
4932
4933 printf("%s: ptr=%p, used=%d, sign=%d, size=%d, mpd=%d\n",
4934 desc, a, a->used, a->sign, size, (int)sizeof(mp_digit));
4935
4936 mp_toradix(a, buffer, 16);
4937 printf(" %s\n ", buffer);
4938
4939 if (verbose) {
4940 int i;
4941 for(i=0; i<a->alloc * (int)sizeof(mp_digit); i++) {
4942 printf("%02x ", *(((byte*)a->dp) + i));
4943 }
4944 printf("\n");
4945 }
4946
4947 XFREE(buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
4948}
4949#endif /* WOLFSSL_DEBUG_MATH */
4950
4951#endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) */
4952
4953#endif /* USE_FAST_MATH */
4954
4955#endif /* NO_BIG_INT */
4956
Note: See TracBrowser for help on using the repository browser.