source: asp3_tinet_ecnl_arm/trunk/wolfssl-3.12.2/wolfcrypt/src/sha512.c@ 352

Last change on this file since 352 was 352, checked in by coas-nagasima, 6 years ago

Added the ASP3 version of ECNL for arm

  • Property svn:eol-style set to native
  • Property svn:mime-type set to text/x-csrc;charset=UTF-8
File size: 52.6 KB
1/* sha512.c
2 *
3 * Copyright (C) 2006-2017 wolfSSL Inc.
4 *
5 * This file is part of wolfSSL.
6 *
7 * wolfSSL is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * wolfSSL is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20 */
21
22
23#ifdef HAVE_CONFIG_H
24 #include <config.h>
25#endif
26
27#include <wolfssl/wolfcrypt/settings.h>
28
29#ifdef WOLFSSL_SHA512
30#include <wolfssl/wolfcrypt/sha512.h>
31#include <wolfssl/wolfcrypt/error-crypt.h>
32#include <wolfssl/wolfcrypt/cpuid.h>
33
34/* fips wrapper calls, user can call directly */
35#ifdef HAVE_FIPS
36 int wc_InitSha512(wc_Sha512* sha)
37 {
38 if (sha == NULL) {
39 return BAD_FUNC_ARG;
40 }
41
42 return InitSha512_fips(sha);
43 }
44 int wc_InitSha512_ex(wc_Sha512* sha, void* heap, int devId)
45 {
46 (void)heap;
47 (void)devId;
48 if (sha == NULL) {
49 return BAD_FUNC_ARG;
50 }
51 return InitSha512_fips(sha);
52 }
53 int wc_Sha512Update(wc_Sha512* sha, const byte* data, word32 len)
54 {
55 if (sha == NULL || (data == NULL && len > 0)) {
56 return BAD_FUNC_ARG;
57 }
58
59 return Sha512Update_fips(sha, data, len);
60 }
61 int wc_Sha512Final(wc_Sha512* sha, byte* out)
62 {
63 if (sha == NULL || out == NULL) {
64 return BAD_FUNC_ARG;
65 }
66
67 return Sha512Final_fips(sha, out);
68 }
69 void wc_Sha512Free(wc_Sha512* sha)
70 {
71 (void)sha;
72 /* Not supported in FIPS */
73 }
74
75 #if defined(WOLFSSL_SHA384) || defined(HAVE_AESGCM)
76 int wc_InitSha384(wc_Sha384* sha)
77 {
78 if (sha == NULL) {
79 return BAD_FUNC_ARG;
80 }
81 return InitSha384_fips(sha);
82 }
83 int wc_InitSha384_ex(wc_Sha384* sha, void* heap, int devId)
84 {
85 (void)heap;
86 (void)devId;
87 if (sha == NULL) {
88 return BAD_FUNC_ARG;
89 }
90 return InitSha384_fips(sha);
91 }
92 int wc_Sha384Update(wc_Sha384* sha, const byte* data, word32 len)
93 {
94 if (sha == NULL || (data == NULL && len > 0)) {
95 return BAD_FUNC_ARG;
96 }
97 return Sha384Update_fips(sha, data, len);
98 }
99 int wc_Sha384Final(wc_Sha384* sha, byte* out)
100 {
101 if (sha == NULL || out == NULL) {
102 return BAD_FUNC_ARG;
103 }
104 return Sha384Final_fips(sha, out);
105 }
106 void wc_Sha384Free(wc_Sha384* sha)
107 {
108 (void)sha;
109 /* Not supported in FIPS */
110 }
111 #endif /* WOLFSSL_SHA384 || HAVE_AESGCM */
112
113#else /* else build without using fips */
114
115#include <wolfssl/wolfcrypt/logging.h>
116
117#ifdef NO_INLINE
118 #include <wolfssl/wolfcrypt/misc.h>
119#else
120 #define WOLFSSL_MISC_INCLUDED
121 #include <wolfcrypt/src/misc.c>
122#endif
123
124
125#if defined(USE_INTEL_SPEEDUP)
126 #define HAVE_INTEL_AVX1
127 #define HAVE_INTEL_AVX2
128#endif
129
130#if defined(HAVE_INTEL_AVX1)
131 /* #define DEBUG_XMM */
132#endif
133
134#if defined(HAVE_INTEL_AVX2)
135 #define HAVE_INTEL_RORX
136 /* #define DEBUG_YMM */
137#endif
138
139
140#if defined(HAVE_INTEL_RORX)
141 #define ROTR(func, bits, x) \
142 word64 func(word64 x) { word64 ret ;\
143 __asm__ ("rorx $"#bits", %1, %0\n\t":"=r"(ret):"r"(x)) ;\
144 return ret ;\
145 }
146
147 static INLINE ROTR(rotrFixed64_28, 28, x);
148 static INLINE ROTR(rotrFixed64_34, 34, x);
149 static INLINE ROTR(rotrFixed64_39, 39, x);
150 static INLINE ROTR(rotrFixed64_14, 14, x);
151 static INLINE ROTR(rotrFixed64_18, 18, x);
152 static INLINE ROTR(rotrFixed64_41, 41, x);
153
154 #define S0_RORX(x) (rotrFixed64_28(x)^rotrFixed64_34(x)^rotrFixed64_39(x))
155 #define S1_RORX(x) (rotrFixed64_14(x)^rotrFixed64_18(x)^rotrFixed64_41(x))
156#endif /* HAVE_INTEL_RORX */
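
/* For reference, a portable C sketch of what each RORX rotation above
 * computes; the generic build gets the equivalent rotrFixed64() from the
 * wolfCrypt misc code included earlier. Names here are illustrative only. */
#if 0
static word64 rotr64_sketch(word64 x, unsigned int bits)
{
    /* rorx is a flag-preserving 64-bit rotate right; valid for 0 < bits < 64 */
    return (x >> bits) | (x << (64 - bits));
}
#endif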
157
158#if defined(HAVE_BYTEREVERSE64) && \
159 !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2)
160 #define ByteReverseWords64(out, in, size) ByteReverseWords64_1(out, size)
161 #define ByteReverseWords64_1(buf, size) \
162 { unsigned int i ;\
163 for(i=0; i< size/sizeof(word64); i++){\
164 __asm__ volatile("bswapq %0":"+r"(buf[i])::) ;\
165 }\
166 }
167#endif
168
169static int InitSha512(wc_Sha512* sha512)
170{
171 if (sha512 == NULL)
172 return BAD_FUNC_ARG;
173
174 sha512->digest[0] = W64LIT(0x6a09e667f3bcc908);
175 sha512->digest[1] = W64LIT(0xbb67ae8584caa73b);
176 sha512->digest[2] = W64LIT(0x3c6ef372fe94f82b);
177 sha512->digest[3] = W64LIT(0xa54ff53a5f1d36f1);
178 sha512->digest[4] = W64LIT(0x510e527fade682d1);
179 sha512->digest[5] = W64LIT(0x9b05688c2b3e6c1f);
180 sha512->digest[6] = W64LIT(0x1f83d9abfb41bd6b);
181 sha512->digest[7] = W64LIT(0x5be0cd19137e2179);
182
183 sha512->buffLen = 0;
184 sha512->loLen = 0;
185 sha512->hiLen = 0;
186
187 return 0;
188}
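
/* Note: the eight initial digest words above are the first 64 bits of the
 * fractional parts of the square roots of the first eight primes
 * (2, 3, 5, 7, 11, 13, 17, 19), as specified for SHA-512 in FIPS 180-4. */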
189
190
191/* Hardware Acceleration */
192#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
193
194 /*****
195 Intel AVX1/AVX2 Macro Control Structure
196
197 #if defined(HAVE_INTEL_SPEEDUP)
198 #define HAVE_INTEL_AVX1
199 #define HAVE_INTEL_AVX2
200 #endif
201
202 int InitSha512(wc_Sha512* sha512) {
203 Save/Recover XMM, YMM
204 ...
205
206 Check Intel AVX cpuid flags
207 }
208
209 #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
210 Transform_AVX1(); # Function prototype
211 Transform_AVX2(); #
212 #endif
213
214 _Transform() { # Native Transform Function body
215
216 }
217
218 int Sha512Update() {
219 Save/Recover XMM, YMM
220 ...
221 }
222
223 int Sha512Final() {
224 Save/Recover XMM, YMM
225 ...
226 }
227
228
229 #if defined(HAVE_INTEL_AVX1)
230
231 XMM Instructions/INLINE asm Definitions
232
233 #endif
234
235 #if defined(HAVE_INTEL_AVX2)
236
237 YMM Instructions/INLINE asm Definitions
238
239 #endif
240
241 #if defined(HAVE_INTEL_AVX1)
242
243 int Transform_AVX1() {
244 Stitched Message Sched/Round
245 }
246
247 #endif
248
249 #if defined(HAVE_INTEL_AVX2)
250
251 int Transform_AVX2() {
252 Stitched Message Sched/Round
253 }
254 #endif
255
256 */
257
258
259 /* Each platform needs to query info type 1 from cpuid to see if AVX1/AVX2
260 * is supported. Also, set up a macro for proper linkage w/o ABI conflicts
261 */
262
263 #if defined(HAVE_INTEL_AVX1)
264 static int Transform_AVX1(wc_Sha512 *sha512);
265 #endif
266 #if defined(HAVE_INTEL_AVX2)
267 static int Transform_AVX2(wc_Sha512 *sha512);
268 #if defined(HAVE_INTEL_AVX1) && defined(HAVE_INTEL_AVX2) && defined(HAVE_INTEL_RORX)
269 static int Transform_AVX1_RORX(wc_Sha512 *sha512);
270 #endif
271 #endif
272 static int _Transform(wc_Sha512 *sha512);
273 static int (*Transform_p)(wc_Sha512* sha512) = _Transform;
274 static int transform_check = 0;
275 static int intel_flags;
276 #define Transform(sha512) (*Transform_p)(sha512)
277
278 /* Dummy for saving MM_REGs on behalf of Transform */
279 /* #if defined(HAVE_INTEL_AVX2)
280 #define SAVE_XMM_YMM __asm__ volatile("orq %%r8, %%r8":::\
281 "%ymm0","%ymm1","%ymm2","%ymm3","%ymm4","%ymm5","%ymm6","%ymm7","%ymm8","%ymm9","%ymm10","%ymm11",\
282 "%ymm12","%ymm13","%ymm14","%ymm15")
283 */
284 #if defined(HAVE_INTEL_AVX1)
285 #define SAVE_XMM_YMM __asm__ volatile("orq %%r8, %%r8":::\
286 "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15")
287 #endif
288
289 static void Sha512_SetTransform(void)
290 {
291 if (transform_check)
292 return;
293
294 intel_flags = cpuid_get_flags();
295
296 #if defined(HAVE_INTEL_AVX2)
297 if (IS_INTEL_AVX2(intel_flags) && IS_INTEL_BMI2(intel_flags)) {
298 if (1)
299 Transform_p = Transform_AVX1_RORX;
300 else
301 Transform_p = Transform_AVX2;
302 }
303 else
304 #endif
305 #if defined(HAVE_INTEL_AVX1)
306 if (1) {
307 Transform_p = ((IS_INTEL_AVX1(intel_flags)) ? Transform_AVX1 :
308 _Transform);
309 }
310 else
311 #endif
312 Transform_p = _Transform;
313
314 transform_check = 1;
315 }
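
/* The dispatch above runs once: on the first Init, cpuid flags are probed,
 * the best available Transform_* implementation is cached in Transform_p,
 * and every subsequent block is hashed through that pointer without
 * re-querying CPU features. */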
316
317 int wc_InitSha512_ex(wc_Sha512* sha512, void* heap, int devId)
318 {
319 int ret = InitSha512(sha512);
320
321 (void)heap;
322 (void)devId;
323
324 Sha512_SetTransform();
325
326 return ret;
327 }
328
329#else
330 #define Transform(sha512) _Transform(sha512)
331
332 int wc_InitSha512_ex(wc_Sha512* sha512, void* heap, int devId)
333 {
334 int ret = 0;
335
336 if (sha512 == NULL)
337 return BAD_FUNC_ARG;
338
339 sha512->heap = heap;
340
341 ret = InitSha512(sha512);
342 if (ret != 0)
343 return ret;
344
345 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512)
346 ret = wolfAsync_DevCtxInit(&sha512->asyncDev,
347 WOLFSSL_ASYNC_MARKER_SHA512, sha512->heap, devId);
348 #else
349 (void)devId;
350 #endif /* WOLFSSL_ASYNC_CRYPT */
351
352 return ret;
353 }
354
355#endif /* Hardware Acceleration */
356
357#ifndef SAVE_XMM_YMM
358 #define SAVE_XMM_YMM
359#endif
360
361static const word64 K512[80] = {
362 W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
363 W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc),
364 W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019),
365 W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118),
366 W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe),
367 W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2),
368 W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1),
369 W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694),
370 W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3),
371 W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65),
372 W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483),
373 W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5),
374 W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210),
375 W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4),
376 W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725),
377 W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70),
378 W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926),
379 W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df),
380 W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8),
381 W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b),
382 W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001),
383 W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30),
384 W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910),
385 W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8),
386 W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53),
387 W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8),
388 W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb),
389 W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3),
390 W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60),
391 W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec),
392 W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9),
393 W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b),
394 W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207),
395 W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178),
396 W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6),
397 W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b),
398 W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493),
399 W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c),
400 W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a),
401 W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817)
402};
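
/* K512 holds the 80 SHA-512 round constants: the first 64 bits of the
 * fractional parts of the cube roots of the first 80 primes (FIPS 180-4). */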
403
404
405
406#define blk0(i) (W[i] = sha512->buffer[i])
407
408#define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))
409
410#define Ch(x,y,z) (z^(x&(y^z)))
411#define Maj(x,y,z) ((x&y)|(z&(x|y)))
412
413#define a(i) T[(0-i)&7]
414#define b(i) T[(1-i)&7]
415#define c(i) T[(2-i)&7]
416#define d(i) T[(3-i)&7]
417#define e(i) T[(4-i)&7]
418#define f(i) T[(5-i)&7]
419#define g(i) T[(6-i)&7]
420#define h(i) T[(7-i)&7]
421
422#define S0(x) (rotrFixed64(x,28)^rotrFixed64(x,34)^rotrFixed64(x,39))
423#define S1(x) (rotrFixed64(x,14)^rotrFixed64(x,18)^rotrFixed64(x,41))
424#define s0(x) (rotrFixed64(x,1)^rotrFixed64(x,8)^(x>>7))
425#define s1(x) (rotrFixed64(x,19)^rotrFixed64(x,61)^(x>>6))
426
427#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j]+(j?blk2(i):blk0(i));\
428 d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))
429
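/* What one R(i) expands to, with the rotating-register macros a(i)..h(i)
 * written out for round i = 0 of the first 16-round chunk (j = 0); a
 * sketch for illustration only: */
#if 0
    T[7] += S1(T[4]) + Ch(T[4], T[5], T[6]) + K[0] + (W[0] = sha512->buffer[0]);
    T[3] += T[7];
    T[7] += S0(T[0]) + Maj(T[0], T[1], T[2]);
#endif
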
430static int _Transform(wc_Sha512* sha512)
431{
432 const word64* K = K512;
433
434 word32 j;
435 word64 T[8];
436
437
438#ifdef WOLFSSL_SMALL_STACK
439 word64* W;
440 W = (word64*) XMALLOC(sizeof(word64) * 16, NULL, DYNAMIC_TYPE_TMP_BUFFER);
441 if (W == NULL)
442 return MEMORY_E;
443#else
444 word64 W[16];
445#endif
446
447 /* Copy digest to working vars */
448 XMEMCPY(T, sha512->digest, sizeof(T));
449
450#ifdef USE_SLOW_SHA2
451 /* roughly half the code size, but about 50% slower */
452 /* 80 operations, not unrolled */
453 for (j = 0; j < 80; j += 16) {
454 int m;
455 for (m = 0; m < 16; m++) { /* braces needed here for macros {} */
456 R(m);
457 }
458 }
459#else
460 /* 80 operations, partially loop unrolled */
461 for (j = 0; j < 80; j += 16) {
462 R( 0); R( 1); R( 2); R( 3);
463 R( 4); R( 5); R( 6); R( 7);
464 R( 8); R( 9); R(10); R(11);
465 R(12); R(13); R(14); R(15);
466 }
467#endif /* USE_SLOW_SHA2 */
468
469 /* Add the working vars back into digest */
470
471 sha512->digest[0] += a(0);
472 sha512->digest[1] += b(0);
473 sha512->digest[2] += c(0);
474 sha512->digest[3] += d(0);
475 sha512->digest[4] += e(0);
476 sha512->digest[5] += f(0);
477 sha512->digest[6] += g(0);
478 sha512->digest[7] += h(0);
479
480 /* Wipe variables */
481 ForceZero(W, sizeof(word64) * 16);
482 ForceZero(T, sizeof(T));
483
484#ifdef WOLFSSL_SMALL_STACK
485 XFREE(W, NULL, DYNAMIC_TYPE_TMP_BUFFER);
486#endif
487
488 return 0;
489}
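
/* Note on the loop above: the 80-round message schedule is never fully
 * materialized. W[] is a rolling 16-word window: blk0() fills it from the
 * input block during the first 16 rounds (j == 0), and blk2() extends it
 * in place for the remaining 64 rounds. */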
490
491
492static INLINE void AddLength(wc_Sha512* sha512, word32 len)
493{
494 word64 tmp = sha512->loLen;
495 if ( (sha512->loLen += len) < tmp)
496 sha512->hiLen++; /* carry low to high */
497}
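
/* AddLength maintains a 128-bit byte count in two 64-bit halves. For
 * example, if loLen is 0xFFFFFFFFFFFFFFF8 and 16 bytes are added, loLen
 * wraps around to 8, which is smaller than its previous value, so hiLen
 * is incremented to record the carry. */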
498
499static INLINE int Sha512Update(wc_Sha512* sha512, const byte* data, word32 len)
500{
501 int ret = 0;
502 /* do block size increments */
503 byte* local = (byte*)sha512->buffer;
504
505 /* check that internal buffLen is valid */
506 if (sha512->buffLen >= WC_SHA512_BLOCK_SIZE)
507 return BUFFER_E;
508
509 SAVE_XMM_YMM; /* for Intel AVX */
510
511 while (len) {
512 word32 add = min(len, WC_SHA512_BLOCK_SIZE - sha512->buffLen);
513 XMEMCPY(&local[sha512->buffLen], data, add);
514
515 sha512->buffLen += add;
516 data += add;
517 len -= add;
518
519 if (sha512->buffLen == WC_SHA512_BLOCK_SIZE) {
520 #if defined(LITTLE_ENDIAN_ORDER)
521 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
522 if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
523 #endif
524 {
525 ByteReverseWords64(sha512->buffer, sha512->buffer,
526 WC_SHA512_BLOCK_SIZE);
527 }
528 #endif
529 ret = Transform(sha512);
530 if (ret != 0)
531 break;
532
533 AddLength(sha512, WC_SHA512_BLOCK_SIZE);
534 sha512->buffLen = 0;
535 }
536 }
537
538 return ret;
539}
540
541int wc_Sha512Update(wc_Sha512* sha512, const byte* data, word32 len)
542{
543 if (sha512 == NULL || (data == NULL && len > 0)) {
544 return BAD_FUNC_ARG;
545 }
546
547#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512)
548 if (sha512->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA512) {
549 #if defined(HAVE_INTEL_QA)
550 return IntelQaSymSha512(&sha512->asyncDev, NULL, data, len);
551 #endif
552 }
553#endif /* WOLFSSL_ASYNC_CRYPT */
554
555 return Sha512Update(sha512, data, len);
556}
557
558
559static INLINE int Sha512Final(wc_Sha512* sha512)
560{
561 byte* local = (byte*)sha512->buffer;
562 int ret;
563
564 if (sha512 == NULL) {
565 return BAD_FUNC_ARG;
566 }
567
568 SAVE_XMM_YMM ; /* for Intel AVX */
569 AddLength(sha512, sha512->buffLen); /* before adding pads */
570
571 local[sha512->buffLen++] = 0x80; /* add 1 */
572
573 /* pad with zeros */
574 if (sha512->buffLen > WC_SHA512_PAD_SIZE) {
575 XMEMSET(&local[sha512->buffLen], 0, WC_SHA512_BLOCK_SIZE - sha512->buffLen);
576 sha512->buffLen += WC_SHA512_BLOCK_SIZE - sha512->buffLen;
577#if defined(LITTLE_ENDIAN_ORDER)
578 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
579 if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
580 #endif
581 {
582 ByteReverseWords64(sha512->buffer,sha512->buffer,
583 WC_SHA512_BLOCK_SIZE);
584 }
585#endif /* LITTLE_ENDIAN_ORDER */
586 ret = Transform(sha512);
587 if (ret != 0)
588 return ret;
589
590 sha512->buffLen = 0;
591 }
592 XMEMSET(&local[sha512->buffLen], 0, WC_SHA512_PAD_SIZE - sha512->buffLen);
593
594 /* put lengths in bits */
595 sha512->hiLen = (sha512->loLen >> (8 * sizeof(sha512->loLen) - 3)) +
596 (sha512->hiLen << 3);
597 sha512->loLen = sha512->loLen << 3;
598
599 /* store lengths */
600#if defined(LITTLE_ENDIAN_ORDER)
601 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
602 if (!IS_INTEL_AVX1(intel_flags) && !IS_INTEL_AVX2(intel_flags))
603 #endif
604 ByteReverseWords64(sha512->buffer, sha512->buffer, WC_SHA512_PAD_SIZE);
605#endif
606 /* ! length ordering dependent on digest endian type ! */
607
608 sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 2] = sha512->hiLen;
609 sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 1] = sha512->loLen;
610#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
611 if (IS_INTEL_AVX1(intel_flags) || IS_INTEL_AVX2(intel_flags))
612 ByteReverseWords64(&(sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 2]),
613 &(sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 2]),
614 WC_SHA512_BLOCK_SIZE - WC_SHA512_PAD_SIZE);
615#endif
616 ret = Transform(sha512);
617 if (ret != 0)
618 return ret;
619
620 #ifdef LITTLE_ENDIAN_ORDER
621 ByteReverseWords64(sha512->digest, sha512->digest, WC_SHA512_DIGEST_SIZE);
622 #endif
623
624 return 0;
625}
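
/* Padding layout produced above, per FIPS 180-4: one 0x80 byte, zeros up
 * to WC_SHA512_PAD_SIZE (112) bytes, then the 128-bit message length in
 * bits, big-endian, in bytes 112..127 of the final block. If the 0x80
 * byte lands at offset 112 or later, an extra all-padding block is
 * processed first. */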
626
627int wc_Sha512Final(wc_Sha512* sha512, byte* hash)
628{
629 int ret;
630
631 if (sha512 == NULL || hash == NULL) {
632 return BAD_FUNC_ARG;
633 }
634
635#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512)
636 if (sha512->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA512) {
637 #if defined(HAVE_INTEL_QA)
638 return IntelQaSymSha512(&sha512->asyncDev, hash, NULL,
639 WC_SHA512_DIGEST_SIZE);
640 #endif
641 }
642#endif /* WOLFSSL_ASYNC_CRYPT */
643
644 ret = Sha512Final(sha512);
645 if (ret != 0)
646 return ret;
647
648 XMEMCPY(hash, sha512->digest, WC_SHA512_DIGEST_SIZE);
649
650 return InitSha512(sha512); /* reset state */
651}
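
/* A minimal known-answer check for the API above (a sketch, not compiled;
 * SHA-512("abc") is the FIPS 180-4 test vector beginning ddaf35a193617aba).
 * wc_Sha512Final also re-initializes the state, so the same context can be
 * reused for another message afterwards. */
#if 0
static int Sha512SelfTest(void)
{
    wc_Sha512 sha;
    byte digest[WC_SHA512_DIGEST_SIZE];
    static const byte expect[8] = {
        0xdd, 0xaf, 0x35, 0xa1, 0x93, 0x61, 0x7a, 0xba
    };
    int ret = wc_InitSha512(&sha);
    if (ret == 0)
        ret = wc_Sha512Update(&sha, (const byte*)"abc", 3);
    if (ret == 0)
        ret = wc_Sha512Final(&sha, digest);
    wc_Sha512Free(&sha);
    if (ret == 0 && XMEMCMP(digest, expect, sizeof(expect)) != 0)
        ret = -1; /* only the first 8 digest bytes are checked for brevity */
    return ret;
}
#endif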
652
653
654int wc_InitSha512(wc_Sha512* sha512)
655{
656 return wc_InitSha512_ex(sha512, NULL, INVALID_DEVID);
657}
658
659void wc_Sha512Free(wc_Sha512* sha512)
660{
661 if (sha512 == NULL)
662 return;
663
664#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512)
665 wolfAsync_DevCtxFree(&sha512->asyncDev, WOLFSSL_ASYNC_MARKER_SHA512);
666#endif /* WOLFSSL_ASYNC_CRYPT */
667}
668
669
670#if defined(HAVE_INTEL_AVX1)
671
672#define Rx_1(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j] + W_X[i];
673#define Rx_2(i) d(i)+=h(i);
674#define Rx_3(i) h(i)+=S0(a(i))+Maj(a(i),b(i),c(i));
675
676#if defined(HAVE_INTEL_RORX)
677
678 #define Rx_RORX_1(i) h(i)+=S1_RORX(e(i))+Ch(e(i),f(i),g(i))+K[i+j] + W_X[i];
679 #define Rx_RORX_2(i) d(i)+=h(i);
680 #define Rx_RORX_3(i) h(i)+=S0_RORX(a(i))+Maj(a(i),b(i),c(i));
681#endif /* HAVE_INTEL_RORX */
682
683#endif /* HAVE_INTEL_AVX1 */
684
685#if defined(HAVE_INTEL_AVX2)
686#define Ry_1(i, w) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j] + w;
687#define Ry_2(i, w) d(i)+=h(i);
688#define Ry_3(i, w) h(i)+=S0(a(i))+Maj(a(i),b(i),c(i));
689#endif /* HAVE_INTEL_AVX2 */
690
691/* Inline assembler for Intel AVX1 instructions */
692#if defined(HAVE_INTEL_AVX1)
693#if defined(DEBUG_XMM)
694 #define SAVE_REG(i) __asm__ volatile("vmovdqu %%xmm"#i", %0 \n\t":"=m"(reg[i][0]):);
695 #define RECV_REG(i) __asm__ volatile("vmovdqu %0, %%xmm"#i" \n\t"::"m"(reg[i][0]));
696
697 #define _DUMP_REG(REG, name)\
698 { word64 buf[16];word64 reg[16][2];int k;\
699 SAVE_REG(0); SAVE_REG(1); SAVE_REG(2); SAVE_REG(3); SAVE_REG(4); \
700 SAVE_REG(5); SAVE_REG(6); SAVE_REG(7);SAVE_REG(8); SAVE_REG(9); SAVE_REG(10);\
701 SAVE_REG(11); SAVE_REG(12); SAVE_REG(13); SAVE_REG(14); SAVE_REG(15); \
702 __asm__ volatile("vmovdqu %%"#REG", %0 \n\t":"=m"(buf[0]):);\
703 printf(" "#name":\t"); for(k=0; k<2; k++) printf("%016lx.", (word64)(buf[k])); printf("\n"); \
704 RECV_REG(0); RECV_REG(1); RECV_REG(2); RECV_REG(3); RECV_REG(4);\
705 RECV_REG(5); RECV_REG(6); RECV_REG(7); RECV_REG(8); RECV_REG(9);\
706 RECV_REG(10); RECV_REG(11); RECV_REG(12); RECV_REG(13); RECV_REG(14); RECV_REG(15);\
707 }
708
709 #define DUMP_REG(REG) _DUMP_REG(REG, #REG)
710 #define PRINTF(fmt, ...)
711#else
712 #define DUMP_REG(REG)
713 #define PRINTF(fmt, ...)
714#endif /* DEBUG_XMM */
715
716#define _MOVE_to_REG(xymm, mem) __asm__ volatile("vmovdqu %0, %%"#xymm" "\
717 :: "m"(mem));
718#define _MOVE_to_MEM(mem,i, xymm) __asm__ volatile("vmovdqu %%"#xymm", %0" :\
719 "=m"(mem[i]),"=m"(mem[i+1]),"=m"(mem[i+2]),"=m"(mem[i+3]):);
720#define _MOVE(dest, src) __asm__ volatile("vmovdqu %%"#src", %%"\
721 #dest" "::);
722
723#define _S_TEMP(dest, src, bits, temp) __asm__ volatile("vpsrlq $"#bits", %%"\
724 #src", %%"#dest"\n\tvpsllq $64-"#bits", %%"#src", %%"#temp"\n\tvpor %%"\
725 #temp",%%"#dest", %%"#dest" "::);
726#define _AVX1_R(dest, src, bits) __asm__ volatile("vpsrlq $"#bits", %%"\
727 #src", %%"#dest" "::);
728#define _XOR(dest, src1, src2) __asm__ volatile("vpxor %%"#src1", %%"\
729 #src2", %%"#dest" "::);
730#define _OR(dest, src1, src2) __asm__ volatile("vpor %%"#src1", %%"\
731 #src2", %%"#dest" "::);
732#define _ADD(dest, src1, src2) __asm__ volatile("vpaddq %%"#src1", %%"\
733 #src2", %%"#dest" "::);
734#define _ADD_MEM(dest, src1, mem) __asm__ volatile("vpaddq %0, %%"#src1", %%"\
735 #dest" "::"m"(mem));
736
737#define MOVE_to_REG(xymm, mem) _MOVE_to_REG(xymm, mem)
738#define MOVE_to_MEM(mem, i, xymm) _MOVE_to_MEM(mem, i, xymm)
739#define MOVE(dest, src) _MOVE(dest, src)
740
741#define XOR(dest, src1, src2) _XOR(dest, src1, src2)
742#define OR(dest, src1, src2) _OR(dest, src1, src2)
743#define ADD(dest, src1, src2) _ADD(dest, src1, src2)
744
745#define S_TMP(dest, src, bits, temp) _S_TEMP(dest, src, bits, temp);
746#define AVX1_S(dest, src, bits) S_TMP(dest, src, bits, S_TEMP)
747#define AVX1_R(dest, src, bits) _AVX1_R(dest, src, bits)
748
749#define Init_Mask(mask) \
750 __asm__ volatile("vmovdqu %0, %%xmm1\n\t"::"m"(mask):"%xmm1");
751
752#define _W_from_buff1(w, buff, xmm) \
753 /* X0..3(xmm4..7), W[0..15] = sha512->buffer[0..15]; */\
754 __asm__ volatile("vmovdqu %1, %%"#xmm"\n\t"\
755 "vpshufb %%xmm1, %%"#xmm", %%"#xmm"\n\t"\
756 "vmovdqu %%"#xmm", %0"\
757 :"=m"(w): "m"(buff):"%xmm0");
758
759#define W_from_buff1(w, buff, xmm) _W_from_buff1(w, buff, xmm)
760
761#define W_from_buff(w, buff)\
762 Init_Mask(mBYTE_FLIP_MASK[0]);\
763 W_from_buff1(w[0], buff[0], W_0);\
764 W_from_buff1(w[2], buff[2], W_2);\
765 W_from_buff1(w[4], buff[4], W_4);\
766 W_from_buff1(w[6], buff[6], W_6);\
767 W_from_buff1(w[8], buff[8], W_8);\
768 W_from_buff1(w[10],buff[10],W_10);\
769 W_from_buff1(w[12],buff[12],W_12);\
770 W_from_buff1(w[14],buff[14],W_14);
771
772static word64 mBYTE_FLIP_MASK[] = { 0x0001020304050607, 0x08090a0b0c0d0e0f };
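
/* mBYTE_FLIP_MASK is the vpshufb control used above: it maps destination
 * byte 0 to source byte 7, byte 1 to byte 6, and so on, byte-swapping each
 * 64-bit word of the 128-bit register. This converts the big-endian
 * message words to host (little-endian) order in one instruction. */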
773
774#define W_I_15 xmm14
775#define W_I_7 xmm11
776#define W_I_2 xmm13
777#define W_I xmm12
778#define G_TEMP xmm0
779#define S_TEMP xmm1
780#define XMM_TEMP0 xmm2
781
782#define W_0 xmm12
783#define W_2 xmm3
784#define W_4 xmm4
785#define W_6 xmm5
786#define W_8 xmm6
787#define W_10 xmm7
788#define W_12 xmm8
789#define W_14 xmm9
790
791#define s0_1(dest, src) AVX1_S(dest, src, 1);
792#define s0_2(dest, src) AVX1_S(G_TEMP, src, 8); XOR(dest, G_TEMP, dest);
793#define s0_3(dest, src) AVX1_R(G_TEMP, src, 7); XOR(dest, G_TEMP, dest);
794
795#define s1_1(dest, src) AVX1_S(dest, src, 19);
796#define s1_2(dest, src) AVX1_S(G_TEMP, src, 61); XOR(dest, G_TEMP, dest);
797#define s1_3(dest, src) AVX1_R(G_TEMP, src, 6); XOR(dest, G_TEMP, dest);
798
799#define s0_(dest, src) s0_1(dest, src); s0_2(dest, src); s0_3(dest, src)
800#define s1_(dest, src) s1_1(dest, src); s1_2(dest, src); s1_3(dest, src)
801
802#define Block_xx_1(i) \
803 MOVE_to_REG(W_I_15, W_X[(i-15)&15]);\
804 MOVE_to_REG(W_I_7, W_X[(i- 7)&15]);\
805
806#define Block_xx_2(i) \
807 MOVE_to_REG(W_I_2, W_X[(i- 2)&15]);\
808 MOVE_to_REG(W_I, W_X[(i)]);\
809
810#define Block_xx_3(i) \
811 s0_ (XMM_TEMP0, W_I_15);\
812
813#define Block_xx_4(i) \
814 ADD(W_I, W_I, XMM_TEMP0);\
815 ADD(W_I, W_I, W_I_7);\
816
817#define Block_xx_5(i) \
818 s1_ (XMM_TEMP0, W_I_2);\
819
820#define Block_xx_6(i) \
821 ADD(W_I, W_I, XMM_TEMP0);\
822 MOVE_to_MEM(W_X,i, W_I);\
823 if (i==0)\
824 MOVE_to_MEM(W_X,16, W_I);\
825
826#define Block_xx_7(i) \
827 MOVE_to_REG(W_I_15, W_X[(i-15)&15]);\
828 MOVE_to_REG(W_I_7, W_X[(i- 7)&15]);\
829
830#define Block_xx_8(i) \
831 MOVE_to_REG(W_I_2, W_X[(i- 2)&15]);\
832 MOVE_to_REG(W_I, W_X[(i)]);\
833
834#define Block_xx_9(i) \
835 s0_ (XMM_TEMP0, W_I_15);\
836
837#define Block_xx_10(i) \
838 ADD(W_I, W_I, XMM_TEMP0);\
839 ADD(W_I, W_I, W_I_7);\
840
841#define Block_xx_11(i) \
842 s1_ (XMM_TEMP0, W_I_2);\
843
844#define Block_xx_12(i) \
845 ADD(W_I, W_I, XMM_TEMP0);\
846 MOVE_to_MEM(W_X,i, W_I);\
847 if ((i)==0)\
848 MOVE_to_MEM(W_X,16, W_I);\
849
850static INLINE void Block_0_1(word64 *W_X) { Block_xx_1(0); }
851static INLINE void Block_0_2(word64 *W_X) { Block_xx_2(0); }
852static INLINE void Block_0_3(void) { Block_xx_3(0); }
853static INLINE void Block_0_4(void) { Block_xx_4(0); }
854static INLINE void Block_0_5(void) { Block_xx_5(0); }
855static INLINE void Block_0_6(word64 *W_X) { Block_xx_6(0); }
856static INLINE void Block_0_7(word64 *W_X) { Block_xx_7(2); }
857static INLINE void Block_0_8(word64 *W_X) { Block_xx_8(2); }
858static INLINE void Block_0_9(void) { Block_xx_9(2); }
859static INLINE void Block_0_10(void){ Block_xx_10(2); }
860static INLINE void Block_0_11(void){ Block_xx_11(2); }
861static INLINE void Block_0_12(word64 *W_X){ Block_xx_12(2); }
862
863static INLINE void Block_4_1(word64 *W_X) { Block_xx_1(4); }
864static INLINE void Block_4_2(word64 *W_X) { Block_xx_2(4); }
865static INLINE void Block_4_3(void) { Block_xx_3(4); }
866static INLINE void Block_4_4(void) { Block_xx_4(4); }
867static INLINE void Block_4_5(void) { Block_xx_5(4); }
868static INLINE void Block_4_6(word64 *W_X) { Block_xx_6(4); }
869static INLINE void Block_4_7(word64 *W_X) { Block_xx_7(6); }
870static INLINE void Block_4_8(word64 *W_X) { Block_xx_8(6); }
871static INLINE void Block_4_9(void) { Block_xx_9(6); }
872static INLINE void Block_4_10(void){ Block_xx_10(6); }
873static INLINE void Block_4_11(void){ Block_xx_11(6); }
874static INLINE void Block_4_12(word64 *W_X){ Block_xx_12(6); }
875
876static INLINE void Block_8_1(word64 *W_X) { Block_xx_1(8); }
877static INLINE void Block_8_2(word64 *W_X) { Block_xx_2(8); }
878static INLINE void Block_8_3(void) { Block_xx_3(8); }
879static INLINE void Block_8_4(void) { Block_xx_4(8); }
880static INLINE void Block_8_5(void) { Block_xx_5(8); }
881static INLINE void Block_8_6(word64 *W_X) { Block_xx_6(8); }
882static INLINE void Block_8_7(word64 *W_X) { Block_xx_7(10); }
883static INLINE void Block_8_8(word64 *W_X) { Block_xx_8(10); }
884static INLINE void Block_8_9(void) { Block_xx_9(10); }
885static INLINE void Block_8_10(void){ Block_xx_10(10); }
886static INLINE void Block_8_11(void){ Block_xx_11(10); }
887static INLINE void Block_8_12(word64 *W_X){ Block_xx_12(10); }
888
889static INLINE void Block_12_1(word64 *W_X) { Block_xx_1(12); }
890static INLINE void Block_12_2(word64 *W_X) { Block_xx_2(12); }
891static INLINE void Block_12_3(void) { Block_xx_3(12); }
892static INLINE void Block_12_4(void) { Block_xx_4(12); }
893static INLINE void Block_12_5(void) { Block_xx_5(12); }
894static INLINE void Block_12_6(word64 *W_X) { Block_xx_6(12); }
895static INLINE void Block_12_7(word64 *W_X) { Block_xx_7(14); }
896static INLINE void Block_12_8(word64 *W_X) { Block_xx_8(14); }
897static INLINE void Block_12_9(void) { Block_xx_9(14); }
898static INLINE void Block_12_10(void){ Block_xx_10(14); }
899static INLINE void Block_12_11(void){ Block_xx_11(14); }
900static INLINE void Block_12_12(word64 *W_X){ Block_xx_12(14); }
901
902#endif /* HAVE_INTEL_AVX1 */
903
904#if defined(HAVE_INTEL_AVX2)
905static const word64 mBYTE_FLIP_MASK_Y[] =
906 { 0x0001020304050607, 0x08090a0b0c0d0e0f, 0x0001020304050607, 0x08090a0b0c0d0e0f };
907
908#define W_from_buff_Y(buff)\
909 { /* X0..3(ymm9..12), W_X[0..15] = sha512->buffer[0..15]; */\
910 __asm__ volatile("vmovdqu %0, %%ymm8\n\t"::"m"(mBYTE_FLIP_MASK_Y[0]));\
911 __asm__ volatile("vmovdqu %0, %%ymm12\n\t"\
912 "vmovdqu %1, %%ymm4\n\t"\
913 "vpshufb %%ymm8, %%ymm12, %%ymm12\n\t"\
914 "vpshufb %%ymm8, %%ymm4, %%ymm4\n\t"\
915 :: "m"(buff[0]), "m"(buff[4]));\
916 __asm__ volatile("vmovdqu %0, %%ymm5\n\t"\
917 "vmovdqu %1, %%ymm6\n\t"\
918 "vpshufb %%ymm8, %%ymm5, %%ymm5\n\t"\
919 "vpshufb %%ymm8, %%ymm6, %%ymm6\n\t"\
920 :: "m"(buff[8]), "m"(buff[12]));\
921 }
922
923#if defined(DEBUG_YMM)
924 #define SAVE_REG_Y(i) __asm__ volatile("vmovdqu %%ymm"#i", %0 \n\t":"=m"(reg[i-4][0]):);
925 #define RECV_REG_Y(i) __asm__ volatile("vmovdqu %0, %%ymm"#i" \n\t"::"m"(reg[i-4][0]));
926
927 #define _DUMP_REG_Y(REG, name)\
928 { word64 buf[16];word64 reg[16][2];int k;\
929 SAVE_REG_Y(4); SAVE_REG_Y(5); SAVE_REG_Y(6); SAVE_REG_Y(7); \
930 SAVE_REG_Y(8); SAVE_REG_Y(9); SAVE_REG_Y(10); SAVE_REG_Y(11); SAVE_REG_Y(12);\
931 SAVE_REG_Y(13); SAVE_REG_Y(14); SAVE_REG_Y(15); \
932 __asm__ volatile("vmovdqu %%"#REG", %0 \n\t":"=m"(buf[0]):);\
933 printf(" "#name":\t"); for(k=0; k<4; k++) printf("%016lx.", (word64)buf[k]); printf("\n"); \
934 RECV_REG_Y(4); RECV_REG_Y(5); RECV_REG_Y(6); RECV_REG_Y(7); \
935 RECV_REG_Y(8); RECV_REG_Y(9); RECV_REG_Y(10); RECV_REG_Y(11); RECV_REG_Y(12); \
936 RECV_REG_Y(13); RECV_REG_Y(14); RECV_REG_Y(15);\
937 }
938
939 #define DUMP_REG_Y(REG) _DUMP_REG_Y(REG, #REG)
940 #define DUMP_REG2_Y(REG) _DUMP_REG_Y(REG, #REG)
941 #define PRINTF_Y(fmt, ...)
942#else
943 #define DUMP_REG_Y(REG)
944 #define DUMP_REG2_Y(REG)
945 #define PRINTF_Y(fmt, ...)
946#endif /* DEBUG_YMM */
947
948#define _MOVE_to_REGy(ymm, mem) __asm__ volatile("vmovdqu %0, %%"#ymm" "\
949 :: "m"(mem));
950#define _MOVE_to_MEMy(mem,i, ymm) __asm__ volatile("vmovdqu %%"#ymm", %0" \
951 : "=m"(mem[i]),"=m"(mem[i+1]),"=m"(mem[i+2]),"=m"(mem[i+3]):);
952#define _MOVE_128y(ymm0, ymm1, ymm2, map) __asm__ volatile("vperm2i128 $"\
953 #map", %%"#ymm2", %%"#ymm1", %%"#ymm0" "::);
954#define _S_TEMPy(dest, src, bits, temp) \
955 __asm__ volatile("vpsrlq $"#bits", %%"#src", %%"#dest"\n\tvpsllq $64-"#bits\
956 ", %%"#src", %%"#temp"\n\tvpor %%"#temp",%%"#dest", %%"#dest" "::);
957#define _AVX2_R(dest, src, bits) __asm__ volatile("vpsrlq $"#bits", %%"\
958 #src", %%"#dest" "::);
959#define _XORy(dest, src1, src2) __asm__ volatile("vpxor %%"#src1", %%"\
960 #src2", %%"#dest" "::);
961#define _ADDy(dest, src1, src2) __asm__ volatile("vpaddq %%"#src1", %%"\
962 #src2", %%"#dest" "::);
963#define _BLENDy(map, dest, src1, src2) __asm__ volatile("vpblendd $"#map", %%"\
964 #src1", %%"#src2", %%"#dest" "::);
965#define _BLENDQy(map, dest, src1, src2) __asm__ volatile("vblendpd $"#map", %%"\
966 #src1", %%"#src2", %%"#dest" "::);
967#define _PERMQy(map, dest, src) __asm__ volatile("vpermq $"#map", %%"\
968 #src", %%"#dest" "::);
969
970#define MOVE_to_REGy(ymm, mem) _MOVE_to_REGy(ymm, mem)
971#define MOVE_to_MEMy(mem, i, ymm) _MOVE_to_MEMy(mem, i, ymm)
972
973#define MOVE_128y(ymm0, ymm1, ymm2, map) _MOVE_128y(ymm0, ymm1, ymm2, map)
974#define XORy(dest, src1, src2) _XORy(dest, src1, src2)
975#define ADDy(dest, src1, src2) _ADDy(dest, src1, src2)
976#define BLENDy(map, dest, src1, src2) _BLENDy(map, dest, src1, src2)
977#define BLENDQy(map, dest, src1, src2) _BLENDQy(map, dest, src1, src2)
978#define PERMQy(map, dest, src) _PERMQy(map, dest, src)
979
980
981#define S_TMPy(dest, src, bits, temp) _S_TEMPy(dest, src, bits, temp);
982#define AVX2_S(dest, src, bits) S_TMPy(dest, src, bits, S_TEMPy)
983#define AVX2_R(dest, src, bits) _AVX2_R(dest, src, bits)
984
985
986#define FEEDBACK1_to_W_I_2(w_i_2, w_i) MOVE_128y(YMM_TEMP0, w_i, w_i, 0x08);\
987 BLENDy(0xf0, w_i_2, YMM_TEMP0, w_i_2);
988
989#define MOVE_W_to_W_I_15(w_i_15, w_0, w_4) BLENDQy(0x1, w_i_15, w_4, w_0);\
990 PERMQy(0x39, w_i_15, w_i_15);
991#define MOVE_W_to_W_I_7(w_i_7, w_8, w_12) BLENDQy(0x1, w_i_7, w_12, w_8);\
992 PERMQy(0x39, w_i_7, w_i_7);
993#define MOVE_W_to_W_I_2(w_i_2, w_12) BLENDQy(0xc, w_i_2, w_12, w_i_2);\
994 PERMQy(0x0e, w_i_2, w_i_2);
995
996
997#define W_I_16y ymm8
998#define W_I_15y ymm9
999#define W_I_7y ymm10
1000#define W_I_2y ymm11
1001#define W_Iy ymm12
1002#define G_TEMPy ymm13
1003#define S_TEMPy ymm14
1004#define YMM_TEMP0 ymm15
1005#define YMM_TEMP0x xmm15
1006#define W_I_TEMPy ymm7
1007#define W_K_TEMPy ymm15
1008#define W_K_TEMPx xmm15
1009#define W_0y ymm12
1010#define W_4y ymm4
1011#define W_8y ymm5
1012#define W_12y ymm6
1013
1014
1015#define MOVE_15_to_16(w_i_16, w_i_15, w_i_7)\
1016 __asm__ volatile("vperm2i128 $0x01, %%"#w_i_15", %%"#w_i_15", %%"#w_i_15" "::);\
1017 __asm__ volatile("vpblendd $0x08, %%"#w_i_15", %%"#w_i_7", %%"#w_i_16" "::);\
1018 __asm__ volatile("vperm2i128 $0x01, %%"#w_i_7", %%"#w_i_7", %%"#w_i_15" "::);\
1019 __asm__ volatile("vpblendd $0x80, %%"#w_i_15", %%"#w_i_16", %%"#w_i_16" "::);\
1020 __asm__ volatile("vpshufd $0x93, %%"#w_i_16", %%"#w_i_16" "::);\
1021
1022#define MOVE_7_to_15(w_i_15, w_i_7)\
1023 __asm__ volatile("vmovdqu %%"#w_i_7", %%"#w_i_15" "::);\
1024
1025#define MOVE_I_to_7(w_i_7, w_i)\
1026 __asm__ volatile("vperm2i128 $0x01, %%"#w_i", %%"#w_i", %%"#w_i_7" "::);\
1027 __asm__ volatile("vpblendd $0x01, %%"#w_i_7", %%"#w_i", %%"#w_i_7" "::);\
1028 __asm__ volatile("vpshufd $0x39, %%"#w_i_7", %%"#w_i_7" "::);\
1029
1030#define MOVE_I_to_2(w_i_2, w_i)\
1031 __asm__ volatile("vperm2i128 $0x01, %%"#w_i", %%"#w_i", %%"#w_i_2" "::);\
1032 __asm__ volatile("vpshufd $0x0e, %%"#w_i_2", %%"#w_i_2" "::);\
1033
1034#endif /* HAVE_INTEL_AVX2 */
1035
1036
1037/*** Transform Body ***/
1038#if defined(HAVE_INTEL_AVX1)
1039static int Transform_AVX1(wc_Sha512* sha512)
1040{
1041 const word64* K = K512;
1042 word64 W_X[16+4] = {0};
1043 word32 j;
1044 word64 T[8];
1045
1046 /* Copy digest to working vars */
1047 XMEMCPY(T, sha512->digest, sizeof(T));
1048
1049 W_from_buff(W_X, sha512->buffer);
1050 for (j = 0; j < 80; j += 16) {
1051 Rx_1( 0); Block_0_1(W_X); Rx_2( 0); Block_0_2(W_X); Rx_3( 0); Block_0_3();
1052 Rx_1( 1); Block_0_4(); Rx_2( 1); Block_0_5(); Rx_3( 1); Block_0_6(W_X);
1053 Rx_1( 2); Block_0_7(W_X); Rx_2( 2); Block_0_8(W_X); Rx_3( 2); Block_0_9();
1054 Rx_1( 3); Block_0_10();Rx_2( 3); Block_0_11();Rx_3( 3); Block_0_12(W_X);
1055
1056 Rx_1( 4); Block_4_1(W_X); Rx_2( 4); Block_4_2(W_X); Rx_3( 4); Block_4_3();
1057 Rx_1( 5); Block_4_4(); Rx_2( 5); Block_4_5(); Rx_3( 5); Block_4_6(W_X);
1058 Rx_1( 6); Block_4_7(W_X); Rx_2( 6); Block_4_8(W_X); Rx_3( 6); Block_4_9();
1059 Rx_1( 7); Block_4_10();Rx_2( 7); Block_4_11();Rx_3( 7); Block_4_12(W_X);
1060
1061 Rx_1( 8); Block_8_1(W_X); Rx_2( 8); Block_8_2(W_X); Rx_3( 8); Block_8_3();
1062 Rx_1( 9); Block_8_4(); Rx_2( 9); Block_8_5(); Rx_3( 9); Block_8_6(W_X);
1063 Rx_1(10); Block_8_7(W_X); Rx_2(10); Block_8_8(W_X); Rx_3(10); Block_8_9();
1064 Rx_1(11); Block_8_10();Rx_2(11); Block_8_11();Rx_3(11); Block_8_12(W_X);
1065
1066 Rx_1(12); Block_12_1(W_X); Rx_2(12); Block_12_2(W_X); Rx_3(12); Block_12_3();
1067 Rx_1(13); Block_12_4(); Rx_2(13); Block_12_5(); Rx_3(13); Block_12_6(W_X);
1068 Rx_1(14); Block_12_7(W_X); Rx_2(14); Block_12_8(W_X); Rx_3(14); Block_12_9();
1069 Rx_1(15); Block_12_10();Rx_2(15); Block_12_11();Rx_3(15); Block_12_12(W_X);
1070 }
1071
1072 /* Add the working vars back into digest */
1073 sha512->digest[0] += a(0);
1074 sha512->digest[1] += b(0);
1075 sha512->digest[2] += c(0);
1076 sha512->digest[3] += d(0);
1077 sha512->digest[4] += e(0);
1078 sha512->digest[5] += f(0);
1079 sha512->digest[6] += g(0);
1080 sha512->digest[7] += h(0);
1081
1082 /* Wipe variables */
1083#if !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2)
1084 XMEMSET(W_X, 0, sizeof(word64) * 16);
1085#endif
1086 XMEMSET(T, 0, sizeof(T));
1087
1088 return 0;
1089}
1090#endif /* HAVE_INTEL_AVX1 */
1091
1092#if defined(HAVE_INTEL_AVX2) && defined(HAVE_INTEL_AVX1) && defined(HAVE_INTEL_RORX)
1093static int Transform_AVX1_RORX(wc_Sha512* sha512)
1094{
1095 const word64* K = K512;
1096 word64 W_X[16+4] = {0};
1097 word32 j;
1098 word64 T[8];
1099
1100 /* Copy digest to working vars */
1101 XMEMCPY(T, sha512->digest, sizeof(T));
1102
1103 W_from_buff(W_X, sha512->buffer);
1104 for (j = 0; j < 80; j += 16) {
1105 Rx_RORX_1( 0); Block_0_1(W_X); Rx_RORX_2( 0); Block_0_2(W_X);
1106 Rx_RORX_3( 0); Block_0_3();
1107 Rx_RORX_1( 1); Block_0_4(); Rx_RORX_2( 1); Block_0_5();
1108 Rx_RORX_3( 1); Block_0_6(W_X);
1109 Rx_RORX_1( 2); Block_0_7(W_X); Rx_RORX_2( 2); Block_0_8(W_X);
1110 Rx_RORX_3( 2); Block_0_9();
1111 Rx_RORX_1( 3); Block_0_10();Rx_RORX_2( 3); Block_0_11();
1112 Rx_RORX_3( 3); Block_0_12(W_X);
1113
1114 Rx_RORX_1( 4); Block_4_1(W_X); Rx_RORX_2( 4); Block_4_2(W_X);
1115 Rx_RORX_3( 4); Block_4_3();
1116 Rx_RORX_1( 5); Block_4_4(); Rx_RORX_2( 5); Block_4_5();
1117 Rx_RORX_3( 5); Block_4_6(W_X);
1118 Rx_RORX_1( 6); Block_4_7(W_X); Rx_RORX_2( 6); Block_4_8(W_X);
1119 Rx_RORX_3( 6); Block_4_9();
1120 Rx_RORX_1( 7); Block_4_10();Rx_RORX_2( 7); Block_4_11();
1121 Rx_RORX_3( 7); Block_4_12(W_X);
1122
1123 Rx_RORX_1( 8); Block_8_1(W_X); Rx_RORX_2( 8); Block_8_2(W_X);
1124 Rx_RORX_3( 8); Block_8_3();
1125 Rx_RORX_1( 9); Block_8_4(); Rx_RORX_2( 9); Block_8_5();
1126 Rx_RORX_3( 9); Block_8_6(W_X);
1127 Rx_RORX_1(10); Block_8_7(W_X); Rx_RORX_2(10); Block_8_8(W_X);
1128 Rx_RORX_3(10); Block_8_9();
1129 Rx_RORX_1(11); Block_8_10();Rx_RORX_2(11); Block_8_11();
1130 Rx_RORX_3(11); Block_8_12(W_X);
1131
1132 Rx_RORX_1(12); Block_12_1(W_X); Rx_RORX_2(12); Block_12_2(W_X);
1133 Rx_RORX_3(12); Block_12_3();
1134 Rx_RORX_1(13); Block_12_4(); Rx_RORX_2(13); Block_12_5();
1135 Rx_RORX_3(13); Block_12_6(W_X);
1136 Rx_RORX_1(14); Block_12_7(W_X); Rx_RORX_2(14); Block_12_8(W_X);
1137 Rx_RORX_3(14); Block_12_9();
1138 Rx_RORX_1(15); Block_12_10();Rx_RORX_2(15); Block_12_11();
1139 Rx_RORX_3(15); Block_12_12(W_X);
1140 }
1141
1142 /* Add the working vars back into digest */
1143 sha512->digest[0] += a(0);
1144 sha512->digest[1] += b(0);
1145 sha512->digest[2] += c(0);
1146 sha512->digest[3] += d(0);
1147 sha512->digest[4] += e(0);
1148 sha512->digest[5] += f(0);
1149 sha512->digest[6] += g(0);
1150 sha512->digest[7] += h(0);
1151
1152 /* Wipe variables */
1153#if !defined(HAVE_INTEL_AVX1)&&!defined(HAVE_INTEL_AVX2)
1154 XMEMSET(W_X, 0, sizeof(word64) * 16);
1155#endif
1156 XMEMSET(T, 0, sizeof(T));
1157
1158 return 0;
1159}
1160#endif /* HAVE_INTEL_AVX2 && HAVE_INTEL_AVX1 && HAVE_INTEL_RORX */
1161
1162#if defined(HAVE_INTEL_AVX2)
1163
1164#define s0_1y(dest, src) AVX2_S(dest, src, 1);
1165#define s0_2y(dest, src) AVX2_S(G_TEMPy, src, 8); XORy(dest, G_TEMPy, dest);
1166#define s0_3y(dest, src) AVX2_R(G_TEMPy, src, 7); XORy(dest, G_TEMPy, dest);
1167
1168#define s1_1y(dest, src) AVX2_S(dest, src, 19);
1169#define s1_2y(dest, src) AVX2_S(G_TEMPy, src, 61); XORy(dest, G_TEMPy, dest);
1170#define s1_3y(dest, src) AVX2_R(G_TEMPy, src, 6); XORy(dest, G_TEMPy, dest);
1171
1172#define s0_y(dest, src) s0_1y(dest, src); s0_2y(dest, src); s0_3y(dest, src)
1173#define s1_y(dest, src) s1_1y(dest, src); s1_2y(dest, src); s1_3y(dest, src)
1174
1175
1176#define Block_Y_xx_1(i, w_0, w_4, w_8, w_12)\
1177 MOVE_W_to_W_I_15(W_I_15y, w_0, w_4);\
1178 MOVE_W_to_W_I_7 (W_I_7y, w_8, w_12);\
1179 MOVE_W_to_W_I_2 (W_I_2y, w_12);\
1180
1181#define Block_Y_xx_2(i, w_0, w_4, w_8, w_12)\
1182 s0_1y (YMM_TEMP0, W_I_15y);\
1183
1184#define Block_Y_xx_3(i, w_0, w_4, w_8, w_12)\
1185 s0_2y (YMM_TEMP0, W_I_15y);\
1186
1187#define Block_Y_xx_4(i, w_0, w_4, w_8, w_12)\
1188 s0_3y (YMM_TEMP0, W_I_15y);\
1189
1190#define Block_Y_xx_5(i, w_0, w_4, w_8, w_12)\
1191 ADDy(W_I_TEMPy, w_0, YMM_TEMP0);\
1192
1193#define Block_Y_xx_6(i, w_0, w_4, w_8, w_12)\
1194 ADDy(W_I_TEMPy, W_I_TEMPy, W_I_7y);\
1195 s1_1y (YMM_TEMP0, W_I_2y);\
1196
1197#define Block_Y_xx_7(i, w_0, w_4, w_8, w_12)\
1198 s1_2y (YMM_TEMP0, W_I_2y);\
1199
1200#define Block_Y_xx_8(i, w_0, w_4, w_8, w_12)\
1201 s1_3y (YMM_TEMP0, W_I_2y);\
1202 ADDy(w_0, W_I_TEMPy, YMM_TEMP0);\
1203
1204#define Block_Y_xx_9(i, w_0, w_4, w_8, w_12)\
1205 FEEDBACK1_to_W_I_2(W_I_2y, w_0);\
1206
1207#define Block_Y_xx_10(i, w_0, w_4, w_8, w_12) \
1208 s1_1y (YMM_TEMP0, W_I_2y);\
1209
1210#define Block_Y_xx_11(i, w_0, w_4, w_8, w_12) \
1211 s1_2y (YMM_TEMP0, W_I_2y);\
1212
1213#define Block_Y_xx_12(i, w_0, w_4, w_8, w_12)\
1214 s1_3y (YMM_TEMP0, W_I_2y);\
1215 ADDy(w_0, W_I_TEMPy, YMM_TEMP0);\
1216 MOVE_to_MEMy(w,0, w_4);\
1217
1218
1219static INLINE void Block_Y_0_1(void) { Block_Y_xx_1(0, W_0y, W_4y, W_8y, W_12y); }
1220static INLINE void Block_Y_0_2(void) { Block_Y_xx_2(0, W_0y, W_4y, W_8y, W_12y); }
1221static INLINE void Block_Y_0_3(void) { Block_Y_xx_3(0, W_0y, W_4y, W_8y, W_12y); }
1222static INLINE void Block_Y_0_4(void) { Block_Y_xx_4(0, W_0y, W_4y, W_8y, W_12y); }
1223static INLINE void Block_Y_0_5(void) { Block_Y_xx_5(0, W_0y, W_4y, W_8y, W_12y); }
1224static INLINE void Block_Y_0_6(void) { Block_Y_xx_6(0, W_0y, W_4y, W_8y, W_12y); }
1225static INLINE void Block_Y_0_7(void) { Block_Y_xx_7(0, W_0y, W_4y, W_8y, W_12y); }
1226static INLINE void Block_Y_0_8(void) { Block_Y_xx_8(0, W_0y, W_4y, W_8y, W_12y); }
1227static INLINE void Block_Y_0_9(void) { Block_Y_xx_9(0, W_0y, W_4y, W_8y, W_12y); }
1228static INLINE void Block_Y_0_10(void){ Block_Y_xx_10(0, W_0y, W_4y, W_8y, W_12y); }
1229static INLINE void Block_Y_0_11(void){ Block_Y_xx_11(0, W_0y, W_4y, W_8y, W_12y); }
1230static INLINE void Block_Y_0_12(word64 *w){ Block_Y_xx_12(0, W_0y, W_4y, W_8y, W_12y); }
1231
1232static INLINE void Block_Y_4_1(void) { Block_Y_xx_1(4, W_4y, W_8y, W_12y, W_0y); }
1233static INLINE void Block_Y_4_2(void) { Block_Y_xx_2(4, W_4y, W_8y, W_12y, W_0y); }
1234static INLINE void Block_Y_4_3(void) { Block_Y_xx_3(4, W_4y, W_8y, W_12y, W_0y); }
1235static INLINE void Block_Y_4_4(void) { Block_Y_xx_4(4, W_4y, W_8y, W_12y, W_0y); }
1236static INLINE void Block_Y_4_5(void) { Block_Y_xx_5(4, W_4y, W_8y, W_12y, W_0y); }
1237static INLINE void Block_Y_4_6(void) { Block_Y_xx_6(4, W_4y, W_8y, W_12y, W_0y); }
1238static INLINE void Block_Y_4_7(void) { Block_Y_xx_7(4, W_4y, W_8y, W_12y, W_0y); }
1239static INLINE void Block_Y_4_8(void) { Block_Y_xx_8(4, W_4y, W_8y, W_12y, W_0y); }
1240static INLINE void Block_Y_4_9(void) { Block_Y_xx_9(4, W_4y, W_8y, W_12y, W_0y); }
1241static INLINE void Block_Y_4_10(void) { Block_Y_xx_10(4, W_4y, W_8y, W_12y, W_0y); }
1242static INLINE void Block_Y_4_11(void) { Block_Y_xx_11(4, W_4y, W_8y, W_12y, W_0y); }
1243static INLINE void Block_Y_4_12(word64 *w) { Block_Y_xx_12(4, W_4y, W_8y, W_12y, W_0y); }
1244
1245static INLINE void Block_Y_8_1(void) { Block_Y_xx_1(8, W_8y, W_12y, W_0y, W_4y); }
1246static INLINE void Block_Y_8_2(void) { Block_Y_xx_2(8, W_8y, W_12y, W_0y, W_4y); }
1247static INLINE void Block_Y_8_3(void) { Block_Y_xx_3(8, W_8y, W_12y, W_0y, W_4y); }
1248static INLINE void Block_Y_8_4(void) { Block_Y_xx_4(8, W_8y, W_12y, W_0y, W_4y); }
1249static INLINE void Block_Y_8_5(void) { Block_Y_xx_5(8, W_8y, W_12y, W_0y, W_4y); }
1250static INLINE void Block_Y_8_6(void) { Block_Y_xx_6(8, W_8y, W_12y, W_0y, W_4y); }
1251static INLINE void Block_Y_8_7(void) { Block_Y_xx_7(8, W_8y, W_12y, W_0y, W_4y); }
1252static INLINE void Block_Y_8_8(void) { Block_Y_xx_8(8, W_8y, W_12y, W_0y, W_4y); }
1253static INLINE void Block_Y_8_9(void) { Block_Y_xx_9(8, W_8y, W_12y, W_0y, W_4y); }
1254static INLINE void Block_Y_8_10(void) { Block_Y_xx_10(8, W_8y, W_12y, W_0y, W_4y); }
1255static INLINE void Block_Y_8_11(void) { Block_Y_xx_11(8, W_8y, W_12y, W_0y, W_4y); }
1256static INLINE void Block_Y_8_12(word64 *w) { Block_Y_xx_12(8, W_8y, W_12y, W_0y, W_4y); }
1257
1258static INLINE void Block_Y_12_1(void) { Block_Y_xx_1(12, W_12y, W_0y, W_4y, W_8y); }
1259static INLINE void Block_Y_12_2(void) { Block_Y_xx_2(12, W_12y, W_0y, W_4y, W_8y); }
1260static INLINE void Block_Y_12_3(void) { Block_Y_xx_3(12, W_12y, W_0y, W_4y, W_8y); }
1261static INLINE void Block_Y_12_4(void) { Block_Y_xx_4(12, W_12y, W_0y, W_4y, W_8y); }
1262static INLINE void Block_Y_12_5(void) { Block_Y_xx_5(12, W_12y, W_0y, W_4y, W_8y); }
1263static INLINE void Block_Y_12_6(void) { Block_Y_xx_6(12, W_12y, W_0y, W_4y, W_8y); }
1264static INLINE void Block_Y_12_7(void) { Block_Y_xx_7(12, W_12y, W_0y, W_4y, W_8y); }
1265static INLINE void Block_Y_12_8(void) { Block_Y_xx_8(12, W_12y, W_0y, W_4y, W_8y); }
1266static INLINE void Block_Y_12_9(void) { Block_Y_xx_9(12, W_12y, W_0y, W_4y, W_8y); }
1267static INLINE void Block_Y_12_10(void) { Block_Y_xx_10(12, W_12y, W_0y, W_4y, W_8y); }
1268static INLINE void Block_Y_12_11(void) { Block_Y_xx_11(12, W_12y, W_0y, W_4y, W_8y); }
1269static INLINE void Block_Y_12_12(word64 *w) { Block_Y_xx_12(12, W_12y, W_0y, W_4y, W_8y); }
1270
1271
1272static int Transform_AVX2(wc_Sha512* sha512)
1273{
1274 const word64* K = K512;
1275 word64 w[4];
1276 word32 j;
1277 word64 T[8];
1278
1279 /* Copy digest to working vars */
1280 XMEMCPY(T, sha512->digest, sizeof(T));
1281
1282 W_from_buff_Y(sha512->buffer);
1283 MOVE_to_MEMy(w,0, W_0y);
1284 for (j = 0; j < 80; j += 16) {
1285 Ry_1( 0, w[0]); Block_Y_0_1(); Ry_2( 0, w[0]); Block_Y_0_2();
1286 Ry_3( 0, w[0]); Block_Y_0_3();
1287 Ry_1( 1, w[1]); Block_Y_0_4(); Ry_2( 1, w[1]); Block_Y_0_5();
1288 Ry_3( 1, w[1]); Block_Y_0_6();
1289 Ry_1( 2, w[2]); Block_Y_0_7(); Ry_2( 2, w[2]); Block_Y_0_8();
1290 Ry_3( 2, w[2]); Block_Y_0_9();
1291 Ry_1( 3, w[3]); Block_Y_0_10();Ry_2( 3, w[3]); Block_Y_0_11();
1292 Ry_3( 3, w[3]); Block_Y_0_12(w);
1293
1294 Ry_1( 4, w[0]); Block_Y_4_1(); Ry_2( 4, w[0]); Block_Y_4_2();
1295 Ry_3( 4, w[0]); Block_Y_4_3();
1296 Ry_1( 5, w[1]); Block_Y_4_4(); Ry_2( 5, w[1]); Block_Y_4_5();
1297 Ry_3( 5, w[1]); Block_Y_4_6();
1298 Ry_1( 6, w[2]); Block_Y_4_7(); Ry_2( 6, w[2]); Block_Y_4_8();
1299 Ry_3( 6, w[2]); Block_Y_4_9();
1300 Ry_1( 7, w[3]); Block_Y_4_10(); Ry_2( 7, w[3]);Block_Y_4_11();
1301 Ry_3( 7, w[3]);Block_Y_4_12(w);
1302
1303 Ry_1( 8, w[0]); Block_Y_8_1(); Ry_2( 8, w[0]); Block_Y_8_2();
1304 Ry_3( 8, w[0]); Block_Y_8_3();
1305 Ry_1( 9, w[1]); Block_Y_8_4(); Ry_2( 9, w[1]); Block_Y_8_5();
1306 Ry_3( 9, w[1]); Block_Y_8_6();
1307 Ry_1(10, w[2]); Block_Y_8_7(); Ry_2(10, w[2]); Block_Y_8_8();
1308 Ry_3(10, w[2]); Block_Y_8_9();
1309 Ry_1(11, w[3]); Block_Y_8_10();Ry_2(11, w[3]); Block_Y_8_11();
1310 Ry_3(11, w[3]); Block_Y_8_12(w);
1311
1312 Ry_1(12, w[0]); Block_Y_12_1(); Ry_2(12, w[0]); Block_Y_12_2();
1313 Ry_3(12, w[0]); Block_Y_12_3();
1314 Ry_1(13, w[1]); Block_Y_12_4(); Ry_2(13, w[1]); Block_Y_12_5();
1315 Ry_3(13, w[1]); Block_Y_12_6();
1316 Ry_1(14, w[2]); Block_Y_12_7(); Ry_2(14, w[2]); Block_Y_12_8();
1317 Ry_3(14, w[2]); Block_Y_12_9();
1318 Ry_1(15, w[3]); Block_Y_12_10();Ry_2(15, w[3]); Block_Y_12_11();
1319 Ry_3(15, w[3]);Block_Y_12_12(w);
1320 }
1321
1322 /* Add the working vars back into digest */
1323 sha512->digest[0] += a(0);
1324 sha512->digest[1] += b(0);
1325 sha512->digest[2] += c(0);
1326 sha512->digest[3] += d(0);
1327 sha512->digest[4] += e(0);
1328 sha512->digest[5] += f(0);
1329 sha512->digest[6] += g(0);
1330 sha512->digest[7] += h(0);
1331
1332 /* Wipe variables */
1333#if !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2)
1334 XMEMSET(w, 0, sizeof(w));
1335#endif
1336 XMEMSET(T, 0, sizeof(T));
1337
1338 return 0;
1339}
1340#endif /* HAVE_INTEL_AVX2 */
1341
1342
1343
1344/* -------------------------------------------------------------------------- */
1345/* SHA384 */
1346/* -------------------------------------------------------------------------- */
1347#ifdef WOLFSSL_SHA384
1348static int InitSha384(wc_Sha384* sha384)
1349{
1350 if (sha384 == NULL) {
1351 return BAD_FUNC_ARG;
1352 }
1353
1354 sha384->digest[0] = W64LIT(0xcbbb9d5dc1059ed8);
1355 sha384->digest[1] = W64LIT(0x629a292a367cd507);
1356 sha384->digest[2] = W64LIT(0x9159015a3070dd17);
1357 sha384->digest[3] = W64LIT(0x152fecd8f70e5939);
1358 sha384->digest[4] = W64LIT(0x67332667ffc00b31);
1359 sha384->digest[5] = W64LIT(0x8eb44a8768581511);
1360 sha384->digest[6] = W64LIT(0xdb0c2e0d64f98fa7);
1361 sha384->digest[7] = W64LIT(0x47b5481dbefa4fa4);
1362
1363 sha384->buffLen = 0;
1364 sha384->loLen = 0;
1365 sha384->hiLen = 0;
1366
1367 return 0;
1368}
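
/* SHA-384 is SHA-512 with the distinct initial values above (the first
 * 64 bits of the fractional parts of the square roots of the 9th through
 * 16th primes) and the output truncated to WC_SHA384_DIGEST_SIZE (48)
 * bytes; the Update/Final paths below reuse the SHA-512 machinery. */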
1369
1370int wc_Sha384Update(wc_Sha384* sha384, const byte* data, word32 len)
1371{
1372 if (sha384 == NULL || (data == NULL && len > 0)) {
1373 return BAD_FUNC_ARG;
1374 }
1375
1376#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384)
1377 if (sha384->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA384) {
1378 #if defined(HAVE_INTEL_QA)
1379 return IntelQaSymSha384(&sha384->asyncDev, NULL, data, len);
1380 #endif
1381 }
1382#endif /* WOLFSSL_ASYNC_CRYPT */
1383
1384 return Sha512Update((wc_Sha512*)sha384, data, len);
1385}
1386
1387
1388int wc_Sha384Final(wc_Sha384* sha384, byte* hash)
1389{
1390 int ret;
1391
1392 if (sha384 == NULL || hash == NULL) {
1393 return BAD_FUNC_ARG;
1394 }
1395
1396#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384)
1397 if (sha384->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA384) {
1398 #if defined(HAVE_INTEL_QA)
1399 return IntelQaSymSha384(&sha384->asyncDev, hash, NULL,
1400 WC_SHA384_DIGEST_SIZE);
1401 #endif
1402 }
1403#endif /* WOLFSSL_ASYNC_CRYPT */
1404
1405 ret = Sha512Final((wc_Sha512*)sha384);
1406 if (ret != 0)
1407 return ret;
1408
1409 XMEMCPY(hash, sha384->digest, WC_SHA384_DIGEST_SIZE);
1410
1411 return InitSha384(sha384); /* reset state */
1412}
1413
1414
1415/* Hardware Acceleration */
1416#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
1417 int wc_InitSha384_ex(wc_Sha384* sha384, void* heap, int devId)
1418 {
1419 int ret = InitSha384(sha384);
1420
1421 (void)heap;
1422 (void)devId;
1423
1424 Sha512_SetTransform();
1425
1426 return ret;
1427 }
1428#else
1429int wc_InitSha384_ex(wc_Sha384* sha384, void* heap, int devId)
1430{
1431 int ret;
1432
1433 if (sha384 == NULL) {
1434 return BAD_FUNC_ARG;
1435 }
1436
1437 sha384->heap = heap;
1438 ret = InitSha384(sha384);
1439 if (ret != 0)
1440 return ret;
1441
1442#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384)
1443 ret = wolfAsync_DevCtxInit(&sha384->asyncDev, WOLFSSL_ASYNC_MARKER_SHA384,
1444 sha384->heap, devId);
1445#else
1446 (void)devId;
1447#endif /* WOLFSSL_ASYNC_CRYPT */
1448
1449 return ret;
1450}
1451#endif
1452
1453int wc_InitSha384(wc_Sha384* sha384)
1454{
1455 return wc_InitSha384_ex(sha384, NULL, INVALID_DEVID);
1456}
1457
1458void wc_Sha384Free(wc_Sha384* sha384)
1459{
1460 if (sha384 == NULL)
1461 return;
1462
1463#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384)
1464 wolfAsync_DevCtxFree(&sha384->asyncDev, WOLFSSL_ASYNC_MARKER_SHA384);
1465#endif /* WOLFSSL_ASYNC_CRYPT */
1466}
1467
1468#endif /* WOLFSSL_SHA384 */
1469
1470#endif /* HAVE_FIPS */
1471
1472
1473int wc_Sha512GetHash(wc_Sha512* sha512, byte* hash)
1474{
1475 int ret;
1476 wc_Sha512 tmpSha512;
1477
1478 if (sha512 == NULL || hash == NULL)
1479 return BAD_FUNC_ARG;
1480
1481 ret = wc_Sha512Copy(sha512, &tmpSha512);
1482 if (ret == 0) {
1483 ret = wc_Sha512Final(&tmpSha512, hash);
1484 }
1485 return ret;
1486}
1487
1488int wc_Sha512Copy(wc_Sha512* src, wc_Sha512* dst)
1489{
1490 int ret = 0;
1491
1492 if (src == NULL || dst == NULL)
1493 return BAD_FUNC_ARG;
1494
1495 XMEMCPY(dst, src, sizeof(wc_Sha512));
1496
1497#ifdef WOLFSSL_ASYNC_CRYPT
1498 ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
1499#endif
1500
1501 return ret;
1502}
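
/* wc_Sha512Copy is what makes wc_Sha512GetHash non-destructive: the state
 * is cloned and only the clone is finalized. A sketch of reading an
 * intermediate digest mid-stream (illustrative, not compiled): */
#if 0
    wc_Sha512 sha;
    byte mid[WC_SHA512_DIGEST_SIZE], full[WC_SHA512_DIGEST_SIZE];

    wc_InitSha512(&sha);
    wc_Sha512Update(&sha, (const byte*)"abc", 3);
    wc_Sha512GetHash(&sha, mid);  /* digest of "abc"; stream continues */
    wc_Sha512Update(&sha, (const byte*)"def", 3);
    wc_Sha512Final(&sha, full);   /* digest of "abcdef" */
#endif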
1503
1504#ifdef WOLFSSL_SHA384
1505int wc_Sha384GetHash(wc_Sha384* sha384, byte* hash)
1506{
1507 int ret;
1508 wc_Sha384 tmpSha384;
1509
1510 if (sha384 == NULL || hash == NULL)
1511 return BAD_FUNC_ARG;
1512
1513 ret = wc_Sha384Copy(sha384, &tmpSha384);
1514 if (ret == 0) {
1515 ret = wc_Sha384Final(&tmpSha384, hash);
1516 }
1517 return ret;
1518}
1519int wc_Sha384Copy(wc_Sha384* src, wc_Sha384* dst)
1520{
1521 int ret = 0;
1522
1523 if (src == NULL || dst == NULL)
1524 return BAD_FUNC_ARG;
1525
1526 XMEMCPY(dst, src, sizeof(wc_Sha384));
1527
1528#ifdef WOLFSSL_ASYNC_CRYPT
1529 ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
1530#endif
1531
1532 return ret;
1533}
1534#endif /* WOLFSSL_SHA384 */
1535
1536#endif /* WOLFSSL_SHA512 */