/* * Copyright 2015-2016 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy * in the file LICENSE in the source distribution or at * https://www.openssl.org/source/license.html */ /* Adapted from the public domain code by D. Bernstein from SUPERCOP. */ #include #include "internal/chacha.h" typedef unsigned int u32; typedef unsigned char u8; typedef union { u32 u[16]; u8 c[64]; } chacha_buf; # define ROTATE(v, n) (((v) << (n)) | ((v) >> (32 - (n)))) # define U32TO8_LITTLE(p, v) do { \ (p)[0] = (u8)(v >> 0); \ (p)[1] = (u8)(v >> 8); \ (p)[2] = (u8)(v >> 16); \ (p)[3] = (u8)(v >> 24); \ } while(0) /* QUARTERROUND updates a, b, c, d with a ChaCha "quarter" round. */ # define QUARTERROUND(a,b,c,d) ( \ x[a] += x[b], x[d] = ROTATE((x[d] ^ x[a]),16), \ x[c] += x[d], x[b] = ROTATE((x[b] ^ x[c]),12), \ x[a] += x[b], x[d] = ROTATE((x[d] ^ x[a]), 8), \ x[c] += x[d], x[b] = ROTATE((x[b] ^ x[c]), 7) ) /* chacha_core performs 20 rounds of ChaCha on the input words in * |input| and writes the 64 output bytes to |output|. */ static void chacha20_core(chacha_buf *output, const u32 input[16]) { u32 x[16]; int i; const union { long one; char little; } is_endian = { 1 }; memcpy(x, input, sizeof(x)); for (i = 20; i > 0; i -= 2) { QUARTERROUND(0, 4, 8, 12); QUARTERROUND(1, 5, 9, 13); QUARTERROUND(2, 6, 10, 14); QUARTERROUND(3, 7, 11, 15); QUARTERROUND(0, 5, 10, 15); QUARTERROUND(1, 6, 11, 12); QUARTERROUND(2, 7, 8, 13); QUARTERROUND(3, 4, 9, 14); } if (is_endian.little) { for (i = 0; i < 16; ++i) output->u[i] = x[i] + input[i]; } else { for (i = 0; i < 16; ++i) U32TO8_LITTLE(output->c + 4 * i, (x[i] + input[i])); } } void ChaCha20_ctr32(unsigned char *out, const unsigned char *inp, size_t len, const unsigned int key[8], const unsigned int counter[4]) { u32 input[16]; chacha_buf buf; size_t todo, i; /* sigma constant "expand 32-byte k" in little-endian encoding */ input[0] = ((u32)'e') | ((u32)'x'<<8) | ((u32)'p'<<16) | ((u32)'a'<<24); input[1] = ((u32)'n') | ((u32)'d'<<8) | ((u32)' '<<16) | ((u32)'3'<<24); input[2] = ((u32)'2') | ((u32)'-'<<8) | ((u32)'b'<<16) | ((u32)'y'<<24); input[3] = ((u32)'t') | ((u32)'e'<<8) | ((u32)' '<<16) | ((u32)'k'<<24); input[4] = key[0]; input[5] = key[1]; input[6] = key[2]; input[7] = key[3]; input[8] = key[4]; input[9] = key[5]; input[10] = key[6]; input[11] = key[7]; input[12] = counter[0]; input[13] = counter[1]; input[14] = counter[2]; input[15] = counter[3]; while (len > 0) { todo = sizeof(buf); if (len < todo) todo = len; chacha20_core(&buf, input); for (i = 0; i < todo; i++) out[i] = inp[i] ^ buf.c[i]; out += todo; inp += todo; len -= todo; /* * Advance 32-bit counter. Note that as subroutine is so to * say nonce-agnostic, this limited counter width doesn't * prevent caller from implementing wider counter. It would * simply take two calls split on counter overflow... */ input[12]++; } }