switch arc4random to chacha

Adrian Chadd adrian at freebsd.org
Fri May 30 02:54:22 UTC 2014


Hi!

How about running this via the security team here?

Start with Mark Murray (markm at freebsd.org)

I'm sure they'll at least want you to fire this up in a VM and test. :P




-a


On 29 May 2014 18:04, Ted Unangst <tedu at tedunangst.com> wrote:
> This syncs libc arc4random.c with OpenBSD, mostly to change the
> implementation to ChaCha20.
>
> I removed the more complicated seed fetching code and changed it to
> just sysctl(). A quick check revealed that the FreeBSD kernel supports
> this for at least five years now. It's much simpler to use code that
> always works instead of a series of untested fallbacks that are even
> less likely to work.
>
> Also removes the addrandom interface as a useless complication. If the
> kernel is incapable of properly seeding arc4random, application code
> can't do any better.
>
> Unfortunately, I don't have any FreeBSD systems running at the moment,
> so I can't make any promises that this will even compile, but it
> passed the eyeball test.
>
> --- arc4random.c.orig   Thu May 29 20:28:49 2014
> +++ arc4random.c        Thu May 29 20:51:59 2014
> @@ -1,8 +1,9 @@
> -/*     $OpenBSD: arc4random.c,v 1.22 2010/12/22 08:23:42 otto Exp $    */
> +/*     $OpenBSD: arc4random.c,v 1.30 2014/05/06 16:06:33 tedu Exp $    */
>
>  /*
>   * Copyright (c) 1996, David Mazieres <dm at uun.org>
>   * Copyright (c) 2008, Damien Miller <djm at openbsd.org>
> + * Copyright (c) 2013, Markus Friedl <markus at openbsd.org>
>   *
>   * Permission to use, copy, modify, and distribute this software for any
>   * purpose with or without fee is hereby granted, provided that the above
> @@ -18,15 +19,7 @@
>   */
>
>  /*
> - * Arc4 random number generator for OpenBSD.
> - *
> - * This code is derived from section 17.1 of Applied Cryptography,
> - * second edition, which describes a stream cipher allegedly
> - * compatible with RSA Labs "RC4" cipher (the actual description of
> - * which is a trade secret).  The same algorithm is used as a stream
> - * cipher called "arcfour" in Tatu Ylonen's ssh package.
> - *
> - * RC4 is a registered trademark of RSA Laboratories.
> + * ChaCha based random number generator for OpenBSD.
>   */
>
>  #include <sys/cdefs.h>
> @@ -40,28 +33,24 @@
>  #include <sys/types.h>
>  #include <sys/param.h>
>  #include <sys/sysctl.h>
> +#include <sys/mman.h>
>  #include <sys/time.h>
>  #include <pthread.h>
>
>  #include "libc_private.h"
>  #include "un-namespace.h"
>
> +#define KEYSTREAM_ONLY
> +#include "chacha_private.h"
> +
>  #ifdef __GNUC__
>  #define inline __inline
>  #else                          /* !__GNUC__ */
>  #define inline
>  #endif                         /* !__GNUC__ */
>
> -struct arc4_stream {
> -       u_int8_t i;
> -       u_int8_t j;
> -       u_int8_t s[256];
> -};
> -
>  static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER;
>
> -#define        RANDOMDEV       "/dev/random"
> -#define        KEYSIZE         128
>  #define        _ARC4_LOCK()                                            \
>         do {                                                    \
>                 if (__isthreaded)                               \
> @@ -74,187 +63,149 @@
>                         _pthread_mutex_unlock(&arc4random_mtx); \
>         } while (0)
>
> +#define KEYSZ   32
> +#define IVSZ    8
> +#define BLOCKSZ 64
> +#define RSBUFSZ (16*BLOCKSZ)
>  static int rs_initialized;
> -static struct arc4_stream rs;
> -static pid_t arc4_stir_pid;
> -static int arc4_count;
> +static pid_t rs_stir_pid;
> +static chacha_ctx *rs;          /* chacha context for random keystream */
> +static u_char *rs_buf;          /* keystream blocks */
> +static size_t rs_have;          /* valid bytes at end of rs_buf */
> +static size_t rs_count;         /* bytes till reseed */
>
> +static inline void _rs_rekey(u_char *dat, size_t datlen);
> +
>  extern int __sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
>      void *newp, size_t newlen);
>
> -static inline u_int8_t arc4_getbyte(void);
> -static void arc4_stir(void);
> -
>  static inline void
> -arc4_init(void)
> +_rs_init(u_char *buf, size_t n)
>  {
> -       int     n;
> +       if (n < KEYSZ + IVSZ)
> +               return;
>
> -       for (n = 0; n < 256; n++)
> -               rs.s[n] = n;
> -       rs.i = 0;
> -       rs.j = 0;
> -}
> +       if (rs == NULL && (rs = mmap(NULL, sizeof(*rs), PROT_READ|PROT_WRITE,
> +           MAP_ANON, -1, 0)) == MAP_FAILED)
> +               abort();
> +       if (rs_buf == NULL && (rs_buf = mmap(NULL, RSBUFSZ, PROT_READ|PROT_WRITE,
> +           MAP_ANON, -1, 0)) == MAP_FAILED)
> +               abort();
>
> -static inline void
> -arc4_addrandom(u_char *dat, int datlen)
> -{
> -       int     n;
> -       u_int8_t si;
> -
> -       rs.i--;
> -       for (n = 0; n < 256; n++) {
> -               rs.i = (rs.i + 1);
> -               si = rs.s[rs.i];
> -               rs.j = (rs.j + si + dat[n % datlen]);
> -               rs.s[rs.i] = rs.s[rs.j];
> -               rs.s[rs.j] = si;
> -       }
> -       rs.j = rs.i;
> +       chacha_keysetup(rs, buf, KEYSZ * 8, 0);
> +       chacha_ivsetup(rs, buf + KEYSZ);
>  }
>
> -static size_t
> -arc4_sysctl(u_char *buf, size_t size)
> +static void
> +_rs_stir(void)
>  {
> -       int mib[2];
> -       size_t len, done;
> +       int     mib[2];
> +       size_t  len;
> +       u_char rnd[KEYSZ + IVSZ];
>
>         mib[0] = CTL_KERN;
>         mib[1] = KERN_ARND;
> -       done = 0;
>
> -       do {
> -               len = size;
> -               if (__sysctl(mib, 2, buf, &len, NULL, 0) == -1)
> -                       return (done);
> -               done += len;
> -               buf += len;
> -               size -= len;
> -       } while (size > 0);
> +       len = sizeof(rnd);
> +       __sysctl(mib, 2, rnd, &len, NULL, 0);
>
> -       return (done);
> -}
> -
> -static void
> -arc4_stir(void)
> -{
> -       int done, fd, i;
> -       struct {
> -               struct timeval  tv;
> -               pid_t           pid;
> -               u_char          rnd[KEYSIZE];
> -       } rdat;
> -
>         if (!rs_initialized) {
> -               arc4_init();
>                 rs_initialized = 1;
> -       }
> -       done = 0;
> -       if (arc4_sysctl((u_char *)&rdat, KEYSIZE) == KEYSIZE)
> -               done = 1;
> -       if (!done) {
> -               fd = _open(RANDOMDEV, O_RDONLY | O_CLOEXEC, 0);
> -               if (fd >= 0) {
> -                       if (_read(fd, &rdat, KEYSIZE) == KEYSIZE)
> -                               done = 1;
> -                       (void)_close(fd);
> -               }
> -       }
> -       if (!done) {
> -               (void)gettimeofday(&rdat.tv, NULL);
> -               rdat.pid = getpid();
> -               /* We'll just take whatever was on the stack too... */
> -       }
> +               _rs_init(rnd, sizeof(rnd));
> +       } else
> +               _rs_rekey(rnd, sizeof(rnd));
> +       bzero(rnd, sizeof(rnd)); /* explicit_bzero */
>
> -       arc4_addrandom((u_char *)&rdat, KEYSIZE);
> +       /* invalidate rs_buf */
> +       rs_have = 0;
> +       memset(rs_buf, 0, RSBUFSZ);
>
> -       /*
> -        * Discard early keystream, as per recommendations in:
> -        * "(Not So) Random Shuffles of RC4" by Ilya Mironov.
> -        */
> -       for (i = 0; i < 1024; i++)
> -               (void)arc4_getbyte();
> -       arc4_count = 1600000;
> +       rs_count = 1600000;
>  }
>
> -static void
> -arc4_stir_if_needed(void)
> +static inline void
> +_rs_stir_if_needed(size_t len)
>  {
>         pid_t pid = getpid();
>
> -       if (arc4_count <= 0 || !rs_initialized || arc4_stir_pid != pid)
> -       {
> -               arc4_stir_pid = pid;
> -               arc4_stir();
> -       }
> +       if (rs_count <= len || !rs_initialized || rs_stir_pid != pid) {
> +               rs_stir_pid = pid;
> +               _rs_stir();
> +       } else
> +               rs_count -= len;
>  }
>
> -static inline u_int8_t
> -arc4_getbyte(void)
> +static inline void
> +_rs_rekey(u_char *dat, size_t datlen)
>  {
> -       u_int8_t si, sj;
> +#ifndef KEYSTREAM_ONLY
> +       memset(rs_buf, 0,RSBUFSZ);
> +#endif
> +       /* fill rs_buf with the keystream */
> +       chacha_encrypt_bytes(rs, rs_buf, rs_buf, RSBUFSZ);
> +       /* mix in optional user provided data */
> +       if (dat) {
> +               size_t i, m;
>
> -       rs.i = (rs.i + 1);
> -       si = rs.s[rs.i];
> -       rs.j = (rs.j + si);
> -       sj = rs.s[rs.j];
> -       rs.s[rs.i] = sj;
> -       rs.s[rs.j] = si;
> -       return (rs.s[(si + sj) & 0xff]);
> +               m = MIN(datlen, KEYSZ + IVSZ);
> +               for (i = 0; i < m; i++)
> +                       rs_buf[i] ^= dat[i];
> +       }
> +       /* immediately reinit for backtracking resistance */
> +       _rs_init(rs_buf, KEYSZ + IVSZ);
> +       memset(rs_buf, 0, KEYSZ + IVSZ);
> +       rs_have = RSBUFSZ - KEYSZ - IVSZ;
>  }
>
> -static inline u_int32_t
> -arc4_getword(void)
> +static inline void
> +_rs_random_buf(void *_buf, size_t n)
>  {
> -       u_int32_t val;
> -       val = arc4_getbyte() << 24;
> -       val |= arc4_getbyte() << 16;
> -       val |= arc4_getbyte() << 8;
> -       val |= arc4_getbyte();
> -       return val;
> -}
> +       u_char *buf = (u_char *)_buf;
> +       size_t m;
>
> -void
> -arc4random_stir(void)
> -{
> -       _ARC4_LOCK();
> -       arc4_stir();
> -       _ARC4_UNLOCK();
> +       _rs_stir_if_needed(n);
> +       while (n > 0) {
> +               if (rs_have > 0) {
> +                       m = MIN(n, rs_have);
> +                       memcpy(buf, rs_buf + RSBUFSZ - rs_have, m);
> +                       memset(rs_buf + RSBUFSZ - rs_have, 0, m);
> +                       buf += m;
> +                       n -= m;
> +                       rs_have -= m;
> +               }
> +               if (rs_have == 0)
> +                       _rs_rekey(NULL, 0);
> +       }
>  }
>
> -void
> -arc4random_addrandom(u_char *dat, int datlen)
> +static inline void
> +_rs_random_u32(u_int32_t *val)
>  {
> -       _ARC4_LOCK();
> -       if (!rs_initialized)
> -               arc4_stir();
> -       arc4_addrandom(dat, datlen);
> -       _ARC4_UNLOCK();
> +       _rs_stir_if_needed(sizeof(*val));
> +       if (rs_have < sizeof(*val))
> +               _rs_rekey(NULL, 0);
> +       memcpy(val, rs_buf + RSBUFSZ - rs_have, sizeof(*val));
> +       memset(rs_buf + RSBUFSZ - rs_have, 0, sizeof(*val));
> +       rs_have -= sizeof(*val);
> +       return;
>  }
>
>  u_int32_t
>  arc4random(void)
>  {
>         u_int32_t val;
> +
>         _ARC4_LOCK();
> -       arc4_count -= 4;
> -       arc4_stir_if_needed();
> -       val = arc4_getword();
> +       _rs_random_u32(&val);
>         _ARC4_UNLOCK();
>         return val;
>  }
>
>  void
> -arc4random_buf(void *_buf, size_t n)
> +arc4random_buf(void *buf, size_t n)
>  {
> -       u_char *buf = (u_char *)_buf;
>         _ARC4_LOCK();
> -       arc4_stir_if_needed();
> -       while (n--) {
> -               if (--arc4_count <= 0)
> -                       arc4_stir();
> -               buf[n] = arc4_getbyte();
> -       }
> +       _rs_random_buf(buf, n);
>         _ARC4_UNLOCK();
>  }
>
> @@ -276,17 +227,8 @@
>         if (upper_bound < 2)
>                 return 0;
>
> -#if (ULONG_MAX > 0xffffffffUL)
> -       min = 0x100000000UL % upper_bound;
> -#else
> -       /* Calculate (2**32 % upper_bound) avoiding 64-bit math */
> -       if (upper_bound > 0x80000000)
> -               min = 1 + ~upper_bound;         /* 2**32 - upper_bound */
> -       else {
> -               /* (2**32 - (x * 2)) % x == 2**32 % x when x <= 2**31 */
> -               min = ((0xffffffff - (upper_bound * 2)) + 1) % upper_bound;
> -       }
> -#endif
> +       /* 2**32 % x == (2**32 - x) % x */
> +       min = -upper_bound % upper_bound;
>
>         /*
>          * This could theoretically loop forever but each retry has
> @@ -302,24 +244,3 @@
>
>         return r % upper_bound;
>  }
> -
> -#if 0
> -/*-------- Test code for i386 --------*/
> -#include <stdio.h>
> -#include <machine/pctr.h>
> -int
> -main(int argc, char **argv)
> -{
> -       const int iter = 1000000;
> -       int     i;
> -       pctrval v;
> -
> -       v = rdtsc();
> -       for (i = 0; i < iter; i++)
> -               arc4random();
> -       v = rdtsc() - v;
> -       v /= iter;
> -
> -       printf("%qd cycles\n", v);
> -}
> -#endif
> --- /dev/null   Thu May 29 20:52:04 2014
> +++ chacha_private.h    Thu May 29 20:40:54 2014
> @@ -0,0 +1,222 @@
> +/*
> +chacha-merged.c version 20080118
> +D. J. Bernstein
> +Public domain.
> +*/
> +
> +/* $OpenBSD: chacha_private.h,v 1.2 2013/10/04 07:02:27 djm Exp $ */
> +
> +typedef unsigned char u8;
> +typedef unsigned int u32;
> +
> +typedef struct
> +{
> +  u32 input[16]; /* could be compressed */
> +} chacha_ctx;
> +
> +#define U8C(v) (v##U)
> +#define U32C(v) (v##U)
> +
> +#define U8V(v) ((u8)(v) & U8C(0xFF))
> +#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))
> +
> +#define ROTL32(v, n) \
> +  (U32V((v) << (n)) | ((v) >> (32 - (n))))
> +
> +#define U8TO32_LITTLE(p) \
> +  (((u32)((p)[0])      ) | \
> +   ((u32)((p)[1]) <<  8) | \
> +   ((u32)((p)[2]) << 16) | \
> +   ((u32)((p)[3]) << 24))
> +
> +#define U32TO8_LITTLE(p, v) \
> +  do { \
> +    (p)[0] = U8V((v)      ); \
> +    (p)[1] = U8V((v) >>  8); \
> +    (p)[2] = U8V((v) >> 16); \
> +    (p)[3] = U8V((v) >> 24); \
> +  } while (0)
> +
> +#define ROTATE(v,c) (ROTL32(v,c))
> +#define XOR(v,w) ((v) ^ (w))
> +#define PLUS(v,w) (U32V((v) + (w)))
> +#define PLUSONE(v) (PLUS((v),1))
> +
> +#define QUARTERROUND(a,b,c,d) \
> +  a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
> +  c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
> +  a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
> +  c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);
> +
> +static const char sigma[16] = "expand 32-byte k";
> +static const char tau[16] = "expand 16-byte k";
> +
> +static void
> +chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits,u32 ivbits)
> +{
> +  const char *constants;
> +
> +  x->input[4] = U8TO32_LITTLE(k + 0);
> +  x->input[5] = U8TO32_LITTLE(k + 4);
> +  x->input[6] = U8TO32_LITTLE(k + 8);
> +  x->input[7] = U8TO32_LITTLE(k + 12);
> +  if (kbits == 256) { /* recommended */
> +    k += 16;
> +    constants = sigma;
> +  } else { /* kbits == 128 */
> +    constants = tau;
> +  }
> +  x->input[8] = U8TO32_LITTLE(k + 0);
> +  x->input[9] = U8TO32_LITTLE(k + 4);
> +  x->input[10] = U8TO32_LITTLE(k + 8);
> +  x->input[11] = U8TO32_LITTLE(k + 12);
> +  x->input[0] = U8TO32_LITTLE(constants + 0);
> +  x->input[1] = U8TO32_LITTLE(constants + 4);
> +  x->input[2] = U8TO32_LITTLE(constants + 8);
> +  x->input[3] = U8TO32_LITTLE(constants + 12);
> +}
> +
> +static void
> +chacha_ivsetup(chacha_ctx *x,const u8 *iv)
> +{
> +  x->input[12] = 0;
> +  x->input[13] = 0;
> +  x->input[14] = U8TO32_LITTLE(iv + 0);
> +  x->input[15] = U8TO32_LITTLE(iv + 4);
> +}
> +
> +static void
> +chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
> +{
> +  u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
> +  u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
> +  u8 *ctarget = NULL;
> +  u8 tmp[64];
> +  u_int i;
> +
> +  if (!bytes) return;
> +
> +  j0 = x->input[0];
> +  j1 = x->input[1];
> +  j2 = x->input[2];
> +  j3 = x->input[3];
> +  j4 = x->input[4];
> +  j5 = x->input[5];
> +  j6 = x->input[6];
> +  j7 = x->input[7];
> +  j8 = x->input[8];
> +  j9 = x->input[9];
> +  j10 = x->input[10];
> +  j11 = x->input[11];
> +  j12 = x->input[12];
> +  j13 = x->input[13];
> +  j14 = x->input[14];
> +  j15 = x->input[15];
> +
> +  for (;;) {
> +    if (bytes < 64) {
> +      for (i = 0;i < bytes;++i) tmp[i] = m[i];
> +      m = tmp;
> +      ctarget = c;
> +      c = tmp;
> +    }
> +    x0 = j0;
> +    x1 = j1;
> +    x2 = j2;
> +    x3 = j3;
> +    x4 = j4;
> +    x5 = j5;
> +    x6 = j6;
> +    x7 = j7;
> +    x8 = j8;
> +    x9 = j9;
> +    x10 = j10;
> +    x11 = j11;
> +    x12 = j12;
> +    x13 = j13;
> +    x14 = j14;
> +    x15 = j15;
> +    for (i = 20;i > 0;i -= 2) {
> +      QUARTERROUND( x0, x4, x8,x12)
> +      QUARTERROUND( x1, x5, x9,x13)
> +      QUARTERROUND( x2, x6,x10,x14)
> +      QUARTERROUND( x3, x7,x11,x15)
> +      QUARTERROUND( x0, x5,x10,x15)
> +      QUARTERROUND( x1, x6,x11,x12)
> +      QUARTERROUND( x2, x7, x8,x13)
> +      QUARTERROUND( x3, x4, x9,x14)
> +    }
> +    x0 = PLUS(x0,j0);
> +    x1 = PLUS(x1,j1);
> +    x2 = PLUS(x2,j2);
> +    x3 = PLUS(x3,j3);
> +    x4 = PLUS(x4,j4);
> +    x5 = PLUS(x5,j5);
> +    x6 = PLUS(x6,j6);
> +    x7 = PLUS(x7,j7);
> +    x8 = PLUS(x8,j8);
> +    x9 = PLUS(x9,j9);
> +    x10 = PLUS(x10,j10);
> +    x11 = PLUS(x11,j11);
> +    x12 = PLUS(x12,j12);
> +    x13 = PLUS(x13,j13);
> +    x14 = PLUS(x14,j14);
> +    x15 = PLUS(x15,j15);
> +
> +#ifndef KEYSTREAM_ONLY
> +    x0 = XOR(x0,U8TO32_LITTLE(m + 0));
> +    x1 = XOR(x1,U8TO32_LITTLE(m + 4));
> +    x2 = XOR(x2,U8TO32_LITTLE(m + 8));
> +    x3 = XOR(x3,U8TO32_LITTLE(m + 12));
> +    x4 = XOR(x4,U8TO32_LITTLE(m + 16));
> +    x5 = XOR(x5,U8TO32_LITTLE(m + 20));
> +    x6 = XOR(x6,U8TO32_LITTLE(m + 24));
> +    x7 = XOR(x7,U8TO32_LITTLE(m + 28));
> +    x8 = XOR(x8,U8TO32_LITTLE(m + 32));
> +    x9 = XOR(x9,U8TO32_LITTLE(m + 36));
> +    x10 = XOR(x10,U8TO32_LITTLE(m + 40));
> +    x11 = XOR(x11,U8TO32_LITTLE(m + 44));
> +    x12 = XOR(x12,U8TO32_LITTLE(m + 48));
> +    x13 = XOR(x13,U8TO32_LITTLE(m + 52));
> +    x14 = XOR(x14,U8TO32_LITTLE(m + 56));
> +    x15 = XOR(x15,U8TO32_LITTLE(m + 60));
> +#endif
> +
> +    j12 = PLUSONE(j12);
> +    if (!j12) {
> +      j13 = PLUSONE(j13);
> +      /* stopping at 2^70 bytes per nonce is user's responsibility */
> +    }
> +
> +    U32TO8_LITTLE(c + 0,x0);
> +    U32TO8_LITTLE(c + 4,x1);
> +    U32TO8_LITTLE(c + 8,x2);
> +    U32TO8_LITTLE(c + 12,x3);
> +    U32TO8_LITTLE(c + 16,x4);
> +    U32TO8_LITTLE(c + 20,x5);
> +    U32TO8_LITTLE(c + 24,x6);
> +    U32TO8_LITTLE(c + 28,x7);
> +    U32TO8_LITTLE(c + 32,x8);
> +    U32TO8_LITTLE(c + 36,x9);
> +    U32TO8_LITTLE(c + 40,x10);
> +    U32TO8_LITTLE(c + 44,x11);
> +    U32TO8_LITTLE(c + 48,x12);
> +    U32TO8_LITTLE(c + 52,x13);
> +    U32TO8_LITTLE(c + 56,x14);
> +    U32TO8_LITTLE(c + 60,x15);
> +
> +    if (bytes <= 64) {
> +      if (bytes < 64) {
> +        for (i = 0;i < bytes;++i) ctarget[i] = c[i];
> +      }
> +      x->input[12] = j12;
> +      x->input[13] = j13;
> +      return;
> +    }
> +    bytes -= 64;
> +    c += 64;
> +#ifndef KEYSTREAM_ONLY
> +    m += 64;
> +#endif
> +  }
> +}
> _______________________________________________
> freebsd-hackers at freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-hackers
> To unsubscribe, send any mail to "freebsd-hackers-unsubscribe at freebsd.org"


More information about the freebsd-hackers mailing list