svn commit: r451183 - in head/security/nss: . files

Jan Beich jbeich at FreeBSD.org
Wed Oct 4 07:51:06 UTC 2017


Author: jbeich
Date: Wed Oct  4 07:51:04 2017
New Revision: 451183
URL: https://svnweb.freebsd.org/changeset/ports/451183

Log:
  security/nss: fix crash on pre-SSE2 hardware
  
  PR:		221466
  Obtained from:	upstream

Added:
  head/security/nss/files/patch-bug1400603   (contents, props changed)
Modified:
  head/security/nss/Makefile   (contents, props changed)

Modified: head/security/nss/Makefile
==============================================================================
--- head/security/nss/Makefile	Wed Oct  4 07:50:17 2017	(r451182)
+++ head/security/nss/Makefile	Wed Oct  4 07:51:04 2017	(r451183)
@@ -3,6 +3,7 @@
 
 PORTNAME=	nss
 PORTVERSION=	3.33
+PORTREVISION=	1
 #DISTVERSIONSUFFIX=	-with-ckbi-1.98
 CATEGORIES=	security
 MASTER_SITES=	MOZILLA/security/${PORTNAME}/releases/${DISTNAME:tu:C/[-.]/_/g}_RTM/src
@@ -55,6 +56,10 @@ MAKE_ENV+=	BUILD_OPT=1
 BINS=		${DIST}/${OPSYS}${OSREL}_OPT.OBJ
 .else
 BINS=		${DIST}/${OPSYS}${OSREL}_DBG.OBJ
+.endif
+
+.if ${OPSYS} == FreeBSD && ${ARCH} == i386
+USES+=		compiler:c++14-lang # gcm.h
 .endif
 
 .if ${OPSYS} == FreeBSD && ${ARCH} == amd64

Added: head/security/nss/files/patch-bug1400603
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/security/nss/files/patch-bug1400603	Wed Oct  4 07:51:04 2017	(r451183)
@@ -0,0 +1,942 @@
+commit e84403331d99
+Author: Daiki Ueno <dueno at redhat.com>
+Date:   Fri Sep 22 11:27:34 2017 +0200
+
+    Bug 1400603 - freebl: Reorganize AES-GCM source code based on hw/sw implementation, r=franziskus
+    
+    Reviewers: franziskus
+    
+    Reviewed By: franziskus
+    
+    Bug #: 1400603
+    
+    Differential Revision: https://phabricator.services.mozilla.com/D65
+---
+ lib/freebl/Makefile   |   4 +-
+ lib/freebl/aes-x86.c  | 157 +++++++++++++++++++++++++++++++++++++++++
+ lib/freebl/freebl.gyp |  65 ++++++++++-------
+ lib/freebl/gcm-x86.c  | 127 ++++++++++++++++++++++++++++++++++
+ lib/freebl/gcm.c      | 162 +++++++++++++------------------------------
+ lib/freebl/gcm.h      |  14 ++++
+ lib/freebl/rijndael.c | 188 ++++++++------------------------------------------
+ lib/freebl/rijndael.h |  18 ++++-
+ 8 files changed, 436 insertions(+), 299 deletions(-)
+
+diff --git lib/freebl/Makefile lib/freebl/Makefile
+index d50e18696b..bc1ea86a5e 100644
+--- lib/freebl/Makefile
++++ lib/freebl/Makefile
+@@ -110,7 +110,9 @@ endif
+ # NSS_X86_OR_X64 means the target is either x86 or x64
+ ifeq (,$(filter-out i386 x386 x86 x86_64,$(CPU_ARCH)))
+         DEFINES += -DNSS_X86_OR_X64
+-        CFLAGS += -mpclmul -maes
++        EXTRA_SRCS += gcm-x86.c aes-x86.c
++$(OBJDIR)/gcm-x86.o: CFLAGS += -mpclmul -maes
++$(OBJDIR)/aes-x86.o: CFLAGS += -mpclmul -maes
+ ifneq (,$(USE_64)$(USE_X32))
+         DEFINES += -DNSS_X64
+ else
+diff --git lib/freebl/aes-x86.c lib/freebl/aes-x86.c
+new file mode 100644
+index 0000000000..830b4782fe
+--- /dev/null
++++ lib/freebl/aes-x86.c
+@@ -0,0 +1,157 @@
++/* This Source Code Form is subject to the terms of the Mozilla Public
++ * License, v. 2.0. If a copy of the MPL was not distributed with this
++ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
++
++#ifdef FREEBL_NO_DEPEND
++#include "stubs.h"
++#endif
++#include "rijndael.h"
++#include "secerr.h"
++
++#include <wmmintrin.h> /* aes-ni */
++
++#define EXPAND_KEY128(k, rcon, res)                   \
++    tmp_key = _mm_aeskeygenassist_si128(k, rcon);     \
++    tmp_key = _mm_shuffle_epi32(tmp_key, 0xFF);       \
++    tmp = _mm_xor_si128(k, _mm_slli_si128(k, 4));     \
++    tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \
++    tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \
++    res = _mm_xor_si128(tmp, tmp_key)
++
++static void
++native_key_expansion128(AESContext *cx, const unsigned char *key)
++{
++    __m128i *keySchedule = cx->keySchedule;
++    pre_align __m128i tmp_key post_align;
++    pre_align __m128i tmp post_align;
++    keySchedule[0] = _mm_loadu_si128((__m128i *)key);
++    EXPAND_KEY128(keySchedule[0], 0x01, keySchedule[1]);
++    EXPAND_KEY128(keySchedule[1], 0x02, keySchedule[2]);
++    EXPAND_KEY128(keySchedule[2], 0x04, keySchedule[3]);
++    EXPAND_KEY128(keySchedule[3], 0x08, keySchedule[4]);
++    EXPAND_KEY128(keySchedule[4], 0x10, keySchedule[5]);
++    EXPAND_KEY128(keySchedule[5], 0x20, keySchedule[6]);
++    EXPAND_KEY128(keySchedule[6], 0x40, keySchedule[7]);
++    EXPAND_KEY128(keySchedule[7], 0x80, keySchedule[8]);
++    EXPAND_KEY128(keySchedule[8], 0x1B, keySchedule[9]);
++    EXPAND_KEY128(keySchedule[9], 0x36, keySchedule[10]);
++}
++
++#define EXPAND_KEY192_PART1(res, k0, kt, rcon)                                \
++    tmp2 = _mm_slli_si128(k0, 4);                                             \
++    tmp1 = _mm_xor_si128(k0, tmp2);                                           \
++    tmp2 = _mm_slli_si128(tmp2, 4);                                           \
++    tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \
++    tmp2 = _mm_aeskeygenassist_si128(kt, rcon);                               \
++    res = _mm_xor_si128(tmp1, _mm_shuffle_epi32(tmp2, 0x55))
++
++#define EXPAND_KEY192_PART2(res, k1, k2)             \
++    tmp2 = _mm_xor_si128(k1, _mm_slli_si128(k1, 4)); \
++    res = _mm_xor_si128(tmp2, _mm_shuffle_epi32(k2, 0xFF))
++
++#define EXPAND_KEY192(k0, res1, res2, res3, carry, rcon1, rcon2)         \
++    EXPAND_KEY192_PART1(tmp3, k0, res1, rcon1);                          \
++    EXPAND_KEY192_PART2(carry, res1, tmp3);                              \
++    res1 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(res1),       \
++                                           _mm_castsi128_pd(tmp3), 0));  \
++    res2 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(tmp3),       \
++                                           _mm_castsi128_pd(carry), 1)); \
++    EXPAND_KEY192_PART1(res3, tmp3, carry, rcon2)
++
++static void
++native_key_expansion192(AESContext *cx, const unsigned char *key)
++{
++    __m128i *keySchedule = cx->keySchedule;
++    pre_align __m128i tmp1 post_align;
++    pre_align __m128i tmp2 post_align;
++    pre_align __m128i tmp3 post_align;
++    pre_align __m128i carry post_align;
++    keySchedule[0] = _mm_loadu_si128((__m128i *)key);
++    keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16));
++    EXPAND_KEY192(keySchedule[0], keySchedule[1], keySchedule[2],
++                  keySchedule[3], carry, 0x1, 0x2);
++    EXPAND_KEY192_PART2(keySchedule[4], carry, keySchedule[3]);
++    EXPAND_KEY192(keySchedule[3], keySchedule[4], keySchedule[5],
++                  keySchedule[6], carry, 0x4, 0x8);
++    EXPAND_KEY192_PART2(keySchedule[7], carry, keySchedule[6]);
++    EXPAND_KEY192(keySchedule[6], keySchedule[7], keySchedule[8],
++                  keySchedule[9], carry, 0x10, 0x20);
++    EXPAND_KEY192_PART2(keySchedule[10], carry, keySchedule[9]);
++    EXPAND_KEY192(keySchedule[9], keySchedule[10], keySchedule[11],
++                  keySchedule[12], carry, 0x40, 0x80);
++}
++
++#define EXPAND_KEY256_PART(res, rconx, k1x, k2x, X)                           \
++    tmp_key = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(k2x, rconx), X);    \
++    tmp2 = _mm_slli_si128(k1x, 4);                                            \
++    tmp1 = _mm_xor_si128(k1x, tmp2);                                          \
++    tmp2 = _mm_slli_si128(tmp2, 4);                                           \
++    tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \
++    res = _mm_xor_si128(tmp1, tmp_key);
++
++#define EXPAND_KEY256(res1, res2, k1, k2, rcon)   \
++    EXPAND_KEY256_PART(res1, rcon, k1, k2, 0xFF); \
++    EXPAND_KEY256_PART(res2, 0x00, k2, res1, 0xAA)
++
++static void
++native_key_expansion256(AESContext *cx, const unsigned char *key)
++{
++    __m128i *keySchedule = cx->keySchedule;
++    pre_align __m128i tmp_key post_align;
++    pre_align __m128i tmp1 post_align;
++    pre_align __m128i tmp2 post_align;
++    keySchedule[0] = _mm_loadu_si128((__m128i *)key);
++    keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16));
++    EXPAND_KEY256(keySchedule[2], keySchedule[3], keySchedule[0],
++                  keySchedule[1], 0x01);
++    EXPAND_KEY256(keySchedule[4], keySchedule[5], keySchedule[2],
++                  keySchedule[3], 0x02);
++    EXPAND_KEY256(keySchedule[6], keySchedule[7], keySchedule[4],
++                  keySchedule[5], 0x04);
++    EXPAND_KEY256(keySchedule[8], keySchedule[9], keySchedule[6],
++                  keySchedule[7], 0x08);
++    EXPAND_KEY256(keySchedule[10], keySchedule[11], keySchedule[8],
++                  keySchedule[9], 0x10);
++    EXPAND_KEY256(keySchedule[12], keySchedule[13], keySchedule[10],
++                  keySchedule[11], 0x20);
++    EXPAND_KEY256_PART(keySchedule[14], 0x40, keySchedule[12],
++                       keySchedule[13], 0xFF);
++}
++
++/*
++ * AES key expansion using aes-ni instructions.
++ */
++void
++rijndael_native_key_expansion(AESContext *cx, const unsigned char *key,
++                              unsigned int Nk)
++{
++    switch (Nk) {
++        case 4:
++            native_key_expansion128(cx, key);
++            return;
++        case 6:
++            native_key_expansion192(cx, key);
++            return;
++        case 8:
++            native_key_expansion256(cx, key);
++            return;
++        default:
++            /* This shouldn't happen (checked by the caller). */
++            return;
++    }
++}
++
++void
++rijndael_native_encryptBlock(AESContext *cx,
++                             unsigned char *output,
++                             const unsigned char *input)
++{
++    int i;
++    pre_align __m128i m post_align = _mm_loadu_si128((__m128i *)input);
++    m = _mm_xor_si128(m, cx->keySchedule[0]);
++    for (i = 1; i < cx->Nr; ++i) {
++        m = _mm_aesenc_si128(m, cx->keySchedule[i]);
++    }
++    m = _mm_aesenclast_si128(m, cx->keySchedule[cx->Nr]);
++    _mm_storeu_si128((__m128i *)output, m);
++}
+diff --git lib/freebl/freebl.gyp lib/freebl/freebl.gyp
+index 1e93475004..5f59eef29c 100644
+--- lib/freebl/freebl.gyp
++++ lib/freebl/freebl.gyp
+@@ -22,6 +22,37 @@
+         '-mssse3'
+       ]
+     },
++    {
++      'target_name': 'gcm-aes-x86_c_lib',
++      'type': 'static_library',
++      'sources': [
++        'gcm-x86.c', 'aes-x86.c'
++      ],
++      'dependencies': [
++        '<(DEPTH)/exports.gyp:nss_exports'
++      ],
++      # Enable isa option for pclmul and aes-ni; supported since gcc 4.4.
++      # This is only supported by x86/x64. It's not needed for Windows,
++      # unless clang-cl is used.
++      'cflags_mozilla': [
++        '-mpclmul', '-maes'
++      ],
++      'conditions': [
++        [ 'OS=="linux" or OS=="android" or OS=="dragonfly" or OS=="freebsd" or OS=="netbsd" or OS=="openbsd"', {
++          'cflags': [
++            '-mpclmul', '-maes'
++          ],
++        }],
++        # macOS build doesn't use cflags.
++        [ 'OS=="mac"', {
++          'xcode_settings': {
++            'OTHER_CFLAGS': [
++              '-mpclmul', '-maes'
++            ],
++          },
++        }]
++      ]
++    },
+     {
+       'target_name': 'freebl',
+       'type': 'static_library',
+@@ -45,6 +76,11 @@
+         '<(DEPTH)/exports.gyp:nss_exports',
+       ],
+       'conditions': [
++        [ 'target_arch=="ia32" or target_arch=="x64"', {
++          'dependencies': [
++            'gcm-aes-x86_c_lib'
++          ],
++        }],
+         [ 'OS=="linux"', {
+           'defines!': [
+             'FREEBL_NO_DEPEND',
+@@ -76,6 +112,11 @@
+         '<(DEPTH)/exports.gyp:nss_exports',
+       ],
+       'conditions': [
++        [ 'target_arch=="ia32" or target_arch=="x64"', {
++          'dependencies': [
++            'gcm-aes-x86_c_lib'
++          ]
++        }],
+         [ 'OS!="linux" and OS!="android"', {
+           'conditions': [
+             [ 'moz_fold_libs==0', {
+@@ -154,27 +195,11 @@
+       'MP_API_COMPATIBLE'
+     ],
+     'conditions': [
+-      [ 'target_arch=="ia32" or target_arch=="x64"', {
+-        'cflags_mozilla': [
+-          '-mpclmul',
+-          '-maes',
+-        ],
+-        'conditions': [
+-          [ 'OS=="dragonfly" or OS=="freebsd" or OS=="netbsd" or OS=="openbsd"', {
+-            'cflags': [
+-              '-mpclmul',
+-              '-maes',
+-            ],
+-          }],
+-        ],
+-      }],
+       [ 'OS=="mac"', {
+         'xcode_settings': {
+           # I'm not sure since when this is supported.
+           # But I hope that doesn't matter. We also assume this is x86/x64.
+           'OTHER_CFLAGS': [
+-            '-mpclmul',
+-            '-maes',
+             '-std=gnu99',
+           ],
+         },
+@@ -268,14 +293,6 @@
+               'MP_USE_UINT_DIGIT',
+             ],
+           }],
+-          [ 'target_arch=="ia32" or target_arch=="x64"', {
+-            'cflags': [
+-              # enable isa option for pclmul am aes-ni; supported since gcc 4.4
+-              # This is only support by x84/x64. It's not needed for Windows.
+-              '-mpclmul',
+-              '-maes',
+-            ],
+-          }],
+           [ 'target_arch=="arm"', {
+             'defines': [
+               'MP_ASSEMBLY_MULTIPLY',
+diff --git lib/freebl/gcm-x86.c lib/freebl/gcm-x86.c
+new file mode 100644
+index 0000000000..e34d633943
+--- /dev/null
++++ lib/freebl/gcm-x86.c
+@@ -0,0 +1,127 @@
++/* This Source Code Form is subject to the terms of the Mozilla Public
++ * License, v. 2.0. If a copy of the MPL was not distributed with this
++ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
++
++#ifdef FREEBL_NO_DEPEND
++#include "stubs.h"
++#endif
++#include "gcm.h"
++#include "secerr.h"
++
++#include <wmmintrin.h> /* clmul */
++
++#define WRITE64(x, bytes)   \
++    (bytes)[0] = (x) >> 56; \
++    (bytes)[1] = (x) >> 48; \
++    (bytes)[2] = (x) >> 40; \
++    (bytes)[3] = (x) >> 32; \
++    (bytes)[4] = (x) >> 24; \
++    (bytes)[5] = (x) >> 16; \
++    (bytes)[6] = (x) >> 8;  \
++    (bytes)[7] = (x);
++
++SECStatus
++gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf)
++{
++    uint64_t tmp_out[2];
++    _mm_storeu_si128((__m128i *)tmp_out, ghash->x);
++    /* outbuf must hold 16 bytes (maxout is checked by the caller). */
++    WRITE64(tmp_out[0], outbuf + 8);
++    WRITE64(tmp_out[1], outbuf);
++    return SECSuccess;
++}
++
++SECStatus
++gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf,
++                unsigned int count)
++{
++    size_t i;
++    pre_align __m128i z_high post_align;
++    pre_align __m128i z_low post_align;
++    pre_align __m128i C post_align;
++    pre_align __m128i D post_align;
++    pre_align __m128i E post_align;
++    pre_align __m128i F post_align;
++    pre_align __m128i bin post_align;
++    pre_align __m128i Ci post_align;
++    pre_align __m128i tmp post_align;
++
++    for (i = 0; i < count; i++, buf += 16) {
++        bin = _mm_set_epi16(((uint16_t)buf[0] << 8) | buf[1],
++                            ((uint16_t)buf[2] << 8) | buf[3],
++                            ((uint16_t)buf[4] << 8) | buf[5],
++                            ((uint16_t)buf[6] << 8) | buf[7],
++                            ((uint16_t)buf[8] << 8) | buf[9],
++                            ((uint16_t)buf[10] << 8) | buf[11],
++                            ((uint16_t)buf[12] << 8) | buf[13],
++                            ((uint16_t)buf[14] << 8) | buf[15]);
++        Ci = _mm_xor_si128(bin, ghash->x);
++
++        /* Do binary mult ghash->X = Ci * ghash->H. */
++        C = _mm_clmulepi64_si128(Ci, ghash->h, 0x00);
++        D = _mm_clmulepi64_si128(Ci, ghash->h, 0x11);
++        E = _mm_clmulepi64_si128(Ci, ghash->h, 0x01);
++        F = _mm_clmulepi64_si128(Ci, ghash->h, 0x10);
++        tmp = _mm_xor_si128(E, F);
++        z_high = _mm_xor_si128(tmp, _mm_slli_si128(D, 8));
++        z_high = _mm_unpackhi_epi64(z_high, D);
++        z_low = _mm_xor_si128(_mm_slli_si128(tmp, 8), C);
++        z_low = _mm_unpackhi_epi64(_mm_slli_si128(C, 8), z_low);
++
++        /* Shift one to the left (multiply by x) as gcm spec is stupid. */
++        C = _mm_slli_si128(z_low, 8);
++        E = _mm_srli_epi64(C, 63);
++        D = _mm_slli_si128(z_high, 8);
++        F = _mm_srli_epi64(D, 63);
++        /* Carry over */
++        C = _mm_srli_si128(z_low, 8);
++        D = _mm_srli_epi64(C, 63);
++        z_low = _mm_or_si128(_mm_slli_epi64(z_low, 1), E);
++        z_high = _mm_or_si128(_mm_or_si128(_mm_slli_epi64(z_high, 1), F), D);
++
++        /* Reduce */
++        C = _mm_slli_si128(z_low, 8);
++        /* D = z_low << 127 */
++        D = _mm_slli_epi64(C, 63);
++        /* E = z_low << 126 */
++        E = _mm_slli_epi64(C, 62);
++        /* F = z_low << 121 */
++        F = _mm_slli_epi64(C, 57);
++        /* z_low ^= (z_low << 127) ^ (z_low << 126) ^ (z_low << 121); */
++        z_low = _mm_xor_si128(_mm_xor_si128(_mm_xor_si128(z_low, D), E), F);
++        C = _mm_srli_si128(z_low, 8);
++        /* D = z_low >> 1 */
++        D = _mm_slli_epi64(C, 63);
++        D = _mm_or_si128(_mm_srli_epi64(z_low, 1), D);
++        /* E = z_low >> 2 */
++        E = _mm_slli_epi64(C, 62);
++        E = _mm_or_si128(_mm_srli_epi64(z_low, 2), E);
++        /* F = z_low >> 7 */
++        F = _mm_slli_epi64(C, 57);
++        F = _mm_or_si128(_mm_srli_epi64(z_low, 7), F);
++        /* ghash->x ^= z_low ^ (z_low >> 1) ^ (z_low >> 2) ^ (z_low >> 7); */
++        ghash->x = _mm_xor_si128(_mm_xor_si128(
++                                     _mm_xor_si128(_mm_xor_si128(z_high, z_low), D), E),
++                                 F);
++    }
++    return SECSuccess;
++}
++
++SECStatus
++gcm_HashInit_hw(gcmHashContext *ghash)
++{
++    ghash->ghash_mul = gcm_HashMult_hw;
++    ghash->x = _mm_setzero_si128();
++    /* MSVC requires __m64 to load epi64. */
++    ghash->h = _mm_set_epi32(ghash->h_high >> 32, (uint32_t)ghash->h_high,
++                             ghash->h_low >> 32, (uint32_t)ghash->h_low);
++    ghash->hw = PR_TRUE;
++    return SECSuccess;
++}
++
++SECStatus
++gcm_HashZeroX_hw(gcmHashContext *ghash)
++{
++    ghash->x = _mm_setzero_si128();
++    return SECSuccess;
++}
+diff --git lib/freebl/gcm.c lib/freebl/gcm.c
+index 780b7a6322..f1e16da78e 100644
+--- lib/freebl/gcm.c
++++ lib/freebl/gcm.c
+@@ -17,18 +17,50 @@
+ 
+ #include <limits.h>
+ 
+-#ifdef NSS_X86_OR_X64
+-#include <wmmintrin.h> /* clmul */
+-#endif
+-
+ /* Forward declarations */
++SECStatus gcm_HashInit_hw(gcmHashContext *ghash);
++SECStatus gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf);
+ SECStatus gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf,
+                           unsigned int count);
++SECStatus gcm_HashZeroX_hw(gcmHashContext *ghash);
+ SECStatus gcm_HashMult_sftw(gcmHashContext *ghash, const unsigned char *buf,
+                             unsigned int count);
+ SECStatus gcm_HashMult_sftw32(gcmHashContext *ghash, const unsigned char *buf,
+                               unsigned int count);
+ 
++/* Stub definitions for the above *_hw functions, which shouldn't be
++ * used unless NSS_X86_OR_X64 is defined */
++#ifndef NSS_X86_OR_X64
++SECStatus
++gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf)
++{
++    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
++    return SECFailure;
++}
++
++SECStatus
++gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf,
++                unsigned int count)
++{
++    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
++    return SECFailure;
++}
++
++SECStatus
++gcm_HashInit_hw(gcmHashContext *ghash)
++{
++    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
++    return SECFailure;
++}
++
++SECStatus
++gcm_HashZeroX_hw(gcmHashContext *ghash)
++{
++    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
++    return SECFailure;
++}
++#endif /* NSS_X86_OR_X64 */
++
+ uint64_t
+ get64(const unsigned char *bytes)
+ {
+@@ -46,6 +78,8 @@ get64(const unsigned char *bytes)
+ SECStatus
+ gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H, PRBool sw)
+ {
++    SECStatus rv = SECSuccess;
++
+     ghash->cLen = 0;
+     ghash->bufLen = 0;
+     PORT_Memset(ghash->counterBuf, 0, sizeof(ghash->counterBuf));
+@@ -53,17 +87,7 @@ gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H, PRBool sw)
+     ghash->h_low = get64(H + 8);
+     ghash->h_high = get64(H);
+     if (clmul_support() && !sw) {
+-#ifdef NSS_X86_OR_X64
+-        ghash->ghash_mul = gcm_HashMult_hw;
+-        ghash->x = _mm_setzero_si128();
+-        /* MSVC requires __m64 to load epi64. */
+-        ghash->h = _mm_set_epi32(ghash->h_high >> 32, (uint32_t)ghash->h_high,
+-                                 ghash->h_low >> 32, (uint32_t)ghash->h_low);
+-        ghash->hw = PR_TRUE;
+-#else
+-        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+-        return SECFailure;
+-#endif /* NSS_X86_OR_X64 */
++        rv = gcm_HashInit_hw(ghash);
+     } else {
+ /* We fall back to the software implementation if we can't use / don't
+          * want to use pclmul. */
+@@ -75,7 +99,7 @@ gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H, PRBool sw)
+         ghash->x_high = ghash->x_low = 0;
+         ghash->hw = PR_FALSE;
+     }
+-    return SECSuccess;
++    return rv;
+ }
+ 
+ #ifdef HAVE_INT128_SUPPORT
+@@ -283,102 +307,17 @@ gcm_HashMult_sftw32(gcmHashContext *ghash, const unsigned char *buf,
+ }
+ #endif /* HAVE_INT128_SUPPORT */
+ 
+-SECStatus
+-gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf,
+-                unsigned int count)
+-{
+-#ifdef NSS_X86_OR_X64
+-    size_t i;
+-    pre_align __m128i z_high post_align;
+-    pre_align __m128i z_low post_align;
+-    pre_align __m128i C post_align;
+-    pre_align __m128i D post_align;
+-    pre_align __m128i E post_align;
+-    pre_align __m128i F post_align;
+-    pre_align __m128i bin post_align;
+-    pre_align __m128i Ci post_align;
+-    pre_align __m128i tmp post_align;
+-
+-    for (i = 0; i < count; i++, buf += 16) {
+-        bin = _mm_set_epi16(((uint16_t)buf[0] << 8) | buf[1],
+-                            ((uint16_t)buf[2] << 8) | buf[3],
+-                            ((uint16_t)buf[4] << 8) | buf[5],
+-                            ((uint16_t)buf[6] << 8) | buf[7],
+-                            ((uint16_t)buf[8] << 8) | buf[9],
+-                            ((uint16_t)buf[10] << 8) | buf[11],
+-                            ((uint16_t)buf[12] << 8) | buf[13],
+-                            ((uint16_t)buf[14] << 8) | buf[15]);
+-        Ci = _mm_xor_si128(bin, ghash->x);
+-
+-        /* Do binary mult ghash->X = Ci * ghash->H. */
+-        C = _mm_clmulepi64_si128(Ci, ghash->h, 0x00);
+-        D = _mm_clmulepi64_si128(Ci, ghash->h, 0x11);
+-        E = _mm_clmulepi64_si128(Ci, ghash->h, 0x01);
+-        F = _mm_clmulepi64_si128(Ci, ghash->h, 0x10);
+-        tmp = _mm_xor_si128(E, F);
+-        z_high = _mm_xor_si128(tmp, _mm_slli_si128(D, 8));
+-        z_high = _mm_unpackhi_epi64(z_high, D);
+-        z_low = _mm_xor_si128(_mm_slli_si128(tmp, 8), C);
+-        z_low = _mm_unpackhi_epi64(_mm_slli_si128(C, 8), z_low);
+-
+-        /* Shift one to the left (multiply by x) as gcm spec is stupid. */
+-        C = _mm_slli_si128(z_low, 8);
+-        E = _mm_srli_epi64(C, 63);
+-        D = _mm_slli_si128(z_high, 8);
+-        F = _mm_srli_epi64(D, 63);
+-        /* Carry over */
+-        C = _mm_srli_si128(z_low, 8);
+-        D = _mm_srli_epi64(C, 63);
+-        z_low = _mm_or_si128(_mm_slli_epi64(z_low, 1), E);
+-        z_high = _mm_or_si128(_mm_or_si128(_mm_slli_epi64(z_high, 1), F), D);
+-
+-        /* Reduce */
+-        C = _mm_slli_si128(z_low, 8);
+-        /* D = z_low << 127 */
+-        D = _mm_slli_epi64(C, 63);
+-        /* E = z_low << 126 */
+-        E = _mm_slli_epi64(C, 62);
+-        /* F = z_low << 121 */
+-        F = _mm_slli_epi64(C, 57);
+-        /* z_low ^= (z_low << 127) ^ (z_low << 126) ^ (z_low << 121); */
+-        z_low = _mm_xor_si128(_mm_xor_si128(_mm_xor_si128(z_low, D), E), F);
+-        C = _mm_srli_si128(z_low, 8);
+-        /* D = z_low >> 1 */
+-        D = _mm_slli_epi64(C, 63);
+-        D = _mm_or_si128(_mm_srli_epi64(z_low, 1), D);
+-        /* E = z_low >> 2 */
+-        E = _mm_slli_epi64(C, 62);
+-        E = _mm_or_si128(_mm_srli_epi64(z_low, 2), E);
+-        /* F = z_low >> 7 */
+-        F = _mm_slli_epi64(C, 57);
+-        F = _mm_or_si128(_mm_srli_epi64(z_low, 7), F);
+-        /* ghash->x ^= z_low ^ (z_low >> 1) ^ (z_low >> 2) ^ (z_low >> 7); */
+-        ghash->x = _mm_xor_si128(_mm_xor_si128(
+-                                     _mm_xor_si128(_mm_xor_si128(z_high, z_low), D), E),
+-                                 F);
+-    }
+-    return SECSuccess;
+-#else
+-    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+-    return SECFailure;
+-#endif /* NSS_X86_OR_X64 */
+-}
+-
+ static SECStatus
+ gcm_zeroX(gcmHashContext *ghash)
+ {
++    SECStatus rv = SECSuccess;
++
+     if (ghash->hw) {
+-#ifdef NSS_X86_OR_X64
+-        ghash->x = _mm_setzero_si128();
+-        return SECSuccess;
+-#else
+-        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+-        return SECFailure;
+-#endif /* NSS_X86_OR_X64 */
++        rv = gcm_HashZeroX_hw(ghash);
+     }
+ 
+     ghash->x_high = ghash->x_low = 0;
+-    return SECSuccess;
++    return rv;
+ }
+ 
+ /*
+@@ -503,15 +442,10 @@ gcmHash_Final(gcmHashContext *ghash, unsigned char *outbuf,
+     }
+ 
+     if (ghash->hw) {
+-#ifdef NSS_X86_OR_X64
+-        uint64_t tmp_out[2];
+-        _mm_storeu_si128((__m128i *)tmp_out, ghash->x);
+-        WRITE64(tmp_out[0], T + 8);
+-        WRITE64(tmp_out[1], T);
+-#else
+-        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+-        return SECFailure;
+-#endif /* NSS_X86_OR_X64 */
++        rv = gcm_HashWrite_hw(ghash, T);
++        if (rv != SECSuccess) {
++            goto cleanup;
++        }
+     } else {
+         WRITE64(ghash->x_low, T + 8);
+         WRITE64(ghash->x_high, T);
+diff --git lib/freebl/gcm.h lib/freebl/gcm.h
+index 0c707a0811..42ef0f7179 100644
+--- lib/freebl/gcm.h
++++ lib/freebl/gcm.h
+@@ -9,7 +9,21 @@
+ #include <stdint.h>
+ 
+ #ifdef NSS_X86_OR_X64
++/* GCC <= 4.8 doesn't support including emmintrin.h without enabling SSE2 */
++#if !defined(__clang__) && defined(__GNUC__) && defined(__GNUC_MINOR__) && \
++    (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ <= 8))
++#pragma GCC push_options
++#pragma GCC target("sse2")
++#undef NSS_DISABLE_SSE2
++#define NSS_DISABLE_SSE2 1
++#endif /* GCC <= 4.8 */
++
+ #include <emmintrin.h> /* __m128i */
++
++#ifdef NSS_DISABLE_SSE2
++#undef NSS_DISABLE_SSE2
++#pragma GCC pop_options
++#endif /* NSS_DISABLE_SSE2 */
+ #endif
+ 
+ SEC_BEGIN_PROTOS
+diff --git lib/freebl/rijndael.c lib/freebl/rijndael.c
+index a09f13098e..5de27de9ce 100644
+--- lib/freebl/rijndael.c
++++ lib/freebl/rijndael.c
+@@ -27,6 +27,34 @@
+ #include "intel-gcm.h"
+ #endif /* INTEL_GCM */
+ 
++/* Forward declarations */
++void rijndael_native_key_expansion(AESContext *cx, const unsigned char *key,
++                                   unsigned int Nk);
++void rijndael_native_encryptBlock(AESContext *cx,
++                                  unsigned char *output,
++                                  const unsigned char *input);
++
++/* Stub definitions for the above rijndael_native_* functions, which
++ * shouldn't be used unless NSS_X86_OR_X64 is defined */
++#ifndef NSS_X86_OR_X64
++void
++rijndael_native_key_expansion(AESContext *cx, const unsigned char *key,
++                              unsigned int Nk)
++{
++    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
++    PORT_Assert(0);
++}
++
++void
++rijndael_native_encryptBlock(AESContext *cx,
++                             unsigned char *output,
++                             const unsigned char *input)
++{
++    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
++    PORT_Assert(0);
++}
++#endif /* NSS_X86_OR_X64 */
++
+ /*
+  * There are currently three ways to build this code, varying in performance
+  * and code size.
+@@ -309,162 +337,6 @@ rijndael_key_expansion7(AESContext *cx, const unsigned char *key, unsigned int N
+     }
+ }
+ 
+-#if defined(NSS_X86_OR_X64)
+-#define EXPAND_KEY128(k, rcon, res)                   \
+-    tmp_key = _mm_aeskeygenassist_si128(k, rcon);     \
+-    tmp_key = _mm_shuffle_epi32(tmp_key, 0xFF);       \
+-    tmp = _mm_xor_si128(k, _mm_slli_si128(k, 4));     \
+-    tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \
+-    tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \
+-    res = _mm_xor_si128(tmp, tmp_key)
+-
+-static void
+-native_key_expansion128(AESContext *cx, const unsigned char *key)
+-{
+-    __m128i *keySchedule = cx->keySchedule;
+-    pre_align __m128i tmp_key post_align;
+-    pre_align __m128i tmp post_align;
+-    keySchedule[0] = _mm_loadu_si128((__m128i *)key);
+-    EXPAND_KEY128(keySchedule[0], 0x01, keySchedule[1]);
+-    EXPAND_KEY128(keySchedule[1], 0x02, keySchedule[2]);
+-    EXPAND_KEY128(keySchedule[2], 0x04, keySchedule[3]);
+-    EXPAND_KEY128(keySchedule[3], 0x08, keySchedule[4]);
+-    EXPAND_KEY128(keySchedule[4], 0x10, keySchedule[5]);
+-    EXPAND_KEY128(keySchedule[5], 0x20, keySchedule[6]);
+-    EXPAND_KEY128(keySchedule[6], 0x40, keySchedule[7]);
+-    EXPAND_KEY128(keySchedule[7], 0x80, keySchedule[8]);
+-    EXPAND_KEY128(keySchedule[8], 0x1B, keySchedule[9]);
+-    EXPAND_KEY128(keySchedule[9], 0x36, keySchedule[10]);
+-}
+-
+-#define EXPAND_KEY192_PART1(res, k0, kt, rcon)                                \
+-    tmp2 = _mm_slli_si128(k0, 4);                                             \
+-    tmp1 = _mm_xor_si128(k0, tmp2);                                           \
+-    tmp2 = _mm_slli_si128(tmp2, 4);                                           \
+-    tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \
+-    tmp2 = _mm_aeskeygenassist_si128(kt, rcon);                               \
+-    res = _mm_xor_si128(tmp1, _mm_shuffle_epi32(tmp2, 0x55))
+-
+-#define EXPAND_KEY192_PART2(res, k1, k2)             \
+-    tmp2 = _mm_xor_si128(k1, _mm_slli_si128(k1, 4)); \
+-    res = _mm_xor_si128(tmp2, _mm_shuffle_epi32(k2, 0xFF))
+-
+-#define EXPAND_KEY192(k0, res1, res2, res3, carry, rcon1, rcon2)         \
+-    EXPAND_KEY192_PART1(tmp3, k0, res1, rcon1);                          \
+-    EXPAND_KEY192_PART2(carry, res1, tmp3);                              \
+-    res1 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(res1),       \
+-                                           _mm_castsi128_pd(tmp3), 0));  \
+-    res2 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(tmp3),       \
+-                                           _mm_castsi128_pd(carry), 1)); \
+-    EXPAND_KEY192_PART1(res3, tmp3, carry, rcon2)
+-
+-static void
+-native_key_expansion192(AESContext *cx, const unsigned char *key)
+-{
+-    __m128i *keySchedule = cx->keySchedule;
+-    pre_align __m128i tmp1 post_align;
+-    pre_align __m128i tmp2 post_align;
+-    pre_align __m128i tmp3 post_align;
+-    pre_align __m128i carry post_align;
+-    keySchedule[0] = _mm_loadu_si128((__m128i *)key);
+-    keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16));
+-    EXPAND_KEY192(keySchedule[0], keySchedule[1], keySchedule[2],
+-                  keySchedule[3], carry, 0x1, 0x2);
+-    EXPAND_KEY192_PART2(keySchedule[4], carry, keySchedule[3]);
+-    EXPAND_KEY192(keySchedule[3], keySchedule[4], keySchedule[5],
+-                  keySchedule[6], carry, 0x4, 0x8);
+-    EXPAND_KEY192_PART2(keySchedule[7], carry, keySchedule[6]);
+-    EXPAND_KEY192(keySchedule[6], keySchedule[7], keySchedule[8],
+-                  keySchedule[9], carry, 0x10, 0x20);
+-    EXPAND_KEY192_PART2(keySchedule[10], carry, keySchedule[9]);
+-    EXPAND_KEY192(keySchedule[9], keySchedule[10], keySchedule[11],
+-                  keySchedule[12], carry, 0x40, 0x80);
+-}
+-
+-#define EXPAND_KEY256_PART(res, rconx, k1x, k2x, X)                           \
+-    tmp_key = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(k2x, rconx), X);    \
+-    tmp2 = _mm_slli_si128(k1x, 4);                                            \
+-    tmp1 = _mm_xor_si128(k1x, tmp2);                                          \
+-    tmp2 = _mm_slli_si128(tmp2, 4);                                           \
+-    tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \
+-    res = _mm_xor_si128(tmp1, tmp_key);
+-
+-#define EXPAND_KEY256(res1, res2, k1, k2, rcon)   \
+-    EXPAND_KEY256_PART(res1, rcon, k1, k2, 0xFF); \
+-    EXPAND_KEY256_PART(res2, 0x00, k2, res1, 0xAA)
+-
+-static void
+-native_key_expansion256(AESContext *cx, const unsigned char *key)
+-{
+-    __m128i *keySchedule = cx->keySchedule;
+-    pre_align __m128i tmp_key post_align;
+-    pre_align __m128i tmp1 post_align;
+-    pre_align __m128i tmp2 post_align;
+-    keySchedule[0] = _mm_loadu_si128((__m128i *)key);
+-    keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16));
+-    EXPAND_KEY256(keySchedule[2], keySchedule[3], keySchedule[0],
+-                  keySchedule[1], 0x01);
+-    EXPAND_KEY256(keySchedule[4], keySchedule[5], keySchedule[2],
+-                  keySchedule[3], 0x02);
+-    EXPAND_KEY256(keySchedule[6], keySchedule[7], keySchedule[4],
+-                  keySchedule[5], 0x04);
+-    EXPAND_KEY256(keySchedule[8], keySchedule[9], keySchedule[6],
+-                  keySchedule[7], 0x08);
+-    EXPAND_KEY256(keySchedule[10], keySchedule[11], keySchedule[8],
+-                  keySchedule[9], 0x10);
+-    EXPAND_KEY256(keySchedule[12], keySchedule[13], keySchedule[10],
+-                  keySchedule[11], 0x20);
+-    EXPAND_KEY256_PART(keySchedule[14], 0x40, keySchedule[12],
+-                       keySchedule[13], 0xFF);
+-}
+-
+-#endif /* NSS_X86_OR_X64 */
+-
+-/*
+- * AES key expansion using aes-ni instructions.
+- */
+-static void
+-native_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk)
+-{
+-#ifdef NSS_X86_OR_X64
+-    switch (Nk) {
+-        case 4:
+-            native_key_expansion128(cx, key);
+-            return;
+-        case 6:
+-            native_key_expansion192(cx, key);
+-            return;
+-        case 8:
+-            native_key_expansion256(cx, key);
+-            return;
+-        default:
+-            /* This shouldn't happen. */
+-            PORT_Assert(0);
+-    }
+-#else
+-    PORT_Assert(0);
+-#endif /* NSS_X86_OR_X64 */
+-}
+-
+-static void
+-native_encryptBlock(AESContext *cx,
+-                    unsigned char *output,
+-                    const unsigned char *input)
+-{
+-#ifdef NSS_X86_OR_X64
+-    int i;
+-    pre_align __m128i m post_align = _mm_loadu_si128((__m128i *)input);
+-    m = _mm_xor_si128(m, cx->keySchedule[0]);
+-    for (i = 1; i < cx->Nr; ++i) {
+-        m = _mm_aesenc_si128(m, cx->keySchedule[i]);
+-    }
+-    m = _mm_aesenclast_si128(m, cx->keySchedule[cx->Nr]);
+-    _mm_storeu_si128((__m128i *)output, m);
+-#else
+-    PORT_Assert(0);
+-#endif /* NSS_X86_OR_X64 */
+-}
+-
+ /* rijndael_key_expansion
+  *
+  * Generate the expanded key from the key input by the user.
+@@ -830,7 +702,7 @@ rijndael_encryptECB(AESContext *cx, unsigned char *output,
+ 
+     if (aesni_support()) {
+         /* Use hardware acceleration for normal AES parameters. */
+-        encryptor = &native_encryptBlock;
++        encryptor = &rijndael_native_encryptBlock;
+     } else {
+         encryptor = &rijndael_encryptBlock128;
+     }
+@@ -1026,7 +898,7 @@ aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize,
+                                cx->mode == NSS_AES_CTR)) {
+                 PORT_Assert(keysize == 16 || keysize == 24 || keysize == 32);
+                 /* Prepare hardware key for normal AES parameters. */
+-                native_key_expansion(cx, key, Nk);
++                rijndael_native_key_expansion(cx, key, Nk);
+             } else {
+                 rijndael_key_expansion(cx, key, Nk);
+             }
+diff --git lib/freebl/rijndael.h lib/freebl/rijndael.h
+index 1f4a8a9f73..1b63a323da 100644
+--- lib/freebl/rijndael.h
++++ lib/freebl/rijndael.h
+@@ -8,8 +8,22 @@
+ #include "blapii.h"
+ #include <stdint.h>
+ 
+-#ifdef NSS_X86_OR_X64
+-#include <wmmintrin.h> /* aes-ni */
++#if defined(NSS_X86_OR_X64)
++/* GCC <= 4.8 doesn't support including emmintrin.h without enabling SSE2 */
++#if !defined(__clang__) && defined(__GNUC__) && defined(__GNUC_MINOR__) && \
++    (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ <= 8))
++#pragma GCC push_options
++#pragma GCC target("sse2")
++#undef NSS_DISABLE_SSE2
++#define NSS_DISABLE_SSE2 1
++#endif /* GCC <= 4.8 */
++
++#include <emmintrin.h> /* __m128i */
++
++#ifdef NSS_DISABLE_SSE2
++#undef NSS_DISABLE_SSE2
++#pragma GCC pop_options
++#endif /* NSS_DISABLE_SSE2 */
+ #endif
+ 
+ typedef void AESBlockFunc(AESContext *cx,


More information about the svn-ports-head mailing list