Backed out changeset 009a7163c80a (Bug 1152625) for crash in arm_aes_encrypt_cbc_128 on Android 8.0 Pixel2 pgo
authorJ.C. Jones <jc@mozilla.com>
Fri, 02 Aug 2019 14:49:09 +0000
changeset 15243 777b6070fe76b1dc3c51b968e87bc39668fc0282
parent 15242 085e429af86cc9e3808c076f0dd4a37ebe4a1a8c
child 15244 54d7aee231cc83431a415666c75bd16d54a74c34
push id3455
push userjjones@mozilla.com
push dateMon, 05 Aug 2019 15:52:22 +0000
bugs1152625, 1570991
backs out009a7163c80a711bd681b155ecffa5962fd98a46
Backed out changeset 009a7163c80a (Bug 1152625) for crash in arm_aes_encrypt_cbc_128 on Android 8.0 Pixel2 pgo See bug 1570991
lib/freebl/Makefile
lib/freebl/aes-armv8.c
lib/freebl/aes-armv8.h
lib/freebl/freebl.gyp
lib/freebl/intel-aes.h
lib/freebl/rijndael.c
--- a/lib/freebl/Makefile
+++ b/lib/freebl/Makefile
@@ -236,34 +236,19 @@ ifeq ($(CPU_ARCH),x86)
     DEFINES += -DMP_ASSEMBLY_DIV_2DX1D -DMP_USE_UINT_DIGIT
     DEFINES += -DMP_IS_LITTLE_ENDIAN
 endif
 ifeq ($(CPU_ARCH),arm)
     DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE 
     DEFINES += -DMP_USE_UINT_DIGIT
     DEFINES += -DSHA_NO_LONG_LONG # avoid 64-bit arithmetic in SHA512
     MPI_SRCS += mpi_arm.c
-    ifdef CC_IS_CLANG
-        DEFINES += -DUSE_HW_AES
-        EXTRA_SRCS += aes-armv8.c
-    else ifeq (1,$(CC_IS_GCC))
-        # Old compiler doesn't support ARM AES.
-        ifneq (,$(filter 4.9,$(word 1,$(GCC_VERSION)).$(word 2,$(GCC_VERSION))))
-            DEFINES += -DUSE_HW_AES
-            EXTRA_SRCS += aes-armv8.c
-        endif
-        ifeq (,$(filter 0 1 2 3 4,$(word 1,$(GCC_VERSION))))
-            DEFINES += -DUSE_HW_AES
-            EXTRA_SRCS += aes-armv8.c
-        endif
-    endif
 endif
 ifeq ($(CPU_ARCH),aarch64)
-    DEFINES += -DUSE_HW_AES
-    EXTRA_SRCS += aes-armv8.c gcm-aarch64.c
+    EXTRA_SRCS += gcm-aarch64.c
 endif
 ifeq ($(CPU_ARCH),ppc)
 ifdef USE_64
     DEFINES += -DNSS_NO_INIT_SUPPORT
 endif # USE_64
 endif # ppc
 endif # Linux
 
@@ -771,15 +756,11 @@ endif
 
 ifdef INTEL_GCM_CLANG_CL
 #
 # clang-cl needs -mssse3
 #
 $(OBJDIR)/$(PROG_PREFIX)intel-gcm-wrap$(OBJ_SUFFIX): CFLAGS += -mssse3
 endif
 
-ifeq ($(CPU_ARCH),arm)
-$(OBJDIR)/$(PROG_PREFIX)aes-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a -mfpu=crypto-neon-fp-armv8
-endif
 ifeq ($(CPU_ARCH),aarch64)
-$(OBJDIR)/$(PROG_PREFIX)aes-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a+crypto
 $(OBJDIR)/$(PROG_PREFIX)gcm-aarch64$(OBJ_SUFFIX): CFLAGS += -march=armv8-a+crypto
 endif
deleted file mode 100644
--- a/lib/freebl/aes-armv8.c
+++ /dev/null
@@ -1,1168 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include "secerr.h"
-#include "rijndael.h"
-
-#if (defined(__clang__) ||                            \
-     (defined(__GNUC__) && defined(__GNUC_MINOR__) && \
-      (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 8))))
-
-#ifndef __ARM_FEATURE_CRYPTO
-#error "Compiler option is invalid"
-#endif
-
-#include <arm_neon.h>
-
-SECStatus
-arm_aes_encrypt_ecb_128(AESContext *cx, unsigned char *output,
-                        unsigned int *outputLen,
-                        unsigned int maxOutputLen,
-                        const unsigned char *input,
-                        unsigned int inputLen,
-                        unsigned int blocksize)
-{
-#if !defined(HAVE_UNALIGNED_ACCESS)
-    pre_align unsigned char buf[16] post_align;
-#endif
-    uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
-    uint8x16_t key11;
-    const PRUint8 *key = (const PRUint8 *)cx->expandedKey;
-
-    if (!inputLen) {
-        return SECSuccess;
-    }
-
-    key1 = vld1q_u8(__builtin_assume_aligned(key, 16));
-    key2 = vld1q_u8(__builtin_assume_aligned(key + 16, 16));
-    key3 = vld1q_u8(__builtin_assume_aligned(key + 32, 16));
-    key4 = vld1q_u8(__builtin_assume_aligned(key + 48, 16));
-    key5 = vld1q_u8(__builtin_assume_aligned(key + 64, 16));
-    key6 = vld1q_u8(__builtin_assume_aligned(key + 80, 16));
-    key7 = vld1q_u8(__builtin_assume_aligned(key + 96, 16));
-    key8 = vld1q_u8(__builtin_assume_aligned(key + 112, 16));
-    key9 = vld1q_u8(__builtin_assume_aligned(key + 128, 16));
-    key10 = vld1q_u8(__builtin_assume_aligned(key + 144, 16));
-    key11 = vld1q_u8(__builtin_assume_aligned(key + 160, 16));
-
-    while (inputLen > 0) {
-        uint8x16_t state;
-#if defined(HAVE_UNALIGNED_ACCESS)
-        state = vld1q_u8(input);
-#else
-        if ((uintptr_t)input & 0x7) {
-            memcpy(buf, input, 16);
-            state = vld1q_u8(__builtin_assume_aligned(buf, 16));
-        } else {
-            state = vld1q_u8(__builtin_assume_aligned(input, 8));
-        }
-#endif
-        input += 16;
-        inputLen -= 16;
-
-        /* Rounds */
-        state = vaeseq_u8(state, key1);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key2);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key3);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key4);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key5);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key6);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key7);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key8);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key9);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key10);
-        /* AddRoundKey */
-        state = veorq_u8(state, key11);
-
-#if defined(HAVE_UNALIGNED_ACCESS)
-        vst1q_u8(output, state);
-#else
-        if ((uintptr_t)output & 0x7) {
-            vst1q_u8(__builtin_assume_aligned(buf, 16), state);
-            memcpy(output, buf, 16);
-        } else {
-            vst1q_u8(__builtin_assume_aligned(output, 8), state);
-        }
-#endif
-        output += 16;
-    }
-
-    return SECSuccess;
-}
-
-SECStatus
-arm_aes_decrypt_ecb_128(AESContext *cx, unsigned char *output,
-                        unsigned int *outputLen,
-                        unsigned int maxOutputLen,
-                        const unsigned char *input,
-                        unsigned int inputLen,
-                        unsigned int blocksize)
-{
-#if !defined(HAVE_UNALIGNED_ACCESS)
-    pre_align unsigned char buf[16] post_align;
-#endif
-    uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
-    uint8x16_t key11;
-    const PRUint8 *key = (const PRUint8 *)cx->expandedKey;
-
-    if (inputLen == 0) {
-        return SECSuccess;
-    }
-
-    key1 = vld1q_u8(__builtin_assume_aligned(key, 16));
-    key2 = vld1q_u8(__builtin_assume_aligned(key + 16, 16));
-    key3 = vld1q_u8(__builtin_assume_aligned(key + 32, 16));
-    key4 = vld1q_u8(__builtin_assume_aligned(key + 48, 16));
-    key5 = vld1q_u8(__builtin_assume_aligned(key + 64, 16));
-    key6 = vld1q_u8(__builtin_assume_aligned(key + 80, 16));
-    key7 = vld1q_u8(__builtin_assume_aligned(key + 96, 16));
-    key8 = vld1q_u8(__builtin_assume_aligned(key + 112, 16));
-    key9 = vld1q_u8(__builtin_assume_aligned(key + 128, 16));
-    key10 = vld1q_u8(__builtin_assume_aligned(key + 144, 16));
-    key11 = vld1q_u8(__builtin_assume_aligned(key + 160, 16));
-
-    while (inputLen > 0) {
-        uint8x16_t state;
-#if defined(HAVE_UNALIGNED_ACCESS)
-        state = vld1q_u8(input);
-#else
-        if ((uintptr_t)input & 0x7) {
-            memcpy(buf, input, 16);
-            state = vld1q_u8(__builtin_assume_aligned(buf, 16));
-        } else {
-            state = vld1q_u8(__builtin_assume_aligned(input, 8));
-        }
-#endif
-        input += 16;
-        inputLen -= 16;
-
-        /* Rounds */
-        state = vaesdq_u8(state, key11);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key10);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key9);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key8);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key7);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key6);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key5);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key4);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key3);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key2);
-        /* AddRoundKey */
-        state = veorq_u8(state, key1);
-
-#if defined(HAVE_UNALIGNED_ACCESS)
-        vst1q_u8(output, state);
-#else
-        if ((uintptr_t)output & 0x7) {
-            vst1q_u8(__builtin_assume_aligned(buf, 16), state);
-            memcpy(output, buf, 16);
-        } else {
-            vst1q_u8(__builtin_assume_aligned(output, 8), state);
-        }
-#endif
-        output += 16;
-    }
-
-    return SECSuccess;
-}
-
-SECStatus
-arm_aes_encrypt_cbc_128(AESContext *cx, unsigned char *output,
-                        unsigned int *outputLen,
-                        unsigned int maxOutputLen,
-                        const unsigned char *input,
-                        unsigned int inputLen,
-                        unsigned int blocksize)
-{
-#if !defined(HAVE_UNALIGNED_ACCESS)
-    pre_align unsigned char buf[16] post_align;
-#endif
-    uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
-    uint8x16_t key11;
-    uint8x16_t iv;
-    const PRUint8 *key = (const PRUint8 *)cx->expandedKey;
-
-    if (!inputLen) {
-        return SECSuccess;
-    }
-
-    /* iv */
-    iv = vld1q_u8(__builtin_assume_aligned(cx->iv, 16));
-
-    /* expanedKey */
-    key1 = vld1q_u8(__builtin_assume_aligned(key, 16));
-    key2 = vld1q_u8(__builtin_assume_aligned(key + 16, 16));
-    key3 = vld1q_u8(__builtin_assume_aligned(key + 32, 16));
-    key4 = vld1q_u8(__builtin_assume_aligned(key + 48, 16));
-    key5 = vld1q_u8(__builtin_assume_aligned(key + 64, 16));
-    key6 = vld1q_u8(__builtin_assume_aligned(key + 80, 16));
-    key7 = vld1q_u8(__builtin_assume_aligned(key + 96, 16));
-    key8 = vld1q_u8(__builtin_assume_aligned(key + 112, 16));
-    key9 = vld1q_u8(__builtin_assume_aligned(key + 128, 16));
-    key10 = vld1q_u8(__builtin_assume_aligned(key + 144, 16));
-    key11 = vld1q_u8(__builtin_assume_aligned(key + 160, 16));
-
-    while (inputLen > 0) {
-        uint8x16_t state;
-#if defined(HAVE_UNALIGNED_ACCESS)
-        state = vld1q_u8(input);
-#else
-        if ((uintptr_t)input & 0x7) {
-            memcpy(buf, input, 16);
-            state = vld1q_u8(__builtin_assume_aligned(buf, 16));
-        } else {
-            state = vld1q_u8(__builtin_assume_aligned(input, 8));
-        }
-#endif
-        input += 16;
-        inputLen -= 16;
-
-        state = veorq_u8(state, iv);
-
-        /* Rounds */
-        state = vaeseq_u8(state, key1);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key2);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key3);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key4);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key5);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key6);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key7);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key8);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key9);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key10);
-        /* AddRoundKey */
-        state = veorq_u8(state, key11);
-
-#if defined(HAVE_UNALIGNED_ACCESS)
-        vst1q_u8(output, state);
-#else
-        if ((uintptr_t)output & 0x7) {
-            vst1q_u8(__builtin_assume_aligned(buf, 16), state);
-            memcpy(output, buf, 16);
-        } else {
-            vst1q_u8(__builtin_assume_aligned(output, 8), state);
-        }
-#endif
-        output += 16;
-        iv = state;
-    }
-    vst1q_u8(__builtin_assume_aligned(cx->iv, 16), iv);
-
-    return SECSuccess;
-}
-
-SECStatus
-arm_aes_decrypt_cbc_128(AESContext *cx, unsigned char *output,
-                        unsigned int *outputLen,
-                        unsigned int maxOutputLen,
-                        const unsigned char *input,
-                        unsigned int inputLen,
-                        unsigned int blocksize)
-{
-#if !defined(HAVE_UNALIGNED_ACCESS)
-    pre_align unsigned char buf[16] post_align;
-#endif
-    uint8x16_t iv;
-    uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
-    uint8x16_t key11;
-    const PRUint8 *key = (const PRUint8 *)cx->expandedKey;
-
-    if (!inputLen) {
-        return SECSuccess;
-    }
-
-    /* iv */
-    iv = vld1q_u8(__builtin_assume_aligned(cx->iv, 16));
-
-    /* expanedKey */
-    key1 = vld1q_u8(__builtin_assume_aligned(key, 16));
-    key2 = vld1q_u8(__builtin_assume_aligned(key + 16, 16));
-    key3 = vld1q_u8(__builtin_assume_aligned(key + 32, 16));
-    key4 = vld1q_u8(__builtin_assume_aligned(key + 48, 16));
-    key5 = vld1q_u8(__builtin_assume_aligned(key + 64, 16));
-    key6 = vld1q_u8(__builtin_assume_aligned(key + 80, 16));
-    key7 = vld1q_u8(__builtin_assume_aligned(key + 96, 16));
-    key8 = vld1q_u8(__builtin_assume_aligned(key + 112, 16));
-    key9 = vld1q_u8(__builtin_assume_aligned(key + 128, 16));
-    key10 = vld1q_u8(__builtin_assume_aligned(key + 144, 16));
-    key11 = vld1q_u8(__builtin_assume_aligned(key + 160, 16));
-
-    while (inputLen > 0) {
-        uint8x16_t state, old_state;
-#if defined(HAVE_UNALIGNED_ACCESS)
-        state = vld1q_u8(input);
-#else
-        if ((uintptr_t)input & 0x7) {
-            memcpy(buf, input, 16);
-            state = vld1q_u8(__builtin_assume_aligned(buf, 16));
-        } else {
-            state = vld1q_u8(__builtin_assume_aligned(input, 8));
-        }
-#endif
-        old_state = state;
-        input += 16;
-        inputLen -= 16;
-
-        /* Rounds */
-        state = vaesdq_u8(state, key11);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key10);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key9);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key8);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key7);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key6);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key5);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key4);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key3);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key2);
-        /* AddRoundKey */
-        state = veorq_u8(state, key1);
-
-        state = veorq_u8(state, iv);
-
-#if defined(HAVE_UNALIGNED_ACCESS)
-        vst1q_u8(output, state);
-#else
-        if ((uintptr_t)output & 0x7) {
-            vst1q_u8(__builtin_assume_aligned(buf, 16), state);
-            memcpy(output, buf, 16);
-        } else {
-            vst1q_u8(__builtin_assume_aligned(output, 8), state);
-        }
-#endif
-        output += 16;
-
-        iv = old_state;
-    }
-    vst1q_u8(__builtin_assume_aligned(cx->iv, 16), iv);
-
-    return SECSuccess;
-}
-
-SECStatus
-arm_aes_encrypt_ecb_192(AESContext *cx, unsigned char *output,
-                        unsigned int *outputLen,
-                        unsigned int maxOutputLen,
-                        const unsigned char *input,
-                        unsigned int inputLen,
-                        unsigned int blocksize)
-{
-#if !defined(HAVE_UNALIGNED_ACCESS)
-    pre_align unsigned char buf[16] post_align;
-#endif
-    uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
-    uint8x16_t key11, key12, key13;
-    PRUint8 *key = (PRUint8 *)cx->expandedKey;
-
-    if (!inputLen) {
-        return SECSuccess;
-    }
-
-    key1 = vld1q_u8(__builtin_assume_aligned(key, 16));
-    key2 = vld1q_u8(__builtin_assume_aligned(key + 16, 16));
-    key3 = vld1q_u8(__builtin_assume_aligned(key + 32, 16));
-    key4 = vld1q_u8(__builtin_assume_aligned(key + 48, 16));
-    key5 = vld1q_u8(__builtin_assume_aligned(key + 64, 16));
-    key6 = vld1q_u8(__builtin_assume_aligned(key + 80, 16));
-    key7 = vld1q_u8(__builtin_assume_aligned(key + 96, 16));
-    key8 = vld1q_u8(__builtin_assume_aligned(key + 112, 16));
-    key9 = vld1q_u8(__builtin_assume_aligned(key + 128, 16));
-    key10 = vld1q_u8(__builtin_assume_aligned(key + 144, 16));
-    key11 = vld1q_u8(__builtin_assume_aligned(key + 160, 16));
-    key12 = vld1q_u8(__builtin_assume_aligned(key + 176, 16));
-    key13 = vld1q_u8(__builtin_assume_aligned(key + 192, 16));
-
-    while (inputLen > 0) {
-        uint8x16_t state;
-#if defined(HAVE_UNALIGNED_ACCESS)
-        state = vld1q_u8(input);
-#else
-        if ((uintptr_t)input & 0x7) {
-            memcpy(buf, input, 16);
-            state = vld1q_u8(__builtin_assume_aligned(buf, 16));
-        } else {
-            state = vld1q_u8(__builtin_assume_aligned(input, 8));
-        }
-#endif
-        input += 16;
-        inputLen -= 16;
-
-        /* Rounds */
-        state = vaeseq_u8(state, key1);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key2);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key3);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key4);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key5);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key6);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key7);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key8);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key9);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key10);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key11);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key12);
-        /* AddRoundKey */
-        state = veorq_u8(state, key13);
-
-#if defined(HAVE_UNALIGNED_ACCESS)
-        vst1q_u8(output, state);
-#else
-        if ((uintptr_t)output & 0x7) {
-            vst1q_u8(__builtin_assume_aligned(buf, 16), state);
-            memcpy(output, buf, 16);
-        } else {
-            vst1q_u8(__builtin_assume_aligned(output, 8), state);
-        }
-#endif
-        output += 16;
-    }
-
-    return SECSuccess;
-}
-
-SECStatus
-arm_aes_decrypt_ecb_192(AESContext *cx, unsigned char *output,
-                        unsigned int *outputLen,
-                        unsigned int maxOutputLen,
-                        const unsigned char *input,
-                        unsigned int inputLen,
-                        unsigned int blocksize)
-{
-#if !defined(HAVE_UNALIGNED_ACCESS)
-    pre_align unsigned char buf[16] post_align;
-#endif
-    uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
-    uint8x16_t key11, key12, key13;
-    const PRUint8 *key = (const PRUint8 *)cx->expandedKey;
-
-    if (!inputLen) {
-        return SECSuccess;
-    }
-
-    key1 = vld1q_u8(__builtin_assume_aligned(key, 16));
-    key2 = vld1q_u8(__builtin_assume_aligned(key + 16, 16));
-    key3 = vld1q_u8(__builtin_assume_aligned(key + 32, 16));
-    key4 = vld1q_u8(__builtin_assume_aligned(key + 48, 16));
-    key5 = vld1q_u8(__builtin_assume_aligned(key + 64, 16));
-    key6 = vld1q_u8(__builtin_assume_aligned(key + 80, 16));
-    key7 = vld1q_u8(__builtin_assume_aligned(key + 96, 16));
-    key8 = vld1q_u8(__builtin_assume_aligned(key + 112, 16));
-    key9 = vld1q_u8(__builtin_assume_aligned(key + 128, 16));
-    key10 = vld1q_u8(__builtin_assume_aligned(key + 144, 16));
-    key11 = vld1q_u8(__builtin_assume_aligned(key + 160, 16));
-    key12 = vld1q_u8(__builtin_assume_aligned(key + 176, 16));
-    key13 = vld1q_u8(__builtin_assume_aligned(key + 192, 16));
-
-    while (inputLen > 0) {
-        uint8x16_t state;
-#if defined(HAVE_UNALIGNED_ACCESS)
-        state = vld1q_u8(input);
-#else
-        if ((uintptr_t)input & 0x7) {
-            memcpy(buf, input, 16);
-            state = vld1q_u8(__builtin_assume_aligned(buf, 16));
-        } else {
-            state = vld1q_u8(__builtin_assume_aligned(input, 8));
-        }
-#endif
-        input += 16;
-        inputLen -= 16;
-
-        /* Rounds */
-        state = vaesdq_u8(state, key13);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key12);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key11);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key10);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key9);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key8);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key7);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key6);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key5);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key4);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key3);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key2);
-        /* AddRoundKey */
-        state = veorq_u8(state, key1);
-
-#if defined(HAVE_UNALIGNED_ACCESS)
-        vst1q_u8(output, state);
-#else
-        if ((uintptr_t)output & 0x7) {
-            vst1q_u8(__builtin_assume_aligned(buf, 16), state);
-            memcpy(output, buf, 16);
-        } else {
-            vst1q_u8(__builtin_assume_aligned(output, 8), state);
-        }
-#endif
-        output += 16;
-    }
-
-    return SECSuccess;
-}
-
-SECStatus
-arm_aes_encrypt_cbc_192(AESContext *cx, unsigned char *output,
-                        unsigned int *outputLen,
-                        unsigned int maxOutputLen,
-                        const unsigned char *input,
-                        unsigned int inputLen,
-                        unsigned int blocksize)
-{
-#if !defined(HAVE_UNALIGNED_ACCESS)
-    pre_align unsigned char buf[16] post_align;
-#endif
-    uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
-    uint8x16_t key11, key12, key13;
-    uint8x16_t iv;
-    PRUint8 *key = (PRUint8 *)cx->expandedKey;
-
-    if (!inputLen) {
-        return SECSuccess;
-    }
-
-    /* iv */
-    iv = vld1q_u8(cx->iv);
-
-    /* expanedKey */
-    key1 = vld1q_u8(__builtin_assume_aligned(key, 16));
-    key2 = vld1q_u8(__builtin_assume_aligned(key + 16, 16));
-    key3 = vld1q_u8(__builtin_assume_aligned(key + 32, 16));
-    key4 = vld1q_u8(__builtin_assume_aligned(key + 48, 16));
-    key5 = vld1q_u8(__builtin_assume_aligned(key + 64, 16));
-    key6 = vld1q_u8(__builtin_assume_aligned(key + 80, 16));
-    key7 = vld1q_u8(__builtin_assume_aligned(key + 96, 16));
-    key8 = vld1q_u8(__builtin_assume_aligned(key + 112, 16));
-    key9 = vld1q_u8(__builtin_assume_aligned(key + 128, 16));
-    key10 = vld1q_u8(__builtin_assume_aligned(key + 144, 16));
-    key11 = vld1q_u8(__builtin_assume_aligned(key + 160, 16));
-    key12 = vld1q_u8(__builtin_assume_aligned(key + 176, 16));
-    key13 = vld1q_u8(__builtin_assume_aligned(key + 192, 16));
-
-    while (inputLen > 0) {
-        uint8x16_t state;
-#if defined(HAVE_UNALIGNED_ACCESS)
-        state = vld1q_u8(input);
-#else
-        if ((uintptr_t)input & 0x7) {
-            memcpy(buf, input, 16);
-            state = vld1q_u8(__builtin_assume_aligned(buf, 16));
-        } else {
-            state = vld1q_u8(__builtin_assume_aligned(input, 8));
-        }
-#endif
-        input += 16;
-        inputLen -= 16;
-
-        state = veorq_u8(state, iv);
-
-        /* Rounds */
-        state = vaeseq_u8(state, key1);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key2);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key3);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key4);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key5);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key6);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key7);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key8);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key9);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key10);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key11);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key12);
-        state = veorq_u8(state, key13);
-
-#if defined(HAVE_UNALIGNED_ACCESS)
-        vst1q_u8(output, state);
-#else
-        if ((uintptr_t)output & 0x7) {
-            vst1q_u8(__builtin_assume_aligned(buf, 16), state);
-            memcpy(output, buf, 16);
-        } else {
-            vst1q_u8(__builtin_assume_aligned(output, 8), state);
-        }
-#endif
-        output += 16;
-        iv = state;
-    }
-    vst1q_u8(__builtin_assume_aligned(cx->iv, 16), iv);
-
-    return SECSuccess;
-}
-
-SECStatus
-arm_aes_decrypt_cbc_192(AESContext *cx, unsigned char *output,
-                        unsigned int *outputLen,
-                        unsigned int maxOutputLen,
-                        const unsigned char *input,
-                        unsigned int inputLen,
-                        unsigned int blocksize)
-{
-#if !defined(HAVE_UNALIGNED_ACCESS)
-    pre_align unsigned char buf[16] post_align;
-#endif
-    uint8x16_t iv;
-    uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
-    uint8x16_t key11, key12, key13;
-    const PRUint8 *key = (const PRUint8 *)cx->expandedKey;
-
-    if (!inputLen) {
-        return SECSuccess;
-    }
-
-    /* iv */
-    iv = vld1q_u8(__builtin_assume_aligned(cx->iv, 16));
-
-    /* expanedKey */
-    key1 = vld1q_u8(__builtin_assume_aligned(key, 16));
-    key2 = vld1q_u8(__builtin_assume_aligned(key + 16, 16));
-    key3 = vld1q_u8(__builtin_assume_aligned(key + 32, 16));
-    key4 = vld1q_u8(__builtin_assume_aligned(key + 48, 16));
-    key5 = vld1q_u8(__builtin_assume_aligned(key + 64, 16));
-    key6 = vld1q_u8(__builtin_assume_aligned(key + 80, 16));
-    key7 = vld1q_u8(__builtin_assume_aligned(key + 96, 16));
-    key8 = vld1q_u8(__builtin_assume_aligned(key + 112, 16));
-    key9 = vld1q_u8(__builtin_assume_aligned(key + 128, 16));
-    key10 = vld1q_u8(__builtin_assume_aligned(key + 144, 16));
-    key11 = vld1q_u8(__builtin_assume_aligned(key + 160, 16));
-    key12 = vld1q_u8(__builtin_assume_aligned(key + 176, 16));
-    key13 = vld1q_u8(__builtin_assume_aligned(key + 192, 16));
-
-    while (inputLen > 0) {
-        uint8x16_t state, old_state;
-#if defined(HAVE_UNALIGNED_ACCESS)
-        state = vld1q_u8(input);
-#else
-        if ((uintptr_t)input & 0x7) {
-            memcpy(buf, input, 16);
-            state = vld1q_u8(__builtin_assume_aligned(buf, 16));
-        } else {
-            state = vld1q_u8(__builtin_assume_aligned(input, 8));
-        }
-#endif
-        old_state = state;
-        input += 16;
-        inputLen -= 16;
-
-        /* Rounds */
-        state = vaesdq_u8(state, key13);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key12);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key11);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key10);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key9);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key8);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key7);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key6);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key5);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key4);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key3);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key2);
-        /* AddRoundKey */
-        state = veorq_u8(state, key1);
-
-        state = veorq_u8(state, iv);
-
-#if defined(HAVE_UNALIGNED_ACCESS)
-        vst1q_u8(output, state);
-#else
-        if ((uintptr_t)output & 0x7) {
-            vst1q_u8(__builtin_assume_aligned(buf, 16), state);
-            memcpy(output, buf, 16);
-        } else {
-            vst1q_u8(__builtin_assume_aligned(output, 8), state);
-        }
-#endif
-        output += 16;
-
-        iv = old_state;
-    }
-    vst1q_u8(__builtin_assume_aligned(cx->iv, 16), iv);
-
-    return SECSuccess;
-}
-
-SECStatus
-arm_aes_encrypt_ecb_256(AESContext *cx, unsigned char *output,
-                        unsigned int *outputLen,
-                        unsigned int maxOutputLen,
-                        const unsigned char *input,
-                        unsigned int inputLen,
-                        unsigned int blocksize)
-{
-#if !defined(HAVE_UNALIGNED_ACCESS)
-    pre_align unsigned char buf[16] post_align;
-#endif
-    uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
-    uint8x16_t key11, key12, key13, key14, key15;
-    PRUint8 *key = (PRUint8 *)cx->expandedKey;
-
-    if (inputLen == 0) {
-        return SECSuccess;
-    }
-
-    key1 = vld1q_u8(__builtin_assume_aligned(key, 16));
-    key2 = vld1q_u8(__builtin_assume_aligned(key + 16, 16));
-    key3 = vld1q_u8(__builtin_assume_aligned(key + 32, 16));
-    key4 = vld1q_u8(__builtin_assume_aligned(key + 48, 16));
-    key5 = vld1q_u8(__builtin_assume_aligned(key + 64, 16));
-    key6 = vld1q_u8(__builtin_assume_aligned(key + 80, 16));
-    key7 = vld1q_u8(__builtin_assume_aligned(key + 96, 16));
-    key8 = vld1q_u8(__builtin_assume_aligned(key + 112, 16));
-    key9 = vld1q_u8(__builtin_assume_aligned(key + 128, 16));
-    key10 = vld1q_u8(__builtin_assume_aligned(key + 144, 16));
-    key11 = vld1q_u8(__builtin_assume_aligned(key + 160, 16));
-    key12 = vld1q_u8(__builtin_assume_aligned(key + 176, 16));
-    key13 = vld1q_u8(__builtin_assume_aligned(key + 192, 16));
-    key14 = vld1q_u8(__builtin_assume_aligned(key + 208, 16));
-    key15 = vld1q_u8(__builtin_assume_aligned(key + 224, 16));
-
-    while (inputLen > 0) {
-        uint8x16_t state;
-#if defined(HAVE_UNALIGNED_ACCESS)
-        state = vld1q_u8(input);
-#else
-        if ((uintptr_t)input & 0x7) {
-            memcpy(buf, input, 16);
-            state = vld1q_u8(__builtin_assume_aligned(buf, 16));
-        } else {
-            state = vld1q_u8(__builtin_assume_aligned(input, 8));
-        }
-#endif
-        input += 16;
-        inputLen -= 16;
-
-        /* Rounds */
-        state = vaeseq_u8(state, key1);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key2);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key3);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key4);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key5);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key6);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key7);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key8);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key9);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key10);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key11);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key12);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key13);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key14);
-        /* AddRoundKey */
-        state = veorq_u8(state, key15);
-
-#if defined(HAVE_UNALIGNED_ACCESS)
-        vst1q_u8(output, state);
-#else
-        if ((uintptr_t)output & 0x7) {
-            vst1q_u8(__builtin_assume_aligned(buf, 16), state);
-            memcpy(output, buf, 16);
-        } else {
-            vst1q_u8(__builtin_assume_aligned(output, 8), state);
-        }
-#endif
-        output += 16;
-    }
-    return SECSuccess;
-}
-
-SECStatus
-arm_aes_decrypt_ecb_256(AESContext *cx, unsigned char *output,
-                        unsigned int *outputLen,
-                        unsigned int maxOutputLen,
-                        const unsigned char *input,
-                        unsigned int inputLen,
-                        unsigned int blocksize)
-{
-#if !defined(HAVE_UNALIGNED_ACCESS)
-    pre_align unsigned char buf[16] post_align;
-#endif
-    uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
-    uint8x16_t key11, key12, key13, key14, key15;
-    const PRUint8 *key = (const PRUint8 *)cx->expandedKey;
-
-    if (!inputLen) {
-        return SECSuccess;
-    }
-
-    key1 = vld1q_u8(__builtin_assume_aligned(key, 16));
-    key2 = vld1q_u8(__builtin_assume_aligned(key + 16, 16));
-    key3 = vld1q_u8(__builtin_assume_aligned(key + 32, 16));
-    key4 = vld1q_u8(__builtin_assume_aligned(key + 48, 16));
-    key5 = vld1q_u8(__builtin_assume_aligned(key + 64, 16));
-    key6 = vld1q_u8(__builtin_assume_aligned(key + 80, 16));
-    key7 = vld1q_u8(__builtin_assume_aligned(key + 96, 16));
-    key8 = vld1q_u8(__builtin_assume_aligned(key + 112, 16));
-    key9 = vld1q_u8(__builtin_assume_aligned(key + 128, 16));
-    key10 = vld1q_u8(__builtin_assume_aligned(key + 144, 16));
-    key11 = vld1q_u8(__builtin_assume_aligned(key + 160, 16));
-    key12 = vld1q_u8(__builtin_assume_aligned(key + 176, 16));
-    key13 = vld1q_u8(__builtin_assume_aligned(key + 192, 16));
-    key14 = vld1q_u8(__builtin_assume_aligned(key + 208, 16));
-    key15 = vld1q_u8(__builtin_assume_aligned(key + 224, 16));
-
-    while (inputLen > 0) {
-        uint8x16_t state;
-#if defined(HAVE_UNALIGNED_ACCESS)
-        state = vld1q_u8(input);
-#else
-        if ((uintptr_t)input & 0x7) {
-            memcpy(buf, input, 16);
-            state = vld1q_u8(__builtin_assume_aligned(buf, 16));
-        } else {
-            state = vld1q_u8(__builtin_assume_aligned(input, 8));
-        }
-#endif
-        input += 16;
-        inputLen -= 16;
-
-        /* Rounds */
-        state = vaesdq_u8(state, key15);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key14);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key13);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key12);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key11);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key10);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key9);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key8);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key7);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key6);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key5);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key4);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key3);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key2);
-        /* AddRoundKey */
-        state = veorq_u8(state, key1);
-
-#if defined(HAVE_UNALIGNED_ACCESS)
-        vst1q_u8(output, state);
-#else
-        if ((uintptr_t)output & 0x7) {
-            vst1q_u8(__builtin_assume_aligned(buf, 16), state);
-            memcpy(output, buf, 16);
-        } else {
-            vst1q_u8(__builtin_assume_aligned(output, 8), state);
-        }
-#endif
-        output += 16;
-    }
-
-    return SECSuccess;
-}
-
-SECStatus
-arm_aes_encrypt_cbc_256(AESContext *cx, unsigned char *output,
-                        unsigned int *outputLen,
-                        unsigned int maxOutputLen,
-                        const unsigned char *input,
-                        unsigned int inputLen,
-                        unsigned int blocksize)
-{
-#if !defined(HAVE_UNALIGNED_ACCESS)
-    pre_align unsigned char buf[16] post_align;
-#endif
-    uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
-    uint8x16_t key11, key12, key13, key14, key15;
-    uint8x16_t iv;
-    const PRUint8 *key = (const PRUint8 *)cx->expandedKey;
-
-    if (!inputLen) {
-        return SECSuccess;
-    }
-
-    /* iv */
-    iv = vld1q_u8(cx->iv);
-
-    /* expanedKey */
-    key1 = vld1q_u8(__builtin_assume_aligned(key, 16));
-    key2 = vld1q_u8(__builtin_assume_aligned(key + 16, 16));
-    key3 = vld1q_u8(__builtin_assume_aligned(key + 32, 16));
-    key4 = vld1q_u8(__builtin_assume_aligned(key + 48, 16));
-    key5 = vld1q_u8(__builtin_assume_aligned(key + 64, 16));
-    key6 = vld1q_u8(__builtin_assume_aligned(key + 80, 16));
-    key7 = vld1q_u8(__builtin_assume_aligned(key + 96, 16));
-    key8 = vld1q_u8(__builtin_assume_aligned(key + 112, 16));
-    key9 = vld1q_u8(__builtin_assume_aligned(key + 128, 16));
-    key10 = vld1q_u8(__builtin_assume_aligned(key + 144, 16));
-    key11 = vld1q_u8(__builtin_assume_aligned(key + 160, 16));
-    key12 = vld1q_u8(__builtin_assume_aligned(key + 176, 16));
-    key13 = vld1q_u8(__builtin_assume_aligned(key + 192, 16));
-    key14 = vld1q_u8(__builtin_assume_aligned(key + 208, 16));
-    key15 = vld1q_u8(__builtin_assume_aligned(key + 224, 16));
-
-    while (inputLen > 0) {
-        uint8x16_t state;
-#if defined(HAVE_UNALIGNED_ACCESS)
-        state = vld1q_u8(input);
-#else
-        if ((uintptr_t)input & 0x7) {
-            memcpy(buf, input, 16);
-            state = vld1q_u8(__builtin_assume_aligned(buf, 16));
-        } else {
-            state = vld1q_u8(__builtin_assume_aligned(input, 8));
-        }
-#endif
-        input += 16;
-        inputLen -= 16;
-
-        state = veorq_u8(state, iv);
-
-        /* Rounds */
-        state = vaeseq_u8(state, key1);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key2);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key3);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key4);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key5);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key6);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key7);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key8);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key9);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key10);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key11);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key12);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key13);
-        state = vaesmcq_u8(state);
-        state = vaeseq_u8(state, key14);
-        /* AddRoundKey */
-        state = veorq_u8(state, key15);
-
-#if defined(HAVE_UNALIGNED_ACCESS)
-        vst1q_u8(output, state);
-#else
-        if ((uintptr_t)output & 0x7) {
-            vst1q_u8(__builtin_assume_aligned(buf, 16), state);
-            memcpy(output, buf, 16);
-        } else {
-            vst1q_u8(__builtin_assume_aligned(output, 8), state);
-        }
-#endif
-        output += 16;
-        iv = state;
-    }
-    vst1q_u8(__builtin_assume_aligned(cx->iv, 16), iv);
-
-    return SECSuccess;
-}
-
-SECStatus
-arm_aes_decrypt_cbc_256(AESContext *cx, unsigned char *output,
-                        unsigned int *outputLen,
-                        unsigned int maxOutputLen,
-                        const unsigned char *input,
-                        unsigned int inputLen,
-                        unsigned int blocksize)
-{
-#if !defined(HAVE_UNALIGNED_ACCESS)
-    pre_align unsigned char buf[16] post_align;
-#endif
-    uint8x16_t iv;
-    uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
-    uint8x16_t key11, key12, key13, key14, key15;
-    const PRUint8 *key = (const PRUint8 *)cx->expandedKey;
-
-    if (!inputLen) {
-        return SECSuccess;
-    }
-
-    /* iv */
-    iv = vld1q_u8(cx->iv);
-
-    /* expanedKey */
-    key1 = vld1q_u8(__builtin_assume_aligned(key, 16));
-    key2 = vld1q_u8(__builtin_assume_aligned(key + 16, 16));
-    key3 = vld1q_u8(__builtin_assume_aligned(key + 32, 16));
-    key4 = vld1q_u8(__builtin_assume_aligned(key + 48, 16));
-    key5 = vld1q_u8(__builtin_assume_aligned(key + 64, 16));
-    key6 = vld1q_u8(__builtin_assume_aligned(key + 80, 16));
-    key7 = vld1q_u8(__builtin_assume_aligned(key + 96, 16));
-    key8 = vld1q_u8(__builtin_assume_aligned(key + 112, 16));
-    key9 = vld1q_u8(__builtin_assume_aligned(key + 128, 16));
-    key10 = vld1q_u8(__builtin_assume_aligned(key + 144, 16));
-    key11 = vld1q_u8(__builtin_assume_aligned(key + 160, 16));
-    key12 = vld1q_u8(__builtin_assume_aligned(key + 176, 16));
-    key13 = vld1q_u8(__builtin_assume_aligned(key + 192, 16));
-    key14 = vld1q_u8(__builtin_assume_aligned(key + 208, 16));
-    key15 = vld1q_u8(__builtin_assume_aligned(key + 224, 16));
-
-    while (inputLen > 0) {
-        uint8x16_t state, old_state;
-#if defined(HAVE_UNALIGNED_ACCESS)
-        state = vld1q_u8(input);
-#else
-        if ((uintptr_t)input & 0x7) {
-            memcpy(buf, input, 16);
-            state = vld1q_u8(__builtin_assume_aligned(buf, 16));
-        } else {
-            state = vld1q_u8(__builtin_assume_aligned(input, 8));
-        }
-#endif
-        old_state = state;
-        input += 16;
-        inputLen -= 16;
-
-        /* Rounds */
-        state = vaesdq_u8(state, key15);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key14);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key13);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key12);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key11);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key10);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key9);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key8);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key7);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key6);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key5);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key4);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key3);
-        state = vaesimcq_u8(state);
-        state = vaesdq_u8(state, key2);
-        /* AddRoundKey */
-        state = veorq_u8(state, key1);
-
-        state = veorq_u8(state, iv);
-
-#if defined(HAVE_UNALIGNED_ACCESS)
-        vst1q_u8(output, state);
-#else
-        if ((uintptr_t)output & 0x7) {
-            vst1q_u8(__builtin_assume_aligned(buf, 16), state);
-            memcpy(output, buf, 16);
-        } else {
-            vst1q_u8(__builtin_assume_aligned(output, 8), state);
-        }
-#endif
-        output += 16;
-
-        iv = old_state;
-    }
-    vst1q_u8(__builtin_assume_aligned(cx->iv, 16), iv);
-
-    return SECSuccess;
-}
-
-#endif
deleted file mode 100644
--- a/lib/freebl/aes-armv8.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-SECStatus arm_aes_encrypt_ecb_128(AESContext *cx, unsigned char *output,
-                                  unsigned int *outputLen,
-                                  unsigned int maxOutputLen,
-                                  const unsigned char *input,
-                                  unsigned int inputLen,
-                                  unsigned int blocksize);
-SECStatus arm_aes_decrypt_ecb_128(AESContext *cx, unsigned char *output,
-                                  unsigned int *outputLen,
-                                  unsigned int maxOutputLen,
-                                  const unsigned char *input,
-                                  unsigned int inputLen,
-                                  unsigned int blocksize);
-SECStatus arm_aes_encrypt_cbc_128(AESContext *cx, unsigned char *output,
-                                  unsigned int *outputLen,
-                                  unsigned int maxOutputLen,
-                                  const unsigned char *input,
-                                  unsigned int inputLen,
-                                  unsigned int blocksize);
-SECStatus arm_aes_decrypt_cbc_128(AESContext *cx, unsigned char *output,
-                                  unsigned int *outputLen,
-                                  unsigned int maxOutputLen,
-                                  const unsigned char *input,
-                                  unsigned int inputLen,
-                                  unsigned int blocksize);
-SECStatus arm_aes_encrypt_ecb_192(AESContext *cx, unsigned char *output,
-                                  unsigned int *outputLen,
-                                  unsigned int maxOutputLen,
-                                  const unsigned char *input,
-                                  unsigned int inputLen,
-                                  unsigned int blocksize);
-SECStatus arm_aes_decrypt_ecb_192(AESContext *cx, unsigned char *output,
-                                  unsigned int *outputLen,
-                                  unsigned int maxOutputLen,
-                                  const unsigned char *input,
-                                  unsigned int inputLen,
-                                  unsigned int blocksize);
-SECStatus arm_aes_encrypt_cbc_192(AESContext *cx, unsigned char *output,
-                                  unsigned int *outputLen,
-                                  unsigned int maxOutputLen,
-                                  const unsigned char *input,
-                                  unsigned int inputLen,
-                                  unsigned int blocksize);
-SECStatus arm_aes_decrypt_cbc_192(AESContext *cx, unsigned char *output,
-                                  unsigned int *outputLen,
-                                  unsigned int maxOutputLen,
-                                  const unsigned char *input,
-                                  unsigned int inputLen,
-                                  unsigned int blocksize);
-SECStatus arm_aes_encrypt_ecb_256(AESContext *cx, unsigned char *output,
-                                  unsigned int *outputLen,
-                                  unsigned int maxOutputLen,
-                                  const unsigned char *input,
-                                  unsigned int inputLen,
-                                  unsigned int blocksize);
-SECStatus arm_aes_decrypt_ecb_256(AESContext *cx, unsigned char *output,
-                                  unsigned int *outputLen,
-                                  unsigned int maxOutputLen,
-                                  const unsigned char *input,
-                                  unsigned int inputLen,
-                                  unsigned int blocksize);
-SECStatus arm_aes_encrypt_cbc_256(AESContext *cx, unsigned char *output,
-                                  unsigned int *outputLen,
-                                  unsigned int maxOutputLen,
-                                  const unsigned char *input,
-                                  unsigned int inputLen,
-                                  unsigned int blocksize);
-SECStatus arm_aes_decrypt_cbc_256(AESContext *cx, unsigned char *output,
-                                  unsigned int *outputLen,
-                                  unsigned int maxOutputLen,
-                                  const unsigned char *input,
-                                  unsigned int inputLen,
-                                  unsigned int blocksize);
-
-#define native_aes_ecb_worker(encrypt, keysize)                          \
-    ((encrypt)                                                           \
-         ? ((keysize) == 16 ? arm_aes_encrypt_ecb_128                    \
-                            : (keysize) == 24 ? arm_aes_encrypt_ecb_192  \
-                                              : arm_aes_encrypt_ecb_256) \
-         : ((keysize) == 16 ? arm_aes_decrypt_ecb_128                    \
-                            : (keysize) == 24 ? arm_aes_decrypt_ecb_192  \
-                                              : arm_aes_decrypt_ecb_256))
-
-#define native_aes_cbc_worker(encrypt, keysize)                          \
-    ((encrypt)                                                           \
-         ? ((keysize) == 16 ? arm_aes_encrypt_cbc_128                    \
-                            : (keysize) == 24 ? arm_aes_encrypt_cbc_192  \
-                                              : arm_aes_encrypt_cbc_256) \
-         : ((keysize) == 16 ? arm_aes_decrypt_cbc_128                    \
-                            : (keysize) == 24 ? arm_aes_decrypt_cbc_192  \
-                                              : arm_aes_decrypt_cbc_256))
-
-#define native_aes_init(encrypt, keysize)           \
-    do {                                            \
-        if (encrypt) {                              \
-            rijndael_key_expansion(cx, key, Nk);    \
-        } else {                                    \
-            rijndael_invkey_expansion(cx, key, Nk); \
-        }                                           \
-    } while (0)
--- a/lib/freebl/freebl.gyp
+++ b/lib/freebl/freebl.gyp
@@ -128,45 +128,16 @@
       'cflags': [
         '-march=armv8-a+crypto'
       ],
       'cflags_mozilla': [
         '-march=armv8-a+crypto'
       ]
     },
     {
-      'target_name': 'armv8_c_lib',
-      'type': 'static_library',
-      'sources': [
-        'aes-armv8.c',
-      ],
-      'dependencies': [
-        '<(DEPTH)/exports.gyp:nss_exports'
-      ],
-      'conditions': [
-        [ 'target_arch=="arm"', {
-          'cflags': [
-            '-march=armv8-a',
-            '-mfpu=crypto-neon-fp-armv8'
-          ],
-          'cflags_mozilla': [
-            '-march=armv8-a',
-            '-mfpu=crypto-neon-fp-armv8'
-          ],
-        }, 'target_arch=="arm64" or target_arch=="aarch64"', {
-          'cflags': [
-            '-march=armv8-a+crypto'
-          ],
-          'cflags_mozilla': [
-            '-march=armv8-a+crypto'
-          ],
-        }]
-      ]
-    },
-    {
       'target_name': 'freebl',
       'type': 'static_library',
       'sources': [
         'loader.c'
       ],
       'dependencies': [
         '<(DEPTH)/exports.gyp:nss_exports'
       ]
@@ -184,20 +155,16 @@
         '<(DEPTH)/exports.gyp:nss_exports',
         'hw-acc-crypto',
       ],
       'conditions': [
         [ 'target_arch=="ia32" or target_arch=="x64"', {
           'dependencies': [
             'gcm-aes-x86_c_lib',
           ],
-        }, 'target_arch=="arm" or target_arch=="arm64" or target_arch=="aarch64"', {
-          'dependencies': [
-            'armv8_c_lib'
-          ],
         }],
         [ 'target_arch=="arm64" or target_arch=="aarch64"', {
           'dependencies': [
             'gcm-aes-aarch64_c_lib',
           ],
         }],
         [ 'OS=="linux"', {
           'defines!': [
@@ -230,20 +197,16 @@
         '<(DEPTH)/exports.gyp:nss_exports',
         'hw-acc-crypto',
       ],
       'conditions': [
         [ 'target_arch=="ia32" or target_arch=="x64"', {
           'dependencies': [
             'gcm-aes-x86_c_lib',
           ]
-        }, 'target_arch=="arm" or target_arch=="arm64" or target_arch=="aarch64"', {
-          'dependencies': [
-            'armv8_c_lib',
-          ],
         }],
         [ 'target_arch=="arm64" or target_arch=="aarch64"', {
           'dependencies': [
             'gcm-aes-aarch64_c_lib',
           ],
         }],
         [ 'OS!="linux"', {
           'conditions': [
@@ -461,22 +424,16 @@
           }],
           [ 'target_arch=="arm"', {
             'defines': [
               'MP_ASSEMBLY_MULTIPLY',
               'MP_ASSEMBLY_SQUARE',
               'MP_USE_UINT_DIGIT',
               'SHA_NO_LONG_LONG',
               'ARMHF',
-              'USE_HW_AES',
-            ],
-          }],
-          [ 'target_arch=="arm64" or target_arch=="aarch64"', {
-            'defines': [
-              'USE_HW_AES',
             ],
           }],
         ],
       }],
     ],
   },
   'variables': {
     'module': 'nss',
--- a/lib/freebl/intel-aes.h
+++ b/lib/freebl/intel-aes.h
@@ -95,40 +95,40 @@ SECStatus intel_aes_decrypt_cbc_256(AESC
                                     unsigned int blocksize);
 SECStatus intel_aes_encrypt_ctr_256(CTRContext *cx, unsigned char *output,
                                     unsigned int *outputLen,
                                     unsigned int maxOutputLen,
                                     const unsigned char *input,
                                     unsigned int inputLen,
                                     unsigned int blocksize);
 
-#define native_aes_ecb_worker(encrypt, keysize)                            \
+#define intel_aes_ecb_worker(encrypt, keysize)                             \
     ((encrypt)                                                             \
          ? ((keysize) == 16 ? intel_aes_encrypt_ecb_128                    \
                             : (keysize) == 24 ? intel_aes_encrypt_ecb_192  \
                                               : intel_aes_encrypt_ecb_256) \
          : ((keysize) == 16 ? intel_aes_decrypt_ecb_128                    \
                             : (keysize) == 24 ? intel_aes_decrypt_ecb_192  \
                                               : intel_aes_decrypt_ecb_256))
 
-#define native_aes_cbc_worker(encrypt, keysize)                            \
+#define intel_aes_cbc_worker(encrypt, keysize)                             \
     ((encrypt)                                                             \
          ? ((keysize) == 16 ? intel_aes_encrypt_cbc_128                    \
                             : (keysize) == 24 ? intel_aes_encrypt_cbc_192  \
                                               : intel_aes_encrypt_cbc_256) \
          : ((keysize) == 16 ? intel_aes_decrypt_cbc_128                    \
                             : (keysize) == 24 ? intel_aes_decrypt_cbc_192  \
                                               : intel_aes_decrypt_cbc_256))
 
 #define intel_aes_ctr_worker(nr)                         \
     ((nr) == 10 ? intel_aes_encrypt_ctr_128              \
                 : (nr) == 12 ? intel_aes_encrypt_ctr_192 \
                              : intel_aes_encrypt_ctr_256)
 
-#define native_aes_init(encrypt, keysize)                         \
+#define intel_aes_init(encrypt, keysize)                          \
     do {                                                          \
         if (encrypt) {                                            \
             if (keysize == 16)                                    \
                 intel_aes_encrypt_init_128(key, cx->expandedKey); \
             else if (keysize == 24)                               \
                 intel_aes_encrypt_init_192(key, cx->expandedKey); \
             else                                                  \
                 intel_aes_encrypt_init_256(key, cx->expandedKey); \
--- a/lib/freebl/rijndael.c
+++ b/lib/freebl/rijndael.c
@@ -15,28 +15,19 @@
 #include "blapi.h"
 #include "rijndael.h"
 
 #include "cts.h"
 #include "ctr.h"
 #include "gcm.h"
 #include "mpi.h"
 
-#if !defined(IS_LITTLE_ENDIAN) && !defined(NSS_X86_OR_X64)
-// not test yet on big endian platform of arm
-#undef USE_HW_AES
+#ifdef USE_HW_AES
+#include "intel-aes.h"
 #endif
-
-#ifdef USE_HW_AES
-#ifdef NSS_X86_OR_X64
-#include "intel-aes.h"
-#else
-#include "aes-armv8.h"
-#endif
-#endif /* USE_HW_AES */
 #ifdef INTEL_GCM
 #include "intel-gcm.h"
 #endif /* INTEL_GCM */
 
 /* Forward declarations */
 void rijndael_native_key_expansion(AESContext *cx, const unsigned char *key,
                                    unsigned int Nk);
 void rijndael_native_encryptBlock(AESContext *cx,
@@ -851,62 +842,58 @@ aes_InitContext(AESContext *cx, const un
     if (mode == NSS_AES_CBC && iv == NULL) {
         PORT_SetError(SEC_ERROR_INVALID_ARGS);
         return SECFailure;
     }
     if (!cx) {
         PORT_SetError(SEC_ERROR_INVALID_ARGS);
         return SECFailure;
     }
-#if defined(NSS_X86_OR_X64) || defined(USE_HW_AES)
-    use_hw_aes = (aesni_support() || arm_aes_support()) && (keysize % 8) == 0;
-#else
-    use_hw_aes = PR_FALSE;
-#endif
+    use_hw_aes = aesni_support() && (keysize % 8) == 0;
     /* Nb = (block size in bits) / 32 */
     cx->Nb = AES_BLOCK_SIZE / 4;
     /* Nk = (key size in bits) / 32 */
     Nk = keysize / 4;
     /* Obtain number of rounds from "table" */
     cx->Nr = RIJNDAEL_NUM_ROUNDS(Nk, cx->Nb);
     /* copy in the iv, if neccessary */
     if (mode == NSS_AES_CBC) {
         memcpy(cx->iv, iv, AES_BLOCK_SIZE);
 #ifdef USE_HW_AES
         if (use_hw_aes) {
             cx->worker = (freeblCipherFunc)
-                native_aes_cbc_worker(encrypt, keysize);
+                intel_aes_cbc_worker(encrypt, keysize);
         } else
 #endif
         {
             cx->worker = (freeblCipherFunc)(encrypt
                                                 ? &rijndael_encryptCBC
                                                 : &rijndael_decryptCBC);
         }
     } else {
 #ifdef USE_HW_AES
         if (use_hw_aes) {
             cx->worker = (freeblCipherFunc)
-                native_aes_ecb_worker(encrypt, keysize);
+                intel_aes_ecb_worker(encrypt, keysize);
         } else
 #endif
         {
             cx->worker = (freeblCipherFunc)(encrypt
                                                 ? &rijndael_encryptECB
                                                 : &rijndael_decryptECB);
         }
     }
     PORT_Assert((cx->Nb * (cx->Nr + 1)) <= RIJNDAEL_MAX_EXP_KEY_SIZE);
     if ((cx->Nb * (cx->Nr + 1)) > RIJNDAEL_MAX_EXP_KEY_SIZE) {
         PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
         return SECFailure;
     }
 #ifdef USE_HW_AES
     if (use_hw_aes) {
-        native_aes_init(encrypt, keysize);
+        intel_aes_init(encrypt, keysize);
     } else
 #endif
     {
         /* Generate expanded key */
         if (encrypt) {
             if (use_hw_aes && (cx->mode == NSS_AES_GCM || cx->mode == NSS_AES ||
                                cx->mode == NSS_AES_CTR)) {
                 PORT_Assert(keysize == 16 || keysize == 24 || keysize == 32);