Bug 1152625 - Part 2. Remove __builtin_assume to avoid crash on PGO. r=kjacobs,mt
authorMakoto Kato <m_kato@ga2.so-net.ne.jp>
Fri, 11 Oct 2019 19:32:43 +0000
changeset 15331 1b0f5c5335ee7480e5856ae3890b0463e4882b95
parent 15330 efb895a43899642a3f3a6bfe2957870dbaccb91b
child 15332 9abcea09fdd4b252e7774d450c2f9a622f38f1ad
push id3529
push userjjones@mozilla.com
push dateFri, 11 Oct 2019 19:34:03 +0000
reviewerskjacobs, mt
bugs1152625
Bug 1152625 - Part 2. Remove __builtin_assume to avoid crash on PGO. r=kjacobs,mt `AESContext->iv` doesn't align to 16 bytes on PGO build, so we should remove __builtin_assume. Also, I guess that `expandedKey` has same problem. Differential Revision: https://phabricator.services.mozilla.com/D40607
lib/freebl/aes-armv8.c
--- a/lib/freebl/aes-armv8.c
+++ b/lib/freebl/aes-armv8.c
@@ -29,27 +29,27 @@ arm_aes_encrypt_ecb_128(AESContext *cx, 
     uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
     uint8x16_t key11;
     const PRUint8 *key = (const PRUint8 *)cx->expandedKey;
 
     if (!inputLen) {
         return SECSuccess;
     }
 
-    key1 = vld1q_u8(__builtin_assume_aligned(key, 16));
-    key2 = vld1q_u8(__builtin_assume_aligned(key + 16, 16));
-    key3 = vld1q_u8(__builtin_assume_aligned(key + 32, 16));
-    key4 = vld1q_u8(__builtin_assume_aligned(key + 48, 16));
-    key5 = vld1q_u8(__builtin_assume_aligned(key + 64, 16));
-    key6 = vld1q_u8(__builtin_assume_aligned(key + 80, 16));
-    key7 = vld1q_u8(__builtin_assume_aligned(key + 96, 16));
-    key8 = vld1q_u8(__builtin_assume_aligned(key + 112, 16));
-    key9 = vld1q_u8(__builtin_assume_aligned(key + 128, 16));
-    key10 = vld1q_u8(__builtin_assume_aligned(key + 144, 16));
-    key11 = vld1q_u8(__builtin_assume_aligned(key + 160, 16));
+    key1 = vld1q_u8(key);
+    key2 = vld1q_u8(key + 16);
+    key3 = vld1q_u8(key + 32);
+    key4 = vld1q_u8(key + 48);
+    key5 = vld1q_u8(key + 64);
+    key6 = vld1q_u8(key + 80);
+    key7 = vld1q_u8(key + 96);
+    key8 = vld1q_u8(key + 112);
+    key9 = vld1q_u8(key + 128);
+    key10 = vld1q_u8(key + 144);
+    key11 = vld1q_u8(key + 160);
 
     while (inputLen > 0) {
         uint8x16_t state;
 #if defined(HAVE_UNALIGNED_ACCESS)
         state = vld1q_u8(input);
 #else
         if ((uintptr_t)input & 0x7) {
             memcpy(buf, input, 16);
@@ -114,27 +114,27 @@ arm_aes_decrypt_ecb_128(AESContext *cx, 
     uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
     uint8x16_t key11;
     const PRUint8 *key = (const PRUint8 *)cx->expandedKey;
 
     if (inputLen == 0) {
         return SECSuccess;
     }
 
-    key1 = vld1q_u8(__builtin_assume_aligned(key, 16));
-    key2 = vld1q_u8(__builtin_assume_aligned(key + 16, 16));
-    key3 = vld1q_u8(__builtin_assume_aligned(key + 32, 16));
-    key4 = vld1q_u8(__builtin_assume_aligned(key + 48, 16));
-    key5 = vld1q_u8(__builtin_assume_aligned(key + 64, 16));
-    key6 = vld1q_u8(__builtin_assume_aligned(key + 80, 16));
-    key7 = vld1q_u8(__builtin_assume_aligned(key + 96, 16));
-    key8 = vld1q_u8(__builtin_assume_aligned(key + 112, 16));
-    key9 = vld1q_u8(__builtin_assume_aligned(key + 128, 16));
-    key10 = vld1q_u8(__builtin_assume_aligned(key + 144, 16));
-    key11 = vld1q_u8(__builtin_assume_aligned(key + 160, 16));
+    key1 = vld1q_u8(key);
+    key2 = vld1q_u8(key + 16);
+    key3 = vld1q_u8(key + 32);
+    key4 = vld1q_u8(key + 48);
+    key5 = vld1q_u8(key + 64);
+    key6 = vld1q_u8(key + 80);
+    key7 = vld1q_u8(key + 96);
+    key8 = vld1q_u8(key + 112);
+    key9 = vld1q_u8(key + 128);
+    key10 = vld1q_u8(key + 144);
+    key11 = vld1q_u8(key + 160);
 
     while (inputLen > 0) {
         uint8x16_t state;
 #if defined(HAVE_UNALIGNED_ACCESS)
         state = vld1q_u8(input);
 #else
         if ((uintptr_t)input & 0x7) {
             memcpy(buf, input, 16);
@@ -201,30 +201,30 @@ arm_aes_encrypt_cbc_128(AESContext *cx, 
     uint8x16_t iv;
     const PRUint8 *key = (const PRUint8 *)cx->expandedKey;
 
     if (!inputLen) {
         return SECSuccess;
     }
 
     /* iv */
-    iv = vld1q_u8(__builtin_assume_aligned(cx->iv, 16));
+    iv = vld1q_u8(cx->iv);
 
     /* expanedKey */
-    key1 = vld1q_u8(__builtin_assume_aligned(key, 16));
-    key2 = vld1q_u8(__builtin_assume_aligned(key + 16, 16));
-    key3 = vld1q_u8(__builtin_assume_aligned(key + 32, 16));
-    key4 = vld1q_u8(__builtin_assume_aligned(key + 48, 16));
-    key5 = vld1q_u8(__builtin_assume_aligned(key + 64, 16));
-    key6 = vld1q_u8(__builtin_assume_aligned(key + 80, 16));
-    key7 = vld1q_u8(__builtin_assume_aligned(key + 96, 16));
-    key8 = vld1q_u8(__builtin_assume_aligned(key + 112, 16));
-    key9 = vld1q_u8(__builtin_assume_aligned(key + 128, 16));
-    key10 = vld1q_u8(__builtin_assume_aligned(key + 144, 16));
-    key11 = vld1q_u8(__builtin_assume_aligned(key + 160, 16));
+    key1 = vld1q_u8(key);
+    key2 = vld1q_u8(key + 16);
+    key3 = vld1q_u8(key + 32);
+    key4 = vld1q_u8(key + 48);
+    key5 = vld1q_u8(key + 64);
+    key6 = vld1q_u8(key + 80);
+    key7 = vld1q_u8(key + 96);
+    key8 = vld1q_u8(key + 112);
+    key9 = vld1q_u8(key + 128);
+    key10 = vld1q_u8(key + 144);
+    key11 = vld1q_u8(key + 160);
 
     while (inputLen > 0) {
         uint8x16_t state;
 #if defined(HAVE_UNALIGNED_ACCESS)
         state = vld1q_u8(input);
 #else
         if ((uintptr_t)input & 0x7) {
             memcpy(buf, input, 16);
@@ -269,17 +269,17 @@ arm_aes_encrypt_cbc_128(AESContext *cx, 
             memcpy(output, buf, 16);
         } else {
             vst1q_u8(__builtin_assume_aligned(output, 8), state);
         }
 #endif
         output += 16;
         iv = state;
     }
-    vst1q_u8(__builtin_assume_aligned(cx->iv, 16), iv);
+    vst1q_u8(cx->iv, iv);
 
     return SECSuccess;
 }
 
 SECStatus
 arm_aes_decrypt_cbc_128(AESContext *cx, unsigned char *output,
                         unsigned int *outputLen,
                         unsigned int maxOutputLen,
@@ -295,30 +295,30 @@ arm_aes_decrypt_cbc_128(AESContext *cx, 
     uint8x16_t key11;
     const PRUint8 *key = (const PRUint8 *)cx->expandedKey;
 
     if (!inputLen) {
         return SECSuccess;
     }
 
     /* iv */
-    iv = vld1q_u8(__builtin_assume_aligned(cx->iv, 16));
+    iv = vld1q_u8(cx->iv);
 
     /* expanedKey */
-    key1 = vld1q_u8(__builtin_assume_aligned(key, 16));
-    key2 = vld1q_u8(__builtin_assume_aligned(key + 16, 16));
-    key3 = vld1q_u8(__builtin_assume_aligned(key + 32, 16));
-    key4 = vld1q_u8(__builtin_assume_aligned(key + 48, 16));
-    key5 = vld1q_u8(__builtin_assume_aligned(key + 64, 16));
-    key6 = vld1q_u8(__builtin_assume_aligned(key + 80, 16));
-    key7 = vld1q_u8(__builtin_assume_aligned(key + 96, 16));
-    key8 = vld1q_u8(__builtin_assume_aligned(key + 112, 16));
-    key9 = vld1q_u8(__builtin_assume_aligned(key + 128, 16));
-    key10 = vld1q_u8(__builtin_assume_aligned(key + 144, 16));
-    key11 = vld1q_u8(__builtin_assume_aligned(key + 160, 16));
+    key1 = vld1q_u8(key);
+    key2 = vld1q_u8(key + 16);
+    key3 = vld1q_u8(key + 32);
+    key4 = vld1q_u8(key + 48);
+    key5 = vld1q_u8(key + 64);
+    key6 = vld1q_u8(key + 80);
+    key7 = vld1q_u8(key + 96);
+    key8 = vld1q_u8(key + 112);
+    key9 = vld1q_u8(key + 128);
+    key10 = vld1q_u8(key + 144);
+    key11 = vld1q_u8(key + 160);
 
     while (inputLen > 0) {
         uint8x16_t state, old_state;
 #if defined(HAVE_UNALIGNED_ACCESS)
         state = vld1q_u8(input);
 #else
         if ((uintptr_t)input & 0x7) {
             memcpy(buf, input, 16);
@@ -365,17 +365,17 @@ arm_aes_decrypt_cbc_128(AESContext *cx, 
         } else {
             vst1q_u8(__builtin_assume_aligned(output, 8), state);
         }
 #endif
         output += 16;
 
         iv = old_state;
     }
-    vst1q_u8(__builtin_assume_aligned(cx->iv, 16), iv);
+    vst1q_u8(cx->iv, iv);
 
     return SECSuccess;
 }
 
 SECStatus
 arm_aes_encrypt_ecb_192(AESContext *cx, unsigned char *output,
                         unsigned int *outputLen,
                         unsigned int maxOutputLen,
@@ -389,29 +389,29 @@ arm_aes_encrypt_ecb_192(AESContext *cx, 
     uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
     uint8x16_t key11, key12, key13;
     PRUint8 *key = (PRUint8 *)cx->expandedKey;
 
     if (!inputLen) {
         return SECSuccess;
     }
 
-    key1 = vld1q_u8(__builtin_assume_aligned(key, 16));
-    key2 = vld1q_u8(__builtin_assume_aligned(key + 16, 16));
-    key3 = vld1q_u8(__builtin_assume_aligned(key + 32, 16));
-    key4 = vld1q_u8(__builtin_assume_aligned(key + 48, 16));
-    key5 = vld1q_u8(__builtin_assume_aligned(key + 64, 16));
-    key6 = vld1q_u8(__builtin_assume_aligned(key + 80, 16));
-    key7 = vld1q_u8(__builtin_assume_aligned(key + 96, 16));
-    key8 = vld1q_u8(__builtin_assume_aligned(key + 112, 16));
-    key9 = vld1q_u8(__builtin_assume_aligned(key + 128, 16));
-    key10 = vld1q_u8(__builtin_assume_aligned(key + 144, 16));
-    key11 = vld1q_u8(__builtin_assume_aligned(key + 160, 16));
-    key12 = vld1q_u8(__builtin_assume_aligned(key + 176, 16));
-    key13 = vld1q_u8(__builtin_assume_aligned(key + 192, 16));
+    key1 = vld1q_u8(key);
+    key2 = vld1q_u8(key + 16);
+    key3 = vld1q_u8(key + 32);
+    key4 = vld1q_u8(key + 48);
+    key5 = vld1q_u8(key + 64);
+    key6 = vld1q_u8(key + 80);
+    key7 = vld1q_u8(key + 96);
+    key8 = vld1q_u8(key + 112);
+    key9 = vld1q_u8(key + 128);
+    key10 = vld1q_u8(key + 144);
+    key11 = vld1q_u8(key + 160);
+    key12 = vld1q_u8(key + 176);
+    key13 = vld1q_u8(key + 192);
 
     while (inputLen > 0) {
         uint8x16_t state;
 #if defined(HAVE_UNALIGNED_ACCESS)
         state = vld1q_u8(input);
 #else
         if ((uintptr_t)input & 0x7) {
             memcpy(buf, input, 16);
@@ -480,29 +480,29 @@ arm_aes_decrypt_ecb_192(AESContext *cx, 
     uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
     uint8x16_t key11, key12, key13;
     const PRUint8 *key = (const PRUint8 *)cx->expandedKey;
 
     if (!inputLen) {
         return SECSuccess;
     }
 
-    key1 = vld1q_u8(__builtin_assume_aligned(key, 16));
-    key2 = vld1q_u8(__builtin_assume_aligned(key + 16, 16));
-    key3 = vld1q_u8(__builtin_assume_aligned(key + 32, 16));
-    key4 = vld1q_u8(__builtin_assume_aligned(key + 48, 16));
-    key5 = vld1q_u8(__builtin_assume_aligned(key + 64, 16));
-    key6 = vld1q_u8(__builtin_assume_aligned(key + 80, 16));
-    key7 = vld1q_u8(__builtin_assume_aligned(key + 96, 16));
-    key8 = vld1q_u8(__builtin_assume_aligned(key + 112, 16));
-    key9 = vld1q_u8(__builtin_assume_aligned(key + 128, 16));
-    key10 = vld1q_u8(__builtin_assume_aligned(key + 144, 16));
-    key11 = vld1q_u8(__builtin_assume_aligned(key + 160, 16));
-    key12 = vld1q_u8(__builtin_assume_aligned(key + 176, 16));
-    key13 = vld1q_u8(__builtin_assume_aligned(key + 192, 16));
+    key1 = vld1q_u8(key);
+    key2 = vld1q_u8(key + 16);
+    key3 = vld1q_u8(key + 32);
+    key4 = vld1q_u8(key + 48);
+    key5 = vld1q_u8(key + 64);
+    key6 = vld1q_u8(key + 80);
+    key7 = vld1q_u8(key + 96);
+    key8 = vld1q_u8(key + 112);
+    key9 = vld1q_u8(key + 128);
+    key10 = vld1q_u8(key + 144);
+    key11 = vld1q_u8(key + 160);
+    key12 = vld1q_u8(key + 176);
+    key13 = vld1q_u8(key + 192);
 
     while (inputLen > 0) {
         uint8x16_t state;
 #if defined(HAVE_UNALIGNED_ACCESS)
         state = vld1q_u8(input);
 #else
         if ((uintptr_t)input & 0x7) {
             memcpy(buf, input, 16);
@@ -576,29 +576,29 @@ arm_aes_encrypt_cbc_192(AESContext *cx, 
     if (!inputLen) {
         return SECSuccess;
     }
 
     /* iv */
     iv = vld1q_u8(cx->iv);
 
     /* expanedKey */
-    key1 = vld1q_u8(__builtin_assume_aligned(key, 16));
-    key2 = vld1q_u8(__builtin_assume_aligned(key + 16, 16));
-    key3 = vld1q_u8(__builtin_assume_aligned(key + 32, 16));
-    key4 = vld1q_u8(__builtin_assume_aligned(key + 48, 16));
-    key5 = vld1q_u8(__builtin_assume_aligned(key + 64, 16));
-    key6 = vld1q_u8(__builtin_assume_aligned(key + 80, 16));
-    key7 = vld1q_u8(__builtin_assume_aligned(key + 96, 16));
-    key8 = vld1q_u8(__builtin_assume_aligned(key + 112, 16));
-    key9 = vld1q_u8(__builtin_assume_aligned(key + 128, 16));
-    key10 = vld1q_u8(__builtin_assume_aligned(key + 144, 16));
-    key11 = vld1q_u8(__builtin_assume_aligned(key + 160, 16));
-    key12 = vld1q_u8(__builtin_assume_aligned(key + 176, 16));
-    key13 = vld1q_u8(__builtin_assume_aligned(key + 192, 16));
+    key1 = vld1q_u8(key);
+    key2 = vld1q_u8(key + 16);
+    key3 = vld1q_u8(key + 32);
+    key4 = vld1q_u8(key + 48);
+    key5 = vld1q_u8(key + 64);
+    key6 = vld1q_u8(key + 80);
+    key7 = vld1q_u8(key + 96);
+    key8 = vld1q_u8(key + 112);
+    key9 = vld1q_u8(key + 128);
+    key10 = vld1q_u8(key + 144);
+    key11 = vld1q_u8(key + 160);
+    key12 = vld1q_u8(key + 176);
+    key13 = vld1q_u8(key + 192);
 
     while (inputLen > 0) {
         uint8x16_t state;
 #if defined(HAVE_UNALIGNED_ACCESS)
         state = vld1q_u8(input);
 #else
         if ((uintptr_t)input & 0x7) {
             memcpy(buf, input, 16);
@@ -646,17 +646,17 @@ arm_aes_encrypt_cbc_192(AESContext *cx, 
             memcpy(output, buf, 16);
         } else {
             vst1q_u8(__builtin_assume_aligned(output, 8), state);
         }
 #endif
         output += 16;
         iv = state;
     }
-    vst1q_u8(__builtin_assume_aligned(cx->iv, 16), iv);
+    vst1q_u8(cx->iv, iv);
 
     return SECSuccess;
 }
 
 SECStatus
 arm_aes_decrypt_cbc_192(AESContext *cx, unsigned char *output,
                         unsigned int *outputLen,
                         unsigned int maxOutputLen,
@@ -672,32 +672,32 @@ arm_aes_decrypt_cbc_192(AESContext *cx, 
     uint8x16_t key11, key12, key13;
     const PRUint8 *key = (const PRUint8 *)cx->expandedKey;
 
     if (!inputLen) {
         return SECSuccess;
     }
 
     /* iv */
-    iv = vld1q_u8(__builtin_assume_aligned(cx->iv, 16));
+    iv = vld1q_u8(cx->iv);
 
     /* expanedKey */
-    key1 = vld1q_u8(__builtin_assume_aligned(key, 16));
-    key2 = vld1q_u8(__builtin_assume_aligned(key + 16, 16));
-    key3 = vld1q_u8(__builtin_assume_aligned(key + 32, 16));
-    key4 = vld1q_u8(__builtin_assume_aligned(key + 48, 16));
-    key5 = vld1q_u8(__builtin_assume_aligned(key + 64, 16));
-    key6 = vld1q_u8(__builtin_assume_aligned(key + 80, 16));
-    key7 = vld1q_u8(__builtin_assume_aligned(key + 96, 16));
-    key8 = vld1q_u8(__builtin_assume_aligned(key + 112, 16));
-    key9 = vld1q_u8(__builtin_assume_aligned(key + 128, 16));
-    key10 = vld1q_u8(__builtin_assume_aligned(key + 144, 16));
-    key11 = vld1q_u8(__builtin_assume_aligned(key + 160, 16));
-    key12 = vld1q_u8(__builtin_assume_aligned(key + 176, 16));
-    key13 = vld1q_u8(__builtin_assume_aligned(key + 192, 16));
+    key1 = vld1q_u8(key);
+    key2 = vld1q_u8(key + 16);
+    key3 = vld1q_u8(key + 32);
+    key4 = vld1q_u8(key + 48);
+    key5 = vld1q_u8(key + 64);
+    key6 = vld1q_u8(key + 80);
+    key7 = vld1q_u8(key + 96);
+    key8 = vld1q_u8(key + 112);
+    key9 = vld1q_u8(key + 128);
+    key10 = vld1q_u8(key + 144);
+    key11 = vld1q_u8(key + 160);
+    key12 = vld1q_u8(key + 176);
+    key13 = vld1q_u8(key + 192);
 
     while (inputLen > 0) {
         uint8x16_t state, old_state;
 #if defined(HAVE_UNALIGNED_ACCESS)
         state = vld1q_u8(input);
 #else
         if ((uintptr_t)input & 0x7) {
             memcpy(buf, input, 16);
@@ -748,17 +748,17 @@ arm_aes_decrypt_cbc_192(AESContext *cx, 
         } else {
             vst1q_u8(__builtin_assume_aligned(output, 8), state);
         }
 #endif
         output += 16;
 
         iv = old_state;
     }
-    vst1q_u8(__builtin_assume_aligned(cx->iv, 16), iv);
+    vst1q_u8(cx->iv, iv);
 
     return SECSuccess;
 }
 
 SECStatus
 arm_aes_encrypt_ecb_256(AESContext *cx, unsigned char *output,
                         unsigned int *outputLen,
                         unsigned int maxOutputLen,
@@ -772,31 +772,31 @@ arm_aes_encrypt_ecb_256(AESContext *cx, 
     uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
     uint8x16_t key11, key12, key13, key14, key15;
     PRUint8 *key = (PRUint8 *)cx->expandedKey;
 
     if (inputLen == 0) {
         return SECSuccess;
     }
 
-    key1 = vld1q_u8(__builtin_assume_aligned(key, 16));
-    key2 = vld1q_u8(__builtin_assume_aligned(key + 16, 16));
-    key3 = vld1q_u8(__builtin_assume_aligned(key + 32, 16));
-    key4 = vld1q_u8(__builtin_assume_aligned(key + 48, 16));
-    key5 = vld1q_u8(__builtin_assume_aligned(key + 64, 16));
-    key6 = vld1q_u8(__builtin_assume_aligned(key + 80, 16));
-    key7 = vld1q_u8(__builtin_assume_aligned(key + 96, 16));
-    key8 = vld1q_u8(__builtin_assume_aligned(key + 112, 16));
-    key9 = vld1q_u8(__builtin_assume_aligned(key + 128, 16));
-    key10 = vld1q_u8(__builtin_assume_aligned(key + 144, 16));
-    key11 = vld1q_u8(__builtin_assume_aligned(key + 160, 16));
-    key12 = vld1q_u8(__builtin_assume_aligned(key + 176, 16));
-    key13 = vld1q_u8(__builtin_assume_aligned(key + 192, 16));
-    key14 = vld1q_u8(__builtin_assume_aligned(key + 208, 16));
-    key15 = vld1q_u8(__builtin_assume_aligned(key + 224, 16));
+    key1 = vld1q_u8(key);
+    key2 = vld1q_u8(key + 16);
+    key3 = vld1q_u8(key + 32);
+    key4 = vld1q_u8(key + 48);
+    key5 = vld1q_u8(key + 64);
+    key6 = vld1q_u8(key + 80);
+    key7 = vld1q_u8(key + 96);
+    key8 = vld1q_u8(key + 112);
+    key9 = vld1q_u8(key + 128);
+    key10 = vld1q_u8(key + 144);
+    key11 = vld1q_u8(key + 160);
+    key12 = vld1q_u8(key + 176);
+    key13 = vld1q_u8(key + 192);
+    key14 = vld1q_u8(key + 208);
+    key15 = vld1q_u8(key + 224);
 
     while (inputLen > 0) {
         uint8x16_t state;
 #if defined(HAVE_UNALIGNED_ACCESS)
         state = vld1q_u8(input);
 #else
         if ((uintptr_t)input & 0x7) {
             memcpy(buf, input, 16);
@@ -868,31 +868,31 @@ arm_aes_decrypt_ecb_256(AESContext *cx, 
     uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
     uint8x16_t key11, key12, key13, key14, key15;
     const PRUint8 *key = (const PRUint8 *)cx->expandedKey;
 
     if (!inputLen) {
         return SECSuccess;
     }
 
-    key1 = vld1q_u8(__builtin_assume_aligned(key, 16));
-    key2 = vld1q_u8(__builtin_assume_aligned(key + 16, 16));
-    key3 = vld1q_u8(__builtin_assume_aligned(key + 32, 16));
-    key4 = vld1q_u8(__builtin_assume_aligned(key + 48, 16));
-    key5 = vld1q_u8(__builtin_assume_aligned(key + 64, 16));
-    key6 = vld1q_u8(__builtin_assume_aligned(key + 80, 16));
-    key7 = vld1q_u8(__builtin_assume_aligned(key + 96, 16));
-    key8 = vld1q_u8(__builtin_assume_aligned(key + 112, 16));
-    key9 = vld1q_u8(__builtin_assume_aligned(key + 128, 16));
-    key10 = vld1q_u8(__builtin_assume_aligned(key + 144, 16));
-    key11 = vld1q_u8(__builtin_assume_aligned(key + 160, 16));
-    key12 = vld1q_u8(__builtin_assume_aligned(key + 176, 16));
-    key13 = vld1q_u8(__builtin_assume_aligned(key + 192, 16));
-    key14 = vld1q_u8(__builtin_assume_aligned(key + 208, 16));
-    key15 = vld1q_u8(__builtin_assume_aligned(key + 224, 16));
+    key1 = vld1q_u8(key);
+    key2 = vld1q_u8(key + 16);
+    key3 = vld1q_u8(key + 32);
+    key4 = vld1q_u8(key + 48);
+    key5 = vld1q_u8(key + 64);
+    key6 = vld1q_u8(key + 80);
+    key7 = vld1q_u8(key + 96);
+    key8 = vld1q_u8(key + 112);
+    key9 = vld1q_u8(key + 128);
+    key10 = vld1q_u8(key + 144);
+    key11 = vld1q_u8(key + 160);
+    key12 = vld1q_u8(key + 176);
+    key13 = vld1q_u8(key + 192);
+    key14 = vld1q_u8(key + 208);
+    key15 = vld1q_u8(key + 224);
 
     while (inputLen > 0) {
         uint8x16_t state;
 #if defined(HAVE_UNALIGNED_ACCESS)
         state = vld1q_u8(input);
 #else
         if ((uintptr_t)input & 0x7) {
             memcpy(buf, input, 16);
@@ -970,31 +970,31 @@ arm_aes_encrypt_cbc_256(AESContext *cx, 
     if (!inputLen) {
         return SECSuccess;
     }
 
     /* iv */
     iv = vld1q_u8(cx->iv);
 
     /* expanedKey */
-    key1 = vld1q_u8(__builtin_assume_aligned(key, 16));
-    key2 = vld1q_u8(__builtin_assume_aligned(key + 16, 16));
-    key3 = vld1q_u8(__builtin_assume_aligned(key + 32, 16));
-    key4 = vld1q_u8(__builtin_assume_aligned(key + 48, 16));
-    key5 = vld1q_u8(__builtin_assume_aligned(key + 64, 16));
-    key6 = vld1q_u8(__builtin_assume_aligned(key + 80, 16));
-    key7 = vld1q_u8(__builtin_assume_aligned(key + 96, 16));
-    key8 = vld1q_u8(__builtin_assume_aligned(key + 112, 16));
-    key9 = vld1q_u8(__builtin_assume_aligned(key + 128, 16));
-    key10 = vld1q_u8(__builtin_assume_aligned(key + 144, 16));
-    key11 = vld1q_u8(__builtin_assume_aligned(key + 160, 16));
-    key12 = vld1q_u8(__builtin_assume_aligned(key + 176, 16));
-    key13 = vld1q_u8(__builtin_assume_aligned(key + 192, 16));
-    key14 = vld1q_u8(__builtin_assume_aligned(key + 208, 16));
-    key15 = vld1q_u8(__builtin_assume_aligned(key + 224, 16));
+    key1 = vld1q_u8(key);
+    key2 = vld1q_u8(key + 16);
+    key3 = vld1q_u8(key + 32);
+    key4 = vld1q_u8(key + 48);
+    key5 = vld1q_u8(key + 64);
+    key6 = vld1q_u8(key + 80);
+    key7 = vld1q_u8(key + 96);
+    key8 = vld1q_u8(key + 112);
+    key9 = vld1q_u8(key + 128);
+    key10 = vld1q_u8(key + 144);
+    key11 = vld1q_u8(key + 160);
+    key12 = vld1q_u8(key + 176);
+    key13 = vld1q_u8(key + 192);
+    key14 = vld1q_u8(key + 208);
+    key15 = vld1q_u8(key + 224);
 
     while (inputLen > 0) {
         uint8x16_t state;
 #if defined(HAVE_UNALIGNED_ACCESS)
         state = vld1q_u8(input);
 #else
         if ((uintptr_t)input & 0x7) {
             memcpy(buf, input, 16);
@@ -1047,17 +1047,17 @@ arm_aes_encrypt_cbc_256(AESContext *cx, 
             memcpy(output, buf, 16);
         } else {
             vst1q_u8(__builtin_assume_aligned(output, 8), state);
         }
 #endif
         output += 16;
         iv = state;
     }
-    vst1q_u8(__builtin_assume_aligned(cx->iv, 16), iv);
+    vst1q_u8(cx->iv, iv);
 
     return SECSuccess;
 }
 
 SECStatus
 arm_aes_decrypt_cbc_256(AESContext *cx, unsigned char *output,
                         unsigned int *outputLen,
                         unsigned int maxOutputLen,
@@ -1076,31 +1076,31 @@ arm_aes_decrypt_cbc_256(AESContext *cx, 
     if (!inputLen) {
         return SECSuccess;
     }
 
     /* iv */
     iv = vld1q_u8(cx->iv);
 
     /* expanedKey */
-    key1 = vld1q_u8(__builtin_assume_aligned(key, 16));
-    key2 = vld1q_u8(__builtin_assume_aligned(key + 16, 16));
-    key3 = vld1q_u8(__builtin_assume_aligned(key + 32, 16));
-    key4 = vld1q_u8(__builtin_assume_aligned(key + 48, 16));
-    key5 = vld1q_u8(__builtin_assume_aligned(key + 64, 16));
-    key6 = vld1q_u8(__builtin_assume_aligned(key + 80, 16));
-    key7 = vld1q_u8(__builtin_assume_aligned(key + 96, 16));
-    key8 = vld1q_u8(__builtin_assume_aligned(key + 112, 16));
-    key9 = vld1q_u8(__builtin_assume_aligned(key + 128, 16));
-    key10 = vld1q_u8(__builtin_assume_aligned(key + 144, 16));
-    key11 = vld1q_u8(__builtin_assume_aligned(key + 160, 16));
-    key12 = vld1q_u8(__builtin_assume_aligned(key + 176, 16));
-    key13 = vld1q_u8(__builtin_assume_aligned(key + 192, 16));
-    key14 = vld1q_u8(__builtin_assume_aligned(key + 208, 16));
-    key15 = vld1q_u8(__builtin_assume_aligned(key + 224, 16));
+    key1 = vld1q_u8(key);
+    key2 = vld1q_u8(key + 16);
+    key3 = vld1q_u8(key + 32);
+    key4 = vld1q_u8(key + 48);
+    key5 = vld1q_u8(key + 64);
+    key6 = vld1q_u8(key + 80);
+    key7 = vld1q_u8(key + 96);
+    key8 = vld1q_u8(key + 112);
+    key9 = vld1q_u8(key + 128);
+    key10 = vld1q_u8(key + 144);
+    key11 = vld1q_u8(key + 160);
+    key12 = vld1q_u8(key + 176);
+    key13 = vld1q_u8(key + 192);
+    key14 = vld1q_u8(key + 208);
+    key15 = vld1q_u8(key + 224);
 
     while (inputLen > 0) {
         uint8x16_t state, old_state;
 #if defined(HAVE_UNALIGNED_ACCESS)
         state = vld1q_u8(input);
 #else
         if ((uintptr_t)input & 0x7) {
             memcpy(buf, input, 16);
@@ -1155,14 +1155,14 @@ arm_aes_decrypt_cbc_256(AESContext *cx, 
         } else {
             vst1q_u8(__builtin_assume_aligned(output, 8), state);
         }
 #endif
         output += 16;
 
         iv = old_state;
     }
-    vst1q_u8(__builtin_assume_aligned(cx->iv, 16), iv);
+    vst1q_u8(cx->iv, iv);
 
     return SECSuccess;
 }
 
 #endif