Bug 1518994 - Enable NEON in AudioNodeEngine on aarch64. r=drno
authorMakoto Kato <m_kato@ga2.so-net.ne.jp>
Fri, 11 Jan 2019 14:39:51 +0000
changeset 453625 ebca585ed555
parent 453624 87a1eddf108d
child 453626 10907464fcc2
push id35362
push userncsoregi@mozilla.com
push dateSat, 12 Jan 2019 21:35:38 +0000
treeherdermozilla-central@877169d8ef49 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersdrno
bugs1518994, 1303952
milestone66.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1518994 - Enable NEON in AudioNodeEngine on aarch64. r=drno Although we don't define BUILD_ARM_NEON on aarch64 (bug 1303952), aarch64 supports NEON, so we should turn on NEON for AudioNodeEngine. OpenMAX DL doesn't support aarch64 since we modify some codes. So FFTBlock.h still use ARM32 only. Also, MSVC cannot use arm_neon.h header, doesn't allow `float32x4_t zero = {0, 0, 0, 0};` and throws compiler warning. So we need some workarounds to use this on MSVC. Differential Revision: https://phabricator.services.mozilla.com/D16278
dom/media/webaudio/AudioNodeEngine.cpp
dom/media/webaudio/AudioNodeEngineNEON.cpp
dom/media/webaudio/moz.build
--- a/dom/media/webaudio/AudioNodeEngine.cpp
+++ b/dom/media/webaudio/AudioNodeEngine.cpp
@@ -2,17 +2,17 @@
 /* vim:set ts=2 sw=2 sts=2 et cindent: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "AudioNodeEngine.h"
 
 #include "mozilla/AbstractThread.h"
-#ifdef BUILD_ARM_NEON
+#ifdef USE_NEON
 #include "mozilla/arm.h"
 #include "AudioNodeEngineNEON.h"
 #endif
 #ifdef USE_SSE2
 #include "mozilla/SSE.h"
 #include "AlignmentUtils.h"
 #include "AudioNodeEngineSSE2.h"
 #endif
@@ -57,17 +57,17 @@ void AudioBufferCopyWithScale(const floa
     for (uint32_t i = 0; i < aSize; ++i) {
       aOutput[i] = aInput[i] * aScale;
     }
   }
 }
 
 void AudioBufferAddWithScale(const float* aInput, float aScale, float* aOutput,
                              uint32_t aSize) {
-#ifdef BUILD_ARM_NEON
+#ifdef USE_NEON
   if (mozilla::supports_neon()) {
     AudioBufferAddWithScale_NEON(aInput, aScale, aOutput, aSize);
     return;
   }
 #endif
 
 #ifdef USE_SSE2
   if (mozilla::supports_sse2()) {
@@ -117,17 +117,17 @@ void AudioBlockAddChannelWithScale(const
   AudioBufferAddWithScale(aInput, aScale, aOutput, WEBAUDIO_BLOCK_SIZE);
 }
 
 void AudioBlockCopyChannelWithScale(const float* aInput, float aScale,
                                     float* aOutput) {
   if (aScale == 1.0f) {
     memcpy(aOutput, aInput, WEBAUDIO_BLOCK_SIZE * sizeof(float));
   } else {
-#ifdef BUILD_ARM_NEON
+#ifdef USE_NEON
     if (mozilla::supports_neon()) {
       AudioBlockCopyChannelWithScale_NEON(aInput, aScale, aOutput);
       return;
     }
 #endif
 
 #ifdef USE_SSE2
     if (mozilla::supports_sse2()) {
@@ -172,17 +172,17 @@ float AudioBufferPeakValue(const float* 
     }
   }
   return max;
 }
 
 void AudioBlockCopyChannelWithScale(const float aInput[WEBAUDIO_BLOCK_SIZE],
                                     const float aScale[WEBAUDIO_BLOCK_SIZE],
                                     float aOutput[WEBAUDIO_BLOCK_SIZE]) {
-#ifdef BUILD_ARM_NEON
+#ifdef USE_NEON
   if (mozilla::supports_neon()) {
     AudioBlockCopyChannelWithScale_NEON(aInput, aScale, aOutput);
     return;
   }
 #endif
 
 #ifdef USE_SSE2
   if (mozilla::supports_sse2()) {
@@ -204,17 +204,17 @@ void AudioBlockInPlaceScale(float aBlock
                             float aScale[WEBAUDIO_BLOCK_SIZE]) {
   AudioBufferInPlaceScale(aBlock, aScale, WEBAUDIO_BLOCK_SIZE);
 }
 
 void AudioBufferInPlaceScale(float* aBlock, float aScale, uint32_t aSize) {
   if (aScale == 1.0f) {
     return;
   }
-#ifdef BUILD_ARM_NEON
+#ifdef USE_NEON
   if (mozilla::supports_neon()) {
     AudioBufferInPlaceScale_NEON(aBlock, aScale, aSize);
     return;
   }
 #endif
 
 #ifdef USE_SSE2
   if (mozilla::supports_sse2()) {
@@ -224,17 +224,17 @@ void AudioBufferInPlaceScale(float* aBlo
 #endif
 
   for (uint32_t i = 0; i < aSize; ++i) {
     *aBlock++ *= aScale;
   }
 }
 
 void AudioBufferInPlaceScale(float* aBlock, float* aScale, uint32_t aSize) {
-#ifdef BUILD_ARM_NEON
+#ifdef USE_NEON
   if (mozilla::supports_neon()) {
     AudioBufferInPlaceScale_NEON(aBlock, aScale, aSize);
     return;
   }
 #endif
 
 #ifdef USE_SSE2
   if (mozilla::supports_sse2()) {
@@ -265,17 +265,17 @@ void AudioBlockPanMonoToStereo(const flo
   AudioBlockCopyChannelWithScale(aInput, aGainR, aOutputR);
 }
 
 void AudioBlockPanStereoToStereo(const float aInputL[WEBAUDIO_BLOCK_SIZE],
                                  const float aInputR[WEBAUDIO_BLOCK_SIZE],
                                  float aGainL, float aGainR, bool aIsOnTheLeft,
                                  float aOutputL[WEBAUDIO_BLOCK_SIZE],
                                  float aOutputR[WEBAUDIO_BLOCK_SIZE]) {
-#ifdef BUILD_ARM_NEON
+#ifdef USE_NEON
   if (mozilla::supports_neon()) {
     AudioBlockPanStereoToStereo_NEON(aInputL, aInputR, aGainL, aGainR,
                                      aIsOnTheLeft, aOutputL, aOutputR);
     return;
   }
 #endif
 
 #ifdef USE_SSE2
@@ -303,17 +303,17 @@ void AudioBlockPanStereoToStereo(const f
 
 void AudioBlockPanStereoToStereo(const float aInputL[WEBAUDIO_BLOCK_SIZE],
                                  const float aInputR[WEBAUDIO_BLOCK_SIZE],
                                  float aGainL[WEBAUDIO_BLOCK_SIZE],
                                  float aGainR[WEBAUDIO_BLOCK_SIZE],
                                  bool aIsOnTheLeft[WEBAUDIO_BLOCK_SIZE],
                                  float aOutputL[WEBAUDIO_BLOCK_SIZE],
                                  float aOutputR[WEBAUDIO_BLOCK_SIZE]) {
-#ifdef BUILD_ARM_NEON
+#ifdef USE_NEON
   if (mozilla::supports_neon()) {
     AudioBlockPanStereoToStereo_NEON(aInputL, aInputR, aGainL, aGainR,
                                      aIsOnTheLeft, aOutputL, aOutputR);
     return;
   }
 #endif
 
   uint32_t i;
--- a/dom/media/webaudio/AudioNodeEngineNEON.cpp
+++ b/dom/media/webaudio/AudioNodeEngineNEON.cpp
@@ -1,15 +1,19 @@
 /* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* this source code form is subject to the terms of the mozilla public
  * license, v. 2.0. if a copy of the mpl was not distributed with this file,
  * You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "AudioNodeEngineNEON.h"
+#if defined(_MSC_VER) && defined(_M_ARM64)
+#include <arm64_neon.h>
+#else
 #include <arm_neon.h>
+#endif
 
 //#ifdef DEBUG
 #if 0  // see bug 921099
 #define ASSERT_ALIGNED(ptr)                                     \
   MOZ_ASSERT((((uintptr_t)ptr + 15) & ~0x0F) == (uintptr_t)ptr, \
              #ptr " has to be aligned 16-bytes aligned.");
 #else
 #define ASSERT_ALIGNED(ptr)
@@ -266,19 +270,25 @@ void AudioBlockPanStereoToStereo_NEON(
   float32x4_t vinL0, vinL1;
   float32x4_t vinR0, vinR1;
   float32x4_t voutL0, voutL1;
   float32x4_t voutR0, voutR1;
   float32x4_t vscaleL0, vscaleL1;
   float32x4_t vscaleR0, vscaleR1;
   float32x4_t onleft0, onleft1, notonleft0, notonleft1;
 
-  float32x4_t zero = {0, 0, 0, 0};
+  float32x4_t zero = vmovq_n_f32(0);
   uint8x8_t isOnTheLeft;
 
+  // Although MSVC throws uninitialized value warning for voutL0 and voutL1,
+  // since we fill all lanes by vsetq_lane_f32, we can ignore it. But to avoid
+  // compiler warning, set zero.
+  voutL0 = zero;
+  voutL1 = zero;
+
   for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i += 8) {
     vinL0 = vld1q_f32(ADDRESS_OF(aInputL, i));
     vinL1 = vld1q_f32(ADDRESS_OF(aInputL, i + 4));
 
     vinR0 = vld1q_f32(ADDRESS_OF(aInputR, i));
     vinR1 = vld1q_f32(ADDRESS_OF(aInputR, i + 4));
 
     vscaleL0 = vld1q_f32(ADDRESS_OF(aGainL, i));
--- a/dom/media/webaudio/moz.build
+++ b/dom/media/webaudio/moz.build
@@ -115,22 +115,24 @@ UNIFIED_SOURCES += [
     'PeriodicWave.cpp',
     'ScriptProcessorNode.cpp',
     'StereoPannerNode.cpp',
     'ThreeDPoint.cpp',
     'WaveShaperNode.cpp',
     'WebAudioUtils.cpp',
 ]
 
-if CONFIG['CPU_ARCH'] == 'arm' and CONFIG['BUILD_ARM_NEON']:
+if CONFIG['CPU_ARCH'] == 'aarch64' or CONFIG['BUILD_ARM_NEON']:
+    DEFINES['USE_NEON'] = True
     SOURCES += ['AudioNodeEngineNEON.cpp']
     SOURCES['AudioNodeEngineNEON.cpp'].flags += CONFIG['NEON_FLAGS']
-    LOCAL_INCLUDES += [
-        '/media/openmax_dl/dl/api/'
-    ]
+    if CONFIG['BUILD_ARM_NEON']:
+        LOCAL_INCLUDES += [
+            '/media/openmax_dl/dl/api/'
+        ]
 
 # Are we targeting x86 or x64?  If so, build SSE2 files.
 if CONFIG['INTEL_ARCHITECTURE']:
     SOURCES += ['AudioNodeEngineSSE2.cpp']
     DEFINES['USE_SSE2'] = True
     SOURCES['AudioNodeEngineSSE2.cpp'].flags += CONFIG['SSE2_FLAGS']