gfx/ycbcr/win64.patch
author Nathan Froyd <froydnj@mozilla.com>
Wed, 07 Oct 2015 16:50:25 -0400
changeset 287061 91d4539e00cecb658604e021675a923c60ef3235
parent 69125 1461de626881724cb66b4b6fd29662c0eaf260e0
permissions -rw-r--r--
Bug 1207245 - part 6 - rename nsRefPtr<T> to RefPtr<T>; r=ehsan; a=Tomcat The bulk of this commit was generated with a script, executed at the top level of a typical source code checkout. The only non-machine-generated part was modifying MFBT's moz.build to reflect the new naming. # The main substitution. find . -name '*.cpp' -o -name '*.cc' -o -name '*.h' -o -name '*.mm' -o -name '*.idl'| \ xargs perl -p -i -e ' s/nsRefPtr\.h/RefPtr\.h/g; # handle includes s/nsRefPtr ?</RefPtr</g; # handle declarations and variables ' # Handle a special friend declaration in gfx/layers/AtomicRefCountedWithFinalize.h. perl -p -i -e 's/::nsRefPtr;/::RefPtr;/' gfx/layers/AtomicRefCountedWithFinalize.h # Handle nsRefPtr.h itself, a couple places that define constructors # from nsRefPtr, and code generators specially. We do this here, rather # than indiscriminantly s/nsRefPtr/RefPtr/, because that would rename # things like nsRefPtrHashtable. perl -p -i -e 's/nsRefPtr/RefPtr/g' \ mfbt/nsRefPtr.h \ xpcom/glue/nsCOMPtr.h \ xpcom/base/OwningNonNull.h \ ipc/ipdl/ipdl/lower.py \ ipc/ipdl/ipdl/builtin.py \ dom/bindings/Codegen.py \ python/lldbutils/lldbutils/utils.py # In our indiscriminate substitution above, we renamed # nsRefPtrGetterAddRefs, the class behind getter_AddRefs. Fix that up. find . -name '*.cpp' -o -name '*.h' -o -name '*.idl' | \ xargs perl -p -i -e 's/nsRefPtrGetterAddRefs/RefPtrGetterAddRefs/g' if [ -d .git ]; then git mv mfbt/nsRefPtr.h mfbt/RefPtr.h else hg mv mfbt/nsRefPtr.h mfbt/RefPtr.h fi

diff --git a/gfx/ycbcr/yuv_row_win64.cpp b/gfx/ycbcr/yuv_row_win64.cpp
new file mode 100644
--- /dev/null
+++ b/gfx/ycbcr/yuv_row_win64.cpp
@@ -0,0 +1,205 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "yuv_row.h"
+
+extern "C" {
+
+// x64 compiler doesn't support MMX and inline assembler.  Use SSE2 intrinsics.
+
+#define kCoefficientsRgbU (reinterpret_cast<uint8*>(kCoefficientsRgbY) + 2048)
+#define kCoefficientsRgbV (reinterpret_cast<uint8*>(kCoefficientsRgbY) + 4096)
+
+#include <emmintrin.h>
+
+static void FastConvertYUVToRGB32Row_SSE2(const uint8* y_buf,
+                                          const uint8* u_buf,
+                                          const uint8* v_buf,
+                                          uint8* rgb_buf,
+                                          int width) {
+  __m128i xmm0, xmmY1, xmmY2;
+  __m128  xmmY;
+
+  while (width >= 2) {
+    xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * *u_buf++)),
+                          _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * *v_buf++)));
+
+    xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * *y_buf++));
+    xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
+
+    xmmY2 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * *y_buf++));
+    xmmY2 = _mm_adds_epi16(xmmY2, xmm0);
+
+    xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2),
+                          0x44);
+    xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6);
+    xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
+
+    _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1);
+    rgb_buf += 8;
+    width -= 2;
+  }
+
+  if (width) {
+    xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * *u_buf)),
+                          _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * *v_buf)));
+    xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * *y_buf));
+    xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
+    xmmY1 = _mm_srai_epi16(xmmY1, 6);
+    xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
+    *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1);
+  }
+}
+
+static void ScaleYUVToRGB32Row_SSE2(const uint8* y_buf,
+                                    const uint8* u_buf,
+                                    const uint8* v_buf,
+                                    uint8* rgb_buf,
+                                    int width,
+                                    int source_dx) {
+  __m128i xmm0, xmmY1, xmmY2;
+  __m128  xmmY;
+  uint8 u, v, y;
+  int x = 0;
+
+  while (width >= 2) {
+    u = u_buf[x >> 17];
+    v = v_buf[x >> 17];
+    y = y_buf[x >> 16];
+    x += source_dx;
+
+    xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),
+                          _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));
+    xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
+    xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
+
+    y = y_buf[x >> 16];
+    x += source_dx;
+
+    xmmY2 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
+    xmmY2 = _mm_adds_epi16(xmmY2, xmm0);
+
+    xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2),
+                          0x44);
+    xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6);
+    xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
+
+    _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1);
+    rgb_buf += 8;
+    width -= 2;
+  }
+
+  if (width) {
+    u = u_buf[x >> 17];
+    v = v_buf[x >> 17];
+    y = y_buf[x >> 16];
+
+    xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),
+                          _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));
+    xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
+    xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
+    xmmY1 = _mm_srai_epi16(xmmY1, 6);
+    xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
+    *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1);
+  }
+}
+
+static void LinearScaleYUVToRGB32Row_SSE2(const uint8* y_buf,
+                                          const uint8* u_buf,
+                                          const uint8* v_buf,
+                                          uint8* rgb_buf,
+                                          int width,
+                                          int source_dx) {
+  __m128i xmm0, xmmY1, xmmY2;
+  __m128  xmmY;
+  uint8 u0, u1, v0, v1, y0, y1;
+  uint32 uv_frac, y_frac, u, v, y;
+  int x = 0;
+
+  if (source_dx >= 0x20000) {
+    x = 32768;
+  }
+
+  while(width >= 2) {
+    u0 = u_buf[x >> 17];
+    u1 = u_buf[(x >> 17) + 1];
+    v0 = v_buf[x >> 17];
+    v1 = v_buf[(x >> 17) + 1];
+    y0 = y_buf[x >> 16];
+    y1 = y_buf[(x >> 16) + 1];
+    uv_frac = (x & 0x1fffe);
+    y_frac = (x & 0xffff);
+    u = (uv_frac * u1 + (uv_frac ^ 0x1fffe) * u0) >> 17;
+    v = (uv_frac * v1 + (uv_frac ^ 0x1fffe) * v0) >> 17;
+    y = (y_frac * y1 + (y_frac ^ 0xffff) * y0) >> 16;
+    x += source_dx;
+
+    xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),
+                          _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));
+    xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
+    xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
+
+    y0 = y_buf[x >> 16];
+    y1 = y_buf[(x >> 16) + 1];
+    y_frac = (x & 0xffff);
+    y = (y_frac * y1 + (y_frac ^ 0xffff) * y0) >> 16;
+    x += source_dx;
+
+    xmmY2 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
+    xmmY2 = _mm_adds_epi16(xmmY2, xmm0);
+
+    xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2),
+                          0x44);
+    xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6);
+    xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
+
+    _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1);
+    rgb_buf += 8;
+    width -= 2;
+  }
+
+  if (width) {
+    u = u_buf[x >> 17];
+    v = v_buf[x >> 17];
+    y = y_buf[x >> 16];
+
+    xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),
+                          _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));
+    xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
+
+    xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
+    xmmY1 = _mm_srai_epi16(xmmY1, 6);
+    xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
+    *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1);
+  }
+}
+
+void FastConvertYUVToRGB32Row(const uint8* y_buf,
+                              const uint8* u_buf,
+                              const uint8* v_buf,
+                              uint8* rgb_buf,
+                              int width) {
+  FastConvertYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width);
+}
+
+void ScaleYUVToRGB32Row(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* rgb_buf,
+                        int width,
+                        int source_dx) {
+  ScaleYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
+}
+
+void LinearScaleYUVToRGB32Row(const uint8* y_buf,
+                              const uint8* u_buf,
+                              const uint8* v_buf,
+                              uint8* rgb_buf,
+                              int width,
+                              int source_dx) {
+  LinearScaleYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width,
+                                source_dx);
+}
+
+} // extern "C"