/* Bug 1032255 - TPS has to exit with code != 0 in case of failures. r=aeftimie, dhunt a=testonly DONTBUILD */

#include <xmmintrin.h>
#include <emmintrin.h>

/* Before Nehalem _mm_loadu_si128 could be very slow, this trick is a little
 * faster. Once enough people are on architectures where _mm_loadu_si128 is
 * fast we can migrate to it.
MOZ_ALWAYS_INLINE __m128i loadUnaligned128(const __m128i *aSource)
  // Yes! We use uninitialized memory here, we'll overwrite it though!
  __m128 res = _mm_loadl_pi(_mm_set1_ps(0), (const __m64*)aSource);
  return _mm_castps_si128(_mm_loadh_pi(res, ((const __m64*)(aSource)) + 1));