461 lines
11 KiB
C++
461 lines
11 KiB
C++
/*
|
|
* Copyright 2010-2016 Branimir Karadzic. All rights reserved.
|
|
* License: https://github.com/bkaradzic/bx#license-bsd-2-clause
|
|
*/
|
|
|
|
#ifndef BX_SIMD_T_H_HEADER_GUARD
|
|
#define BX_SIMD_T_H_HEADER_GUARD
|
|
|
|
#include "bx.h"
|
|
|
|
// Inlining qualifiers used by the SIMD declarations in this header.
#define BX_SIMD_FORCE_INLINE BX_FORCE_INLINE
#define BX_SIMD_INLINE inline

// Backend selection flags. Each flag starts out as 0 and is redefined to 1
// below when its backend is detected, so every flag is always defined and can
// be tested with a plain #if.
#define BX_SIMD_AVX     0
#define BX_SIMD_LANGEXT 0
#define BX_SIMD_NEON    0
#define BX_SIMD_SSE     0

// AVX is probed on its own, outside the #if/#elif chain below, so it can be
// enabled together with one of the 128-bit backends.
#if defined(__AVX__) || defined(__AVX2__)
#	include <immintrin.h>
#	undef  BX_SIMD_AVX
#	define BX_SIMD_AVX 1
#endif // defined(__AVX__) || defined(__AVX2__)

// 128-bit backend: the first matching branch wins (SSE, then NEON, then the
// Clang vector extension), so at most one of the three flags becomes 1.
// NOTE(review): BX_COMPILER_*, BX_ARCH_64BIT, BX_PLATFORM_* and
// BX_CLANG_HAS_EXTENSION are not defined in this file; presumably they come
// from "bx.h" -- confirm.
#if defined(__SSE2__) || (BX_COMPILER_MSVC && (BX_ARCH_64BIT || _M_IX86_FP >= 2) )
#	include <emmintrin.h> // __m128i
#	if defined(__SSE4_1__)
#		include <smmintrin.h>
#	endif // defined(__SSE4_1__)
#	include <xmmintrin.h> // __m128
#	undef  BX_SIMD_SSE
#	define BX_SIMD_SSE 1
#elif defined(__ARM_NEON__) && !BX_COMPILER_CLANG
#	include <arm_neon.h>
#	undef  BX_SIMD_NEON
#	define BX_SIMD_NEON 1
#elif   BX_COMPILER_CLANG \
	&& !BX_PLATFORM_EMSCRIPTEN \
	&& !BX_PLATFORM_IOS \
	&&  BX_CLANG_HAS_EXTENSION(attribute_ext_vector_type)
#	include <math.h>
#	undef  BX_SIMD_LANGEXT
#	define BX_SIMD_LANGEXT 1
#endif // 128-bit backend selection
|
|
|
|
namespace bx
|
|
{
|
|
// Lane indices, defined only for the duration of the swizzle include below.
#define ELEMx 0
#define ELEMy 1
#define ELEMz 2
#define ELEMw 3

// Declares one swizzle function template, simd_swiz_<x><y><z><w>(Ty), for the
// four lane letters passed in.
// NOTE(review): "simd128_swizzle.inl" is presumably a list of
// BX_SIMD128_IMPLEMENT_SWIZZLE(...) invocations, one per lane permutation --
// its contents are not visible here, confirm.
#define BX_SIMD128_IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \
			template<typename Ty> \
			BX_SIMD_FORCE_INLINE Ty simd_swiz_##_x##_y##_z##_w(Ty _a);

#include "simd128_swizzle.inl"

// Remove the helper macros again, in reverse order of definition, so they do
// not leak out of this header section.
#undef BX_SIMD128_IMPLEMENT_SWIZZLE
#undef ELEMw
#undef ELEMz
#undef ELEMy
#undef ELEMx
|
|
|
|
#define BX_SIMD128_IMPLEMENT_TEST(_xyzw) \
|
|
template<typename Ty> \
|
|
BX_SIMD_FORCE_INLINE bool simd_test_any_##_xyzw(Ty _test); \
|
|
\
|
|
template<typename Ty> \
|
|
BX_SIMD_FORCE_INLINE bool simd_test_all_##_xyzw(Ty _test)
|
|
|
|
BX_SIMD128_IMPLEMENT_TEST(x );
|
|
BX_SIMD128_IMPLEMENT_TEST(y );
|
|
BX_SIMD128_IMPLEMENT_TEST(xy );
|
|
BX_SIMD128_IMPLEMENT_TEST(z );
|
|
BX_SIMD128_IMPLEMENT_TEST(xz );
|
|
BX_SIMD128_IMPLEMENT_TEST(yz );
|
|
BX_SIMD128_IMPLEMENT_TEST(xyz );
|
|
BX_SIMD128_IMPLEMENT_TEST(w );
|
|
BX_SIMD128_IMPLEMENT_TEST(xw );
|
|
BX_SIMD128_IMPLEMENT_TEST(yw );
|
|
BX_SIMD128_IMPLEMENT_TEST(xyw );
|
|
BX_SIMD128_IMPLEMENT_TEST(zw );
|
|
BX_SIMD128_IMPLEMENT_TEST(xzw );
|
|
BX_SIMD128_IMPLEMENT_TEST(yzw );
|
|
BX_SIMD128_IMPLEMENT_TEST(xyzw);
|
|
#undef BX_SIMD128_IMPLEMENT_TEST
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_shuf_xyAB(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_shuf_ABxy(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_shuf_CDzw(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_shuf_zwCD(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_shuf_xAyB(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_shuf_yBxA(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_shuf_zCwD(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_shuf_CzDw(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE float simd_x(Ty _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE float simd_y(Ty _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE float simd_z(Ty _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE float simd_w(Ty _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_ld(const void* _ptr);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE void simd_st(void* _ptr, Ty _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE void simd_stx(void* _ptr, Ty _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE void simd_stream(void* _ptr, Ty _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_ld(float _x, float _y, float _z, float _w);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_ld(float _x, float _y, float _z, float _w, float _a, float _b, float _c, float _d);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w, uint32_t _a, uint32_t _b, uint32_t _c, uint32_t _d);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_splat(const void* _ptr);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_splat(float _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_isplat(uint32_t _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_zero();
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_itof(Ty _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_ftoi(Ty _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_round(Ty _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_add(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_sub(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_mul(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_div(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_rcp_est(Ty _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_sqrt(Ty _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_rsqrt_est(Ty _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_dot3(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_dot(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_cmpeq(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_cmplt(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_cmple(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_cmpgt(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_cmpge(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_min(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_max(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_and(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_andc(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_or(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_xor(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_sll(Ty _a, int _count);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_srl(Ty _a, int _count);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_sra(Ty _a, int _count);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_icmpeq(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_icmplt(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_icmpgt(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_imin(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_imax(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_iadd(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_FORCE_INLINE Ty simd_isub(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_INLINE Ty simd_shuf_xAzC(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_INLINE Ty simd_shuf_yBwD(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_INLINE Ty simd_rcp(Ty _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_INLINE Ty simd_orx(Ty _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_INLINE Ty simd_orc(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_INLINE Ty simd_neg(Ty _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_INLINE Ty simd_madd(Ty _a, Ty _b, Ty _c);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_INLINE Ty simd_nmsub(Ty _a, Ty _b, Ty _c);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_INLINE Ty simd_div_nr(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_INLINE Ty simd_selb(Ty _mask, Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_INLINE Ty simd_sels(Ty _test, Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_INLINE Ty simd_not(Ty _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_INLINE Ty simd_abs(Ty _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_INLINE Ty simd_clamp(Ty _a, Ty _min, Ty _max);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_INLINE Ty simd_lerp(Ty _a, Ty _b, Ty _s);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_INLINE Ty simd_rsqrt(Ty _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_INLINE Ty simd_rsqrt_nr(Ty _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_INLINE Ty simd_rsqrt_carmack(Ty _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_INLINE Ty simd_sqrt_nr(Ty _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_INLINE Ty simd_log2(Ty _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_INLINE Ty simd_exp2(Ty _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_INLINE Ty simd_pow(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_INLINE Ty simd_cross3(Ty _a, Ty _b);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_INLINE Ty simd_normalize3(Ty _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_INLINE Ty simd_ceil(Ty _a);
|
|
|
|
template<typename Ty>
|
|
BX_SIMD_INLINE Ty simd_floor(Ty _a);
|
|
|
|
#if BX_SIMD_AVX
// Native 256-bit vector type of the AVX backend. __m256 comes from
// <immintrin.h>, which this header includes whenever BX_SIMD_AVX is set to 1.
typedef __m256 simd256_avx_t;
#endif // BX_SIMD_AVX
|
|
|
|
#if BX_SIMD_LANGEXT
// 128-bit vector type for the compiler vector-extension backend.
// The same 16 bytes are exposed both as compiler vector types (vf/vi/vu,
// declared with the vector_size(16) attribute) and as plain 4-element arrays
// (fxyzw/ixyzw/uxyzw) for per-lane access. Member order is significant for
// aggregate initialisation, which initialises the first member (vf).
union simd128_langext_t
{
	float    __attribute__((vector_size(16))) vf; // float vector view
	int32_t  __attribute__((vector_size(16))) vi; // signed integer vector view
	uint32_t __attribute__((vector_size(16))) vu; // unsigned integer vector view
	float    fxyzw[4];                            // float lanes
	int32_t  ixyzw[4];                            // signed integer lanes
	uint32_t uxyzw[4];                            // unsigned integer lanes
};
#endif // BX_SIMD_LANGEXT
|
|
|
|
#if BX_SIMD_NEON
// Native 128-bit vector type of the NEON backend; float32x4_t comes from
// <arm_neon.h>, which this header includes whenever BX_SIMD_NEON is set to 1.
typedef float32x4_t simd128_neon_t;
#endif // BX_SIMD_NEON
|
|
|
|
#if BX_SIMD_SSE
// Native 128-bit vector type of the SSE backend; __m128 comes from
// <xmmintrin.h>, which this header includes whenever BX_SIMD_SSE is set to 1.
typedef __m128 simd128_sse_t;
#endif // BX_SIMD_SSE
|
|
|
|
} // namespace bx
|
|
|
|
#if BX_SIMD_AVX
|
|
# include "simd256_avx.inl"
|
|
#endif // BX_SIMD_AVX
|
|
|
|
#if BX_SIMD_LANGEXT
|
|
# include "simd128_langext.inl"
|
|
#endif // BX_SIMD_LANGEXT
|
|
|
|
#if BX_SIMD_NEON
|
|
# include "simd128_neon.inl"
|
|
#endif // BX_SIMD_NEON
|
|
|
|
#if BX_SIMD_SSE
|
|
# include "simd128_sse.inl"
|
|
#endif // BX_SIMD_SSE
|
|
|
|
namespace bx
|
|
{
|
|
// Portable 128-bit vector storage for the reference implementation.
// The three arrays overlay the same four lanes, giving float, signed-integer
// and unsigned-integer views of the data. Member order is significant for
// aggregate initialisation, which initialises the first member (fxyzw).
union simd128_ref_t
{
	float    fxyzw[4]; // lanes viewed as float
	int32_t  ixyzw[4]; // lanes viewed as signed 32-bit integers
	uint32_t uxyzw[4]; // lanes viewed as unsigned 32-bit integers
};
|
|
|
|
#ifndef BX_SIMD_WARN_REFERENCE_IMPL
|
|
# define BX_SIMD_WARN_REFERENCE_IMPL 0
|
|
#endif // BX_SIMD_WARN_REFERENCE_IMPL
|
|
|
|
#if !( BX_SIMD_LANGEXT \
|
|
|| BX_SIMD_NEON \
|
|
|| BX_SIMD_SSE \
|
|
)
|
|
# if BX_SIMD_WARN_REFERENCE_IMPL
|
|
# pragma message("*** Using SIMD128 reference implementation! ***")
|
|
# endif // BX_SIMD_WARN_REFERENCE_IMPL
|
|
|
|
typedef simd128_ref_t simd128_t;
|
|
#endif //
|
|
|
|
struct simd256_ref_t
|
|
{
|
|
simd128_t simd128_0;
|
|
simd128_t simd128_1;
|
|
};
|
|
|
|
#if !BX_SIMD_AVX
|
|
# if BX_SIMD_WARN_REFERENCE_IMPL
|
|
# pragma message("*** Using SIMD256 reference implementation! ***")
|
|
# endif // BX_SIMD_WARN_REFERENCE_IMPL
|
|
|
|
typedef simd256_ref_t simd256_t;
|
|
#endif // !BX_SIMD_AVX
|
|
|
|
} // namespace bx
|
|
|
|
#include "simd128_ref.inl"
|
|
#include "simd256_ref.inl"
|
|
|
|
namespace bx
|
|
{
|
|
// Non-template convenience overload for the default 128-bit type: forwards to
// simd_zero<simd128_t>() so callers need not spell the template argument.
BX_SIMD_FORCE_INLINE simd128_t simd_zero()
{
	return simd_zero<simd128_t>();
}
|
|
|
|
// Non-template convenience overload for the default 128-bit type: forwards
// _ptr unchanged to simd_ld<simd128_t>().
BX_SIMD_FORCE_INLINE simd128_t simd_ld(const void* _ptr)
{
	return simd_ld<simd128_t>(_ptr);
}
|
|
|
|
// Non-template convenience overload for the default 128-bit type: forwards
// the four float components, in the same order, to simd_ld<simd128_t>().
BX_SIMD_FORCE_INLINE simd128_t simd_ld(float _x, float _y, float _z, float _w)
{
	return simd_ld<simd128_t>(_x, _y, _z, _w);
}
|
|
|
|
// Non-template convenience overload for the default 128-bit type: forwards
// the four uint32_t components, in the same order, to simd_ild<simd128_t>().
BX_SIMD_FORCE_INLINE simd128_t simd_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w)
{
	return simd_ild<simd128_t>(_x, _y, _z, _w);
}
|
|
|
|
// Non-template convenience overload for the default 128-bit type: forwards
// _ptr unchanged to simd_splat<simd128_t>().
BX_SIMD_FORCE_INLINE simd128_t simd_splat(const void* _ptr)
{
	return simd_splat<simd128_t>(_ptr);
}
|
|
|
|
// Non-template convenience overload for the default 128-bit type: forwards
// the scalar _a to simd_splat<simd128_t>().
BX_SIMD_FORCE_INLINE simd128_t simd_splat(float _a)
{
	return simd_splat<simd128_t>(_a);
}
|
|
|
|
// Non-template convenience overload for the default 128-bit type: forwards
// the scalar _a to simd_isplat<simd128_t>().
BX_SIMD_FORCE_INLINE simd128_t simd_isplat(uint32_t _a)
{
	return simd_isplat<simd128_t>(_a);
}
|
|
} // namespace bx
|
|
|
|
#endif // BX_SIMD_T_H_HEADER_GUARD
|