diff --git a/.gitignore b/.gitignore index 00f55db..ce41685 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,10 @@ .*.swp +*CMakeFiles/** /.idea /scratch* /build* /cmake-build* /3rdparty/ozz-animation/media +**/CMakeFiles + diff --git a/3rdparty/vectorial/.gitignore b/3rdparty/vectorial/.gitignore new file mode 100644 index 0000000..b0f0d30 --- /dev/null +++ b/3rdparty/vectorial/.gitignore @@ -0,0 +1,3 @@ +*.o +*.orig +specsuite-* diff --git a/3rdparty/vectorial/.travis.yml b/3rdparty/vectorial/.travis.yml new file mode 100644 index 0000000..856ed6d --- /dev/null +++ b/3rdparty/vectorial/.travis.yml @@ -0,0 +1,6 @@ +language: cpp +compiler: + - gcc + - clang + +script: make diff --git a/3rdparty/vectorial/LICENSE b/3rdparty/vectorial/LICENSE new file mode 100644 index 0000000..f9d49e0 --- /dev/null +++ b/3rdparty/vectorial/LICENSE @@ -0,0 +1,22 @@ +Copyright 2010 Mikko Lehtonen. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are +permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, this list + of conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. diff --git a/3rdparty/vectorial/Makefile b/3rdparty/vectorial/Makefile new file mode 100644 index 0000000..f23405c --- /dev/null +++ b/3rdparty/vectorial/Makefile @@ -0,0 +1,294 @@ + +CXX?=g++ +CLANG_CC=clang +CLANG_CXX=clang++ + +IPHONE_PLATFORM_PATH = /Developer/Platforms/iPhoneOS.platform/Developer +IPHONE_ISYSROOT_PATH = $(IPHONE_PLATFORM_PATH)/SDKs/iPhoneOS4.2.sdk/ +IPHONE_CC = $(IPHONE_PLATFORM_PATH)/usr/bin/g++ -isysroot $(IPHONE_ISYSROOT_PATH) -arch armv7 +# -mfloat-abi=softfp -mfpu=neon + +#CXXFLAGS += -Iinclude -O0 +#CXXFLAGS += -g -Iinclude -Wall -Wextra -pedantic -Wno-unused -O3 -fstrict-aliasing -Wstrict-aliasing=2 -ffast-math +CXXFLAGS += -Iinclude -Wall -Wextra -pedantic -Wno-unused -O3 -fstrict-aliasing -Wstrict-aliasing=2 -ffast-math -D__extern_always_inline=inline + +SPEC_SRC = $(wildcard spec/*.cpp) +SPEC_OBJ = $(SPEC_SRC:.cpp=.o) + +BENCH_SRC = $(wildcard bench/*.cpp) +BENCH_OBJ = $(BENCH_SRC:.cpp=.o) +BENCH_ASM = $(patsubst %.cpp,asm$(SUFFIX)/%.S,$(BENCH_SRC)) + +SUFFIX= + +DEFAULT_CC=1 + +ifeq ($(FORCE_SCALAR),1) + CXXFLAGS+= -DVECTORIAL_FORCED -DVECTORIAL_SCALAR + SUFFIX=-scalar +endif + +ifeq ($(FORCE_SSE),1) + CXXFLAGS+= -DVECTORIAL_FORCED -DVECTORIAL_SSE -msse -msse2 -mfpmath=sse + SUFFIX=-sse +endif + +ifeq ($(FORCE_GNU),1) + CXXFLAGS+= -DVECTORIAL_FORCED -DVECTORIAL_GNU + #-msse -msse2 -mfpmath=sse + SUFFIX=-gnu +endif + +ifeq ($(FORCE_NEON),1) + CXXFLAGS+= -DVECTORIAL_FORCED -DVECTORIAL_NEON + SUFFIX=-neon + ARM=1 +endif + + 
+ifeq ($(ARM),1) +ifeq ($(shell uname -s),Darwin) + CC=$(IPHONE_CC) + CXX=$(IPHONE_CC) +endif +# CXXFLAGS+= -mcpu=cortex-a8 + CXXFLAGS+= -mno-thumb -mfloat-abi=softfp -mfpu=neon + DEFAULT_CC=0 +endif + +ifeq ($(CLANG),1) + CC=$(CLANG_CC) + CXX=$(CLANG_CXX) + DEFAULT_CC=0 +endif + +ifeq ($(DEFAULT_CC),1) +# CXXFLAGS += -msse -msse2 -mfpmath=sse +endif + +ifeq ($(ASM),1) + CC+= -S + CXX+= -S +endif + +BUILDDIR=build$(SUFFIX) +SPEC_OBJ := $(addprefix $(BUILDDIR)/,$(SPEC_OBJ)) +BENCH_OBJ := $(addprefix $(BUILDDIR)/,$(BENCH_OBJ)) +SILENT=@ +MKDIR=mkdir -p +PATH_SEPARATOR=/ + +$(BUILDDIR)/%.o: %.cpp + @echo CXX $< + $(SILENT) $(MKDIR) $(subst /,$(PATH_SEPARATOR),$(dir $@)) + $(SILENT) $(COMPILE.cc) -o $@ $< + + + +.PHONY: all +all: specsuite$(SUFFIX) + ./specsuite$(SUFFIX) + + +.PHONY: full +full: + @clear + @echo FULL COMPILE at `date +%H:%M:%S` +# FORCE_SCALAR=1 $(MAKE) clean + @FORCE_SCALAR=1 $(MAKE) specsuite-scalar +# FORCE_GNU=1 $(MAKE) clean + @FORCE_GNU=1 $(MAKE) specsuite-gnu +# FORCE_SSE=1 $(MAKE) clean + @FORCE_SSE=1 $(MAKE) specsuite-sse +# FORCE_NEON=1 $(MAKE) clean +# FORCE_NEON=1 $(MAKE) specsuite-neon + @./specsuite-scalar + @./specsuite-sse + @./specsuite-gnu + +specsuite$(SUFFIX): $(SPEC_OBJ) + @echo LINK $@ + @$(CXX) $(LDFLAGS) $^ -o $@ + +.PHONY: depend +depend: + @echo DEP + @makedepend -Y -- $(CXXFLAGS) -- $(SPEC_SRC) $(BENCH_SRC) -p$(BUILDDIR)/ > /dev/null 2>&1 + @$(RM) Makefile.bak + +define asm-command +@mkdir -p $(dir asm$(SUFFIX)/$(1)) +$(CXX) $(CXXFLAGS) -S $(1) -o asm$(SUFFIX)/$(1).S + +endef + +bench-asm: $(BENCH_SRC) + $(foreach p,$(BENCH_SRC),$(call asm-command,$(p))) + +benchmark$(SUFFIX): $(BENCH_OBJ) bench-asm + $(CXX) $(BENCH_OBJ) -o $@ + +.PHONY: bench-full +bench-full: + FORCE_SCALAR=1 $(MAKE) benchmark-scalar + FORCE_GNU=1 $(MAKE) benchmark-gnu + FORCE_SSE=1 $(MAKE) benchmark-sse +# FORCE_NEON=1 $(MAKE) clean +# FORCE_NEON=1 $(MAKE) benchmark-neon + ./benchmark-scalar + ./benchmark-sse + ./benchmark-gnu + +.PHONY: clean +clean: + rm 
-f $(SPEC_OBJ) $(BENCH_OBJ) benchmark$(SUFFIX) specsuite$(SUFFIX) + rm -rf asm$(SUFFIX) + +.PHONY: realclean +realclean: clean + rm -f specsuite* + rm -rf build* + + +.PHONY: update_spec +update_spec: + ./tools/update_spec.rb spec/spec_*.cpp + +ifeq ($(MAKECMDGOALS),export) +ifeq ($(origin to),undefined) +$(error to not set, like make export to=/foo/bar) +endif +endif + +.PHONY: export +export: + $(SILENT) git archive --format tar master | tar x -C $(to) + + +include/vectorial/vec2f.h include/vectorial/vec3f.h include/vectorial/vec4f.h: include/vectorial/simd4f.h +include/vectorial/simd4f.h: include/vectorial/simd4f_scalar.h +include/vectorial/simd4f.h: include/vectorial/simd4f_neon.h +include/vectorial/simd4f.h: include/vectorial/simd4f_gnu.h +include/vectorial/simd4f.h: include/vectorial/simd4f_sse.h +include/vectorial/simd4f.h: include/vectorial/simd4f_scalar.h +include/vectorial/simd4f.h: include/vectorial/config.h +include/vectorial/simd4x4f.h: include/vectorial/simd4f.h +include/vectorial/simd4x4f.h: include/vectorial/simd4x4f_scalar.h +include/vectorial/simd4x4f.h: include/vectorial/simd4x4f_neon.h +include/vectorial/simd4x4f.h: include/vectorial/simd4x4f_gnu.h +include/vectorial/simd4x4f.h: include/vectorial/simd4x4f_sse.h +include/vectorial/simd4x4f.h: include/vectorial/config.h +spec/spec_helper.h: include/vectorial/simd4x4f.h include/vectorial/simd4f.h include/vectorial/vec4f.h include/vectorial/vec3f.h include/vectorial/vec2f.h +spec/spec.cpp: spec/spec.h +spec/spec_main.cpp: spec/spec.h +spec/spec_simd4f.cpp: spec/spec_helper.h +spec/spec_simd4x4f.cpp: spec/spec_helper.h +spec/spec_vec2f.cpp: spec/spec_helper.h +spec/spec_vec3f.cpp: spec/spec_helper.h +spec/spec_vec4f.cpp: spec/spec_helper.h + +$(BUILDDIR)/spec/spec_simd4f.o: \ + include/vectorial/simd4x4f.h include/vectorial/simd4f.h \ + include/vectorial/simd4f_scalar.h include/vectorial/simd4f_neon.h \ + include/vectorial/simd4f_gnu.h include/vectorial/simd4f_sse.h \ + include/vectorial/config.h + 
+$(BUILDDIR)/spec/spec_simd4x4f.o: \ + include/vectorial/simd4x4f.h include/vectorial/simd4f.h \ + include/vectorial/simd4f_scalar.h include/vectorial/simd4f_neon.h \ + include/vectorial/simd4f_gnu.h include/vectorial/simd4f_sse.h \ + include/vectorial/simd4x4f_scalar.h include/vectorial/simd4x4f_neon.h \ + include/vectorial/simd4x4f_gnu.h include/vectorial/simd4x4f_sse.h include/vectorial/config.h + +$(BUILDDIR)/spec/spec_vec2f.o $(BUILDDIR)/spec/spec_vec3f.o $(BUILDDIR)/spec/spec_vec4f.o: \ + include/vectorial/simd4x4f.h include/vectorial/simd4f.h \ + include/vectorial/vec4f.h include/vectorial/vec3f.h include/vectorial/vec2f.h \ + include/vectorial/simd4f_scalar.h include/vectorial/simd4f_neon.h \ + include/vectorial/simd4f_gnu.h include/vectorial/simd4f_sse.h \ + include/vectorial/simd4x4f_scalar.h include/vectorial/simd4x4f_neon.h \ + include/vectorial/simd4x4f_gnu.h include/vectorial/simd4x4f_sse.h include/vectorial/config.h + + + + + +# DO NOT DELETE + +$(BUILDDIR)/spec/spec.o: spec/spec.h +$(BUILDDIR)/spec/spec_main.o: spec/spec.h +$(BUILDDIR)/spec/spec_mat4f.o: spec/spec_helper.h spec/spec.h +$(BUILDDIR)/spec/spec_mat4f.o: include/vectorial/simd4f.h +$(BUILDDIR)/spec/spec_mat4f.o: include/vectorial/config.h +$(BUILDDIR)/spec/spec_mat4f.o: include/vectorial/simd4f_gnu.h +$(BUILDDIR)/spec/spec_mat4f.o: include/vectorial/simd4f_common.h +$(BUILDDIR)/spec/spec_mat4f.o: include/vectorial/vec4f.h include/vectorial/vec3f.h +$(BUILDDIR)/spec/spec_mat4f.o: include/vectorial/vec2f.h +$(BUILDDIR)/spec/spec_mat4f.o: include/vectorial/simd4x4f.h +$(BUILDDIR)/spec/spec_mat4f.o: include/vectorial/simd4f.h +$(BUILDDIR)/spec/spec_mat4f.o: include/vectorial/simd4x4f_gnu.h +$(BUILDDIR)/spec/spec_mat4f.o: include/vectorial/mat4f.h +$(BUILDDIR)/spec/spec_simd4f.o: spec/spec_helper.h spec/spec.h +$(BUILDDIR)/spec/spec_simd4f.o: include/vectorial/simd4f.h +$(BUILDDIR)/spec/spec_simd4f.o: include/vectorial/config.h +$(BUILDDIR)/spec/spec_simd4f.o: include/vectorial/simd4f_gnu.h 
+$(BUILDDIR)/spec/spec_simd4f.o: include/vectorial/simd4f_common.h +$(BUILDDIR)/spec/spec_simd4f.o: include/vectorial/vec4f.h include/vectorial/vec3f.h +$(BUILDDIR)/spec/spec_simd4f.o: include/vectorial/vec2f.h +$(BUILDDIR)/spec/spec_simd4f.o: include/vectorial/simd4x4f.h +$(BUILDDIR)/spec/spec_simd4f.o: include/vectorial/simd4f.h +$(BUILDDIR)/spec/spec_simd4f.o: include/vectorial/simd4x4f_gnu.h +$(BUILDDIR)/spec/spec_simd4f.o: include/vectorial/mat4f.h +$(BUILDDIR)/spec/spec_simd4x4f.o: spec/spec_helper.h spec/spec.h +$(BUILDDIR)/spec/spec_simd4x4f.o: include/vectorial/simd4f.h +$(BUILDDIR)/spec/spec_simd4x4f.o: include/vectorial/config.h +$(BUILDDIR)/spec/spec_simd4x4f.o: include/vectorial/simd4f_gnu.h +$(BUILDDIR)/spec/spec_simd4x4f.o: include/vectorial/simd4f_common.h +$(BUILDDIR)/spec/spec_simd4x4f.o: include/vectorial/vec4f.h +$(BUILDDIR)/spec/spec_simd4x4f.o: include/vectorial/vec3f.h +$(BUILDDIR)/spec/spec_simd4x4f.o: include/vectorial/vec2f.h +$(BUILDDIR)/spec/spec_simd4x4f.o: include/vectorial/simd4x4f.h +$(BUILDDIR)/spec/spec_simd4x4f.o: include/vectorial/simd4f.h +$(BUILDDIR)/spec/spec_simd4x4f.o: include/vectorial/simd4x4f_gnu.h +$(BUILDDIR)/spec/spec_simd4x4f.o: include/vectorial/mat4f.h +$(BUILDDIR)/spec/spec_vec2f.o: spec/spec_helper.h spec/spec.h +$(BUILDDIR)/spec/spec_vec2f.o: include/vectorial/simd4f.h +$(BUILDDIR)/spec/spec_vec2f.o: include/vectorial/config.h +$(BUILDDIR)/spec/spec_vec2f.o: include/vectorial/simd4f_gnu.h +$(BUILDDIR)/spec/spec_vec2f.o: include/vectorial/simd4f_common.h +$(BUILDDIR)/spec/spec_vec2f.o: include/vectorial/vec4f.h include/vectorial/vec3f.h +$(BUILDDIR)/spec/spec_vec2f.o: include/vectorial/vec2f.h +$(BUILDDIR)/spec/spec_vec2f.o: include/vectorial/simd4x4f.h +$(BUILDDIR)/spec/spec_vec2f.o: include/vectorial/simd4f.h +$(BUILDDIR)/spec/spec_vec2f.o: include/vectorial/simd4x4f_gnu.h +$(BUILDDIR)/spec/spec_vec2f.o: include/vectorial/mat4f.h +$(BUILDDIR)/spec/spec_vec3f.o: spec/spec_helper.h spec/spec.h 
+$(BUILDDIR)/spec/spec_vec3f.o: include/vectorial/simd4f.h +$(BUILDDIR)/spec/spec_vec3f.o: include/vectorial/config.h +$(BUILDDIR)/spec/spec_vec3f.o: include/vectorial/simd4f_gnu.h +$(BUILDDIR)/spec/spec_vec3f.o: include/vectorial/simd4f_common.h +$(BUILDDIR)/spec/spec_vec3f.o: include/vectorial/vec4f.h include/vectorial/vec3f.h +$(BUILDDIR)/spec/spec_vec3f.o: include/vectorial/vec2f.h +$(BUILDDIR)/spec/spec_vec3f.o: include/vectorial/simd4x4f.h +$(BUILDDIR)/spec/spec_vec3f.o: include/vectorial/simd4f.h +$(BUILDDIR)/spec/spec_vec3f.o: include/vectorial/simd4x4f_gnu.h +$(BUILDDIR)/spec/spec_vec3f.o: include/vectorial/mat4f.h +$(BUILDDIR)/spec/spec_vec4f.o: spec/spec_helper.h spec/spec.h +$(BUILDDIR)/spec/spec_vec4f.o: include/vectorial/simd4f.h +$(BUILDDIR)/spec/spec_vec4f.o: include/vectorial/config.h +$(BUILDDIR)/spec/spec_vec4f.o: include/vectorial/simd4f_gnu.h +$(BUILDDIR)/spec/spec_vec4f.o: include/vectorial/simd4f_common.h +$(BUILDDIR)/spec/spec_vec4f.o: include/vectorial/vec4f.h include/vectorial/vec3f.h +$(BUILDDIR)/spec/spec_vec4f.o: include/vectorial/vec2f.h +$(BUILDDIR)/spec/spec_vec4f.o: include/vectorial/simd4x4f.h +$(BUILDDIR)/spec/spec_vec4f.o: include/vectorial/simd4f.h +$(BUILDDIR)/spec/spec_vec4f.o: include/vectorial/simd4x4f_gnu.h +$(BUILDDIR)/spec/spec_vec4f.o: include/vectorial/mat4f.h +$(BUILDDIR)/bench/add_bench.o: bench/bench.h include/vectorial/vec4f.h +$(BUILDDIR)/bench/bench.o: bench/bench.h include/vectorial/config.h +$(BUILDDIR)/bench/dot_bench.o: bench/bench.h include/vectorial/vec4f.h +$(BUILDDIR)/bench/matrix_bench.o: bench/bench.h include/vectorial/simd4x4f.h +$(BUILDDIR)/bench/matrix_bench.o: include/vectorial/simd4f.h +$(BUILDDIR)/bench/matrix_bench.o: include/vectorial/simd4x4f_gnu.h +$(BUILDDIR)/bench/quad_bench.o: bench/bench.h include/vectorial/simd4x4f.h +$(BUILDDIR)/bench/quad_bench.o: include/vectorial/simd4f.h +$(BUILDDIR)/bench/quad_bench.o: include/vectorial/simd4x4f_gnu.h diff --git a/3rdparty/vectorial/README 
b/3rdparty/vectorial/README new file mode 100644 index 0000000..86d06a8 --- /dev/null +++ b/3rdparty/vectorial/README @@ -0,0 +1,60 @@ + + Vectorial - vector math library + + + + Motivation + + I couldn't find an open source math library that was usable and + supported simd - especially the ARM NEON variant. + + + Features + + Supports NEON, SSE, scalar and generic gcc vector extension. + Most basic vector and matrix math is available, but not quite + yet full featured. + + + Design + + Vectorial consists of two main parts, pure-C wrapper around + platform-specific vector instructions in the simd*.h files + and C++ classes for common uses, the vec*.h and mat*.h + + The config.h autodetects appropriate vector instructions to use. + + The platform-specific support is done with intrinsics only, + allowing the compiler to have a full view of the code, hopefully + resulting in better optimizations especially with reordering etc. + + + Installation / Usage + + Add vectorial/include to your include path + + #include "vectorial/simd4f.h" + for C-only simd wrapper, using it looks like this: + simd4f v = simd4f_normalize( simd4f_add( simd4f_create(1,2,3,4), y) ); + float z = simd4f_get_z(v); + + #include "vectorial/vectorial.h" + for C++ classes. They reside in vectorial namespace, you might + want to alias them to your own namespace + namespace myproject { + using namespace ::vectorial; + // if you like different name: typedef vec3f Vector3; + } + using myproject::vec4f; + + vec4f v = normalize( vec4f(1,2,3,4) + y ); + float z = v.z(); + + + License + + 2-clause BSD. 
See LICENSE + + + + diff --git a/3rdparty/vectorial/bench/add_bench.cpp b/3rdparty/vectorial/bench/add_bench.cpp new file mode 100644 index 0000000..481caf4 --- /dev/null +++ b/3rdparty/vectorial/bench/add_bench.cpp @@ -0,0 +1,60 @@ + +#include "bench.h" +#include + +#include +#include "vectorial/vec4f.h" + +#define NUM (81920) +#define ITER 100 +using namespace vectorial; + +namespace { + vec4f* alloc_vec4f(size_t n) { + void *ptr = memalign(n*sizeof(vec4f), 16); + return static_cast(ptr); + } +} + + + +static vec4f * a; +static vec4f * b; +static vec4f * c; + + + + +void add_func() { + + vec4f* vectorial_restrict aa = a; + vec4f* vectorial_restrict bb = b; + vec4f* vectorial_restrict cc = c; + + for(size_t i = 0; i < NUM; ++i) + { + cc[i] = aa[i] + bb[i]; + } +} + +void add_bench() { + + a = alloc_vec4f(NUM); + b = alloc_vec4f(NUM); + c = alloc_vec4f(NUM); + + + for(size_t i = 0; i < NUM; ++i) + { + a[i]=vec4f(i,i,i,i); + b[i]=vec4f(NUM-i, NUM-i, NUM-i, NUM-i); + } + + profile("add", add_func, ITER, NUM); + + memfree(a); + memfree(b); + memfree(c); + + +} diff --git a/3rdparty/vectorial/bench/bench.cpp b/3rdparty/vectorial/bench/bench.cpp new file mode 100644 index 0000000..fb0bb63 --- /dev/null +++ b/3rdparty/vectorial/bench/bench.cpp @@ -0,0 +1,117 @@ +#include "bench.h" +#include +#include +#include "vectorial/config.h" + + +namespace profiler { + + #ifdef BENCH_MACH + mach_timebase_info_data_t info; + void init() { + mach_timebase_info(&info); + } + #endif + + #ifdef BENCH_GTOD + void init() { + } + #endif + + #ifdef BENCH_QPC + double frequency; + void init() { + LARGE_INTEGER freq; + QueryPerformanceFrequency(&freq); + frequency = (double)freq.QuadPart; + } + #endif + + + time_t now() { + + #ifdef BENCH_MACH + return mach_absolute_time(); + #endif + + #ifdef BENCH_GTOD + time_t v; + gettimeofday(&v, NULL); + return v; + #endif + + #ifdef BENCH_QPC + LARGE_INTEGER v; + QueryPerformanceCounter(&v); + return v; + #endif + + } + + + double diffTime(time_t 
start, time_t end) { + + #ifdef BENCH_GTOD + return (end.tv_sec - start.tv_sec) + (end.tv_usec - start.tv_usec) / 1000000.0; + #endif + + #ifdef BENCH_MACH + return ((end-start) * info.numer / info.denom) / 1000000000.0; + #endif + + #ifdef BENCH_QPC + return (end.QuadPart - start.QuadPart) / frequency; + #endif + } + +} + + +std::string formatTime(double d, double relative ) { + const double sec = 1.0; + const double milli = 0.001; + const double micro = 0.000001; + const double nano = 0.000000001; + std::stringstream ss; + if( relative < 0.0) relative=d; + if( relative >= sec ) ss << d << "s"; + else if( relative >= milli ) ss << d/milli << "ms"; + else if( relative >= micro ) ss << d/micro <<"us"; + else ss << d/nano << "ns"; + return ss.str(); +} + +void profile(const char* name, void (*func)(), int iterations, int elements) { + + profiler::init(); + profiler::time_t start = profiler::now(); + for(int i = 0; i < iterations; ++i) + { + func(); + } + profiler::time_t end = profiler::now(); + + std::cout << "Using simd: " << VECTORIAL_SIMD_TYPE << std::endl; + std::cout << "Testing: " << name << std::endl; + std::cout << "Duration " << formatTime(profiler::diffTime(start,end)) << std::endl; + std::cout << "Per iter " << formatTime(profiler::diffTime(start,end) / iterations) << std::endl; + std::cout << "Per item " << formatTime(profiler::diffTime(start,end) / iterations / elements) << std::endl; + + +} + +void add_bench(); +void dot_bench(); +void quad_bench(); +void matrix_bench(); + +int main() { + +// add_bench(); +// dot_bench(); +// quad_bench(); + matrix_bench(); + + return 0; +} + diff --git a/3rdparty/vectorial/bench/bench.h b/3rdparty/vectorial/bench/bench.h new file mode 100644 index 0000000..ddefcce --- /dev/null +++ b/3rdparty/vectorial/bench/bench.h @@ -0,0 +1,65 @@ +#ifndef BENCH_H +#define BENCH_H + +#include +#include + +#ifdef __APPLE__ + #define BENCH_MACH + #include + #include +#elif defined(_WIN32) + #define BENCH_QPC + #define 
WIN32_LEAN_AND_MEAN + #include + #include +#else + #define BENCH_GTOD + #include +#endif + + +static void* memalign(size_t count, size_t align) { + #ifdef _WIN32 + return _aligned_malloc(count,align); + #else + void *ptr; + int e = posix_memalign(&ptr, align, count); + // if( e == EINVAL ) printf("EINVAL posix_memalign\n"); + // if( e == ENOMEM ) printf("ENOMEM posix_memalign\n"); + return ptr; + #endif +} + +static void memfree(void* ptr) { + #ifdef _WIN32 + _aligned_free(ptr); + #else + free(ptr); + #endif +} + +namespace profiler { + + #ifdef BENCH_GTOD + typedef struct timeval time_t; + #endif + #ifdef BENCH_MACH + typedef const uint64_t time_t; + #endif + #ifdef BENCH_QPC + typedef LARGE_INTEGER time_t; + #endif + + void init(); + time_t now(); + + double diffTime(time_t start, time_t end); + +} + +std::string formatTime(double d, double relative=-1); +void profile(const char* name, void (*func)(), int iterations, int elements); + + +#endif diff --git a/3rdparty/vectorial/bench/dot_bench.cpp b/3rdparty/vectorial/bench/dot_bench.cpp new file mode 100644 index 0000000..932dc6f --- /dev/null +++ b/3rdparty/vectorial/bench/dot_bench.cpp @@ -0,0 +1,60 @@ + +#include "bench.h" +#include + +#include +#include "vectorial/vec4f.h" + +#define NUM (81920) +#define ITER 100 +using namespace vectorial; + +namespace { + vec4f* alloc_vec4f(size_t n) { + void *ptr = memalign(n*sizeof(vec4f), 16); + return static_cast(ptr); + } +} + + + +static vec4f * a; +static vec4f * b; +static float * c; + + + + +void dot_func() { + + vec4f* vectorial_restrict aa = a; + vec4f* vectorial_restrict bb = b; + float* vectorial_restrict cc = c; + + for(size_t i = 0; i < NUM; ++i) + { + cc[i] = dot(aa[i], bb[i]); + } +} + +void dot_bench() { + + a = alloc_vec4f(NUM); + b = alloc_vec4f(NUM); + c = static_cast(malloc(NUM * sizeof(float))); + + + for(size_t i = 0; i < NUM; ++i) + { + a[i]=vec4f(i,i,i,i); + b[i]=vec4f(NUM-i, NUM-i, NUM-i, NUM-i); + } + + profile("dot", dot_func, ITER, NUM); + + 
memfree(a); + memfree(b); + memfree(c); + + +} diff --git a/3rdparty/vectorial/bench/matrix_bench.cpp b/3rdparty/vectorial/bench/matrix_bench.cpp new file mode 100644 index 0000000..d30b247 --- /dev/null +++ b/3rdparty/vectorial/bench/matrix_bench.cpp @@ -0,0 +1,62 @@ + +#include "bench.h" +#include + +#include +#include "vectorial/simd4x4f.h" + +#define NUM (819200) +#define ITER 100 +//using namespace vectorial; + +namespace { + simd4x4f* alloc_vec4x4f(size_t n) { + void *ptr = memalign(n*sizeof(simd4x4f), 16); + return static_cast(ptr); + } +} + + + +static simd4x4f * a; +static simd4x4f * b; +static simd4x4f * c; + + + + +void matrix_func() { + + simd4x4f* vectorial_restrict aa = a; + simd4x4f* vectorial_restrict bb = b; + simd4x4f* vectorial_restrict cc = c; + + for(size_t i = 0; i < NUM; ++i) + { + simd4x4f_matrix_mul(&aa[i], &bb[i], &bb[i]); + } +} + +void matrix_bench() { + + a = alloc_vec4x4f(NUM); + b = alloc_vec4x4f(NUM); + c = alloc_vec4x4f(NUM); + + + for(size_t i = 0; i < NUM; ++i) + { + simd4f v = simd4f_create(i,i,i,i); + simd4f vi = simd4f_create(NUM-i,NUM-i,NUM-i,NUM-i); + a[i]=simd4x4f_create(v,v,v,v); + b[i]=simd4x4f_create(vi,vi,vi,vi); + } + + profile("matrix mul", matrix_func, ITER, NUM); + + memfree(a); + memfree(b); + memfree(c); + + +} diff --git a/3rdparty/vectorial/bench/quad_bench.cpp b/3rdparty/vectorial/bench/quad_bench.cpp new file mode 100644 index 0000000..6930b43 --- /dev/null +++ b/3rdparty/vectorial/bench/quad_bench.cpp @@ -0,0 +1,123 @@ + +#include "bench.h" +#include + +#include +#include "vectorial/simd4x4f.h" + +#define NUM (81920) +#define ITER 100 +//using namespace vectorial; + +namespace { + simd4x4f* alloc_simd4x4f(size_t n) { + void *ptr = memalign(n*sizeof(simd4x4f), 16); + return static_cast(ptr); + } +} + + + +static simd4x4f * a; +static simd4x4f * b; +static simd4x4f * c; + + + +static simd4x4f add_4x4(SIMD_PARAM(simd4x4f, a), SIMD_PARAM(simd4x4f, b)) { + return simd4x4f_create( + simd4f_add(a.x, b.x), + 
simd4f_add(a.y, b.y), + simd4f_add(a.z, b.z), + simd4f_add(a.w, b.w) + ); +} + +static simd4x4f add_4x4_rp(simd4x4f *a, simd4x4f *b) { + return simd4x4f_create( + simd4f_add(a->x, b->x), + simd4f_add(a->y, b->y), + simd4f_add(a->z, b->z), + simd4f_add(a->w, b->w) + ); +} + + +static void add_4x4_p(simd4x4f *a, simd4x4f *b, simd4x4f *out) { + out->x = simd4f_add(a->x, b->x); + out->y = simd4f_add(a->y, b->y); + out->z = simd4f_add(a->z, b->z); + out->w = simd4f_add(a->w, b->w); +} + + + + +void quad_return_func() { + + + simd4x4f* aa = a; + simd4x4f* bb = b; + simd4x4f* cc = c; + + for(size_t i = 0; i < NUM; ++i) + { + bb[i] = add_4x4(aa[i], bb[i]); + } +} + + +void quad_pointer_func() { + + simd4x4f* aa = a; + simd4x4f* bb = b; + simd4x4f* cc = c; + + for(size_t i = 0; i < NUM; ++i) + { + add_4x4_p(&aa[i], &bb[i], &bb[i]); + } + + +} + +void quad_pointer_return_func() { + + simd4x4f* aa = a; + simd4x4f* bb = b; + simd4x4f* cc = c; + + for(size_t i = 0; i < NUM; ++i) + { + bb[i] = add_4x4_rp(&aa[i], &bb[i]); + } + + +} + + +void quad_bench() { + + a = alloc_simd4x4f(NUM); + b = alloc_simd4x4f(NUM); + c = alloc_simd4x4f(NUM); + + + for(size_t i = 0; i < NUM; ++i) + { + simd4f t = simd4f_create(i,i,i,i); + simd4f t2 = simd4f_create(NUM-i,NUM-i,NUM-i,NUM-i); + a[i]=simd4x4f_create(t,t,t,t); + b[i]=simd4x4f_create(t2,t2,t2,t2); + } + + profile("quad return-value", quad_return_func, ITER, NUM); + profile("quad pass-by-pointer", quad_pointer_func, ITER, NUM); + profile("quad pass-by-pointer return-value", quad_pointer_return_func, ITER, NUM); + + memfree(a); + memfree(b); + memfree(c); + + +} diff --git a/3rdparty/vectorial/include/vectorial/config.h b/3rdparty/vectorial/include/vectorial/config.h new file mode 100644 index 0000000..864a6d0 --- /dev/null +++ b/3rdparty/vectorial/include/vectorial/config.h @@ -0,0 +1,101 @@ +/* + Vectorial + Copyright (c) 2010 Mikko Lehtonen + Licensed under the terms of the two-clause BSD License (see LICENSE) +*/ +#ifndef 
VECTORIAL_CONFIG_H +#define VECTORIAL_CONFIG_H + + +#ifndef VECTORIAL_FORCED + #if defined(__SSE__) || (_M_IX86_FP > 0) || (_M_X64 > 0) + + #define VECTORIAL_SSE + + // __ARM_NEON is used instead of __ARM_NEON__ on armv8. + #elif defined(__ARM_NEON__) || defined(__ARM_NEON) + + #define VECTORIAL_NEON + + // Don't use gnu extension for arm, buggy with some gccs with armv6 and -Os, + // Also doesn't seem perform as well + #elif defined(__GNUC__) && !defined(__arm__) + + #define VECTORIAL_GNU + + #else + + #define VECTORIAL_SCALAR + + #endif +#endif + + + +#ifdef VECTORIAL_SCALAR + #define VECTORIAL_SIMD_TYPE "scalar" +#endif + +#ifdef VECTORIAL_SSE + #define VECTORIAL_SIMD_TYPE "sse" +#endif + +#ifdef VECTORIAL_NEON + #define VECTORIAL_SIMD_TYPE "neon" + #define VECTORIAL_HAVE_SIMD2F +#endif + +#ifdef VECTORIAL_GNU + #define VECTORIAL_SIMD_TYPE "gnu" +#endif + + + +#if defined(VECTORIAL_FORCED) && !defined(VECTORIAL_SIMD_TYPE) + #error VECTORIAL_FORCED set but no simd-type found, try f.ex. 
VECTORIAL_SCALAR +#endif + + +#define vectorial_inline static inline + +#if defined(__GNUC__) + #if defined(__cplusplus) + #define vectorial_restrict __restrict + #endif + #define simd4f_aligned16 __attribute__ ((aligned (16))) +#elif defined(_WIN32) + #define vectorial_restrict + #define simd4f_aligned16 __declspec(align(16)) +#else + #define vectorial_restrict restrict + #define simd4f_aligned16 +#endif +// #define vectorial_restrict + +#ifdef __GNUC__ + #define vectorial_pure __attribute__((pure)) +#else + #define vectorial_pure +#endif + +#ifdef _WIN32 + #if defined(min) || defined(max) +#pragma message ( "set NOMINMAX as preprocessor macro, undefining min/max " ) +#undef min +#undef max + #endif +#endif + +#ifdef __cplusplus + // Hack around msvc badness + #define SIMD_PARAM(t, p) const t& p +#else + #define SIMD_PARAM(t, p) t p +#endif + +#define VECTORIAL_PI 3.14159265f +#define VECTORIAL_HALFPI 1.57079633f + + + +#endif diff --git a/3rdparty/vectorial/include/vectorial/mat4f.h b/3rdparty/vectorial/include/vectorial/mat4f.h new file mode 100644 index 0000000..4e7b319 --- /dev/null +++ b/3rdparty/vectorial/include/vectorial/mat4f.h @@ -0,0 +1,197 @@ +/* + Vectorial + Copyright (c) 2010 Mikko Lehtonen + Licensed under the terms of the two-clause BSD License (see LICENSE) +*/ +#ifndef VECTORIAL_MAT4F_H +#define VECTORIAL_MAT4F_H + +#ifndef VECTORIAL_SIMD4X4F_H + #include "vectorial/simd4x4f.h" +#endif + +#ifndef VECTORIAL_VEC4F_H + #include "vectorial/vec4f.h" +#endif + + +namespace vectorial { + + + class mat4f { + public: + + simd4x4f value; + + inline mat4f() {} + inline mat4f(const mat4f& m) : value(m.value) {} + inline mat4f(const simd4x4f& v) : value(v) {} + inline mat4f(const vec4f& v0, const vec4f& v1, const vec4f& v2, const vec4f& v3) : value(simd4x4f_create(v0.value, v1.value, v2.value, v3.value)) {} + explicit inline mat4f(const float *ary) { simd4x4f_uload(&value, ary); } + + inline void load(const float *ary) { + value.x = simd4f_uload4(ary); + 
value.y = simd4f_uload4(ary+4); + value.z = simd4f_uload4(ary+8); + value.w = simd4f_uload4(ary+12); + } + + inline void store(float *ary) const { + simd4f_ustore4(value.x, ary); + simd4f_ustore4(value.y, ary+4); + simd4f_ustore4(value.z, ary+8); + simd4f_ustore4(value.w, ary+12); + } + + static mat4f identity() { mat4f m; simd4x4f_identity(&m.value); return m; } + + static mat4f perspective(float fovy, float aspect, float znear, float zfar) { + simd4x4f m; + simd4x4f_perspective(&m, fovy, aspect, znear, zfar); + return m; + } + + static mat4f ortho(float left, float right, float bottom, float top, float znear, float zfar) { + simd4x4f m; + simd4x4f_ortho(&m, left, right, bottom, top, znear, zfar); + return m; + } + + static mat4f lookAt(const vec3f& eye, const vec3f& center, const vec3f& up) { + simd4x4f m; + simd4x4f_lookat(&m, eye.value, center.value, up.value); + return m; + } + + static mat4f translation(const vec3f& pos) { + simd4x4f m; + simd4x4f_translation(&m, pos.x(), pos.y(), pos.z()); + return m; + } + + static mat4f axisRotation(float angle, const vec3f& axis) { + simd4x4f m; + simd4x4f_axis_rotation(&m, angle, axis.value); + return m; + } + + static mat4f scale(float scale) { + return simd4x4f_create( simd4f_create(scale,0,0,0), + simd4f_create(0,scale,0,0), + simd4f_create(0,0,scale,0), + simd4f_create(0,0,0,1) ); + } + + static mat4f scale(const vec3f& scale) { + return simd4x4f_create( simd4f_create(scale.x(),0,0,0), + simd4f_create(0,scale.y(),0,0), + simd4f_create(0,0,scale.z(),0), + simd4f_create(0,0,0,1) ); + } + + }; + + + vectorial_inline mat4f operator*(const mat4f& lhs, const mat4f& rhs) { + mat4f ret; + simd4x4f_matrix_mul(&lhs.value, &rhs.value, &ret.value); + return ret; + } + + vectorial_inline mat4f operator*=(mat4f& lhs, const mat4f& rhs) { + const simd4x4f tmp = lhs.value; + simd4x4f_matrix_mul(&tmp, &rhs.value, &lhs.value); + return lhs; + } + + + vectorial_inline vec4f operator*(const mat4f& lhs, const vec4f& rhs) { + vec4f ret; + 
simd4x4f_matrix_vector_mul(&lhs.value, &rhs.value, &ret.value); + return ret; + } + + vectorial_inline vec3f transformVector(const mat4f& lhs, const vec3f& rhs) { + vec3f ret; + simd4x4f_matrix_vector3_mul(&lhs.value, &rhs.value, &ret.value); + return ret; + } + + vectorial_inline vec4f transformVector(const mat4f& lhs, const vec4f& rhs) { + vec4f ret; + simd4x4f_matrix_vector_mul(&lhs.value, &rhs.value, &ret.value); + return ret; + } + + vectorial_inline vec3f transformPoint(const mat4f& lhs, const vec3f& rhs) { + vec3f ret; + simd4x4f_matrix_point3_mul(&lhs.value, &rhs.value, &ret.value); + return ret; + } + + vectorial_inline vec3f orthoInverseTransformPoint(const mat4f& lhs, const vec3f& rhs) { + vec3f ret; + simd4x4f_inv_ortho_matrix_point3_mul(&lhs.value, &rhs.value, &ret.value); + return ret; + } + + vectorial_inline vec3f orthoInverseTransformVector(const mat4f& lhs, const vec3f& rhs) { + vec3f ret; + simd4x4f_inv_ortho_matrix_vector3_mul(&lhs.value, &rhs.value, &ret.value); + return ret; + } + + + vectorial_inline mat4f transpose(const mat4f& m) { + mat4f ret; + simd4x4f_transpose(&m.value, &ret.value); + return ret; + } + + + vectorial_inline mat4f inverse(const mat4f& m) { + mat4f ret; + simd4x4f_inverse(&m.value, &ret.value); + return ret; + } + + + +} + + + +#ifdef VECTORIAL_OSTREAM +//#include + +vectorial_inline std::ostream& operator<<(std::ostream& os, const vectorial::mat4f& v) { + + os << "[ "; + os << simd4f_get_x(v.value.x) << ", "; + os << simd4f_get_x(v.value.y) << ", "; + os << simd4f_get_x(v.value.z) << ", "; + os << simd4f_get_x(v.value.w) << " ; "; + + os << simd4f_get_y(v.value.x) << ", "; + os << simd4f_get_y(v.value.y) << ", "; + os << simd4f_get_y(v.value.z) << ", "; + os << simd4f_get_y(v.value.w) << " ; "; + + os << simd4f_get_z(v.value.x) << ", "; + os << simd4f_get_z(v.value.y) << ", "; + os << simd4f_get_z(v.value.z) << ", "; + os << simd4f_get_z(v.value.w) << " ; "; + + os << simd4f_get_w(v.value.x) << ", "; + os << 
simd4f_get_w(v.value.y) << ", "; + os << simd4f_get_w(v.value.z) << ", "; + os << simd4f_get_w(v.value.w) << " ]"; + + return os; +} +#endif + + + + +#endif diff --git a/3rdparty/vectorial/include/vectorial/simd2f.h b/3rdparty/vectorial/include/vectorial/simd2f.h new file mode 100644 index 0000000..3af1959 --- /dev/null +++ b/3rdparty/vectorial/include/vectorial/simd2f.h @@ -0,0 +1,38 @@ +/* + Vectorial + Copyright (c) 2014 Google, Inc. + Licensed under the terms of the two-clause BSD License (see LICENSE) +*/ + +#ifndef VECTORIAL_SIMD2F_H +#define VECTORIAL_SIMD2F_H + +#include "vectorial/config.h" + +#if defined(VECTORIAL_NEON) + #include "simd2f_neon.h" +#else + #error No implementation defined +#endif + +#include "simd2f_common.h" + +#ifdef __cplusplus + + #ifdef VECTORIAL_OSTREAM + #include <ostream> + + vectorial_inline std::ostream& operator<<(std::ostream& os, const simd2f& v) { + os << "simd2f(" << simd2f_get_x(v) << ", " + << simd2f_get_y(v) << ")"; + return os; + } + #endif + +#endif + + + + +#endif + diff --git a/3rdparty/vectorial/include/vectorial/simd2f_common.h b/3rdparty/vectorial/include/vectorial/simd2f_common.h new file mode 100644 index 0000000..e2046ee --- /dev/null +++ b/3rdparty/vectorial/include/vectorial/simd2f_common.h @@ -0,0 +1,22 @@ +/* + Vectorial + Copyright (c) 2014 Google + Licensed under the terms of the two-clause BSD License (see LICENSE) +*/ +#ifndef VECTORIAL_SIMD2F_COMMON_H +#define VECTORIAL_SIMD2F_COMMON_H + +vectorial_inline simd2f simd2f_length2(simd2f v) { + return simd2f_sqrt( simd2f_dot2(v,v) ); +} + +vectorial_inline simd2f simd2f_length2_squared(simd2f v) { + return simd2f_dot2(v,v); +} + +vectorial_inline simd2f simd2f_normalize2(simd2f a) { + simd2f invlen = simd2f_rsqrt( simd2f_dot2(a,a) ); + return simd2f_mul(a, invlen); +} + +#endif diff --git a/3rdparty/vectorial/include/vectorial/simd2f_neon.h b/3rdparty/vectorial/include/vectorial/simd2f_neon.h new file mode 100644 index 0000000..ca72e04 --- /dev/null +++ 
b/3rdparty/vectorial/include/vectorial/simd2f_neon.h @@ -0,0 +1,159 @@ +/* + Vectorial + Copyright (c) 2010 Mikko Lehtonen + Copyright (c) 2014 Google, Inc. + Licensed under the terms of the two-clause BSD License (see LICENSE) +*/ +#ifndef VECTORIAL_SIMD2F_NEON_H +#define VECTORIAL_SIMD2F_NEON_H + +#include <arm_neon.h> + +#ifdef __cplusplus +extern "C" { +#endif + + +typedef float32x2_t simd2f; + +typedef union { + simd2f s ; + float f[2]; +} _simd2f_union; + + + +vectorial_inline simd2f simd2f_create(float x, float y) { + const float32_t d[2] = { x,y }; + simd2f s = vld1_f32(d); + return s; +} + +vectorial_inline simd2f simd2f_zero() { return vdup_n_f32(0.0f); } + +vectorial_inline simd2f simd2f_uload2(const float *ary) { + const float32_t* ary32 = (const float32_t*)ary; + simd2f s = vld1_f32(ary32); + return s; +} + +vectorial_inline void simd2f_ustore2(const simd2f val, float *ary) { + vst1_f32( (float32_t*)ary, val); +} + +vectorial_inline simd2f simd2f_splat(float v) { + simd2f s = vdup_n_f32(v); + return s; +} + +vectorial_inline simd2f simd2f_splat_x(simd2f v) { + simd2f ret = vdup_lane_f32(v, 0); + return ret; +} + +vectorial_inline simd2f simd2f_splat_y(simd2f v) { + simd2f ret = vdup_lane_f32(v, 1); + return ret; +} + +vectorial_inline simd2f simd2f_reciprocal(simd2f v) { + simd2f estimate = vrecpe_f32(v); + estimate = vmul_f32(vrecps_f32(estimate, v), estimate); + estimate = vmul_f32(vrecps_f32(estimate, v), estimate); + return estimate; +} + +vectorial_inline void simd2f_rsqrt_1iteration(const simd2f& v, simd2f& estimate) { + simd2f estimate2 = vmul_f32(estimate, v); + estimate = vmul_f32(estimate, vrsqrts_f32(estimate2, estimate)); +} + +vectorial_inline simd2f simd2f_rsqrt1(simd2f v) { + simd2f estimate = vrsqrte_f32(v); + simd2f_rsqrt_1iteration(v, estimate); + return estimate; +} + +vectorial_inline simd2f simd2f_rsqrt2(simd2f v) { + simd2f estimate = vrsqrte_f32(v); + simd2f_rsqrt_1iteration(v, estimate); + simd2f_rsqrt_1iteration(v, estimate); + return 
estimate; +} + +vectorial_inline simd2f simd2f_rsqrt3(simd2f v) { + simd2f estimate = vrsqrte_f32(v); + simd2f_rsqrt_1iteration(v, estimate); + simd2f_rsqrt_1iteration(v, estimate); + simd2f_rsqrt_1iteration(v, estimate); + return estimate; +} + +// http://en.wikipedia.org/wiki/Fast_inverse_square_root makes the argument for +// one iteration but two gives a significant accuracy improvement. +vectorial_inline simd2f simd2f_rsqrt(simd2f v) { + return simd2f_rsqrt2(v); +} + +vectorial_inline simd2f simd2f_sqrt(simd2f v) { + + return vreinterpret_f32_u32(vand_u32( vtst_u32(vreinterpret_u32_f32(v), + vreinterpret_u32_f32(v)), + vreinterpret_u32_f32( + simd2f_reciprocal(simd2f_rsqrt(v))) + ) + ); + +} + +// arithmetics + +vectorial_inline simd2f simd2f_add(simd2f lhs, simd2f rhs) { + simd2f ret = vadd_f32(lhs, rhs); + return ret; +} + +vectorial_inline simd2f simd2f_sub(simd2f lhs, simd2f rhs) { + simd2f ret = vsub_f32(lhs, rhs); + return ret; +} + +vectorial_inline simd2f simd2f_mul(simd2f lhs, simd2f rhs) { + simd2f ret = vmul_f32(lhs, rhs); + return ret; +} + +vectorial_inline simd2f simd2f_div(simd2f lhs, simd2f rhs) { + simd2f recip = simd2f_reciprocal( rhs ); + simd2f ret = vmul_f32(lhs, recip); + return ret; +} + +vectorial_inline simd2f simd2f_madd(simd2f m1, simd2f m2, simd2f a) { + return vmla_f32( a, m1, m2 ); +} + +vectorial_inline float simd2f_get_x(simd2f s) { return vget_lane_f32(s, 0); } +vectorial_inline float simd2f_get_y(simd2f s) { return vget_lane_f32(s, 1); } + +vectorial_inline simd2f simd2f_dot2(simd2f lhs, simd2f rhs) { + const simd2f m = simd2f_mul(lhs, rhs); + return vpadd_f32(m, m); +} + +vectorial_inline simd2f simd2f_min(simd2f a, simd2f b) { + return vmin_f32( a, b ); +} + +vectorial_inline simd2f simd2f_max(simd2f a, simd2f b) { + return vmax_f32( a, b ); +} + + +#ifdef __cplusplus +} +#endif + + +#endif + diff --git a/3rdparty/vectorial/include/vectorial/simd4f.h b/3rdparty/vectorial/include/vectorial/simd4f.h new file mode 100644 index 
0000000..81037b7 --- /dev/null +++ b/3rdparty/vectorial/include/vectorial/simd4f.h @@ -0,0 +1,51 @@ +/* + Vectorial + Copyright (c) 2010 Mikko Lehtonen + Licensed under the terms of the two-clause BSD License (see LICENSE) +*/ + +#ifndef VECTORIAL_SIMD4F_H +#define VECTORIAL_SIMD4F_H + +#ifndef VECTORIAL_CONFIG_H + #include "vectorial/config.h" +#endif + + +#ifdef VECTORIAL_SCALAR + #include "simd4f_scalar.h" +#elif defined(VECTORIAL_SSE) + #include "simd4f_sse.h" +#elif defined(VECTORIAL_GNU) + #include "simd4f_gnu.h" +#elif defined(VECTORIAL_NEON) + #include "simd4f_neon.h" +#else + #error No implementation defined +#endif + +#include "simd4f_common.h" + + + +#ifdef __cplusplus + + #ifdef VECTORIAL_OSTREAM + #include <ostream> + + vectorial_inline std::ostream& operator<<(std::ostream& os, const simd4f& v) { + os << "simd4f(" << simd4f_get_x(v) << ", " + << simd4f_get_y(v) << ", " + << simd4f_get_z(v) << ", " + << simd4f_get_w(v) << ")"; + return os; + } + #endif + +#endif + + + + +#endif + diff --git a/3rdparty/vectorial/include/vectorial/simd4f_common.h b/3rdparty/vectorial/include/vectorial/simd4f_common.h new file mode 100644 index 0000000..f22111f --- /dev/null +++ b/3rdparty/vectorial/include/vectorial/simd4f_common.h @@ -0,0 +1,74 @@ +/* + Vectorial + Copyright (c) 2010 Mikko Lehtonen + Copyright (c) 2014 Google, Inc. 
+ Licensed under the terms of the two-clause BSD License (see LICENSE) +*/ +#ifndef VECTORIAL_SIMD4F_COMMON_H +#define VECTORIAL_SIMD4F_COMMON_H + + +vectorial_inline simd4f simd4f_sum(simd4f v) { + const simd4f s1 = simd4f_add(simd4f_splat_x(v), simd4f_splat_y(v)); + const simd4f s2 = simd4f_add(s1, simd4f_splat_z(v)); + const simd4f s3 = simd4f_add(s2, simd4f_splat_w(v)); + return s3; +} + +vectorial_inline simd4f simd4f_dot4(simd4f lhs, simd4f rhs) { + return simd4f_sum( simd4f_mul(lhs, rhs) ); +} + +vectorial_inline simd4f simd4f_dot2(simd4f lhs, simd4f rhs) { + const simd4f m = simd4f_mul(lhs, rhs); + const simd4f s1 = simd4f_add(simd4f_splat_x(m), simd4f_splat_y(m)); + return s1; +} + + +vectorial_inline simd4f simd4f_length4(simd4f v) { + return simd4f_sqrt( simd4f_dot4(v,v) ); +} + +vectorial_inline simd4f simd4f_length3(simd4f v) { + return simd4f_sqrt( simd4f_dot3(v,v) ); +} + +vectorial_inline simd4f simd4f_length2(simd4f v) { + return simd4f_sqrt( simd4f_dot2(v,v) ); +} + +vectorial_inline simd4f simd4f_length4_squared(simd4f v) { + return simd4f_dot4(v,v); +} + +vectorial_inline simd4f simd4f_length3_squared(simd4f v) { + return simd4f_dot3(v,v); +} + +vectorial_inline float simd4f_length3_squared_scalar(simd4f v) { + return simd4f_dot3_scalar(v,v); +} + +vectorial_inline simd4f simd4f_length2_squared(simd4f v) { + return simd4f_dot2(v,v); +} + + +vectorial_inline simd4f simd4f_normalize4(simd4f a) { + simd4f invlen = simd4f_rsqrt( simd4f_dot4(a,a) ); + return simd4f_mul(a, invlen); +} + +vectorial_inline simd4f simd4f_normalize3(simd4f a) { + simd4f invlen = simd4f_rsqrt( simd4f_dot3(a,a) ); + return simd4f_mul(a, invlen); +} + +vectorial_inline simd4f simd4f_normalize2(simd4f a) { + simd4f invlen = simd4f_rsqrt( simd4f_dot2(a,a) ); + return simd4f_mul(a, invlen); +} + + +#endif diff --git a/3rdparty/vectorial/include/vectorial/simd4f_gnu.h b/3rdparty/vectorial/include/vectorial/simd4f_gnu.h new file mode 100644 index 0000000..4e48289 --- /dev/null 
+++ b/3rdparty/vectorial/include/vectorial/simd4f_gnu.h @@ -0,0 +1,225 @@ +/* + Vectorial + Copyright (c) 2010 Mikko Lehtonen + Licensed under the terms of the two-clause BSD License (see LICENSE) +*/ +#ifndef VECTORIAL_SIMD4F_GNU_H +#define VECTORIAL_SIMD4F_GNU_H + +#include <math.h> +#include <string.h> // memcpy + + +#ifdef __cplusplus +extern "C" { +#endif + + +typedef float simd4f __attribute__ ((vector_size (16))); + +typedef union { + simd4f s ; + float f[4]; +} _simd4f_union; + +vectorial_inline float simd4f_get_x(simd4f s) { _simd4f_union u={s}; return u.f[0]; } +vectorial_inline float simd4f_get_y(simd4f s) { _simd4f_union u={s}; return u.f[1]; } +vectorial_inline float simd4f_get_z(simd4f s) { _simd4f_union u={s}; return u.f[2]; } +vectorial_inline float simd4f_get_w(simd4f s) { _simd4f_union u={s}; return u.f[3]; } + + +vectorial_inline simd4f simd4f_create(float x, float y, float z, float w) { + simd4f s = { x, y, z, w }; + return s; +} + +vectorial_inline simd4f simd4f_zero() { return simd4f_create(0.0f, 0.0f, 0.0f, 0.0f); } + +vectorial_inline simd4f simd4f_uload4(const float *ary) { + simd4f s = { ary[0], ary[1], ary[2], ary[3] }; + return s; +} + +vectorial_inline simd4f simd4f_uload3(const float *ary) { + simd4f s = { ary[0], ary[1], ary[2], 0 }; + return s; +} + +vectorial_inline simd4f simd4f_uload2(const float *ary) { + simd4f s = { ary[0], ary[1], 0, 0 }; + return s; +} + + +vectorial_inline void simd4f_ustore4(const simd4f val, float *ary) { + memcpy(ary, &val, sizeof(float) * 4); +} + +vectorial_inline void simd4f_ustore3(const simd4f val, float *ary) { + memcpy(ary, &val, sizeof(float) * 3); +} + +vectorial_inline void simd4f_ustore2(const simd4f val, float *ary) { + memcpy(ary, &val, sizeof(float) * 2); +} + + +vectorial_inline simd4f simd4f_splat(float v) { + simd4f s = { v, v, v, v }; + return s; +} + +vectorial_inline simd4f simd4f_splat_x(simd4f v) { + float s = simd4f_get_x(v); + simd4f ret = { s, s, s, s }; + return ret; +} + +vectorial_inline simd4f 
simd4f_splat_y(simd4f v) { + float s = simd4f_get_y(v); + simd4f ret = { s, s, s, s }; + return ret; +} + +vectorial_inline simd4f simd4f_splat_z(simd4f v) { + float s = simd4f_get_z(v); + simd4f ret = { s, s, s, s }; + return ret; +} + +vectorial_inline simd4f simd4f_splat_w(simd4f v) { + float s = simd4f_get_w(v); + simd4f ret = { s, s, s, s }; + return ret; +} + +vectorial_inline simd4f simd4f_reciprocal(simd4f v) { + return simd4f_splat(1.0f) / v; +} + +vectorial_inline simd4f simd4f_sqrt(simd4f v) { + simd4f ret = { sqrtf(simd4f_get_x(v)), sqrtf(simd4f_get_y(v)), sqrtf(simd4f_get_z(v)), sqrtf(simd4f_get_w(v)) }; + return ret; +} + +vectorial_inline simd4f simd4f_rsqrt(simd4f v) { + return simd4f_splat(1.0f) / simd4f_sqrt(v); +} + + + +vectorial_inline simd4f simd4f_add(simd4f lhs, simd4f rhs) { + simd4f ret = lhs + rhs; + return ret; +} + +vectorial_inline simd4f simd4f_sub(simd4f lhs, simd4f rhs) { + simd4f ret = lhs - rhs; + return ret; +} + +vectorial_inline simd4f simd4f_mul(simd4f lhs, simd4f rhs) { + simd4f ret = lhs * rhs; + return ret; +} + +vectorial_inline simd4f simd4f_div(simd4f lhs, simd4f rhs) { + simd4f ret = lhs / rhs; + return ret; +} + +vectorial_inline simd4f simd4f_madd(simd4f m1, simd4f m2, simd4f a) { + return simd4f_add( simd4f_mul(m1, m2), a ); +} + +vectorial_inline float simd4f_dot3_scalar(simd4f lhs, simd4f rhs) { + _simd4f_union l = {lhs}; + _simd4f_union r = {rhs}; + return l.f[0] * r.f[0] + l.f[1] * r.f[1] + l.f[2] * r.f[2]; +} + +vectorial_inline simd4f simd4f_dot3(simd4f lhs, simd4f rhs) { + return simd4f_splat( simd4f_dot3_scalar(lhs, rhs) ); +} + +vectorial_inline simd4f simd4f_cross3(simd4f l, simd4f r) { + _simd4f_union lhs = {l}; + _simd4f_union rhs = {r}; + + return simd4f_create( lhs.f[1] * rhs.f[2] - lhs.f[2] * rhs.f[1], + lhs.f[2] * rhs.f[0] - lhs.f[0] * rhs.f[2], + lhs.f[0] * rhs.f[1] - lhs.f[1] * rhs.f[0], 0); +} + + +vectorial_inline simd4f simd4f_shuffle_wxyz(simd4f s) { + _simd4f_union u = {s}; + return 
simd4f_create(u.f[3], u.f[0], u.f[1], u.f[2]); +} + +vectorial_inline simd4f simd4f_shuffle_zwxy(simd4f s) { + _simd4f_union u = {s}; + return simd4f_create(u.f[2], u.f[3], u.f[0], u.f[1]); +} + +vectorial_inline simd4f simd4f_shuffle_yzwx(simd4f s) { + _simd4f_union u = {s}; + return simd4f_create(u.f[1], u.f[2], u.f[3], u.f[0]); +} + + +vectorial_inline simd4f simd4f_zero_w(simd4f s) { + _simd4f_union u = {s}; + return simd4f_create(u.f[0], u.f[1], u.f[2], 0.0f); +} + +vectorial_inline simd4f simd4f_zero_zw(simd4f s) { + _simd4f_union u = {s}; + return simd4f_create(u.f[0], u.f[1], 0.0f, 0.0f); +} + + +vectorial_inline simd4f simd4f_merge_high(simd4f abcd, simd4f xyzw) { + _simd4f_union u1 = {abcd}; + _simd4f_union u2 = {xyzw}; + return simd4f_create(u1.f[2], u1.f[3], u2.f[2], u2.f[3]); +} + +vectorial_inline simd4f simd4f_flip_sign_0101(simd4f s) { + _simd4f_union u = {s}; + return simd4f_create(u.f[0], -u.f[1], u.f[2], -u.f[3]); +} + +vectorial_inline simd4f simd4f_flip_sign_1010(simd4f s) { + _simd4f_union u = {s}; + return simd4f_create(-u.f[0], u.f[1], -u.f[2], u.f[3]); +} + + +vectorial_inline simd4f simd4f_min(simd4f a, simd4f b) { + _simd4f_union ua = {a}; + _simd4f_union ub = {b}; + return simd4f_create( ua.f[0] < ub.f[0] ? ua.f[0] : ub.f[0], + ua.f[1] < ub.f[1] ? ua.f[1] : ub.f[1], + ua.f[2] < ub.f[2] ? ua.f[2] : ub.f[2], + ua.f[3] < ub.f[3] ? ua.f[3] : ub.f[3] ); +} + +vectorial_inline simd4f simd4f_max(simd4f a, simd4f b) { + _simd4f_union ua = {a}; + _simd4f_union ub = {b}; + return simd4f_create( ua.f[0] > ub.f[0] ? ua.f[0] : ub.f[0], + ua.f[1] > ub.f[1] ? ua.f[1] : ub.f[1], + ua.f[2] > ub.f[2] ? ua.f[2] : ub.f[2], + ua.f[3] > ub.f[3] ? 
ua.f[3] : ub.f[3] ); +} + + + +#ifdef __cplusplus +} +#endif + + +#endif + diff --git a/3rdparty/vectorial/include/vectorial/simd4f_neon.h b/3rdparty/vectorial/include/vectorial/simd4f_neon.h new file mode 100644 index 0000000..8ec7d7c --- /dev/null +++ b/3rdparty/vectorial/include/vectorial/simd4f_neon.h @@ -0,0 +1,280 @@ +/* + Vectorial + Copyright (c) 2010 Mikko Lehtonen + Copyright (c) 2014 Google, Inc. + Licensed under the terms of the two-clause BSD License (see LICENSE) +*/ +#ifndef VECTORIAL_SIMD4F_NEON_H +#define VECTORIAL_SIMD4F_NEON_H + +#include <arm_neon.h> + +#ifdef __cplusplus +extern "C" { +#endif + + +typedef float32x4_t simd4f; +typedef float32x2_t simd2f; + +typedef union { + simd4f s ; + float f[4]; +} _simd4f_union; + + + +vectorial_inline simd4f simd4f_create(float x, float y, float z, float w) { + const float32_t d[4] = { x,y,z,w }; + simd4f s = vld1q_f32(d); + return s; +} + +vectorial_inline simd4f simd4f_zero() { return vdupq_n_f32(0.0f); } + +vectorial_inline simd4f simd4f_uload4(const float *ary) { + const float32_t* ary32 = (const float32_t*)ary; + simd4f s = vld1q_f32(ary32); + return s; +} + +vectorial_inline simd4f simd4f_uload3(const float *ary) { + simd4f s = simd4f_create(ary[0], ary[1], ary[2], 0); + return s; +} + +vectorial_inline simd4f simd4f_uload2(const float *ary) { + const float32_t* ary32 = (const float32_t*)ary; + float32x2_t low = vld1_f32(ary32); + const float32_t zero = 0; + float32x2_t high = vld1_dup_f32(&zero); // { 0,0 } but stupid warnings from llvm-gcc + return vcombine_f32(low, high); +} + + +vectorial_inline void simd4f_ustore4(const simd4f val, float *ary) { + vst1q_f32( (float32_t*)ary, val); +} + +vectorial_inline void simd4f_ustore3(const simd4f val, float *ary) { + float* local_data = ary; + vst1q_lane_f32(local_data++, val, 0); + vst1q_lane_f32(local_data++, val, 1); + vst1q_lane_f32(local_data, val, 2); +} + +vectorial_inline void simd4f_ustore2(const simd4f val, float *ary) { + const float32x2_t low = 
vget_low_f32(val); + vst1_f32( (float32_t*)ary, low); +} + + + + +vectorial_inline simd4f simd4f_splat(float v) { + simd4f s = vdupq_n_f32(v); + return s; +} + +// todo: or is simd4f_splat(simd4f_get_x(v)) better? + +vectorial_inline simd4f simd4f_splat_x(simd4f v) { + float32x2_t o = vget_low_f32(v); + simd4f ret = vdupq_lane_f32(o, 0); + return ret; +} + +vectorial_inline simd4f simd4f_splat_y(simd4f v) { + float32x2_t o = vget_low_f32(v); + simd4f ret = vdupq_lane_f32(o, 1); + return ret; +} + +vectorial_inline simd4f simd4f_splat_z(simd4f v) { + float32x2_t o = vget_high_f32(v); + simd4f ret = vdupq_lane_f32(o, 0); + return ret; +} + +vectorial_inline simd4f simd4f_splat_w(simd4f v) { + float32x2_t o = vget_high_f32(v); + simd4f ret = vdupq_lane_f32(o, 1); + return ret; +} + +vectorial_inline simd4f simd4f_reciprocal(simd4f v) { + simd4f estimate = vrecpeq_f32(v); + estimate = vmulq_f32(vrecpsq_f32(estimate, v), estimate); + estimate = vmulq_f32(vrecpsq_f32(estimate, v), estimate); + return estimate; +} + +vectorial_inline void simd4f_rsqrt_1iteration(const simd4f& v, simd4f& estimate) { + simd4f estimate2 = vmulq_f32(estimate, v); + estimate = vmulq_f32(estimate, vrsqrtsq_f32(estimate2, estimate)); +} + +vectorial_inline simd4f simd4f_rsqrt1(simd4f v) { + simd4f estimate = vrsqrteq_f32(v); + simd4f_rsqrt_1iteration(v, estimate); + return estimate; +} + +vectorial_inline simd4f simd4f_rsqrt2(simd4f v) { + simd4f estimate = vrsqrteq_f32(v); + simd4f_rsqrt_1iteration(v, estimate); + simd4f_rsqrt_1iteration(v, estimate); + return estimate; +} + +vectorial_inline simd4f simd4f_rsqrt3(simd4f v) { + simd4f estimate = vrsqrteq_f32(v); + simd4f_rsqrt_1iteration(v, estimate); + simd4f_rsqrt_1iteration(v, estimate); + simd4f_rsqrt_1iteration(v, estimate); + return estimate; +} + +// http://en.wikipedia.org/wiki/Fast_inverse_square_root makes the argument for +// one iteration but two gives a significant accuracy improvement. 
+vectorial_inline simd4f simd4f_rsqrt(simd4f v) { + return simd4f_rsqrt2(v); +} + +vectorial_inline simd4f simd4f_sqrt(simd4f v) { + + return vreinterpretq_f32_u32(vandq_u32( vtstq_u32(vreinterpretq_u32_f32(v), + vreinterpretq_u32_f32(v)), + vreinterpretq_u32_f32( + simd4f_reciprocal(simd4f_rsqrt(v))) + ) + ); + +} + + + +// arithmetics + +vectorial_inline simd4f simd4f_add(simd4f lhs, simd4f rhs) { + simd4f ret = vaddq_f32(lhs, rhs); + return ret; +} + +vectorial_inline simd4f simd4f_sub(simd4f lhs, simd4f rhs) { + simd4f ret = vsubq_f32(lhs, rhs); + return ret; +} + +vectorial_inline simd4f simd4f_mul(simd4f lhs, simd4f rhs) { + simd4f ret = vmulq_f32(lhs, rhs); + return ret; +} + +vectorial_inline simd4f simd4f_div(simd4f lhs, simd4f rhs) { + simd4f recip = simd4f_reciprocal( rhs ); + simd4f ret = vmulq_f32(lhs, recip); + return ret; +} + +vectorial_inline simd4f simd4f_madd(simd4f m1, simd4f m2, simd4f a) { + return vmlaq_f32( a, m1, m2 ); +} + + + +vectorial_inline float simd4f_get_x(simd4f s) { return vgetq_lane_f32(s, 0); } +vectorial_inline float simd4f_get_y(simd4f s) { return vgetq_lane_f32(s, 1); } +vectorial_inline float simd4f_get_z(simd4f s) { return vgetq_lane_f32(s, 2); } +vectorial_inline float simd4f_get_w(simd4f s) { return vgetq_lane_f32(s, 3); } + +// This function returns x*x+y*y+z*z and ignores the w component. 
+vectorial_inline float simd4f_dot3_scalar(simd4f lhs, simd4f rhs) { + const simd4f m = simd4f_mul(lhs, rhs); + simd2f s1 = vpadd_f32(vget_low_f32(m), vget_low_f32(m)); + s1 = vadd_f32(s1, vget_high_f32(m)); + return vget_lane_f32(s1, 0); +} + +vectorial_inline simd4f simd4f_dot3(simd4f lhs, simd4f rhs) { + return simd4f_splat(simd4f_dot3_scalar(lhs, rhs)); +} + +vectorial_inline simd4f simd4f_cross3(simd4f lhs, simd4f rhs) { + // Compute lhs and rhs in order yzx + simd2f lhs_low = vget_low_f32(lhs); + simd2f rhs_low = vget_low_f32(rhs); + simd4f lhs_yzx = vcombine_f32(vext_f32(lhs_low, vget_high_f32(lhs),1), lhs_low); + simd4f rhs_yzx = vcombine_f32(vext_f32(rhs_low, vget_high_f32(rhs),1), rhs_low); + // Compute cross in order zxy + simd4f s3 = simd4f_sub(simd4f_mul(rhs_yzx, lhs), simd4f_mul(lhs_yzx, rhs)); + // Permute cross to order xyz and zero out the fourth value + simd2f low = vget_low_f32(s3); + static const uint32_t mask_array[] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0}; + static const int32x4_t mask = vld1q_s32((const int32_t*)mask_array); + s3 = vcombine_f32(vext_f32(low, vget_high_f32(s3), 1), low); + return (simd4f)vandq_s32((int32x4_t)s3,mask); +} + +vectorial_inline simd4f simd4f_shuffle_wxyz(simd4f s) { + _simd4f_union u = {s}; + return simd4f_create( u.f[3], u.f[0], u.f[1], u.f[2]); +} + +vectorial_inline simd4f simd4f_shuffle_zwxy(simd4f s) { + _simd4f_union u = {s}; + return simd4f_create(u.f[2], u.f[3], u.f[0], u.f[1]); +} + +vectorial_inline simd4f simd4f_shuffle_yzwx(simd4f s) { + _simd4f_union u = {s}; + return simd4f_create(u.f[1], u.f[2], u.f[3], u.f[0]); +} + + +vectorial_inline simd4f simd4f_zero_w(simd4f s) { + _simd4f_union u = {s}; + return simd4f_create(u.f[0], u.f[1], u.f[2], 0.0f); +} + +vectorial_inline simd4f simd4f_zero_zw(simd4f s) { + _simd4f_union u = {s}; + return simd4f_create(u.f[0], u.f[1], 0.0f, 0.0f); +} + + +vectorial_inline simd4f simd4f_merge_high(simd4f xyzw, simd4f abcd) { + _simd4f_union u1 = {xyzw}; + 
_simd4f_union u2 = {abcd}; + return simd4f_create(u1.f[2], u1.f[3], u2.f[2], u2.f[3]); +} + +vectorial_inline simd4f simd4f_flip_sign_0101(simd4f s) { + const unsigned int upnpn[4] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; + const uint32x4_t pnpn = vld1q_u32( upnpn ); + return vreinterpretq_f32_u32( veorq_u32( vreinterpretq_u32_f32(s), pnpn ) ); +} + +vectorial_inline simd4f simd4f_flip_sign_1010(simd4f s) { + const unsigned int unpnp[4] = { 0x80000000, 0x00000000, 0x80000000, 0x00000000 }; + const uint32x4_t npnp = vld1q_u32( unpnp ); + return vreinterpretq_f32_u32( veorq_u32( vreinterpretq_u32_f32(s), npnp ) ); +} + + +vectorial_inline simd4f simd4f_min(simd4f a, simd4f b) { + return vminq_f32( a, b ); +} + +vectorial_inline simd4f simd4f_max(simd4f a, simd4f b) { + return vmaxq_f32( a, b ); +} + + +#ifdef __cplusplus +} +#endif + + +#endif diff --git a/3rdparty/vectorial/include/vectorial/simd4f_scalar.h b/3rdparty/vectorial/include/vectorial/simd4f_scalar.h new file mode 100644 index 0000000..3a3ea8c --- /dev/null +++ b/3rdparty/vectorial/include/vectorial/simd4f_scalar.h @@ -0,0 +1,199 @@ +/* + Vectorial + Copyright (c) 2010 Mikko Lehtonen + Licensed under the terms of the two-clause BSD License (see LICENSE) +*/ +#ifndef VECTORIAL_SIMD4F_SCALAR_H +#define VECTORIAL_SIMD4F_SCALAR_H + +#include <math.h> +#include <string.h> // memcpy + +#ifdef __cplusplus +extern "C" { +#endif + + +typedef struct { + float x; + float y; + float z; + float w; +} simd4f; + + + +vectorial_inline simd4f simd4f_create(float x, float y, float z, float w) { + simd4f s = { x, y, z, w }; + return s; +} + +vectorial_inline simd4f simd4f_zero() { return simd4f_create(0.0f, 0.0f, 0.0f, 0.0f); } + +vectorial_inline simd4f simd4f_uload4(const float *ary) { + simd4f s = { ary[0], ary[1], ary[2], ary[3] }; + return s; +} + +vectorial_inline simd4f simd4f_uload3(const float *ary) { + simd4f s = { ary[0], ary[1], ary[2], 0 }; + return s; +} + +vectorial_inline simd4f simd4f_uload2(const float *ary) { + 
simd4f s = { ary[0], ary[1], 0, 0 }; + return s; +} + + +vectorial_inline void simd4f_ustore4(const simd4f val, float *ary) { + memcpy(ary, &val, sizeof(float) * 4); +} + +vectorial_inline void simd4f_ustore3(const simd4f val, float *ary) { + memcpy(ary, &val, sizeof(float) * 3); +} + +vectorial_inline void simd4f_ustore2(const simd4f val, float *ary) { + memcpy(ary, &val, sizeof(float) * 2); +} + + + +// utilities +vectorial_inline simd4f simd4f_splat(float v) { + simd4f s = { v, v, v, v }; + return s; +} + +vectorial_inline simd4f simd4f_splat_x(simd4f v) { + simd4f s = { v.x, v.x, v.x, v.x }; + return s; +} + +vectorial_inline simd4f simd4f_splat_y(simd4f v) { + simd4f s = { v.y, v.y, v.y, v.y }; + return s; +} + +vectorial_inline simd4f simd4f_splat_z(simd4f v) { + simd4f s = { v.z, v.z, v.z, v.z }; + return s; +} + +vectorial_inline simd4f simd4f_splat_w(simd4f v) { + simd4f s = { v.w, v.w, v.w, v.w }; + return s; +} + +vectorial_inline simd4f simd4f_reciprocal(simd4f v) { + simd4f s = { 1.0f/v.x, 1.0f/v.y, 1.0f/v.z, 1.0f/v.w }; + return s; +} + +vectorial_inline simd4f simd4f_sqrt(simd4f v) { + simd4f s = { sqrtf(v.x), sqrtf(v.y), sqrtf(v.z), sqrtf(v.w) }; + return s; +} + +vectorial_inline simd4f simd4f_rsqrt(simd4f v) { + simd4f s = { 1.0f/sqrtf(v.x), 1.0f/sqrtf(v.y), 1.0f/sqrtf(v.z), 1.0f/sqrtf(v.w) }; + return s; +} + + +// arithmetic + +vectorial_inline simd4f simd4f_add(simd4f lhs, simd4f rhs) { + simd4f ret = { lhs.x + rhs.x, lhs.y + rhs.y, lhs.z + rhs.z, lhs.w + rhs.w }; + return ret; +} + +vectorial_inline simd4f simd4f_sub(simd4f lhs, simd4f rhs) { + simd4f ret = { lhs.x - rhs.x, lhs.y - rhs.y, lhs.z - rhs.z, lhs.w - rhs.w }; + return ret; +} + +vectorial_inline simd4f simd4f_mul(simd4f lhs, simd4f rhs) { + simd4f ret = { lhs.x * rhs.x, lhs.y * rhs.y, lhs.z * rhs.z, lhs.w * rhs.w }; + return ret; +} + +vectorial_inline simd4f simd4f_div(simd4f lhs, simd4f rhs) { + simd4f ret = { lhs.x / rhs.x, lhs.y / rhs.y, lhs.z / rhs.z, lhs.w / rhs.w }; + return 
ret; +} + +vectorial_inline simd4f simd4f_madd(simd4f m1, simd4f m2, simd4f a) { + return simd4f_add( simd4f_mul(m1, m2), a ); +} + +vectorial_inline float simd4f_dot3_scalar(simd4f lhs, simd4f rhs) { + return lhs.x * rhs.x + lhs.y * rhs.y + lhs.z * rhs.z; +} + +vectorial_inline simd4f simd4f_dot3(simd4f lhs, simd4f rhs) { + return simd4f_splat( simd4f_dot3_scalar(lhs, rhs) ); +} + +vectorial_inline simd4f simd4f_cross3(simd4f lhs, simd4f rhs) { + return simd4f_create( lhs.y * rhs.z - lhs.z * rhs.y, + lhs.z * rhs.x - lhs.x * rhs.z, + lhs.x * rhs.y - lhs.y * rhs.x, 0); +} + + +vectorial_inline float simd4f_get_x(simd4f s) { return s.x; } +vectorial_inline float simd4f_get_y(simd4f s) { return s.y; } +vectorial_inline float simd4f_get_z(simd4f s) { return s.z; } +vectorial_inline float simd4f_get_w(simd4f s) { return s.w; } + + +vectorial_inline simd4f simd4f_shuffle_wxyz(simd4f s) { return simd4f_create(s.w, s.x, s.y, s.z); } +vectorial_inline simd4f simd4f_shuffle_zwxy(simd4f s) { return simd4f_create(s.z, s.w, s.x, s.y); } +vectorial_inline simd4f simd4f_shuffle_yzwx(simd4f s) { return simd4f_create(s.y, s.z, s.w, s.x); } + + +vectorial_inline simd4f simd4f_zero_w(simd4f s) { + return simd4f_create(s.x, s.y, s.z, 0.0f); +} + +vectorial_inline simd4f simd4f_zero_zw(simd4f s) { + return simd4f_create(s.x, s.y, 0.0f, 0.0f); +} + + +vectorial_inline simd4f simd4f_merge_high(simd4f abcd, simd4f xyzw) { + return simd4f_create(abcd.z, abcd.w, xyzw.z, xyzw.w); +} + +vectorial_inline simd4f simd4f_flip_sign_0101(simd4f s) { + return simd4f_create(s.x, -s.y, s.z, -s.w); +} + +vectorial_inline simd4f simd4f_flip_sign_1010(simd4f s) { + return simd4f_create(-s.x, s.y, -s.z, s.w); +} + +vectorial_inline simd4f simd4f_min(simd4f a, simd4f b) { + return simd4f_create( a.x < b.x ? a.x : b.x, + a.y < b.y ? a.y : b.y, + a.z < b.z ? a.z : b.z, + a.w < b.w ? a.w : b.w ); +} + +vectorial_inline simd4f simd4f_max(simd4f a, simd4f b) { + return simd4f_create( a.x > b.x ? 
a.x : b.x, + a.y > b.y ? a.y : b.y, + a.z > b.z ? a.z : b.z, + a.w > b.w ? a.w : b.w ); +} + + +#ifdef __cplusplus +} +#endif + + +#endif + diff --git a/3rdparty/vectorial/include/vectorial/simd4f_sse.h b/3rdparty/vectorial/include/vectorial/simd4f_sse.h new file mode 100644 index 0000000..c5684b9 --- /dev/null +++ b/3rdparty/vectorial/include/vectorial/simd4f_sse.h @@ -0,0 +1,236 @@ +/* + Vectorial + Copyright (c) 2010 Mikko Lehtonen + Copyright (c) 2014 Google, Inc. + Licensed under the terms of the two-clause BSD License (see LICENSE) +*/ +#ifndef VECTORIAL_SIMD4F_SSE_H +#define VECTORIAL_SIMD4F_SSE_H + +// Conditionally enable SSE4.1 otherwise fallback to SSE. +#if defined(_M_IX86_FP) + #if _M_IX86_FP >=2 + #define VECTORIAL_USE_SSE4_1 + #endif +#elif defined(__SSE4_1__) + #define VECTORIAL_USE_SSE4_1 +#endif + +#include <xmmintrin.h> +#if defined(VECTORIAL_USE_SSE4_1) + #include <smmintrin.h> +#endif +#include <string.h> // memcpy + +#ifdef __cplusplus +extern "C" { +#endif + + +typedef __m128 simd4f; + +typedef union { + simd4f s ; + float f[4]; + unsigned int ui[4]; +} _simd4f_union; + +// creating + +vectorial_inline simd4f simd4f_create(float x, float y, float z, float w) { + simd4f s = { x, y, z, w }; + return s; +} + +vectorial_inline simd4f simd4f_zero() { return _mm_setzero_ps(); } + +vectorial_inline simd4f simd4f_uload4(const float *ary) { + simd4f s = _mm_loadu_ps(ary); + return s; +} + +vectorial_inline simd4f simd4f_uload3(const float *ary) { + simd4f s = simd4f_create(ary[0], ary[1], ary[2], 0); + return s; +} + +vectorial_inline simd4f simd4f_uload2(const float *ary) { + simd4f s = simd4f_create(ary[0], ary[1], 0, 0); + return s; +} + + +vectorial_inline void simd4f_ustore4(const simd4f val, float *ary) { + _mm_storeu_ps(ary, val); +} + +vectorial_inline void simd4f_ustore3(const simd4f val, float *ary) { + memcpy(ary, &val, sizeof(float) * 3); +} + +vectorial_inline void simd4f_ustore2(const simd4f val, float *ary) { + memcpy(ary, &val, sizeof(float) * 2); +} + + +// utilities + 
+vectorial_inline simd4f simd4f_splat(float v) { + simd4f s = _mm_set1_ps(v); + return s; +} + +vectorial_inline simd4f simd4f_splat_x(simd4f v) { + simd4f s = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0,0,0,0)); + return s; +} + +vectorial_inline simd4f simd4f_splat_y(simd4f v) { + simd4f s = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1,1,1,1)); + return s; +} + +vectorial_inline simd4f simd4f_splat_z(simd4f v) { + simd4f s = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2,2,2,2)); + return s; +} + +vectorial_inline simd4f simd4f_splat_w(simd4f v) { + simd4f s = _mm_shuffle_ps(v, v, _MM_SHUFFLE(3,3,3,3)); + return s; +} + + +// arithmetic + +vectorial_inline simd4f simd4f_add(simd4f lhs, simd4f rhs) { + simd4f ret = _mm_add_ps(lhs, rhs); + return ret; +} + +vectorial_inline simd4f simd4f_sub(simd4f lhs, simd4f rhs) { + simd4f ret = _mm_sub_ps(lhs, rhs); + return ret; +} + +vectorial_inline simd4f simd4f_mul(simd4f lhs, simd4f rhs) { + simd4f ret = _mm_mul_ps(lhs, rhs); + return ret; +} + +vectorial_inline simd4f simd4f_div(simd4f lhs, simd4f rhs) { + simd4f ret = _mm_div_ps(lhs, rhs); + return ret; +} + +vectorial_inline simd4f simd4f_madd(simd4f m1, simd4f m2, simd4f a) { + return simd4f_add( simd4f_mul(m1, m2), a ); +} + + + + +vectorial_inline simd4f simd4f_reciprocal(simd4f v) { + simd4f s = _mm_rcp_ps(v); + const simd4f two = simd4f_create(2.0f, 2.0f, 2.0f, 2.0f); + s = simd4f_mul(s, simd4f_sub(two, simd4f_mul(v, s))); + return s; +} + +vectorial_inline simd4f simd4f_sqrt(simd4f v) { + simd4f s = _mm_sqrt_ps(v); + return s; +} + +vectorial_inline simd4f simd4f_rsqrt(simd4f v) { + simd4f s = _mm_rsqrt_ps(v); + const simd4f half = simd4f_create(0.5f, 0.5f, 0.5f, 0.5f); + const simd4f three = simd4f_create(3.0f, 3.0f, 3.0f, 3.0f); + s = simd4f_mul(simd4f_mul(s, half), simd4f_sub(three, simd4f_mul(s, simd4f_mul(v,s)))); + return s; +} + +vectorial_inline float simd4f_get_x(simd4f s) { _simd4f_union u={s}; return u.f[0]; } +vectorial_inline float simd4f_get_y(simd4f s) { _simd4f_union u={s}; 
return u.f[1]; } +vectorial_inline float simd4f_get_z(simd4f s) { _simd4f_union u={s}; return u.f[2]; } +vectorial_inline float simd4f_get_w(simd4f s) { _simd4f_union u={s}; return u.f[3]; } + +vectorial_inline simd4f simd4f_dot3(simd4f lhs,simd4f rhs) { +#if defined(VECTORIAL_USE_SSE4_1) + return _mm_dp_ps(lhs, rhs, 0x7f); +#else + simd4f_aligned16 const unsigned int mask_array[] = { 0xffffffff, 0xffffffff, 0xffffffff, 0 }; + const simd4f mask = _mm_load_ps((const float*)mask_array); + const simd4f m = _mm_mul_ps(lhs, rhs); + const simd4f s0 = _mm_and_ps(m, mask); + const simd4f s1 = _mm_add_ps(s0, _mm_movehl_ps(s0, s0)); + const simd4f s2 = _mm_add_ss(s1, _mm_shuffle_ps(s1, s1, 1)); + return _mm_shuffle_ps(s2,s2, 0); +#endif +} + +vectorial_inline float simd4f_dot3_scalar(simd4f lhs,simd4f rhs) { + return simd4f_get_x(simd4f_dot3(lhs, rhs)); +} + +vectorial_inline simd4f simd4f_cross3(simd4f lhs, simd4f rhs) { + + const simd4f lyzx = _mm_shuffle_ps(lhs, lhs, _MM_SHUFFLE(3,0,2,1)); + const simd4f lzxy = _mm_shuffle_ps(lhs, lhs, _MM_SHUFFLE(3,1,0,2)); + + const simd4f ryzx = _mm_shuffle_ps(rhs, rhs, _MM_SHUFFLE(3,0,2,1)); + const simd4f rzxy = _mm_shuffle_ps(rhs, rhs, _MM_SHUFFLE(3,1,0,2)); + + return _mm_sub_ps(_mm_mul_ps(lyzx, rzxy), _mm_mul_ps(lzxy, ryzx)); + +} + +vectorial_inline simd4f simd4f_shuffle_wxyz(simd4f s) { return _mm_shuffle_ps(s,s, _MM_SHUFFLE(2,1,0,3) ); } +vectorial_inline simd4f simd4f_shuffle_zwxy(simd4f s) { return _mm_shuffle_ps(s,s, _MM_SHUFFLE(1,0,3,2) ); } +vectorial_inline simd4f simd4f_shuffle_yzwx(simd4f s) { return _mm_shuffle_ps(s,s, _MM_SHUFFLE(0,3,2,1) ); } + +vectorial_inline simd4f simd4f_zero_w(simd4f s) { + simd4f r = _mm_unpackhi_ps(s, _mm_setzero_ps()); + return _mm_movelh_ps(s, r); +} + +vectorial_inline simd4f simd4f_zero_zw(simd4f s) { + return _mm_movelh_ps(s, _mm_setzero_ps()); +} + +vectorial_inline simd4f simd4f_merge_high(simd4f xyzw, simd4f abcd) { + return _mm_movehl_ps(abcd, xyzw); +} + + +typedef simd4f_aligned16 
union { + unsigned int ui[4]; + float f[4]; +} _simd4f_uif; + +vectorial_inline simd4f simd4f_flip_sign_0101(simd4f s) { + const _simd4f_uif upnpn = { { 0x00000000, 0x80000000, 0x00000000, 0x80000000 } }; + return _mm_xor_ps( s, _mm_load_ps(upnpn.f) ); +} + +vectorial_inline simd4f simd4f_flip_sign_1010(simd4f s) { + const _simd4f_uif unpnp = { { 0x80000000, 0x00000000, 0x80000000, 0x00000000 } }; + return _mm_xor_ps( s, _mm_load_ps(unpnp.f) ); +} + +vectorial_inline simd4f simd4f_min(simd4f a, simd4f b) { + return _mm_min_ps( a, b ); +} + +vectorial_inline simd4f simd4f_max(simd4f a, simd4f b) { + return _mm_max_ps( a, b ); +} + + + +#ifdef __cplusplus +} +#endif + + +#endif diff --git a/3rdparty/vectorial/include/vectorial/simd4x4f.h b/3rdparty/vectorial/include/vectorial/simd4x4f.h new file mode 100644 index 0000000..78266b8 --- /dev/null +++ b/3rdparty/vectorial/include/vectorial/simd4x4f.h @@ -0,0 +1,412 @@ +/* + Vectorial + Copyright (c) 2010 Mikko Lehtonen + Copyright (c) 2014 Google, Inc. + Licensed under the terms of the two-clause BSD License (see LICENSE) +*/ +#ifndef VECTORIAL_SIMD4X4F_H +#define VECTORIAL_SIMD4X4F_H + + +#include "simd4f.h" + +#include + +/* + Note, x,y,z,w are conceptually columns with matrix math. 
+*/ + +typedef struct { + simd4f x,y,z,w; +} simd4x4f; + + + +vectorial_inline simd4x4f simd4x4f_create(simd4f x, simd4f y, simd4f z, SIMD_PARAM(simd4f, w)) { + simd4x4f s = { x, y, z, w }; + return s; +} + + +vectorial_inline void simd4x4f_identity(simd4x4f* m) { + *m = simd4x4f_create( simd4f_create(1.0f, 0.0f, 0.0f, 0.0f), + simd4f_create(0.0f, 1.0f, 0.0f, 0.0f), + simd4f_create(0.0f, 0.0f, 1.0f, 0.0f), + simd4f_create(0.0f, 0.0f, 0.0f, 1.0f)); +} + + + +vectorial_inline void simd4x4f_uload(simd4x4f* m, const float *f) { + + m->x = simd4f_uload4(f + 0); + m->y = simd4f_uload4(f + 4); + m->z = simd4f_uload4(f + 8); + m->w = simd4f_uload4(f + 12); + +} + + + + + +#ifdef VECTORIAL_SCALAR + #include "simd4x4f_scalar.h" +#elif defined(VECTORIAL_SSE) + #include "simd4x4f_sse.h" +#elif defined(VECTORIAL_GNU) + #include "simd4x4f_gnu.h" +#elif defined(VECTORIAL_NEON) + #include "simd4x4f_neon.h" +#else + #error No implementation defined +#endif + +vectorial_inline void simd4x4f_sum(const simd4x4f* a, simd4f* out) { + simd4f t; + t = simd4f_add(a->x, a->y); + t = simd4f_add(t, a->z); + t = simd4f_add(t, a->w); + *out = t; +} + +vectorial_inline void simd4x4f_matrix_vector_mul(const simd4x4f* a, const simd4f * b, simd4f* out) { + + const simd4f x = a->x; + const simd4f y = a->y; + const simd4f z = a->z; + const simd4f w = a->w; + const simd4f v = *b; + const simd4f vx = simd4f_splat_x(v); + const simd4f vy = simd4f_splat_y(v); + const simd4f vz = simd4f_splat_z(v); + const simd4f vw = simd4f_splat_w(v); + + #if 0 + // In a hasty benchmark, this actually performed worse on neon + // TODO: revisit and conditionalize accordingly + + *out = simd4f_madd(x, vx, + simd4f_madd(y, vy, + simd4f_madd(z, vz, + simd4f_mul(w, vw) ) ) ); + + #else + + *out = simd4f_add(simd4f_mul(x, vx), + simd4f_add(simd4f_mul(y, vy), + simd4f_add(simd4f_mul(z, vz), + simd4f_mul(w, vw) ) ) ); + + #endif +} + +vectorial_inline void simd4x4f_matrix_vector3_mul(const simd4x4f* a, const simd4f * b, simd4f* 
out) { + + #if 0 + *out = simd4f_madd( a->x, simd4f_splat_x(*b), + simd4f_madd( a->y, simd4f_splat_y(*b), + simd4f_mul(a->z, simd4f_splat_z(*b)) ) ); + #else + *out = simd4f_add( simd4f_mul(a->x, simd4f_splat_x(*b)), + simd4f_add( simd4f_mul(a->y, simd4f_splat_y(*b)), + simd4f_mul(a->z, simd4f_splat_z(*b)) ) ); + #endif + +} + +vectorial_inline void simd4x4f_matrix_point3_mul(const simd4x4f* a, const simd4f * b, simd4f* out) { + + #if 0 + *out = simd4f_madd( a->x, simd4f_splat_x(*b), + simd4f_madd( a->y, simd4f_splat_y(*b), + simd4f_madd( a->z, simd4f_splat_z(*b), + a->w ) ) ); + #else + *out = simd4f_add( simd4f_mul(a->x, simd4f_splat_x(*b)), + simd4f_add( simd4f_mul(a->y, simd4f_splat_y(*b)), + simd4f_add( simd4f_mul(a->z, simd4f_splat_z(*b)), + a->w ) ) ); + #endif + +} + +vectorial_inline void simd4x4f_inv_ortho_matrix_point3_mul(const simd4x4f* a, const simd4f * b, simd4f* out) { + simd4f translation = simd4f_sub(*b, a->w); + + simd4x4f transpose = *a; + + transpose.w = simd4f_create(0,0,0,0); + simd4x4f_transpose_inplace(&transpose); + + simd4x4f_matrix_point3_mul(&transpose, &translation, out); +} + +vectorial_inline void simd4x4f_inv_ortho_matrix_vector3_mul(const simd4x4f* a, const simd4f * b, simd4f* out) { + simd4f translation = *b; + + simd4x4f transpose = *a; + + transpose.w = simd4f_create(0,0,0,0); + simd4x4f_transpose_inplace(&transpose); + + simd4x4f_matrix_vector3_mul(&transpose, &translation, out); +} + + +vectorial_inline void simd4x4f_matrix_mul(const simd4x4f* a, const simd4x4f* b, simd4x4f* out) { + + simd4x4f_matrix_vector_mul(a, &b->x, &out->x); + simd4x4f_matrix_vector_mul(a, &b->y, &out->y); + simd4x4f_matrix_vector_mul(a, &b->z, &out->z); + simd4x4f_matrix_vector_mul(a, &b->w, &out->w); + +} + + + + +vectorial_inline void simd4x4f_perspective(simd4x4f *m, float fovy_radians, float aspect, float znear, float zfar) { + + float deltaz = zfar - znear; + float cotangent = tanf( VECTORIAL_HALFPI - fovy_radians * 0.5f ); + + float a = cotangent 
/ aspect; + float b = cotangent; + float c = -(zfar + znear) / deltaz; + float d = -2 * znear * zfar / deltaz; + + m->x = simd4f_create( a, 0, 0, 0); + m->y = simd4f_create( 0, b, 0, 0); + m->z = simd4f_create( 0, 0, c, -1); + m->w = simd4f_create( 0, 0, d, 0); + +} + +vectorial_inline void simd4x4f_ortho(simd4x4f *m, float left, float right, float bottom, float top, float znear, float zfar) { + + float deltax = right - left; + float deltay = top - bottom; + float deltaz = zfar - znear; + + float a = 2.0f / deltax; + float b = -(right + left) / deltax; + float c = 2.0f / deltay; + float d = -(top + bottom) / deltay; + float e = -2.0f / deltaz; + float f = -(zfar + znear) / deltaz; + + m->x = simd4f_create( a, 0, 0, 0); + m->y = simd4f_create( 0, c, 0, 0); + m->z = simd4f_create( 0, 0, e, 0); + m->w = simd4f_create( b, d, f, 1); + +} + + +vectorial_inline void simd4x4f_lookat(simd4x4f *m, simd4f eye, simd4f center, simd4f up) { + + simd4f zaxis = simd4f_normalize3( simd4f_sub(center, eye) ); + simd4f xaxis = simd4f_normalize3( simd4f_cross3( zaxis, up ) ); + simd4f yaxis = simd4f_cross3(xaxis, zaxis); + + zaxis = simd4f_sub( simd4f_zero(), zaxis); + + float x = -simd4f_dot3_scalar(xaxis, eye); + float y = -simd4f_dot3_scalar(yaxis, eye); + float z = -simd4f_dot3_scalar(zaxis, eye); + + m->x = xaxis; + m->y = yaxis; + m->z = zaxis; + + m->w = simd4f_create( 0,0,0, 1); + simd4x4f_transpose_inplace(m); + m->w = simd4f_create( x,y,z,1); + +} + + +vectorial_inline void simd4x4f_translation(simd4x4f* m, float x, float y, float z) { + *m = simd4x4f_create( simd4f_create(1.0f, 0.0f, 0.0f, 0.0f), + simd4f_create(0.0f, 1.0f, 0.0f, 0.0f), + simd4f_create(0.0f, 0.0f, 1.0f, 0.0f), + simd4f_create( x, y, z, 1.0f)); +} + + +vectorial_inline void simd4x4f_axis_rotation(simd4x4f* m, float radians, simd4f axis) { + + radians = -radians; + + axis = simd4f_normalize3(axis); + + const float sine = sinf(radians); + const float cosine = cosf(radians); + + const float x = 
simd4f_get_x(axis); + const float y = simd4f_get_y(axis); + const float z = simd4f_get_z(axis); + + const float ab = x * y * (1 - cosine); + const float bc = y * z * (1 - cosine); + const float ca = z * x * (1 - cosine); + + const float tx = x * x; + const float ty = y * y; + const float tz = z * z; + + const simd4f i = simd4f_create( tx + cosine * (1 - tx), ab - z * sine, ca + y * sine, 0); + const simd4f j = simd4f_create( ab + z * sine, ty + cosine * (1 - ty), bc - x * sine, 0); + const simd4f k = simd4f_create( ca - y * sine, bc + x * sine, tz + cosine * (1 - tz), 0); + + *m = simd4x4f_create( i,j,k, simd4f_create(0, 0, 0, 1) ); + +} + + + +vectorial_inline void simd4x4f_add(const simd4x4f* a, const simd4x4f* b, simd4x4f* out) { + + out->x = simd4f_add(a->x, b->x); + out->y = simd4f_add(a->y, b->y); + out->z = simd4f_add(a->z, b->z); + out->w = simd4f_add(a->w, b->w); + +} + +vectorial_inline void simd4x4f_sub(const simd4x4f* a, const simd4x4f* b, simd4x4f* out) { + + out->x = simd4f_sub(a->x, b->x); + out->y = simd4f_sub(a->y, b->y); + out->z = simd4f_sub(a->z, b->z); + out->w = simd4f_sub(a->w, b->w); + +} + +vectorial_inline void simd4x4f_mul(const simd4x4f* a, const simd4x4f* b, simd4x4f* out) { + + out->x = simd4f_mul(a->x, b->x); + out->y = simd4f_mul(a->y, b->y); + out->z = simd4f_mul(a->z, b->z); + out->w = simd4f_mul(a->w, b->w); + +} + +vectorial_inline void simd4x4f_div(simd4x4f* a, simd4x4f* b, simd4x4f* out) { + + out->x = simd4f_div(a->x, b->x); + out->y = simd4f_div(a->y, b->y); + out->z = simd4f_div(a->z, b->z); + out->w = simd4f_div(a->w, b->w); + +} + +vectorial_inline simd4f simd4x4f_inverse(const simd4x4f* a, simd4x4f* out) { + + const simd4f c0 = a->x; + const simd4f c1 = a->y; + const simd4f c2 = a->z; + const simd4f c3 = a->w; + + const simd4f c0_wxyz = simd4f_shuffle_wxyz(c0); + const simd4f c0_zwxy = simd4f_shuffle_zwxy(c0); + const simd4f c0_yzwx = simd4f_shuffle_yzwx(c0); + + const simd4f c1_wxyz = simd4f_shuffle_wxyz(c1); + const 
simd4f c1_zwxy = simd4f_shuffle_zwxy(c1); + const simd4f c1_yzwx = simd4f_shuffle_yzwx(c1); + + const simd4f c2_wxyz = simd4f_shuffle_wxyz(c2); + const simd4f c2_zwxy = simd4f_shuffle_zwxy(c2); + const simd4f c2_yzwx = simd4f_shuffle_yzwx(c2); + + const simd4f c3_wxyz = simd4f_shuffle_wxyz(c3); + const simd4f c3_zwxy = simd4f_shuffle_zwxy(c3); + const simd4f c3_yzwx = simd4f_shuffle_yzwx(c3); + + const simd4f c0_wxyz_x_c1 = simd4f_mul(c0_wxyz, c1); + const simd4f c0_wxyz_x_c1_yzwx = simd4f_mul(c0_wxyz, c1_yzwx); + const simd4f c0_wxyz_x_c1_zwxy = simd4f_mul(c0_wxyz, c1_zwxy); + + const simd4f c2_wxyz_x_c3 = simd4f_mul(c2_wxyz, c3); + const simd4f c2_wxyz_x_c3_yzwx = simd4f_mul(c2_wxyz, c3_yzwx); + const simd4f c2_wxyz_x_c3_zwxy = simd4f_mul(c2_wxyz, c3_zwxy); + + const simd4f ar1 = simd4f_sub( simd4f_shuffle_wxyz(c2_wxyz_x_c3_zwxy), simd4f_shuffle_zwxy(c2_wxyz_x_c3) ); + const simd4f ar2 = simd4f_sub( simd4f_shuffle_zwxy(c2_wxyz_x_c3_yzwx), c2_wxyz_x_c3_yzwx ); + const simd4f ar3 = simd4f_sub( c2_wxyz_x_c3_zwxy, simd4f_shuffle_wxyz(c2_wxyz_x_c3) ); + + const simd4f br1 = simd4f_sub( simd4f_shuffle_wxyz(c0_wxyz_x_c1_zwxy), simd4f_shuffle_zwxy(c0_wxyz_x_c1) ); + const simd4f br2 = simd4f_sub( simd4f_shuffle_zwxy(c0_wxyz_x_c1_yzwx), c0_wxyz_x_c1_yzwx ); + const simd4f br3 = simd4f_sub( c0_wxyz_x_c1_zwxy, simd4f_shuffle_wxyz(c0_wxyz_x_c1) ); + + + const simd4f c0_sum = simd4f_madd(c0_yzwx, ar3, + simd4f_madd(c0_zwxy, ar2, + simd4f_mul(c0_wxyz, ar1))); + + const simd4f c1_sum = simd4f_madd(c1_wxyz, ar1, + simd4f_madd(c1_zwxy, ar2, + simd4f_mul(c1_yzwx, ar3))); + + const simd4f c2_sum = simd4f_madd(c2_yzwx, br3, + simd4f_madd(c2_zwxy, br2, + simd4f_mul(c2_wxyz, br1))); + + const simd4f c3_sum = simd4f_madd(c3_yzwx, br3, + simd4f_madd(c3_zwxy, br2, + simd4f_mul(c3_wxyz, br1))); + + + const simd4f d0 = simd4f_mul(c1_sum, c0); + const simd4f d1 = simd4f_add(d0, simd4f_merge_high(d0, d0)); + const simd4f det = simd4f_sub(d1, simd4f_splat_y(d1)); + + const simd4f invdet = 
simd4f_splat_x( simd4f_div(simd4f_splat(1.0f), det) ); + + const simd4f o0 = simd4f_mul( simd4f_flip_sign_0101(c1_sum), invdet ); + const simd4f o1 = simd4f_mul( simd4f_flip_sign_1010(c0_sum), invdet ); + const simd4f o2 = simd4f_mul( simd4f_flip_sign_0101(c3_sum), invdet ); + const simd4f o3 = simd4f_mul( simd4f_flip_sign_1010(c2_sum), invdet ); + + const simd4x4f mt = simd4x4f_create(o0, o1, o2, o3); + + simd4x4f_transpose( &mt, out); + + return det; +} + +#ifdef __cplusplus + + #ifdef VECTORIAL_OSTREAM + #include + + vectorial_inline std::ostream& operator<<(std::ostream& os, const simd4x4f& v) { + os << "simd4x4f(simd4f(" << simd4f_get_x(v.x) << ", " + << simd4f_get_y(v.x) << ", " + << simd4f_get_z(v.x) << ", " + << simd4f_get_w(v.x) << "),\n" + << " simd4f(" << simd4f_get_x(v.y) << ", " + << simd4f_get_y(v.y) << ", " + << simd4f_get_z(v.y) << ", " + << simd4f_get_w(v.y) << "),\n" + << " simd4f(" << simd4f_get_x(v.z) << ", " + << simd4f_get_y(v.z) << ", " + << simd4f_get_z(v.z) << ", " + << simd4f_get_w(v.z) << "),\n" + << " simd4f(" << simd4f_get_x(v.w) << ", " + << simd4f_get_y(v.w) << ", " + << simd4f_get_z(v.w) << ", " + << simd4f_get_w(v.w) << "))"; + return os; + } + #endif + +#endif + + + + + +#endif diff --git a/3rdparty/vectorial/include/vectorial/simd4x4f_gnu.h b/3rdparty/vectorial/include/vectorial/simd4x4f_gnu.h new file mode 100644 index 0000000..476c9d4 --- /dev/null +++ b/3rdparty/vectorial/include/vectorial/simd4x4f_gnu.h @@ -0,0 +1,36 @@ +/* + Vectorial + Copyright (c) 2010 Mikko Lehtonen + Licensed under the terms of the two-clause BSD License (see LICENSE) +*/ +#ifndef VECTORIAL_SIMD4X4F_GNU_H +#define VECTORIAL_SIMD4X4F_GNU_H + + + +vectorial_inline void simd4x4f_transpose_inplace(simd4x4f* s) { + const _simd4f_union sx = { s->x }; + const _simd4f_union sy = { s->y }; + const _simd4f_union sz = { s->z }; + const _simd4f_union sw = { s->w }; + + const simd4f dx = { sx.f[0], sy.f[0], sz.f[0], sw.f[0] }; + const simd4f dy = { sx.f[1], sy.f[1], 
sz.f[1], sw.f[1] }; + const simd4f dz = { sx.f[2], sy.f[2], sz.f[2], sw.f[2] }; + const simd4f dw = { sx.f[3], sy.f[3], sz.f[3], sw.f[3] }; + + s->x = dx; + s->y = dy; + s->z = dz; + s->w = dw; + +} + +vectorial_inline void simd4x4f_transpose(const simd4x4f *s, simd4x4f *out) { + *out=*s; + simd4x4f_transpose_inplace(out); +} + + + +#endif diff --git a/3rdparty/vectorial/include/vectorial/simd4x4f_neon.h b/3rdparty/vectorial/include/vectorial/simd4x4f_neon.h new file mode 100644 index 0000000..b59537b --- /dev/null +++ b/3rdparty/vectorial/include/vectorial/simd4x4f_neon.h @@ -0,0 +1,35 @@ +/* + Vectorial + Copyright (c) 2010 Mikko Lehtonen + Licensed under the terms of the two-clause BSD License (see LICENSE) +*/ +#ifndef VECTORIAL_SIMD4X4F_NEON_H +#define VECTORIAL_SIMD4X4F_NEON_H + + +vectorial_inline void simd4x4f_transpose_inplace(simd4x4f* s) { + const _simd4f_union sx = { s->x }; + const _simd4f_union sy = { s->y }; + const _simd4f_union sz = { s->z }; + const _simd4f_union sw = { s->w }; + + const simd4f dx = simd4f_create( sx.f[0], sy.f[0], sz.f[0], sw.f[0] ); + const simd4f dy = simd4f_create( sx.f[1], sy.f[1], sz.f[1], sw.f[1] ); + const simd4f dz = simd4f_create( sx.f[2], sy.f[2], sz.f[2], sw.f[2] ); + const simd4f dw = simd4f_create( sx.f[3], sy.f[3], sz.f[3], sw.f[3] ); + + s->x = dx; + s->y = dy; + s->z = dz; + s->w = dw; + +} + +vectorial_inline void simd4x4f_transpose(const simd4x4f *s, simd4x4f *out) { + *out=*s; + simd4x4f_transpose_inplace(out); +} + + + +#endif diff --git a/3rdparty/vectorial/include/vectorial/simd4x4f_scalar.h b/3rdparty/vectorial/include/vectorial/simd4x4f_scalar.h new file mode 100644 index 0000000..66bbe2b --- /dev/null +++ b/3rdparty/vectorial/include/vectorial/simd4x4f_scalar.h @@ -0,0 +1,41 @@ +/* + Vectorial + Copyright (c) 2010 Mikko Lehtonen + Licensed under the terms of the two-clause BSD License (see LICENSE) +*/ +#ifndef VECTORIAL_SIMD4X4F_SCALAR_H +#define VECTORIAL_SIMD4X4F_SCALAR_H + + +vectorial_inline void 
simd4x4f_transpose_inplace(simd4x4f *s) { + simd4x4f d=*s; + s->x.x = d.x.x; + s->x.y = d.y.x; + s->x.z = d.z.x; + s->x.w = d.w.x; + + s->y.x = d.x.y; + s->y.y = d.y.y; + s->y.z = d.z.y; + s->y.w = d.w.y; + + s->z.x = d.x.z; + s->z.y = d.y.z; + s->z.z = d.z.z; + s->z.w = d.w.z; + + s->w.x = d.x.w; + s->w.y = d.y.w; + s->w.z = d.z.w; + s->w.w = d.w.w; + +} + +vectorial_inline void simd4x4f_transpose(const simd4x4f *s, simd4x4f *out) { + *out=*s; + simd4x4f_transpose_inplace(out); +} + + + +#endif diff --git a/3rdparty/vectorial/include/vectorial/simd4x4f_sse.h b/3rdparty/vectorial/include/vectorial/simd4x4f_sse.h new file mode 100644 index 0000000..edf632f --- /dev/null +++ b/3rdparty/vectorial/include/vectorial/simd4x4f_sse.h @@ -0,0 +1,23 @@ +/* + Vectorial + Copyright (c) 2010 Mikko Lehtonen + Licensed under the terms of the two-clause BSD License (see LICENSE) +*/ +#ifndef VECTORIAL_SIMD4X4F_SSE_H +#define VECTORIAL_SIMD4X4F_SSE_H + + + +vectorial_inline void simd4x4f_transpose_inplace(simd4x4f *s) { + _MM_TRANSPOSE4_PS(s->x, s->y, s->z, s->w); +} + +vectorial_inline void simd4x4f_transpose(const simd4x4f *s, simd4x4f *out) { + *out=*s; + simd4x4f_transpose_inplace(out); +} + + + + +#endif diff --git a/3rdparty/vectorial/include/vectorial/vec2f.h b/3rdparty/vectorial/include/vectorial/vec2f.h new file mode 100644 index 0000000..8eccef9 --- /dev/null +++ b/3rdparty/vectorial/include/vectorial/vec2f.h @@ -0,0 +1,191 @@ +/* + Vectorial + Copyright (c) 2010 Mikko Lehtonen + Licensed under the terms of the two-clause BSD License (see LICENSE) +*/ +#ifndef VECTORIAL_VEC2F_H + +#ifndef VECTORIAL_SIMD4F_H + #include "vectorial/simd4f.h" +#endif + + + +namespace vectorial { + + class vec4f; + class vec3f; + + class vec2f { + public: + + simd4f value; + + inline vec2f() {} + inline vec2f(const vec2f& v) : value(v.value) {} + inline vec2f(const simd4f& v) : value(v) {} + explicit inline vec2f(float xy) : value( simd4f_splat(xy) ) {} + inline vec2f(float x, float y) : 
value( simd4f_create(x,y,0,0) ) {} + explicit inline vec2f(const float *ary) : value( simd4f_uload2(ary) ) { } + + inline float x() const { return simd4f_get_x(value); } + inline float y() const { return simd4f_get_y(value); } + + inline void load(const float *ary) { value = simd4f_uload2(ary); } + inline void store(float *ary) const { simd4f_ustore2(value, ary); } + + enum { elements = 2 }; + + static vec2f zero() { return vec2f(simd4f_zero()); } + static vec2f one() { return vec2f(1.0f); } + static vec2f xAxis() { return vec2f(1.0f, 0.0f); } + static vec2f yAxis() { return vec2f(0.0f, 1.0f); } + + inline vec4f xyzw(float z, float w) const; + inline vec4f xy00() const; + inline vec4f xy01() const; + inline vec3f xyz(float z) const; + inline vec3f xy0() const; + inline vec2f xy() const; + + }; + + vectorial_inline vec2f operator-(const vec2f& lhs) { + return vec2f( simd4f_sub(simd4f_zero(), lhs.value) ); + } + + + vectorial_inline vec2f operator+(const vec2f& lhs, const vec2f& rhs) { + return vec2f( simd4f_add(lhs.value, rhs.value) ); + } + + vectorial_inline vec2f operator-(const vec2f& lhs, const vec2f& rhs) { + return vec2f( simd4f_sub(lhs.value, rhs.value) ); + } + + vectorial_inline vec2f operator*(const vec2f& lhs, const vec2f& rhs) { + return vec2f( simd4f_mul(lhs.value, rhs.value) ); + } + + vectorial_inline vec2f operator/(const vec2f& lhs, const vec2f& rhs) { + return vec2f( simd4f_div(lhs.value, rhs.value) ); + } + + + vectorial_inline vec2f operator+=(vec2f& lhs, const vec2f& rhs) { + return lhs = vec2f( simd4f_add(lhs.value, rhs.value) ); + } + + vectorial_inline vec2f operator-=(vec2f& lhs, const vec2f& rhs) { + return lhs = vec2f( simd4f_sub(lhs.value, rhs.value) ); + } + + vectorial_inline vec2f operator*=(vec2f& lhs, const vec2f& rhs) { + return lhs = vec2f( simd4f_mul(lhs.value, rhs.value) ); + } + + vectorial_inline vec2f operator/=(vec2f& lhs, const vec2f& rhs) { + return lhs = vec2f( simd4f_div(lhs.value, rhs.value) ); + } + + + + 
vectorial_inline vec2f operator+(const vec2f& lhs, float rhs) { + return vec2f( simd4f_add(lhs.value, simd4f_splat(rhs)) ); + } + + vectorial_inline vec2f operator-(const vec2f& lhs, float rhs) { + return vec2f( simd4f_sub(lhs.value, simd4f_splat(rhs)) ); + } + + vectorial_inline vec2f operator*(const vec2f& lhs, float rhs) { + return vec2f( simd4f_mul(lhs.value, simd4f_splat(rhs)) ); + } + + vectorial_inline vec2f operator/(const vec2f& lhs, float rhs) { + return vec2f( simd4f_div(lhs.value, simd4f_splat(rhs)) ); + } + + vectorial_inline vec2f operator+(float lhs, const vec2f& rhs) { + return vec2f( simd4f_add(simd4f_splat(lhs), rhs.value) ); + } + + vectorial_inline vec2f operator-(float lhs, const vec2f& rhs) { + return vec2f( simd4f_sub(simd4f_splat(lhs), rhs.value) ); + } + + vectorial_inline vec2f operator*(float lhs, const vec2f& rhs) { + return vec2f( simd4f_mul(simd4f_splat(lhs), rhs.value) ); + } + + vectorial_inline vec2f operator/(float lhs, const vec2f& rhs) { + return vec2f( simd4f_div(simd4f_splat(lhs), rhs.value) ); + } + + + vectorial_inline vec2f operator+=(vec2f& lhs, float rhs) { + return lhs = vec2f( simd4f_add(lhs.value, simd4f_splat(rhs)) ); + } + + vectorial_inline vec2f operator-=(vec2f& lhs, float rhs) { + return lhs = vec2f( simd4f_sub(lhs.value, simd4f_splat(rhs)) ); + } + + vectorial_inline vec2f operator*=(vec2f& lhs, float rhs) { + return lhs = vec2f( simd4f_mul(lhs.value, simd4f_splat(rhs)) ); + } + + vectorial_inline vec2f operator/=(vec2f& lhs, float rhs) { + return lhs = vec2f( simd4f_div(lhs.value, simd4f_splat(rhs)) ); + } + + + vectorial_inline float dot(const vec2f& lhs, const vec2f& rhs) { + return simd4f_get_x( simd4f_dot2(lhs.value, rhs.value) ); + } + + + vectorial_inline float length(const vec2f& v) { + return simd4f_get_x( simd4f_length2(v.value) ); + } + + vectorial_inline float length_squared(const vec2f& v) { + return simd4f_get_x( simd4f_length2_squared(v.value) ); + } + + vectorial_inline vec2f normalize(const 
vec2f& v) { + return vec2f( simd4f_normalize2(v.value) ); + } + + vectorial_inline vec2f min(const vec2f& a, const vec2f& b) { + return vec2f( simd4f_min(a.value, b.value) ); + } + + vectorial_inline vec2f max(const vec2f& a, const vec2f& b) { + return vec2f( simd4f_max(a.value, b.value) ); + } + + +} + + +namespace std { + inline ::vectorial::vec2f min(const ::vectorial::vec2f& a, const ::vectorial::vec2f& b) { return ::vectorial::min(a,b); } + inline ::vectorial::vec2f max(const ::vectorial::vec2f& a, const ::vectorial::vec2f& b) { return ::vectorial::max(a,b); } +} + + +#ifdef VECTORIAL_OSTREAM +#include + +vectorial_inline std::ostream& operator<<(std::ostream& os, const vectorial::vec2f& v) { + os << "[ " << v.x() << ", " + << v.y() << " ]"; + return os; +} +#endif + + + + +#endif diff --git a/3rdparty/vectorial/include/vectorial/vec3f.h b/3rdparty/vectorial/include/vectorial/vec3f.h new file mode 100644 index 0000000..c52e7d5 --- /dev/null +++ b/3rdparty/vectorial/include/vectorial/vec3f.h @@ -0,0 +1,197 @@ +/* + Vectorial + Copyright (c) 2010 Mikko Lehtonen + Copyright (c) 2014 Google, Inc. 
+ Licensed under the terms of the two-clause BSD License (see LICENSE) +*/ +#ifndef VECTORIAL_VEC3F_H + +#ifndef VECTORIAL_SIMD4F_H + #include "vectorial/simd4f.h" +#endif + + + +namespace vectorial { + + class vec4f; + class vec2f; + + class vec3f { + public: + + simd4f value; + + inline vec3f() {} + inline vec3f(const vec3f& v) : value(v.value) {} + inline vec3f(const simd4f& v) : value(v) {} + explicit inline vec3f(float xyz) : value( simd4f_splat(xyz) ) {} + inline vec3f(float x, float y, float z) : value( simd4f_create(x,y,z,0) ) {} + explicit inline vec3f(const float *ary) : value( simd4f_uload3(ary) ) { } + + inline float x() const { return simd4f_get_x(value); } + inline float y() const { return simd4f_get_y(value); } + inline float z() const { return simd4f_get_z(value); } + + inline void load(const float *ary) { value = simd4f_uload3(ary); } + inline void store(float *ary) const { simd4f_ustore3(value, ary); } + + enum { elements = 3 }; + + static vec3f zero() { return vec3f(simd4f_zero()); } + static vec3f one() { return vec3f(1.0f); } + static vec3f xAxis() { return vec3f(1.0f, 0.0f, 0.0f); } + static vec3f yAxis() { return vec3f(0.0f, 1.0f, 0.0f); } + static vec3f zAxis() { return vec3f(0.0f, 0.0f, 1.0f); } + + inline vec4f xyz0() const; + inline vec4f xyz1() const; + inline vec4f xyzw(float w) const; + inline vec3f xyz() const; + inline vec3f xy0() const; + inline vec2f xy() const; + }; + + vectorial_inline vec3f operator-(const vec3f& lhs) { + return vec3f( simd4f_sub(simd4f_zero(), lhs.value) ); + } + + + vectorial_inline vec3f operator+(const vec3f& lhs, const vec3f& rhs) { + return vec3f( simd4f_add(lhs.value, rhs.value) ); + } + + vectorial_inline vec3f operator-(const vec3f& lhs, const vec3f& rhs) { + return vec3f( simd4f_sub(lhs.value, rhs.value) ); + } + + vectorial_inline vec3f operator*(const vec3f& lhs, const vec3f& rhs) { + return vec3f( simd4f_mul(lhs.value, rhs.value) ); + } + + vectorial_inline vec3f operator/(const vec3f& lhs, const 
vec3f& rhs) { + return vec3f( simd4f_div(lhs.value, rhs.value) ); + } + + + vectorial_inline vec3f operator+=(vec3f& lhs, const vec3f& rhs) { + return lhs = vec3f( simd4f_add(lhs.value, rhs.value) ); + } + + vectorial_inline vec3f operator-=(vec3f& lhs, const vec3f& rhs) { + return lhs = vec3f( simd4f_sub(lhs.value, rhs.value) ); + } + + vectorial_inline vec3f operator*=(vec3f& lhs, const vec3f& rhs) { + return lhs = vec3f( simd4f_mul(lhs.value, rhs.value) ); + } + + vectorial_inline vec3f operator/=(vec3f& lhs, const vec3f& rhs) { + return lhs = vec3f( simd4f_div(lhs.value, rhs.value) ); + } + + + + vectorial_inline vec3f operator+(const vec3f& lhs, float rhs) { + return vec3f( simd4f_add(lhs.value, simd4f_splat(rhs)) ); + } + + vectorial_inline vec3f operator-(const vec3f& lhs, float rhs) { + return vec3f( simd4f_sub(lhs.value, simd4f_splat(rhs)) ); + } + + vectorial_inline vec3f operator*(const vec3f& lhs, float rhs) { + return vec3f( simd4f_mul(lhs.value, simd4f_splat(rhs)) ); + } + + vectorial_inline vec3f operator/(const vec3f& lhs, float rhs) { + return vec3f( simd4f_div(lhs.value, simd4f_splat(rhs)) ); + } + + vectorial_inline vec3f operator+(float lhs, const vec3f& rhs) { + return vec3f( simd4f_add(simd4f_splat(lhs), rhs.value) ); + } + + vectorial_inline vec3f operator-(float lhs, const vec3f& rhs) { + return vec3f( simd4f_sub(simd4f_splat(lhs), rhs.value) ); + } + + vectorial_inline vec3f operator*(float lhs, const vec3f& rhs) { + return vec3f( simd4f_mul(simd4f_splat(lhs), rhs.value) ); + } + + vectorial_inline vec3f operator/(float lhs, const vec3f& rhs) { + return vec3f( simd4f_div(simd4f_splat(lhs), rhs.value) ); + } + + + vectorial_inline vec3f operator+=(vec3f& lhs, float rhs) { + return lhs = vec3f( simd4f_add(lhs.value, simd4f_splat(rhs)) ); + } + + vectorial_inline vec3f operator-=(vec3f& lhs, float rhs) { + return lhs = vec3f( simd4f_sub(lhs.value, simd4f_splat(rhs)) ); + } + + vectorial_inline vec3f operator*=(vec3f& lhs, float rhs) { + return 
lhs = vec3f( simd4f_mul(lhs.value, simd4f_splat(rhs)) ); + } + + vectorial_inline vec3f operator/=(vec3f& lhs, float rhs) { + return lhs = vec3f( simd4f_div(lhs.value, simd4f_splat(rhs)) ); + } + + + vectorial_inline float dot(const vec3f& lhs, const vec3f& rhs) { + return simd4f_dot3_scalar(lhs.value, rhs.value); + } + + vectorial_inline vec3f cross(const vec3f& lhs, const vec3f& rhs) { + return simd4f_cross3(lhs.value, rhs.value); + } + + + vectorial_inline float length(const vec3f& v) { + return simd4f_get_x( simd4f_length3(v.value) ); + } + + vectorial_inline float length_squared(const vec3f& v) { + return simd4f_get_x( simd4f_length3_squared(v.value) ); + } + + vectorial_inline vec3f normalize(const vec3f& v) { + return vec3f( simd4f_normalize3(v.value) ); + } + + vectorial_inline vec3f min(const vec3f& a, const vec3f& b) { + return vec3f( simd4f_min(a.value, b.value) ); + } + + vectorial_inline vec3f max(const vec3f& a, const vec3f& b) { + return vec3f( simd4f_max(a.value, b.value) ); + } + +} + + +namespace std { + inline ::vectorial::vec3f min(const ::vectorial::vec3f& a, const ::vectorial::vec3f& b) { return ::vectorial::min(a,b); } + inline ::vectorial::vec3f max(const ::vectorial::vec3f& a, const ::vectorial::vec3f& b) { return ::vectorial::max(a,b); } +} + + +#ifdef VECTORIAL_OSTREAM +#include + +vectorial_inline std::ostream& operator<<(std::ostream& os, const vectorial::vec3f& v) { + os << "[ " << v.x() << ", " + << v.y() << ", " + << v.z() << " ]"; + return os; +} +#endif + + + + +#endif diff --git a/3rdparty/vectorial/include/vectorial/vec4f.h b/3rdparty/vectorial/include/vectorial/vec4f.h new file mode 100644 index 0000000..ecd59b0 --- /dev/null +++ b/3rdparty/vectorial/include/vectorial/vec4f.h @@ -0,0 +1,195 @@ +/* + Vectorial + Copyright (c) 2010 Mikko Lehtonen + Licensed under the terms of the two-clause BSD License (see LICENSE) +*/ +#ifndef VECTORIAL_VEC4F_H +#define VECTORIAL_VEC4F_H + +#ifndef VECTORIAL_SIMD4F_H + #include 
"vectorial/simd4f.h" +#endif + + + +namespace vectorial { + + class vec3f; + class vec2f; + + class vec4f { + public: + + simd4f value; + + inline vec4f() {} + inline vec4f(const vec4f& v) : value(v.value) {} + inline vec4f(const simd4f& v) : value(v) {} + explicit inline vec4f(float xyzw) : value( simd4f_splat(xyzw) ) {} + inline vec4f(float x, float y, float z, float w) : value( simd4f_create(x,y,z,w) ) {} + explicit inline vec4f(const float *ary) : value( simd4f_uload4(ary) ) { } + + inline float x() const { return simd4f_get_x(value); } + inline float y() const { return simd4f_get_y(value); } + inline float z() const { return simd4f_get_z(value); } + inline float w() const { return simd4f_get_w(value); } + + inline void load(const float *ary) { value = simd4f_uload4(ary); } + inline void store(float *ary) const { simd4f_ustore4(value, ary); } + + enum { elements = 4 }; + + + static vec4f zero() { return vec4f(simd4f_zero()); } + static vec4f one() { return vec4f(1.0f); } + static vec4f xAxis() { return vec4f(1.0f, 0.0f, 0.0f, 0.0f); } + static vec4f yAxis() { return vec4f(0.0f, 1.0f, 0.0f, 0.0f); } + static vec4f zAxis() { return vec4f(0.0f, 0.0f, 1.0f, 0.0f); } + static vec4f wAxis() { return vec4f(0.0f, 0.0f, 0.0f, 1.0f); } + + + inline vec3f xyz() const; + inline vec2f xy() const; + + }; + + + vectorial_inline vec4f operator-(const vec4f& lhs) { + return vec4f( simd4f_sub(simd4f_zero(), lhs.value) ); + } + + + vectorial_inline vec4f operator+(const vec4f& lhs, const vec4f& rhs) { + return vec4f( simd4f_add(lhs.value, rhs.value) ); + } + + vectorial_inline vec4f operator-(const vec4f& lhs, const vec4f& rhs) { + return vec4f( simd4f_sub(lhs.value, rhs.value) ); + } + + vectorial_inline vec4f operator*(const vec4f& lhs, const vec4f& rhs) { + return vec4f( simd4f_mul(lhs.value, rhs.value) ); + } + + vectorial_inline vec4f operator/(const vec4f& lhs, const vec4f& rhs) { + return vec4f( simd4f_div(lhs.value, rhs.value) ); + } + + + vectorial_inline vec4f 
operator+=(vec4f& lhs, const vec4f& rhs) { + return lhs = vec4f( simd4f_add(lhs.value, rhs.value) ); + } + + vectorial_inline vec4f operator-=(vec4f& lhs, const vec4f& rhs) { + return lhs = vec4f( simd4f_sub(lhs.value, rhs.value) ); + } + + vectorial_inline vec4f operator*=(vec4f& lhs, const vec4f& rhs) { + return lhs = vec4f( simd4f_mul(lhs.value, rhs.value) ); + } + + vectorial_inline vec4f operator/=(vec4f& lhs, const vec4f& rhs) { + return lhs = vec4f( simd4f_div(lhs.value, rhs.value) ); + } + + + + vectorial_inline vec4f operator+(const vec4f& lhs, float rhs) { + return vec4f( simd4f_add(lhs.value, simd4f_splat(rhs)) ); + } + + vectorial_inline vec4f operator-(const vec4f& lhs, float rhs) { + return vec4f( simd4f_sub(lhs.value, simd4f_splat(rhs)) ); + } + + vectorial_inline vec4f operator*(const vec4f& lhs, float rhs) { + return vec4f( simd4f_mul(lhs.value, simd4f_splat(rhs)) ); + } + + vectorial_inline vec4f operator/(const vec4f& lhs, float rhs) { + return vec4f( simd4f_div(lhs.value, simd4f_splat(rhs)) ); + } + + vectorial_inline vec4f operator+(float lhs, const vec4f& rhs) { + return vec4f( simd4f_add(simd4f_splat(lhs), rhs.value) ); + } + + vectorial_inline vec4f operator-(float lhs, const vec4f& rhs) { + return vec4f( simd4f_sub(simd4f_splat(lhs), rhs.value) ); + } + + vectorial_inline vec4f operator*(float lhs, const vec4f& rhs) { + return vec4f( simd4f_mul(simd4f_splat(lhs), rhs.value) ); + } + + vectorial_inline vec4f operator/(float lhs, const vec4f& rhs) { + return vec4f( simd4f_div(simd4f_splat(lhs), rhs.value) ); + } + + + vectorial_inline vec4f operator+=(vec4f& lhs, float rhs) { + return lhs = vec4f( simd4f_add(lhs.value, simd4f_splat(rhs)) ); + } + + vectorial_inline vec4f operator-=(vec4f& lhs, float rhs) { + return lhs = vec4f( simd4f_sub(lhs.value, simd4f_splat(rhs)) ); + } + + vectorial_inline vec4f operator*=(vec4f& lhs, float rhs) { + return lhs = vec4f( simd4f_mul(lhs.value, simd4f_splat(rhs)) ); + } + + vectorial_inline vec4f 
operator/=(vec4f& lhs, float rhs) { + return lhs = vec4f( simd4f_div(lhs.value, simd4f_splat(rhs)) ); + } + + + vectorial_inline float dot(const vec4f& lhs, const vec4f& rhs) { + return simd4f_get_x( simd4f_dot4(lhs.value, rhs.value) ); + } + + + vectorial_inline float length(const vec4f& v) { + return simd4f_get_x( simd4f_length4(v.value) ); + } + + vectorial_inline float length_squared(const vec4f& v) { + return simd4f_get_x( simd4f_length4_squared(v.value) ); + } + + vectorial_inline vec4f normalize(const vec4f& v) { + return vec4f( simd4f_normalize4(v.value) ); + } + + vectorial_inline vec4f min(const vec4f& a, const vec4f& b) { + return vec4f( simd4f_min(a.value, b.value) ); + } + + vectorial_inline vec4f max(const vec4f& a, const vec4f& b) { + return vec4f( simd4f_max(a.value, b.value) ); + } + + +} + + +namespace std { + inline ::vectorial::vec4f min(const ::vectorial::vec4f& a, const ::vectorial::vec4f& b) { return ::vectorial::min(a,b); } + inline ::vectorial::vec4f max(const ::vectorial::vec4f& a, const ::vectorial::vec4f& b) { return ::vectorial::max(a,b); } +} + + +#ifdef VECTORIAL_OSTREAM +#include + +vectorial_inline std::ostream& operator<<(std::ostream& os, const vectorial::vec4f& v) { + os << "[ " << v.x() << ", " + << v.y() << ", " + << v.z() << ", " + << v.w() << " ]"; + return os; +} +#endif + + +#endif diff --git a/3rdparty/vectorial/include/vectorial/vec_convert.h b/3rdparty/vectorial/include/vectorial/vec_convert.h new file mode 100644 index 0000000..98aac8e --- /dev/null +++ b/3rdparty/vectorial/include/vectorial/vec_convert.h @@ -0,0 +1,31 @@ +/* + Vectorial + Copyright (c) 2010 Mikko Lehtonen + Licensed under the terms of the two-clause BSD License (see LICENSE) +*/ +#ifndef VECTORIAL_VEC_CONVERT_H +#define VECTORIAL_VEC_CONVERT_H + + +namespace vectorial { + + inline vec3f vec4f::xyz() const { return vec3f(value); } + inline vec2f vec4f::xy() const { return vec2f(value); } + + inline vec4f vec3f::xyz0() const { return 
vec4f(simd4f_zero_w(value)); } + inline vec4f vec3f::xyz1() const { return xyz0() + vec4f(0.0f, 0.0f, 0.0f, 1.0f); } + inline vec4f vec3f::xyzw(float w) const { return xyz0() + vec4f(0.0f, 0.0f, 0.0f, w); } + inline vec3f vec3f::xyz() const { return vec3f(value); } + inline vec3f vec3f::xy0() const { return vec3f(value) * vec3f(1.0f, 1.0f, 0.0f); } + inline vec2f vec3f::xy() const { return vec2f(value); } + + inline vec4f vec2f::xy00() const { return vec4f(simd4f_zero_zw(value)); } + inline vec4f vec2f::xy01() const { return xy00() + vec4f(0.0f, 0.0f, 0.0f, 1.0f); } + inline vec4f vec2f::xyzw(float z, float w) const { return xy00() + vec4f(0.0f, 0.0f, z, w); } + inline vec3f vec2f::xy0() const { return vec3f(simd4f_zero_zw(value)); } + inline vec2f vec2f::xy() const { return vec2f(value); } + +} + + +#endif diff --git a/3rdparty/vectorial/include/vectorial/vectorial.h b/3rdparty/vectorial/include/vectorial/vectorial.h new file mode 100644 index 0000000..31f71b9 --- /dev/null +++ b/3rdparty/vectorial/include/vectorial/vectorial.h @@ -0,0 +1,19 @@ +/* + Vectorial + Copyright (c) 2010 Mikko Lehtonen + Licensed under the terms of the two-clause BSD License (see LICENSE) +*/ +#ifndef VECTORIAL_VECTORIAL_H +#define VECTORIAL_VECTORIAL_H + + +#include "vectorial/vec2f.h" +#include "vectorial/vec3f.h" +#include "vectorial/vec4f.h" + +#include "vectorial/vec_convert.h" + +#include "vectorial/mat4f.h" + + +#endif diff --git a/3rdparty/vectorial/spec/spec.cpp b/3rdparty/vectorial/spec/spec.cpp new file mode 100644 index 0000000..16c16cc --- /dev/null +++ b/3rdparty/vectorial/spec/spec.cpp @@ -0,0 +1,229 @@ +/* Specific - Minimal C++ spec framework. + + +The zlib/libpng License + + +Copyright (c) 2008 Mikko Lehtonen + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. 
+ +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source + distribution. +*/ + + +#include "spec.h" + +#include + +namespace specific { + + + + void SpecWriter::startGroup(std::string /*group*/, std::string /*description*/) {} + + void SpecWriter::addFailedAssertation(std::string msg, const char *file, int line) { + mFailures.push_back( SpecFailure(msg,file,line) ); + } + void SpecWriter::addSpecResult(SpecResult r) { + mResults.push_back( r ); + } + void SpecWriter::start() {} + void SpecWriter::stop() { + std::cout << std::endl; + size_t nth = 0; + for(std::vector::iterator i=mFailures.begin(); i != mFailures.end(); ++i, ++nth) + { + std::cout << std::endl; + std::cout << (nth+1) << ") Failed assertation at " << i->file << ":" + << i->line << ":" << std::endl << " " << i->msg << std::endl; + } + std::cout << std::endl << mResults.size() << " examples, " << mFailures.size() << " failures" << std::endl; + + } + + + + void ProgressWriter::addSpecResult(SpecResult r) { + SpecWriter::addSpecResult(r); + switch(r.type) { + case SpecResult::PASSED: + std::cout << "."; + break; + case SpecResult::FAILED: + std::cout << "F"; + break; + case SpecResult::ERRORED: + std::cout << "E"; + break; + } + std::cout << std::flush; + } + + + + void SpecdocWriter::startGroup(std::string group, std::string description) { + std::cout << group << ": " << description << std::endl; + } + + + void 
SpecdocWriter::addSpecResult(SpecResult r) { + SpecWriter::addSpecResult(r); + size_t nth = mFailures.size(); + std::cout << "- " << r.test; + switch(r.type) { + case SpecResult::PASSED: + std::cout << " [OK]"; + break; + case SpecResult::FAILED: + std::cout << " [FAILED - " << nth << "]"; + break; + case SpecResult::ERRORED: + std::cout << " [ERROR - "<< nth <<"]"; + break; + } + std::cout << std::endl; + } + + + + + class spec_failure {}; + + + + SpecBase::SpecBase() : mWriter(NULL), mName(NULL), + mFailed(false), mLastFailed(false), mError(false), mExecutionPoint(0), mContinuePoint(0) + { + SpecRunner::getInstance().add(this); + } + + + SpecBase::~SpecBase() { + + } + + + bool SpecBase::startSpec(const char* name) + { + endSpec(); + + mExecutionPoint++; + if(mExecutionPoint <= mContinuePoint) return false; + mContinuePoint++; + + mName = name; + return true; + } + + + void SpecBase::endSpec() + { + if(!mName) return; + + SpecResult r; + r.group = getGroup(); + r.description = getDescription(); + r.type = SpecResult::PASSED; + if(mLastFailed) r.type = SpecResult::FAILED; + if(mError) r.type = SpecResult::ERRORED; + r.test = mName; + mWriter->addSpecResult( r ); + + mName = NULL; + } + + + void SpecBase::should_test(bool value, const char* message, const char* file, int line) { + mLastFailed=false; + if(!value) { + mWriter->addFailedAssertation(message, file, line); + mLastFailed = mFailed = true; + throw spec_failure(); + } + } + + + void SpecBase::error(std::string msg) { + mWriter->addFailedAssertation(msg, "exception", 0); + mLastFailed = true; + mFailed = true; + mError = true; + } + + bool SpecBase::done() { + if( mError ) { + mError = false; + return false; + } + return true; + } + + + SpecRunner::SpecRunner() {} + SpecRunner::~SpecRunner() { } + + SpecRunner& SpecRunner::getInstance() { + static SpecRunner* instance = NULL; + if( instance == NULL ) { + instance = new SpecRunner; + } + return *instance; + } + + + bool SpecRunner::run(SpecWriter& writer, 
const std::string subset) { + bool success = true; + + writer.start(); + std::vector::iterator i = mSpecs.begin(); + for(; i != mSpecs.end(); ++i) { + SpecBase *b = *i; + if( b->getGroup().find(subset, 0) == std::string::npos ) continue; + b->mContinuePoint = 0; + b->setWriter(&writer); + writer.startGroup( b->getGroup(), b->getDescription() ); + do { + b->mExecutionPoint = 0; + try { + b->specify(); + } catch(spec_failure& e) { + b->mError=true; + } catch( std::exception& e) { + b->error(e.what()); + } catch( ... ) { + b->error("unknown exception"); + } + b->endSpec(); + + } while( !b->done() ); + + success = success && b->isSuccessful(); + + } + writer.stop(); + + return success; + } + + +} + + + + diff --git a/3rdparty/vectorial/spec/spec.h b/3rdparty/vectorial/spec/spec.h new file mode 100644 index 0000000..d5644d5 --- /dev/null +++ b/3rdparty/vectorial/spec/spec.h @@ -0,0 +1,217 @@ +/* Specific - Minimal C++ spec framework. + + +The zlib/libpng License + + +Copyright (c) 2008 Mikko Lehtonen + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source + distribution. 
+*/ + + +#ifndef SPECIFIC_SPEC_H +#define SPECIFIC_SPEC_H + +#include +#include +#include +#include + +namespace specific { + + + class SpecResult { + public: + typedef enum { + PASSED, + FAILED, + ERRORED + } Type; + + Type type; + + std::string group; + std::string description; + std::string test; + }; + + + class SpecFailure { + public: + SpecFailure(std::string amsg, const char* afile, int aline) + : msg(amsg), file(afile), line(aline) { } + std::string msg; + const char* file; + int line; + }; + + + class SpecWriter { + public: + std::vector mResults; + std::vector mFailures; + SpecWriter() {} + virtual ~SpecWriter() {} + virtual void startGroup(std::string group, std::string description); + virtual void addFailedAssertation(std::string msg, const char *file, int line); + virtual void addSpecResult(SpecResult r); + virtual void start(); + virtual void stop(); + }; + + + class ProgressWriter : public SpecWriter { + public: + void addSpecResult(SpecResult r); + }; + + + + class SpecdocWriter : public SpecWriter { + public: + void startGroup(std::string group, std::string description); + void addSpecResult(SpecResult r); + }; + + + + template std::string inspect(const T& value) { + std::stringstream ss; + ss << value; + return ss.str(); + } + + + class SpecBase { + public: + SpecBase(); + virtual ~SpecBase(); + + virtual void specify() = 0; + + void setWriter(SpecWriter* w) { mWriter = w; } + + bool startSpec(const char* name); + void endSpec(); + + void should_test(bool value, const char* message, const char* file, int line); + + template void should_equal_template(const T1& a, const T2& b, const char* file, int line) { + std::stringstream ss; + ss << "`" << ::specific::inspect(a) << "'" << " == " << "`" << ::specific::inspect(b) << "'"; + should_test( a == b, ss.str().c_str(), file, line); + } + + template void should_not_equal_template(const T1& a, const T2& b, const char* file, int line) { + std::stringstream ss; + ss << "`" << ::specific::inspect(a) << "'" 
<< " != " << "`" << ::specific::inspect(b) << "'"; + should_test( a != b, ss.str().c_str(), file, line); + } + + + + virtual std::string getGroup() = 0; + virtual std::string getDescription() = 0; + + bool isSuccessful() { return !mFailed; } + + bool done(); + + void error(std::string msg); + + SpecWriter* mWriter; + const char* mName; + bool mFailed; + bool mLastFailed; + bool mError; + int mExecutionPoint; + int mContinuePoint; + char *mFile; + std::string mErrorMessage; + int mLine; + }; + + + class SpecRunner { + public: + static SpecRunner& getInstance(); + void add(SpecBase* spec) { mSpecs.push_back( spec ); } + bool run(SpecWriter& writer, const std::string subset = ""); + private: + + std::vector mSpecs; + + SpecRunner(); + ~SpecRunner(); + }; + + #define SPEC_UNIQUE_NAME3(x,y) x##y + #define SPEC_UNIQUE_NAME2(x,y) SPEC_UNIQUE_NAME3(x,y) + + #define SPEC_NAME(x) SPEC_UNIQUE_NAME2(SPEC_##x, SPEC_UNIQUE_NAME2(_startingOnLine, __LINE__) ) + + + #define describe(group, description) \ + class SPEC_NAME(group) : public specific::SpecBase \ + { \ + public: \ + void specify(); \ + std::string getGroup() { return #group; } \ + std::string getDescription() { return description; } \ + }; \ + static SPEC_NAME(group) SPEC_UNIQUE_NAME2(SPEC_NAME(group), _instance); \ + void SPEC_NAME(group)::specify() + + + #define it(description) if(startSpec(description)) + + + // Matchers + #define should_be_true(a) should_test(a, #a, __FILE__, __LINE__) + #define should_be_false(a) should_be_true( !a ) + + #ifndef SPECIFIC_NO_OSTREAM + #define should_equal(a, b) should_equal_template( a,b, __FILE__, __LINE__ ) + #define should_not_equal(a, b) should_not_equal_template( a,b, __FILE__, __LINE__ ) + #else + #define should_equal(a, b) should_be_true( (a) == (b) ) + #define should_not_equal(a, b) should_be_true( (a) != (b) ) + #endif + + #define should_throw(code, what) \ + do { \ + bool _thrown = false; \ + try { \ + code ; \ + } catch(what& e) { \ + _thrown = true; \ + } \ + 
should_test(_thrown, "should throw exception " #what, __FILE__, __LINE__); \ + } while(0) + + + +} + + + +#endif /* Include guard */ + diff --git a/3rdparty/vectorial/spec/spec_helper.h b/3rdparty/vectorial/spec/spec_helper.h new file mode 100644 index 0000000..cb964d7 --- /dev/null +++ b/3rdparty/vectorial/spec/spec_helper.h @@ -0,0 +1,215 @@ +#ifndef VECTORIAL_SPEC_HELPER_H +#define VECTORIAL_SPEC_HELPER_H + +#define VECTORIAL_OSTREAM + +#include "spec.h" + +#include "vectorial/vectorial.h" + +#ifdef VECTORIAL_HAVE_SIMD2F +#include "vectorial/simd2f.h" +#endif + +#include +#include +#include + +#define should_be_close_to(a,b,tolerance) should_be_close_to_(this, a,b,tolerance,__FILE__,__LINE__) +#define should_be_equal_simd4f( a, b, tolerance) should_be_equal_simd4f_(this, a,b,tolerance,__FILE__,__LINE__) +#define should_be_equal_simd2f( a, b, tolerance) should_be_equal_simd2f_(this, a,b,tolerance,__FILE__,__LINE__) +#define should_be_equal_vec4f( a, b, tolerance) should_be_equal_vec4f_(this, a,b,tolerance,__FILE__,__LINE__) +#define should_be_equal_vec3f( a, b, tolerance) should_be_equal_vec3f_(this, a,b,tolerance,__FILE__,__LINE__) +#define should_be_equal_vec2f( a, b, tolerance) should_be_equal_vec2f_(this, a,b,tolerance,__FILE__,__LINE__) + +#define should_be_equal_simd4x4f( a, b, tolerance) should_be_equal_simd4x4f_(this, a,b,tolerance,__FILE__,__LINE__) + +#define should_be_equal_mat4f( a, b, tolerance) should_be_equal_mat4f_(this, a,b,tolerance,__FILE__,__LINE__) + +// Based on: +// http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm +// +static inline bool compare_floats(float A, float B, int maxUlps) +{ + // Make sure maxUlps is non-negative and small enough that the + // default NAN won't compare as equal to anything. 
+ // assert(maxUlps > 0 && maxUlps < 4 * 1024 * 1024); + union { + float f; + int i; + } f2iA, f2iB; + f2iA.f = A; + f2iB.f = B; + + int aInt = f2iA.i; +// int aInt = *(int*)&A; + // Make aInt lexicographically ordered as a twos-complement int + if (aInt < 0) + aInt = 0x80000000 - aInt; + // Make bInt lexicographically ordered as a twos-complement int + int bInt = f2iB.i; +// int bInt = *(int*)&B; + if (bInt < 0) + bInt = 0x80000000 - bInt; + int intDiff = abs(aInt - bInt); + if (intDiff <= maxUlps) + return true; + return false; +} + + + + + + + + + +static inline void should_be_close_to_(specific::SpecBase *spec, float a, float b, int tolerance, const char *file, int line) { + + bool equal=true; + if( !compare_floats(a,b,tolerance) ) equal = false; + + std::stringstream ss; + ss << a << " == " << b << " (with tolerance of " << tolerance << ")"; + spec->should_test(equal, ss.str().c_str(), file, line); + + +} + +#ifdef VECTORIAL_HAVE_SIMD2F +static inline void should_be_equal_simd2f_(specific::SpecBase *spec, const simd2f& a, const simd2f& b, int tolerance, const char *file, int line) { + + bool equal=true; + if( !compare_floats( simd2f_get_x(a), simd2f_get_x(b), tolerance) ) equal = false; + if( !compare_floats( simd2f_get_y(a), simd2f_get_y(b), tolerance) ) equal = false; + + std::stringstream ss; + ss << a << " == " << b << " (with tolerance of " << tolerance << ")"; + spec->should_test(equal, ss.str().c_str(), file, line); + +} +#endif + +static inline void should_be_equal_simd4f_(specific::SpecBase *spec, const simd4f& a, const simd4f& b, int tolerance, const char *file, int line) { + + bool equal=true; + if( !compare_floats( simd4f_get_x(a), simd4f_get_x(b), tolerance) ) equal = false; + if( !compare_floats( simd4f_get_y(a), simd4f_get_y(b), tolerance) ) equal = false; + if( !compare_floats( simd4f_get_z(a), simd4f_get_z(b), tolerance) ) equal = false; + if( !compare_floats( simd4f_get_w(a), simd4f_get_w(b), tolerance) ) equal = false; + + std::stringstream 
ss; + ss << a << " == " << b << " (with tolerance of " << tolerance << ")"; + spec->should_test(equal, ss.str().c_str(), file, line); + + +} + +static inline void should_be_equal_vec4f_(specific::SpecBase *spec, const vectorial::vec4f& a, const vectorial::vec4f& b, int tolerance, const char *file, int line) { + + bool equal=true; + if( !compare_floats( a.x(), b.x(), tolerance) ) equal = false; + if( !compare_floats( a.y(), b.y(), tolerance) ) equal = false; + if( !compare_floats( a.z(), b.z(), tolerance) ) equal = false; + if( !compare_floats( a.w(), b.w(), tolerance) ) equal = false; + + std::stringstream ss; + ss << a << " == " << b << " (with tolerance of " << tolerance << ")"; + spec->should_test(equal, ss.str().c_str(), file, line); + + +} + +static inline void should_be_equal_vec3f_(specific::SpecBase *spec, const vectorial::vec3f& a, const vectorial::vec3f& b, int tolerance, const char *file, int line) { + + bool equal=true; + if( !compare_floats( a.x(), b.x(), tolerance) ) equal = false; + if( !compare_floats( a.y(), b.y(), tolerance) ) equal = false; + if( !compare_floats( a.z(), b.z(), tolerance) ) equal = false; + + std::stringstream ss; + ss << a << " == " << b << " (with tolerance of " << tolerance << ")"; + spec->should_test(equal, ss.str().c_str(), file, line); + + +} + +static inline void should_be_equal_vec2f_(specific::SpecBase *spec, const vectorial::vec2f& a, const vectorial::vec2f& b, int tolerance, const char *file, int line) { + + bool equal=true; + if( !compare_floats( a.x(), b.x(), tolerance) ) equal = false; + if( !compare_floats( a.y(), b.y(), tolerance) ) equal = false; + + std::stringstream ss; + ss << a << " == " << b << " (with tolerance of " << tolerance << ")"; + spec->should_test(equal, ss.str().c_str(), file, line); + + +} + + + +static inline void should_be_equal_simd4x4f_(specific::SpecBase *spec, const simd4x4f& a, const simd4x4f& b, int tolerance, const char *file, int line) { + + bool equal=true; + if( !compare_floats( 
simd4f_get_x(a.x), simd4f_get_x(b.x), tolerance) ) equal = false; + if( !compare_floats( simd4f_get_y(a.x), simd4f_get_y(b.x), tolerance) ) equal = false; + if( !compare_floats( simd4f_get_z(a.x), simd4f_get_z(b.x), tolerance) ) equal = false; + if( !compare_floats( simd4f_get_w(a.x), simd4f_get_w(b.x), tolerance) ) equal = false; + + if( !compare_floats( simd4f_get_x(a.y), simd4f_get_x(b.y), tolerance) ) equal = false; + if( !compare_floats( simd4f_get_y(a.y), simd4f_get_y(b.y), tolerance) ) equal = false; + if( !compare_floats( simd4f_get_z(a.y), simd4f_get_z(b.y), tolerance) ) equal = false; + if( !compare_floats( simd4f_get_w(a.y), simd4f_get_w(b.y), tolerance) ) equal = false; + + if( !compare_floats( simd4f_get_x(a.z), simd4f_get_x(b.z), tolerance) ) equal = false; + if( !compare_floats( simd4f_get_y(a.z), simd4f_get_y(b.z), tolerance) ) equal = false; + if( !compare_floats( simd4f_get_z(a.z), simd4f_get_z(b.z), tolerance) ) equal = false; + if( !compare_floats( simd4f_get_w(a.z), simd4f_get_w(b.z), tolerance) ) equal = false; + + if( !compare_floats( simd4f_get_x(a.w), simd4f_get_x(b.w), tolerance) ) equal = false; + if( !compare_floats( simd4f_get_y(a.w), simd4f_get_y(b.w), tolerance) ) equal = false; + if( !compare_floats( simd4f_get_z(a.w), simd4f_get_z(b.w), tolerance) ) equal = false; + if( !compare_floats( simd4f_get_w(a.w), simd4f_get_w(b.w), tolerance) ) equal = false; + + std::stringstream ss; + ss << a << " == " << b << " (with tolerance of " << tolerance << ")"; + spec->should_test(equal, ss.str().c_str(), file, line); + + +} + +static inline void should_be_equal_mat4f_(specific::SpecBase *spec, const vectorial::mat4f& a, const vectorial::mat4f& b, int tolerance, const char *file, int line) { + + bool equal=true; + if( !compare_floats( simd4f_get_x(a.value.x), simd4f_get_x(b.value.x), tolerance) ) equal = false; + if( !compare_floats( simd4f_get_y(a.value.x), simd4f_get_y(b.value.x), tolerance) ) equal = false; + if( !compare_floats( 
simd4f_get_z(a.value.x), simd4f_get_z(b.value.x), tolerance) ) equal = false; + if( !compare_floats( simd4f_get_w(a.value.x), simd4f_get_w(b.value.x), tolerance) ) equal = false; + + if( !compare_floats( simd4f_get_x(a.value.y), simd4f_get_x(b.value.y), tolerance) ) equal = false; + if( !compare_floats( simd4f_get_y(a.value.y), simd4f_get_y(b.value.y), tolerance) ) equal = false; + if( !compare_floats( simd4f_get_z(a.value.y), simd4f_get_z(b.value.y), tolerance) ) equal = false; + if( !compare_floats( simd4f_get_w(a.value.y), simd4f_get_w(b.value.y), tolerance) ) equal = false; + + if( !compare_floats( simd4f_get_x(a.value.z), simd4f_get_x(b.value.z), tolerance) ) equal = false; + if( !compare_floats( simd4f_get_y(a.value.z), simd4f_get_y(b.value.z), tolerance) ) equal = false; + if( !compare_floats( simd4f_get_z(a.value.z), simd4f_get_z(b.value.z), tolerance) ) equal = false; + if( !compare_floats( simd4f_get_w(a.value.z), simd4f_get_w(b.value.z), tolerance) ) equal = false; + + if( !compare_floats( simd4f_get_x(a.value.w), simd4f_get_x(b.value.w), tolerance) ) equal = false; + if( !compare_floats( simd4f_get_y(a.value.w), simd4f_get_y(b.value.w), tolerance) ) equal = false; + if( !compare_floats( simd4f_get_z(a.value.w), simd4f_get_z(b.value.w), tolerance) ) equal = false; + if( !compare_floats( simd4f_get_w(a.value.w), simd4f_get_w(b.value.w), tolerance) ) equal = false; + + std::stringstream ss; + ss << a << " == " << b << " (with tolerance of " << tolerance << " ulps)"; + spec->should_test(equal, ss.str().c_str(), file, line); + + +} + + + +#endif diff --git a/3rdparty/vectorial/spec/spec_main.cpp b/3rdparty/vectorial/spec/spec_main.cpp new file mode 100644 index 0000000..b3031a8 --- /dev/null +++ b/3rdparty/vectorial/spec/spec_main.cpp @@ -0,0 +1,55 @@ +/* Specific - Minimal C++ spec framework. + + +The zlib/libpng License + + +Copyright (c) 2008 Mikko Lehtonen + +This software is provided 'as-is', without any express or implied +warranty. 
In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source + distribution. +*/ + + +#include "spec.h" +#include + +int main(int argc, char *argv[]) +{ + + std::string subset(""); + + specific::ProgressWriter progressWriter; + specific::SpecdocWriter specdocWriter; + specific::SpecWriter* writer = &progressWriter; + + for(size_t i = 1; i < size_t(argc); ++i) { + if( std::string("-s") == argv[i] ) { + writer = &specdocWriter; + } else { + subset = argv[i]; + } + } + + + bool success = specific::SpecRunner::getInstance().run(*writer, subset); + + return success ? 
EXIT_SUCCESS : EXIT_FAILURE; +} + diff --git a/3rdparty/vectorial/spec/spec_mat4f.cpp b/3rdparty/vectorial/spec/spec_mat4f.cpp new file mode 100644 index 0000000..f54cb6b --- /dev/null +++ b/3rdparty/vectorial/spec/spec_mat4f.cpp @@ -0,0 +1,29 @@ +#include "spec_helper.h" +#include +using vectorial::vec4f; +using vectorial::mat4f; + +const int epsilon = 1; + +describe(mat4f, "constructing") { + it("should have default constructor that does nothing..") { + mat4f x; + } + + it("should have constructor that constructs from four vec4") { + mat4f x( vec4f(1,2,3,4), vec4f(5,6,7,8), vec4f(9,10,11,12), vec4f(13,14,15,16) ); + + // octave mat4f: [1,5,9,13 ; 2,6,10,14 ; 3,7,11,15 ; 4,8,12,16 ] + should_be_equal_mat4f(x, simd4x4f_create(simd4f_create(1.000000000000000f, 2.000000000000000f, 3.000000000000000f, 4.000000000000000f), simd4f_create(5.000000000000000f, 6.000000000000000f, 7.000000000000000f, 8.000000000000000f), simd4f_create(9.000000000000000f, 10.000000000000000f, 11.000000000000000f, 12.000000000000000f), simd4f_create(13.000000000000000f, 14.000000000000000f, 15.000000000000000f, 16.000000000000000f)), epsilon ); + } + + it("should have static function to create identity matrix") { + + mat4f x = mat4f::identity(); + + // octave mat4f: [1,0,0,0;0,1,0,0;0,0,1,0;0,0,0,1] + should_be_equal_mat4f(x, simd4x4f_create(simd4f_create(1.000000000000000f, 0.000000000000000f, 0.000000000000000f, 0.000000000000000f), simd4f_create(0.000000000000000f, 1.000000000000000f, 0.000000000000000f, 0.000000000000000f), simd4f_create(0.000000000000000f, 0.000000000000000f, 1.000000000000000f, 0.000000000000000f), simd4f_create(0.000000000000000f, 0.000000000000000f, 0.000000000000000f, 1.000000000000000f)), epsilon ); + } + +} + diff --git a/3rdparty/vectorial/spec/spec_simd2f.cpp b/3rdparty/vectorial/spec/spec_simd2f.cpp new file mode 100644 index 0000000..5a25aa1 --- /dev/null +++ b/3rdparty/vectorial/spec/spec_simd2f.cpp @@ -0,0 +1,242 @@ + +#include "spec_helper.h" + +const int 
epsilon = 1; + +#ifdef VECTORIAL_HAVE_SIMD2F + +describe(simd2f, "sanity") { + it("VECTORIAL_SIMD_TYPE should be defined to a string") { + std::cout << "Simd type: " << VECTORIAL_SIMD_TYPE << std::endl; + } +} + +describe(simd2f, "creating") { + + it("should be possible to create with simd2f_create") { + + simd2f x = simd2f_create(1, 2); + + should_be_close_to( simd2f_get_x(x), 1, epsilon); + should_be_close_to( simd2f_get_y(x), 2, epsilon); + + // octave simd2f: [1,2] + should_be_equal_simd2f(x, simd2f_create(1.000000000000000f, 2.000000000000000f), epsilon ); + + } + + it("should have simd2f_zero for zero vector") { + + simd2f x = simd2f_zero(); + + // octave simd2f: [0,0] + should_be_equal_simd2f(x, simd2f_create(0.000000000000000f, 0.000000000000000f), epsilon ); + } + + +} +#ifdef _MSC_VER +#include +#else +#include +#endif + +#define unaligned_mem(n) ((float*)((unsigned char*)alloca(sizeof(float)*n+4)+4)) + +describe(simd2f, "utilities") { + + it("should have simd2f_uload2 for loading two float values from float an unaligned array into simd2f") { + float *f = unaligned_mem(2); + f[0] = 1; + f[1] = 2; + simd2f x = simd2f_uload2(f); + // octave simd2f: [1,2] + should_be_equal_simd2f(x, simd2f_create(1.000000000000000f, 2.000000000000000f), epsilon ); + } + + it("should have simd2f_ustore2 for storing two float values from simd2f to an unaligned array") { + float *f = unaligned_mem(2); + f[0] = -1; + f[1] = -1; + simd2f a = simd2f_create(1,2); + simd2f_ustore2(a, f); + should_be_close_to(f[0], 1, epsilon); + should_be_close_to(f[1], 2, epsilon); + } + + + it("should have simd2f_splat that expands a single scalar to all elements") { + simd2f x = simd2f_splat(42); + // octave simd2f: [42,42] + should_be_equal_simd2f(x, simd2f_create(42.000000000000000f, 42.000000000000000f), epsilon ); + } + + it("should have simd2f_splat_x,y splatting of an element") { + simd2f a = simd2f_create(1,2); + + simd2f x; + + x = simd2f_splat_x(a); + // octave simd2f: [1,1] + 
should_be_equal_simd2f(x, simd2f_create(1.000000000000000f, 1.000000000000000f), epsilon ); + + x = simd2f_splat_y(a); + // octave simd2f: [2,2] + should_be_equal_simd2f(x, simd2f_create(2.000000000000000f, 2.000000000000000f), epsilon ); + + } + +#if 0 + it("should have simd2f_sum that adds elements") { + simd2f a = simd2f_create(1,2); + simd2f x = simd2f_sum(a); + // octave simd2f: [sum([1,2]), sum([1,2,3,4])] + should_be_equal_simd2f(x, simd2f_create(3.000000000000000f, 10.000000000000000f), epsilon ); + + } +#endif + + it("should have simd2f_reciprocal") { + simd2f a = simd2f_create(0.00001f, 2.00001f); + simd2f x = simd2f_reciprocal(a); + // octave simd2f: 1 ./ [0.00001, 2.00001] + should_be_equal_simd2f(x, simd2f_create(99999.999999999985448f, 0.499997500012500f), epsilon ); + } + + it("should have simd2f_sqrt") { + simd2f a = simd2f_create(0.00001f, 2.00001f); + simd2f x = simd2f_sqrt(a); + // octave simd2f: sqrt([0.00001, 2.00001]) + should_be_equal_simd2f(x, simd2f_create(0.003162277660168f, 1.414217097902582f), epsilon ); + + x = simd2f_sqrt( simd2f_create(0.0f, 0.0f) ); + // octave simd2f: sqrt([0, 0]) + should_be_equal_simd2f(x, simd2f_create(0.000000000000000f, 0.000000000000000f), epsilon ); + } + + it("should have simd2f_rsqrt for reciprocal of square-root") { + simd2f a = simd2f_create(0.00001f, 2.00001f); + simd2f x = simd2f_rsqrt(a); + const int epsilon = 4; // Grant larger error + // octave simd2f: 1 ./ sqrt([0.00001, 2.00001]) + should_be_equal_simd2f(x, simd2f_create(316.227766016837904f, 0.707105013426224f), epsilon ); + } + +} + +describe(simd2f, "arithmetic with another simd2f") { + + it("should have simd2f_add for component-wise addition") { + simd2f a = simd2f_create(1,2); + simd2f b = simd2f_create(10,20); + + simd2f x = simd2f_add(a,b); + // octave simd2f: [1,2] + [10,20] + should_be_equal_simd2f(x, simd2f_create(11.000000000000000f, 22.000000000000000f), epsilon ); + } + + it("should have simd2f_sub for component-wise subtraction") { + 
simd2f a = simd2f_create(1,2); + simd2f b = simd2f_create(10,20); + + simd2f x = simd2f_sub(b,a); + // octave simd2f: [10,20] - [1,2] + should_be_equal_simd2f(x, simd2f_create(9.000000000000000f, 18.000000000000000f), epsilon ); + } + + it("should have simd2f_mul for component-wise multiply") { + simd2f a = simd2f_create(1,2); + simd2f b = simd2f_create(10,20); + + simd2f x = simd2f_mul(a,b); + // octave simd2f: [1,2] .* [10,20] + should_be_equal_simd2f(x, simd2f_create(10.000000000000000f, 40.000000000000000f), epsilon ); + } + + it("should have simd2f_div for component-wise division") { + simd2f a = simd2f_create(1,2); + simd2f b = simd2f_create(10,20); + + simd2f x = simd2f_div(b,a); + // octave simd2f: [10,20] ./ [1,2] + should_be_equal_simd2f(x, simd2f_create(10.000000000000000f, 10.000000000000000f), epsilon ); + } + + it("should have simd2f_madd for multiply-add") { + simd2f a = simd2f_create(1,2); + simd2f b = simd2f_create(100,100); + simd2f c = simd2f_create(6,7); + + simd2f x = simd2f_madd(a,b,c); + // octave simd2f: [1,2] .* [100,100] .+ [6,7] + should_be_equal_simd2f(x, simd2f_create(106.000000000000000f, 207.000000000000000f), epsilon ); + + } + +} + + +describe(simd2f, "vector math") { + + it("should have simd2f_dot2 for two component dot product") { + simd2f a = simd2f_create(1,2); + simd2f b = simd2f_create(10,20); + + simd2f x = simd2f_dot2(a,b); + // octave simd2f: [dot([1, 2], [10, 20]),dot([1, 2], [10, 20])] + should_be_equal_simd2f(x, simd2f_create(50.000000000000000f, 50.000000000000000f), epsilon ); + } + + it("should have simd2f_length2 for two component vector length") { + simd2f a = simd2f_create(1,2); + simd2f x = simd2f_length2(a); + // octave simd2f: [norm([1,2]),norm([1,2])] + should_be_equal_simd2f(x, simd2f_create(2.236067977499790f, 2.236067977499790f), epsilon ); + + } + + + it("should have simd2f_length2_squared for two component squared vector length") { + simd2f a = simd2f_create(1,2); + simd2f x = simd2f_length2_squared(a); + 
// octave simd2f: ([dot([1,2], [1,2]), dot([1,2], [1,2])]) + should_be_equal_simd2f(x, simd2f_create(5.000000000000000f, 5.000000000000000f), epsilon ); + + } + + it("should have simd2f_normalize2 for normalizing two component vector to unit length") { + simd2f a = simd2f_create(1,2); + simd2f x = simd2f_normalize2(a); + // octave simd2f: [1,2] / norm([1,2]) + should_be_equal_simd2f(x, simd2f_create(0.447213595499958f, 0.894427190999916f), epsilon ); + } + +} + + +describe(simd2f, "min-max") { + + it("should have simd2f_min for choosing minimum elements") { + simd2f a = simd2f_create(1.0f, 2.0f); + simd2f b = simd2f_create(2.0f, -2.0f); + + simd2f x = simd2f_min(a,b); + should_be_equal_simd2f(x, simd2f_create(1.0f, -2.0f), epsilon); + + } + + it("should have simd2f_max for choosing maximum elements") { + simd2f a = simd2f_create(1.0f, 2.0f); + simd2f b = simd2f_create(2.0f, -2.0f); + + simd2f x = simd2f_max(a,b); + should_be_equal_simd2f(x, simd2f_create(2.0f, 2.0f), epsilon); + + } + +} + + + +#endif + diff --git a/3rdparty/vectorial/spec/spec_simd4f.cpp b/3rdparty/vectorial/spec/spec_simd4f.cpp new file mode 100644 index 0000000..f3b1dd7 --- /dev/null +++ b/3rdparty/vectorial/spec/spec_simd4f.cpp @@ -0,0 +1,457 @@ + +#include "spec_helper.h" + +const int epsilon = 1; + +describe(simd4f, "sanity") { + it("VECTORIAL_SIMD_TYPE should be defined to a string") { + std::cout << "Simd type: " << VECTORIAL_SIMD_TYPE << std::endl; + } +} + +describe(simd4f, "creating") { + + it("should be possible to create with simd4f_create") { + + simd4f x = simd4f_create(1, 2, 3, 4); + + should_be_close_to( simd4f_get_x(x), 1, epsilon); + should_be_close_to( simd4f_get_y(x), 2, epsilon); + should_be_close_to( simd4f_get_z(x), 3, epsilon); + should_be_close_to( simd4f_get_w(x), 4, epsilon); + + // octave simd4f: [1,2,3,4] + should_be_equal_simd4f(x, simd4f_create(1.000000000000000f, 2.000000000000000f, 3.000000000000000f, 4.000000000000000f), epsilon ); + + } + + it("should have 
simd4f_zero for zero vector") { + + simd4f x = simd4f_zero(); + + // octave simd4f: [0,0,0,0] + should_be_equal_simd4f(x, simd4f_create(0.000000000000000f, 0.000000000000000f, 0.000000000000000f, 0.000000000000000f), epsilon ); + } + + +} +#ifdef _MSC_VER +#include +#else +#include +#endif + +#define unaligned_mem(n) ((float*)((unsigned char*)alloca(sizeof(float)*n+4)+4)) + +describe(simd4f, "utilities") { + + it("should have simd4f_uload4 for loading four float values from an unaligned float array into simd4f") { + float *f = unaligned_mem(4); + f[0] = 1; + f[1] = 2; + f[2] = 3; + f[3] = 4; + simd4f x = simd4f_uload4(f); + // octave simd4f: [1,2,3,4] + should_be_equal_simd4f(x, simd4f_create(1.000000000000000f, 2.000000000000000f, 3.000000000000000f, 4.000000000000000f), epsilon ); + } + + it("should have simd4f_uload3 for loading three float values from an unaligned float array into simd4f") { + float *f = unaligned_mem(3); + f[0] = 1; + f[1] = 2; + f[2] = 3; + simd4f x = simd4f_uload3(f); + // octave simd4f: [1,2,3] + should_be_equal_simd4f(x, simd4f_create(1.000000000000000f, 2.000000000000000f, 3.000000000000000f, 0.0f), epsilon ); + } + + it("should have simd4f_uload2 for loading two float values from float an unaligned array into simd4f") { + float *f = unaligned_mem(2); + f[0] = 1; + f[1] = 2; + simd4f x = simd4f_uload2(f); + // octave simd4f: [1,2] + should_be_equal_simd4f(x, simd4f_create(1.000000000000000f, 2.000000000000000f, 0.0f, 0.0f), epsilon ); + } + + + it("should have simd4f_ustore4 for storing four float values from simd4f to an unaligned array") { + float *f = unaligned_mem(4); + f[0] = -1; + f[1] = -1; + f[2] = -1; + f[3] = -1; + simd4f a = simd4f_create(1,2,3,4); + simd4f_ustore4(a, f); + should_be_close_to(f[0], 1, epsilon); + should_be_close_to(f[1], 2, epsilon); + should_be_close_to(f[2], 3, epsilon); + should_be_close_to(f[3], 4, epsilon); + } + + it("should have simd4f_ustore3 for storing three float values from simd4f to an unaligned 
array") { + float *f = unaligned_mem(3); + f[0] = -1; + f[1] = -1; + f[2] = -1; + simd4f a = simd4f_create(1,2,3,4); + simd4f_ustore3(a, f); + should_be_close_to(f[0], 1, epsilon); + should_be_close_to(f[1], 2, epsilon); + should_be_close_to(f[2], 3, epsilon); + } + + it("should have simd4f_ustore2 for storing two float values from simd4f to an unaligned array") { + float *f = unaligned_mem(2); + f[0] = -1; + f[1] = -1; + simd4f a = simd4f_create(1,2,3,4); + simd4f_ustore2(a, f); + should_be_close_to(f[0], 1, epsilon); + should_be_close_to(f[1], 2, epsilon); + } + + + + + it("should have simd4f_splat that expands a single scalar to all elements") { + simd4f x = simd4f_splat(42); + // octave simd4f: [42,42,42,42] + should_be_equal_simd4f(x, simd4f_create(42.000000000000000f, 42.000000000000000f, 42.000000000000000f, 42.000000000000000f), epsilon ); + } + + it("should have simd4f_splat_x,y,z,w splatting of an element") { + simd4f a = simd4f_create(1,2,3,4); + + simd4f x; + + x = simd4f_splat_x(a); + // octave simd4f: [1,1,1,1] + should_be_equal_simd4f(x, simd4f_create(1.000000000000000f, 1.000000000000000f, 1.000000000000000f, 1.000000000000000f), epsilon ); + + x = simd4f_splat_y(a); + // octave simd4f: [2,2,2,2] + should_be_equal_simd4f(x, simd4f_create(2.000000000000000f, 2.000000000000000f, 2.000000000000000f, 2.000000000000000f), epsilon ); + + x = simd4f_splat_z(a); + // octave simd4f: [3,3,3,3] + should_be_equal_simd4f(x, simd4f_create(3.000000000000000f, 3.000000000000000f, 3.000000000000000f, 3.000000000000000f), epsilon ); + + x = simd4f_splat_w(a); + // octave simd4f: [4,4,4,4] + should_be_equal_simd4f(x, simd4f_create(4.000000000000000f, 4.000000000000000f, 4.000000000000000f, 4.000000000000000f), epsilon ); + } + + it("should have simd4f_sum that adds elements") { + simd4f a = simd4f_create(1,2,3,4); + simd4f x = simd4f_sum(a); + // octave simd4f: [sum([1,2,3,4]), sum([1,2,3,4]), sum([1,2,3,4]), sum([1,2,3,4])] + should_be_equal_simd4f(x, 
simd4f_create(10.000000000000000f, 10.000000000000000f, 10.000000000000000f, 10.000000000000000f), epsilon ); + + } + + it("should have simd4f_reciprocal") { + simd4f a = simd4f_create(0.00001f, 2.00001f, 3.0f, 99999999.0f); + simd4f x = simd4f_reciprocal(a); + // octave simd4f: 1 ./ [0.00001, 2.00001, 3.0, 99999999.0] + should_be_equal_simd4f(x, simd4f_create(99999.999999999985448f, 0.499997500012500f, 0.333333333333333f, 0.000000010000000f), epsilon ); + } + + it("should have simd4f_sqrt") { + simd4f a = simd4f_create(0.00001f, 2.00001f, 3.0f, 99999999.0f); + simd4f x = simd4f_sqrt(a); + // octave simd4f: sqrt([0.00001, 2.00001, 3.0, 99999999.0]) + should_be_equal_simd4f(x, simd4f_create(0.003162277660168f, 1.414217097902582f, 1.732050807568877f, 9999.999949999999444f), epsilon ); + + x = simd4f_sqrt( simd4f_create(0.0f, 0.0f, 0.0f, 0.0f) ); + // octave simd4f: sqrt([0, 0, 0, 0]) + should_be_equal_simd4f(x, simd4f_create(0.000000000000000f, 0.000000000000000f, 0.000000000000000f, 0.000000000000000f), epsilon ); + } + + it("should have simd4f_rsqrt for reciprocal of square-root") { + simd4f a = simd4f_create(0.00001f, 2.00001f, 3.0f, 99999999.0f); + simd4f x = simd4f_rsqrt(a); + const int epsilon = 4; // Grant larger error + // octave simd4f: 1 ./ sqrt([0.00001, 2.00001, 3.0, 99999999.0]) + should_be_equal_simd4f(x, simd4f_create(316.227766016837904f, 0.707105013426224f, 0.577350269189626f, 0.000100000000500f), epsilon ); + } + +} + +describe(simd4f, "arithmetic with another simd4f") { + + it("should have simd4f_add for component-wise addition") { + simd4f a = simd4f_create(1,2,3,4); + simd4f b = simd4f_create(10,20,30,40); + + simd4f x = simd4f_add(a,b); + // octave simd4f: [1,2,3,4] + [10,20,30,40] + should_be_equal_simd4f(x, simd4f_create(11.000000000000000f, 22.000000000000000f, 33.000000000000000f, 44.000000000000000f), epsilon ); + } + + it("should have simd4f_sub for component-wise subtraction") { + simd4f a = simd4f_create(1,2,3,4); + simd4f b = 
simd4f_create(10,20,30,40); + + simd4f x = simd4f_sub(b,a); + // octave simd4f: [10,20,30,40] - [1,2,3,4] + should_be_equal_simd4f(x, simd4f_create(9.000000000000000f, 18.000000000000000f, 27.000000000000000f, 36.000000000000000f), epsilon ); + } + + it("should have simd4f_mul for component-wise multiply") { + simd4f a = simd4f_create(1,2,3,4); + simd4f b = simd4f_create(10,20,30,40); + + simd4f x = simd4f_mul(a,b); + // octave simd4f: [1,2,3,4] .* [10,20,30,40] + should_be_equal_simd4f(x, simd4f_create(10.000000000000000f, 40.000000000000000f, 90.000000000000000f, 160.000000000000000f), epsilon ); + } + + it("should have simd4f_div for component-wise division") { + simd4f a = simd4f_create(1,2,3,4); + simd4f b = simd4f_create(10,20,30,40); + + simd4f x = simd4f_div(b,a); + // octave simd4f: [10,20,30,40] ./ [1,2,3,4] + should_be_equal_simd4f(x, simd4f_create(10.000000000000000f, 10.000000000000000f, 10.000000000000000f, 10.000000000000000f), epsilon ); + } + + it("should have simd4f_madd for multiply-add") { + simd4f a = simd4f_create(1,2,3,4); + simd4f b = simd4f_create(100,100,100,100); + simd4f c = simd4f_create(6,7,8,9); + + simd4f x = simd4f_madd(a,b,c); + // octave simd4f: [1,2,3,4] .* [100,100,100,100] .+ [6,7,8,9] + should_be_equal_simd4f(x, simd4f_create(106.000000000000000f, 207.000000000000000f, 308.000000000000000f, 409.000000000000000f), epsilon ); + + } + +} + + +describe(simd4f, "vector math") { + + it("should have simd4f_dot4 for four component dot product") { + simd4f a = simd4f_create(1,2,3,4); + simd4f b = simd4f_create(10,20,30,40); + + simd4f x = simd4f_dot4(a,b); + // octave simd4f: [dot([1, 2, 3, 4], [10, 20, 30, 40]),dot([1, 2, 3, 4], [10, 20, 30, 40]),dot([1, 2, 3, 4], [10, 20, 30, 40]),dot([1, 2, 3, 4], [10, 20, 30, 40])] + should_be_equal_simd4f(x, simd4f_create(300.000000000000000f, 300.000000000000000f, 300.000000000000000f, 300.000000000000000f), epsilon ); + } + + it("should have simd4f_dot3_scalar for three component dot product 
returning float") { + simd4f a = simd4f_create(1,2,3,9999); + simd4f b = simd4f_create(10,20,30,-9990); + + float x = simd4f_dot3_scalar(a,b); + // octave float: dot([1, 2, 3], [10, 20, 30]) + should_be_close_to(x, 140.000000000000000f, epsilon ); + } + + it("should have simd4f_dot3 for three component dot product returning simd4f") { + simd4f a = simd4f_create(1,2,3,9999); + simd4f b = simd4f_create(10,20,30,-9990); + + simd4f x = simd4f_dot3(a,b); + // octave simd4f: [dot([1, 2, 3], [10, 20, 30]),dot([1, 2, 3], [10, 20, 30]),dot([1, 2, 3], [10, 20, 30]),dot([1, 2, 3], [10, 20, 30])] + should_be_equal_simd4f(x, simd4f_create(140.000000000000000f, 140.000000000000000f, 140.000000000000000f, 140.000000000000000f), epsilon ); + } + + it("should have simd4f_dot2 for two component dot product") { + simd4f a = simd4f_create(1,2,3,9999); + simd4f b = simd4f_create(10,20,30,-9990); + + simd4f x = simd4f_dot2(a,b); + // octave simd4f: [dot([1, 2], [10, 20]),dot([1, 2], [10, 20]),dot([1, 2], [10, 20]),dot([1, 2], [10, 20])] + should_be_equal_simd4f(x, simd4f_create(50.000000000000000f, 50.000000000000000f, 50.000000000000000f, 50.000000000000000f), epsilon ); + } + + it("should have simd4f_length4 for four component vector length") { + simd4f a = simd4f_create(1,2,-3,9999); + simd4f x = simd4f_length4(a); + // octave simd4f: [norm([1,2,-3,9999]), norm([1,2,-3,9999]), norm([1,2,-3,9999]), norm([1,2,-3,9999])] + should_be_equal_simd4f(x, simd4f_create(9999.000700069982486f, 9999.000700069982486f, 9999.000700069982486f, 9999.000700069982486f), epsilon ); + + } + + it("should have simd4f_length3 for three component vector length") { + simd4f a = simd4f_create(1,2,-3,9999); + simd4f x = simd4f_length3(a); + // octave simd4f: [norm([1,2,-3]), norm([1,2,-3]), norm([1,2,-3]), norm([1,2,-3])] + should_be_equal_simd4f(x, simd4f_create(3.741657386773941f, 3.741657386773941f, 3.741657386773941f, 3.741657386773941f), epsilon ); + + } + + it("should have simd4f_length2 for two component 
vector length") { + simd4f a = simd4f_create(1,2,-3,9999); + simd4f x = simd4f_length2(a); + // octave simd4f: [norm([1,2]),norm([1,2]),norm([1,2]),norm([1,2])] + should_be_equal_simd4f(x, simd4f_create(2.236067977499790f, 2.236067977499790f, 2.236067977499790f, 2.236067977499790f), epsilon ); + + } + + + it("should have simd4f_length4_squared for four component squared vector length") { + simd4f a = simd4f_create(1,2,-3,9999); + simd4f x = simd4f_length4_squared(a); + // octave simd4f: ([(dot([1,2,-3,9999], [1,2,-3,9999])), (dot([1,2,-3,9999], [1,2,-3,9999])), (dot([1,2,-3,9999], [1,2,-3,9999])), (dot([1,2,-3,9999], [1,2,-3,9999]))]) + should_be_equal_simd4f(x, simd4f_create(99980015.000000000000000f, 99980015.000000000000000f, 99980015.000000000000000f, 99980015.000000000000000f), epsilon ); + + } + + it("should have simd4f_length3_squared for three component squared vector length") { + simd4f a = simd4f_create(1,2,-3,9999); + simd4f x = simd4f_length3_squared(a); + // octave simd4f: ([dot([1,2,-3], [1,2,-3]), dot([1,2,-3], [1,2,-3]), dot([1,2,-3], [1,2,-3]), dot([1,2,-3], [1,2,-3])]) + should_be_equal_simd4f(x, simd4f_create(14.000000000000000f, 14.000000000000000f, 14.000000000000000f, 14.000000000000000f), epsilon ); + + } + + it("should have simd4f_length2_squared for two component squared vector length") { + simd4f a = simd4f_create(1,2,-3,9999); + simd4f x = simd4f_length2_squared(a); + // octave simd4f: ([dot([1,2], [1,2]), dot([1,2], [1,2]), dot([1,2], [1,2]), dot([1,2], [1,2])]) + should_be_equal_simd4f(x, simd4f_create(5.000000000000000f, 5.000000000000000f, 5.000000000000000f, 5.000000000000000f), epsilon ); + + } + + + + it("should have simd4f_cross3 for cross product") { + simd4f a = simd4f_create(1,12,3,-9999); + simd4f b = simd4f_create(5,6,-17, 9999); + + simd4f x = simd4f_cross3(a,b); + // octave simd4f: horzcat( cross( [1,12,3], [5,6,-17] ) , [0] ) + should_be_equal_simd4f(x, simd4f_create(-222.000000000000000f, 32.000000000000000f, 
-54.000000000000000f, 0.000000000000000f), epsilon ); + + } + + it("should have simd4f_normalize4 for normalizing four const vector to unit length") { + simd4f a = simd4f_create(1,2,3,4); + simd4f x = simd4f_normalize4(a); + // octave simd4f: [1,2,3,4] / norm([1,2,3,4]) + should_be_equal_simd4f(x, simd4f_create(0.182574185835055f, 0.365148371670111f, 0.547722557505166f, 0.730296743340221f), epsilon ); + } + + it("should have simd4f_normalize3 for normalizing three component vector to unit length") { + simd4f a = simd4f_create(1,2,3,0); + simd4f x = simd4f_normalize3(a); + // octave simd4f: [1,2,3,0] / norm([1,2,3]) + should_be_equal_simd4f(x, simd4f_create(0.267261241912424f, 0.534522483824849f, 0.801783725737273f, 0.000000000000000f), epsilon ); + } + + it("should have simd4f_normalize2 for normalizing two component vector to unit length") { + simd4f a = simd4f_create(1,2,0,0); + simd4f x = simd4f_normalize2(a); + // octave simd4f: [1,2,0,0] / norm([1,2]) + should_be_equal_simd4f(x, simd4f_create(0.447213595499958f, 0.894427190999916f, 0.000000000000000f, 0.000000000000000f), epsilon ); + } + + +} + +describe(simd4f, "shuffles and merges") { + + it("should have simd4f_shuffle_wxyz") { + simd4f a = simd4f_create(1,2,3,4); + simd4f x = simd4f_shuffle_wxyz(a); + should_be_equal_simd4f(x, simd4f_create(4,1,2,3), epsilon ); + } + + it("should have simd4f_shuffle_zwxy") { + simd4f a = simd4f_create(1,2,3,4); + simd4f x = simd4f_shuffle_zwxy(a); + should_be_equal_simd4f(x, simd4f_create(3,4,1,2), epsilon ); + } + + it("should have simd4f_shuffle_yzwx") { + simd4f a = simd4f_create(1,2,3,4); + simd4f x = simd4f_shuffle_yzwx(a); + should_be_equal_simd4f(x, simd4f_create(2,3,4,1), epsilon ); + } + + it("should have simd4f_merge_high") { + simd4f a = simd4f_create(1,2,3,4); + simd4f b = simd4f_create(5,6,7,8); + simd4f x = simd4f_merge_high(a,b); + should_be_equal_simd4f(x, simd4f_create(3,4,7,8), epsilon ); + } + +} + +describe(simd4f, "signs") { + + it("should have 
simd4f_flip_sign_0101 for flipping even elements sign") { + simd4f a = simd4f_create(1,2,3,4); + simd4f x = simd4f_flip_sign_0101(a); + should_be_equal_simd4f(x, simd4f_create(1,-2,3,-4), epsilon ); + } + + it("should have simd4f_flip_sign_1010 for flipping even elements sign") { + simd4f a = simd4f_create(1,2,3,4); + simd4f x = simd4f_flip_sign_1010(a); + should_be_equal_simd4f(x, simd4f_create(-1,2,-3,4), epsilon ); + } + +} + +describe(simd4f, "min-max") { + + it("should have simd4f_min for choosing minimum elements") { + simd4f a = simd4f_create(1.0f, 2.0f, -300000000.0f, -0.000002f); + simd4f b = simd4f_create(2.0f, -2.0f, 300000000.0f, 0.000001f); + + simd4f x = simd4f_min(a,b); + should_be_equal_simd4f(x, simd4f_create(1.0f, -2.0f, -300000000.0f, -0.000002f), epsilon); + + } + + it("should have simd4f_max for choosing maximum elements") { + simd4f a = simd4f_create(1.0f, 2.0f, -300000000.0f, -0.000002f); + simd4f b = simd4f_create(2.0f, -2.0f, 300000000.0f, 0.000001f); + + simd4f x = simd4f_max(a,b); + should_be_equal_simd4f(x, simd4f_create(2.0f, 2.0f, 300000000.0f, 0.000001f), epsilon); + + } + + + +} + + +describe(simd4f, "zeroing") +{ + + it("should have simd4f_zero_w that zeros the last element") + { + const float nan = sqrtf(-1.0f); + simd4f a = simd4f_create(1.0f, 2.0f, 3.0f, 4.0f); + simd4f b = simd4f_create(1.0f, 2.0f, 3.0f, nan); + simd4f x = simd4f_zero_w(a); + should_be_equal_simd4f(x, simd4f_create(1.0f, 2.0f, 3.0f, 0.0f), epsilon); + x = simd4f_zero_w(b); + should_be_equal_simd4f(x, simd4f_create(1.0f, 2.0f, 3.0f, 0.0f), epsilon); + } + + it("should have simd4f_zero_zw that zeros the last element") + { + const float nan = sqrtf(-1.0f); + simd4f a = simd4f_create(1.0f, 2.0f, 3.0f, 4.0f); + simd4f b = simd4f_create(1.0f, 2.0f, nan, nan); + simd4f x = simd4f_zero_zw(a); + should_be_equal_simd4f(x, simd4f_create(1.0f, 2.0f, 0.0f, 0.0f), epsilon); + x = simd4f_zero_zw(b); + should_be_equal_simd4f(x, simd4f_create(1.0f, 2.0f, 0.0f, 0.0f), epsilon); + 
} + +} + + + + + diff --git a/3rdparty/vectorial/spec/spec_simd4x4f.cpp b/3rdparty/vectorial/spec/spec_simd4x4f.cpp new file mode 100644 index 0000000..fd82397 --- /dev/null +++ b/3rdparty/vectorial/spec/spec_simd4x4f.cpp @@ -0,0 +1,381 @@ +#include "spec_helper.h" + +const int epsilon = 1; + +#ifndef M_PI +#define M_PI 3.141592f +#endif + +describe(simd4x4f, "creating") { + + it("should be possible to create with params") { + + simd4x4f x = simd4x4f_create(simd4f_create(1, 2, 3, 4 ), + simd4f_create(5, 6, 7, 8 ), + simd4f_create(9, 10, 11, 12 ), + simd4f_create(13, 14, 15, 16 )); + + should_be_equal_simd4f( x.x, simd4f_create(1, 2, 3, 4 ) , epsilon); + should_be_equal_simd4f( x.y, simd4f_create(5, 6, 7, 8 ) , epsilon); + should_be_equal_simd4f( x.z, simd4f_create(9, 10, 11, 12 ), epsilon); + should_be_equal_simd4f( x.w, simd4f_create(13, 14, 15, 16 ), epsilon); + + // octave simd4x4f: [1,5,9,13 ; 2,6,10,14 ; 3,7,11,15 ; 4,8,12,16 ] + should_be_equal_simd4x4f(x, simd4x4f_create(simd4f_create(1.000000000000000f, 2.000000000000000f, 3.000000000000000f, 4.000000000000000f), simd4f_create(5.000000000000000f, 6.000000000000000f, 7.000000000000000f, 8.000000000000000f), simd4f_create(9.000000000000000f, 10.000000000000000f, 11.000000000000000f, 12.000000000000000f), simd4f_create(13.000000000000000f, 14.000000000000000f, 15.000000000000000f, 16.000000000000000f)), epsilon ); + + } + + + it("should be possible to set to identity") { + simd4x4f x; + simd4x4f_identity(&x); + + // octave simd4x4f: [1,0,0,0; 0,1,0,0; 0,0,1,0; 0,0,0,1] + should_be_equal_simd4x4f(x, simd4x4f_create(simd4f_create(1.000000000000000f, 0.000000000000000f, 0.000000000000000f, 0.000000000000000f), simd4f_create(0.000000000000000f, 1.000000000000000f, 0.000000000000000f, 0.000000000000000f), simd4f_create(0.000000000000000f, 0.000000000000000f, 1.000000000000000f, 0.000000000000000f), simd4f_create(0.000000000000000f, 0.000000000000000f, 0.000000000000000f, 1.000000000000000f)), epsilon ); + } +} + + 
+describe(simd4x4f, "loading and storing") { + + it("should be possible to load from array of 16 floats with simd4x4f_uload") { + + simd4x4f x; + float f[16] = {1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16 }; + simd4x4f_uload(&x, f); + + should_be_equal_simd4x4f(x, simd4x4f_create( simd4f_create(1,2,3,4), + simd4f_create(5,6,7,8), + simd4f_create(9,10,11,12), + simd4f_create(13,14,15,16) ), epsilon); + + } + +} + + +describe(simd4x4f, "matrix utility") { + + it("should have simd4x4f_transpose_inplace for transpose") { + + simd4x4f x = simd4x4f_create(simd4f_create(1, 2, 3, 4 ), + simd4f_create(5, 6, 7, 8 ), + simd4f_create(9, 10, 11, 12 ), + simd4f_create(13, 14, 15, 16 )); + + simd4x4f_transpose_inplace(&x); + + // octave simd4x4f: transpose([1,5,9,13 ; 2,6,10,14 ; 3,7,11,15 ; 4,8,12,16 ]) + should_be_equal_simd4x4f(x, simd4x4f_create(simd4f_create(1.000000000000000f, 5.000000000000000f, 9.000000000000000f, 13.000000000000000f), simd4f_create(2.000000000000000f, 6.000000000000000f, 10.000000000000000f, 14.000000000000000f), simd4f_create(3.000000000000000f, 7.000000000000000f, 11.000000000000000f, 15.000000000000000f), simd4f_create(4.000000000000000f, 8.000000000000000f, 12.000000000000000f, 16.000000000000000f)), epsilon ); + } + + it("should have simd4x4f_transpose for transpose") { + + simd4x4f in = simd4x4f_create(simd4f_create(1, 2, 3, 4 ), + simd4f_create(5, 6, 7, 8 ), + simd4f_create(9, 10, 11, 12 ), + simd4f_create(13, 14, 15, 16 )); + + simd4x4f x; + simd4x4f_transpose(&in, &x); + + // octave simd4x4f: transpose([1,5,9,13 ; 2,6,10,14 ; 3,7,11,15 ; 4,8,12,16 ]) + should_be_equal_simd4x4f(x, simd4x4f_create(simd4f_create(1.000000000000000f, 5.000000000000000f, 9.000000000000000f, 13.000000000000000f), simd4f_create(2.000000000000000f, 6.000000000000000f, 10.000000000000000f, 14.000000000000000f), simd4f_create(3.000000000000000f, 7.000000000000000f, 11.000000000000000f, 15.000000000000000f), simd4f_create(4.000000000000000f, 8.000000000000000f, 
12.000000000000000f, 16.000000000000000f)), epsilon ); + } + + it("should have simd4x4f_matrix_vector_mul for matrix-vector multiply") { + + simd4x4f a = simd4x4f_create(simd4f_create( 1, 9, 17, 25 ), + simd4f_create( 3, 11, 19, 27 ), + simd4f_create( 5, 13, 21, 29 ), + simd4f_create( 7, 15, 23, 31 )); + + simd4f b = simd4f_create( 26, -28, 30, -32 ); + + simd4f x; + simd4x4f_matrix_vector_mul(&a, &b, &x); + + // octave simd4f: [1,3,5,7;9,11,13,15;17,19,21,23;25,27,29,31] * [26;-28;30;-32] + should_be_equal_simd4f(x, simd4f_create(-132.000000000000000f, -164.000000000000000f, -196.000000000000000f, -228.000000000000000f), epsilon ); + } + + it("should have simd4x4f_matrix_vector3_mul for matrix-vector3 multiply") { + + simd4x4f a = simd4x4f_create(simd4f_create( 1, 9, 17, 25 ), + simd4f_create( 3, 11, 19, 27 ), + simd4f_create( 5, 13, 21, 29 ), + simd4f_create( 7, 15, 23, 31 )); + + simd4f b = simd4f_create( 26, -28, 30, -32 ); + + simd4f x; + simd4x4f_matrix_vector3_mul(&a, &b, &x); + + // TODO octave simd4f: + + } + + it("should have simd4x4f_matrix_vector3_mul for matrix-vector3 multiply") { + + simd4x4f a = simd4x4f_create(simd4f_create( 1, 9, 17, 25 ), + simd4f_create( 3, 11, 19, 27 ), + simd4f_create( 5, 13, 21, 29 ), + simd4f_create( 7, 15, 23, 31 )); + + simd4f b = simd4f_create( 26, -28, 30, -32 ); + + simd4f x; + simd4x4f_matrix_vector3_mul(&a, &b, &x); + + // TODO octave simd4f: + + } + + it("should have simd4x4f_matrix_point3_mul") { /* TODO */ } + + it("should have simd4x4f_inv_ortho_matrix_point3_mul for transforming point with inverse of a orhtonormal matrix") { + + simd4x4f a = simd4x4f_create(simd4f_create( 0, -1, 0, 0 ), + simd4f_create( 1, 0, 0, 0 ), + simd4f_create( 0, 0, 1, 0 ), + simd4f_create( 1, 2, 3, 1 )); + + simd4f b = simd4f_create(5,6,7,0); + + simd4f x; + simd4x4f_inv_ortho_matrix_point3_mul(&a, &b, &x); + + // octave simd4f: inverse([0,1,0,1; -1,0,0,2; 0,0,1,3; 0,0,0,1]) * [5;6;7;1] .* [1;1;1;0] + should_be_equal_simd4f(x, 
simd4f_create(-4.000000000000000f, 4.000000000000000f, 4.000000000000000f, 0.000000000000000f), epsilon ); + } + + + it("should have simd4x4f_matrix_mul for matrix multiply") { + + simd4x4f a = simd4x4f_create(simd4f_create( 1, 9, 17, 25 ), + simd4f_create( 3, 11, 19, 27 ), + simd4f_create( 5, 13, 21, 29 ), + simd4f_create( 7, 15, 23, 31 )); + + simd4x4f b = simd4x4f_create(simd4f_create( 2 , -10, 18 , -26 ), + simd4f_create( -4, 12, -20, 28 ), + simd4f_create( 6, -14, 22, -30 ), + simd4f_create( -8, 16, -24, 32 )); + + simd4x4f x; + simd4x4f_matrix_mul(&a, &b, &x); + + // octave simd4x4f: [1,3,5,7;9,11,13,15;17,19,21,23;25,27,29,31] * [2,-4,6,-8;-10,12,-14,16;18,-20,22,-24;-26,28,-30,32] + should_be_equal_simd4x4f(x, simd4x4f_create(simd4f_create(-120.000000000000000f, -248.000000000000000f, -376.000000000000000f, -504.000000000000000f), simd4f_create(128.000000000000000f, 256.000000000000000f, 384.000000000000000f, 512.000000000000000f), simd4f_create(-136.000000000000000f, -264.000000000000000f, -392.000000000000000f, -520.000000000000000f), simd4f_create(144.000000000000000f, 272.000000000000000f, 400.000000000000000f, 528.000000000000000f)), epsilon ); + } + + + + + it("should have simd4x4f_inverse for calculating inverse matrix") { + + simd4x4f a = simd4x4f_create(simd4f_create(7, 2, 87, 5 ), + simd4f_create(5, 24, 6, 3 ), + simd4f_create(4, 6, 5, 6 ), + simd4f_create(5, 7, 4, 6 )); + + simd4x4f x; + simd4x4f_inverse(&a, &x); + + // octave simd4x4f: inverse( [7,5,4,5 ; 2,24,6,7 ; 87,6,5,4 ; 5,3,6,6] ) + should_be_equal_simd4x4f(x, simd4x4f_create(simd4f_create(0.015309310560300f, -0.049885440533222f, -1.081337221412206f, 1.093522182878568f), simd4f_create(-0.004061653822120f, 0.054051239325141f, 0.123620079150177f, -0.147260987294314f), simd4f_create(0.011247656738180f, 0.004165798791918f, 0.042282857737971f, -0.053738804415747f), simd4f_create(-0.015517600499896f, -0.024265777962924f, 0.728702353676318f, -0.536971464278276f)), epsilon ); + + simd4x4f x2; + 
simd4x4f_matrix_mul(&x, &a, &x2); + simd4x4f identity; + simd4x4f_identity(&identity); + // Allow larger error for M * M' = I + const int epsilon = 0x35100000; + should_be_equal_simd4x4f(x2, identity, epsilon); + + } + + + +} + + +describe(simd4x4f, "math on elements") { + + it("should have simd4x4f_add for element-wise addition") { + + simd4x4f a = simd4x4f_create(simd4f_create(1, 2, 3, 4 ), + simd4f_create(5, 6, 7, 8 ), + simd4f_create(9, 10, 11, 12 ), + simd4f_create(13, 14, 15, 16 )); + + simd4x4f b = simd4x4f_create(simd4f_create( 2 , -10, 18 , -26 ), + simd4f_create( -4, 12, -20, 28 ), + simd4f_create( 6, -14, 22, -30 ), + simd4f_create( -8, 16, -24, 32 )); + + simd4x4f x; + + simd4x4f_add(&a, &b, &x); + + + // octave simd4x4f: [1,5,9,13 ; 2,6,10,14 ; 3,7,11,15 ; 4,8,12,16 ] + [2,-4,6,-8;-10,12,-14,16;18,-20,22,-24;-26,28,-30,32] + should_be_equal_simd4x4f(x, simd4x4f_create(simd4f_create(3.000000000000000f, -8.000000000000000f, 21.000000000000000f, -22.000000000000000f), simd4f_create(1.000000000000000f, 18.000000000000000f, -13.000000000000000f, 36.000000000000000f), simd4f_create(15.000000000000000f, -4.000000000000000f, 33.000000000000000f, -18.000000000000000f), simd4f_create(5.000000000000000f, 30.000000000000000f, -9.000000000000000f, 48.000000000000000f)), epsilon ); + + } + + it("should have simd4x4f_sub for element-wise substraction") { + + simd4x4f a = simd4x4f_create(simd4f_create(1, 2, 3, 4 ), + simd4f_create(5, 6, 7, 8 ), + simd4f_create(9, 10, 11, 12 ), + simd4f_create(13, 14, 15, 16 )); + + simd4x4f b = simd4x4f_create(simd4f_create( 2 , -10, 18 , -26 ), + simd4f_create( -4, 12, -20, 28 ), + simd4f_create( 6, -14, 22, -30 ), + simd4f_create( -8, 16, -24, 32 )); + + simd4x4f x; + + simd4x4f_sub(&a, &b, &x); + + + // octave simd4x4f: [1,5,9,13 ; 2,6,10,14 ; 3,7,11,15 ; 4,8,12,16 ] - [2,-4,6,-8;-10,12,-14,16;18,-20,22,-24;-26,28,-30,32] + should_be_equal_simd4x4f(x, simd4x4f_create(simd4f_create(-1.000000000000000f, 12.000000000000000f, 
-15.000000000000000f, 30.000000000000000f), simd4f_create(9.000000000000000f, -6.000000000000000f, 27.000000000000000f, -20.000000000000000f), simd4f_create(3.000000000000000f, 24.000000000000000f, -11.000000000000000f, 42.000000000000000f), simd4f_create(21.000000000000000f, -2.000000000000000f, 39.000000000000000f, -16.000000000000000f)), epsilon ); + + } + + it("should have simd4x4f_mul for element-wise multiplication") { + + simd4x4f a = simd4x4f_create(simd4f_create(1, 2, 3, 4 ), + simd4f_create(5, 6, 7, 8 ), + simd4f_create(9, 10, 11, 12 ), + simd4f_create(13, 14, 15, 16 )); + + simd4x4f b = simd4x4f_create(simd4f_create( 2 , -10, 18 , -26 ), + simd4f_create( -4, 12, -20, 28 ), + simd4f_create( 6, -14, 22, -30 ), + simd4f_create( -8, 16, -24, 32 )); + + simd4x4f x; + + simd4x4f_mul(&a, &b, &x); + + + // octave simd4x4f: [1,5,9,13 ; 2,6,10,14 ; 3,7,11,15 ; 4,8,12,16 ] .* [2,-4,6,-8;-10,12,-14,16;18,-20,22,-24;-26,28,-30,32] + should_be_equal_simd4x4f(x, simd4x4f_create(simd4f_create(2.000000000000000f, -20.000000000000000f, 54.000000000000000f, -104.000000000000000f), simd4f_create(-20.000000000000000f, 72.000000000000000f, -140.000000000000000f, 224.000000000000000f), simd4f_create(54.000000000000000f, -140.000000000000000f, 242.000000000000000f, -360.000000000000000f), simd4f_create(-104.000000000000000f, 224.000000000000000f, -360.000000000000000f, 512.000000000000000f)), epsilon ); + + } + + it("should have simd4x4f_div for element-wise division") { + + simd4x4f a = simd4x4f_create(simd4f_create(1, 2, 3, 4 ), + simd4f_create(5, 6, 7, 8 ), + simd4f_create(9, 10, 11, 12 ), + simd4f_create(13, 14, 15, 16 )); + + simd4x4f b = simd4x4f_create(simd4f_create( 2 , -10, 18 , -26 ), + simd4f_create( -4, 12, -20, 28 ), + simd4f_create( 6, -14, 22, -30 ), + simd4f_create( -8, 16, -24, 32 )); + + simd4x4f x; + + simd4x4f_div(&a, &b, &x); + + + // octave simd4x4f: [1,5,9,13 ; 2,6,10,14 ; 3,7,11,15 ; 4,8,12,16 ] ./ [2,-4,6,-8;-10,12,-14,16;18,-20,22,-24;-26,28,-30,32] + 
should_be_equal_simd4x4f(x, simd4x4f_create(simd4f_create(0.500000000000000f, -0.200000000000000f, 0.166666666666667f, -0.153846153846154f), simd4f_create(-1.250000000000000f, 0.500000000000000f, -0.350000000000000f, 0.285714285714286f), simd4f_create(1.500000000000000f, -0.714285714285714f, 0.500000000000000f, -0.400000000000000f), simd4f_create(-1.625000000000000f, 0.875000000000000f, -0.625000000000000f, 0.500000000000000f)), epsilon ); + + } + + +} + + +describe(simd4x4f, "creating projection and view matrices") { + + it("should have simd4x4f_perspective for creating perspective projection matrix") { + + const float fov = 10.0f * M_PI / 180.0f; + const float aspect = 1.6f; + const float znear = 2.0f; + const float zfar = 50.0f; + + const int epsilon = 50; + + simd4x4f x; + simd4x4f_perspective(&x, fov, aspect, znear, zfar); + + should_be_equal_simd4x4f(x, simd4x4f_create(simd4f_create(7.14378, 0, 0, 0), + simd4f_create(0, 11.4301, 0, 0), + simd4f_create(0, 0, -1.08333, -1), + simd4f_create(-0, -0, -4.16667, -0)), epsilon); + + + } + + it("should have simd4x4f_ortho for creating orthogonal projection matrix") { + + + simd4x4f x; + simd4x4f_ortho(&x, -10, 20, -30, 40, -50, 60); + const int epsilon = 20; + should_be_equal_simd4x4f(x, simd4x4f_create(simd4f_create(0.0666667, 0, 0, 0), + simd4f_create(0, 0.0285714, 0, 0), + simd4f_create(-0, -0, -0.0181818, -0), + simd4f_create(-0.333333, -0.142857, -0.0909091, 1)), epsilon); + + + } + + it("should have simd4x4f_lookat for creating look-at matrix") { + + simd4f eye = simd4f_create(1,2,3,0); + simd4f center = simd4f_create(3,4,5,0); + simd4f up = simd4f_create(0,1,0,0); + + simd4x4f x; + simd4x4f_lookat(&x, eye, center, up); + + const int epsilon = 40; + + should_be_equal_simd4x4f(x, simd4x4f_create(simd4f_create(-0.707107, -0.408248, -0.57735, 0), + simd4f_create(0, 0.816497, -0.57735, 0), + simd4f_create(0.707107, -0.408248, -0.57735, 0), + simd4f_create(-1.41421, 0, 3.4641, 1)), epsilon); + + + } + + + it("should 
have simd4x4f_translation for creating translation matrix") { + + simd4x4f x; + simd4x4f_translation(&x, 1,2,3); + + // octave simd4x4f: [1,0,0,1; 0,1,0,2; 0,0,1,3; 0,0,0,1] + should_be_equal_simd4x4f(x, simd4x4f_create(simd4f_create(1.000000000000000f, 0.000000000000000f, 0.000000000000000f, 0.000000000000000f), simd4f_create(0.000000000000000f, 1.000000000000000f, 0.000000000000000f, 0.000000000000000f), simd4f_create(0.000000000000000f, 0.000000000000000f, 1.000000000000000f, 0.000000000000000f), simd4f_create(1.000000000000000f, 2.000000000000000f, 3.000000000000000f, 1.000000000000000f)), epsilon ); + } + + it("should have simd4x4f_axis_rotation for creating a rotation matrix along a axis") { + + simd4x4f x; + + simd4x4f_axis_rotation(&x, 45 * M_PI / 180.0f, simd4f_create(1,2,3,0)); + + const int epsilon = 20; + + should_be_equal_simd4x4f(x, simd4x4f_create(simd4f_create(0.728028, 0.608789, -0.315202, 0), + simd4f_create(-0.525105, 0.790791, 0.314508, 0), + simd4f_create(0.440727, -0.0634566, 0.895395, 0), + simd4f_create(0, 0, 0, 1)), epsilon); + + + } + + +} + + diff --git a/3rdparty/vectorial/spec/spec_vec2f.cpp b/3rdparty/vectorial/spec/spec_vec2f.cpp new file mode 100644 index 0000000..385079f --- /dev/null +++ b/3rdparty/vectorial/spec/spec_vec2f.cpp @@ -0,0 +1,255 @@ +#include "spec_helper.h" +#include +using vectorial::vec2f; + +const int epsilon = 1; + +describe(vec2f, "constructing") { + it("should have default constructor that does nothing..") { + vec2f x; + } + + it("should have constructor with element values") { + vec2f x(10,20); + // octave vec2f: [10,20] + should_be_equal_vec2f(x, simd4f_create(10.000000000000000f, 20.000000000000000f, 0.0f, 0.0f), epsilon ); + + } + + it("should have constructor that loads from a float array") { + float ary[2] = { 1,2 }; + vec2f x(ary); + // octave vec2f: [1,2] + should_be_equal_vec2f(x, simd4f_create(1.000000000000000f, 2.000000000000000f, 0.0f, 0.0f), epsilon ); + } + +} + +describe(vec2f, "loads and 
stores") { + + it("should have method for loading from a float array") { + float ary[2] = { 1, 2 }; + vec2f x(-1, -1 ); + x.load(ary); + // octave vec2f: [1,2] + should_be_equal_vec2f(x, simd4f_create(1.000000000000000f, 2.000000000000000f, 0.0f, 0.0f), epsilon ); + } + + it("should have method for storing to a float array") { + float ary[2] = { -1, -1 }; + vec2f x(1, 2); + x.store(ary); + should_be_close_to(ary[0], 1, epsilon); + should_be_close_to(ary[1], 2, epsilon); + } + +} + + +describe(vec2f, "arithmetic with another vec2f") { + + it("should have operator+ for component-wise addition") { + vec2f a(1,2); + vec2f b(10,20); + vec2f x = a + b; + // octave vec2f: [1,2] + [10,20] + should_be_equal_vec2f(x, simd4f_create(11.000000000000000f, 22.000000000000000f, 0.0f, 0.0f), epsilon ); + + } + + it("should have operator- for component-wise subtraction") { + vec2f a(1,2); + vec2f b(10,20); + vec2f x = b - a; + // octave vec2f: [10,20] - [1,2] + should_be_equal_vec2f(x, simd4f_create(9.000000000000000f, 18.000000000000000f, 0.0f, 0.0f), epsilon ); + + } + + it("should have operator* for component-wise multiplication") { + vec2f a(1,2); + vec2f b(10,20); + vec2f x = a * b; + // octave vec2f: [1,2] .* [10,20] + should_be_equal_vec2f(x, simd4f_create(10.000000000000000f, 40.000000000000000f, 0.0f, 0.0f), epsilon ); + + } + + it("should have operator/ for component-wise division") { + vec2f a(1,2); + vec2f b(10,20); + vec2f x = b / a; + // octave vec2f: [10,20] ./ [1,2] + should_be_equal_vec2f(x, simd4f_create(10.000000000000000f, 10.000000000000000f, 0.0f, 0.0f), epsilon ); + + } + + + + it("should have operator+= for component-wise addition") { + vec2f x(1,2); + vec2f b(10,20); + x += b; + // octave vec2f: [1,2] + [10,20] + should_be_equal_vec2f(x, simd4f_create(11.000000000000000f, 22.000000000000000f, 0.0f, 0.0f), epsilon ); + + } + + it("should have operator-= for component-wise subtraction") { + vec2f a(1,2); + vec2f x(10,20); + x -= a; + // octave vec2f: [10,20] - 
[1,2] + should_be_equal_vec2f(x, simd4f_create(9.000000000000000f, 18.000000000000000f, 0.0f, 0.0f), epsilon ); + + } + + it("should have operator*= for component-wise multiplication") { + vec2f x(1,2); + vec2f b(10,20); + x *= b; + // octave vec2f: [1,2] .* [10,20] + should_be_equal_vec2f(x, simd4f_create(10.000000000000000f, 40.000000000000000f, 0.0f, 0.0f), epsilon ); + + } + + it("should have operator/= for component-wise division") { + vec2f a(1,2); + vec2f x(10,20); + x /= a; + // octave vec2f: [10,20] ./ [1,2] + should_be_equal_vec2f(x, simd4f_create(10.000000000000000f, 10.000000000000000f, 0.0f, 0.0f), epsilon ); + + } + + +} + + +describe(vec2f, "arithmetic with scalar") { + + it("should have operator+ for component-wise addition") { + vec2f a(1,2); + float b=10; + vec2f x = a + b; + // octave vec2f: [1,2] + 10 + should_be_equal_vec2f(x, simd4f_create(11.000000000000000f, 12.000000000000000f, 0.0f, 0.0f), epsilon ); + + } + + it("should have operator- for component-wise subtraction") { + float a=10; + vec2f b(10,20); + vec2f x = b - a; + // octave vec2f: [10,20] - 10 + should_be_equal_vec2f(x, simd4f_create(0.000000000000000f, 10.000000000000000f, 0.0f, 0.0f), epsilon ); + + } + + it("should have operator* for component-wise multiplication") { + vec2f a(1,2); + float b=10; + vec2f x = a * b; + // octave vec2f: [1,2] .* 10 + should_be_equal_vec2f(x, simd4f_create(10.000000000000000f, 20.000000000000000f, 0.0f, 0.0f), epsilon ); + + } + + it("should have operator/ for component-wise division") { + vec2f a(10,20); + float b=10; + vec2f x = a / b; + // octave vec2f: [10,20] ./ 10 + should_be_equal_vec2f(x, simd4f_create(1.000000000000000f, 2.000000000000000f, 0.0f, 0.0f), epsilon ); + + } + + + + it("should have operator+ for component-wise addition (float as lhs)") { + vec2f b(1,2); + float a=10; + vec2f x = a + b; + // octave vec2f: 10 + [1,2] + should_be_equal_vec2f(x, simd4f_create(11.000000000000000f, 12.000000000000000f, 0.0f, 0.0f), epsilon ); + + } + 
+ it("should have operator- for component-wise subtraction (float as lhs)") { + float b=50; + vec2f a(10,20); + vec2f x = b - a; + // octave vec2f: 50 - [10,20] + should_be_equal_vec2f(x, simd4f_create(40.000000000000000f, 30.000000000000000f, 0.0f, 0.0f), epsilon ); + + } + + it("should have operator* for component-wise multiplication (float as lhs)") { + vec2f b(1,2); + float a=10; + vec2f x = a * b; + // octave vec2f: 10 .* [1,2] + should_be_equal_vec2f(x, simd4f_create(10.000000000000000f, 20.000000000000000f, 0.0f, 0.0f), epsilon ); + + } + + it("should have operator* for component-wise multiplication (float as lhs)") { + vec2f b(10,20); + float a=40; + vec2f x = a / b; + // octave vec2f: 40 ./ [10,20] + should_be_equal_vec2f(x, simd4f_create(4.000000000000000f, 2.000000000000000f, 0.0f, 0.0f), epsilon ); + + } + + +} + + + +describe(vec2f, "vector math") { + + it("should have unary minus operator") { + vec2f a(1,2); + vec2f x = -a; + // octave vec2f: -[1,2] + should_be_equal_vec2f(x, simd4f_create(-1.000000000000000f, -2.000000000000000f, 0.0f, 0.0f), epsilon ); + } + + + it("should have dot function") { + vec2f a(1,2); + vec2f b(6,7); + float x = vectorial::dot(a,b); + + // octave vec2f: dot([1,2],[6,7]) + should_be_close_to(x, 20.000000000000000f, epsilon ); + } + + it("should have length_squared function") { + vec2f a(1,2); + float x = vectorial::length_squared(a); + + // octave vec2f: dot([1,2],[1,2]) + should_be_close_to(x, 5.000000000000000f, epsilon ); + } + + it("should have length function") { + vec2f a(1,2); + float x = vectorial::length(a); + + // octave vec2f: norm([1,2]) + should_be_close_to(x, 2.236067977499790f, epsilon ); + } + + + it("should have normalize function") { + vec2f a(1,2); + vec2f x = vectorial::normalize(a); + // octave vec2f: [1,2] / norm([1,2]) + should_be_equal_vec2f(x, simd4f_create(0.447213595499958f, 0.894427190999916f, 0.0f, 0.0f), epsilon ); + } + +} + + diff --git a/3rdparty/vectorial/spec/spec_vec3f.cpp 
b/3rdparty/vectorial/spec/spec_vec3f.cpp new file mode 100644 index 0000000..f435660 --- /dev/null +++ b/3rdparty/vectorial/spec/spec_vec3f.cpp @@ -0,0 +1,263 @@ +#include "spec_helper.h" +#include +using vectorial::vec3f; + +const int epsilon = 1; + +describe(vec3f, "constructing") { + it("should have default constructor that does nothing..") { + vec3f x; + } + + it("should have constructor with element values") { + vec3f x(10,20,30); + // octave vec3f: [10,20,30] + should_be_equal_vec3f(x, simd4f_create(10.000000000000000f, 20.000000000000000f, 30.000000000000000f, 0.0f), epsilon ); + + } + + it("should have constructor that loads from a float array") { + float ary[3] = { 1,2,3 }; + vec3f x(ary); + // octave vec3f: [1,2,3] + should_be_equal_vec3f(x, simd4f_create(1.000000000000000f, 2.000000000000000f, 3.000000000000000f, 0.0f), epsilon ); + } + +} + +describe(vec3f, "loads and stores") { + + it("should have method for loading from a float array") { + float ary[3] = { 1,2,3 }; + vec3f x(-1, -1, -1 ); + x.load(ary); + // octave vec3f: [1,2,3] + should_be_equal_vec3f(x, simd4f_create(1.000000000000000f, 2.000000000000000f, 3.000000000000000f, 0.0f), epsilon ); + } + + it("should have method for storing to a float array") { + float ary[3] = { -1, -1, -1 }; + vec3f x(1, 2, 3); + x.store(ary); + should_be_close_to(ary[0], 1, epsilon); + should_be_close_to(ary[1], 2, epsilon); + should_be_close_to(ary[2], 3, epsilon); + } + +} + +describe(vec3f, "arithmetic with another vec3f") { + + it("should have operator+ for component-wise addition") { + vec3f a(1,2,3); + vec3f b(10,20,30); + vec3f x = a + b; + // octave vec3f: [1,2,3] + [10,20,30] + should_be_equal_vec3f(x, simd4f_create(11.000000000000000f, 22.000000000000000f, 33.000000000000000f, 0.0f), epsilon ); + + } + + it("should have operator- for component-wise subtraction") { + vec3f a(1,2,3); + vec3f b(10,20,30); + vec3f x = b - a; + // octave vec3f: [10,20,30] - [1,2,3] + should_be_equal_vec3f(x, 
simd4f_create(9.000000000000000f, 18.000000000000000f, 27.000000000000000f, 0.0f), epsilon ); + + } + + it("should have operator* for component-wise multiplication") { + vec3f a(1,2,3); + vec3f b(10,20,30); + vec3f x = a * b; + // octave vec3f: [1,2,3] .* [10,20,30] + should_be_equal_vec3f(x, simd4f_create(10.000000000000000f, 40.000000000000000f, 90.000000000000000f, 0.0f), epsilon ); + + } + + it("should have operator/ for component-wise division") { + vec3f a(1,2,3); + vec3f b(10,20,30); + vec3f x = b / a; + // octave vec3f: [10,20,30] ./ [1,2,3] + should_be_equal_vec3f(x, simd4f_create(10.000000000000000f, 10.000000000000000f, 10.000000000000000f, 0.0f), epsilon ); + + } + + + + it("should have operator+= for component-wise addition") { + vec3f x(1,2,3); + vec3f b(10,20,30); + x += b; + // octave vec3f: [1,2,3] + [10,20,30] + should_be_equal_vec3f(x, simd4f_create(11.000000000000000f, 22.000000000000000f, 33.000000000000000f, 0.0f), epsilon ); + + } + + it("should have operator-= for component-wise subtraction") { + vec3f a(1,2,3); + vec3f x(10,20,30); + x -= a; + // octave vec3f: [10,20,30] - [1,2,3] + should_be_equal_vec3f(x, simd4f_create(9.000000000000000f, 18.000000000000000f, 27.000000000000000f, 0.0f), epsilon ); + + } + + it("should have operator*= for component-wise multiplication") { + vec3f x(1,2,3); + vec3f b(10,20,30); + x *= b; + // octave vec3f: [1,2,3] .* [10,20,30] + should_be_equal_vec3f(x, simd4f_create(10.000000000000000f, 40.000000000000000f, 90.000000000000000f, 0.0f), epsilon ); + + } + + it("should have operator/= for component-wise division") { + vec3f a(1,2,3); + vec3f x(10,20,30); + x /= a; + // octave vec3f: [10,20,30] ./ [1,2,3] + should_be_equal_vec3f(x, simd4f_create(10.000000000000000f, 10.000000000000000f, 10.000000000000000f, 0.0f), epsilon ); + + } + +} + + +describe(vec3f, "arithmetic with scalar") { + + it("should have operator+ for component-wise addition") { + vec3f a(1,2,3); + float b=10; + vec3f x = a + b; + // octave 
vec3f: [1,2,3] + 10 + should_be_equal_vec3f(x, simd4f_create(11.000000000000000f, 12.000000000000000f, 13.000000000000000f, 0.0f), epsilon ); + + } + + it("should have operator- for component-wise subtraction") { + float a=10; + vec3f b(10,20,30); + vec3f x = b - a; + // octave vec3f: [10,20,30] - 10 + should_be_equal_vec3f(x, simd4f_create(0.000000000000000f, 10.000000000000000f, 20.000000000000000f, 0.0f), epsilon ); + + } + + it("should have operator* for component-wise multiplication") { + vec3f a(1,2,3); + float b=10; + vec3f x = a * b; + // octave vec3f: [1,2,3] .* 10 + should_be_equal_vec3f(x, simd4f_create(10.000000000000000f, 20.000000000000000f, 30.000000000000000f, 0.0f), epsilon ); + + } + + it("should have operator/ for component-wise division") { + vec3f a(10,20,30); + float b=10; + vec3f x = a / b; + // octave vec3f: [10,20,30] ./ 10 + should_be_equal_vec3f(x, simd4f_create(1.000000000000000f, 2.000000000000000f, 3.000000000000000f, 0.0f), epsilon ); + + } + + + + it("should have operator+ for component-wise addition (float as lhs)") { + vec3f b(1,2,3); + float a=10; + vec3f x = a + b; + // octave vec3f: 10 + [1,2,3] + should_be_equal_vec3f(x, simd4f_create(11.000000000000000f, 12.000000000000000f, 13.000000000000000f, 0.0f), epsilon ); + + } + + it("should have operator- for component-wise subtraction (float as lhs)") { + float b=50; + vec3f a(10,20,30); + vec3f x = b - a; + // octave vec3f: 50 - [10,20,30] + should_be_equal_vec3f(x, simd4f_create(40.000000000000000f, 30.000000000000000f, 20.000000000000000f, 0.0f), epsilon ); + + } + + it("should have operator* for component-wise multiplication (float as lhs)") { + vec3f b(1,2,3); + float a=10; + vec3f x = a * b; + // octave vec3f: 10 .* [1,2,3] + should_be_equal_vec3f(x, simd4f_create(10.000000000000000f, 20.000000000000000f, 30.000000000000000f, 0.0f), epsilon ); + + } + + it("should have operator* for component-wise multiplication (float as lhs)") { + vec3f b(10,20,30); + float a=40; + vec3f x = 
a / b; + // octave vec3f: 40 ./ [10,20,30] + should_be_equal_vec3f(x, simd4f_create(4.000000000000000f, 2.000000000000000f, 1.333333333333333f, 0.0f), epsilon ); + + } + + +} + + + +describe(vec3f, "vector math") { + + it("should have unary minus operator") { + vec3f a(1,2,3); + vec3f x = -a; + // octave vec3f: -[1,2,3] + should_be_equal_vec3f(x, simd4f_create(-1.000000000000000f, -2.000000000000000f, -3.000000000000000f, 0.0f), epsilon ); + } + + + it("should have dot function") { + vec3f a(1,2,3); + vec3f b(6,7,8); + float x = vectorial::dot(a,b); + + // octave vec3f: dot([1,2,3],[6,7,8]) + should_be_close_to(x, 44.000000000000000f, epsilon ); + } + + it("should have cross function") { + vec3f a(1,2,3); + vec3f b(6,7,8); + vec3f x = vectorial::cross(a,b); + + // octave vec3f: cross([1,2,3],[6,7,8]) + should_be_equal_vec3f(x, simd4f_create(-5.000000000000000f, 10.000000000000000f, -5.000000000000000f, 0.0f), epsilon ); + } + + it("should have length_squared function") { + vec3f a(1,2,3); + float x = vectorial::length_squared(a); + + // octave vec3f: dot([1,2,3],[1,2,3]) + should_be_close_to(x, 14.000000000000000f, epsilon ); + } + + it("should have length function") { + vec3f a(1,2,3); + float x = vectorial::length(a); + + // octave vec3f: norm([1,2,3]) + should_be_close_to(x, 3.741657386773941f, epsilon ); + } + + + it("should have normalize function") { + vec3f a(1,2,3); + vec3f x = vectorial::normalize(a); + // octave vec3f: [1,2,3] / norm([1,2,3]) + should_be_equal_vec3f(x, simd4f_create(0.267261241912424f, 0.534522483824849f, 0.801783725737273f, 0.0f), epsilon ); + } + +} + + diff --git a/3rdparty/vectorial/spec/spec_vec4f.cpp b/3rdparty/vectorial/spec/spec_vec4f.cpp new file mode 100644 index 0000000..020be8d --- /dev/null +++ b/3rdparty/vectorial/spec/spec_vec4f.cpp @@ -0,0 +1,258 @@ +#include "spec_helper.h" +#include +using vectorial::vec4f; + +const int epsilon = 1; + +describe(vec4f, "constructing") { + it("should have default constructor that does 
nothing..") { + vec4f x; + } + + it("should have constructor with element values") { + vec4f x(10,20,30,40); + // octave vec4f: [10,20,30,40] + should_be_equal_vec4f(x, simd4f_create(10.000000000000000f, 20.000000000000000f, 30.000000000000000f, 40.000000000000000f), epsilon ); + + } + + it("should have constructor that loads from a float array") { + float ary[4] = { 1,2,3,4 }; + vec4f x(ary); + // octave vec4f: [1,2,3,4] + should_be_equal_vec4f(x, simd4f_create(1.000000000000000f, 2.000000000000000f, 3.000000000000000f, 4.000000000000000f), epsilon ); + } + +} + +describe(vec4f, "loads and stores") { + + + it("should have method for loading from a float array") { + float ary[4] = { 1,2,3,4 }; + vec4f x(-1, -1, -1, -1); + x.load(ary); + // octave vec4f: [1,2,3,4] + should_be_equal_vec4f(x, simd4f_create(1.000000000000000f, 2.000000000000000f, 3.000000000000000f, 4.000000000000000f), epsilon ); + } + + it("should have method for storing to a float array") { + float ary[4] = { -1, -1, -1, -1 }; + vec4f x(1, 2, 3, 4); + x.store(ary); + should_be_close_to(ary[0], 1, epsilon); + should_be_close_to(ary[1], 2, epsilon); + should_be_close_to(ary[2], 3, epsilon); + should_be_close_to(ary[3], 4, epsilon); + } + +} + +describe(vec4f, "arithmetic with another vec4f") { + + it("should have operator+ for component-wise addition") { + vec4f a(1,2,3,4); + vec4f b(10,20,30,40); + vec4f x = a + b; + // octave vec4f: [1,2,3,4] + [10,20,30,40] + should_be_equal_vec4f(x, simd4f_create(11.000000000000000f, 22.000000000000000f, 33.000000000000000f, 44.000000000000000f), epsilon ); + + } + + it("should have operator- for component-wise subtraction") { + vec4f a(1,2,3,4); + vec4f b(10,20,30,40); + vec4f x = b - a; + // octave vec4f: [10,20,30,40] - [1,2,3,4] + should_be_equal_vec4f(x, simd4f_create(9.000000000000000f, 18.000000000000000f, 27.000000000000000f, 36.000000000000000f), epsilon ); + + } + + it("should have operator* for component-wise multiplication") { + vec4f a(1,2,3,4); + 
vec4f b(10,20,30,40); + vec4f x = a * b; + // octave vec4f: [1,2,3,4] .* [10,20,30,40] + should_be_equal_vec4f(x, simd4f_create(10.000000000000000f, 40.000000000000000f, 90.000000000000000f, 160.000000000000000f), epsilon ); + + } + + it("should have operator/ for component-wise division") { + vec4f a(1,2,3,4); + vec4f b(10,20,30,40); + vec4f x = b / a; + // octave vec4f: [10,20,30,40] ./ [1,2,3,4] + should_be_equal_vec4f(x, simd4f_create(10.000000000000000f, 10.000000000000000f, 10.000000000000000f, 10.000000000000000f), epsilon ); + + } + + + + + it("should have operator+= for component-wise addition") { + vec4f x(1,2,3,4); + vec4f b(10,20,30,40); + x += b; + // octave vec4f: [1,2,3,4] + [10,20,30,40] + should_be_equal_vec4f(x, simd4f_create(11.000000000000000f, 22.000000000000000f, 33.000000000000000f, 44.000000000000000f), epsilon ); + + } + + it("should have operator-= for component-wise subtraction") { + vec4f a(1,2,3,4); + vec4f x(10,20,30,40); + x -= a; + // octave vec4f: [10,20,30,40] - [1,2,3,4] + should_be_equal_vec4f(x, simd4f_create(9.000000000000000f, 18.000000000000000f, 27.000000000000000f, 36.000000000000000f), epsilon ); + + } + + it("should have operator*= for component-wise multiplication") { + vec4f x(1,2,3,4); + vec4f b(10,20,30,40); + x *= b; + // octave vec4f: [1,2,3,4] .* [10,20,30,40] + should_be_equal_vec4f(x, simd4f_create(10.000000000000000f, 40.000000000000000f, 90.000000000000000f, 160.000000000000000f), epsilon ); + + } + + it("should have operator/= for component-wise division") { + vec4f a(1,2,3,4); + vec4f x(10,20,30,40); + x /= a; + // octave vec4f: [10,20,30,40] ./ [1,2,3,4] + should_be_equal_vec4f(x, simd4f_create(10.000000000000000f, 10.000000000000000f, 10.000000000000000f, 10.000000000000000f), epsilon ); + + } + + + +} + + +describe(vec4f, "arithmetic with scalar") { + + it("should have operator+ for component-wise addition") { + vec4f a(1,2,3,4); + float b=10; + vec4f x = a + b; + // octave vec4f: [1,2,3,4] + 10 + 
should_be_equal_vec4f(x, simd4f_create(11.000000000000000f, 12.000000000000000f, 13.000000000000000f, 14.000000000000000f), epsilon ); + + } + + it("should have operator- for component-wise subtraction") { + float a=10; + vec4f b(10,20,30,40); + vec4f x = b - a; + // octave vec4f: [10,20,30,40] - 10 + should_be_equal_vec4f(x, simd4f_create(0.000000000000000f, 10.000000000000000f, 20.000000000000000f, 30.000000000000000f), epsilon ); + + } + + it("should have operator* for component-wise multiplication") { + vec4f a(1,2,3,4); + float b=10; + vec4f x = a * b; + // octave vec4f: [1,2,3,4] .* 10 + should_be_equal_vec4f(x, simd4f_create(10.000000000000000f, 20.000000000000000f, 30.000000000000000f, 40.000000000000000f), epsilon ); + + } + + it("should have operator/ for component-wise division") { + vec4f a(10,20,30,40); + float b=10; + vec4f x = a / b; + // octave vec4f: [10,20,30,40] ./ 10 + should_be_equal_vec4f(x, simd4f_create(1.000000000000000f, 2.000000000000000f, 3.000000000000000f, 4.000000000000000f), epsilon ); + + } + + + + it("should have operator+ for component-wise addition (float as lhs)") { + vec4f b(1,2,3,4); + float a=10; + vec4f x = a + b; + // octave vec4f: 10 + [1,2,3,4] + should_be_equal_vec4f(x, simd4f_create(11.000000000000000f, 12.000000000000000f, 13.000000000000000f, 14.000000000000000f), epsilon ); + + } + + it("should have operator- for component-wise subtraction (float as lhs)") { + float b=50; + vec4f a(10,20,30,40); + vec4f x = b - a; + // octave vec4f: 50 - [10,20,30,40] + should_be_equal_vec4f(x, simd4f_create(40.000000000000000f, 30.000000000000000f, 20.000000000000000f, 10.000000000000000f), epsilon ); + + } + + it("should have operator* for component-wise multiplication (float as lhs)") { + vec4f b(1,2,3,4); + float a=10; + vec4f x = a * b; + // octave vec4f: 10 .* [1,2,3,4] + should_be_equal_vec4f(x, simd4f_create(10.000000000000000f, 20.000000000000000f, 30.000000000000000f, 40.000000000000000f), epsilon ); + + } + + it("should 
have operator* for component-wise multiplication (float as lhs)") { + vec4f b(10,20,30,40); + float a=40; + vec4f x = a / b; + // octave vec4f: 40 ./ [10,20,30,40] + should_be_equal_vec4f(x, simd4f_create(4.000000000000000f, 2.000000000000000f, 1.333333333333333f, 1.000000000000000f), epsilon ); + + } + + +} + + + +describe(vec4f, "vector math") { + + it("should have unary minus operator") { + vec4f a(1,2,3,4); + vec4f x = -a; + // octave vec4f: -[1,2,3,4] + should_be_equal_vec4f(x, simd4f_create(-1.000000000000000f, -2.000000000000000f, -3.000000000000000f, -4.000000000000000f), epsilon ); + } + + it("should have dot function") { + vec4f a(1,2,3,4); + vec4f b(6,7,8,9); + float x = vectorial::dot(a,b); + + // octave vec4f: dot([1,2,3,4],[6,7,8,9]) + should_be_close_to(x, 80.000000000000000f, epsilon ); + } + + it("should have length_squared function") { + vec4f a(1,2,3,4); + float x = vectorial::length_squared(a); + + // octave vec4f: dot([1,2,3,4],[1,2,3,4]) + should_be_close_to(x, 30.000000000000000f, epsilon ); + } + + it("should have length function") { + vec4f a(1,2,3,4); + float x = vectorial::length(a); + + // octave vec4f: norm([1,2,3,4]) + should_be_close_to(x, 5.477225575051661f, epsilon ); + } + + + it("should have normalize function") { + vec4f a(1,2,3,4); + vec4f x = vectorial::normalize(a); + // octave vec4f: [1,2,3,4] / norm([1,2,3,4]) + should_be_equal_vec4f(x, simd4f_create(0.182574185835055f, 0.365148371670111f, 0.547722557505166f, 0.730296743340221f), epsilon ); + } + +} + + diff --git a/3rdparty/vectorial/tools/spechelper.m b/3rdparty/vectorial/tools/spechelper.m new file mode 100644 index 0000000..ba49405 --- /dev/null +++ b/3rdparty/vectorial/tools/spechelper.m @@ -0,0 +1,45 @@ +#!/usr/bin/env octave + +1; + +function spec_formatter (val,type) + + if( isscalar(val) == 1 ) + printf(" should_be_close_to(x, %15.15ff, epsilon );", val); + return; + endif + + if( size(val) == [1,2] ) + if( strcmp(type,"simd2f") == 1 ) + printf(" 
should_be_equal_%s(x, simd2f_create(%15.15ff, %15.15ff), epsilon );",type, val(1), val(2)); + else + printf(" should_be_equal_%s(x, simd4f_create(%15.15ff, %15.15ff, 0.0f, 0.0f), epsilon );",type, val(1), val(2)); + endif + return; + endif + + if( size(val) == [1,3] ) + printf(" should_be_equal_%s(x, simd4f_create(%15.15ff, %15.15ff, %15.15ff, 0.0f), epsilon );",type, val(1), val(2), val(3)); + return; + endif + + if( size(val) == [1,4] ) + printf(" should_be_equal_%s(x, simd4f_create(%15.15ff, %15.15ff, %15.15ff, %15.15ff), epsilon );",type, val(1), val(2), val(3), val(4)); + return; + endif + + if( size(val) == [4,1] ) + printf(" should_be_equal_%s(x, simd4f_create(%15.15ff, %15.15ff, %15.15ff, %15.15ff), epsilon );",type, val(1), val(2), val(3), val(4)); + return; + endif + + if( size(val) == [4,4] ) + printf(" should_be_equal_%s(x, simd4x4f_create(simd4f_create(%15.15ff, %15.15ff, %15.15ff, %15.15ff), simd4f_create(%15.15ff, %15.15ff, %15.15ff, %15.15ff), simd4f_create(%15.15ff, %15.15ff, %15.15ff, %15.15ff), simd4f_create(%15.15ff, %15.15ff, %15.15ff, %15.15ff)), epsilon );",type, + val(1), val(2), val(3), val(4), val(5), val(6), val(7), val(8), val(9), val(10), val(11), val(12), val(13), val(14), val(15), val(16) + ); + return; + endif + + +endfunction + diff --git a/3rdparty/vectorial/tools/update_spec.rb b/3rdparty/vectorial/tools/update_spec.rb new file mode 100755 index 0000000..c1323de --- /dev/null +++ b/3rdparty/vectorial/tools/update_spec.rb @@ -0,0 +1,24 @@ +#!/usr/bin/env ruby + +SPECHELPER = File.join(File.dirname(__FILE__), "spechelper.m") +def octave_eval(str, type) + puts "evalling (#{type}): #{str}" + ret = `octave --quiet --eval 'source("#{SPECHELPER}"); spec_formatter(#{str}, "#{type}")'` + puts " = #{ret.strip}" + ret +end + + +ARGV.each do |fn| + str = File.read(fn) + str.gsub!(%r{(// octave (\w+):)(.*?)\n(.*?\n)}) do |match| + e = octave_eval($3, $2) + + [$1, $3, "\n", e, "\n"].join + end + File.open(fn, "w") do |f| + f.write str + end + 
+end + diff --git a/3rdparty/vectorial/vectorial.sln b/3rdparty/vectorial/vectorial.sln new file mode 100644 index 0000000..7701f8d --- /dev/null +++ b/3rdparty/vectorial/vectorial.sln @@ -0,0 +1,31 @@ + +Microsoft Visual Studio Solution File, Format Version 10.00 +# Visual C++ Express 2008 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vectorial specsuite", "vectorial.vcproj", "{9450BCE8-02CB-4169-8471-2DFF764817F4}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vectorial benchmark", "vectorialbenchmark.vcproj", "{1E78F64D-C404-4048-8AE6-217089480E8A}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Release Scalar|Win32 = Release Scalar|Win32 + Release SSE|Win32 = Release SSE|Win32 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {9450BCE8-02CB-4169-8471-2DFF764817F4}.Debug|Win32.ActiveCfg = Debug|Win32 + {9450BCE8-02CB-4169-8471-2DFF764817F4}.Debug|Win32.Build.0 = Debug|Win32 + {9450BCE8-02CB-4169-8471-2DFF764817F4}.Release Scalar|Win32.ActiveCfg = Release Scalar|Win32 + {9450BCE8-02CB-4169-8471-2DFF764817F4}.Release Scalar|Win32.Build.0 = Release Scalar|Win32 + {9450BCE8-02CB-4169-8471-2DFF764817F4}.Release SSE|Win32.ActiveCfg = Release|Win32 + {9450BCE8-02CB-4169-8471-2DFF764817F4}.Release SSE|Win32.Build.0 = Release|Win32 + {1E78F64D-C404-4048-8AE6-217089480E8A}.Debug|Win32.ActiveCfg = Debug|Win32 + {1E78F64D-C404-4048-8AE6-217089480E8A}.Debug|Win32.Build.0 = Debug|Win32 + {1E78F64D-C404-4048-8AE6-217089480E8A}.Release Scalar|Win32.ActiveCfg = Release Scalar|Win32 + {1E78F64D-C404-4048-8AE6-217089480E8A}.Release Scalar|Win32.Build.0 = Release Scalar|Win32 + {1E78F64D-C404-4048-8AE6-217089480E8A}.Release SSE|Win32.ActiveCfg = Release|Win32 + {1E78F64D-C404-4048-8AE6-217089480E8A}.Release SSE|Win32.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection 
+EndGlobal diff --git a/3rdparty/vectorial/vectorial.vcproj b/3rdparty/vectorial/vectorial.vcproj new file mode 100644 index 0000000..e800a17 --- /dev/null +++ b/3rdparty/vectorial/vectorial.vcproj @@ -0,0 +1,350 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/3rdparty/vectorial/vectorialbenchmark.vcproj b/3rdparty/vectorial/vectorialbenchmark.vcproj new file mode 100644 index 0000000..802ddd7 --- /dev/null +++ b/3rdparty/vectorial/vectorialbenchmark.vcproj @@ -0,0 +1,340 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/CMakeLists.txt b/CMakeLists.txt index 2c359f0..392922a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,10 +1,10 @@ -cmake_minimum_required (VERSION 3.13) +cmake_minimum_required(VERSION 3.13) cmake_policy(SET CMP0077 NEW) -project (AnimTestbed - VERSION 0.0.1 - LANGUAGES CXX C) +project(AnimTestbed + VERSION 0.0.1 + LANGUAGES CXX C) find_package(Threads) find_package(OpenGL REQUIRED) @@ -14,7 +14,7 @@ set(GLFW_BUILD_DOCS Off) set(GLFW_BUILD_EXAMPLES Off) set(GLFW_BUILD_TESTS Off) set(GLFW_BUILD_INSTALL Off) -add_subdirectory (3rdparty/glfw) +add_subdirectory(3rdparty/glfw) # ozz set(ozz_build_tools ON) @@ -23,30 +23,32 @@ set(ozz_build_gltf ON) set(ozz_build_data OFF CACHE BOOL "") set(ozz_build_samples OFF CACHE BOOL "") set(ozz_build_howtos OFF CACHE BOOL "") -set(ozz_build_tests OFF CACHE BOOL "" ) +set(ozz_build_tests OFF CACHE BOOL "") set(ozz_build_simd_ref OFF CACHE BOOL "") set(ozz_build_msvc_rt_dll OFF CACHE BOOL "") -add_subdirectory (3rdparty/ozz-animation) +add_subdirectory(3rdparty/ozz-animation) # Simulator 
Executable add_executable(AnimTestbed) target_include_directories( - AnimTestbed - PUBLIC $ - PUBLIC $ - PUBLIC $ - PUBLIC $ - PUBLIC $ + AnimTestbed + PUBLIC $ + PUBLIC $ + PUBLIC $ + PUBLIC $ + PUBLIC $ + PUBLIC $ ) target_link_libraries(AnimTestbed glfw ozz_base ozz_geometry ozz_animation ${OPENGL_LIBRARIES}) -target_sources(AnimTestbed PRIVATE - src/main.cc - 3rdparty/glfw/deps/glad_gl.c - 3rdparty/imgui/imgui.cpp - 3rdparty/imgui/imgui_draw.cpp - 3rdparty/imgui/imgui_widgets.cpp - 3rdparty/imgui/imgui_demo.cpp - 3rdparty/imgui/backends/imgui_impl_glfw.cpp - 3rdparty/imgui/backends/imgui_impl_opengl3.cpp - ) +target_sources(AnimTestbed PRIVATE + src/main.cc + src/Camera.c + 3rdparty/glfw/deps/glad_gl.c + 3rdparty/imgui/imgui.cpp + 3rdparty/imgui/imgui_draw.cpp + 3rdparty/imgui/imgui_widgets.cpp + 3rdparty/imgui/imgui_demo.cpp + 3rdparty/imgui/backends/imgui_impl_glfw.cpp + 3rdparty/imgui/backends/imgui_impl_opengl3.cpp + ) diff --git a/src/Camera.c b/src/Camera.c new file mode 100644 index 0000000..5890221 --- /dev/null +++ b/src/Camera.c @@ -0,0 +1,158 @@ +#include "Camera.h" + +#include + +#include "string.h" +#include "vectorial/simd4x4f.h" + +inline void Camera_Init(Camera* camera) { + // clang-format off + static float mtx_identity[16] = { + 1.f, 0.f, 0.f, 0.f, + 0.f, 1.f, 0.f, 0.f, + 0.f, 0.f, 1.f, 0.f, + 0.f, 0.f, 0.f, 1.f + }; + // clang-format on + camera->near = 0.01; + camera->far = 1000.0; + camera->fov = 90 * M_PI / 180.f; + + camera->forward[0] = -1.f; + camera->forward[1] = 0.f; + camera->forward[2] = -1.f; + + camera->right[0] = 1.f; + camera->right[1] = 0.f; + camera->right[2] = 0.f; + + camera->up[0] = 0.f; + camera->up[1] = 1.f; + camera->up[2] = 0.f; + + camera->pos[0] = 2.f; + camera->pos[1] = 1.2f; + camera->pos[2] = 2.f; + + camera->vel[0] = 0.f; + camera->vel[1] = 0.f; + camera->vel[2] = 0.f; + + camera->heading = -45.0 * M_PI / 180.0f; + camera->pitch = 10 * M_PI / 180.0f; + + memcpy(&camera->mtxView, &mtx_identity, 
sizeof(camera->mtxView)); + Camera_CalcToMatrix(camera, camera->mtxView); /* pass the array itself (decays to float*), not a float(*)[16] */ + Camera_CalcFromMatrix(camera, camera->mtxView); +} + +void Camera_CalcFromMatrix(Camera* camera, float* mat) { /* Derives forward/right/heading/pitch/pos from the 16 floats in mat (loaded as simd4f x, y, z, w). */ + simd4x4f mtx; + simd4x4f_uload(&mtx, mat); + + camera->forward[0] = simd4f_get_z(mtx.x); /* accessors instead of mtx.x[2]: simd4f is not subscriptable on every vectorial backend */ + camera->forward[1] = simd4f_get_z(mtx.y); + camera->forward[2] = simd4f_get_z(mtx.z); + + camera->right[0] = simd4f_get_x(mtx.x); + camera->right[1] = simd4f_get_x(mtx.y); + camera->right[2] = simd4f_get_x(mtx.z); + + camera->heading = atan2(-camera->forward[2], camera->forward[0]); + camera->pitch = asin(camera->forward[1]); + + simd4x4f rot_mat = mtx; /* rotation part only: w replaced by (0,0,0,1), then transposed */ + rot_mat.w = simd4f_create(0.f, 0.f, 0.f, 1.f); + simd4x4f_transpose_inplace(&rot_mat); + + simd4f eye; + simd4x4f_matrix_point3_mul(&rot_mat, &mtx.w, &eye); + + camera->pos[0] = -simd4f_get_x(eye); /* position is the negated product of the transposed rotation and mtx.w */ + camera->pos[1] = -simd4f_get_y(eye); + camera->pos[2] = -simd4f_get_z(eye); + +// gLog ("ViewMat"); +// gLog ("%f, %f, %f, %f", mtx->x[0], mtx->x[1], mtx->x[2], mtx->x[3]); +// gLog ("%f, %f, %f, %f", mtx->y[0], mtx->y[1], mtx->y[2], mtx->y[3]); +// gLog ("%f, %f, %f, %f", mtx->z[0], mtx->z[1], mtx->z[2], mtx->z[3]); +// gLog ("%f, %f, %f, %f", mtx->w[0], mtx->w[1], mtx->w[2], mtx->w[3]); +} + +void Camera_CalcToMatrix(Camera* camera, float* mat) { /* Builds a look-at matrix from pos/heading/pitch and writes 16 floats to mat. */ + float sp = sin(camera->pitch); + float cp = cos(camera->pitch); + float ch = cos(camera->heading); + float sh = sin(camera->heading); + + const float d = 10.0f; /* distance along forward used to place the look-at target */ + + simd4f eye = simd4f_create (camera->pos[0], camera->pos[1], camera->pos[2], 1.f); + simd4f forward = simd4f_create (-cp * ch, -sp, cp * sh, 0.f); + simd4f right = simd4f_cross3 (forward, simd4f_create (0.f, 1.f, 0.f, 1.f)); + simd4f up = simd4f_cross3(right, forward); + simd4f center = simd4f_add(simd4f_mul(forward, simd4f_splat(d)), eye); + + camera->forward[0] = -simd4f_get_x(forward); /* camera->forward stores the negated look direction */ + camera->forward[1] = -simd4f_get_y(forward); + camera->forward[2] = -simd4f_get_z(forward); + + camera->right[0] = simd4f_get_x(right); + camera->right[1] = simd4f_get_y(right); + camera->right[2] =
simd4f_get_z(right); + + simd4x4f mtx; + simd4x4f_lookat(&mtx, eye, center, up); + + simd4f_ustore4(mtx.x, mat); + simd4f_ustore4(mtx.y, mat +4); + simd4f_ustore4(mtx.z, mat +8); + simd4f_ustore4(mtx.w, mat +12); +} + +inline void Camera_Update( /* Recomputes mtxProj every call; on mouse movement or non-NULL accel also updates heading/pitch/vel/pos and mtxView. */ + Camera* camera, + int width, + int height, + float dt, + float mouse_dx, + float mouse_dy, + float accel[3]) { /* accel may be NULL: it is then treated as zero acceleration */ + assert(camera); + assert((width > 0) && (height > 0)); + const float w = (float) width; + const float h = (float) height; + simd4x4f proj; + simd4x4f_perspective(&proj, camera->fov, w/h, camera->near, camera->far); + simd4f_ustore4(proj.x, camera->mtxProj); + simd4f_ustore4(proj.y, camera->mtxProj +4); + simd4f_ustore4(proj.z, camera->mtxProj +8); + simd4f_ustore4(proj.w, camera->mtxProj +12); + + if (mouse_dx != 0.f || mouse_dy != 0.f || accel != NULL) { + const float mouse_sensitivity = 20.0f; + + camera->heading -= dt * mouse_dx * mouse_sensitivity * M_PI / 180.f; + if (camera->heading < -M_PI) { /* wrap heading by one full turn */ + camera->heading += M_PI * 2.f; + } else if (camera->heading > M_PI) { + camera->heading -= M_PI * 2.f; + } + camera->pitch += dt * mouse_dy * mouse_sensitivity * M_PI / 180.f; + if (camera->pitch < -M_PI * 0.49) { /* clamp pitch to +/-0.49*pi */ + camera->pitch = -M_PI * 0.49; + } else if (camera->pitch > M_PI * 0.49) { + camera->pitch = M_PI * 0.49; + } + + for (int i = 0; i < 3; i++) { /* accel[0]/[1]/[2] act along forward/up/right; NULL accel must not be dereferenced on a mouse-only update */ + camera->vel[i] += dt * (accel ? accel[0] : 0.f) * camera->forward[i] + + dt * (accel ? accel[2] : 0.f) * camera->right[i] + + dt * (accel ? accel[1] : 0.f) * camera->up[i]; + camera->pos[i] += dt * camera->vel[i]; + camera->vel[i] = camera->vel[i] * 0.1; + } + + Camera_CalcToMatrix(camera, camera->mtxView); /* pass the array itself (decays to float*), not a float(*)[16] */ + } +} \ No newline at end of file diff --git a/src/Camera.h b/src/Camera.h new file mode 100644 index 0000000..82f2690 --- /dev/null +++ b/src/Camera.h @@ -0,0 +1,43 @@ +// +// Created by martin on 19.10.21.
+// + +#ifndef RBDLSIM_RENDER_UTILS_H +#define RBDLSIM_RENDER_UTILS_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + float mtxProj[16]; + float mtxView[16]; + float near; + float far; + float fov; + float heading; + float pitch; + float vel[3]; + float pos[3]; + float forward[3]; + float right[3]; + float up[3]; +} Camera; + +void Camera_Init(Camera* camera); +void Camera_CalcFromMatrix(Camera* camera, float* mtx); +void Camera_CalcToMatrix(Camera* camera, float* mtx); +void Camera_Update( + Camera* camera, + int width, + int height, + float dt, + float mouse_dx, + float mouse_dy, + float accel[3]); + +#ifdef __cplusplus +} +#endif + +#endif //RBDLSIM_RENDER_UTILS_H diff --git a/src/main.cc b/src/main.cc index 3161332..b2f9d28 100644 --- a/src/main.cc +++ b/src/main.cc @@ -14,6 +14,7 @@ #define GLFW_INCLUDE_NONE #include +#include "Camera.h" #include "GLFW/glfw3.h" const int Width = 1024; @@ -68,7 +69,7 @@ typedef struct { static struct { std::unique_ptr ozz; sg_pass_action pass_action; - camera_t camera; + Camera camera; struct { bool skeleton; bool animation; @@ -85,6 +86,25 @@ static struct { } time; } state; +typedef struct { + int32_t mousedX; + int32_t mousedY; + int32_t mouseX; + int32_t mouseY; + uint8_t mouseButton; + int32_t mouseScroll; + char key; +} GuiInputState; + +GuiInputState gGuiInputState = {0, 0, 0, 0, 0, 0, 0}; + +enum class ControlMode { + ControlModeNone, + ControlModeFPS +}; + +ControlMode gControlMode = ControlMode::ControlModeNone; + // io buffers for skeleton and animation data files, we know the max file size upfront static uint8_t skel_data_buffer[4 * 1024]; static uint8_t anim_data_buffer[32 * 1024]; @@ -93,11 +113,35 @@ static void load_skeleton(void); static void load_animation(void); static void eval_animation(void); static void draw_skeleton(void); +static void draw_grid(void); static void draw_ui(void); // static void skeleton_data_loaded(const sfetch_response_t* response); // static void 
animation_data_loaded(const sfetch_response_t* response); static void frame(void); +void handle_mouse(GLFWwindow* w, GuiInputState* io_input_state) { + if (!glfwGetWindowAttrib(w, GLFW_FOCUSED)) { + return; + } + + double mouse_x, mouse_y; + glfwGetCursorPos(w, &mouse_x, &mouse_y); + + if (io_input_state->mouseButton) { + io_input_state->mousedX = int32_t(mouse_x) - io_input_state->mouseX; + io_input_state->mousedY = int32_t(mouse_y) - io_input_state->mouseY; + } else { + io_input_state->mousedX = 0; + io_input_state->mousedY = 0; + } + io_input_state->mouseX = int32_t(mouse_x); + io_input_state->mouseY = int32_t(mouse_y); + + io_input_state->mouseButton = glfwGetMouseButton(w, 0) + + (glfwGetMouseButton(w, 1) << 1) + + (glfwGetMouseButton(w, 2) << 2); +} + int main() { // window and GL context via GLFW and flextGL glfwInit(); @@ -106,7 +150,7 @@ int main() { glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GLFW_TRUE); glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE); glfwWindowHint(GLFW_COCOA_RETINA_FRAMEBUFFER, GLFW_FALSE); - GLFWwindow* w = glfwCreateWindow(Width, Height, "Sokol+ImGui+GLFW", 0, 0); + GLFWwindow* w = glfwCreateWindow(Width, Height, "AnimTestbed", 0, 0); glfwMakeContextCurrent(w); glfwSwapInterval(1); @@ -154,16 +198,7 @@ int main() { state.ozz = std::make_unique(); state.time.factor = 1.0f; - // initialize camera helper - camera_desc_t camdesc = {}; - camdesc.min_dist = 1.0f; - camdesc.max_dist = 100.0f; - camdesc.farz = 1000.0f; - camdesc.center.Y = 1.0f; - camdesc.distance = 30.0f; - camdesc.latitude = 10.0f; - camdesc.longitude = 20.0f; - cam_init(&state.camera, &camdesc); + Camera_Init(&state.camera); // setup Dear Imgui ImGui::CreateContext(); @@ -265,7 +300,7 @@ int main() { // initial clear color pass_action.colors[0].action = SG_ACTION_CLEAR; - pass_action.colors[0].value = {0.0f, 0.5f, 0.7f, 1.0f}; + pass_action.colors[0].value = {0.1f, 0.1f, 0.1f, 1.0f}; load_skeleton(); load_animation(); @@ -278,14 +313,81 @@ int main() { int 
cur_width, cur_height; glfwGetFramebufferSize(w, &cur_width, &cur_height); - cam_update(&state.camera, cur_width, cur_height); - // this is standard ImGui demo code ImGuiIO& io = ImGui::GetIO(); io.DisplaySize = ImVec2(float(cur_width), float(cur_height)); io.DeltaTime = (float)stm_sec(stm_laptime(&last_time)); ImGui::NewFrame(); + ImGui::Begin("Camera"); + ImGui::SliderFloat3("pos", state.camera.pos, -100.f, 100.f); + ImGui::SliderFloat("near", &state.camera.near, 0.001f, 10.f); + ImGui::SliderFloat("far", &state.camera.far, 1.0f, 10000.f); + ImGui::SliderFloat("heading", &state.camera.heading, -180.0f, 180.f); + ImGui::SliderFloat("pitch", &state.camera.pitch, -179.0f, 179.f); + ImGui::End(); + + // handle input + handle_mouse (w, &gGuiInputState); + + if (glfwGetMouseButton(w, GLFW_MOUSE_BUTTON_RIGHT)) { + if (gControlMode == ControlMode::ControlModeNone) { + gControlMode = ControlMode::ControlModeFPS; + Camera_CalcFromMatrix(&state.camera, &state.camera.mtxView[0]); + glfwSetInputMode(w, GLFW_CURSOR, GLFW_CURSOR_DISABLED); + } + } else { + gControlMode = ControlMode::ControlModeNone; + glfwSetInputMode(w, GLFW_CURSOR, GLFW_CURSOR_NORMAL); + Camera_Update( + &state.camera, + cur_width, + cur_height, + state.time.frame, + 0, + 0, + nullptr); + } + + if (gControlMode == ControlMode::ControlModeFPS) { + float camera_accel[3] = {0.f, 0.f, 0.f}; + float accel_scale = 100.0; + + if (glfwGetKey(w, GLFW_KEY_LEFT_SHIFT)) { + accel_scale *= 3.; + } else if (glfwGetKey(w, GLFW_KEY_LEFT_CONTROL)) { + accel_scale /= 3.; + } + + if (glfwGetKey(w, GLFW_KEY_W)) { + camera_accel[0] -= accel_scale; + } + if (glfwGetKey(w, GLFW_KEY_S)) { + camera_accel[0] += accel_scale; + } + if (glfwGetKey(w, GLFW_KEY_C)) { + camera_accel[1] -= accel_scale; + } + if (glfwGetKey(w, GLFW_KEY_SPACE)) { + camera_accel[1] += accel_scale; + } + if (glfwGetKey(w, GLFW_KEY_A)) { + camera_accel[2] -= accel_scale; + } + if (glfwGetKey(w, GLFW_KEY_D)) { + camera_accel[2] += accel_scale; + } + + 
Camera_Update( + &state.camera, + cur_width, + cur_height, + state.time.frame, + gGuiInputState.mousedX, + gGuiInputState.mousedY, + camera_accel); + } + if (ImGui::BeginMainMenuBar()) { ImGui::Text("AnimTestbed"); ImGui::Checkbox("ImGui Demo", &show_imgui_demo_window); @@ -301,14 +403,6 @@ int main() { ImGui::ShowDemoWindow(); } - ImGui::Begin("Camera"); - ImGui::SliderFloat("min_dist", &state.camera.min_dist, 1.0f, 100.f); - ImGui::SliderFloat("max_dist", &state.camera.max_dist, 1.0f, 100.f); - ImGui::SliderFloat("center.Y", &state.camera.center.Y, 1.0f, 100.f); - ImGui::SliderFloat("distance", &state.camera.distance, 1.0f, 1000.f); - ImGui::SliderFloat("latitude", &state.camera.latitude, 1.0f, 100.f); - ImGui::SliderFloat("longitude", &state.camera.longitude, -179.0f, 179.f); - ImGui::End(); // the sokol_gfx draw pass sg_begin_default_pass(&pass_action, cur_width, cur_height); @@ -433,6 +527,8 @@ static void draw_ui() { } static void frame() { + draw_grid(); + if (state.loaded.animation && state.loaded.skeleton) { if (!state.time.paused) { state.time.absolute += state.time.frame * state.time.factor; @@ -516,14 +612,50 @@ static void draw_joint(int joint_index, int parent_joint_index) { draw_line(p5, p2); } +static void draw_grid(void) { + sgl_defaults(); + sgl_matrix_mode_projection(); + sgl_load_matrix((const float*)&state.camera.mtxProj); + sgl_matrix_mode_modelview(); + sgl_load_matrix((const float*)&state.camera.mtxView); + + const int grid_size = 10; + + sgl_begin_lines(); + sgl_c3f(0.4f, 0.4f, 0.4f); + for (int i = -grid_size; i <= grid_size; i++) { + if (i == 0) { + continue; + } + ozz::math::SimdFloat4 p0 = ozz::math::simd_float4::Load(i * 1.0f, 0.f, -grid_size * 1.0f, 1.f); + ozz::math::SimdFloat4 p1 = ozz::math::simd_float4::Load(i * 1.0f, 0.f, grid_size * 1.0f, 1.f); + draw_line(p0, p1); + + p0 = ozz::math::simd_float4::Load(-grid_size * 1.0f, 0.f, i * 1.0f, 1.f); + p1 = ozz::math::simd_float4::Load(grid_size * 1.0f, 0.f, i * 1.0f, 1.f); + 
draw_line(p0, p1); + } + sgl_c3f (0.7f, 0.4f, 0.2f); + ozz::math::SimdFloat4 p0 = ozz::math::simd_float4::Load(0, 0.f, -grid_size * 1.0f, 1.f); + ozz::math::SimdFloat4 p1 = ozz::math::simd_float4::Load(0, 0.f, grid_size * 1.0f, 1.f); + draw_line(p0, p1); + + sgl_c3f (0.2f, 0.4f, 0.7f); + p0 = ozz::math::simd_float4::Load(-grid_size * 1.0f, 0.f, 0.f, 1.f); + p1 = ozz::math::simd_float4::Load(grid_size * 1.0f, 0.f, 0.f, 1.f); + draw_line(p0, p1); + + sgl_end(); +} + static void draw_skeleton(void) { sgl_defaults(); sgl_matrix_mode_projection(); - sgl_load_matrix((const float*)&state.camera.proj); + sgl_load_matrix((const float*)&state.camera.mtxProj); sgl_matrix_mode_modelview(); - hmm_mat4 scale_mat = HMM_Scale (HMM_Vec3(0.1f, 0.1f, 0.1f)); - sgl_load_matrix((const float*)&state.camera.view); - sgl_mult_matrix((const float*)&scale_mat); + hmm_mat4 scale_mat = HMM_Scale(HMM_Vec3(0.01f, 0.01f, 0.01f)); + sgl_load_matrix((const float*)&state.camera.mtxView); + sgl_mult_matrix((const float*)&scale_mat); const int num_joints = state.ozz->skeleton.num_joints(); ozz::span joint_parents = state.ozz->skeleton.joint_parents();