SSE optmizations.

git-svn-id: http://svn.leocad.org/trunk@371 c7d43263-9d01-0410-8a33-9dba5d9f93d6
author: leo 2005-02-01 20:13:16 +0000
committer: leo 2005-02-01 20:13:16 +0000
commit: c53e2df73b367d43fc657413025390c5896f414d (patch)
tree: 061a4a8ed675deb14264769d15751195889e555e /common
parent: b622625f950d656a795557afaaa36472fceac13c (diff)
2 files changed, 213 insertions, 16 deletions
diff --git a/common/algebra.cpp b/common/algebra.cpp
index e4e3770..71e1d22 100644
--- a/common/algebra.cpp
+++ b/common/algebra.cpp
@@ -5,10 +5,28 @@
 #include "algebra.h"
 
 // ============================================================================
-// Linear Algebra functions.
+// 4x4 Matrix class.
 
-float DistancePointSegmentSquared(const Point3& Pt, const Point3& SegStart, const Point3& SegEnd)
+void Matrix44::CreateLookAt(const Point3& Eye, const Point3& Target, const Vector3& Up)
 {
-	return 0;
-}
+	Vector3 x, y, z;
+
+	// Z = Eye - Target
+	z = Eye - Target;
+
+  // X = Y Cross Z
+	x = Cross3(Up, z);
 
+  // Y = Z Cross X
+	y = Cross3(z, x);
+
+	// Normalize everything.
+	x.Normalize();
+	y.Normalize();
+	z.Normalize();
+
+	m_Rows[0] = Float4(x.GetX(), y.GetX(), z.GetX(), 0.0f);
+	m_Rows[1] = Float4(x.GetY(), y.GetY(), z.GetY(), 0.0f);
+	m_Rows[2] = Float4(x.GetZ(), y.GetZ(), z.GetZ(), 0.0f);
+	m_Rows[3] = m_Rows[0]*-Eye.GetX() + m_Rows[1]*-Eye.GetY() + m_Rows[2]*-Eye.GetZ();
+}
diff --git a/common/algebra.h b/common/algebra.h
index 76aaab2..671fe4f 100644
--- a/common/algebra.h
+++ b/common/algebra.h
@@ -4,15 +4,16 @@
 #include <math.h>
 
 //
-// Simple math library and algebra functions.
+// Simple math library and linear algebra functions.
 //
 // Everything is based on the Float4 class, so changing that class should be enough
-// to add support for compiler specific math intrinsics. For now only the reference
-// implementation is supported.
-//
-// TODO: Add SSE support.
+// to add support for compiler specific math intrinsics.
 //
 
+// TODO: Move this define to config.h
+#define LC_MATH_FLOAT
+//#define LC_MATH_SSE
+
 // Classes defined in this file:
 class Float4;
 class Point3;
@@ -21,7 +22,9 @@ class Matrix33;
 class Matrix44;
 
 // ============================================================================
-// Float4 class.
+// Float4 class (float version).
+
+#ifdef LC_MATH_FLOAT
 
 class Float4
 {
@@ -43,6 +46,9 @@ public:
 	inline void SetZ(const float _z) { z = _z; };
 	inline void SetW(const float _w) { w = _w; };
 
+	template<typename T>
+	inline const float operator[](T i) const { return ((const float*)this)[i]; };
+
 	// Comparison.
 	friend inline bool operator==(const Float4& a, const Float4& b)
 	{ return (a.x == b.x) && (a.y == b.y) && (a.z == b.z) && (a.w == b.w); };
@@ -78,10 +84,147 @@ public:
 	inline float Length3() const
 	{ return sqrtf(Dot3(*this, *this)); };
 
+	inline void Normalize3()
+	{ *this = *this / Length3(); };
+
+	inline void Abs()
+	{
+		if (x < 0.0f) x = -x;
+		if (y < 0.0f) y = -y;
+		if (z < 0.0f) z = -z;
+		if (w < 0.0f) w = -w;
+	};
+
 protected:
 	float x, y, z, w;
 };
 
+#endif
+
+// ============================================================================
+// Float4 class (SSE version).
+
+#ifdef LC_MATH_SSE
+
+// If you can't find this file you need to install the VS6 Processor Pack.
+#include <xmmintrin.h>
+
+class __declspec(align(16)) Float4
+{
+public:
+	// Constructors.
+	inline Float4() { };
+	inline explicit Float4(const __m128& _xyzw)
+		: xyzw(_xyzw) { };
+	inline explicit Float4(const float _x, const float _y, const float _z)
+		: xyzw(_mm_setr_ps(_x, _y, _z, _z)) { };
+	inline explicit Float4(const float _x, const float _y, const float _z, const float _w)
+		: xyzw(_mm_setr_ps(_x, _y, _z, _w)) { };
+
+	// Get/Set functions.
+	inline float GetX() const { return ((const float*)this)[0]; };
+	inline float GetY() const { return ((const float*)this)[1]; };
+	inline float GetZ() const { return ((const float*)this)[2]; };
+	inline float GetW() const { return ((const float*)this)[3]; };
+	inline void SetX(const float _x)
+	{
+		__m128 xxyy = _mm_shuffle_ps(_mm_load_ps1(&_x), xyzw, _MM_SHUFFLE(1, 1, 0, 0));
+		xyzw = _mm_shuffle_ps(xxyy, xyzw, _MM_SHUFFLE(3, 2, 2, 0));
+	};
+	inline void SetY(const float _y)
+	{
+		__m128 xxyy = _mm_shuffle_ps(xyzw, _mm_load_ps1(&_y), _MM_SHUFFLE(1, 1, 0, 0));
+		xyzw = _mm_shuffle_ps(xxyy, xyzw, _MM_SHUFFLE(3, 2, 2, 0));
+	};
+	inline void SetZ(const float _z)
+	{
+		__m128 zzww = _mm_shuffle_ps(_mm_load_ps1(&_z), xyzw, _MM_SHUFFLE(3, 3, 2, 2));
+		xyzw = _mm_shuffle_ps(xyzw, zzww, _MM_SHUFFLE(2, 0, 1, 0));
+	};
+	inline void SetW(const float _w)
+	{
+		__m128 zzww = _mm_shuffle_ps(xyzw, _mm_load_ps1(&_w), _MM_SHUFFLE(3, 3, 2, 2));
+		xyzw = _mm_shuffle_ps(xyzw, zzww, _MM_SHUFFLE(2, 0, 1, 0));
+	};
+
+	template<typename T>
+	inline const float operator[](T i) const { return ((const float*)this)[i]; };
+
+	// Comparison.
+	friend inline bool operator==(const Float4& a, const Float4& b)
+	{ return !_mm_movemask_ps(_mm_cmpneq_ps(a.xyzw, b.xyzw)); };
+
+	friend inline bool Compare3(const Float4& a, const Float4& b)
+	{ return (_mm_movemask_ps(_mm_cmpeq_ps(a.xyzw, b.xyzw)) & 0x7) == 0x7; };
+
+	// Math operations.
+	friend inline Float4 operator+(const Float4& a, const Float4& b)
+	{ return Float4(_mm_add_ps(a.xyzw, b.xyzw)); };
+
+	friend inline Float4 operator-(const Float4& a, const Float4& b)
+	{ return Float4(_mm_sub_ps(a.xyzw, b.xyzw)); };
+
+	friend inline Float4 operator*(const Float4& a, float f)
+	{ return Float4(_mm_mul_ps(a.xyzw, _mm_load_ps1(&f))); };
+
+	friend inline Float4 operator*(const Float4& a, const Float4& b)
+	{ return Float4(_mm_mul_ps(a.xyzw, b.xyzw)); };
+
+	friend inline Float4 operator/(const Float4& a, float f)
+	{ return Float4(_mm_div_ps(a.xyzw, _mm_load_ps1(&f))); };
+
+	// Dot product.
+	friend inline float Dot3(const Float4& a, const Float4& b)
+	{
+		__m128 tmp = _mm_mul_ps(a.xyzw, b.xyzw);
+		__m128 yz = _mm_add_ss(_mm_shuffle_ps(tmp, tmp, _MM_SHUFFLE(1, 1, 1, 1)), _mm_shuffle_ps(tmp, tmp, _MM_SHUFFLE(2, 2, 2, 2)));
+		tmp = _mm_add_ss(tmp, yz);
+
+		return *(const float*)&tmp;
+	};
+
+	// Cross product.
+	friend inline Float4 Cross3(const Float4& a, const Float4& b)
+	{
+		// a(yzx)*b(zxy)-a(zxy)*b(yzx)
+		__m128 r1 = _mm_mul_ps(_mm_shuffle_ps(a.xyzw, a.xyzw, _MM_SHUFFLE(0, 0, 2, 1)), _mm_shuffle_ps(b.xyzw, b.xyzw, _MM_SHUFFLE(0, 1, 0, 2)));
+		__m128 r2 = _mm_mul_ps(_mm_shuffle_ps(a.xyzw, a.xyzw, _MM_SHUFFLE(0, 1, 0, 2)), _mm_shuffle_ps(b.xyzw, b.xyzw, _MM_SHUFFLE(0, 0, 2, 1)));
+
+		return Float4(_mm_sub_ps(r1, r2));
+	};
+
+	// Other functions.
+	inline float Length3() const
+	{
+		__m128 tmp = _mm_mul_ps(xyzw, xyzw);
+		__m128 yz = _mm_add_ss(_mm_shuffle_ps(tmp, tmp, _MM_SHUFFLE(1, 1, 1, 1)), _mm_shuffle_ps(tmp, tmp, _MM_SHUFFLE(2, 2, 2, 2)));
+		tmp = _mm_add_ss(tmp, yz);
+		tmp = _mm_sqrt_ss(tmp);
+
+		return *(const float*)&tmp;
+	};
+
+	inline void Normalize3()
+	{
+		__m128 tmp = _mm_mul_ps(xyzw, xyzw);
+		__m128 yz = _mm_add_ss(_mm_shuffle_ps(tmp, tmp, _MM_SHUFFLE(1, 1, 1, 1)), _mm_shuffle_ps(tmp, tmp, _MM_SHUFFLE(2, 2, 2, 2)));
+		tmp = _mm_add_ss(tmp, yz);
+		tmp = _mm_rsqrt_ss(tmp);
+		tmp = _mm_shuffle_ps(tmp, tmp, _MM_SHUFFLE(0, 0, 0, 0));
+		xyzw = _mm_mul_ps(xyzw, tmp);
+	};
+
+	inline void Abs()
+	{
+		static const __declspec(align(16)) unsigned int Mask[4] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff };
+		xyzw = _mm_and_ps(xyzw, *(__m128*)&Mask);
+	};
+
+protected:
+	__m128 xyzw;
+};
+
+#endif
 
 // ============================================================================
 // 3D Point class.
@@ -105,6 +248,9 @@ public:
 	inline void SetY(const float _y) { m_Value.SetY(_y); };
 	inline void SetZ(const float _z) { m_Value.SetZ(_z); };
 
+	template<typename T>
+	inline const float operator[](T i) const { return m_Value[i]; };
+
 	// Math operations.
 	template<typename T> inline Point3& operator+=(const T& a) { return *this = *this + a; }
 	template<typename T> inline Point3& operator-=(const T& a) { return *this = *this - a; }
@@ -138,6 +284,9 @@ public:
 	inline void SetY(const float _y) { m_Value.SetY(_y); };
 	inline void SetZ(const float _z) { m_Value.SetZ(_z); };
 
+	template<typename T>
+	inline const float operator[](T i) const { return m_Value[i]; };
+
 	// Math operations.
 	template<typename T> inline Vector3& operator+=(const T& a) { return *this = *this + a; }
 	template<typename T> inline Vector3& operator-=(const T& a) { return *this = *this - a; }
@@ -151,6 +300,12 @@ public:
 	inline float LengthSquared() const
 	{ return Dot3(m_Value, m_Value); };
 
+	inline void Normalize()
+	{ m_Value.Normalize3(); };
+
+	inline void Abs()
+	{ m_Value.Abs(); };
+
 protected:
 	Float4 m_Value;
 };
@@ -162,19 +317,45 @@ protected:
 class Matrix33
 {
 public:
+	inline Matrix33()
+		{ };
+	inline Matrix33(const Vector3& Row0, const Vector3& Row1, const Vector3& Row2)
+		{ m_Rows[0] = Row0; m_Rows[1] = Row1; m_Rows[2] = Row2; };
 
 protected:
+	Vector3 m_Rows[3];
 };
 
 
 // ============================================================================
-// 4x4 Matrix class.
+// 4x4 Matrix class (actually 4x3).
 
 class Matrix44
 {
 public:
+	inline Matrix44()
+		{ };
+	inline Matrix44(const Float4& Row0, const Float4& Row1, const Float4& Row2, const Float4& Row3)
+		{ m_Rows[0] = Row0; m_Rows[1] = Row1; m_Rows[2] = Row2; m_Rows[3] = Row3; };
+
+	inline void Transpose3()
+	{
+		Float4 a = m_Rows[0], b = m_Rows[1], c = m_Rows[2];
+		m_Rows[0] = Float4(a.GetX(), b.GetX(), c.GetX(), a.GetW());
+		m_Rows[1] = Float4(a.GetY(), b.GetY(), c.GetY(), b.GetW());
+		m_Rows[2] = Float4(a.GetZ(), b.GetZ(), c.GetZ(), c.GetW());
+	}
+
+	inline void SetTranslation(const Point3& a)
+	{ m_Rows[3] = Float4(a.GetX(), a.GetY(), a.GetZ(), 0.0f); };
+
+	void CreateLookAt(const Point3& Eye, const Point3& Target, const Vector3& Up);
+
+	friend inline Point3 operator*(const Point3& a, const Matrix44& b)
+	{ return Point3(b.m_Rows[0]*a.GetX() + b.m_Rows[1]*a.GetY() + b.m_Rows[2]*a.GetZ() + b.m_Rows[3]); };
 
 protected:
+	Float4 m_Rows[4];
 };
 
 
@@ -244,10 +425,8 @@ inline Vector3 operator-(const Vector3& a, const Vector3& b)
 inline float Dot3(const Vector3& a, const Vector3& b)
 { return Dot3(a.GetValue(), b.GetValue()); };
 
-
-// ============================================================================
-// Linear Algebra functions.
-
-float DistancePointSegmentSquared(const Point3& Pt, const Point3& SegStart, const Point3& SegEnd);
+// Cross product.
+inline Vector3 Cross3(const Vector3& a, const Vector3& b)
+{ return Vector3(Cross3(a.GetValue(), b.GetValue())); };
 
 #endif
author	leo	2005-02-01 20:13:16 +0000
committer	leo	2005-02-01 20:13:16 +0000
commit	c53e2df73b367d43fc657413025390c5896f414d (patch)
tree	061a4a8ed675deb14264769d15751195889e555e /common
parent	b622625f950d656a795557afaaa36472fceac13c (diff)