00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #ifndef LUX_MATRIX4X4_H
00024 #define LUX_MATRIX4X4_H
00025
00026 #include <xmmintrin.h>
00027
00028 namespace lux {
00029 static float _matrix44_sse_ident[16] = {
00030 1.0f, 0.0f, 0.0f, 0.0f,
00031 0.0f, 1.0f, 0.0f, 0.0f,
00032 0.0f, 0.0f, 1.0f, 0.0f,
00033 0.0f, 0.0f, 0.0f, 1.0f,
00034 };
00035
00036 class Matrix4x4 {
00037 public:
00038
00039 Matrix4x4() {
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052 memcpy(&_11, _matrix44_sse_ident, sizeof(_matrix44_sse_ident));
00053 }
00054 Matrix4x4(const Matrix4x4 & m):_L1(m._L1), _L2(m._L2), _L3(m._L3),
00055 _L4(m._L4)
00056 {}
00057
00058 Matrix4x4(const __m128 &l1, const __m128 &l2, const __m128 &l3, const __m128 &l4):_L1(l1), _L2(l2), _L3(l3), _L4(l4)
00059 {}
00060
00061 Matrix4x4(float mat[4][4]);
00062 Matrix4x4(float t00, float t01, float t02, float t03,
00063 float t10, float t11, float t12, float t13,
00064 float t20, float t21, float t22, float t23,
00065 float t30, float t31, float t32, float t33);
00066 boost::shared_ptr<Matrix4x4> Transpose() const;
00067 void Print(ostream &os) const {
00068 os<<"[ ";
00069 for(int i=0;i<4;i++) {
00070 os<<"[ ";
00071 for(int j=0;j<4;j++) {
00072 os<<(*this)[j][i];
00073 if(j!=3) os<<", ";
00074 }
00075 os<<" ] ";
00076 }
00077 os<<"]";
00078 }
00079 static boost::shared_ptr<Matrix4x4>
00080 Mul(const boost::shared_ptr<Matrix4x4> &A,
00081 const boost::shared_ptr<Matrix4x4> &B) {
00082 __m128 r1, r2, r3, r4;
00083 __m128 B1 = A->_L1, B2 = A->_L2, B3 = A->_L3, B4 = A->_L4;
00084
00085 r1 = _mm_mul_ps(_mm_set_ps1(B->_11) , B1);
00086 r2 = _mm_mul_ps(_mm_set_ps1(B->_21) , B1);
00087 r1 = _mm_add_ps(r1, _mm_mul_ps(_mm_set_ps1(B->_12) , B2));
00088 r2 = _mm_add_ps(r2, _mm_mul_ps(_mm_set_ps1(B->_22) , B2));
00089 r1 = _mm_add_ps(r1, _mm_mul_ps(_mm_set_ps1(B->_13) , B3));
00090 r2 = _mm_add_ps(r2, _mm_mul_ps(_mm_set_ps1(B->_23) , B3));
00091 r1 = _mm_add_ps(r1, _mm_mul_ps(_mm_set_ps1(B->_14) , B4));
00092 r2 = _mm_add_ps(r2, _mm_mul_ps(_mm_set_ps1(B->_24) , B4));
00093
00094
00095
00096 r3 = _mm_mul_ps(_mm_set_ps1(B->_31) , B1);
00097 r4 = _mm_mul_ps(_mm_set_ps1(B->_41) , B1);
00098 r3 = _mm_add_ps(r3, _mm_mul_ps(_mm_set_ps1(B->_32) , B2));
00099 r4 = _mm_add_ps(r4, _mm_mul_ps(_mm_set_ps1(B->_42) , B2));
00100 r3 = _mm_add_ps(r3, _mm_mul_ps(_mm_set_ps1(B->_33) , B3));
00101 r4 = _mm_add_ps(r4, _mm_mul_ps(_mm_set_ps1(B->_43) , B3));
00102 r3 = _mm_add_ps(r3, _mm_mul_ps(_mm_set_ps1(B->_34) , B4));
00103 r4 = _mm_add_ps(r4, _mm_mul_ps(_mm_set_ps1(B->_44) , B4));
00104
00105
00106
00107
00108
00109 boost::shared_ptr<Matrix4x4> o(new Matrix4x4(r1, r2, r3, r4));
00110 return o;
00111 }
00112 boost::shared_ptr<Matrix4x4> Inverse() const;
00113
00114
00115 union {
00116 struct {
00117 __m128 _L1, _L2, _L3, _L4;
00118 };
00119 struct {
00120 float _11, _12, _13, _14;
00121 float _21, _22, _23, _24;
00122 float _31, _32, _33, _34;
00123 float _41, _42, _43, _44;
00124 };
00125 struct {
00126 float _t11, _t21, _t31, _t41;
00127 float _t12, _t22, _t32, _t42;
00128 float _t13, _t23, _t33, _t43;
00129 float _t14, _t24, _t34, _t44;
00130 };
00131 };
00132
00133 void* operator new(size_t t) { return _mm_malloc(t, 16); }
00134 void operator delete(void* ptr, size_t t) { _mm_free(ptr); }
00135 void* operator new[](size_t t) { return _mm_malloc(t, 16); }
00136 void operator delete[] (void* ptr) { _mm_free(ptr); }
00137
00138 private:
00139
00140 float* operator [] (int i) const {
00141
00142 return (float*)((&_L1)+i);
00143 }
00144 };
00145
00146 }
00147
00148 #endif //LUX_MATRIX4X4_H