Merged identical VertexManager code from DX9/DX11/OGL plugins into VideoCommon. Still...
[dolphin.git] / Source / Core / VideoCommon / Src / VertexLoader_Position.cpp
blob1ed6c6ef631008004c1e7ee2ea77e38e6fb73897
1 // Copyright (C) 2003 Dolphin Project.
3 // This program is free software: you can redistribute it and/or modify
4 // it under the terms of the GNU General Public License as published by
5 // the Free Software Foundation, version 2.0.
7 // This program is distributed in the hope that it will be useful,
8 // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 // GNU General Public License 2.0 for more details.
12 // A copy of the GPL 2.0 should have been included with the program.
13 // If not, see http://www.gnu.org/licenses/
15 // Official SVN repository and contact information can be found at
16 // http://code.google.com/p/dolphin-emu/
18 #include "Common.h"
19 #include "VideoCommon.h"
20 #include "VertexLoader.h"
21 #include "VertexLoader_Position.h"
22 #include "VertexManagerBase.h"
23 #include "CPUDetect.h"
25 #if _M_SSE >= 0x301 && !(defined __GNUC__ && !defined __SSSE3__)
26 #include <tmmintrin.h>
27 #endif
29 extern float posScale;
30 extern TVtxAttr *pVtxAttr;
32 // Thoughts on the implementation of a vertex loader compiler.
33 // s_pCurBufferPointer should definitely be in a register.
34 // Could load the position scale factor in XMM7, for example.
36 // The pointer inside DataReadU8 should be kept in another register.
37 // Let's check out Pos_ReadDirect_UByte(). For Byte, replace MOVZX with MOVSX.
40 MOVZX(32, R(EAX), MOffset(ESI, 0));
41 MOVZX(32, R(EBX), MOffset(ESI, 1));
42 MOVZX(32, R(ECX), MOffset(ESI, 2));
43 MOVD(XMM0, R(EAX));
44 MOVD(XMM1, R(EBX));
45 MOVD(XMM2, R(ECX));
46 CVTDQ2PS(XMM0, XMM0);
47 CVTDQ2PS(XMM1, XMM1);
48 CVTDQ2PS(XMM2, XMM2);
49 MULSS(XMM0, XMM7);
50 MULSS(XMM1, XMM7);
51 MULSS(XMM2, XMM7);
52 MOVSS(MOffset(EDI, 0), XMM0);
53 MOVSS(MOffset(EDI, 4), XMM1);
54 MOVSS(MOffset(EDI, 8), XMM2);
56 Alternatively, lookup table:
57 MOVZX(32, R(EAX), MOffset(ESI, 0));
58 MOVZX(32, R(EBX), MOffset(ESI, 1));
59 MOVZX(32, R(ECX), MOffset(ESI, 2));
60 MOV(32, R(EAX), MComplex(LUTREG, EAX, 4));
61 MOV(32, R(EBX), MComplex(LUTREG, EBX, 4));
62 MOV(32, R(ECX), MComplex(LUTREG, ECX, 4));
63 MOV(32, MOffset(EDI, 0), R(EAX));
64 MOV(32, MOffset(EDI, 4), R(EBX));
65 MOV(32, MOffset(EDI, 8), R(ECX));
67 SSE4:
68 PINSRB(XMM0, MOffset(ESI, 0), 0);
69 PINSRB(XMM0, MOffset(ESI, 1), 4);
70 PINSRB(XMM0, MOffset(ESI, 2), 8);
71 CVTDQ2PS(XMM0, XMM0);
72 <two unpacks here to sign extend>
73 MULPS(XMM0, XMM7);
74 MOVUPS(MOffset(EDI, 0), XMM0);
78 // ==============================================================================
79 // Direct
80 // ==============================================================================
82 template <class T, bool three>
83 void Pos_ReadDirect()
85 ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(T)DataRead<T>() * posScale;
86 ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(T)DataRead<T>() * posScale;
87 if (three)
88 ((float*)VertexManager::s_pCurBufferPointer)[2] = (float)(T)DataRead<T>() * posScale;
89 else
90 ((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
91 LOG_VTX();
92 VertexManager::s_pCurBufferPointer += 12;
95 void LOADERDECL Pos_ReadDirect_UByte3() { Pos_ReadDirect<u8, true>(); }
96 void LOADERDECL Pos_ReadDirect_Byte3() { Pos_ReadDirect<s8, true>(); }
97 void LOADERDECL Pos_ReadDirect_UShort3() { Pos_ReadDirect<u16, true>(); }
98 void LOADERDECL Pos_ReadDirect_Short3() { Pos_ReadDirect<s16, true>(); }
99 void LOADERDECL Pos_ReadDirect_UByte2() { Pos_ReadDirect<u8, false>(); }
100 void LOADERDECL Pos_ReadDirect_Byte2() { Pos_ReadDirect<s8, false>(); }
101 void LOADERDECL Pos_ReadDirect_UShort2() { Pos_ReadDirect<u16, false>(); }
102 void LOADERDECL Pos_ReadDirect_Short2() { Pos_ReadDirect<s16, false>(); }
104 void LOADERDECL Pos_ReadDirect_Float3()
106 // No need to use floating point here.
107 ((u32 *)VertexManager::s_pCurBufferPointer)[0] = DataReadU32();
108 ((u32 *)VertexManager::s_pCurBufferPointer)[1] = DataReadU32();
109 ((u32 *)VertexManager::s_pCurBufferPointer)[2] = DataReadU32();
110 LOG_VTX();
111 VertexManager::s_pCurBufferPointer += 12;
114 void LOADERDECL Pos_ReadDirect_Float2()
116 // No need to use floating point here.
117 ((u32 *)VertexManager::s_pCurBufferPointer)[0] = DataReadU32();
118 ((u32 *)VertexManager::s_pCurBufferPointer)[1] = DataReadU32();
119 ((u32 *)VertexManager::s_pCurBufferPointer)[2] = 0;
120 LOG_VTX();
121 VertexManager::s_pCurBufferPointer += 12;
125 template<class T, bool three>
126 inline void Pos_ReadIndex_Byte(int Index)
128 const u8* pData = cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]);
129 ((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)(pData[0])) * posScale;
130 ((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)(pData[1])) * posScale;
131 if (three)
132 ((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)(pData[2])) * posScale;
133 else
134 ((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
135 LOG_VTX();
136 VertexManager::s_pCurBufferPointer += 12;
139 template<class T, bool three>
140 inline void Pos_ReadIndex_Short(int Index)
142 const u16* pData = (const u16 *)(cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]));
143 ((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)Common::swap16(pData[0])) * posScale;
144 ((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)Common::swap16(pData[1])) * posScale;
145 if (three)
146 ((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)Common::swap16(pData[2])) * posScale;
147 else
148 ((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
149 LOG_VTX();
150 VertexManager::s_pCurBufferPointer += 12;
153 template<bool three>
154 void Pos_ReadIndex_Float(int Index)
156 const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION]));
157 ((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
158 ((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
159 if (three)
160 ((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]);
161 else
162 ((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
163 LOG_VTX();
164 VertexManager::s_pCurBufferPointer += 12;
#if _M_SSE >= 0x301
// Byte-shuffle control masks for _mm_shuffle_epi8. Within each selected 32-bit
// lane the four bytes are reversed; a control byte of 0xFF (high bit set) makes
// the shuffle write zero. _3 swaps lanes 0..2 and zeroes lane 3; _2 swaps
// lanes 0..1 and zeroes lanes 2..3.
static const __m128i kMaskSwap32_3 = _mm_set_epi32(0xFFFFFFFFL, 0x08090A0BL, 0x04050607L, 0x00010203L);
static const __m128i kMaskSwap32_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L);

// SSSE3 variant of the indexed 32-bit position loader: one unaligned 16-byte
// load, one byte shuffle, one unaligned 16-byte store.
// NOTE(review): the load and the store both touch 16 bytes while the output
// pointer only advances by 12 and the element holds 12 (or 8) bytes of
// position data; this presumably relies on slack after the source element and
// in the output buffer -- confirm with the callers.
template<bool three>
void Pos_ReadIndex_Float_SSSE3(int Index)
{
	const u32* src = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION]));
	const __m128i raw = _mm_loadu_si128((__m128i*)src);
	const __m128i swapped = _mm_shuffle_epi8(raw, three ? kMaskSwap32_3 : kMaskSwap32_2);
	_mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, swapped);
	LOG_VTX();
	VertexManager::s_pCurBufferPointer += 12;
}
#endif
183 // Explicitly instantiate these functions to decrease the possibility of
184 // symbol binding problems when (only) calling them from JIT compiled code.
185 template void Pos_ReadDirect<u8, true>();
186 template void Pos_ReadDirect<s8, true>();
187 template void Pos_ReadDirect<u16, true>();
188 template void Pos_ReadDirect<s16, true>();
189 template void Pos_ReadDirect<u8, false>();
190 template void Pos_ReadDirect<s8, false>();
191 template void Pos_ReadDirect<u16, false>();
192 template void Pos_ReadDirect<s16, false>();
193 template void Pos_ReadIndex_Byte<u8, true>(int Index);
194 template void Pos_ReadIndex_Byte<s8, true>(int Index);
195 template void Pos_ReadIndex_Short<u16, true>(int Index);
196 template void Pos_ReadIndex_Short<s16, true>(int Index);
197 template void Pos_ReadIndex_Float<true>(int Index);
198 template void Pos_ReadIndex_Byte<u8, false>(int Index);
199 template void Pos_ReadIndex_Byte<s8, false>(int Index);
200 template void Pos_ReadIndex_Short<u16, false>(int Index);
201 template void Pos_ReadIndex_Short<s16, false>(int Index);
202 template void Pos_ReadIndex_Float<false>(int Index);
204 // ==============================================================================
205 // Index 8
206 // ==============================================================================
207 void LOADERDECL Pos_ReadIndex8_UByte3() {Pos_ReadIndex_Byte<u8, true> (DataReadU8());}
208 void LOADERDECL Pos_ReadIndex8_Byte3() {Pos_ReadIndex_Byte<s8, true> (DataReadU8());}
209 void LOADERDECL Pos_ReadIndex8_UShort3() {Pos_ReadIndex_Short<u16, true> (DataReadU8());}
210 void LOADERDECL Pos_ReadIndex8_Short3() {Pos_ReadIndex_Short<s16, true> (DataReadU8());}
211 void LOADERDECL Pos_ReadIndex8_Float3() {Pos_ReadIndex_Float<true> (DataReadU8());}
212 void LOADERDECL Pos_ReadIndex8_UByte2() {Pos_ReadIndex_Byte<u8, false>(DataReadU8());}
213 void LOADERDECL Pos_ReadIndex8_Byte2() {Pos_ReadIndex_Byte<s8, false>(DataReadU8());}
214 void LOADERDECL Pos_ReadIndex8_UShort2() {Pos_ReadIndex_Short<u16, false>(DataReadU8());}
215 void LOADERDECL Pos_ReadIndex8_Short2() {Pos_ReadIndex_Short<s16, false>(DataReadU8());}
216 void LOADERDECL Pos_ReadIndex8_Float2() {Pos_ReadIndex_Float<false> (DataReadU8());}
218 // ==============================================================================
219 // Index 16
220 // ==============================================================================
221 void LOADERDECL Pos_ReadIndex16_UByte3() {Pos_ReadIndex_Byte<u8, true> (DataReadU16());}
222 void LOADERDECL Pos_ReadIndex16_Byte3() {Pos_ReadIndex_Byte<s8, true> (DataReadU16());}
223 void LOADERDECL Pos_ReadIndex16_UShort3() {Pos_ReadIndex_Short<u16, true> (DataReadU16());}
224 void LOADERDECL Pos_ReadIndex16_Short3() {Pos_ReadIndex_Short<s16, true> (DataReadU16());}
225 void LOADERDECL Pos_ReadIndex16_Float3() {Pos_ReadIndex_Float<true> (DataReadU16());}
226 void LOADERDECL Pos_ReadIndex16_UByte2() {Pos_ReadIndex_Byte<u8, false>(DataReadU16());}
227 void LOADERDECL Pos_ReadIndex16_Byte2() {Pos_ReadIndex_Byte<s8, false>(DataReadU16());}
228 void LOADERDECL Pos_ReadIndex16_UShort2() {Pos_ReadIndex_Short<u16, false>(DataReadU16());}
229 void LOADERDECL Pos_ReadIndex16_Short2() {Pos_ReadIndex_Short<s16, false>(DataReadU16());}
230 void LOADERDECL Pos_ReadIndex16_Float2() {Pos_ReadIndex_Float<false> (DataReadU16());}
#if _M_SSE >= 0x301
// LOADERDECL entry points for the SSSE3 indexed float loader. The index is
// read with DataReadU8() or DataReadU16() according to the 8/16 in the name,
// and the 3/2 suffix selects the `three` template argument.
void LOADERDECL Pos_ReadIndex8_Float3_SSSE3()
{
	Pos_ReadIndex_Float_SSSE3<true>(DataReadU8());
}

void LOADERDECL Pos_ReadIndex8_Float2_SSSE3()
{
	Pos_ReadIndex_Float_SSSE3<false>(DataReadU8());
}

void LOADERDECL Pos_ReadIndex16_Float3_SSSE3()
{
	Pos_ReadIndex_Float_SSSE3<true>(DataReadU16());
}

void LOADERDECL Pos_ReadIndex16_Float2_SSSE3()
{
	Pos_ReadIndex_Float_SSSE3<false>(DataReadU16());
}
#endif
239 static TPipelineFunction tableReadPosition[4][8][2] = {
241 {NULL, NULL,},
242 {NULL, NULL,},
243 {NULL, NULL,},
244 {NULL, NULL,},
245 {NULL, NULL,},
248 {Pos_ReadDirect_UByte2, Pos_ReadDirect_UByte3,},
249 {Pos_ReadDirect_Byte2, Pos_ReadDirect_Byte3,},
250 {Pos_ReadDirect_UShort2, Pos_ReadDirect_UShort3,},
251 {Pos_ReadDirect_Short2, Pos_ReadDirect_Short3,},
252 {Pos_ReadDirect_Float2, Pos_ReadDirect_Float3,},
255 {Pos_ReadIndex8_UByte2, Pos_ReadIndex8_UByte3,},
256 {Pos_ReadIndex8_Byte2, Pos_ReadIndex8_Byte3,},
257 {Pos_ReadIndex8_UShort2, Pos_ReadIndex8_UShort3,},
258 {Pos_ReadIndex8_Short2, Pos_ReadIndex8_Short3,},
259 {Pos_ReadIndex8_Float2, Pos_ReadIndex8_Float3,},
262 {Pos_ReadIndex16_UByte2, Pos_ReadIndex16_UByte3,},
263 {Pos_ReadIndex16_Byte2, Pos_ReadIndex16_Byte3,},
264 {Pos_ReadIndex16_UShort2, Pos_ReadIndex16_UShort3,},
265 {Pos_ReadIndex16_Short2, Pos_ReadIndex16_Short3,},
266 {Pos_ReadIndex16_Float2, Pos_ReadIndex16_Float3,},
// Size table, indexed as [_type][_format][_elements] by
// VertexLoader_Position::GetSize(). Each value matches the number of bytes the
// corresponding loader in tableReadPosition reads from the vertex stream:
// component count times component size for direct data, 1 for an 8-bit index
// and 2 for a 16-bit index. Format rows that are not listed are
// zero-initialized. The table is read-only, so it is declared const.
static const int tableReadPositionVertexSize[4][8][2] = {
	// [0]: no position data
	{
		{0, 0,},
		{0, 0,},
		{0, 0,},
		{0, 0,},
		{0, 0,},
	},
	// [1]: direct -- ubyte, byte, ushort, short, float
	{
		{2, 3,},
		{2, 3,},
		{4, 6,},
		{4, 6,},
		{8, 12,},
	},
	// [2]: 8-bit index
	{
		{1, 1,},
		{1, 1,},
		{1, 1,},
		{1, 1,},
		{1, 1,},
	},
	// [3]: 16-bit index
	{
		{2, 2,},
		{2, 2,},
		{2, 2,},
		{2, 2,},
		{2, 2,},
	},
};
302 void VertexLoader_Position::Init(void) {
304 #if _M_SSE >= 0x301
306 if (cpu_info.bSSSE3) {
307 tableReadPosition[2][4][0] = Pos_ReadIndex8_Float2_SSSE3;
308 tableReadPosition[2][4][1] = Pos_ReadIndex8_Float3_SSSE3;
309 tableReadPosition[3][4][0] = Pos_ReadIndex16_Float2_SSSE3;
310 tableReadPosition[3][4][1] = Pos_ReadIndex16_Float3_SSSE3;
313 #endif
317 unsigned int VertexLoader_Position::GetSize(unsigned int _type, unsigned int _format, unsigned int _elements) {
318 return tableReadPositionVertexSize[_type][_format][_elements];
321 TPipelineFunction VertexLoader_Position::GetFunction(unsigned int _type, unsigned int _format, unsigned int _elements) {
322 return tableReadPosition[_type][_format][_elements];