1 // Copyright (C) 2003 Dolphin Project.
3 // This program is free software: you can redistribute it and/or modify
4 // it under the terms of the GNU General Public License as published by
5 // the Free Software Foundation, version 2.0.
7 // This program is distributed in the hope that it will be useful,
8 // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 // GNU General Public License 2.0 for more details.
12 // A copy of the GPL 2.0 should have been included with the program.
13 // If not, see http://www.gnu.org/licenses/
15 // Official SVN repository and contact information can be found at
16 // http://code.google.com/p/dolphin-emu/
21 #include "VideoCommon.h"
22 #include "VideoConfig.h"
24 #include "MemoryUtil.h"
25 #include "StringUtil.h"
26 #include "x64Emitter.h"
28 #include "PixelEngine.h"
30 #include "LookUpTables.h"
31 #include "Statistics.h"
32 #include "VertexLoaderManager.h"
33 #include "VertexLoader.h"
35 #include "DataReader.h"
36 #include "VertexManagerBase.h"
38 #include "VertexLoader_Position.h"
39 #include "VertexLoader_Normal.h"
40 #include "VertexLoader_Color.h"
41 #include "VertexLoader_TextCoord.h"
45 extern float GC_ALIGNED16(g_fProjectionMatrix
[16]);
49 #define COMPILED_CODE_SIZE 4096
51 NativeVertexFormat
*g_nativeVertexFmt
;
58 // Matrix components are first in GC format but later in PC format - we need to store it temporarily
59 // when decoding each vertex.
60 static u8 s_curposmtx
;
61 static u8 s_curtexmtx
[8];
62 static int s_texmtxwrite
= 0;
63 static int s_texmtxread
= 0;
65 static int loop_counter
;
67 // Vertex loaders read these. Although the scale ones should be baked into the shader.
// Fixed-point dequantisation factors: fractionTable[n] == 1 / 2^n for n in [0, 31].
// Indexed by a "Frac" field of the vertex attributes (see the posScale / tcScale
// assignments in RunVertices) to turn a fractional-bit count into a float multiplier.
// The shift is done on an unsigned literal so that 1U << 31 is well defined.
static const float fractionTable[32] = {
	1.0f / (1U <<  0), 1.0f / (1U <<  1), 1.0f / (1U <<  2), 1.0f / (1U <<  3),
	1.0f / (1U <<  4), 1.0f / (1U <<  5), 1.0f / (1U <<  6), 1.0f / (1U <<  7),
	1.0f / (1U <<  8), 1.0f / (1U <<  9), 1.0f / (1U << 10), 1.0f / (1U << 11),
	1.0f / (1U << 12), 1.0f / (1U << 13), 1.0f / (1U << 14), 1.0f / (1U << 15),
	1.0f / (1U << 16), 1.0f / (1U << 17), 1.0f / (1U << 18), 1.0f / (1U << 19),
	1.0f / (1U << 20), 1.0f / (1U << 21), 1.0f / (1U << 22), 1.0f / (1U << 23),
	1.0f / (1U << 24), 1.0f / (1U << 25), 1.0f / (1U << 26), 1.0f / (1U << 27),
	1.0f / (1U << 28), 1.0f / (1U << 29), 1.0f / (1U << 30), 1.0f / (1U << 31),
};
88 void LOADERDECL
PosMtx_ReadDirect_UByte()
90 s_curposmtx
= DataReadU8() & 0x3f;
91 PRIM_LOG("posmtx: %d, ", s_curposmtx
);
94 void LOADERDECL
PosMtx_Write()
96 *VertexManager::s_pCurBufferPointer
++ = s_curposmtx
;
97 *VertexManager::s_pCurBufferPointer
++ = 0;
98 *VertexManager::s_pCurBufferPointer
++ = 0;
99 *VertexManager::s_pCurBufferPointer
++ = 0;
102 void LOADERDECL
UpdateBoundingBox()
104 if (!PixelEngine::bbox_active
)
107 // Truly evil hack, reading backwards from the write pointer. If we were writing to write-only
108 // memory like we might have been with a D3D vertex buffer, this would have been a bad idea.
109 float *data
= (float *)(VertexManager::s_pCurBufferPointer
- 12);
110 // We must transform the just loaded point by the current world and projection matrix - in software.
111 // Then convert to screen space and update the bounding box.
112 float p
[3] = {data
[0], data
[1], data
[2]};
114 const float *world_matrix
= (float*)xfmem
+ MatrixIndexA
.PosNormalMtxIdx
* 4;
115 const float *proj_matrix
= &g_fProjectionMatrix
[0];
118 t
[0] = p
[0] * world_matrix
[0] + p
[1] * world_matrix
[1] + p
[2] * world_matrix
[2] + world_matrix
[3];
119 t
[1] = p
[0] * world_matrix
[4] + p
[1] * world_matrix
[5] + p
[2] * world_matrix
[6] + world_matrix
[7];
120 t
[2] = p
[0] * world_matrix
[8] + p
[1] * world_matrix
[9] + p
[2] * world_matrix
[10] + world_matrix
[11];
123 o
[2] = t
[0] * proj_matrix
[8] + t
[1] * proj_matrix
[9] + t
[2] * proj_matrix
[10] + proj_matrix
[11];
126 // No pixels are likely to be drawn - don't update bounding box.
129 o
[0] = t
[0] * proj_matrix
[0] + t
[1] * proj_matrix
[1] + t
[2] * proj_matrix
[2] + proj_matrix
[3];
130 o
[1] = t
[0] * proj_matrix
[4] + t
[1] * proj_matrix
[5] + t
[2] * proj_matrix
[6] + proj_matrix
[7];
131 o
[3] = t
[0] * proj_matrix
[12] + t
[1] * proj_matrix
[13] + t
[2] * proj_matrix
[14] + proj_matrix
[15];
136 // should possibly adjust for viewport?
137 o
[0] = (o
[0] + 1.0f
) * 320.0f
;
138 o
[1] = (o
[1] + 1.0f
) * 240.0f
;
140 if (o
[0] < PixelEngine::bbox
[0]) PixelEngine::bbox
[0] = (u16
)std::max(0.0f
, o
[0]);
141 if (o
[0] > PixelEngine::bbox
[1]) PixelEngine::bbox
[1] = (u16
)std::min(640.0f
, o
[0]);
142 if (o
[1] < PixelEngine::bbox
[2]) PixelEngine::bbox
[2] = (u16
)std::max(0.0f
, o
[1]);
143 if (o
[1] > PixelEngine::bbox
[3]) PixelEngine::bbox
[3] = (u16
)std::min(480.0f
, o
[1]);
145 if (GetAsyncKeyState(VK_LSHIFT)) {
146 ERROR_LOG(VIDEO, "XForm: %f %f %f to %f %f", p[0], p[1], p[2], o[0], o[1]);
147 ERROR_LOG(VIDEO, "%i %i %i %i", g_VideoInitialize.pBBox[0], g_VideoInitialize.pBBox[1], g_VideoInitialize.pBBox[2], g_VideoInitialize.pBBox[3]);
151 void LOADERDECL
TexMtx_ReadDirect_UByte()
153 s_curtexmtx
[s_texmtxread
] = DataReadU8() & 0x3f;
154 PRIM_LOG("texmtx%d: %d, ", s_texmtxread
, s_curtexmtx
[s_texmtxread
]);
158 void LOADERDECL
TexMtx_Write_Float()
160 *(float*)VertexManager::s_pCurBufferPointer
= (float)s_curtexmtx
[s_texmtxwrite
++];
161 VertexManager::s_pCurBufferPointer
+= 4;
164 void LOADERDECL
TexMtx_Write_Float2()
166 ((float*)VertexManager::s_pCurBufferPointer
)[0] = 0;
167 ((float*)VertexManager::s_pCurBufferPointer
)[1] = (float)s_curtexmtx
[s_texmtxwrite
++];
168 VertexManager::s_pCurBufferPointer
+= 8;
171 void LOADERDECL
TexMtx_Write_Float4()
173 ((float*)VertexManager::s_pCurBufferPointer
)[0] = 0;
174 ((float*)VertexManager::s_pCurBufferPointer
)[1] = 0;
175 ((float*)VertexManager::s_pCurBufferPointer
)[2] = s_curtexmtx
[s_texmtxwrite
++];
176 ((float*)VertexManager::s_pCurBufferPointer
)[3] = 0; // Just to fill out with 0.
177 VertexManager::s_pCurBufferPointer
+= 16;
180 VertexLoader::VertexLoader(const TVtxDesc
&vtx_desc
, const VAT
&vtx_attr
)
182 m_compiledCode
= NULL
;
183 m_numLoadedVertices
= 0;
185 m_numPipelineStages
= 0;
186 m_NativeFmt
= NativeVertexFormat::Create();
188 VertexLoader_Normal::Init();
189 VertexLoader_Position::Init();
190 VertexLoader_TextCoord::Init();
192 m_VtxDesc
= vtx_desc
;
193 SetVAT(vtx_attr
.g0
.Hex
, vtx_attr
.g1
.Hex
, vtx_attr
.g2
.Hex
);
195 AllocCodeSpace(COMPILED_CODE_SIZE
);
196 CompileVertexTranslator();
200 VertexLoader::~VertexLoader()
206 void VertexLoader::CompileVertexTranslator()
209 const TVtxAttr
&vtx_attr
= m_VtxAttr
;
213 PanicAlert("trying to recompile a vtx translator");
215 m_compiledCode
= GetCodePtr();
219 const u8
*loop_start
= GetCodePtr();
221 // Reset component counters if present in vertex format only.
222 if (m_VtxDesc
.Tex0Coord
|| m_VtxDesc
.Tex1Coord
|| m_VtxDesc
.Tex2Coord
|| m_VtxDesc
.Tex3Coord
||
223 m_VtxDesc
.Tex4Coord
|| m_VtxDesc
.Tex5Coord
|| m_VtxDesc
.Tex6Coord
|| m_VtxDesc
.Tex7Coord
) {
224 WriteSetVariable(32, &tcIndex
, Imm32(0));
226 if (m_VtxDesc
.Color0
|| m_VtxDesc
.Color1
) {
227 WriteSetVariable(32, &colIndex
, Imm32(0));
229 if (m_VtxDesc
.Tex0MatIdx
|| m_VtxDesc
.Tex1MatIdx
|| m_VtxDesc
.Tex2MatIdx
|| m_VtxDesc
.Tex3MatIdx
||
230 m_VtxDesc
.Tex4MatIdx
|| m_VtxDesc
.Tex5MatIdx
|| m_VtxDesc
.Tex6MatIdx
|| m_VtxDesc
.Tex7MatIdx
) {
231 WriteSetVariable(32, &s_texmtxwrite
, Imm32(0));
232 WriteSetVariable(32, &s_texmtxread
, Imm32(0));
237 const int col
[2] = {m_VtxDesc
.Color0
, m_VtxDesc
.Color1
};
239 // Since m_VtxDesc.Tex7Coord is broken across a 32 bit word boundary, retrieve its value manually.
240 // If we didn't do this, the vertex format would be read as one bit offset from where it should be, making
241 // 01 become 00, and 10/11 become 01
243 m_VtxDesc
.Tex0Coord
, m_VtxDesc
.Tex1Coord
, m_VtxDesc
.Tex2Coord
, m_VtxDesc
.Tex3Coord
,
244 m_VtxDesc
.Tex4Coord
, m_VtxDesc
.Tex5Coord
, m_VtxDesc
.Tex6Coord
, (m_VtxDesc
.Hex
>> 31) & 3
248 m_numPipelineStages
= 0;
250 // It's a bit ugly that we poke inside m_NativeFmt in this function. Planning to fix this.
251 m_NativeFmt
->m_components
= 0;
253 // Position in pc vertex format.
255 PortableVertexDeclaration vtx_decl
;
256 memset(&vtx_decl
, 0, sizeof(vtx_decl
));
257 for (int i
= 0; i
< 8; i
++) {
258 vtx_decl
.texcoord_offset
[i
] = -1;
261 // m_VBVertexStride for texmtx and posmtx is computed later when writing.
263 // Position Matrix Index
264 if (m_VtxDesc
.PosMatIdx
) {
265 WriteCall(PosMtx_ReadDirect_UByte
);
266 m_NativeFmt
->m_components
|= VB_HAS_POSMTXIDX
;
270 if (m_VtxDesc
.Tex0MatIdx
) {m_VertexSize
+= 1; m_NativeFmt
->m_components
|= VB_HAS_TEXMTXIDX0
; WriteCall(TexMtx_ReadDirect_UByte
); }
271 if (m_VtxDesc
.Tex1MatIdx
) {m_VertexSize
+= 1; m_NativeFmt
->m_components
|= VB_HAS_TEXMTXIDX1
; WriteCall(TexMtx_ReadDirect_UByte
); }
272 if (m_VtxDesc
.Tex2MatIdx
) {m_VertexSize
+= 1; m_NativeFmt
->m_components
|= VB_HAS_TEXMTXIDX2
; WriteCall(TexMtx_ReadDirect_UByte
); }
273 if (m_VtxDesc
.Tex3MatIdx
) {m_VertexSize
+= 1; m_NativeFmt
->m_components
|= VB_HAS_TEXMTXIDX3
; WriteCall(TexMtx_ReadDirect_UByte
); }
274 if (m_VtxDesc
.Tex4MatIdx
) {m_VertexSize
+= 1; m_NativeFmt
->m_components
|= VB_HAS_TEXMTXIDX4
; WriteCall(TexMtx_ReadDirect_UByte
); }
275 if (m_VtxDesc
.Tex5MatIdx
) {m_VertexSize
+= 1; m_NativeFmt
->m_components
|= VB_HAS_TEXMTXIDX5
; WriteCall(TexMtx_ReadDirect_UByte
); }
276 if (m_VtxDesc
.Tex6MatIdx
) {m_VertexSize
+= 1; m_NativeFmt
->m_components
|= VB_HAS_TEXMTXIDX6
; WriteCall(TexMtx_ReadDirect_UByte
); }
277 if (m_VtxDesc
.Tex7MatIdx
) {m_VertexSize
+= 1; m_NativeFmt
->m_components
|= VB_HAS_TEXMTXIDX7
; WriteCall(TexMtx_ReadDirect_UByte
); }
279 // Write vertex position loader
280 WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc
.Position
, m_VtxAttr
.PosFormat
, m_VtxAttr
.PosElements
));
281 m_VertexSize
+= VertexLoader_Position::GetSize(m_VtxDesc
.Position
, m_VtxAttr
.PosFormat
, m_VtxAttr
.PosElements
);
284 // OK, so we just got a point. Let's go back and read it for the bounding box.
287 WriteCall(UpdateBoundingBox
);
291 vtx_decl
.num_normals
= 0;
292 if (m_VtxDesc
.Normal
!= NOT_PRESENT
) {
293 m_VertexSize
+= VertexLoader_Normal::GetSize(m_VtxDesc
.Normal
, m_VtxAttr
.NormalFormat
, m_VtxAttr
.NormalElements
, m_VtxAttr
.NormalIndex3
);
294 TPipelineFunction pFunc
= VertexLoader_Normal::GetFunction(m_VtxDesc
.Normal
, m_VtxAttr
.NormalFormat
, m_VtxAttr
.NormalElements
, m_VtxAttr
.NormalIndex3
, g_Config
.bAllowSignedBytes
);
298 sprintf(temp
,"%i %i %i %i", m_VtxDesc
.Normal
, m_VtxAttr
.NormalFormat
, m_VtxAttr
.NormalElements
, m_VtxAttr
.NormalIndex3
);
299 g_VideoInitialize
.pSysMessage("VertexLoader_Normal::GetFunction returned zero!");
303 vtx_decl
.num_normals
= vtx_attr
.NormalElements
? 3 : 1;
304 vtx_decl
.normal_offset
[0] = -1;
305 vtx_decl
.normal_offset
[1] = -1;
306 vtx_decl
.normal_offset
[2] = -1;
307 switch (vtx_attr
.NormalFormat
) {
311 vtx_decl
.normal_gl_type
= VAR_BYTE
;
313 if (vtx_attr
.NormalFormat
== FORMAT_BYTE
&& !g_Config
.bAllowSignedBytes
)
315 vtx_decl
.normal_gl_type
= VAR_SHORT
;
318 vtx_decl
.normal_gl_size
= 4;
319 vtx_decl
.normal_offset
[0] = nat_offset
;
320 nat_offset
+= native_size
;
321 if (vtx_attr
.NormalElements
) {
322 vtx_decl
.normal_offset
[1] = nat_offset
;
323 nat_offset
+= native_size
;
324 vtx_decl
.normal_offset
[2] = nat_offset
;
325 nat_offset
+= native_size
;
331 vtx_decl
.normal_gl_type
= VAR_SHORT
;
332 vtx_decl
.normal_gl_size
= 4;
333 vtx_decl
.normal_offset
[0] = nat_offset
;
335 if (vtx_attr
.NormalElements
) {
336 vtx_decl
.normal_offset
[1] = nat_offset
;
338 vtx_decl
.normal_offset
[2] = nat_offset
;
343 vtx_decl
.normal_gl_type
= VAR_FLOAT
;
344 vtx_decl
.normal_gl_size
= 3;
345 vtx_decl
.normal_offset
[0] = nat_offset
;
347 if (vtx_attr
.NormalElements
) {
348 vtx_decl
.normal_offset
[1] = nat_offset
;
350 vtx_decl
.normal_offset
[2] = nat_offset
;
354 default: _assert_(0); break;
357 int numNormals
= (m_VtxAttr
.NormalElements
== 1) ? NRM_THREE
: NRM_ONE
;
358 m_NativeFmt
->m_components
|= VB_HAS_NRM0
;
360 if (numNormals
== NRM_THREE
)
361 m_NativeFmt
->m_components
|= VB_HAS_NRM1
| VB_HAS_NRM2
;
364 vtx_decl
.color_gl_type
= VAR_UNSIGNED_BYTE
;
365 vtx_decl
.color_offset
[0] = -1;
366 vtx_decl
.color_offset
[1] = -1;
367 for (int i
= 0; i
< 2; i
++) {
368 m_NativeFmt
->m_components
|= VB_HAS_COL0
<< i
;
372 m_NativeFmt
->m_components
&= ~(VB_HAS_COL0
<< i
);
373 vtx_decl
.color_offset
[i
] = -1;
376 switch (m_VtxAttr
.color
[i
].Comp
)
378 case FORMAT_16B_565
: m_VertexSize
+= 2; WriteCall(Color_ReadDirect_16b_565
); break;
379 case FORMAT_24B_888
: m_VertexSize
+= 3; WriteCall(Color_ReadDirect_24b_888
); break;
380 case FORMAT_32B_888x
: m_VertexSize
+= 4; WriteCall(Color_ReadDirect_32b_888x
); break;
381 case FORMAT_16B_4444
: m_VertexSize
+= 2; WriteCall(Color_ReadDirect_16b_4444
); break;
382 case FORMAT_24B_6666
: m_VertexSize
+= 3; WriteCall(Color_ReadDirect_24b_6666
); break;
383 case FORMAT_32B_8888
: m_VertexSize
+= 4; WriteCall(Color_ReadDirect_32b_8888
); break;
384 default: _assert_(0); break;
389 switch (m_VtxAttr
.color
[i
].Comp
)
391 case FORMAT_16B_565
: WriteCall(Color_ReadIndex8_16b_565
); break;
392 case FORMAT_24B_888
: WriteCall(Color_ReadIndex8_24b_888
); break;
393 case FORMAT_32B_888x
: WriteCall(Color_ReadIndex8_32b_888x
); break;
394 case FORMAT_16B_4444
: WriteCall(Color_ReadIndex8_16b_4444
); break;
395 case FORMAT_24B_6666
: WriteCall(Color_ReadIndex8_24b_6666
); break;
396 case FORMAT_32B_8888
: WriteCall(Color_ReadIndex8_32b_8888
); break;
397 default: _assert_(0); break;
402 switch (m_VtxAttr
.color
[i
].Comp
)
404 case FORMAT_16B_565
: WriteCall(Color_ReadIndex16_16b_565
); break;
405 case FORMAT_24B_888
: WriteCall(Color_ReadIndex16_24b_888
); break;
406 case FORMAT_32B_888x
: WriteCall(Color_ReadIndex16_32b_888x
); break;
407 case FORMAT_16B_4444
: WriteCall(Color_ReadIndex16_16b_4444
); break;
408 case FORMAT_24B_6666
: WriteCall(Color_ReadIndex16_24b_6666
); break;
409 case FORMAT_32B_8888
: WriteCall(Color_ReadIndex16_32b_8888
); break;
410 default: _assert_(0); break;
414 // Common for the three bottom cases
415 if (col
[i
] != NOT_PRESENT
) {
416 vtx_decl
.color_offset
[i
] = nat_offset
;
421 // Texture matrix indices (remove if corresponding texture coordinate isn't enabled)
422 for (int i
= 0; i
< 8; i
++) {
423 vtx_decl
.texcoord_offset
[i
] = -1;
424 const int format
= m_VtxAttr
.texCoord
[i
].Format
;
425 const int elements
= m_VtxAttr
.texCoord
[i
].Elements
;
427 if (tc
[i
] == NOT_PRESENT
) {
428 m_NativeFmt
->m_components
&= ~(VB_HAS_UV0
<< i
);
430 _assert_msg_(VIDEO
, DIRECT
<= tc
[i
] && tc
[i
] <= INDEX16
, "Invalid texture coordinates!\n(tc[i] = %d)", tc
[i
]);
431 _assert_msg_(VIDEO
, FORMAT_UBYTE
<= format
&& format
<= FORMAT_FLOAT
, "Invalid texture coordinates format!\n(format = %d)", format
);
432 _assert_msg_(VIDEO
, 0 <= elements
&& elements
<= 1, "Invalid number of texture coordinates elemnts!\n(elements = %d)", elements
);
434 m_NativeFmt
->m_components
|= VB_HAS_UV0
<< i
;
435 WriteCall(VertexLoader_TextCoord::GetFunction(tc
[i
], format
, elements
));
436 m_VertexSize
+= VertexLoader_TextCoord::GetSize(tc
[i
], format
, elements
);
439 if (m_NativeFmt
->m_components
& (VB_HAS_TEXMTXIDX0
<< i
)) {
440 if (tc
[i
] != NOT_PRESENT
) {
441 // if texmtx is included, texcoord will always be 3 floats, z will be the texmtx index
442 vtx_decl
.texcoord_offset
[i
] = nat_offset
;
443 vtx_decl
.texcoord_gl_type
[i
] = VAR_FLOAT
;
444 vtx_decl
.texcoord_size
[i
] = 3;
446 WriteCall(m_VtxAttr
.texCoord
[i
].Elements
? TexMtx_Write_Float
: TexMtx_Write_Float2
);
449 m_NativeFmt
->m_components
|= VB_HAS_UV0
<< i
; // have to include since using now
450 vtx_decl
.texcoord_offset
[i
] = nat_offset
;
451 vtx_decl
.texcoord_gl_type
[i
] = VAR_FLOAT
;
452 vtx_decl
.texcoord_size
[i
] = 4;
453 nat_offset
+= 16; // still include the texture coordinate, but this time as 6 + 2 bytes
454 WriteCall(TexMtx_Write_Float4
);
458 if (tc
[i
] != NOT_PRESENT
) {
459 vtx_decl
.texcoord_offset
[i
] = nat_offset
;
460 vtx_decl
.texcoord_gl_type
[i
] = VAR_FLOAT
;
461 vtx_decl
.texcoord_size
[i
] = vtx_attr
.texCoord
[i
].Elements
? 2 : 1;
462 nat_offset
+= 4 * (vtx_attr
.texCoord
[i
].Elements
? 2 : 1);
466 if (tc
[i
] == NOT_PRESENT
) {
467 // if there's more tex coords later, have to write a dummy call
470 if (tc
[j
] != NOT_PRESENT
) {
471 WriteCall(VertexLoader_TextCoord::GetDummyFunction()); // important to get indices right!
476 if (j
== 8 && !((m_NativeFmt
->m_components
& VB_HAS_TEXMTXIDXALL
) & (VB_HAS_TEXMTXIDXALL
<< (i
+ 1)))) {
477 // no more tex coords and tex matrices, so exit loop
483 if (m_VtxDesc
.PosMatIdx
) {
484 WriteCall(PosMtx_Write
);
485 vtx_decl
.posmtx_offset
= nat_offset
;
488 vtx_decl
.posmtx_offset
= -1;
491 native_stride
= nat_offset
;
492 vtx_decl
.stride
= native_stride
;
497 MOV(64, R(RAX
), Imm64((u64
)&loop_counter
));
498 SUB(32, MatR(RAX
), Imm8(1));
500 SUB(32, M(&loop_counter
), Imm8(1));
503 J_CC(CC_NZ
, loop_start
, true);
506 m_NativeFmt
->Initialize(vtx_decl
);
509 void VertexLoader::WriteCall(TPipelineFunction func
)
513 MOV(64, R(RAX
), Imm64((u64
)func
));
519 m_PipelineStages
[m_numPipelineStages
++] = func
;
523 void VertexLoader::WriteGetVariable(int bits
, OpArg dest
, void *address
)
527 MOV(64, R(RAX
), Imm64((u64
)address
));
528 MOV(bits
, dest
, MatR(RAX
));
530 MOV(bits
, dest
, M(address
));
535 void VertexLoader::WriteSetVariable(int bits
, void *address
, OpArg value
)
539 MOV(64, R(RAX
), Imm64((u64
)address
));
540 MOV(bits
, MatR(RAX
), value
);
542 MOV(bits
, M(address
), value
);
547 void VertexLoader::RunVertices(int vtx_attr_group
, int primitive
, int count
)
551 m_numLoadedVertices
+= count
;
553 // Flush if our vertex format is different from the currently set.
554 if (g_nativeVertexFmt
!= NULL
&& g_nativeVertexFmt
!= m_NativeFmt
)
556 // We really must flush here. It's possible that the native representations
557 // of the two vtx formats are the same, but we have no way to easily check that
559 VertexManager::Flush();
560 // Also move the Set() here?
562 g_nativeVertexFmt
= m_NativeFmt
;
564 if (bpmem
.genMode
.cullmode
== 3 && primitive
< 5)
566 // if cull mode is 3 (cull all faces), nothing would be drawn: skip the data of quads and triangles
567 DataSkip(count
* m_VertexSize
);
571 m_NativeFmt
->EnableComponents(m_NativeFmt
->m_components
);
573 // Load position and texcoord scale factors.
574 m_VtxAttr
.PosFrac
= g_VtxAttr
[vtx_attr_group
].g0
.PosFrac
;
575 m_VtxAttr
.texCoord
[0].Frac
= g_VtxAttr
[vtx_attr_group
].g0
.Tex0Frac
;
576 m_VtxAttr
.texCoord
[1].Frac
= g_VtxAttr
[vtx_attr_group
].g1
.Tex1Frac
;
577 m_VtxAttr
.texCoord
[2].Frac
= g_VtxAttr
[vtx_attr_group
].g1
.Tex2Frac
;
578 m_VtxAttr
.texCoord
[3].Frac
= g_VtxAttr
[vtx_attr_group
].g1
.Tex3Frac
;
579 m_VtxAttr
.texCoord
[4].Frac
= g_VtxAttr
[vtx_attr_group
].g2
.Tex4Frac
;
580 m_VtxAttr
.texCoord
[5].Frac
= g_VtxAttr
[vtx_attr_group
].g2
.Tex5Frac
;
581 m_VtxAttr
.texCoord
[6].Frac
= g_VtxAttr
[vtx_attr_group
].g2
.Tex6Frac
;
582 m_VtxAttr
.texCoord
[7].Frac
= g_VtxAttr
[vtx_attr_group
].g2
.Tex7Frac
;
584 pVtxAttr
= &m_VtxAttr
;
585 posScale
= fractionTable
[m_VtxAttr
.PosFrac
];
586 if (m_NativeFmt
->m_components
& VB_HAS_UVALL
)
587 for (int i
= 0; i
< 8; i
++)
588 tcScale
[i
] = fractionTable
[m_VtxAttr
.texCoord
[i
].Frac
];
589 for (int i
= 0; i
< 2; i
++)
590 colElements
[i
] = m_VtxAttr
.color
[i
].Elements
;
592 // if strips or fans, make sure all vertices can fit in buffer, otherwise flush
595 case 3: // strip .. hm, weird
597 if (VertexManager::GetRemainingSize() < 3 * native_stride
)
598 VertexManager::Flush();
600 case 6: // line strip
601 if (VertexManager::GetRemainingSize() < 2 * native_stride
)
602 VertexManager::Flush();
604 case 0: granularity
= 4; break; // quads
605 case 2: granularity
= 3; break; // tris
606 case 5: granularity
= 2; break; // lines
609 int startv
= 0, extraverts
= 0;
612 //int remainingVerts2 = VertexManager::GetRemainingVertices(primitive);
615 int remainingVerts
= VertexManager::GetRemainingSize() / native_stride
;
616 //if (remainingVerts2 - v + startv < remainingVerts)
617 //remainingVerts = remainingVerts2 - v + startv;
618 if (remainingVerts
< granularity
) {
619 INCSTAT(stats
.thisFrame
.numBufferSplits
);
620 // This buffer full - break current primitive and flush, to switch to the next buffer.
621 u8
* plastptr
= VertexManager::s_pCurBufferPointer
;
623 VertexManager::AddVertices(primitive
, v
- startv
+ extraverts
);
624 VertexManager::Flush();
625 //remainingVerts2 = VertexManager::GetRemainingVertices(primitive);
626 // Why does this need to be so complicated?
628 case 3: // triangle strip, copy last two vertices
629 // a little trick since we have to keep track of signs
631 memcpy_gc(VertexManager::s_pCurBufferPointer
, plastptr
-2*native_stride
, native_stride
);
632 memcpy_gc(VertexManager::s_pCurBufferPointer
+native_stride
, plastptr
-native_stride
*2, 2*native_stride
);
633 VertexManager::s_pCurBufferPointer
+= native_stride
*3;
637 memcpy_gc(VertexManager::s_pCurBufferPointer
, plastptr
-native_stride
*2, native_stride
*2);
638 VertexManager::s_pCurBufferPointer
+= native_stride
*2;
642 case 4: // tri fan, copy first and last vert
643 memcpy_gc(VertexManager::s_pCurBufferPointer
, plastptr
-native_stride
*(v
-startv
+extraverts
), native_stride
);
644 VertexManager::s_pCurBufferPointer
+= native_stride
;
645 memcpy_gc(VertexManager::s_pCurBufferPointer
, plastptr
-native_stride
, native_stride
);
646 VertexManager::s_pCurBufferPointer
+= native_stride
;
649 case 6: // line strip
650 memcpy_gc(VertexManager::s_pCurBufferPointer
, plastptr
-native_stride
, native_stride
);
651 VertexManager::s_pCurBufferPointer
+= native_stride
;
660 int remainingPrims
= remainingVerts
/ granularity
;
661 remainingVerts
= remainingPrims
* granularity
;
662 if (count
- v
< remainingVerts
)
663 remainingVerts
= count
- v
;
666 if (remainingVerts
> 0) {
667 loop_counter
= remainingVerts
;
668 ((void (*)())(void*)m_compiledCode
)();
671 for (int s
= 0; s
< remainingVerts
; s
++)
675 s_texmtxwrite
= s_texmtxread
= 0;
676 for (int i
= 0; i
< m_numPipelineStages
; i
++)
677 m_PipelineStages
[i
]();
685 VertexManager::AddVertices(primitive
, count
- startv
+ extraverts
);
691 void VertexLoader::RunCompiledVertices(int vtx_attr_group
, int primitive
, int count
, u8
* Data
)
695 m_numLoadedVertices
+= count
;
697 // Flush if our vertex format is different from the currently set.
698 if (g_nativeVertexFmt
!= NULL
&& g_nativeVertexFmt
!= m_NativeFmt
)
700 // We really must flush here. It's possible that the native representations
701 // of the two vtx formats are the same, but we have no way to easily check that
703 VertexManager::Flush();
704 // Also move the Set() here?
706 g_nativeVertexFmt
= m_NativeFmt
;
708 if (bpmem
.genMode
.cullmode
== 3 && primitive
< 5)
710 // if cull mode is 3 (cull all faces), nothing would be drawn: skip the data of quads and triangles
711 DataSkip(count
* m_VertexSize
);
715 m_NativeFmt
->EnableComponents(m_NativeFmt
->m_components
);
717 // Load position and texcoord scale factors.
718 m_VtxAttr
.PosFrac
= g_VtxAttr
[vtx_attr_group
].g0
.PosFrac
;
719 m_VtxAttr
.texCoord
[0].Frac
= g_VtxAttr
[vtx_attr_group
].g0
.Tex0Frac
;
720 m_VtxAttr
.texCoord
[1].Frac
= g_VtxAttr
[vtx_attr_group
].g1
.Tex1Frac
;
721 m_VtxAttr
.texCoord
[2].Frac
= g_VtxAttr
[vtx_attr_group
].g1
.Tex2Frac
;
722 m_VtxAttr
.texCoord
[3].Frac
= g_VtxAttr
[vtx_attr_group
].g1
.Tex3Frac
;
723 m_VtxAttr
.texCoord
[4].Frac
= g_VtxAttr
[vtx_attr_group
].g2
.Tex4Frac
;
724 m_VtxAttr
.texCoord
[5].Frac
= g_VtxAttr
[vtx_attr_group
].g2
.Tex5Frac
;
725 m_VtxAttr
.texCoord
[6].Frac
= g_VtxAttr
[vtx_attr_group
].g2
.Tex6Frac
;
726 m_VtxAttr
.texCoord
[7].Frac
= g_VtxAttr
[vtx_attr_group
].g2
.Tex7Frac
;
728 pVtxAttr
= &m_VtxAttr
;
729 posScale
= fractionTable
[m_VtxAttr
.PosFrac
];
730 if (m_NativeFmt
->m_components
& VB_HAS_UVALL
)
731 for (int i
= 0; i
< 8; i
++)
732 tcScale
[i
] = fractionTable
[m_VtxAttr
.texCoord
[i
].Frac
];
733 for (int i
= 0; i
< 2; i
++)
734 colElements
[i
] = m_VtxAttr
.color
[i
].Elements
;
736 if(VertexManager::GetRemainingSize() < native_stride
* count
)
737 VertexManager::Flush();
738 memcpy_gc(VertexManager::s_pCurBufferPointer
, Data
, native_stride
* count
);
739 VertexManager::s_pCurBufferPointer
+= native_stride
* count
;
740 DataSkip(count
* m_VertexSize
);
741 VertexManager::AddVertices(primitive
, count
);
746 void VertexLoader::SetVAT(u32 _group0
, u32 _group1
, u32 _group2
)
749 vat
.g0
.Hex
= _group0
;
750 vat
.g1
.Hex
= _group1
;
751 vat
.g2
.Hex
= _group2
;
753 m_VtxAttr
.PosElements
= vat
.g0
.PosElements
;
754 m_VtxAttr
.PosFormat
= vat
.g0
.PosFormat
;
755 m_VtxAttr
.PosFrac
= vat
.g0
.PosFrac
;
756 m_VtxAttr
.NormalElements
= vat
.g0
.NormalElements
;
757 m_VtxAttr
.NormalFormat
= vat
.g0
.NormalFormat
;
758 m_VtxAttr
.color
[0].Elements
= vat
.g0
.Color0Elements
;
759 m_VtxAttr
.color
[0].Comp
= vat
.g0
.Color0Comp
;
760 m_VtxAttr
.color
[1].Elements
= vat
.g0
.Color1Elements
;
761 m_VtxAttr
.color
[1].Comp
= vat
.g0
.Color1Comp
;
762 m_VtxAttr
.texCoord
[0].Elements
= vat
.g0
.Tex0CoordElements
;
763 m_VtxAttr
.texCoord
[0].Format
= vat
.g0
.Tex0CoordFormat
;
764 m_VtxAttr
.texCoord
[0].Frac
= vat
.g0
.Tex0Frac
;
765 m_VtxAttr
.ByteDequant
= vat
.g0
.ByteDequant
;
766 m_VtxAttr
.NormalIndex3
= vat
.g0
.NormalIndex3
;
768 m_VtxAttr
.texCoord
[1].Elements
= vat
.g1
.Tex1CoordElements
;
769 m_VtxAttr
.texCoord
[1].Format
= vat
.g1
.Tex1CoordFormat
;
770 m_VtxAttr
.texCoord
[1].Frac
= vat
.g1
.Tex1Frac
;
771 m_VtxAttr
.texCoord
[2].Elements
= vat
.g1
.Tex2CoordElements
;
772 m_VtxAttr
.texCoord
[2].Format
= vat
.g1
.Tex2CoordFormat
;
773 m_VtxAttr
.texCoord
[2].Frac
= vat
.g1
.Tex2Frac
;
774 m_VtxAttr
.texCoord
[3].Elements
= vat
.g1
.Tex3CoordElements
;
775 m_VtxAttr
.texCoord
[3].Format
= vat
.g1
.Tex3CoordFormat
;
776 m_VtxAttr
.texCoord
[3].Frac
= vat
.g1
.Tex3Frac
;
777 m_VtxAttr
.texCoord
[4].Elements
= vat
.g1
.Tex4CoordElements
;
778 m_VtxAttr
.texCoord
[4].Format
= vat
.g1
.Tex4CoordFormat
;
780 m_VtxAttr
.texCoord
[4].Frac
= vat
.g2
.Tex4Frac
;
781 m_VtxAttr
.texCoord
[5].Elements
= vat
.g2
.Tex5CoordElements
;
782 m_VtxAttr
.texCoord
[5].Format
= vat
.g2
.Tex5CoordFormat
;
783 m_VtxAttr
.texCoord
[5].Frac
= vat
.g2
.Tex5Frac
;
784 m_VtxAttr
.texCoord
[6].Elements
= vat
.g2
.Tex6CoordElements
;
785 m_VtxAttr
.texCoord
[6].Format
= vat
.g2
.Tex6CoordFormat
;
786 m_VtxAttr
.texCoord
[6].Frac
= vat
.g2
.Tex6Frac
;
787 m_VtxAttr
.texCoord
[7].Elements
= vat
.g2
.Tex7CoordElements
;
788 m_VtxAttr
.texCoord
[7].Format
= vat
.g2
.Tex7CoordFormat
;
789 m_VtxAttr
.texCoord
[7].Frac
= vat
.g2
.Tex7Frac
;
792 void VertexLoader::AppendToString(std::string
*dest
) const
795 static const char *posMode
[4] = {
801 static const char *posFormats
[5] = {
802 "u8", "s8", "u16", "s16", "flt",
804 static const char *colorFormat
[8] = {
815 dest
->append(StringFromFormat("%ib skin: %i P: %i %s-%s ",
816 m_VertexSize
, m_VtxDesc
.PosMatIdx
,
817 m_VtxAttr
.PosElements
? 3 : 2, posMode
[m_VtxDesc
.Position
], posFormats
[m_VtxAttr
.PosFormat
]));
818 if (m_VtxDesc
.Normal
) {
819 dest
->append(StringFromFormat("Nrm: %i %s-%s ",
820 m_VtxAttr
.NormalElements
, posMode
[m_VtxDesc
.Normal
], posFormats
[m_VtxAttr
.NormalFormat
]));
822 int color_mode
[2] = {m_VtxDesc
.Color0
, m_VtxDesc
.Color1
};
823 for (int i
= 0; i
< 2; i
++)
827 dest
->append(StringFromFormat("C%i: %i %s-%s ", i
, m_VtxAttr
.color
[i
].Elements
, posMode
[color_mode
[i
]], colorFormat
[m_VtxAttr
.color
[i
].Comp
]));
831 m_VtxDesc
.Tex0Coord
, m_VtxDesc
.Tex1Coord
, m_VtxDesc
.Tex2Coord
, m_VtxDesc
.Tex3Coord
,
832 m_VtxDesc
.Tex4Coord
, m_VtxDesc
.Tex5Coord
, m_VtxDesc
.Tex6Coord
, m_VtxDesc
.Tex7Coord
834 for (int i
= 0; i
< 8; i
++)
838 dest
->append(StringFromFormat("T%i: %i %s-%s ",
839 i
, m_VtxAttr
.texCoord
[i
].Elements
, posMode
[tex_mode
[i
]], posFormats
[m_VtxAttr
.texCoord
[i
].Format
]));
842 dest
->append(StringFromFormat(" - %i v\n", m_numLoadedVertices
));