Remove second template parameter from class GUIList
[openttd/fttd.git] / src / viewport_sprite_sorter_sse4.cpp
blobf10af4f5795c1c3967a1aab88b38f79c28e0c246
1 /* $Id$ */
3 /*
4 * This file is part of OpenTTD.
5 * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
6 * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
7 * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
8 */
/** @file viewport_sprite_sorter_sse4.cpp Sprite sorter that uses SSE4.1. */
12 #ifdef WITH_SSE
14 #include "stdafx.h"
15 #include "smmintrin.h"
16 #include "viewport_sprite_sorter.h"
/* LOAD_128 names the 128-bit integer load used by the SSE4.1 sprite comparator in this file.
 * With _SQ64 defined, the size of ParentSpriteToDraw is checked at compile time to be a
 * multiple of 16 bytes and the aligned load (_mm_load_si128) is selected; otherwise the
 * unaligned load (_mm_loadu_si128) is selected.
 * NOTE(review): the assertion only covers the struct size; the aligned load additionally
 * needs the loaded addresses to be 16-byte aligned, which is presumably guaranteed by the
 * declaration/allocation of ParentSpriteToDraw elsewhere -- confirm. */
#ifdef _SQ64
	assert_compile((sizeof(ParentSpriteToDraw) % 16) == 0);
	#define LOAD_128 _mm_load_si128
#else
	#define LOAD_128 _mm_loadu_si128
#endif
25 struct CompareParentSpritesSSE41 {
26 const __m128i mask_ptest;
28 CompareParentSpritesSSE41 (void) :
29 mask_ptest (_mm_setr_epi8 (-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0))
33 bool operator() (const ParentSpriteToDraw *ps1,
34 const ParentSpriteToDraw *ps2) const;
37 inline bool CompareParentSpritesSSE41::operator()
38 (const ParentSpriteToDraw *ps1, const ParentSpriteToDraw *ps2) const
41 * Original code:
42 * if (ps->xmax >= ps2->xmin && ps->xmin <= ps2->xmax && // overlap in X?
43 * ps->ymax >= ps2->ymin && ps->ymin <= ps2->ymax && // overlap in Y?
44 * ps->zmax >= ps2->zmin && ps->zmin <= ps2->zmax) { // overlap in Z?
46 * Above conditions are equivalent to:
47 * 1/ !( (ps->xmax >= ps2->xmin) && (ps->ymax >= ps2->ymin) && (ps->zmax >= ps2->zmin) && (ps->xmin <= ps2->xmax) && (ps->ymin <= ps2->ymax) && (ps->zmin <= ps2->zmax) )
48 * 2/ !( (ps->xmax >= ps2->xmin) && (ps->ymax >= ps2->ymin) && (ps->zmax >= ps2->zmin) && (ps2->xmax >= ps->xmin) && (ps2->ymax >= ps->ymin) && (ps2->zmax >= ps->zmin) )
49 * 3/ !( ( (ps->xmax >= ps2->xmin) && (ps->ymax >= ps2->ymin) && (ps->zmax >= ps2->zmin) ) && ( (ps2->xmax >= ps->xmin) && (ps2->ymax >= ps->ymin) && (ps2->zmax >= ps->zmin) ) )
50 * 4/ !( !( (ps->xmax < ps2->xmin) || (ps->ymax < ps2->ymin) || (ps->zmax < ps2->zmin) ) && !( (ps2->xmax < ps->xmin) || (ps2->ymax < ps->ymin) || (ps2->zmax < ps->zmin) ) )
51 * 5/ PTEST <---------------------------------- rslt1 ----------------------------------> <------------------------------ rslt2 -------------------------------------->
53 __m128i ps1_max = LOAD_128((const __m128i*) &ps1->xmax);
54 __m128i ps2_min = LOAD_128((const __m128i*) &ps2->xmin);
55 __m128i rslt1 = _mm_cmplt_epi32 (ps1_max, ps2_min);
56 if (!_mm_testz_si128 (this->mask_ptest, rslt1)) return true;
58 __m128i ps1_min = LOAD_128((const __m128i*) &ps1->xmin);
59 __m128i ps2_max = LOAD_128((const __m128i*) &ps2->xmax);
60 __m128i rslt2 = _mm_cmplt_epi32 (ps2_max, ps1_min);
61 if (!_mm_testz_si128 (this->mask_ptest, rslt2)) return false;
63 /* Use X+Y+Z as the sorting order, so sprites closer to the bottom of
64 * the screen and with higher Z elevation, are drawn in front. Here
65 * X,Y,Z are the coordinates of the "center of mass" of the sprite,
66 * i.e. X=(left+right)/2, etc. However, since we only care about
67 * order, don't actually divide / 2. */
68 return ps1->xmin + ps1->xmax + ps1->ymin + ps1->ymax + ps1->zmin + ps1->zmax <=
69 ps2->xmin + ps2->xmax + ps2->ymin + ps2->ymax + ps2->zmin + ps2->zmax;
72 /** Sort parent sprites pointer array using SSE4.1 optimizations. */
73 void ViewportSortParentSpritesSSE41 (ParentSpriteToDraw **psd,
74 const ParentSpriteToDraw *const *psdvend)
76 CompareParentSpritesSSE41 comparator;
77 SortParentSprites (comparator, psd, psdvend);
80 #endif /* WITH_SSE */