2 Copyright (C) 1996-1997 Id Software, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License
6 as published by the Free Software Foundation; either version 2
7 of the License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 See the GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 // x86 assembly-language polygon model drawing code
// !!! if this is changed, it must be changed in d_polyse.c too !!!
// DPS_MAXSPANS expands to MAXHEIGHT+1 (MAXHEIGHT is defined outside this view).
// NOTE(review): the expansion is not parenthesized, so wrap it in parentheses
// when using it inside a larger expression.
33 #define DPS_MAXSPANS MAXHEIGHT+1
34 // 1 extra for spanpackage that marks end
36 //#define SPAN_SIZE (((DPS_MAXSPANS + 1 + ((CACHE_SIZE - 1) / spanpackage_t_size)) + 1) * spanpackage_t_size)
// SPAN_SIZE is the byte count taken from the stack with "subl $(SPAN_SIZE),%esp"
// and given back with the matching "addl $(SPAN_SIZE),%esp"; both uses wrap the
// macro in parentheses, so the unparenthesized body expands safely there.
// NOTE(review): this is a hard-coded stand-in for the commented-out expression
// above; presumably 1024 is MAXHEIGHT and 32 is spanpackage_t_size -- TODO confirm
// they still match the C-side definitions.
37 #define SPAN_SIZE (1024+1+1+1)*32
// Single-precision scratch slots. D_PolysetCalcGradients stores two of its
// vertex deltas here with fstps and later reads them back as fmuls memory
// operands instead of keeping them on the FPU stack.
43 p10_minus_p20
: .single 0
44 p01_minus_p21
: .single 0
// Table of the eight span-loop entry labels, listed LDraw8 down to LDraw1.
// The span drawing code dispatches through it with
// "jmp *aff8entryvec_table(,%eax,4)", so %eax selects the entry (0 -> LDraw8,
// 7 -> LDraw1).
49 aff8entryvec_table
: .long LDraw8, LDraw7, LDraw6, LDraw5
50 .long LDraw4, LDraw3, LDraw2, LDraw1
58 .extern C(D_PolysetSetEdgeTable)
59 .extern C(D_RasterizeAliasPolySmooth)
62 //----------------------------------------------------------------------
63 // affine triangle gradient calculation code
64 //----------------------------------------------------------------------
68 .globl C(D_PolysetCalcGradients)
69 C
(D_PolysetCalcGradients
):
// p00_minus_p20 = r_p0[0] - r_p2[0];
// p01_minus_p21 = r_p0[1] - r_p2[1];
// p10_minus_p20 = r_p1[0] - r_p2[0];
// p11_minus_p21 = r_p1[1] - r_p2[1];
//
// xstepdenominv = 1.0 / (p10_minus_p20 * p01_minus_p21 -
//                        p00_minus_p20 * p11_minus_p21);
//
// ystepdenominv = -xstepdenominv;
81 fildl C
(r_p0
)+0 // r_p0
[0]
82 fildl C
(r_p2
)+0 // r_p2
[0] | r_p0
[0]
83 fildl C
(r_p0
)+4 // r_p0
[1] | r_p2
[0] | r_p0
[0]
84 fildl C
(r_p2
)+4 // r_p2
[1] | r_p0
[1] | r_p2
[0] | r_p0
[0]
85 fildl C
(r_p1
)+0 // r_p1
[0] | r_p2
[1] | r_p0
[1] | r_p2
[0] | r_p0
[0]
86 fildl C
(r_p1
)+4 // r_p1
[1] | r_p1
[0] | r_p2
[1] | r_p0
[1] |
88 fxch
%st(3) // r_p0
[1] | r_p1
[0] | r_p2
[1] | r_p1
[1] |
90 fsub %st(2),%st(0) // p01_minus_p21 | r_p1
[0] | r_p2
[1] | r_p1
[1] |
92 fxch
%st(1) // r_p1
[0] | p01_minus_p21 | r_p2
[1] | r_p1
[1] |
94 fsub %st(4),%st(0) // p10_minus_p20 | p01_minus_p21 | r_p2
[1] |
95 // r_p1
[1] | r_p2
[0] | r_p0
[0]
96 fxch
%st(5) // r_p0
[0] | p01_minus_p21 | r_p2
[1] |
97 // r_p1
[1] | r_p2
[0] | p10_minus_p20
98 fsubp
%st(0),%st(4) // p01_minus_p21 | r_p2
[1] | r_p1
[1] |
99 // p00_minus_p20 | p10_minus_p20
100 fxch
%st(2) // r_p1
[1] | r_p2
[1] | p01_minus_p21 |
101 // p00_minus_p20 | p10_minus_p20
102 fsubp
%st(0),%st(1) // p11_minus_p21 | p01_minus_p21 |
103 // p00_minus_p20 | p10_minus_p20
104 fxch
%st(1) // p01_minus_p21 | p11_minus_p21 |
105 // p00_minus_p20 | p10_minus_p20
106 flds C
(d_xdenom
) // d_xdenom | p01_minus_p21 | p11_minus_p21 |
107 // p00_minus_p20 | p10_minus_p20
108 fxch
%st(4) // p10_minus_p20 | p01_minus_p21 | p11_minus_p21 |
109 // p00_minus_p20 | d_xdenom
110 fstps p10_minus_p20
// p01_minus_p21 | p11_minus_p21 |
111 // p00_minus_p20 | d_xdenom
112 fstps p01_minus_p21
// p11_minus_p21 | p00_minus_p20 | xstepdenominv
113 fxch
%st(2) // xstepdenominv | p00_minus_p20 | p11_minus_p21
//// ceil () for light so positive steps are exaggerated, negative steps
//// diminished, pushing us away from underflow toward overflow. Underflow is
//// very visible, overflow is very unlikely, because of ambient lighting
// t0 = r_p0[4] - r_p2[4];
// t1 = r_p1[4] - r_p2[4];
121 fildl C
(r_p2
)+16 // r_p2
[4] | xstepdenominv | p00_minus_p20 |
123 fildl C
(r_p0
)+16 // r_p0
[4] | r_p2
[4] | xstepdenominv |
124 // p00_minus_p20 | p11_minus_p21
125 fildl C
(r_p1
)+16 // r_p1
[4] | r_p0
[4] | r_p2
[4] | xstepdenominv |
126 // p00_minus_p20 | p11_minus_p21
127 fxch
%st(2) // r_p2
[4] | r_p0
[4] | r_p1
[4] | xstepdenominv |
128 // p00_minus_p20 | p11_minus_p21
129 fld
%st(0) // r_p2
[4] | r_p2
[4] | r_p0
[4] | r_p1
[4] |
130 // xstepdenominv | p00_minus_p20 | p11_minus_p21
131 fsubrp
%st(0),%st(2) // r_p2
[4] | t0 | r_p1
[4] | xstepdenominv |
132 // p00_minus_p20 | p11_minus_p21
133 fsubrp
%st(0),%st(2) // t0 | t1 | xstepdenominv | p00_minus_p20 |
// r_lstepx = (int)
//     ceil((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv);
// r_lstepy = (int)
//     ceil((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv);
141 fld
%st(0) // t0 | t0 | t1 | xstepdenominv | p00_minus_p20 |
143 fmul %st(5),%st(0) // t0
*p11_minus_p21 | t0 | t1 | xstepdenominv |
144 // p00_minus_p20 | p11_minus_p21
145 fxch
%st(2) // t1 | t0 | t0
*p11_minus_p21 | xstepdenominv |
146 // p00_minus_p20 | p11_minus_p21
147 fld
%st(0) // t1 | t1 | t0 | t0
*p11_minus_p21 |
148 // xstepdenominv | p00_minus_p20 | p11_minus_p21
149 fmuls p01_minus_p21
// t1
*p01_minus_p21 | t1 | t0 | t0
*p11_minus_p21 |
150 // xstepdenominv | p00_minus_p20 | p11_minus_p21
151 fxch
%st(2) // t0 | t1 | t1
*p01_minus_p21 | t0
*p11_minus_p21 |
152 // xstepdenominv | p00_minus_p20 | p11_minus_p21
153 fmuls p10_minus_p20
// t0
*p10_minus_p20 | t1 | t1
*p01_minus_p21 |
154 // t0
*p11_minus_p21 | xstepdenominv |
155 // p00_minus_p20 | p11_minus_p21
156 fxch
%st(1) // t1 | t0
*p10_minus_p20 | t1
*p01_minus_p21 |
157 // t0
*p11_minus_p21 | xstepdenominv |
158 // p00_minus_p20 | p11_minus_p21
159 fmul %st(5),%st(0) // t1
*p00_minus_p20 | t0
*p10_minus_p20 |
160 // t1
*p01_minus_p21 | t0
*p11_minus_p21 |
161 // xstepdenominv | p00_minus_p20 | p11_minus_p21
162 fxch
%st(2) // t1
*p01_minus_p21 | t0
*p10_minus_p20 |
163 // t1
*p00_minus_p20 | t0
*p11_minus_p21 |
164 // xstepdenominv | p00_minus_p20 | p11_minus_p21
165 fsubp
%st(0),%st(3) // t0
*p10_minus_p20 | t1
*p00_minus_p20 |
166 // t1
*p01_minus_p21
- t0
*p11_minus_p21 |
167 // xstepdenominv | p00_minus_p20 | p11_minus_p21
168 fsubrp
%st(0),%st(1) // t1
*p00_minus_p20
- t0
*p10_minus_p20 |
169 // t1
*p01_minus_p21
- t0
*p11_minus_p21 |
170 // xstepdenominv | p00_minus_p20 | p11_minus_p21
171 fld
%st(2) // xstepdenominv |
172 // t1
*p00_minus_p20
- t0
*p10_minus_p20 |
173 // t1
*p01_minus_p21
- t0
*p11_minus_p21 |
174 // xstepdenominv | p00_minus_p20 | p11_minus_p21
175 fmuls float_minus_1
// ystepdenominv |
176 // t1
*p00_minus_p20
- t0
*p10_minus_p20 |
177 // t1
*p01_minus_p21
- t0
*p11_minus_p21 |
178 // xstepdenominv | p00_minus_p20 | p11_minus_p21
179 fxch
%st(2) // t1
*p01_minus_p21
- t0
*p11_minus_p21 |
180 // t1
*p00_minus_p20
- t0
*p10_minus_p20 |
181 // ystepdenominv | xstepdenominv | p00_minus_p20 |
183 fmul %st(3),%st(0) // (t1
*p01_minus_p21
- t0
*p11_minus_p21
)*
185 // t1
*p00_minus_p20
- t0
*p10_minus_p20 |
186 // | ystepdenominv | xstepdenominv |
187 // p00_minus_p20 | p11_minus_p21
188 fxch
%st(1) // t1
*p00_minus_p20
- t0
*p10_minus_p20 |
189 // (t1
*p01_minus_p21
- t0
*p11_minus_p21
)*
190 // xstepdenominv | ystepdenominv |
191 // xstepdenominv | p00_minus_p20 | p11_minus_p21
192 fmul %st(2),%st(0) // (t1
*p00_minus_p20
- t0
*p10_minus_p20
)*
194 // (t1
*p01_minus_p21
- t0
*p11_minus_p21
)*
195 // xstepdenominv | ystepdenominv |
196 // xstepdenominv | p00_minus_p20 | p11_minus_p21
198 fistpl C
(r_lstepy
) // r_lstepx | ystepdenominv | xstepdenominv |
199 // p00_minus_p20 | p11_minus_p21
200 fistpl C
(r_lstepx
) // ystepdenominv | xstepdenominv | p00_minus_p20 |
// t0 = r_p0[2] - r_p2[2];
// t1 = r_p1[2] - r_p2[2];
207 fildl C
(r_p2
)+8 // r_p2
[2] | ystepdenominv | xstepdenominv |
208 // p00_minus_p20 | p11_minus_p21
209 fildl C
(r_p0
)+8 // r_p0
[2] | r_p2
[2] | ystepdenominv |
210 // xstepdenominv | p00_minus_p20 | p11_minus_p21
211 fildl C
(r_p1
)+8 // r_p1
[2] | r_p0
[2] | r_p2
[2] | ystepdenominv |
212 // xstepdenominv | p00_minus_p20 | p11_minus_p21
213 fxch
%st(2) // r_p2
[2] | r_p0
[2] | r_p1
[2] | ystepdenominv |
214 // xstepdenominv | p00_minus_p20 | p11_minus_p21
215 fld
%st(0) // r_p2
[2] | r_p2
[2] | r_p0
[2] | r_p1
[2] |
216 // ystepdenominv | xstepdenominv | p00_minus_p20 |
218 fsubrp
%st(0),%st(2) // r_p2
[2] | t0 | r_p1
[2] | ystepdenominv |
219 // xstepdenominv | p00_minus_p20 | p11_minus_p21
220 fsubrp
%st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv |
221 // p00_minus_p20 | p11_minus_p21
// r_sstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
//     xstepdenominv);
// r_sstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
//     ystepdenominv);
228 fld
%st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv
229 fmul %st(6),%st(0) // t0
*p11_minus_p21 | t0 | t1 | ystepdenominv |
230 // xstepdenominv | p00_minus_p20 | p11_minus_p21
231 fxch
%st(2) // t1 | t0 | t0
*p11_minus_p21 | ystepdenominv |
232 // xstepdenominv | p00_minus_p20 | p11_minus_p21
233 fld
%st(0) // t1 | t1 | t0 | t0
*p11_minus_p21 |
234 // ystepdenominv | xstepdenominv | p00_minus_p20 |
236 fmuls p01_minus_p21
// t1
*p01_minus_p21 | t1 | t0 | t0
*p11_minus_p21 |
237 // ystepdenominv | xstepdenominv | p00_minus_p20 |
239 fxch
%st(2) // t0 | t1 | t1
*p01_minus_p21 | t0
*p11_minus_p21 |
240 // ystepdenominv | xstepdenominv | p00_minus_p20 |
242 fmuls p10_minus_p20
// t0
*p10_minus_p20 | t1 | t1
*p01_minus_p21 |
243 // t0
*p11_minus_p21 | ystepdenominv |
244 // xstepdenominv | p00_minus_p20 | p11_minus_p21
245 fxch
%st(1) // t1 | t0
*p10_minus_p20 | t1
*p01_minus_p21 |
246 // t0
*p11_minus_p21 | ystepdenominv |
247 // xstepdenominv | p00_minus_p20 | p11_minus_p21
248 fmul %st(6),%st(0) // t1
*p00_minus_p20 | t0
*p10_minus_p20 |
249 // t1
*p01_minus_p21 | t0
*p11_minus_p21 |
250 // ystepdenominv | xstepdenominv | p00_minus_p20 |
252 fxch
%st(2) // t1
*p01_minus_p21 | t0
*p10_minus_p20 |
253 // t1
*p00_minus_p20 | t0
*p11_minus_p21 |
254 // ystepdenominv | xstepdenominv | p00_minus_p20 |
256 fsubp
%st(0),%st(3) // t0
*p10_minus_p20 | t1
*p00_minus_p20 |
257 // t1
*p01_minus_p21
- t0
*p11_minus_p21 |
258 // ystepdenominv | xstepdenominv | p00_minus_p20 |
260 fsubrp
%st(0),%st(1) // t1
*p00_minus_p20
- t0
*p10_minus_p20 |
261 // t1
*p01_minus_p21
- t0
*p11_minus_p21 |
262 // ystepdenominv | xstepdenominv | p00_minus_p20 |
264 fmul %st(2),%st(0) // (t1
*p00_minus_p20
- t0
*p10_minus_p20
)*
266 // t1
*p01_minus_p21
- t0
*p11_minus_p21 |
267 // ystepdenominv | xstepdenominv | p00_minus_p20 |
269 fxch
%st(1) // t1
*p01_minus_p21
- t0
*p11_minus_p21 |
270 // (t1
*p00_minus_p20
- t0
*p10_minus_p20
)*
271 // ystepdenominv | ystepdenominv |
272 // xstepdenominv | p00_minus_p20 | p11_minus_p21
273 fmul %st(3),%st(0) // (t1
*p01_minus_p21
- t0
*p11_minus_p21
)*
275 // (t1
*p00_minus_p20
- t0
*p10_minus_p20
)*
276 // ystepdenominv | ystepdenominv |
277 // xstepdenominv | p00_minus_p20 | p11_minus_p21
278 fxch
%st(1) // (t1
*p00_minus_p20
- t0
*p10_minus_p20
)*
280 // (t1
*p01_minus_p21
- t0
*p11_minus_p21
)*
281 // xstepdenominv | ystepdenominv |
282 // xstepdenominv | p00_minus_p20 | p11_minus_p21
283 fistpl C
(r_sstepy
) // r_sstepx | ystepdenominv | xstepdenominv |
284 // p00_minus_p20 | p11_minus_p21
285 fistpl C
(r_sstepx
) // ystepdenominv | xstepdenominv | p00_minus_p20 |
// t0 = r_p0[3] - r_p2[3];
// t1 = r_p1[3] - r_p2[3];
291 fildl C
(r_p2
)+12 // r_p2
[3] | ystepdenominv | xstepdenominv |
292 // p00_minus_p20 | p11_minus_p21
293 fildl C
(r_p0
)+12 // r_p0
[3] | r_p2
[3] | ystepdenominv |
294 // xstepdenominv | p00_minus_p20 | p11_minus_p21
295 fildl C
(r_p1
)+12 // r_p1
[3] | r_p0
[3] | r_p2
[3] | ystepdenominv |
296 // xstepdenominv | p00_minus_p20 | p11_minus_p21
297 fxch
%st(2) // r_p2
[3] | r_p0
[3] | r_p1
[3] | ystepdenominv |
298 // xstepdenominv | p00_minus_p20 | p11_minus_p21
299 fld
%st(0) // r_p2
[3] | r_p2
[3] | r_p0
[3] | r_p1
[3] |
300 // ystepdenominv | xstepdenominv | p00_minus_p20 |
302 fsubrp
%st(0),%st(2) // r_p2
[3] | t0 | r_p1
[3] | ystepdenominv |
303 // xstepdenominv | p00_minus_p20 | p11_minus_p21
304 fsubrp
%st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv |
305 // p00_minus_p20 | p11_minus_p21
// r_tstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
//     xstepdenominv);
// r_tstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
//     ystepdenominv);
312 fld
%st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv |
313 // p00_minus_p20 | p11_minus_p21
314 fmul %st(6),%st(0) // t0
*p11_minus_p21 | t0 | t1 | ystepdenominv |
315 // xstepdenominv | p00_minus_p20 | p11_minus_p21
316 fxch
%st(2) // t1 | t0 | t0
*p11_minus_p21 | ystepdenominv |
317 // xstepdenominv | p00_minus_p20 | p11_minus_p21
318 fld
%st(0) // t1 | t1 | t0 | t0
*p11_minus_p21 |
319 // ystepdenominv | xstepdenominv | p00_minus_p20 |
321 fmuls p01_minus_p21
// t1
*p01_minus_p21 | t1 | t0 | t0
*p11_minus_p21 |
322 // ystepdenominv | xstepdenominv | p00_minus_p20 |
324 fxch
%st(2) // t0 | t1 | t1
*p01_minus_p21 | t0
*p11_minus_p21 |
325 // ystepdenominv | xstepdenominv | p00_minus_p20 |
327 fmuls p10_minus_p20
// t0
*p10_minus_p20 | t1 | t1
*p01_minus_p21 |
328 // t0
*p11_minus_p21 | ystepdenominv |
329 // xstepdenominv | p00_minus_p20 | p11_minus_p21
330 fxch
%st(1) // t1 | t0
*p10_minus_p20 | t1
*p01_minus_p21 |
331 // t0
*p11_minus_p21 | ystepdenominv |
332 // xstepdenominv | p00_minus_p20 | p11_minus_p21
333 fmul %st(6),%st(0) // t1
*p00_minus_p20 | t0
*p10_minus_p20 |
334 // t1
*p01_minus_p21 | t0
*p11_minus_p21 |
335 // ystepdenominv | xstepdenominv | p00_minus_p20 |
337 fxch
%st(2) // t1
*p01_minus_p21 | t0
*p10_minus_p20 |
338 // t1
*p00_minus_p20 | t0
*p11_minus_p21 |
339 // ystepdenominv | xstepdenominv | p00_minus_p20 |
341 fsubp
%st(0),%st(3) // t0
*p10_minus_p20 | t1
*p00_minus_p20 |
342 // t1
*p01_minus_p21
- t0
*p11_minus_p21 |
343 // ystepdenominv | xstepdenominv | p00_minus_p20 |
345 fsubrp
%st(0),%st(1) // t1
*p00_minus_p20
- t0
*p10_minus_p20 |
346 // t1
*p01_minus_p21
- t0
*p11_minus_p21 |
347 // ystepdenominv | xstepdenominv | p00_minus_p20 |
349 fmul %st(2),%st(0) // (t1
*p00_minus_p20
- t0
*p10_minus_p20
)*
351 // t1
*p01_minus_p21
- t0
*p11_minus_p21 |
352 // ystepdenominv | xstepdenominv | p00_minus_p20 |
354 fxch
%st(1) // t1
*p01_minus_p21
- t0
*p11_minus_p21 |
355 // (t1
*p00_minus_p20
- t0
*p10_minus_p20
)*
356 // ystepdenominv | ystepdenominv |
357 // xstepdenominv | p00_minus_p20 | p11_minus_p21
358 fmul %st(3),%st(0) // (t1
*p01_minus_p21
- t0
*p11_minus_p21
)*
360 // (t1
*p00_minus_p20
- t0
*p10_minus_p20
)*
361 // ystepdenominv | ystepdenominv |
362 // xstepdenominv | p00_minus_p20 | p11_minus_p21
363 fxch
%st(1) // (t1
*p00_minus_p20
- t0
*p10_minus_p20
)*
365 // (t1
*p01_minus_p21
- t0
*p11_minus_p21
)*
366 // xstepdenominv | ystepdenominv |
367 // xstepdenominv | p00_minus_p20 | p11_minus_p21
368 fistpl C
(r_tstepy
) // r_tstepx | ystepdenominv | xstepdenominv |
369 // p00_minus_p20 | p11_minus_p21
370 fistpl C
(r_tstepx
) // ystepdenominv | xstepdenominv | p00_minus_p20 |
// t0 = r_p0[5] - r_p2[5];
// t1 = r_p1[5] - r_p2[5];
376 fildl C
(r_p2
)+20 // r_p2
[5] | ystepdenominv | xstepdenominv |
377 // p00_minus_p20 | p11_minus_p21
378 fildl C
(r_p0
)+20 // r_p0
[5] | r_p2
[5] | ystepdenominv |
379 // xstepdenominv | p00_minus_p20 | p11_minus_p21
380 fildl C
(r_p1
)+20 // r_p1
[5] | r_p0
[5] | r_p2
[5] | ystepdenominv |
381 // xstepdenominv | p00_minus_p20 | p11_minus_p21
382 fxch
%st(2) // r_p2
[5] | r_p0
[5] | r_p1
[5] | ystepdenominv |
383 // xstepdenominv | p00_minus_p20 | p11_minus_p21
384 fld
%st(0) // r_p2
[5] | r_p2
[5] | r_p0
[5] | r_p1
[5] |
385 // ystepdenominv | xstepdenominv | p00_minus_p20 |
387 fsubrp
%st(0),%st(2) // r_p2
[5] | t0 | r_p1
[5] | ystepdenominv |
388 // xstepdenominv | p00_minus_p20 | p11_minus_p21
389 fsubrp
%st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv |
390 // p00_minus_p20 | p11_minus_p21
// r_zistepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
//     xstepdenominv);
// r_zistepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
//     ystepdenominv);
397 fld
%st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv |
398 // p00_minus_p20 | p11_minus_p21
399 fmulp
%st(0),%st(6) // t0 | t1 | ystepdenominv | xstepdenominv |
400 // p00_minus_p20 | t0
*p11_minus_p21
401 fxch
%st(1) // t1 | t0 | ystepdenominv | xstepdenominv |
402 // p00_minus_p20 | t0
*p11_minus_p21
403 fld
%st(0) // t1 | t1 | t0 | ystepdenominv | xstepdenominv |
404 // p00_minus_p20 | t0
*p11_minus_p21
405 fmuls p01_minus_p21
// t1
*p01_minus_p21 | t1 | t0 | ystepdenominv |
406 // xstepdenominv | p00_minus_p20 |
408 fxch
%st(2) // t0 | t1 | t1
*p01_minus_p21 | ystepdenominv |
409 // xstepdenominv | p00_minus_p20 |
411 fmuls p10_minus_p20
// t0
*p10_minus_p20 | t1 | t1
*p01_minus_p21 |
412 // ystepdenominv | xstepdenominv | p00_minus_p20 |
414 fxch
%st(1) // t1 | t0
*p10_minus_p20 | t1
*p01_minus_p21 |
415 // ystepdenominv | xstepdenominv | p00_minus_p20 |
417 fmulp
%st(0),%st(5) // t0
*p10_minus_p20 | t1
*p01_minus_p21 |
418 // ystepdenominv | xstepdenominv |
419 // t1
*p00_minus_p20 | t0
*p11_minus_p21
420 fxch
%st(5) // t0
*p11_minus_p21 | t1
*p01_minus_p21 |
421 // ystepdenominv | xstepdenominv |
422 // t1
*p00_minus_p20 | t0
*p10_minus_p20
423 fsubrp
%st(0),%st(1) // t1
*p01_minus_p21
- t0
*p11_minus_p21 |
424 // ystepdenominv | xstepdenominv |
425 // t1
*p00_minus_p20 | t0
*p10_minus_p20
426 fxch
%st(3) // t1
*p00_minus_p20 | ystepdenominv |
428 // t1
*p01_minus_p21
- t0
*p11_minus_p21 |
430 fsubp
%st(0),%st(4) // ystepdenominv | xstepdenominv |
431 // t1
*p01_minus_p21
- t0
*p11_minus_p21 |
432 // t1
*p00_minus_p20
- t0
*p10_minus_p20
433 fxch
%st(1) // xstepdenominv | ystepdenominv |
434 // t1
*p01_minus_p21
- t0
*p11_minus_p21 |
435 // t1
*p00_minus_p20
- t0
*p10_minus_p20
436 fmulp
%st(0),%st(2) // ystepdenominv |
437 // (t1
*p01_minus_p21
- t0
*p11_minus_p21
) *
439 // t1
*p00_minus_p20
- t0
*p10_minus_p20
440 fmulp
%st(0),%st(2) // (t1
*p01_minus_p21
- t0
*p11_minus_p21
) *
442 // (t1
*p00_minus_p20
- t0
*p10_minus_p20
) *
444 fistpl C
(r_zistepx
) // (t1
*p00_minus_p20
- t0
*p10_minus_p20
) *
// a_sstepxfrac = r_sstepx << 16;
// a_tstepxfrac = r_tstepx << 16;
//
// a_ststepxwhole = r_affinetridesc.skinwidth * (r_tstepx >> 16) +
//     (r_sstepx >> 16);
454 movl C
(r_sstepx
),%eax
455 movl C
(r_tstepx
),%edx
458 movl
%eax
,C
(a_sstepxfrac
)
459 movl
%edx
,C
(a_tstepxfrac
)
461 movl C
(r_sstepx
),%ecx
462 movl C
(r_tstepx
),%eax
465 imull skinwidth
(%esp
)
467 movl
%eax
,C
(a_ststepxwhole
)
472 //----------------------------------------------------------------------
473 // recursive subdivision affine triangle drawing code
475 // not C-callable because of stdcall return
476 //----------------------------------------------------------------------
482 .globl C(D_PolysetRecursiveTriangle)
483 C
(D_PolysetRecursiveTriangle
):
484 pushl
%ebp
// preserve caller stack frame pointer
485 pushl
%esi
// preserve register variables
// d = lp2[0] - lp1[0];
// if (d < -1 || d > 1)
// d = lp2[1] - lp1[1];
// if (d < -1 || d > 1)
// d = lp3[0] - lp2[0];
// if (d < -1 || d > 1)
// d = lp3[1] - lp2[1];
// if (d < -1 || d > 1)
// d = lp1[0] - lp3[0];
// if (d < -1 || d > 1)
// d = lp1[1] - lp3[1];
// if (d < -1 || d > 1)
// return; // entire tri is filled
604 subl $
24,%esp
// allocate space for
a new vertex
// new[0] = (lp1[0] + lp2[0]) >> 1;
// new[1] = (lp1[1] + lp2[1]) >> 1;
// new[2] = (lp1[2] + lp2[2]) >> 1;
// new[3] = (lp1[3] + lp2[3]) >> 1;
// new[5] = (lp1[5] + lp2[5]) >> 1;
//// draw the point if splitting a leading edge
// if (lp2[1] > lp1[1])
// if ((lp2[1] == lp1[1]) && (lp2[0] < lp1[0]))
// zbuf = zspantable[new[1]] + new[0];
674 movl C
(zspantable
)(,%ecx
,4),%eax
678 cmpw (%eax
,%ebp
,2),%dx
684 movw
%dx
,(%eax
,%ebp
,2)
686 // pix
= d_pcolormap
[skintable
[new
[3]>>16][new
[2]>>16]];
695 movl C
(skintable
)(,%eax
,4),%eax
698 movb
(%eax
,%edx
,),%cl
699 movl C
(d_pcolormap
),%edx
701 movb
(%edx
,%ecx
,),%dl
704 // d_viewbuffer
[d_scantable
[new
[1]] + new
[0]] = pix;
705 movl C
(d_scantable
)(,%ebp
,4),%eax
707 movl C
(d_viewbuffer
),%eax
708 movb
%dl
,(%eax
,%ecx
,1)
715 //// recursively continue
716 // D_PolysetRecursiveTriangle
(lp3
, lp1
, new
);
720 call C
(D_PolysetRecursiveTriangle
)
722 // D_PolysetRecursiveTriangle
(lp3
, new
, lp2
);
727 call C
(D_PolysetRecursiveTriangle
)
731 popl
%ebx
// restore register variables
734 popl
%ebp
// restore caller stack frame pointer
//----------------------------------------------------------------------
// 8-bpp horizontal span drawing code for affine polygons, with smooth
// shading and no transparency
//----------------------------------------------------------------------
745 .globl C(D_PolysetAff8Start)
746 C
(D_PolysetAff8Start
):
748 .globl C(D_PolysetDrawSpans8)
749 C
(D_PolysetDrawSpans8
):
750 pushl
%esi
// preserve register variables
753 movl pspans
(%esp
),%esi
// point to the first span descriptor
754 movl C
(r_zistepx
),%ecx
756 pushl
%ebp
// preserve caller
's stack frame
759 rorl $16,%ecx // put high 16 bits of 1/z step in low word
760 movl spanpackage_t_count(%esi),%edx
766 // lcount = d_aspancount - pspanpackage->count;
768 // errorterm += erroradjustup;
769 // if (errorterm >= 0)
771 // d_aspancount += d_countextrastep;
772 // errorterm -= erroradjustdown;
776 // d_aspancount += ubasestep;
778 movl C(d_aspancount),%eax
781 movl C(erroradjustup),%edx
782 movl C(errorterm),%ebx
786 movl C(erroradjustdown),%edx
787 movl C(d_countextrastep),%edi
789 movl C(d_aspancount),%ebp
790 movl %ebx,C(errorterm)
792 movl %ebp,C(d_aspancount)
793 jmp LRightEdgeStepped
796 movl C(d_aspancount),%edi
797 movl C(ubasestep),%edx
798 movl %ebx,C(errorterm)
800 movl %edi,C(d_aspancount)
809 // set up advancetable
811 movl C(a_ststepxwhole),%ecx
812 movl C(r_affinetridesc)+atd_skinwidth,%edx
814 movl %ecx,advancetable+4 // advance base in t
817 movl %ecx,advancetable // advance extra in t
818 movl C(a_tstepxfrac),%ecx
821 movl %eax,%edx // count
826 shrl $3,%edx // count of full and partial loops
827 movl spanpackage_t_sfrac(%esi),%ebx
830 movl spanpackage_t_pz(%esi),%ecx
834 movl spanpackage_t_pdest(%esi),%edi
835 andl $7,%eax // 0->0, 1->7, 2->6, ... , 7->1
837 subl %eax,%edi // compensate for hardwired offsets
841 movl spanpackage_t_tfrac(%esi),%edx
843 movw spanpackage_t_light(%esi),%dx
844 movl spanpackage_t_zi(%esi),%ebp
846 rorl $16,%ebp // put high 16 bits of 1/z in low word
849 movl spanpackage_t_ptex(%esi),%esi
850 jmp *aff8entryvec_table(,%eax,4)
852 // %bx = count of full and partial loops
853 // %ebx high word = sfrac
856 // %edx high word = tfrac
860 // tstep low word = C(r_lstepx)
861 // tstep high word = C(a_tstepxfrac)
862 // C(a_sstepxfrac) low word = 0
863 // C(a_sstepxfrac) high word = C(a_sstepxfrac)
867 // FIXME: do we need to clamp light? We may need at least a buffer bit to
868 // keep it from poking into tfrac and causing problems
877 movb 0x12345678(%eax),%al
885 addl C(a_sstepxfrac),%ebx
886 adcl advancetable+4(,%eax,4),%esi
895 movb 0x12345678(%eax),%al
903 addl C(a_sstepxfrac),%ebx
904 adcl advancetable+4(,%eax,4),%esi
913 movb 0x12345678(%eax),%al
921 addl C(a_sstepxfrac),%ebx
922 adcl advancetable+4(,%eax,4),%esi
931 movb 0x12345678(%eax),%al
939 addl C(a_sstepxfrac),%ebx
940 adcl advancetable+4(,%eax,4),%esi
949 movb 0x12345678(%eax),%al
957 addl C(a_sstepxfrac),%ebx
958 adcl advancetable+4(,%eax,4),%esi
967 movb 0x12345678(%eax),%al
975 addl C(a_sstepxfrac),%ebx
976 adcl advancetable+4(,%eax,4),%esi
985 movb 0x12345678(%eax),%al
993 addl C(a_sstepxfrac),%ebx
994 adcl advancetable+4(,%eax,4),%esi
1003 movb 0x12345678(%eax),%al
1011 addl C(a_sstepxfrac),%ebx
1012 adcl advancetable+4(,%eax,4),%esi
1020 popl %esi // restore spans pointer
1022 addl $(spanpackage_t_size),%esi // point to next span
1024 movl spanpackage_t_count(%esi),%edx
1025 cmpl $-999999,%edx // any more spans?
1026 jnz LSpanLoop // yes
1029 popl %ebp // restore the caller's stack frame
1030 popl
%ebx
// restore register variables
1035 // draw
a one-long span
1039 movl spanpackage_t_pz
(%esi
),%ecx
1040 movl spanpackage_t_zi
(%esi
),%ebp
1042 rorl $
16,%ebp
// put high
16 bits of
1/z in low word
1043 movl spanpackage_t_ptex
(%esi
),%ebx
1048 movl spanpackage_t_pdest
(%esi
),%edi
1049 movb spanpackage_t_light+
1(%esi
),%ah
1050 addl $
(spanpackage_t_size
),%esi
// point to next span
1053 movb
0x12345678(%eax
),%al
1059 .globl C(D_PolysetAff8End)
1060 C
(D_PolysetAff8End
):
1065 .globl C(D_Aff8Patch)
1067 movl pcolormap
(%esp
),%eax
//----------------------------------------------------------------------
// Alias model polygon dispatching code, combined with subdivided affine
// triangle drawing code
//----------------------------------------------------------------------
1086 .globl C(D_PolysetDraw)
// spanpackage_t spans[DPS_MAXSPANS + 1 +
//     ((CACHE_SIZE - 1) / sizeof(spanpackage_t)) + 1];
//     // one extra because of cache line pretouching
// a_spans = (spanpackage_t *)
//     (((long)&spans[0] + CACHE_SIZE - 1) & ~(CACHE_SIZE - 1));
1095 subl $
(SPAN_SIZE
),%esp
1097 addl $
(CACHE_SIZE
- 1),%eax
1098 andl $
(~
(CACHE_SIZE
- 1)),%eax
1099 movl
%eax
,C
(a_spans
)
1101 // if
(r_affinetridesc.drawtype
)
1104 // D_DrawNonSubdiv
();
1105 movl C
(r_affinetridesc
)+atd_drawtype
,%eax
1107 jz C
(D_DrawNonSubdiv
)
1109 pushl
%ebp
// preserve caller stack frame pointer
1111 // lnumtriangles
= r_affinetridesc.numtriangles;
1112 movl C
(r_affinetridesc
)+atd_numtriangles
,%ebp
1114 pushl
%esi
// preserve register variables
1118 // ptri
= r_affinetridesc.ptriangles;
1119 movl C
(r_affinetridesc
)+atd_ptriangles
,%ebx
1123 // mtriangle_t
*ptri;
1124 // finalvert_t
*pfv
, *index0
, *index1
, *index2;
1126 // int lnumtriangles;
1129 // pfv
= r_affinetridesc.pfinalverts;
1130 movl C
(r_affinetridesc
)+atd_pfinalverts
,%edi
1132 // for
(i
=0 ; i
<lnumtriangles ; i+
+)
1137 // index0
= pfv
+ ptri
[i
].vertindex[0];
1138 // index1
= pfv
+ ptri
[i
].vertindex[1];
1139 // index2
= pfv
+ ptri
[i
].vertindex[2];
1140 movl mtri_vertindex-
16+0(%ebx
,%ebp
,),%ecx
1141 movl mtri_vertindex-
16+4(%ebx
,%ebp
,),%esi
1143 shll $
(fv_shift
),%ecx
1144 movl mtri_vertindex-
16+8(%ebx
,%ebp
,),%edx
1146 shll $
(fv_shift
),%esi
1149 shll $
(fv_shift
),%edx
// if (((index0->v[1]-index1->v[1]) *
//      (index0->v[0]-index2->v[0]) -
//      (index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1])) >= 0)
// d_pcolormap = &((byte *)acolormap)[index0->v[4] & 0xFF00];
1162 fildl fv_v+
4(%ecx
) // i0v1
1163 fildl fv_v+
4(%esi
) // i1v1 | i0v1
1164 fildl fv_v+
0(%ecx
) // i0v0 | i1v1 | i0v1
1165 fildl fv_v+
0(%edx
) // i2v0 | i0v0 | i1v1 | i0v1
1166 fxch
%st(2) // i1v1 | i0v0 | i2v0 | i0v1
1167 fsubr
%st(3),%st(0) // i0v1-i1v1 | i0v0 | i2v0 | i0v1
1168 fildl fv_v+
0(%esi
) // i1v0 | i0v1-i1v1 | i0v0 | i2v0 | i0v1
1169 fxch
%st(2) // i0v0 | i0v1-i1v1 | i1v0 | i2v0 | i0v1
1170 fsub %st(0),%st(3) // i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0 | i0v1
1171 fildl fv_v+
4(%edx
) // i2v1 | i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1
1172 fxch
%st(1) // i0v0 | i2v1 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1
1173 fsubp
%st(0),%st(3) // i2v1 | i0v1-i1v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1
1174 fxch
%st(1) // i0v1-i1v1 | i2v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1
1175 fmulp
%st(0),%st(3) // i2v1 | i0v0-i1v0 | i0v1-i1v1
*i0v0-i2v0 | i0v1
1176 fsubrp
%st(0),%st(3) // i0v0-i1v0 | i0v1-i1v1
*i0v0-i2v0 | i0v1-i2v1
1177 movl fv_v+
16(%ecx
),%eax
1179 fmulp
%st(0),%st(2) // i0v1-i1v1
*i0v0-i2v0 | i0v0-i1v0
*i0v1-i2v1
1180 addl C
(acolormap
),%eax
1181 fsubp
%st(0),%st(1) // (i0v1-i1v1
)*(i0v0-i2v0
)-(i0v0-i1v0
)*(i0v1-i2v1
)
1182 movl
%eax
,C
(d_pcolormap
)
1185 subl $
0x80000001,%eax
1188 // if
(ptri
[i
].facesfront)
1190 // D_PolysetRecursiveTriangle
(index0-
>v
, index1-
>v
, index2-
>v
);
1191 movl mtri_facesfront-
16(%ebx
,%ebp
,),%eax
1198 call C
(D_PolysetRecursiveTriangle
)
1209 // s0
= index0-
>v
[2];
1210 // s1
= index1-
>v
[2];
1211 // s2
= index2-
>v
[2];
1212 movl fv_v+
8(%ecx
),%eax
1214 movl fv_v+
8(%esi
),%eax
1216 movl fv_v+
8(%edx
),%eax
1221 // if
(index0-
>flags
& ALIAS_ONSEAM
)
1222 // index0-
>v
[2] += r_affinetridesc.seamfixupX16;
1223 movl C
(r_affinetridesc
)+atd_seamfixupX16
,%eax
1224 testl $
(ALIAS_ONSEAM
),fv_flags
(%ecx
)
1226 addl
%eax
,fv_v+
8(%ecx
)
1229 // if
(index1-
>flags
& ALIAS_ONSEAM
)
1230 // index1-
>v
[2] += r_affinetridesc.seamfixupX16;
1231 testl $
(ALIAS_ONSEAM
),fv_flags
(%esi
)
1233 addl
%eax
,fv_v+
8(%esi
)
1236 // if
(index2-
>flags
& ALIAS_ONSEAM
)
1237 // index2-
>v
[2] += r_affinetridesc.seamfixupX16;
1238 testl $
(ALIAS_ONSEAM
),fv_flags
(%edx
)
1240 addl
%eax
,fv_v+
8(%edx
)
1243 // D_PolysetRecursiveTriangle
(index0-
>v
, index1-
>v
, index2-
>v
);
1247 call C
(D_PolysetRecursiveTriangle
)
1249 // index0-
>v
[2] = s0;
1250 // index1-
>v
[2] = s1;
1251 // index2-
>v
[2] = s2;
1255 movl
%eax
,fv_v+
8(%edx
)
1257 movl
%eax
,fv_v+
8(%esi
)
1259 movl
%eax
,fv_v+
8(%ecx
)
1268 popl
%edi
// restore the caller
's stack frame
1270 popl %esi // restore register variables
1273 addl $(SPAN_SIZE),%esp
1278 //----------------------------------------------------------------------
1279 // Alias model triangle left-edge scanning code
1280 //----------------------------------------------------------------------
// D_PolysetScanLeftEdge
//
// Walks down the left edge of an alias-model triangle. Following the C
// pseudo-code kept in the comments below, each pass stores the current edge
// state into *d_pedgespanpackage, advances that pointer, and then steps the
// edge state by either the "extra" step (error term turned over) or the
// "base" step, repeating while the height count lasts.
//
// In:  height, read through the height(%esp) stack offset.
//      NOTE(review): the `height` offset macro is defined outside this view.
// Globals written by the visible code: d_pedgespanpackage, errorterm,
//      d_pdest, d_aspancount. The *extrastep / *basestep globals, ubasestep,
//      erroradjustup/erroradjustdown and r_affinetridesc are only read.
//
// NOTE(review): the embedded source line numbers in this listing are not
// contiguous, so some instructions (loop label, flag-setting arithmetic,
// conditional branches, epilogue) are not shown here. The comments below
// describe only the instructions that are visible; confirm against the full
// file before relying on them.
1284 .globl C(D_PolysetScanLeftEdge)
1285 C(D_PolysetScanLeftEdge):
1286 pushl %ebp // preserve caller stack frame pointer
1287 pushl %esi // preserve register variables
// Load the height argument and the edge state that is kept in registers.
1291 movl height(%esp),%eax
1292 movl C(d_sfrac),%ecx
1296 movl C(d_pedgespanpackage),%esi
1297 movl C(d_tfrac),%edx
1298 movl C(d_light),%edi
// Register roles as used by the visible code:
// %edx: d_tfrac, %edi: d_light (loaded above)
// %ebx / %ebp: stored below as the package's ptex / zi
//   NOTE(review): the loads of %ebx and %ebp are not shown in this listing.
1303 // %ecx: d_sfrac in high word, count in low word
1305 // %esi: d_pedgespanpackage, errorterm, scratch alternately
1314 // d_pedgespanpackage->ptex = ptex;
1315 // d_pedgespanpackage->pdest = d_pdest;
1316 // d_pedgespanpackage->pz = d_pz;
1317 // d_pedgespanpackage->count = d_aspancount;
1318 // d_pedgespanpackage->light = d_light;
1319 // d_pedgespanpackage->zi = d_zi;
1320 // d_pedgespanpackage->sfrac = d_sfrac << 16;
1321 // d_pedgespanpackage->tfrac = d_tfrac << 16;
1322 movl %ebx,spanpackage_t_ptex(%esi)
1323 movl C(d_pdest),%eax
1324 movl %eax,spanpackage_t_pdest(%esi)
// NOTE(review): as listed, %eax still holds d_pdest at the pz store below;
// the load of d_pz presumably sits on the source line missing from this
// listing (numbering jumps 1324 -> 1326) -- confirm.
1326 movl %eax,spanpackage_t_pz(%esi)
1327 movl C(d_aspancount),%eax
1328 movl %eax,spanpackage_t_count(%esi)
1329 movl %edi,spanpackage_t_light(%esi)
1330 movl %ebp,spanpackage_t_zi(%esi)
1331 movl %ecx,spanpackage_t_sfrac(%esi)
1332 movl %edx,spanpackage_t_tfrac(%esi)
1334 // pretouch the next cache line
// The byte read into %al is discarded; %eax is reloaded just below.
1335 movb spanpackage_t_size(%esi),%al
1337 // d_pedgespanpackage++;
1338 addl $(spanpackage_t_size),%esi
1339 movl C(erroradjustup),%eax
// Publish the advanced package pointer; %esi is reused for errorterm next.
1340 movl %esi,C(d_pedgespanpackage)
1342 // errorterm += erroradjustup;
1343 movl C(errorterm),%esi
// NOTE(review): the add of erroradjustup (%eax) into %esi, which would set
// the sign flag tested by the js below, is not shown in this listing.
1345 movl C(d_pdest),%eax
1347 // if (errorterm >= 0)
1349 js LNoLeftEdgeTurnover
// --- turnover path: error term went non-negative, take the "extra" step ---
1351 // errorterm -= erroradjustdown;
1352 // d_pdest += d_pdestextrastep;
1353 subl C(erroradjustdown),%esi
1354 addl C(d_pdestextrastep),%eax
1355 movl %esi,C(errorterm)
1356 movl %eax,C(d_pdest)
1358 // d_pz += d_pzextrastep;
1359 // d_aspancount += d_countextrastep;
1360 // d_ptex += d_ptexextrastep;
1361 // d_sfrac += d_sfracextrastep;
1362 // d_ptex += d_sfrac >> 16;
1363 // d_sfrac &= 0xFFFF;
1364 // d_tfrac += d_tfracextrastep;
// NOTE(review): the reload of d_pz into %eax before the pz add, and the
// store back to d_pz, are not shown in this listing.
1366 movl C(d_aspancount),%esi
1367 addl C(d_pzextrastep),%eax
// The adcl directly follows the sfrac add, so the carry out of %ecx is
// folded into the texture pointer in %ebx together with the ptex step.
1368 addl C(d_sfracextrastep),%ecx
1369 adcl C(d_ptexextrastep),%ebx
1370 addl C(d_countextrastep),%esi
1372 movl C(d_tfracextrastep),%eax
1373 movl %esi,C(d_aspancount)
1376 // if (d_tfrac & 0x10000)
1380 // d_ptex += r_affinetridesc.skinwidth;
1381 // d_tfrac &= 0xFFFF;
// NOTE(review): the tfrac add and the conditional branch guarding this
// skinwidth add are not shown in this listing.
1382 addl C(r_affinetridesc)+atd_skinwidth,%ebx
1388 // d_light += d_lightextrastep;
1389 // d_zi += d_ziextrastep;
1390 addl C(d_lightextrastep),%edi
1391 addl C(d_ziextrastep),%ebp
// Reload the package pointer that was parked in memory while %esi was scratch.
1394 movl C(d_pedgespanpackage),%esi
// --- no-turnover path: error term still negative, take the "base" step ---
1408 LNoLeftEdgeTurnover:
1409 movl %esi,C(errorterm)
1411 // d_pdest += d_pdestbasestep;
1412 addl C(d_pdestbasestep),%eax
1413 movl %eax,C(d_pdest)
1415 // d_pz += d_pzbasestep;
1416 // d_aspancount += ubasestep;
1417 // d_ptex += d_ptexbasestep;
1418 // d_sfrac += d_sfracbasestep;
1419 // d_ptex += d_sfrac >> 16;
1420 // d_sfrac &= 0xFFFF;
// Same add/adc carry chain as the turnover path, using the base steps.
1422 movl C(d_aspancount),%esi
1423 addl C(d_pzbasestep),%eax
1424 addl C(d_sfracbasestep),%ecx
1425 adcl C(d_ptexbasestep),%ebx
1426 addl C(ubasestep),%esi
1428 movl %esi,C(d_aspancount)
1430 // d_tfrac += d_tfracbasestep;
1431 movl C(d_tfracbasestep),%esi
1434 // if (d_tfrac & 0x10000)
1438 // d_ptex += r_affinetridesc.skinwidth;
1439 // d_tfrac &= 0xFFFF;
// NOTE(review): as above, the tfrac add and the branch guarding the
// skinwidth add are not shown in this listing.
1440 addl C(r_affinetridesc)+atd_skinwidth,%ebx
1446 // d_light += d_lightbasestep;
1447 // d_zi += d_zibasestep;
1448 addl C(d_lightbasestep),%edi
1449 addl C(d_zibasestep),%ebp
1452 // } while (--height);
// Reload the package pointer for the next pass.
// NOTE(review): the count decrement, loop branch, register restores and ret
// are not shown in this listing.
1453 movl C(d_pedgespanpackage),%esi
1465 //----------------------------------------------------------------------
1466 // Alias model vertex drawing code
1467 //----------------------------------------------------------------------
// D_PolysetDrawFinalVerts
//
// Draws individual alias-model vertices as single pixels. Following the C
// pseudo-code kept in the comments below, for each of numverts vertices it
// checks the vertex against r_refdef.vrectright / vrectbottom, compares its
// z against the z-buffer entry, and on success writes the z value and a
// skin texel remapped through acolormap into d_viewbuffer.
//
// In:  numverts, read through the numverts(%esp) stack offset defined below.
//      %ebx is used as the current vertex pointer (fv).
//      NOTE(review): the load of %ebx is not shown in this listing.
//
// NOTE(review): the embedded source line numbers in this listing are not
// contiguous, so the compares, shifts, masks, conditional branches, loop
// control and epilogue are not shown here. Comments below describe only the
// instructions that are visible; confirm against the full file.
//
// Stack offset of the numverts argument.
// NOTE(review): 8+8 presumably accounts for registers already pushed when
// it is read; not every push is visible here -- confirm.
1470 #define numverts 8+8
1472 .globl C(D_PolysetDrawFinalVerts)
1473 C(D_PolysetDrawFinalVerts):
1474 pushl %ebp // preserve caller stack frame pointer
// %ecx = vertex count
1480 movl numverts(%esp),%ecx
1483 pushl %esi // preserve register variables
1488 // for (i=0 ; i<numverts ; i++, fv++)
1490 // // valid triangle coordinates for filling can include the bottom and
1491 // // right clip edges, due to the fill rule; these shouldn't be drawn
1492 // if
((fv-
>v
[0] < r_refdef.vrectright
) &&
1493 // (fv-
>v
[1] < r_refdef.vrectbottom
))
// %eax = fv->v[0], then %edx = r_refdef.vrectright for the right-edge test
1495 movl fv_v+
0(%ebx
),%eax
1496 movl C
(r_refdef
)+rd_vrectright
,%edx
// %esi = fv->v[1], then %edx = r_refdef.vrectbottom for the bottom-edge test
1499 movl fv_v+
4(%ebx
),%esi
1500 movl C
(r_refdef
)+rd_vrectbottom
,%edx
1504 // zbuf
= zspantable
[fv-
>v
[1]] + fv-
>v
[0];
// %edi = zspantable[v[1]]; v[0] is applied later as a scaled-by-2 index
1505 movl C
(zspantable
)(,%esi
,4),%edi
1507 // z
= fv-
>v
[5]>>16;
// %edx = fv->v[5]
// NOTE(review): the >>16 shift is not shown in this listing.
1508 movl fv_v+
20(%ebx
),%edx
// Compare the 16-bit z in %dx with the z-buffer word at zbuf[v[0]]
1514 cmpw (%edi
,%eax
,2),%dx
// Store the vertex's z into the z-buffer entry
1518 movw
%dx
,(%edi
,%eax
,2)
1520 // pix
= skintable
[fv-
>v
[3]>>16][fv-
>v
[2]>>16];
// %edi = fv->v[3], then %edi = skintable[%edi]; %edx = fv->v[2];
// %dl = byte at %edi + %edx
// NOTE(review): the >>16 shifts on v[3] and v[2] are not shown here.
1521 movl fv_v+
12(%ebx
),%edi
1523 movl C
(skintable
)(,%edi
,4),%edi
1524 movl fv_v+
8(%ebx
),%edx
1526 movb
(%edi
,%edx
),%dl
1528 // pix
= ((byte
*)acolormap
)[pix
+ (fv-
>v
[4] & 0xFF00)];
// %edi = fv->v[4]; %edx = acolormap; %dl = acolormap[%edi]
// NOTE(review): the masking of v[4] and merging of pix into the index are
// not shown in this listing.
1529 movl fv_v+
16(%ebx
),%edi
1533 movl C
(acolormap
),%edx
1534 movb
(%edx
,%edi
,1),%dl
1536 // d_viewbuffer
[d_scantable
[fv-
>v
[1]] + fv-
>v
[0]] = pix;
// %edi = d_scantable[v[1]]; %esi = d_viewbuffer; store pix at %esi + %edi
// NOTE(review): the add of v[0] (%eax) into the offset is not shown here.
1537 movl C
(d_scantable
)(,%esi
,4),%edi
1538 movl C
(d_viewbuffer
),%esi
1540 movb
%dl
,(%esi
,%edi
)
// fv++ : advance %ebx to the next vertex
1546 addl $
(fv_size
),%ebx
1557 //----------------------------------------------------------------------
1558 // Alias model non-subdivided polygon dispatching code
1560 // not C-callable because of stack buffer cleanup
1561 //----------------------------------------------------------------------
1563 .globl C(D_DrawNonSubdiv)
1565 pushl
%ebp
// preserve caller stack frame pointer
1566 movl C
(r_affinetridesc
)+atd_numtriangles
,%ebp
1568 shll $
(mtri_shift
),%ebp
1569 pushl
%esi
// preserve register variables
1570 movl C
(r_affinetridesc
)+atd_ptriangles
,%esi
1573 // mtriangle_t
*ptri;
1574 // finalvert_t
*pfv
, *index0
, *index1
, *index2;
1576 // int lnumtriangles;
1578 // pfv
= r_affinetridesc.pfinalverts;
1579 // ptri
= r_affinetridesc.ptriangles;
1580 // lnumtriangles
= r_affinetridesc.numtriangles;
1584 // for
(i
=0 ; i
<lnumtriangles ; i+
+, ptri+
+)
1586 // index0
= pfv
+ ptri-
>vertindex
[0];
1587 // index1
= pfv
+ ptri-
>vertindex
[1];
1588 // index2
= pfv
+ ptri-
>vertindex
[2];
1589 movl C
(r_affinetridesc
)+atd_pfinalverts
,%edi
1590 movl mtri_vertindex+
0-mtri_size
(%esi
,%ebp
,1),%ecx
1591 shll $
(fv_shift
),%ecx
1592 movl mtri_vertindex+
4-mtri_size
(%esi
,%ebp
,1),%edx
1593 shll $
(fv_shift
),%edx
1594 movl mtri_vertindex+
8-mtri_size
(%esi
,%ebp
,1),%ebx
1595 shll $
(fv_shift
),%ebx
1600 // d_xdenom
= (index0-
>v
[1]-index1-
>v
[1]) *
1601 // (index0-
>v
[0]-index2-
>v
[0]) -
1602 // (index0-
>v
[0]-index1-
>v
[0])*(index0-
>v
[1]-index2-
>v
[1]);
1603 movl fv_v+
4(%ecx
),%eax
1604 movl fv_v+
0(%ecx
),%esi
1605 subl fv_v+
4(%edx
),%eax
1606 subl fv_v+
0(%ebx
),%esi
1608 movl fv_v+
0(%ecx
),%esi
1609 movl fv_v+
4(%ecx
),%edi
1610 subl fv_v+
0(%edx
),%esi
1611 subl fv_v+
4(%ebx
),%edi
1615 // if
(d_xdenom
>= 0)
1622 movl
%eax
,C
(d_xdenom
)
1625 // r_p0
[0] = index0-
>v
[0];
// u
1626 // r_p0
[1] = index0-
>v
[1];
// v
1627 // r_p0
[2] = index0-
>v
[2];
// s
1628 // r_p0
[3] = index0-
>v
[3];
// t
1629 // r_p0
[4] = index0-
>v
[4];
// light
1630 // r_p0
[5] = index0-
>v
[5];
// iz
1631 movl fv_v+
0(%ecx
),%eax
1632 movl fv_v+
4(%ecx
),%esi
1635 movl fv_v+
8(%ecx
),%eax
1636 movl fv_v+
12(%ecx
),%esi
1638 movl
%esi
,C
(r_p0
)+12
1639 movl fv_v+
16(%ecx
),%eax
1640 movl fv_v+
20(%ecx
),%esi
1641 movl
%eax
,C
(r_p0
)+16
1642 movl
%esi
,C
(r_p0
)+20
1646 // r_p1
[0] = index1-
>v
[0];
1647 // r_p1
[1] = index1-
>v
[1];
1648 // r_p1
[2] = index1-
>v
[2];
1649 // r_p1
[3] = index1-
>v
[3];
1650 // r_p1
[4] = index1-
>v
[4];
1651 // r_p1
[5] = index1-
>v
[5];
1652 movl fv_v+
0(%edx
),%eax
1653 movl fv_v+
4(%edx
),%esi
1656 movl fv_v+
8(%edx
),%eax
1657 movl fv_v+
12(%edx
),%esi
1659 movl
%esi
,C
(r_p1
)+12
1660 movl fv_v+
16(%edx
),%eax
1661 movl fv_v+
20(%edx
),%esi
1662 movl
%eax
,C
(r_p1
)+16
1663 movl
%esi
,C
(r_p1
)+20
1665 // r_p2
[0] = index2-
>v
[0];
1666 // r_p2
[1] = index2-
>v
[1];
1667 // r_p2
[2] = index2-
>v
[2];
1668 // r_p2
[3] = index2-
>v
[3];
1669 // r_p2
[4] = index2-
>v
[4];
1670 // r_p2
[5] = index2-
>v
[5];
1671 movl fv_v+
0(%ebx
),%eax
1672 movl fv_v+
4(%ebx
),%esi
1675 movl fv_v+
8(%ebx
),%eax
1676 movl fv_v+
12(%ebx
),%esi
1678 movl
%esi
,C
(r_p2
)+12
1679 movl fv_v+
16(%ebx
),%eax
1680 movl fv_v+
20(%ebx
),%esi
1681 movl
%eax
,C
(r_p2
)+16
1682 movl C
(r_affinetridesc
)+atd_ptriangles
,%edi
1683 movl
%esi
,C
(r_p2
)+20
1684 movl mtri_facesfront-mtri_size
(%edi
,%ebp
,1),%eax
1686 // if
(!ptri-
>facesfront
)
1691 // if
(index0-
>flags
& ALIAS_ONSEAM
)
1692 // r_p0
[2] += r_affinetridesc.seamfixupX16;
1693 movl fv_flags
(%ecx
),%eax
1694 movl fv_flags
(%edx
),%esi
1695 movl fv_flags
(%ebx
),%edi
1696 testl $
(ALIAS_ONSEAM
),%eax
1697 movl C
(r_affinetridesc
)+atd_seamfixupX16
,%eax
1702 // if
(index1-
>flags
& ALIAS_ONSEAM
)
1703 // r_p1
[2] += r_affinetridesc.seamfixupX16;
1704 testl $
(ALIAS_ONSEAM
),%esi
1709 // if
(index2-
>flags
& ALIAS_ONSEAM
)
1710 // r_p2
[2] += r_affinetridesc.seamfixupX16;
1711 testl $
(ALIAS_ONSEAM
),%edi
1722 // D_PolysetSetEdgeTable
();
1723 // D_RasterizeAliasPolySmooth
();
1724 call C
(D_PolysetSetEdgeTable
)
1725 call C
(D_RasterizeAliasPolySmooth
)
1728 movl C
(r_affinetridesc
)+atd_ptriangles
,%esi
1738 addl $
(SPAN_SIZE
),%esp