Replaced Exit() by exit() because Exit() is like abort().
[AROS-Contrib.git] / Games / Quake / d_polysa.s
blob71ad9b847a541bf002261378377de03054ad3cac
1 /*
2 Copyright (C) 1996-1997 Id Software, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License
6 as published by the Free Software Foundation; either version 2
7 of the License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 See the GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 */
21 // d_polysa.s
22 // x86 assembly-language polygon model drawing code
25 #include "asm_i386.h"
26 #include "quakeasm.h"
27 #include "asm_draw.h"
28 #include "d_ifacea.h"
30 #if id386
// Sizing constants for the span array.
// Neither macro body is wrapped in parentheses as a whole, so every use
// must supply its own, e.g. $(SPAN_SIZE).
32 // !!! if this is changed, it must be changed in d_polyse.c too !!!
33 #define DPS_MAXSPANS MAXHEIGHT+1
34 // 1 extra for spanpackage that marks end
// SPAN_SIZE is the number of bytes D_PolysetDraw subtracts from %esp for the
// span array. The commented-out definition below is the formula; the live
// definition hard-codes the result.
// NOTE(review): presumably 1024 stands for MAXHEIGHT and 32 for
// spanpackage_t_size -- confirm against the headers before changing either.
36 //#define SPAN_SIZE (((DPS_MAXSPANS + 1 + ((CACHE_SIZE - 1) / spanpackage_t_size)) + 1) * spanpackage_t_size)
37 #define SPAN_SIZE (1024+1+1+1)*32
// Private, writable scratch data for the routines in this file.
40 .data
42 .align 4
// Edge deltas stored by D_PolysetCalcGradients so they can be used as
// memory operands of fmuls while the x87 stack holds the other terms.
43 p10_minus_p20: .single 0
44 p01_minus_p21: .single 0
// temp0, temp1 and Ltemp are not referenced in the portion of the file
// reviewed here.
45 temp0: .single 0
46 temp1: .single 0
47 Ltemp: .single 0
// Entry points into the unrolled 8-pixel loop of D_PolysetDrawSpans8.
// Indexed by (-count) & 7: index 0 enters at LDraw8 (a full group of eight),
// index 7 enters at LDraw1 (one pixel in the first group).
49 aff8entryvec_table: .long LDraw8, LDraw7, LDraw6, LDraw5
50 .long LDraw4, LDraw3, LDraw2, LDraw1
// r_zistepx rotated right by 16 bits; written once per D_PolysetDrawSpans8
// call and added to the (equally rotated) 1/z value for every pixel.
52 lzistepx: .long 0
55 .text
// External routines declared for this file; neither is referenced in the
// portion reviewed here. The declarations are omitted when NeXT is defined.
57 #ifndef NeXT
58 .extern C(D_PolysetSetEdgeTable)
59 .extern C(D_RasterizeAliasPolySmooth)
60 #endif
62 //----------------------------------------------------------------------
63 // affine triangle gradient calculation code
64 //----------------------------------------------------------------------
// D_PolysetCalcGradients
//
// In:   skinwidth(%esp) = 4(%esp): integer skin width (one stack argument)
//       r_p0, r_p1, r_p2: integer vertex arrays, read at byte offsets 0..20
//       d_xdenom: single-precision value used as xstepdenominv
// Out:  r_lstepx/y, r_sstepx/y, r_tstepx/y, r_zistepx/y,
//       a_sstepxfrac, a_tstepxfrac, a_ststepxwhole
// Uses: the x87 stack (up to all eight registers), %eax, %ecx, %edx, flags.
//       Loads ceil_cw and then single_cw into the FPU control word and
//       leaves single_cw in effect.
//
// The trailing comment on each FPU instruction lists the x87 stack after
// that instruction, top of stack first.
66 #define skinwidth 4+0
68 .globl C(D_PolysetCalcGradients)
69 C(D_PolysetCalcGradients):
71 // p00_minus_p20 = r_p0[0] - r_p2[0];
72 // p01_minus_p21 = r_p0[1] - r_p2[1];
73 // p10_minus_p20 = r_p1[0] - r_p2[0];
74 // p11_minus_p21 = r_p1[1] - r_p2[1];
76 // xstepdenominv = 1.0 / (p10_minus_p20 * p01_minus_p21 -
77 // p00_minus_p20 * p11_minus_p21);
79 // ystepdenominv = -xstepdenominv;
// Note: no division is performed in this routine. The reciprocal described
// above is taken from d_xdenom (loaded below), and ystepdenominv is formed
// later by multiplying by float_minus_1.
81 fildl C(r_p0)+0 // r_p0[0]
82 fildl C(r_p2)+0 // r_p2[0] | r_p0[0]
83 fildl C(r_p0)+4 // r_p0[1] | r_p2[0] | r_p0[0]
84 fildl C(r_p2)+4 // r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
85 fildl C(r_p1)+0 // r_p1[0] | r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
86 fildl C(r_p1)+4 // r_p1[1] | r_p1[0] | r_p2[1] | r_p0[1] |
87 // r_p2[0] | r_p0[0]
88 fxch %st(3) // r_p0[1] | r_p1[0] | r_p2[1] | r_p1[1] |
89 // r_p2[0] | r_p0[0]
90 fsub %st(2),%st(0) // p01_minus_p21 | r_p1[0] | r_p2[1] | r_p1[1] |
91 // r_p2[0] | r_p0[0]
92 fxch %st(1) // r_p1[0] | p01_minus_p21 | r_p2[1] | r_p1[1] |
93 // r_p2[0] | r_p0[0]
94 fsub %st(4),%st(0) // p10_minus_p20 | p01_minus_p21 | r_p2[1] |
95 // r_p1[1] | r_p2[0] | r_p0[0]
96 fxch %st(5) // r_p0[0] | p01_minus_p21 | r_p2[1] |
97 // r_p1[1] | r_p2[0] | p10_minus_p20
98 fsubp %st(0),%st(4) // p01_minus_p21 | r_p2[1] | r_p1[1] |
99 // p00_minus_p20 | p10_minus_p20
100 fxch %st(2) // r_p1[1] | r_p2[1] | p01_minus_p21 |
101 // p00_minus_p20 | p10_minus_p20
102 fsubp %st(0),%st(1) // p11_minus_p21 | p01_minus_p21 |
103 // p00_minus_p20 | p10_minus_p20
104 fxch %st(1) // p01_minus_p21 | p11_minus_p21 |
105 // p00_minus_p20 | p10_minus_p20
106 flds C(d_xdenom) // d_xdenom | p01_minus_p21 | p11_minus_p21 |
107 // p00_minus_p20 | p10_minus_p20
108 fxch %st(4) // p10_minus_p20 | p01_minus_p21 | p11_minus_p21 |
109 // p00_minus_p20 | d_xdenom
// Two of the four deltas are spilled to memory; the other two stay on the
// x87 stack for the rest of the routine.
110 fstps p10_minus_p20 // p01_minus_p21 | p11_minus_p21 |
111 // p00_minus_p20 | d_xdenom
// From here on the stack comments call the d_xdenom entry xstepdenominv.
112 fstps p01_minus_p21 // p11_minus_p21 | p00_minus_p20 | xstepdenominv
113 fxch %st(2) // xstepdenominv | p00_minus_p20 | p11_minus_p21
// Light gradients: vertex component [4] (byte offset 16).
// On entry the x87 stack is: xstepdenominv | p00_minus_p20 | p11_minus_p21.
// On exit it is: ystepdenominv | xstepdenominv | p00_minus_p20 |
// p11_minus_p21, and the FPU control word is single_cw.
115 //// ceil () for light so positive steps are exaggerated, negative steps
116 //// diminished, pushing us away from underflow toward overflow. Underflow is
117 //// very visible, overflow is very unlikely, because of ambient lighting
118 // t0 = r_p0[4] - r_p2[4];
119 // t1 = r_p1[4] - r_p2[4];
121 fildl C(r_p2)+16 // r_p2[4] | xstepdenominv | p00_minus_p20 |
122 // p11_minus_p21
123 fildl C(r_p0)+16 // r_p0[4] | r_p2[4] | xstepdenominv |
124 // p00_minus_p20 | p11_minus_p21
125 fildl C(r_p1)+16 // r_p1[4] | r_p0[4] | r_p2[4] | xstepdenominv |
126 // p00_minus_p20 | p11_minus_p21
127 fxch %st(2) // r_p2[4] | r_p0[4] | r_p1[4] | xstepdenominv |
128 // p00_minus_p20 | p11_minus_p21
129 fld %st(0) // r_p2[4] | r_p2[4] | r_p0[4] | r_p1[4] |
130 // xstepdenominv | p00_minus_p20 | p11_minus_p21
131 fsubrp %st(0),%st(2) // r_p2[4] | t0 | r_p1[4] | xstepdenominv |
132 // p00_minus_p20 | p11_minus_p21
133 fsubrp %st(0),%st(2) // t0 | t1 | xstepdenominv | p00_minus_p20 |
134 // p11_minus_p21
136 // r_lstepx = (int)
137 // ceil((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv);
138 // r_lstepy = (int)
139 // ceil((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv);
141 fld %st(0) // t0 | t0 | t1 | xstepdenominv | p00_minus_p20 |
142 // p11_minus_p21
143 fmul %st(5),%st(0) // t0*p11_minus_p21 | t0 | t1 | xstepdenominv |
144 // p00_minus_p20 | p11_minus_p21
145 fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | xstepdenominv |
146 // p00_minus_p20 | p11_minus_p21
147 fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 |
148 // xstepdenominv | p00_minus_p20 | p11_minus_p21
149 fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
150 // xstepdenominv | p00_minus_p20 | p11_minus_p21
151 fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
152 // xstepdenominv | p00_minus_p20 | p11_minus_p21
153 fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
154 // t0*p11_minus_p21 | xstepdenominv |
155 // p00_minus_p20 | p11_minus_p21
156 fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
157 // t0*p11_minus_p21 | xstepdenominv |
158 // p00_minus_p20 | p11_minus_p21
159 fmul %st(5),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 |
160 // t1*p01_minus_p21 | t0*p11_minus_p21 |
161 // xstepdenominv | p00_minus_p20 | p11_minus_p21
162 fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 |
163 // t1*p00_minus_p20 | t0*p11_minus_p21 |
164 // xstepdenominv | p00_minus_p20 | p11_minus_p21
165 fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 |
166 // t1*p01_minus_p21 - t0*p11_minus_p21 |
167 // xstepdenominv | p00_minus_p20 | p11_minus_p21
168 fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
169 // t1*p01_minus_p21 - t0*p11_minus_p21 |
170 // xstepdenominv | p00_minus_p20 | p11_minus_p21
// ystepdenominv is created here (copy of xstepdenominv times
// float_minus_1) and stays on the stack for the s, t and 1/z sections.
171 fld %st(2) // xstepdenominv |
172 // t1*p00_minus_p20 - t0*p10_minus_p20 |
173 // t1*p01_minus_p21 - t0*p11_minus_p21 |
174 // xstepdenominv | p00_minus_p20 | p11_minus_p21
175 fmuls float_minus_1 // ystepdenominv |
176 // t1*p00_minus_p20 - t0*p10_minus_p20 |
177 // t1*p01_minus_p21 - t0*p11_minus_p21 |
178 // xstepdenominv | p00_minus_p20 | p11_minus_p21
179 fxch %st(2) // t1*p01_minus_p21 - t0*p11_minus_p21 |
180 // t1*p00_minus_p20 - t0*p10_minus_p20 |
181 // ystepdenominv | xstepdenominv | p00_minus_p20 |
182 // p11_minus_p21
183 fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)*
184 // xstepdenominv |
185 // t1*p00_minus_p20 - t0*p10_minus_p20 |
186 // | ystepdenominv | xstepdenominv |
187 // p00_minus_p20 | p11_minus_p21
188 fxch %st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
189 // (t1*p01_minus_p21 - t0*p11_minus_p21)*
190 // xstepdenominv | ystepdenominv |
191 // xstepdenominv | p00_minus_p20 | p11_minus_p21
192 fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
193 // ystepdenominv |
194 // (t1*p01_minus_p21 - t0*p11_minus_p21)*
195 // xstepdenominv | ystepdenominv |
196 // xstepdenominv | p00_minus_p20 | p11_minus_p21
// Only the two light stores run under ceil_cw; single_cw is loaded again
// immediately afterwards and is what the later fistpl stores run under.
197 fldcw ceil_cw
198 fistpl C(r_lstepy) // r_lstepx | ystepdenominv | xstepdenominv |
199 // p00_minus_p20 | p11_minus_p21
200 fistpl C(r_lstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 |
201 // p11_minus_p21
202 fldcw single_cw
// s (texture column) gradients: vertex component [2] (byte offset 8).
// Stack on entry and on exit: ystepdenominv | xstepdenominv |
// p00_minus_p20 | p11_minus_p21.
// The stack comments below reach eight entries, i.e. every x87 register is
// in use, so no further value can be pushed inside this sequence.
204 // t0 = r_p0[2] - r_p2[2];
205 // t1 = r_p1[2] - r_p2[2];
207 fildl C(r_p2)+8 // r_p2[2] | ystepdenominv | xstepdenominv |
208 // p00_minus_p20 | p11_minus_p21
209 fildl C(r_p0)+8 // r_p0[2] | r_p2[2] | ystepdenominv |
210 // xstepdenominv | p00_minus_p20 | p11_minus_p21
211 fildl C(r_p1)+8 // r_p1[2] | r_p0[2] | r_p2[2] | ystepdenominv |
212 // xstepdenominv | p00_minus_p20 | p11_minus_p21
213 fxch %st(2) // r_p2[2] | r_p0[2] | r_p1[2] | ystepdenominv |
214 // xstepdenominv | p00_minus_p20 | p11_minus_p21
215 fld %st(0) // r_p2[2] | r_p2[2] | r_p0[2] | r_p1[2] |
216 // ystepdenominv | xstepdenominv | p00_minus_p20 |
217 // p11_minus_p21
218 fsubrp %st(0),%st(2) // r_p2[2] | t0 | r_p1[2] | ystepdenominv |
219 // xstepdenominv | p00_minus_p20 | p11_minus_p21
220 fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv |
221 // p00_minus_p20 | p11_minus_p21
223 // r_sstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
224 // xstepdenominv);
225 // r_sstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
226 // ystepdenominv);
228 fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv
229 fmul %st(6),%st(0) // t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
230 // xstepdenominv | p00_minus_p20 | p11_minus_p21
231 fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
232 // xstepdenominv | p00_minus_p20 | p11_minus_p21
233 fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 |
234 // ystepdenominv | xstepdenominv | p00_minus_p20 |
235 // p11_minus_p21
236 fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
237 // ystepdenominv | xstepdenominv | p00_minus_p20 |
238 // p11_minus_p21
239 fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
240 // ystepdenominv | xstepdenominv | p00_minus_p20 |
241 // p11_minus_p21
242 fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
243 // t0*p11_minus_p21 | ystepdenominv |
244 // xstepdenominv | p00_minus_p20 | p11_minus_p21
245 fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
246 // t0*p11_minus_p21 | ystepdenominv |
247 // xstepdenominv | p00_minus_p20 | p11_minus_p21
248 fmul %st(6),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 |
249 // t1*p01_minus_p21 | t0*p11_minus_p21 |
250 // ystepdenominv | xstepdenominv | p00_minus_p20 |
251 // p11_minus_p21
252 fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 |
253 // t1*p00_minus_p20 | t0*p11_minus_p21 |
254 // ystepdenominv | xstepdenominv | p00_minus_p20 |
255 // p11_minus_p21
256 fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 |
257 // t1*p01_minus_p21 - t0*p11_minus_p21 |
258 // ystepdenominv | xstepdenominv | p00_minus_p20 |
259 // p11_minus_p21
260 fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
261 // t1*p01_minus_p21 - t0*p11_minus_p21 |
262 // ystepdenominv | xstepdenominv | p00_minus_p20 |
263 // p11_minus_p21
264 fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
265 // ystepdenominv |
266 // t1*p01_minus_p21 - t0*p11_minus_p21 |
267 // ystepdenominv | xstepdenominv | p00_minus_p20 |
268 // p11_minus_p21
269 fxch %st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 |
270 // (t1*p00_minus_p20 - t0*p10_minus_p20)*
271 // ystepdenominv | ystepdenominv |
272 // xstepdenominv | p00_minus_p20 | p11_minus_p21
273 fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)*
274 // xstepdenominv |
275 // (t1*p00_minus_p20 - t0*p10_minus_p20)*
276 // ystepdenominv | ystepdenominv |
277 // xstepdenominv | p00_minus_p20 | p11_minus_p21
278 fxch %st(1) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
279 // ystepdenominv |
280 // (t1*p01_minus_p21 - t0*p11_minus_p21)*
281 // xstepdenominv | ystepdenominv |
282 // xstepdenominv | p00_minus_p20 | p11_minus_p21
// Integer conversion uses whatever rounding single_cw selects.
// NOTE(review): the pseudo-code writes a C (int) cast -- confirm that
// single_cw is meant to give the same result.
283 fistpl C(r_sstepy) // r_sstepx | ystepdenominv | xstepdenominv |
284 // p00_minus_p20 | p11_minus_p21
285 fistpl C(r_sstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 |
286 // p11_minus_p21
// t (texture row) gradients: vertex component [3] (byte offset 12).
// Same instruction sequence as the s gradients, with offset 12 instead of 8
// and r_tstepx / r_tstepy as destinations.
// Stack on entry and on exit: ystepdenominv | xstepdenominv |
// p00_minus_p20 | p11_minus_p21.
288 // t0 = r_p0[3] - r_p2[3];
289 // t1 = r_p1[3] - r_p2[3];
291 fildl C(r_p2)+12 // r_p2[3] | ystepdenominv | xstepdenominv |
292 // p00_minus_p20 | p11_minus_p21
293 fildl C(r_p0)+12 // r_p0[3] | r_p2[3] | ystepdenominv |
294 // xstepdenominv | p00_minus_p20 | p11_minus_p21
295 fildl C(r_p1)+12 // r_p1[3] | r_p0[3] | r_p2[3] | ystepdenominv |
296 // xstepdenominv | p00_minus_p20 | p11_minus_p21
297 fxch %st(2) // r_p2[3] | r_p0[3] | r_p1[3] | ystepdenominv |
298 // xstepdenominv | p00_minus_p20 | p11_minus_p21
299 fld %st(0) // r_p2[3] | r_p2[3] | r_p0[3] | r_p1[3] |
300 // ystepdenominv | xstepdenominv | p00_minus_p20 |
301 // p11_minus_p21
302 fsubrp %st(0),%st(2) // r_p2[3] | t0 | r_p1[3] | ystepdenominv |
303 // xstepdenominv | p00_minus_p20 | p11_minus_p21
304 fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv |
305 // p00_minus_p20 | p11_minus_p21
307 // r_tstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
308 // xstepdenominv);
309 // r_tstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
310 // ystepdenominv);
312 fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv |
313 // p00_minus_p20 | p11_minus_p21
314 fmul %st(6),%st(0) // t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
315 // xstepdenominv | p00_minus_p20 | p11_minus_p21
316 fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
317 // xstepdenominv | p00_minus_p20 | p11_minus_p21
318 fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 |
319 // ystepdenominv | xstepdenominv | p00_minus_p20 |
320 // p11_minus_p21
321 fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
322 // ystepdenominv | xstepdenominv | p00_minus_p20 |
323 // p11_minus_p21
324 fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
325 // ystepdenominv | xstepdenominv | p00_minus_p20 |
326 // p11_minus_p21
327 fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
328 // t0*p11_minus_p21 | ystepdenominv |
329 // xstepdenominv | p00_minus_p20 | p11_minus_p21
330 fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
331 // t0*p11_minus_p21 | ystepdenominv |
332 // xstepdenominv | p00_minus_p20 | p11_minus_p21
333 fmul %st(6),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 |
334 // t1*p01_minus_p21 | t0*p11_minus_p21 |
335 // ystepdenominv | xstepdenominv | p00_minus_p20 |
336 // p11_minus_p21
337 fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 |
338 // t1*p00_minus_p20 | t0*p11_minus_p21 |
339 // ystepdenominv | xstepdenominv | p00_minus_p20 |
340 // p11_minus_p21
341 fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 |
342 // t1*p01_minus_p21 - t0*p11_minus_p21 |
343 // ystepdenominv | xstepdenominv | p00_minus_p20 |
344 // p11_minus_p21
345 fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
346 // t1*p01_minus_p21 - t0*p11_minus_p21 |
347 // ystepdenominv | xstepdenominv | p00_minus_p20 |
348 // p11_minus_p21
349 fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
350 // ystepdenominv |
351 // t1*p01_minus_p21 - t0*p11_minus_p21 |
352 // ystepdenominv | xstepdenominv | p00_minus_p20 |
353 // p11_minus_p21
354 fxch %st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 |
355 // (t1*p00_minus_p20 - t0*p10_minus_p20)*
356 // ystepdenominv | ystepdenominv |
357 // xstepdenominv | p00_minus_p20 | p11_minus_p21
358 fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)*
359 // xstepdenominv |
360 // (t1*p00_minus_p20 - t0*p10_minus_p20)*
361 // ystepdenominv | ystepdenominv |
362 // xstepdenominv | p00_minus_p20 | p11_minus_p21
363 fxch %st(1) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
364 // ystepdenominv |
365 // (t1*p01_minus_p21 - t0*p11_minus_p21)*
366 // xstepdenominv | ystepdenominv |
367 // xstepdenominv | p00_minus_p20 | p11_minus_p21
368 fistpl C(r_tstepy) // r_tstepx | ystepdenominv | xstepdenominv |
369 // p00_minus_p20 | p11_minus_p21
370 fistpl C(r_tstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 |
371 // p11_minus_p21
// 1/z gradients: vertex component [5] (byte offset 20).
// This is the last FPU section, so it uses the popping forms (fmulp, fsubp,
// fsubrp) and overwrites the kept values p11_minus_p21, p00_minus_p20 and
// both denominators in place. After the two fistpl stores the x87 stack is
// empty.
373 // t0 = r_p0[5] - r_p2[5];
374 // t1 = r_p1[5] - r_p2[5];
376 fildl C(r_p2)+20 // r_p2[5] | ystepdenominv | xstepdenominv |
377 // p00_minus_p20 | p11_minus_p21
378 fildl C(r_p0)+20 // r_p0[5] | r_p2[5] | ystepdenominv |
379 // xstepdenominv | p00_minus_p20 | p11_minus_p21
380 fildl C(r_p1)+20 // r_p1[5] | r_p0[5] | r_p2[5] | ystepdenominv |
381 // xstepdenominv | p00_minus_p20 | p11_minus_p21
382 fxch %st(2) // r_p2[5] | r_p0[5] | r_p1[5] | ystepdenominv |
383 // xstepdenominv | p00_minus_p20 | p11_minus_p21
384 fld %st(0) // r_p2[5] | r_p2[5] | r_p0[5] | r_p1[5] |
385 // ystepdenominv | xstepdenominv | p00_minus_p20 |
386 // p11_minus_p21
387 fsubrp %st(0),%st(2) // r_p2[5] | t0 | r_p1[5] | ystepdenominv |
388 // xstepdenominv | p00_minus_p20 | p11_minus_p21
389 fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv |
390 // p00_minus_p20 | p11_minus_p21
392 // r_zistepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
393 // xstepdenominv);
394 // r_zistepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
395 // ystepdenominv);
397 fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv |
398 // p00_minus_p20 | p11_minus_p21
399 fmulp %st(0),%st(6) // t0 | t1 | ystepdenominv | xstepdenominv |
400 // p00_minus_p20 | t0*p11_minus_p21
401 fxch %st(1) // t1 | t0 | ystepdenominv | xstepdenominv |
402 // p00_minus_p20 | t0*p11_minus_p21
403 fld %st(0) // t1 | t1 | t0 | ystepdenominv | xstepdenominv |
404 // p00_minus_p20 | t0*p11_minus_p21
405 fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | ystepdenominv |
406 // xstepdenominv | p00_minus_p20 |
407 // t0*p11_minus_p21
408 fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | ystepdenominv |
409 // xstepdenominv | p00_minus_p20 |
410 // t0*p11_minus_p21
411 fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
412 // ystepdenominv | xstepdenominv | p00_minus_p20 |
413 // t0*p11_minus_p21
414 fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
415 // ystepdenominv | xstepdenominv | p00_minus_p20 |
416 // t0*p11_minus_p21
417 fmulp %st(0),%st(5) // t0*p10_minus_p20 | t1*p01_minus_p21 |
418 // ystepdenominv | xstepdenominv |
419 // t1*p00_minus_p20 | t0*p11_minus_p21
420 fxch %st(5) // t0*p11_minus_p21 | t1*p01_minus_p21 |
421 // ystepdenominv | xstepdenominv |
422 // t1*p00_minus_p20 | t0*p10_minus_p20
423 fsubrp %st(0),%st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 |
424 // ystepdenominv | xstepdenominv |
425 // t1*p00_minus_p20 | t0*p10_minus_p20
426 fxch %st(3) // t1*p00_minus_p20 | ystepdenominv |
427 // xstepdenominv |
428 // t1*p01_minus_p21 - t0*p11_minus_p21 |
429 // t0*p10_minus_p20
430 fsubp %st(0),%st(4) // ystepdenominv | xstepdenominv |
431 // t1*p01_minus_p21 - t0*p11_minus_p21 |
432 // t1*p00_minus_p20 - t0*p10_minus_p20
433 fxch %st(1) // xstepdenominv | ystepdenominv |
434 // t1*p01_minus_p21 - t0*p11_minus_p21 |
435 // t1*p00_minus_p20 - t0*p10_minus_p20
436 fmulp %st(0),%st(2) // ystepdenominv |
437 // (t1*p01_minus_p21 - t0*p11_minus_p21) *
438 // xstepdenominv |
439 // t1*p00_minus_p20 - t0*p10_minus_p20
440 fmulp %st(0),%st(2) // (t1*p01_minus_p21 - t0*p11_minus_p21) *
441 // xstepdenominv |
442 // (t1*p00_minus_p20 - t0*p10_minus_p20) *
443 // ystepdenominv
444 fistpl C(r_zistepx) // (t1*p00_minus_p20 - t0*p10_minus_p20) *
445 // ystepdenominv
446 fistpl C(r_zistepy)
// Final, integer-only part of D_PolysetCalcGradients: split the s and t
// x-steps into the fractional and whole parts used by the span drawers,
// then return. Clobbers %eax, %ecx, %edx and the flags.
448 // a_sstepxfrac = r_sstepx << 16;
449 // a_tstepxfrac = r_tstepx << 16;
451 // a_ststepxwhole = r_affinetridesc.skinwidth * (r_tstepx >> 16) +
452 // (r_sstepx >> 16);
// The skin width used below is the stack argument skinwidth(%esp); it is
// not loaded from r_affinetridesc here.
454 movl C(r_sstepx),%eax
455 movl C(r_tstepx),%edx
456 shll $16,%eax
457 shll $16,%edx
458 movl %eax,C(a_sstepxfrac)
459 movl %edx,C(a_tstepxfrac)
461 movl C(r_sstepx),%ecx
462 movl C(r_tstepx),%eax
463 sarl $16,%ecx
464 sarl $16,%eax
465 imull skinwidth(%esp) // one-operand form: %edx:%eax = %eax * skinwidth
466 addl %ecx,%eax
467 movl %eax,C(a_ststepxwhole)
// Return to the caller. Without this instruction execution would continue
// straight into D_PolysetRecursiveTriangle, which follows in the file.
// Plain ret: nothing was pushed, and the argument is left for the caller.
469 ret
472 //----------------------------------------------------------------------
473 // recursive subdivision affine triangle drawing code
475 // not C-callable because of stdcall return
476 //----------------------------------------------------------------------
// D_PolysetRecursiveTriangle (lp1, lp2, lp3)
//
// In:   three pointers to integer vertex arrays, on the stack. The 4+16
//       style offsets below are valid after the four register pushes.
// Out:  none. The routine removes its own 12 bytes of arguments (ret $12).
// Regs: %ebp, %esi, %edi, %ebx are saved and restored;
//       %eax, %ecx, %edx are scratch.
// Register roles after the loads: %ebx = lp1, %esi = lp2, %edi = lp3.
//
// Each test "d < -1 || d > 1" is done as one unsigned compare: d+1 is
// formed with incl, and "ja" after "cmpl $2" is taken exactly when d+1 is
// not 0, 1 or 2.
478 #define lp1 4+16
479 #define lp2 8+16
480 #define lp3 12+16
482 .globl C(D_PolysetRecursiveTriangle)
483 C(D_PolysetRecursiveTriangle):
484 pushl %ebp // preserve caller stack frame pointer
485 pushl %esi // preserve register variables
486 pushl %edi
487 pushl %ebx
489 // int *temp;
490 // int d;
491 // int new[6];
492 // int i;
493 // int z;
494 // short *zbuf;
495 movl lp2(%esp),%esi
496 movl lp1(%esp),%ebx
497 movl lp3(%esp),%edi
499 // d = lp2[0] - lp1[0];
500 // if (d < -1 || d > 1)
501 // goto split;
502 movl 0(%esi),%eax
504 movl 0(%ebx),%edx
505 movl 4(%esi),%ebp
507 subl %edx,%eax
508 movl 4(%ebx),%ecx
510 subl %ecx,%ebp // %ebp = lp2[1] - lp1[1], for the next test
511 incl %eax
513 cmpl $2,%eax
514 ja LSplit
516 // d = lp2[1] - lp1[1];
517 // if (d < -1 || d > 1)
518 // goto split;
519 movl 0(%edi),%eax // preload lp3[0] for the following test
520 incl %ebp
522 cmpl $2,%ebp
523 ja LSplit
525 // d = lp3[0] - lp2[0];
526 // if (d < -1 || d > 1)
527 // goto split2;
528 movl 0(%esi),%edx
529 movl 4(%edi),%ebp
531 subl %edx,%eax
532 movl 4(%esi),%ecx
534 subl %ecx,%ebp // %ebp = lp3[1] - lp2[1], for the next test
535 incl %eax
537 cmpl $2,%eax
538 ja LSplit2
540 // d = lp3[1] - lp2[1];
541 // if (d < -1 || d > 1)
542 // goto split2;
543 movl 0(%ebx),%eax // preload lp1[0] for the following test
544 incl %ebp
546 cmpl $2,%ebp
547 ja LSplit2
549 // d = lp1[0] - lp3[0];
550 // if (d < -1 || d > 1)
551 // goto split3;
552 movl 0(%edi),%edx
553 movl 4(%ebx),%ebp
555 subl %edx,%eax
556 movl 4(%edi),%ecx
558 subl %ecx,%ebp
559 incl %eax
561 incl %ebp
562 movl %ebx,%edx // temp = lp1, consumed at LSplit3
564 cmpl $2,%eax
565 ja LSplit3
567 // d = lp1[1] - lp3[1];
568 // if (d < -1 || d > 1)
569 // {
570 //split3:
571 // temp = lp1;
572 // lp1 = lp3;
573 // lp3 = lp2;
574 // lp2 = temp;
575 // goto split;
576 // }
578 // return; // entire tri is filled
580 cmpl $2,%ebp
581 jna LDone
// Reached by the jump above or by falling through the last test.
// %edx still holds the original lp1 (temp).
583 LSplit3:
584 movl %edi,%ebx
585 movl %esi,%edi
586 movl %edx,%esi
587 jmp LSplit
// LSplit2 rotates the vertex pointers so the edge to split is lp1-lp2, then
// falls into LSplit, which builds the midpoint vertex "new" in 24 bytes of
// fresh stack space.
// Registers: %ebx = lp1, %esi = lp2, %edi = lp3.
589 //split2:
590 LSplit2:
592 // temp = lp1;
593 // lp1 = lp2;
594 // lp2 = lp3;
595 // lp3 = temp;
596 movl %ebx,%eax
597 movl %esi,%ebx
598 movl %edi,%esi
599 movl %eax,%edi
601 //split:
602 LSplit:
604 subl $24,%esp // allocate space for a new vertex
606 //// split this edge
607 // new[0] = (lp1[0] + lp2[0]) >> 1;
608 // new[1] = (lp1[1] + lp2[1]) >> 1;
609 // new[2] = (lp1[2] + lp2[2]) >> 1;
610 // new[3] = (lp1[3] + lp2[3]) >> 1;
611 // new[5] = (lp1[5] + lp2[5]) >> 1;
// new[4] (offset 16) is never written: only offsets 0, 4, 8, 12 and 20 of
// the new vertex are stored. The averages use sarl, an arithmetic shift.
// The loads and stores of the five components are interleaved.
612 movl 8(%ebx),%eax
614 movl 8(%esi),%edx
615 movl 12(%ebx),%ecx
617 addl %edx,%eax
618 movl 12(%esi),%edx
620 sarl $1,%eax
621 addl %edx,%ecx
623 movl %eax,8(%esp)
624 movl 20(%ebx),%eax
626 sarl $1,%ecx
627 movl 20(%esi),%edx
629 movl %ecx,12(%esp)
630 addl %edx,%eax
// %ecx = lp1[0] is still live at the "lp2[0] < lp1[0]" compare in the draw
// decision that follows.
632 movl 0(%ebx),%ecx
633 movl 0(%esi),%edx
635 sarl $1,%eax
636 addl %ecx,%edx
638 movl %eax,20(%esp)
// %eax = lp1[1] is still live at the "lp2[1] > lp1[1]" compare in the draw
// decision that follows.
639 movl 4(%ebx),%eax
641 sarl $1,%edx
642 movl 4(%esi),%ebp
644 movl %edx,0(%esp)
645 addl %eax,%ebp
647 sarl $1,%ebp
648 movl %ebp,4(%esp)
// Decide whether to plot the midpoint vertex and, if so, plot it with a
// z-buffer test.
// On entry: %eax = lp1[1], %ecx = lp1[0], %esi = lp2; the new vertex is at
// 0(%esp)..20(%esp).
650 //// draw the point if splitting a leading edge
651 // if (lp2[1] > lp1[1])
652 // goto nodraw;
653 cmpl %eax,4(%esi)
654 jg LNoDraw
656 // if ((lp2[1] == lp1[1]) && (lp2[0] < lp1[0]))
657 // goto nodraw;
// The movl below does not alter the flags, so jnz still tests the result of
// the cmpl above (lp2[1] != lp1[1]).
658 movl 0(%esi),%edx
659 jnz LDraw
661 cmpl %ecx,%edx
662 jl LNoDraw
664 LDraw:
666 // z = new[5] >> 16;
667 movl 20(%esp),%edx
668 movl 4(%esp),%ecx // %ecx = new[1]
670 sarl $16,%edx
671 movl 0(%esp),%ebp // %ebp = new[0]
673 // zbuf = zspantable[new[1]] + new[0];
674 movl C(zspantable)(,%ecx,4),%eax
676 // if (z >= *zbuf)
677 // {
// 16-bit signed compare; the z-buffer entries are addressed with scale 2.
678 cmpw (%eax,%ebp,2),%dx
679 jnge LNoDraw
681 // int pix;
683 // *zbuf = z;
684 movw %dx,(%eax,%ebp,2)
686 // pix = d_pcolormap[skintable[new[3]>>16][new[2]>>16]];
687 movl 12(%esp),%eax
689 sarl $16,%eax
690 movl 8(%esp),%edx
692 sarl $16,%edx
693 subl %ecx,%ecx // clear %ecx so the byte load into %cl gives a clean index
695 movl C(skintable)(,%eax,4),%eax
696 movl 4(%esp),%ebp
698 movb (%eax,%edx,),%cl
699 movl C(d_pcolormap),%edx
701 movb (%edx,%ecx,),%dl
702 movl 0(%esp),%ecx
704 // d_viewbuffer[d_scantable[new[1]] + new[0]] = pix;
705 movl C(d_scantable)(,%ebp,4),%eax
706 addl %eax,%ecx
707 movl C(d_viewbuffer),%eax
708 movb %dl,(%eax,%ecx,1)
710 // }
712 //nodraw:
713 LNoDraw:
// Recurse on the two halves, then release the new vertex and return.
// Arguments are pushed last-to-first. Each call removes its own 12 bytes
// (ret $12), so no stack adjustment follows the calls, and %esp points at
// the new vertex again after each return. %esi (lp2) and %edi (lp3) are
// preserved by the callee's push/pop of those registers.
715 //// recursively continue
716 // D_PolysetRecursiveTriangle (lp3, lp1, new);
717 pushl %esp // address of new: %esp before this push points at new[0]
718 pushl %ebx
719 pushl %edi
720 call C(D_PolysetRecursiveTriangle)
722 // D_PolysetRecursiveTriangle (lp3, new, lp2);
723 movl %esp,%ebx // %ebx = new
724 pushl %esi
725 pushl %ebx
726 pushl %edi
727 call C(D_PolysetRecursiveTriangle)
728 addl $24,%esp // free the new vertex allocated at LSplit
// LDone is also the direct exit taken when no edge needs splitting; on that
// path no vertex was allocated.
730 LDone:
731 popl %ebx // restore register variables
732 popl %edi
733 popl %esi
734 popl %ebp // restore caller stack frame pointer
735 ret $12
738 //----------------------------------------------------------------------
739 // 8-bpp horizontal span drawing code for affine polygons, with smooth
740 // shading and no transparency
741 //----------------------------------------------------------------------
// D_PolysetDrawSpans8 (pspans)
//
// In:   one stack argument, a pointer to the first span descriptor. The
//       4+8 offset below is valid after the first two pushes only.
//       The span list ends at a descriptor whose count field is -999999.
// Regs: %esi, %ebx, %ebp, %edi are saved; %eax, %ecx, %edx are scratch.
//
// D_PolysetAff8Start is a second global label at the same address as the
// function entry; D_PolysetAff8End is the matching label after the code.
// NOTE(review): presumably these bound the self-modified region so it can be
// made writable -- confirm at the point where they are used.
743 #define pspans 4+8
745 .globl C(D_PolysetAff8Start)
746 C(D_PolysetAff8Start):
748 .globl C(D_PolysetDrawSpans8)
749 C(D_PolysetDrawSpans8):
750 pushl %esi // preserve register variables
751 pushl %ebx
753 movl pspans(%esp),%esi // point to the first span descriptor
754 movl C(r_zistepx),%ecx
756 pushl %ebp // preserve caller's stack frame
757 pushl %edi
759 rorl $16,%ecx // put high 16 bits of 1/z step in low word
760 movl spanpackage_t_count(%esi),%edx
762 movl %ecx,lzistepx
// Top of the per-span loop.
// On entry: %esi = current span descriptor, %edx = its count field.
// On exit:  %eax = lcount, computed from d_aspancount before it is stepped;
//           errorterm and d_aspancount are updated for the next span.
764 LSpanLoop:
766 // lcount = d_aspancount - pspanpackage->count;
768 // errorterm += erroradjustup;
769 // if (errorterm >= 0)
770 // {
771 // d_aspancount += d_countextrastep;
772 // errorterm -= erroradjustdown;
773 // }
774 // else
775 // {
776 // d_aspancount += ubasestep;
777 // }
778 movl C(d_aspancount),%eax
779 subl %edx,%eax
781 movl C(erroradjustup),%edx
782 movl C(errorterm),%ebx
783 addl %edx,%ebx
784 js LNoTurnover
786 movl C(erroradjustdown),%edx
787 movl C(d_countextrastep),%edi
788 subl %edx,%ebx
789 movl C(d_aspancount),%ebp
790 movl %ebx,C(errorterm)
791 addl %edi,%ebp
792 movl %ebp,C(d_aspancount)
793 jmp LRightEdgeStepped
795 LNoTurnover:
796 movl C(d_aspancount),%edi
797 movl C(ubasestep),%edx
798 movl %ebx,C(errorterm)
799 addl %edx,%edi
800 movl %edi,C(d_aspancount)
// Three-way dispatch on lcount from a single compare:
// less than 1 -> nothing to draw; exactly 1 -> single-pixel path;
// otherwise fall through to the general span setup.
802 LRightEdgeStepped:
803 cmpl $1,%eax
805 jl LNextSpan
806 jz LExactlyOneLong
// General span setup (lcount >= 2, in %eax).
// Packs the per-pixel state into registers as listed after the jump, saves
// the span pointer on the stack, and enters the unrolled loop at the entry
// point selected by (-lcount) & 7.
// advancetable and tstep are defined outside the portion of the file
// reviewed here.
809 // set up advancetable
811 movl C(a_ststepxwhole),%ecx
812 movl C(r_affinetridesc)+atd_skinwidth,%edx
814 movl %ecx,advancetable+4 // advance base in t
815 addl %edx,%ecx
817 movl %ecx,advancetable // advance extra in t
818 movl C(a_tstepxfrac),%ecx
// Combine the two steps in one dword: low word = r_lstepx, high word = the
// high word of a_tstepxfrac.
820 movw C(r_lstepx),%cx
821 movl %eax,%edx // count
823 movl %ecx,tstep
824 addl $7,%edx
826 shrl $3,%edx // count of full and partial loops
827 movl spanpackage_t_sfrac(%esi),%ebx
829 movw %dx,%bx
830 movl spanpackage_t_pz(%esi),%ecx
832 negl %eax
834 movl spanpackage_t_pdest(%esi),%edi
835 andl $7,%eax // 0->0, 1->7, 2->6, ... , 7->1
// Entering the loop part-way skips %eax pixel slots, so both pointers are
// moved back by that many pixels: one byte each for pdest, and two bytes
// each for pz (hence the two subtractions from %ecx).
837 subl %eax,%edi // compensate for hardwired offsets
838 subl %eax,%ecx
840 subl %eax,%ecx
841 movl spanpackage_t_tfrac(%esi),%edx
843 movw spanpackage_t_light(%esi),%dx
844 movl spanpackage_t_zi(%esi),%ebp
846 rorl $16,%ebp // put high 16 bits of 1/z in low word
// Span pointer saved here; it is popped back into %esi after the draw loop.
847 pushl %esi
849 movl spanpackage_t_ptex(%esi),%esi
850 jmp *aff8entryvec_table(,%eax,4)
852 // %bx = count of full and partial loops
853 // %ebx high word = sfrac
854 // %ecx = pz
855 // %dx = light
856 // %edx high word = tfrac
857 // %esi = ptex
858 // %edi = pdest
859 // %ebp = 1/z
860 // tstep low word = C(r_lstepx)
861 // tstep high word = C(a_tstepxfrac)
862 // C(a_sstepxfrac) low word = 0
863 // C(a_sstepxfrac) high word = C(a_sstepxfrac)
// Unrolled draw loop: eight pixel slots per pass. LDraw8 handles destination
// offset 0 and LDraw1 handles offset 7; the first pass may enter at any
// LDrawN.
//
// Each slot does the same thing:
//   - compare the low word of %ebp with the z-buffer entry and skip the
//     pixel if it is smaller (signed 16-bit compare);
//   - otherwise store z, form an index with %ah = high byte of the light
//     word and %al = the texel at (%esi), and read the output byte through
//     the absolute address 0x12345678(%eax);
//   - step light and tfrac together (addl tstep), turning the carry into
//     %eax = 0 or -1 (sbbl);
//   - step the rotated 1/z, adding the carry back in (adcl $0);
//   - step sfrac; adcl then adds that carry plus advancetable+4 (when
//     %eax = 0) or advancetable+0 (when %eax = -1) to the texture pointer.
//
// The 0x12345678 displacement is a placeholder: D_Aff8Patch overwrites the
// four bytes that end at each LPatchN label. The LPatchN labels must
// therefore stay directly after those movb instructions.
865 LDrawLoop:
867 // FIXME: do we need to clamp light? We may need at least a buffer bit to
868 // keep it from poking into tfrac and causing problems
870 LDraw8:
871 cmpw (%ecx),%bp
872 jl Lp1
873 xorl %eax,%eax
874 movb %dh,%ah
875 movb (%esi),%al
876 movw %bp,(%ecx)
877 movb 0x12345678(%eax),%al
878 LPatch8:
879 movb %al,(%edi)
880 Lp1:
881 addl tstep,%edx
882 sbbl %eax,%eax
883 addl lzistepx,%ebp
884 adcl $0,%ebp
885 addl C(a_sstepxfrac),%ebx
886 adcl advancetable+4(,%eax,4),%esi
888 LDraw7:
889 cmpw 2(%ecx),%bp
890 jl Lp2
891 xorl %eax,%eax
892 movb %dh,%ah
893 movb (%esi),%al
894 movw %bp,2(%ecx)
895 movb 0x12345678(%eax),%al
896 LPatch7:
897 movb %al,1(%edi)
898 Lp2:
899 addl tstep,%edx
900 sbbl %eax,%eax
901 addl lzistepx,%ebp
902 adcl $0,%ebp
903 addl C(a_sstepxfrac),%ebx
904 adcl advancetable+4(,%eax,4),%esi
906 LDraw6:
907 cmpw 4(%ecx),%bp
908 jl Lp3
909 xorl %eax,%eax
910 movb %dh,%ah
911 movb (%esi),%al
912 movw %bp,4(%ecx)
913 movb 0x12345678(%eax),%al
914 LPatch6:
915 movb %al,2(%edi)
916 Lp3:
917 addl tstep,%edx
918 sbbl %eax,%eax
919 addl lzistepx,%ebp
920 adcl $0,%ebp
921 addl C(a_sstepxfrac),%ebx
922 adcl advancetable+4(,%eax,4),%esi
924 LDraw5:
925 cmpw 6(%ecx),%bp
926 jl Lp4
927 xorl %eax,%eax
928 movb %dh,%ah
929 movb (%esi),%al
930 movw %bp,6(%ecx)
931 movb 0x12345678(%eax),%al
932 LPatch5:
933 movb %al,3(%edi)
934 Lp4:
935 addl tstep,%edx
936 sbbl %eax,%eax
937 addl lzistepx,%ebp
938 adcl $0,%ebp
939 addl C(a_sstepxfrac),%ebx
940 adcl advancetable+4(,%eax,4),%esi
942 LDraw4:
943 cmpw 8(%ecx),%bp
944 jl Lp5
945 xorl %eax,%eax
946 movb %dh,%ah
947 movb (%esi),%al
948 movw %bp,8(%ecx)
949 movb 0x12345678(%eax),%al
950 LPatch4:
951 movb %al,4(%edi)
952 Lp5:
953 addl tstep,%edx
954 sbbl %eax,%eax
955 addl lzistepx,%ebp
956 adcl $0,%ebp
957 addl C(a_sstepxfrac),%ebx
958 adcl advancetable+4(,%eax,4),%esi
960 LDraw3:
961 cmpw 10(%ecx),%bp
962 jl Lp6
963 xorl %eax,%eax
964 movb %dh,%ah
965 movb (%esi),%al
966 movw %bp,10(%ecx)
967 movb 0x12345678(%eax),%al
968 LPatch3:
969 movb %al,5(%edi)
970 Lp6:
971 addl tstep,%edx
972 sbbl %eax,%eax
973 addl lzistepx,%ebp
974 adcl $0,%ebp
975 addl C(a_sstepxfrac),%ebx
976 adcl advancetable+4(,%eax,4),%esi
978 LDraw2:
979 cmpw 12(%ecx),%bp
980 jl Lp7
981 xorl %eax,%eax
982 movb %dh,%ah
983 movb (%esi),%al
984 movw %bp,12(%ecx)
985 movb 0x12345678(%eax),%al
986 LPatch2:
987 movb %al,6(%edi)
988 Lp7:
989 addl tstep,%edx
990 sbbl %eax,%eax
991 addl lzistepx,%ebp
992 adcl $0,%ebp
993 addl C(a_sstepxfrac),%ebx
994 adcl advancetable+4(,%eax,4),%esi
996 LDraw1:
997 cmpw 14(%ecx),%bp
998 jl Lp8
999 xorl %eax,%eax
1000 movb %dh,%ah
1001 movb (%esi),%al
1002 movw %bp,14(%ecx)
1003 movb 0x12345678(%eax),%al
1004 LPatch1:
1005 movb %al,7(%edi)
1006 Lp8:
1007 addl tstep,%edx
1008 sbbl %eax,%eax
1009 addl lzistepx,%ebp
1010 adcl $0,%ebp
1011 addl C(a_sstepxfrac),%ebx
1012 adcl advancetable+4(,%eax,4),%esi
// Advance to the next group of eight pixels: 8 destination bytes and
// 8 two-byte z-buffer entries.
1014 addl $8,%edi
1015 addl $16,%ecx
// Only the low word of %ebx is the loop counter; the high word holds sfrac
// and is left untouched by the 16-bit decrement.
1017 decw %bx
1018 jnz LDrawLoop
1020 popl %esi // restore spans pointer
// Advance to the next span descriptor, or finish.
// LNextSpan expects %esi = current span; LNextSpanESISet is entered when
// %esi has already been advanced (the single-pixel path does that).
// The list ends at a descriptor whose count field is -999999.
1021 LNextSpan:
1022 addl $(spanpackage_t_size),%esi // point to next span
1023 LNextSpanESISet:
1024 movl spanpackage_t_count(%esi),%edx // LSpanLoop expects the count in %edx
1025 cmpl $-999999,%edx // any more spans?
1026 jnz LSpanLoop // yes
// Epilogue: pops mirror the pushes at function entry
// (%esi, %ebx, %ebp, %edi), in reverse order.
1028 popl %edi
1029 popl %ebp // restore the caller's stack frame
1030 popl %ebx // restore register variables
1031 popl %esi
// Return to the caller. Without this instruction execution would fall into
// LExactlyOneLong with the saved registers already popped.
// Plain ret: the pspans argument is left on the stack for the caller.
1032 ret
// Single-pixel span: same z test and lookup as one slot of the unrolled
// loop, without any stepping. Reached only by the jz at LRightEdgeStepped;
// it always leaves through LNextSpan or LNextSpanESISet.
1035 // draw a one-long span
1037 LExactlyOneLong:
1039 movl spanpackage_t_pz(%esi),%ecx
1040 movl spanpackage_t_zi(%esi),%ebp
1042 rorl $16,%ebp // put high 16 bits of 1/z in low word
1043 movl spanpackage_t_ptex(%esi),%ebx
1045 cmpw (%ecx),%bp
1046 jl LNextSpan // pixel hidden; %esi not advanced yet, so use LNextSpan
1047 xorl %eax,%eax
1048 movl spanpackage_t_pdest(%esi),%edi
1049 movb spanpackage_t_light+1(%esi),%ah // second byte of the light field
// %esi is advanced here, in the middle of the sequence, which is why the
// exit below goes to LNextSpanESISet rather than LNextSpan.
1050 addl $(spanpackage_t_size),%esi // point to next span
1051 movb (%ebx),%al
1052 movw %bp,(%ecx)
// Placeholder displacement, overwritten by D_Aff8Patch via LPatch9-4.
1053 movb 0x12345678(%eax),%al
1054 LPatch9:
1055 movb %al,(%edi)
1057 jmp LNextSpanESISet
// Global label after the span-drawing code, paired with
// D_PolysetAff8Start at its beginning.
1059 .globl C(D_PolysetAff8End)
1060 C(D_PolysetAff8End):
// D_Aff8Patch (pcolormap)
//
// In:   one 32-bit stack argument at 4(%esp).
// Out:  none. Clobbers %eax only.
//
// Stores the argument into the four bytes that end at each of LPatch1 ..
// LPatch9, i.e. over the 0x12345678 placeholder displacement of the
// "movb 0x12345678(%eax),%al" instruction that precedes each label in
// D_PolysetDrawSpans8.
// NOTE(review): this writes into the code of D_PolysetDrawSpans8, so that
// code must be writable when this runs -- confirm how the platform
// arranges that.
1063 #define pcolormap 4
1065 .globl C(D_Aff8Patch)
1066 C(D_Aff8Patch):
1067 movl pcolormap(%esp),%eax
1068 movl %eax,LPatch1-4
1069 movl %eax,LPatch2-4
1070 movl %eax,LPatch3-4
1071 movl %eax,LPatch4-4
1072 movl %eax,LPatch5-4
1073 movl %eax,LPatch6-4
1074 movl %eax,LPatch7-4
1075 movl %eax,LPatch8-4
1076 movl %eax,LPatch9-4
// Return to the caller. Without this instruction execution would continue
// into D_PolysetDraw, which follows in the file.
// Plain ret: the argument is left on the stack for the caller.
1078 ret
//----------------------------------------------------------------------
// Alias model polygon dispatching code, combined with subdivided affine
// triangle drawing code
//
// D_PolysetDraw
//
// Reserves SPAN_SIZE bytes of stack for the span buffer, publishes a
// CACHE_SIZE-aligned pointer to it in a_spans, then either
//   - tail-jumps to D_DrawNonSubdiv when r_affinetridesc.drawtype == 0
//     (that routine releases the span buffer and returns to our caller), or
//   - walks the triangle list from the last triangle to the first, culling
//     back faces and handing the rest to D_PolysetRecursiveTriangle.
//
// In:    no stack arguments; reads r_affinetridesc and acolormap
// Out:   a_spans, d_pcolormap written
// Saved: %ebp, %esi, %ebx, %edi
// Clobb: %eax, %ecx, %edx, flags, x87 stack (balanced on exit)
//
// Register use inside the loop:
//   %ebx: ptri (triangle array base)
//   %ebp: byte offset one triangle past the current one; counts down
//   %edi: pfv  (finalvert array base)
//   %ecx: index0    %esi: index1    %edx: index2
//----------------------------------------------------------------------

.globl C(D_PolysetDraw)
C(D_PolysetDraw):

//	spanpackage_t	spans[DPS_MAXSPANS + 1 +
//			((CACHE_SIZE - 1) / sizeof(spanpackage_t)) + 1];
//						// one extra because of cache line pretouching
//
//	a_spans = (spanpackage_t *)
//			(((long)&spans[0] + CACHE_SIZE - 1) & ~(CACHE_SIZE - 1));
	subl	$(SPAN_SIZE),%esp
	movl	%esp,%eax
	addl	$(CACHE_SIZE - 1),%eax
	andl	$(~(CACHE_SIZE - 1)),%eax
	movl	%eax,C(a_spans)

//	if (r_affinetridesc.drawtype)
//		D_DrawSubdiv ();
//	else
//		D_DrawNonSubdiv ();
	movl	C(r_affinetridesc)+atd_drawtype,%eax
	testl	%eax,%eax
	jz		C(D_DrawNonSubdiv)	// tail jump: it frees the span buffer itself

	pushl	%ebp				// preserve caller stack frame pointer

//	lnumtriangles = r_affinetridesc.numtriangles;
	movl	C(r_affinetridesc)+atd_numtriangles,%ebp

	pushl	%esi				// preserve register variables
	shll	$(mtri_shift),%ebp	// triangle count -> byte offset

	pushl	%ebx
//	ptri = r_affinetridesc.ptriangles;
	movl	C(r_affinetridesc)+atd_ptriangles,%ebx

	pushl	%edi

//	mtriangle_t		*ptri;
//	finalvert_t		*pfv, *index0, *index1, *index2;
//	int				i;
//	int				lnumtriangles;
//	int				s0, s1, s2;

//	pfv = r_affinetridesc.pfinalverts;
	movl	C(r_affinetridesc)+atd_pfinalverts,%edi

// The loop below is bottom-tested, so an empty triangle list must be
// rejected here; otherwise it would read ptri[-1] and wrap the counter.
	testl	%ebp,%ebp
	jz		Ldone2

//	for (i=0 ; i<lnumtriangles ; i++)
//	{

Llooptop:

//		index0 = pfv + ptri[i].vertindex[0];
//		index1 = pfv + ptri[i].vertindex[1];
//		index2 = pfv + ptri[i].vertindex[2];
	movl	mtri_vertindex+0-mtri_size(%ebx,%ebp,1),%ecx
	movl	mtri_vertindex+4-mtri_size(%ebx,%ebp,1),%esi

	shll	$(fv_shift),%ecx
	movl	mtri_vertindex+8-mtri_size(%ebx,%ebp,1),%edx

	shll	$(fv_shift),%esi
	addl	%edi,%ecx

	shll	$(fv_shift),%edx
	addl	%edi,%esi

	addl	%edi,%edx

//		if (((index0->v[1]-index1->v[1]) *
//				(index0->v[0]-index2->v[0]) -
//				(index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1])) >= 0)
//		{
//			continue;
//		}
//
//		d_pcolormap = &((byte *)acolormap)[index0->v[4] & 0xFF00];
//
// (the integer d_pcolormap computation is interleaved with the x87
//  cross-product evaluation)
	fildl	fv_v+4(%ecx)		// i0v1
	fildl	fv_v+4(%esi)		// i1v1 | i0v1
	fildl	fv_v+0(%ecx)		// i0v0 | i1v1 | i0v1
	fildl	fv_v+0(%edx)		// i2v0 | i0v0 | i1v1 | i0v1
	fxch	%st(2)				// i1v1 | i0v0 | i2v0 | i0v1
	fsubr	%st(3),%st(0)		// i0v1-i1v1 | i0v0 | i2v0 | i0v1
	fildl	fv_v+0(%esi)		// i1v0 | i0v1-i1v1 | i0v0 | i2v0 | i0v1
	fxch	%st(2)				// i0v0 | i0v1-i1v1 | i1v0 | i2v0 | i0v1
	fsub	%st(0),%st(3)		// i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0 | i0v1
	fildl	fv_v+4(%edx)		// i2v1 | i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1
	fxch	%st(1)				// i0v0 | i2v1 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1
	fsubp	%st(0),%st(3)		// i2v1 | i0v1-i1v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1
	fxch	%st(1)				// i0v1-i1v1 | i2v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1
	fmulp	%st(0),%st(3)		// i2v1 | i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1
	fsubrp	%st(0),%st(3)		// i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1-i2v1
	movl	fv_v+16(%ecx),%eax
	andl	$0xFF00,%eax
	fmulp	%st(0),%st(2)		// i0v1-i1v1*i0v0-i2v0 | i0v0-i1v0*i0v1-i2v1
	addl	C(acolormap),%eax
	fsubp	%st(0),%st(1)		// (i0v1-i1v1)*(i0v0-i2v0)-(i0v0-i1v0)*(i0v1-i2v1)
	movl	%eax,C(d_pcolormap)
	fstps	Ltemp
	movl	Ltemp,%eax

// Test the sign of the float through its bit pattern: every value with
// the sign bit clear, and -0.0 (0x80000000), is below 0x80000001 as an
// unsigned integer, so the subtract borrows exactly when the result
// is >= 0.
	subl	$0x80000001,%eax
	jc		Lskip

//		if (ptri[i].facesfront)
//		{
//			D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v);
	movl	mtri_facesfront-mtri_size(%ebx,%ebp,1),%eax
	testl	%eax,%eax
	jz		Lfacesback

// NOTE(review): no stack cleanup follows either call below, so
// D_PolysetRecursiveTriangle is expected to pop its own three
// arguments -- confirm against its definition earlier in this file.
	pushl	%edx
	pushl	%esi
	pushl	%ecx
	call	C(D_PolysetRecursiveTriangle)

	subl	$(mtri_size),%ebp
	jnz		Llooptop
	jmp		Ldone2

//		}
//		else
//		{
Lfacesback:

//			s0 = index0->v[2];
//			s1 = index1->v[2];
//			s2 = index2->v[2];
	movl	fv_v+8(%ecx),%eax
	pushl	%eax				// s0
	movl	fv_v+8(%esi),%eax
	pushl	%eax				// s1
	movl	fv_v+8(%edx),%eax
	pushl	%eax				// s2
	pushl	%ecx				// keep index0 and index2 across the call
	pushl	%edx

//			if (index0->flags & ALIAS_ONSEAM)
//				index0->v[2] += r_affinetridesc.seamfixupX16;
	movl	C(r_affinetridesc)+atd_seamfixupX16,%eax
	testl	$(ALIAS_ONSEAM),fv_flags(%ecx)
	jz		Lp11
	addl	%eax,fv_v+8(%ecx)
Lp11:

//			if (index1->flags & ALIAS_ONSEAM)
//				index1->v[2] += r_affinetridesc.seamfixupX16;
	testl	$(ALIAS_ONSEAM),fv_flags(%esi)
	jz		Lp12
	addl	%eax,fv_v+8(%esi)
Lp12:

//			if (index2->flags & ALIAS_ONSEAM)
//				index2->v[2] += r_affinetridesc.seamfixupX16;
	testl	$(ALIAS_ONSEAM),fv_flags(%edx)
	jz		Lp13
	addl	%eax,fv_v+8(%edx)
Lp13:

//			D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v);
	pushl	%edx
	pushl	%esi
	pushl	%ecx
	call	C(D_PolysetRecursiveTriangle)

//			index0->v[2] = s0;
//			index1->v[2] = s1;
//			index2->v[2] = s2;
	popl	%edx				// index2
	popl	%ecx				// index0
	popl	%eax				// s2
	movl	%eax,fv_v+8(%edx)
	popl	%eax				// s1
	movl	%eax,fv_v+8(%esi)
	popl	%eax				// s0
	movl	%eax,fv_v+8(%ecx)

//		}
//	}
Lskip:
	subl	$(mtri_size),%ebp
	jnz		Llooptop

Ldone2:
	popl	%edi				// restore register variables
	popl	%ebx
	popl	%esi
	popl	%ebp				// restore the caller's stack frame pointer

	addl	$(SPAN_SIZE),%esp	// release the span buffer

	ret

//----------------------------------------------------------------------
// Alias model triangle left-edge scanning code
//
// D_PolysetScanLeftEdge
//
// Emits one spanpackage per scanline along a triangle's left edge,
// starting at d_pedgespanpackage, and steps the edge state (d_pdest,
// d_pz, d_aspancount, texture pointer, s/t fractions, light, 1/z) by the
// "base" or "extra" step set according to the sign of errorterm.
//
// In:    height(%esp) = number of scanlines; only the low 16 bits are used
// Out:   spanpackages written; d_pedgespanpackage, errorterm, d_pdest,
//        d_pz and d_aspancount updated in memory
// Saved: %ebp, %esi, %edi, %ebx
// Clobb: %eax, %ecx, %edx, flags
//
// The loop is bottom-tested, exactly like the C reference
// (do { ... } while (--height)), so a zero count is not handled here.
//----------------------------------------------------------------------

#define height	4+16

.globl C(D_PolysetScanLeftEdge)
C(D_PolysetScanLeftEdge):
	pushl	%ebp				// preserve caller stack frame pointer
	pushl	%esi				// preserve register variables
	pushl	%edi
	pushl	%ebx

	movl	height(%esp),%eax
	movl	C(d_sfrac),%ecx
	andl	$0xFFFF,%eax
	movl	C(d_ptex),%ebx
	orl		%eax,%ecx			// pack the count into the low word of d_sfrac
	movl	C(d_pedgespanpackage),%esi
	movl	C(d_tfrac),%edx
	movl	C(d_light),%edi
	movl	C(d_zi),%ebp

// %eax: scratch
// %ebx: d_ptex
// %ecx: d_sfrac in high word, count in low word
// %edx: d_tfrac
// %esi: d_pedgespanpackage, errorterm, scratch alternately
// %edi: d_light
// %ebp: d_zi

//	do
//	{

LScanLoop:

//		d_pedgespanpackage->ptex = ptex;
//		d_pedgespanpackage->pdest = d_pdest;
//		d_pedgespanpackage->pz = d_pz;
//		d_pedgespanpackage->count = d_aspancount;
//		d_pedgespanpackage->light = d_light;
//		d_pedgespanpackage->zi = d_zi;
//		d_pedgespanpackage->sfrac = d_sfrac << 16;
//		d_pedgespanpackage->tfrac = d_tfrac << 16;
	movl	%ebx,spanpackage_t_ptex(%esi)
	movl	C(d_pdest),%eax
	movl	%eax,spanpackage_t_pdest(%esi)
	movl	C(d_pz),%eax
	movl	%eax,spanpackage_t_pz(%esi)
	movl	C(d_aspancount),%eax
	movl	%eax,spanpackage_t_count(%esi)
	movl	%edi,spanpackage_t_light(%esi)
	movl	%ebp,spanpackage_t_zi(%esi)
	movl	%ecx,spanpackage_t_sfrac(%esi)
	movl	%edx,spanpackage_t_tfrac(%esi)

// pretouch the next cache line
	movb	spanpackage_t_size(%esi),%al

//		d_pedgespanpackage++;
	addl	$(spanpackage_t_size),%esi
	movl	C(erroradjustup),%eax
	movl	%esi,C(d_pedgespanpackage)

//		errorterm += erroradjustup;
	movl	C(errorterm),%esi
	addl	%eax,%esi			// sets SF for the js below
	movl	C(d_pdest),%eax		// movl leaves the flags untouched

//		if (errorterm >= 0)
//		{
	js		LNoLeftEdgeTurnover

//			errorterm -= erroradjustdown;
//			d_pdest += d_pdestextrastep;
	subl	C(erroradjustdown),%esi
	addl	C(d_pdestextrastep),%eax
	movl	%esi,C(errorterm)
	movl	%eax,C(d_pdest)

//			d_pz += d_pzextrastep;
//			d_aspancount += d_countextrastep;
//			d_ptex += d_ptexextrastep;
//			d_sfrac += d_sfracextrastep;
//			d_ptex += d_sfrac >> 16;
//			d_sfrac &= 0xFFFF;
//			d_tfrac += d_tfracextrastep;
	movl	C(d_pz),%eax
	movl	C(d_aspancount),%esi
	addl	C(d_pzextrastep),%eax
	addl	C(d_sfracextrastep),%ecx
	adcl	C(d_ptexextrastep),%ebx	// carry out of the sfrac add bumps ptex
	addl	C(d_countextrastep),%esi
	movl	%eax,C(d_pz)
	movl	C(d_tfracextrastep),%eax
	movl	%esi,C(d_aspancount)
	addl	%eax,%edx

//			if (d_tfrac & 0x10000)
//			{
	jnc		LSkip1				// carry out of the tfrac add = row overflow

//				d_ptex += r_affinetridesc.skinwidth;
//				d_tfrac &= 0xFFFF;
	addl	C(r_affinetridesc)+atd_skinwidth,%ebx

//			}

LSkip1:

//			d_light += d_lightextrastep;
//			d_zi += d_ziextrastep;
	addl	C(d_lightextrastep),%edi
	addl	C(d_ziextrastep),%ebp

//		}
	movl	C(d_pedgespanpackage),%esi
	decl	%ecx				// count down the low word
	testl	$0xFFFF,%ecx
	jnz		LScanLoop

	popl	%ebx				// restore register variables
	popl	%edi
	popl	%esi
	popl	%ebp				// restore caller stack frame pointer

	ret							// must not fall into LNoLeftEdgeTurnover

//		else
//		{

LNoLeftEdgeTurnover:
	movl	%esi,C(errorterm)

//			d_pdest += d_pdestbasestep;
	addl	C(d_pdestbasestep),%eax
	movl	%eax,C(d_pdest)

//			d_pz += d_pzbasestep;
//			d_aspancount += ubasestep;
//			d_ptex += d_ptexbasestep;
//			d_sfrac += d_sfracbasestep;
//			d_ptex += d_sfrac >> 16;
//			d_sfrac &= 0xFFFF;
	movl	C(d_pz),%eax
	movl	C(d_aspancount),%esi
	addl	C(d_pzbasestep),%eax
	addl	C(d_sfracbasestep),%ecx
	adcl	C(d_ptexbasestep),%ebx	// carry out of the sfrac add bumps ptex
	addl	C(ubasestep),%esi
	movl	%eax,C(d_pz)
	movl	%esi,C(d_aspancount)

//			d_tfrac += d_tfracbasestep;
	movl	C(d_tfracbasestep),%esi
	addl	%esi,%edx

//			if (d_tfrac & 0x10000)
//			{
	jnc		LSkip2				// carry out of the tfrac add = row overflow

//				d_ptex += r_affinetridesc.skinwidth;
//				d_tfrac &= 0xFFFF;
	addl	C(r_affinetridesc)+atd_skinwidth,%ebx

//			}

LSkip2:

//			d_light += d_lightbasestep;
//			d_zi += d_zibasestep;
	addl	C(d_lightbasestep),%edi
	addl	C(d_zibasestep),%ebp

//		}
//	} while (--height);
	movl	C(d_pedgespanpackage),%esi
	decl	%ecx				// count down the low word
	testl	$0xFFFF,%ecx
	jnz		LScanLoop

	popl	%ebx				// restore register variables
	popl	%edi
	popl	%esi
	popl	%ebp				// restore caller stack frame pointer

	ret

//----------------------------------------------------------------------
// Alias model vertex drawing code
//
// D_PolysetDrawFinalVerts
//
// Plots each vertex of an array as a single z-buffered, colormapped
// pixel.
//
// In:    fv(%esp)       = pointer to the first vertex (stride fv_size)
//        numverts(%esp) = number of vertices (signed)
// Out:   z-buffer and d_viewbuffer updated for every vertex that passes
//        the clip and depth tests
// Saved: %ebp, %ebx, %esi, %edi
// Clobb: %eax, %ecx, %edx, flags
//
// The argument offsets include the 8 bytes of the two pushes that
// precede the argument loads.
//----------------------------------------------------------------------

#define fv			4+8
#define numverts	8+8

.globl C(D_PolysetDrawFinalVerts)
C(D_PolysetDrawFinalVerts):
	pushl	%ebp				// preserve caller stack frame pointer
	pushl	%ebx

//	int		i, z;
//	short	*zbuf;

	movl	numverts(%esp),%ecx
	movl	fv(%esp),%ebx

	pushl	%esi				// preserve register variables
	pushl	%edi

// The loop below is bottom-tested; honour the C loop condition
// (i < numverts) for an empty or negative count instead of wrapping
// the counter.
	testl	%ecx,%ecx
	jle		LFVDone

LFVLoop:

//	for (i=0 ; i<numverts ; i++, fv++)
//	{
//	// valid triangle coordinates for filling can include the bottom and
//	// right clip edges, due to the fill rule; these shouldn't be drawn
//		if ((fv->v[0] < r_refdef.vrectright) &&
//			(fv->v[1] < r_refdef.vrectbottom))
//		{
	movl	fv_v+0(%ebx),%eax
	movl	C(r_refdef)+rd_vrectright,%edx
	cmpl	%edx,%eax
	jge		LNextVert
	movl	fv_v+4(%ebx),%esi
	movl	C(r_refdef)+rd_vrectbottom,%edx
	cmpl	%edx,%esi
	jge		LNextVert

//			zbuf = zspantable[fv->v[1]] + fv->v[0];
	movl	C(zspantable)(,%esi,4),%edi

//			z = fv->v[5]>>16;
	movl	fv_v+20(%ebx),%edx
	shrl	$16,%edx

//			if (z >= *zbuf)
//			{
//				int		pix;
	cmpw	(%edi,%eax,2),%dx	// signed 16-bit depth compare
	jl		LNextVert

//				*zbuf = z;
	movw	%dx,(%edi,%eax,2)

//				pix = skintable[fv->v[3]>>16][fv->v[2]>>16];
	movl	fv_v+12(%ebx),%edi
	shrl	$16,%edi
	movl	C(skintable)(,%edi,4),%edi
	movl	fv_v+8(%ebx),%edx
	shrl	$16,%edx
	movb	(%edi,%edx),%dl

//				pix = ((byte *)acolormap)[pix + (fv->v[4] & 0xFF00)];
	movl	fv_v+16(%ebx),%edi
	andl	$0xFF00,%edi
	andl	$0x00FF,%edx		// drop the stale upper bits left around %dl
	addl	%edx,%edi
	movl	C(acolormap),%edx
	movb	(%edx,%edi,1),%dl

//				d_viewbuffer[d_scantable[fv->v[1]] + fv->v[0]] = pix;
	movl	C(d_scantable)(,%esi,4),%edi
	movl	C(d_viewbuffer),%esi
	addl	%eax,%edi
	movb	%dl,(%esi,%edi)

//			}
//		}
//	}
LNextVert:
	addl	$(fv_size),%ebx
	decl	%ecx
	jnz		LFVLoop

LFVDone:
	popl	%edi				// restore register variables
	popl	%esi
	popl	%ebx
	popl	%ebp				// restore caller stack frame pointer

	ret

//----------------------------------------------------------------------
// Alias model non-subdivided polygon dispatching code
//
// not C-callable because of stack buffer cleanup
//
// D_DrawNonSubdiv
//
// Reached by a jump from D_PolysetDraw with SPAN_SIZE bytes of span
// buffer still allocated below the return address; this routine frees
// that buffer itself before returning to D_PolysetDraw's caller.
//
// Walks the triangle list from the last triangle to the first.  For each
// front-facing triangle (d_xdenom < 0) it copies the three vertices to
// r_p0/r_p1/r_p2, applies the seam fixup to back-side skin triangles,
// stores 1/d_xdenom as a float in d_xdenom, and calls
// D_PolysetSetEdgeTable and D_RasterizeAliasPolySmooth.
//
// In:    no stack arguments; reads r_affinetridesc
// Saved: %ebp, %ebx, %esi, %edi
// Clobb: %eax, %ecx, %edx, flags, x87 stack (balanced on exit)
//
// %ebp: byte offset one triangle past the current one; counts down
// %esi: ptri at the loop top, scratch inside the loop body
// %ecx: index0    %edx: index1    %ebx: index2
//----------------------------------------------------------------------

.globl C(D_DrawNonSubdiv)
C(D_DrawNonSubdiv):
	pushl	%ebp				// preserve caller stack frame pointer
	movl	C(r_affinetridesc)+atd_numtriangles,%ebp
	pushl	%ebx
	shll	$(mtri_shift),%ebp	// triangle count -> byte offset
	pushl	%esi				// preserve register variables
	movl	C(r_affinetridesc)+atd_ptriangles,%esi
	pushl	%edi

//	mtriangle_t		*ptri;
//	finalvert_t		*pfv, *index0, *index1, *index2;
//	int				i;
//	int				lnumtriangles;

//	pfv = r_affinetridesc.pfinalverts;
//	ptri = r_affinetridesc.ptriangles;
//	lnumtriangles = r_affinetridesc.numtriangles;

// The loop below is bottom-tested, so an empty triangle list must be
// rejected here; otherwise it would read ptri[-1] and wrap the counter.
	testl	%ebp,%ebp
	jz		LNDDone

LNDLoop:

//	for (i=0 ; i<lnumtriangles ; i++, ptri++)
//	{
//		index0 = pfv + ptri->vertindex[0];
//		index1 = pfv + ptri->vertindex[1];
//		index2 = pfv + ptri->vertindex[2];
	movl	C(r_affinetridesc)+atd_pfinalverts,%edi
	movl	mtri_vertindex+0-mtri_size(%esi,%ebp,1),%ecx
	shll	$(fv_shift),%ecx
	movl	mtri_vertindex+4-mtri_size(%esi,%ebp,1),%edx
	shll	$(fv_shift),%edx
	movl	mtri_vertindex+8-mtri_size(%esi,%ebp,1),%ebx
	shll	$(fv_shift),%ebx
	addl	%edi,%ecx
	addl	%edi,%edx
	addl	%edi,%ebx

//		d_xdenom = (index0->v[1]-index1->v[1]) *
//				(index0->v[0]-index2->v[0]) -
//				(index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1]);
	movl	fv_v+4(%ecx),%eax
	movl	fv_v+0(%ecx),%esi
	subl	fv_v+4(%edx),%eax
	subl	fv_v+0(%ebx),%esi
	imull	%esi,%eax
	movl	fv_v+0(%ecx),%esi
	movl	fv_v+4(%ecx),%edi
	subl	fv_v+0(%edx),%esi
	subl	fv_v+4(%ebx),%edi
	imull	%esi,%edi
	subl	%edi,%eax			// sets SF for the jns below

//		if (d_xdenom >= 0)
//		{
//			continue;
	jns		LNextTri

//		}

	movl	%eax,C(d_xdenom)
	fildl	C(d_xdenom)			// start the int -> float conversion early

//		r_p0[0] = index0->v[0];		// u
//		r_p0[1] = index0->v[1];		// v
//		r_p0[2] = index0->v[2];		// s
//		r_p0[3] = index0->v[3];		// t
//		r_p0[4] = index0->v[4];		// light
//		r_p0[5] = index0->v[5];		// iz
	movl	fv_v+0(%ecx),%eax
	movl	fv_v+4(%ecx),%esi
	movl	%eax,C(r_p0)+0
	movl	%esi,C(r_p0)+4
	movl	fv_v+8(%ecx),%eax
	movl	fv_v+12(%ecx),%esi
	movl	%eax,C(r_p0)+8
	movl	%esi,C(r_p0)+12
	movl	fv_v+16(%ecx),%eax
	movl	fv_v+20(%ecx),%esi
	movl	%eax,C(r_p0)+16
	movl	%esi,C(r_p0)+20

	fdivrs	float_1				// 1 / d_xdenom, overlapped with the copies below

//		r_p1[0] = index1->v[0];
//		r_p1[1] = index1->v[1];
//		r_p1[2] = index1->v[2];
//		r_p1[3] = index1->v[3];
//		r_p1[4] = index1->v[4];
//		r_p1[5] = index1->v[5];
	movl	fv_v+0(%edx),%eax
	movl	fv_v+4(%edx),%esi
	movl	%eax,C(r_p1)+0
	movl	%esi,C(r_p1)+4
	movl	fv_v+8(%edx),%eax
	movl	fv_v+12(%edx),%esi
	movl	%eax,C(r_p1)+8
	movl	%esi,C(r_p1)+12
	movl	fv_v+16(%edx),%eax
	movl	fv_v+20(%edx),%esi
	movl	%eax,C(r_p1)+16
	movl	%esi,C(r_p1)+20

//		r_p2[0] = index2->v[0];
//		r_p2[1] = index2->v[1];
//		r_p2[2] = index2->v[2];
//		r_p2[3] = index2->v[3];
//		r_p2[4] = index2->v[4];
//		r_p2[5] = index2->v[5];
	movl	fv_v+0(%ebx),%eax
	movl	fv_v+4(%ebx),%esi
	movl	%eax,C(r_p2)+0
	movl	%esi,C(r_p2)+4
	movl	fv_v+8(%ebx),%eax
	movl	fv_v+12(%ebx),%esi
	movl	%eax,C(r_p2)+8
	movl	%esi,C(r_p2)+12
	movl	fv_v+16(%ebx),%eax
	movl	fv_v+20(%ebx),%esi
	movl	%eax,C(r_p2)+16
	movl	C(r_affinetridesc)+atd_ptriangles,%edi
	movl	%esi,C(r_p2)+20
	movl	mtri_facesfront-mtri_size(%edi,%ebp,1),%eax

//		if (!ptri->facesfront)
//		{
	testl	%eax,%eax
	jnz		LFacesFront

//			if (index0->flags & ALIAS_ONSEAM)
//				r_p0[2] += r_affinetridesc.seamfixupX16;
	movl	fv_flags(%ecx),%eax
	movl	fv_flags(%edx),%esi
	movl	fv_flags(%ebx),%edi
	testl	$(ALIAS_ONSEAM),%eax
	movl	C(r_affinetridesc)+atd_seamfixupX16,%eax	// movl keeps the flags
	jz		LOnseamDone0
	addl	%eax,C(r_p0)+8
LOnseamDone0:

//			if (index1->flags & ALIAS_ONSEAM)
// 				r_p1[2] += r_affinetridesc.seamfixupX16;
	testl	$(ALIAS_ONSEAM),%esi
	jz		LOnseamDone1
	addl	%eax,C(r_p1)+8
LOnseamDone1:

//			if (index2->flags & ALIAS_ONSEAM)
//				r_p2[2] += r_affinetridesc.seamfixupX16;
	testl	$(ALIAS_ONSEAM),%edi
	jz		LOnseamDone2
	addl	%eax,C(r_p2)+8
LOnseamDone2:

//		}

LFacesFront:

	fstps	C(d_xdenom)			// d_xdenom now holds the float 1/d_xdenom

//		D_PolysetSetEdgeTable ();
//		D_RasterizeAliasPolySmooth ();
	call	C(D_PolysetSetEdgeTable)
	call	C(D_RasterizeAliasPolySmooth)

LNextTri:
	movl	C(r_affinetridesc)+atd_ptriangles,%esi	// %esi was used as scratch
	subl	$(mtri_size),%ebp
	jnz		LNDLoop
//	}

LNDDone:
	popl	%edi				// restore register variables
	popl	%esi
	popl	%ebx
	popl	%ebp				// restore caller stack frame pointer

	addl	$(SPAN_SIZE),%esp	// release the span buffer allocated by D_PolysetDraw

	ret

1743 #endif // id386