Indentation cleanup.
[AROS-Contrib.git] / Games / Quake / d_parta.s
blob8c6491208f3652ac847e1d78d668bb59c0d2ec6c
1 /*
2 Copyright (C) 1996-1997 Id Software, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License
6 as published by the Free Software Foundation; either version 2
7 of the License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 See the GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
21 // d_parta.s
22 // x86 assembly-language 8-bpp particle-drawing code.
25 #include "asm_i386.h"
26 #include "quakeasm.h"
27 #include "d_ifacea.h"
28 #include "asm_draw.h"
30 #if id386
32 //----------------------------------------------------------------------
33 // 8-bpp particle drawing code.
34 //----------------------------------------------------------------------
36 //FIXME: comments, full optimization
38 //----------------------------------------------------------------------
39 // 8-bpp particle queueing code.
40 //----------------------------------------------------------------------
42 .text
44 #define P 12+4
46 .align 4
47 .globl C(D_DrawParticle)
48 C(D_DrawParticle):
49 pushl %ebp // preserve caller's stack frame
50 pushl %edi // preserve register variables
51 pushl %ebx
53 movl P(%esp),%edi
55 // FIXME: better FP overlap in general here
57 // transform point
58 // VectorSubtract (p->org, r_origin, local);
59 flds C(r_origin)
60 fsubrs pt_org(%edi)
61 flds pt_org+4(%edi)
62 fsubs C(r_origin)+4
63 flds pt_org+8(%edi)
64 fsubs C(r_origin)+8
65 fxch %st(2) // local[0] | local[1] | local[2]
67 // transformed[2] = DotProduct(local, r_ppn);
68 flds C(r_ppn) // r_ppn[0] | local[0] | local[1] | local[2]
69 fmul %st(1),%st(0) // dot0 | local[0] | local[1] | local[2]
70 flds C(r_ppn)+4 // r_ppn[1] | dot0 | local[0] | local[1] | local[2]
71 fmul %st(3),%st(0) // dot1 | dot0 | local[0] | local[1] | local[2]
72 flds C(r_ppn)+8 // r_ppn[2] | dot1 | dot0 | local[0] |
73 // local[1] | local[2]
74 fmul %st(5),%st(0) // dot2 | dot1 | dot0 | local[0] | local[1] | local[2]
75 fxch %st(2) // dot0 | dot1 | dot2 | local[0] | local[1] | local[2]
76 faddp %st(0),%st(1) // dot0 + dot1 | dot2 | local[0] | local[1] |
77 // local[2]
78 faddp %st(0),%st(1) // z | local[0] | local[1] | local[2]
79 fld %st(0) // z | z | local[0] | local[1] |
80 // local[2]
81 fdivrs float_1 // 1/z | z | local[0] | local[1] | local[2]
82 fxch %st(1) // z | 1/z | local[0] | local[1] | local[2]
84 // if (transformed[2] < PARTICLE_Z_CLIP)
85 // return;
86 fcomps float_particle_z_clip // 1/z | local[0] | local[1] | local[2]
87 fxch %st(3) // local[2] | local[0] | local[1] | 1/z
89 flds C(r_pup) // r_pup[0] | local[2] | local[0] | local[1] | 1/z
90 fmul %st(2),%st(0) // dot0 | local[2] | local[0] | local[1] | 1/z
91 flds C(r_pup)+4 // r_pup[1] | dot0 | local[2] | local[0] |
92 // local[1] | 1/z
94 fnstsw %ax
95 testb $1,%ah
96 jnz LPop6AndDone
98 // transformed[1] = DotProduct(local, r_pup);
99 fmul %st(4),%st(0) // dot1 | dot0 | local[2] | local[0] | local[1] | 1/z
100 flds C(r_pup)+8 // r_pup[2] | dot1 | dot0 | local[2] |
101 // local[0] | local[1] | 1/z
102 fmul %st(3),%st(0) // dot2 | dot1 | dot0 | local[2] | local[0] |
103 // local[1] | 1/z
104 fxch %st(2) // dot0 | dot1 | dot2 | local[2] | local[0] |
105 // local[1] | 1/z
106 faddp %st(0),%st(1) // dot0 + dot1 | dot2 | local[2] | local[0] |
107 // local[1] | 1/z
108 faddp %st(0),%st(1) // y | local[2] | local[0] | local[1] | 1/z
109 fxch %st(3) // local[1] | local[2] | local[0] | y | 1/z
111 // transformed[0] = DotProduct(local, r_pright);
112 fmuls C(r_pright)+4 // dot1 | local[2] | local[0] | y | 1/z
113 fxch %st(2) // local[0] | local[2] | dot1 | y | 1/z
114 fmuls C(r_pright) // dot0 | local[2] | dot1 | y | 1/z
115 fxch %st(1) // local[2] | dot0 | dot1 | y | 1/z
116 fmuls C(r_pright)+8 // dot2 | dot0 | dot1 | y | 1/z
117 fxch %st(2) // dot1 | dot0 | dot2 | y | 1/z
118 faddp %st(0),%st(1) // dot1 + dot0 | dot2 | y | 1/z
120 faddp %st(0),%st(1) // x | y | 1/z
121 fxch %st(1) // y | x | 1/z
123 // project the point
124 fmul %st(2),%st(0) // y/z | x | 1/z
125 fxch %st(1) // x | y/z | 1/z
126 fmul %st(2),%st(0) // x/z | y/z | 1/z
127 fxch %st(1) // y/z | x/z | 1/z
128 fsubrs C(ycenter) // v | x/z | 1/z
129 fxch %st(1) // x/z | v | 1/z
130 fadds C(xcenter) // u | v | 1/z
131 // FIXME: preadjust xcenter and ycenter
132 fxch %st(1) // v | u | 1/z
133 fadds float_point5 // v | u | 1/z
134 fxch %st(1) // u | v | 1/z
135 fadds float_point5 // u | v | 1/z
136 fxch %st(2) // 1/z | v | u
137 fmuls DP_32768 // 1/z * 0x8000 | v | u
138 fxch %st(2) // u | v | 1/z * 0x8000
140 // FIXME: use Terje's fp->int trick here?
141 // FIXME: check we're getting proper rounding here
142 fistpl DP_u // v | 1/z * 0x8000
143 fistpl DP_v // 1/z * 0x8000
145 movl DP_u,%eax
146 movl DP_v,%edx
148 // if ((v > d_vrectbottom_particle) ||
149 // (u > d_vrectright_particle) ||
150 // (v < d_vrecty) ||
151 // (u < d_vrectx))
152 // {
153 // continue;
154 // }
156 movl C(d_vrectbottom_particle),%ebx
157 movl C(d_vrectright_particle),%ecx
158 cmpl %ebx,%edx
159 jg LPop1AndDone
160 cmpl %ecx,%eax
161 jg LPop1AndDone
162 movl C(d_vrecty),%ebx
163 movl C(d_vrectx),%ecx
164 cmpl %ebx,%edx
165 jl LPop1AndDone
167 cmpl %ecx,%eax
168 jl LPop1AndDone
170 flds pt_color(%edi) // color | 1/z * 0x8000
171 // FIXME: use Terje's fast fp->int trick?
172 fistpl DP_Color // 1/z * 0x8000
174 movl C(d_viewbuffer),%ebx
176 addl %eax,%ebx
177 movl C(d_scantable)(,%edx,4),%edi // point to the pixel
179 imull C(d_zrowbytes),%edx // point to the z pixel
181 leal (%edx,%eax,2),%edx
182 movl C(d_pzbuffer),%eax
184 fistpl izi
186 addl %ebx,%edi
187 addl %eax,%edx
189 // pix = izi >> d_pix_shift;
191 movl izi,%eax
192 movl C(d_pix_shift),%ecx
193 shrl %cl,%eax
194 movl izi,%ebp
196 // if (pix < d_pix_min)
197 // pix = d_pix_min;
198 // else if (pix > d_pix_max)
199 // pix = d_pix_max;
201 movl C(d_pix_min),%ebx
202 movl C(d_pix_max),%ecx
203 cmpl %ebx,%eax
204 jnl LTestPixMax
205 movl %ebx,%eax
206 jmp LTestDone
208 LTestPixMax:
209 cmpl %ecx,%eax
210 jng LTestDone
211 movl %ecx,%eax
212 LTestDone:
214 movb DP_Color,%ch
216 movl C(d_y_aspect_shift),%ebx
217 testl %ebx,%ebx
218 jnz LDefault
220 cmpl $4,%eax
221 ja LDefault
223 jmp *DP_EntryTable-4(,%eax,4)
// 1x1 particle.
// On entry: %edx -> z-buffer entry, %edi -> destination pixel,
// %bp = particle depth, %ch = color.
.globl DP_1x1
DP_1x1:
	cmpw	%bp,(%edx)			// stored z greater than ours (signed)?
	jnle	LDone				//  yes: leave the pixel alone
	movw	%bp,(%edx)			// otherwise record the new depth...
	movb	%ch,(%edi)			// ...and plot the single pixel
	jmp		LDone
// 2x2 particle: two rows of two z-tested pixels.
// On entry: %edx -> z-buffer entry, %edi -> destination pixel,
// %bp = particle depth, %ch = color.  Each pixel is written only when
// the stored z is not greater than %bp (signed 16-bit compare).
.globl DP_2x2
DP_2x2:
	pushl	%esi						// %esi is borrowed for the z row stride
	movl	C(d_zrowbytes),%esi			// %esi = z-buffer row stride
	movl	C(screenwidth),%ebx			// %ebx = frame-buffer row stride

// row 0
	cmpw	%bp,(%edx)
	jg		1f
	movw	%bp,(%edx)
	movb	%ch,(%edi)
1:
	cmpw	%bp,2(%edx)
	jg		1f
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
1:

// row 1
	cmpw	%bp,(%edx,%esi)
	jg		1f
	movw	%bp,(%edx,%esi)
	movb	%ch,(%edi,%ebx)
1:
	cmpw	%bp,2(%edx,%esi)
	jg		1f
	movw	%bp,2(%edx,%esi)
	movb	%ch,1(%edi,%ebx)
1:

	popl	%esi
	jmp		LDone
// 3x3 particle: three rows of three z-tested pixels.
// On entry: %edx -> z-buffer entry, %edi -> destination pixel,
// %bp = particle depth, %ch = color.  Each pixel is written only when
// the stored z is not greater than %bp (signed 16-bit compare).
.globl DP_3x3
DP_3x3:
	pushl	%esi						// %esi is borrowed for the z row stride
	movl	C(d_zrowbytes),%esi			// %esi = z-buffer row stride
	movl	C(screenwidth),%ebx			// %ebx = frame-buffer row stride

// row 0
	cmpw	%bp,(%edx)
	jg		1f
	movw	%bp,(%edx)
	movb	%ch,(%edi)
1:
	cmpw	%bp,2(%edx)
	jg		1f
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
1:
	cmpw	%bp,4(%edx)
	jg		1f
	movw	%bp,4(%edx)
	movb	%ch,2(%edi)
1:

// row 1
	cmpw	%bp,(%edx,%esi)
	jg		1f
	movw	%bp,(%edx,%esi)
	movb	%ch,(%edi,%ebx)
1:
	cmpw	%bp,2(%edx,%esi)
	jg		1f
	movw	%bp,2(%edx,%esi)
	movb	%ch,1(%edi,%ebx)
1:
	cmpw	%bp,4(%edx,%esi)
	jg		1f
	movw	%bp,4(%edx,%esi)
	movb	%ch,2(%edi,%ebx)
1:

// row 2 (strides scaled by two)
	cmpw	%bp,(%edx,%esi,2)
	jg		1f
	movw	%bp,(%edx,%esi,2)
	movb	%ch,(%edi,%ebx,2)
1:
	cmpw	%bp,2(%edx,%esi,2)
	jg		1f
	movw	%bp,2(%edx,%esi,2)
	movb	%ch,1(%edi,%ebx,2)
1:
	cmpw	%bp,4(%edx,%esi,2)
	jg		1f
	movw	%bp,4(%edx,%esi,2)
	movb	%ch,2(%edi,%ebx,2)
1:

	popl	%esi
	jmp		LDone
// 4x4 particle: four rows of four z-tested pixels.
// On entry: %edx -> z-buffer entry, %edi -> destination pixel,
// %bp = particle depth, %ch = color.  Each pixel is written only when
// the stored z is not greater than %bp (signed 16-bit compare).
// Rows are handled in two pairs; %edx and %edi are advanced by two rows
// between the pairs.
.globl DP_4x4
DP_4x4:
	pushl	%esi						// %esi is borrowed for the z row stride
	movl	C(d_zrowbytes),%esi			// %esi = z-buffer row stride
	movl	C(screenwidth),%ebx			// %ebx = frame-buffer row stride

// row 0
	cmpw	%bp,(%edx)
	jg		1f
	movw	%bp,(%edx)
	movb	%ch,(%edi)
1:
	cmpw	%bp,2(%edx)
	jg		1f
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
1:
	cmpw	%bp,4(%edx)
	jg		1f
	movw	%bp,4(%edx)
	movb	%ch,2(%edi)
1:
	cmpw	%bp,6(%edx)
	jg		1f
	movw	%bp,6(%edx)
	movb	%ch,3(%edi)
1:

// row 1
	cmpw	%bp,(%edx,%esi)
	jg		1f
	movw	%bp,(%edx,%esi)
	movb	%ch,(%edi,%ebx)
1:
	cmpw	%bp,2(%edx,%esi)
	jg		1f
	movw	%bp,2(%edx,%esi)
	movb	%ch,1(%edi,%ebx)
1:
	cmpw	%bp,4(%edx,%esi)
	jg		1f
	movw	%bp,4(%edx,%esi)
	movb	%ch,2(%edi,%ebx)
1:
	cmpw	%bp,6(%edx,%esi)
	jg		1f
	movw	%bp,6(%edx,%esi)
	movb	%ch,3(%edi,%ebx)
1:

// step both pointers down two rows
	leal	(%edx,%esi,2),%edx
	leal	(%edi,%ebx,2),%edi

// row 2
	cmpw	%bp,(%edx)
	jg		1f
	movw	%bp,(%edx)
	movb	%ch,(%edi)
1:
	cmpw	%bp,2(%edx)
	jg		1f
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
1:
	cmpw	%bp,4(%edx)
	jg		1f
	movw	%bp,4(%edx)
	movb	%ch,2(%edi)
1:
	cmpw	%bp,6(%edx)
	jg		1f
	movw	%bp,6(%edx)
	movb	%ch,3(%edi)
1:

// row 3
	cmpw	%bp,(%edx,%esi)
	jg		1f
	movw	%bp,(%edx,%esi)
	movb	%ch,(%edi,%ebx)
1:
	cmpw	%bp,2(%edx,%esi)
	jg		1f
	movw	%bp,2(%edx,%esi)
	movb	%ch,1(%edi,%ebx)
1:
	cmpw	%bp,4(%edx,%esi)
	jg		1f
	movw	%bp,4(%edx,%esi)
	movb	%ch,2(%edi,%ebx)
1:
	cmpw	%bp,6(%edx,%esi)
	jg		1f
	movw	%bp,6(%edx,%esi)
	movb	%ch,3(%edi,%ebx)
1:

	popl	%esi
	jmp		LDone
// General case: any particle size, any d_y_aspect_shift.
// On entry: %eax = pix, %edx -> z-buffer entry, %edi -> destination
// pixel, %bp = particle depth, %ch = color.
// Falls through to LDone when the last row has been drawn.
LDefault:

// count = pix << d_y_aspect_shift;

	movl	%eax,DP_Pix					// keep pix; %eax is the column counter below
	movl	%eax,%ebx
	movb	C(d_y_aspect_shift),%cl		// only %cl is written, %ch keeps the color
	shll	%cl,%ebx					// %ebx = count (rows to draw)

//	for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth)
//	{
//		for (i=0 ; i<pix ; i++)
//		{
//			if (pz[i] <= izi)
//			{
//				pz[i] = izi;
//				pdest[i] = color;
//			}
//		}
//	}
// Columns are walked from pix down to 1, so the operands are biased by
// one element (-2 bytes of z, -1 byte of pixel).

1:										// next row
	movl	DP_Pix,%eax

2:										// next column
	cmpw	%bp,-2(%edx,%eax,2)
	jg		3f
	movw	%bp,-2(%edx,%eax,2)
	movb	%ch,-1(%edi,%eax)
3:
	decl	%eax						// --pix
	jnz		2b

	addl	C(d_zrowbytes),%edx
	addl	C(screenwidth),%edi

	decl	%ebx						// --count
	jnz		1b
// Common exit: undo the three pushes made on entry and return to the
// caller.
LDone:
	popl	%ebx				// restore register variables
	popl	%edi
	popl	%ebp				// restore the caller's stack frame
	ret							// must not run on into the x87 clean-up below,
								//  which jumps straight back to LDone

// Early-out paths.  They discard whatever is still on the x87 stack so
// it is left balanced, then leave through LDone.
//	LPop6AndDone: taken by the z-clip reject, with six values pushed
//	LPop1AndDone: taken by the view-rectangle rejects, with one value pushed
LPop6AndDone:
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
LPop1AndDone:
	fstp	%st(0)
	jmp		LDone
477 #endif // id386