2 Copyright (C) 1996-1997 Id Software, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License
6 as published by the Free Software Foundation; either version 2
7 of the License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 See the GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 // x86 assembly-language 8-bpp particle-drawing code.
32 //----------------------------------------------------------------------
33 // 8-bpp particle drawing code.
34 //----------------------------------------------------------------------
36 //FIXME: comments, full optimization
38 //----------------------------------------------------------------------
39 // 8-bpp particle queueing code.
40 //----------------------------------------------------------------------
47 .globl C(D_DrawParticle)
49 pushl
%ebp
// preserve caller
's stack frame
50 pushl %edi // preserve register variables
55 // FIXME: better FP overlap in general here
58 // VectorSubtract (p->org, r_origin, local);
65 fxch %st(2) // local[0] | local[1] | local[2]
67 // transformed[2] = DotProduct(local, r_ppn);
68 flds C(r_ppn) // r_ppn[0] | local[0] | local[1] | local[2]
69 fmul %st(1),%st(0) // dot0 | local[0] | local[1] | local[2]
70 flds C(r_ppn)+4 // r_ppn[1] | dot0 | local[0] | local[1] | local[2]
71 fmul %st(3),%st(0) // dot1 | dot0 | local[0] | local[1] | local[2]
72 flds C(r_ppn)+8 // r_ppn[2] | dot1 | dot0 | local[0] |
73 // local[1] | local[2]
74 fmul %st(5),%st(0) // dot2 | dot1 | dot0 | local[0] | local[1] | local[2]
75 fxch %st(2) // dot0 | dot1 | dot2 | local[0] | local[1] | local[2]
76 faddp %st(0),%st(1) // dot0 + dot1 | dot2 | local[0] | local[1] |
78 faddp %st(0),%st(1) // z | local[0] | local[1] | local[2]
79 fld %st(0) // z | z | local[0] | local[1] |
81 fdivrs float_1 // 1/z | z | local[0] | local[1] | local[2]
82 fxch %st(1) // z | 1/z | local[0] | local[1] | local[2]
84 // if (transformed[2] < PARTICLE_Z_CLIP)
86 fcomps float_particle_z_clip // 1/z | local[0] | local[1] | local[2]
87 fxch %st(3) // local[2] | local[0] | local[1] | 1/z
89 flds C(r_pup) // r_pup[0] | local[2] | local[0] | local[1] | 1/z
90 fmul %st(2),%st(0) // dot0 | local[2] | local[0] | local[1] | 1/z
91 flds C(r_pup)+4 // r_pup[1] | dot0 | local[2] | local[0] |
98 // transformed[1] = DotProduct(local, r_pup);
99 fmul %st(4),%st(0) // dot1 | dot0 | local[2] | local[0] | local[1] | 1/z
100 flds C(r_pup)+8 // r_pup[2] | dot1 | dot0 | local[2] |
101 // local[0] | local[1] | 1/z
102 fmul %st(3),%st(0) // dot2 | dot1 | dot0 | local[2] | local[0] |
104 fxch %st(2) // dot0 | dot1 | dot2 | local[2] | local[0] |
106 faddp %st(0),%st(1) // dot0 + dot1 | dot2 | local[2] | local[0] |
108 faddp %st(0),%st(1) // y | local[2] | local[0] | local[1] | 1/z
109 fxch %st(3) // local[1] | local[2] | local[0] | y | 1/z
111 // transformed[0] = DotProduct(local, r_pright);
112 fmuls C(r_pright)+4 // dot1 | local[2] | local[0] | y | 1/z
113 fxch %st(2) // local[0] | local[2] | dot1 | y | 1/z
114 fmuls C(r_pright) // dot0 | local[2] | dot1 | y | 1/z
115 fxch %st(1) // local[2] | dot0 | dot1 | y | 1/z
116 fmuls C(r_pright)+8 // dot2 | dot0 | dot1 | y | 1/z
117 fxch %st(2) // dot1 | dot0 | dot2 | y | 1/z
118 faddp %st(0),%st(1) // dot1 + dot0 | dot2 | y | 1/z
120 faddp %st(0),%st(1) // x | y | 1/z
121 fxch %st(1) // y | x | 1/z
124 fmul %st(2),%st(0) // y/z | x | 1/z
125 fxch %st(1) // x | y/z | 1/z
126 fmul %st(2),%st(0) // x/z | y/z | 1/z
127 fxch %st(1) // y/z | x/z | 1/z
128 fsubrs C(ycenter) // v | x/z | 1/z
129 fxch %st(1) // x/z | v | 1/z
130 fadds C(xcenter) // u | v | 1/z
131 // FIXME: preadjust xcenter and ycenter
132 fxch %st(1) // v | u | 1/z
133 fadds float_point5 // v | u | 1/z
134 fxch %st(1) // u | v | 1/z
135 fadds float_point5 // u | v | 1/z
136 fxch %st(2) // 1/z | v | u
137 fmuls DP_32768 // 1/z * 0x8000 | v | u
138 fxch %st(2) // u | v | 1/z * 0x8000
140 // FIXME: use Terje's fp->int trick here?
141 // FIXME: check we're getting proper rounding here
142 fistpl DP_u // v | 1/z * 0x8000
143 fistpl DP_v // 1/z * 0x8000
148 // if ((v > d_vrectbottom_particle) ||
149 // (u > d_vrectright_particle) ||
156 movl C(d_vrectbottom_particle),%ebx
157 movl C(d_vrectright_particle),%ecx
162 movl C(d_vrecty),%ebx
163 movl C(d_vrectx),%ecx
170 flds pt_color(%edi) // color | 1/z * 0x8000
171 // FIXME: use Terje's fast fp->int trick?
172 fistpl DP_Color
// 1/z * 0x8000
174 movl C
(d_viewbuffer
),%ebx
177 movl C
(d_scantable
)(,%edx
,4),%edi
// point to the pixel
179 imull C
(d_zrowbytes
),%edx
// point to the z pixel
181 leal
(%edx
,%eax
,2),%edx
182 movl C
(d_pzbuffer
),%eax
189 // pix = izi >> d_pix_shift;
192 movl C
(d_pix_shift
),%ecx
196 // if (pix < d_pix_min)
198 // else if (pix > d_pix_max)
201 movl C
(d_pix_min
),%ebx
202 movl C
(d_pix_max
),%ecx
216 movl C
(d_y_aspect_shift
),%ebx
223 jmp
*DP_EntryTable-
4(,%eax
,4)
228 cmpw %bp
,(%edx
) // just one pixel to do
238 movl C
(screenwidth
),%ebx
239 movl C
(d_zrowbytes
),%esi
251 cmpw %bp
,(%edx
,%esi
,1)
253 movw
%bp
,(%edx
,%esi
,1)
254 movb
%ch
,(%edi
,%ebx
,1)
256 cmpw %bp
,2(%edx
,%esi
,1)
258 movw
%bp
,2(%edx
,%esi
,1)
259 movb
%ch
,1(%edi
,%ebx
,1)
269 movl C
(screenwidth
),%ebx
270 movl C
(d_zrowbytes
),%esi
288 cmpw %bp
,(%edx
,%esi
,1)
290 movw
%bp
,(%edx
,%esi
,1)
291 movb
%ch
,(%edi
,%ebx
,1)
293 cmpw %bp
,2(%edx
,%esi
,1)
295 movw
%bp
,2(%edx
,%esi
,1)
296 movb
%ch
,1(%edi
,%ebx
,1)
298 cmpw %bp
,4(%edx
,%esi
,1)
300 movw
%bp
,4(%edx
,%esi
,1)
301 movb
%ch
,2(%edi
,%ebx
,1)
304 cmpw %bp
,(%edx
,%esi
,2)
306 movw
%bp
,(%edx
,%esi
,2)
307 movb
%ch
,(%edi
,%ebx
,2)
309 cmpw %bp
,2(%edx
,%esi
,2)
311 movw
%bp
,2(%edx
,%esi
,2)
312 movb
%ch
,1(%edi
,%ebx
,2)
314 cmpw %bp
,4(%edx
,%esi
,2)
316 movw
%bp
,4(%edx
,%esi
,2)
317 movb
%ch
,2(%edi
,%ebx
,2)
328 movl C
(screenwidth
),%ebx
329 movl C
(d_zrowbytes
),%esi
352 cmpw %bp
,(%edx
,%esi
,1)
354 movw
%bp
,(%edx
,%esi
,1)
355 movb
%ch
,(%edi
,%ebx
,1)
357 cmpw %bp
,2(%edx
,%esi
,1)
359 movw
%bp
,2(%edx
,%esi
,1)
360 movb
%ch
,1(%edi
,%ebx
,1)
362 cmpw %bp
,4(%edx
,%esi
,1)
364 movw
%bp
,4(%edx
,%esi
,1)
365 movb
%ch
,2(%edi
,%ebx
,1)
367 cmpw %bp
,6(%edx
,%esi
,1)
369 movw
%bp
,6(%edx
,%esi
,1)
370 movb
%ch
,3(%edi
,%ebx
,1)
373 leal
(%edx
,%esi
,2),%edx
374 leal
(%edi
,%ebx
,2),%edi
397 cmpw %bp
,(%edx
,%esi
,1)
399 movw
%bp
,(%edx
,%esi
,1)
400 movb
%ch
,(%edi
,%ebx
,1)
402 cmpw %bp
,2(%edx
,%esi
,1)
404 movw
%bp
,2(%edx
,%esi
,1)
405 movb
%ch
,1(%edi
,%ebx
,1)
407 cmpw %bp
,4(%edx
,%esi
,1)
409 movw
%bp
,4(%edx
,%esi
,1)
410 movb
%ch
,2(%edi
,%ebx
,1)
412 cmpw %bp
,6(%edx
,%esi
,1)
414 movw
%bp
,6(%edx
,%esi
,1)
415 movb
%ch
,3(%edi
,%ebx
,1)
421 // default case, handling any size particle
424 // count = pix << d_y_aspect_shift;
428 movb C
(d_y_aspect_shift
),%cl
431 // for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth)
433 // for (i=0 ; i<pix ; i++)
447 cmpw %bp
,-2(%edx
,%eax
,2)
449 movw
%bp
,-2(%edx
,%eax
,2)
450 movb
%ch
,-1(%edi
,%eax
,1)
455 addl C
(d_zrowbytes
),%edx
456 addl C
(screenwidth
),%edi
462 popl
%ebx
// restore register variables
464 popl
%ebp
// restore the caller
's stack frame