Fix for the problem that SDL applications exited
[AROS-Contrib.git] / Games / Quake / d_draw.s
blobf0a6e2aafb7a7ec5f401863f9d4ebbded68e78eb
1 /*
2 Copyright (C) 1996-1997 Id Software, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License
6 as published by the Free Software Foundation; either version 2
7 of the License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 See the GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 // d_draw.s
22 // x86 assembly-language horizontal 8-bpp span-drawing code.
25 #include "asm_i386.h"
26 #include "quakeasm.h"
27 #include "asm_draw.h"
28 #include "d_ifacea.h"
30 #if id386
32 //----------------------------------------------------------------------
33 // 8-bpp horizontal span drawing code for polygons, with no transparency.
35 // Assumes there is at least one span in pspans, and that every span
36 // contains at least one pixel
37 //----------------------------------------------------------------------
39 .text
// Out-of-line clamp stubs used by D_DrawSpans8. Each stub loads a replacement
// value into the register that was just range-checked and jumps back to the
// matching LClampReentryN label, which keeps the unclamped case on the
// fall-through path of the drawing code.
41 // out-of-line, rarely-needed clamping code
// Stubs 0 and 1 handle the start-of-span s (%esi) and t (%edx). They are
// reached by "ja" after an unsigned compare against bbextents / bbextentt;
// the flags from that compare are still valid here, so "jg" separates a value
// that is really above the limit from one that is only "above" because it is
// negative, which is clamped to 0.
43 LClampHigh0:
44 movl C(bbextents),%esi
45 jmp LClampReentry0
46 LClampHighOrLow0:
47 jg LClampHigh0
48 xorl %esi,%esi
49 jmp LClampReentry0
51 LClampHigh1:
52 movl C(bbextentt),%edx
53 jmp LClampReentry1
54 LClampHighOrLow1:
55 jg LClampHigh1
56 xorl %edx,%edx
57 jmp LClampReentry1
// Stubs 2 and 3 handle the end-of-segment s (%ebp) and t (%ecx) in the
// full 8-pixel segment path: below 2048 is raised to 2048, above the
// extent is lowered to the extent.
59 LClampLow2:
60 movl $2048,%ebp
61 jmp LClampReentry2
62 LClampHigh2:
63 movl C(bbextents),%ebp
64 jmp LClampReentry2
66 LClampLow3:
67 movl $2048,%ecx
68 jmp LClampReentry3
69 LClampHigh3:
70 movl C(bbextentt),%ecx
71 jmp LClampReentry3
// Stubs 4 and 5 do the same for the last-segment path, where the
// end-of-span s is in %eax and t is in %ebx.
73 LClampLow4:
74 movl $2048,%eax
75 jmp LClampReentry4
76 LClampHigh4:
77 movl C(bbextents),%eax
78 jmp LClampReentry4
80 LClampLow5:
81 movl $2048,%ebx
82 jmp LClampReentry5
83 LClampHigh5:
84 movl C(bbextentt),%ebx
85 jmp LClampReentry5
// pspans is the offset from %esp to the first stack argument once the four
// registers below have been pushed: 16 bytes of pushes plus 4 bytes,
// presumably the return address.
88 #define pspans 4+16
90 .align 4
91 .globl C(D_DrawSpans8)
// D_DrawSpans8: texture-maps a linked list of horizontal spans into the
// 8-bpp view buffer. The single stack argument points to the first span
// descriptor; the list is walked through espan_t_pnext until it is null.
// Texels are read from cacheblock and written through d_viewbuffer /
// d_scantable. %ebp, %edi, %esi and %ebx are saved here and restored in the
// epilogue.
92 C(D_DrawSpans8):
93 pushl %ebp // preserve caller's stack frame
94 pushl %edi
95 pushl %esi // preserve register variables
96 pushl %ebx
99 // set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
100 // and span list pointers
// The integer loads are interleaved with the FP multiplies; the three
// products are stored in reverse order of computation because they are
// popped off the FP stack.
102 // TODO: any overlap from rearranging?
103 flds C(d_sdivzstepu)
104 fmuls fp_8
105 movl C(cacheblock),%edx
106 flds C(d_tdivzstepu)
107 fmuls fp_8
108 movl pspans(%esp),%ebx // point to the first span descriptor
109 flds C(d_zistepu)
110 fmuls fp_8
111 movl %edx,pbase // pbase = cacheblock
112 fstps zi8stepu
113 fstps tdivz8stepu
114 fstps sdivz8stepu
// LSpanLoop: per-span setup. On entry %ebx points to the current span
// descriptor. On exit the FP stack holds 1/z | z*64k | t/z | s/z for the
// first pixel (the fdivr may still be executing), %edi points to the first
// destination pixel, %ecx holds the span's pixel count, and %esi / %edx hold
// sadjust / tadjust.
116 LSpanLoop:
118 // set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
119 // initial s and t values
// The trailing comments below show the FP stack after each instruction,
// top of stack first.
121 // FIXME: pipeline FILD?
122 fildl espan_t_v(%ebx)
123 fildl espan_t_u(%ebx)
125 fld %st(1) // dv | du | dv
126 fmuls C(d_sdivzstepv) // dv*d_sdivzstepv | du | dv
127 fld %st(1) // du | dv*d_sdivzstepv | du | dv
128 fmuls C(d_sdivzstepu) // du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
129 fld %st(2) // du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
130 fmuls C(d_tdivzstepu) // du*d_tdivzstepu | du*d_sdivzstepu |
131 // dv*d_sdivzstepv | du | dv
132 fxch %st(1) // du*d_sdivzstepu | du*d_tdivzstepu |
133 // dv*d_sdivzstepv | du | dv
134 faddp %st(0),%st(2) // du*d_tdivzstepu |
135 // du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
136 fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
137 // du*d_tdivzstepu | du | dv
138 fld %st(3) // dv | du*d_sdivzstepu + dv*d_sdivzstepv |
139 // du*d_tdivzstepu | du | dv
140 fmuls C(d_tdivzstepv) // dv*d_tdivzstepv |
141 // du*d_sdivzstepu + dv*d_sdivzstepv |
142 // du*d_tdivzstepu | du | dv
143 fxch %st(1) // du*d_sdivzstepu + dv*d_sdivzstepv |
144 // dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
145 fadds C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv +
146 // du*d_sdivzstepu; stays in %st(2) at end
147 fxch %st(4) // dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
148 // s/z
149 fmuls C(d_zistepv) // dv*d_zistepv | dv*d_tdivzstepv |
150 // du*d_tdivzstepu | du | s/z
151 fxch %st(1) // dv*d_tdivzstepv | dv*d_zistepv |
152 // du*d_tdivzstepu | du | s/z
153 faddp %st(0),%st(2) // dv*d_zistepv |
154 // dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
155 fxch %st(2) // du | dv*d_tdivzstepv + du*d_tdivzstepu |
156 // dv*d_zistepv | s/z
157 fmuls C(d_zistepu) // du*d_zistepu |
158 // dv*d_tdivzstepv + du*d_tdivzstepu |
159 // dv*d_zistepv | s/z
160 fxch %st(1) // dv*d_tdivzstepv + du*d_tdivzstepu |
161 // du*d_zistepu | dv*d_zistepv | s/z
162 fadds C(d_tdivzorigin) // tdivz = d_tdivzorigin + dv*d_tdivzstepv +
163 // du*d_tdivzstepu; stays in %st(1) at end
164 fxch %st(2) // dv*d_zistepv | du*d_zistepu | t/z | s/z
165 faddp %st(0),%st(1) // dv*d_zistepv + du*d_zistepu | t/z | s/z
167 flds fp_64k // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
168 fxch %st(1) // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
169 fadds C(d_ziorigin) // zi = d_ziorigin + dv*d_zistepv +
170 // du*d_zistepu; stays in %st(0) at end
171 // 1/z | fp_64k | t/z | s/z
173 // calculate and clamp s & t
175 fdivr %st(0),%st(1) // 1/z | z*64k | t/z | s/z
// The integer instructions below do not depend on the divide result, so
// they can execute while it completes.
178 // point %edi to the first pixel in the span
180 movl C(d_viewbuffer),%ecx
181 movl espan_t_v(%ebx),%eax
182 movl %ebx,pspantemp // preserve spans pointer
184 movl C(tadjust),%edx
185 movl C(sadjust),%esi
186 movl C(d_scantable)(,%eax,4),%edi // v * screenwidth
187 addl %ecx,%edi
188 movl espan_t_u(%ebx),%ecx
189 addl %ecx,%edi // pdest = &pdestspan[scans->u];
190 movl espan_t_count(%ebx),%ecx
// Store the integer s and t for the first pixel, then start the divide that
// yields z at the end of the first segment. Three cases on the pixel count
// in %ecx:
//   count > 8  : LSetupNotLast1 - advance s/z, t/z, 1/z by the 8-pixel steps
//   2..8       : advance by (count - 1) single-pixel steps
//   count == 1 : LCleanup1 - no further divide is started
// In the first two cases the FP stack on exit is z*64k' | 1/z | t/z | s/z
// with the divide still executing; after LCleanup1 it is 1/z | t/z | s/z.
// On the <= 8 paths %ecx has been decremented to count - 1.
193 // now start the FDIV for the end of the span
195 cmpl $8,%ecx
196 ja LSetupNotLast1
198 decl %ecx
199 jz LCleanup1 // if only one pixel, no need to start an FDIV
200 movl %ecx,spancountminus1
202 // finish up the s and t calcs
203 fxch %st(1) // z*64k | 1/z | t/z | s/z
205 fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
206 fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
207 fxch %st(1) // z*64k | s | 1/z | t/z | s/z
208 fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
209 fxch %st(1) // s | t | 1/z | t/z | s/z
210 fistpl s // 1/z | t | t/z | s/z
211 fistpl t // 1/z | t/z | s/z
213 fildl spancountminus1
215 flds C(d_tdivzstepu) // C(d_tdivzstepu) | spancountminus1
216 flds C(d_zistepu) // C(d_zistepu) | C(d_tdivzstepu) | spancountminus1
217 fmul %st(2),%st(0) // C(d_zistepu)*scm1 | C(d_tdivzstepu) | scm1
218 fxch %st(1) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
219 fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
220 fxch %st(2) // scm1 | C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1
221 fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_zistepu)*scm1 |
222 // C(d_tdivzstepu)*scm1
223 fxch %st(1) // C(d_zistepu)*scm1 | C(d_sdivzstepu)*scm1 |
224 // C(d_tdivzstepu)*scm1
225 faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
226 fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
227 faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1
228 faddp %st(0),%st(3)
230 flds fp_64k
231 fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
232 // overlap
233 jmp LFDIVInFlight1
// Single-pixel span: only the initial s and t are needed.
235 LCleanup1:
236 // finish up the s and t calcs
237 fxch %st(1) // z*64k | 1/z | t/z | s/z
239 fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
240 fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
241 fxch %st(1) // z*64k | s | 1/z | t/z | s/z
242 fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
243 fxch %st(1) // s | t | 1/z | t/z | s/z
244 fistpl s // 1/z | t | t/z | s/z
245 fistpl t // 1/z | t/z | s/z
246 jmp LFDIVInFlight1
248 .align 4
// More than 8 pixels: the first segment is a full 8 pixels, so step the
// three gradients by the precomputed 8-pixel steps.
249 LSetupNotLast1:
250 // finish up the s and t calcs
251 fxch %st(1) // z*64k | 1/z | t/z | s/z
253 fld %st(0) // z*64k | z*64k | 1/z | t/z | s/z
254 fmul %st(4),%st(0) // s | z*64k | 1/z | t/z | s/z
255 fxch %st(1) // z*64k | s | 1/z | t/z | s/z
256 fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
257 fxch %st(1) // s | t | 1/z | t/z | s/z
258 fistpl s // 1/z | t | t/z | s/z
259 fistpl t // 1/z | t/z | s/z
261 fadds zi8stepu
262 fxch %st(2)
263 fadds sdivz8stepu
264 fxch %st(2)
265 flds tdivz8stepu
266 faddp %st(0),%st(2)
267 flds fp_64k
268 fdiv %st(1),%st(0) // z = 1/1/z
269 // this is what we've gone to all this trouble to
270 // overlap
// LFDIVInFlight1: integer work done while the divide runs. Adds the integer
// s and t to sadjust (%esi) and tadjust (%edx), clamps them against
// bbextents / bbextentt, stores the results in s and t, keeps the low 16
// bits left-justified in sfracf / tfracf, and forms the source texel
// pointer in %esi. %ecx is not modified in this block.
271 LFDIVInFlight1:
273 addl s,%esi
274 addl t,%edx
275 movl C(bbextents),%ebx
276 movl C(bbextentt),%ebp
277 cmpl %ebx,%esi
278 ja LClampHighOrLow0 // unsigned compare: catches both > bbextents and negative
279 LClampReentry0:
280 movl %esi,s
281 movl pbase,%ebx
282 shll $16,%esi
283 cmpl %ebp,%edx
284 movl %esi,sfracf // mov does not change the flags set by the cmpl above
285 ja LClampHighOrLow1
286 LClampReentry1:
287 movl %edx,t
288 movl s,%esi // sfrac = scans->sfrac;
289 shll $16,%edx
290 movl t,%eax // tfrac = scans->tfrac;
291 sarl $16,%esi
292 movl %edx,tfracf
295 // calculate the texture starting address
297 sarl $16,%eax
298 movl C(cachewidth),%edx
299 imull %edx,%eax // (tfrac >> 16) * cachewidth
300 addl %ebx,%esi
301 addl %eax,%esi // psource = pbase + (sfrac >> 16) +
302 // ((tfrac >> 16) * cachewidth);
// %ecx is the full pixel count when it is above 8, otherwise count - 1.
305 // determine whether last segment or not
307 cmpl $8,%ecx
308 jna LLastSegment
// LNotLastSegment: draws a full 8-pixel segment. This part consumes the
// divide result to get s and t at the end of the segment, clamps them,
// derives the per-pixel steps, builds advancetable and emits the first
// texels. On entry %esi = source pointer, %edi = destination pointer,
// %ecx = pixels remaining in the span (more than 8).
311 // not the last segment; do full 8-wide segment
313 LNotLastSegment:
316 // advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
317 // get there
320 // pick up after the FDIV that was left in flight previously
322 fld %st(0) // duplicate it
323 fmul %st(4),%st(0) // s = s/z * z
324 fxch %st(1)
325 fmul %st(3),%st(0) // t = t/z * z
326 fxch %st(1)
327 fistpl snext
328 fistpl tnext
329 movl snext,%eax
330 movl tnext,%edx
332 movb (%esi),%bl // get first source texel
333 subl $8,%ecx // count off this segment's pixels
334 movl C(sadjust),%ebp
335 movl %ecx,counttemp // remember count of remaining pixels
337 movl C(tadjust),%ecx
338 movb %bl,(%edi) // store first dest pixel
340 addl %eax,%ebp
341 addl %edx,%ecx
343 movl C(bbextents),%eax
344 movl C(bbextentt),%edx
// clamp end-of-segment s (%ebp) and t (%ecx) to [2048, extent]
346 cmpl $2048,%ebp
347 jl LClampLow2
348 cmpl %eax,%ebp
349 ja LClampHigh2
350 LClampReentry2:
352 cmpl $2048,%ecx
353 jl LClampLow3
354 cmpl %edx,%ecx
355 ja LClampHigh3
356 LClampReentry3:
358 movl %ebp,snext
359 movl %ecx,tnext
361 subl s,%ebp // %ebp = s delta across the 8-pixel segment
362 subl t,%ecx // %ecx = t delta across the 8-pixel segment
// advancetable+4 holds the source-pointer advance per pixel when the t
// fraction does not carry; advancetable+0 holds that advance plus one extra
// texture row (cachewidth) for when it does.
365 // set up advancetable
367 movl %ecx,%eax
368 movl %ebp,%edx
369 sarl $19,%eax // whole-texel part of the per-pixel tstep (>>16, and /8)
370 jz LZero // zero rows per pixel: skip the multiply by cachewidth
371 sarl $19,%edx // whole-texel part of the per-pixel sstep (>>16, and /8)
372 movl C(cachewidth),%ebx
373 imull %ebx,%eax
374 jmp LSetUp1
376 LZero:
377 sarl $19,%edx // whole-texel part of the per-pixel sstep (>>16, and /8)
378 movl C(cachewidth),%ebx
380 LSetUp1:
382 addl %edx,%eax // add in sstep
383 // (tstep >> 16) * cachewidth + (sstep >> 16);
384 movl tfracf,%edx
385 movl %eax,advancetable+4 // advance base in t
386 addl %ebx,%eax // ((tstep >> 16) + 1) * cachewidth +
387 // (sstep >> 16);
388 shll $13,%ebp // left-justify sstep fractional part
389 movl sfracf,%ebx
390 shll $13,%ecx // left-justify tstep fractional part
391 movl %eax,advancetable // advance extra in t
393 movl %ecx,tstep
// Per-pixel step pattern used from here on: add the t fraction, turn its
// carry into an index of -1 or 0 with sbbl, add the s fraction, then adcl
// adds the selected advancetable entry plus the s carry to the source
// pointer.
394 addl %ecx,%edx // advance tfrac fractional part by tstep frac
396 sbbl %ecx,%ecx // turn tstep carry into -1 (0 if none)
397 addl %ebp,%ebx // advance sfrac fractional part by sstep frac
398 adcl advancetable+4(,%ecx,4),%esi // point to next source texel
400 addl tstep,%edx
401 sbbl %ecx,%ecx
402 movb (%esi),%al
403 addl %ebp,%ebx
404 movb %al,1(%edi)
405 adcl advancetable+4(,%ecx,4),%esi
407 addl tstep,%edx
408 sbbl %ecx,%ecx
409 addl %ebp,%ebx
410 movb (%esi),%al
411 adcl advancetable+4(,%ecx,4),%esi
413 addl tstep,%edx
414 sbbl %ecx,%ecx
415 movb %al,2(%edi)
416 addl %ebp,%ebx
417 movb (%esi),%al
418 adcl advancetable+4(,%ecx,4),%esi
420 addl tstep,%edx
421 sbbl %ecx,%ecx
422 movb %al,3(%edi)
423 addl %ebp,%ebx
424 movb (%esi),%al
425 adcl advancetable+4(,%ecx,4),%esi
// Middle of the 8-pixel segment: start the divide for the end of the next
// segment, then emit the remaining texels while it runs. counttemp holds
// the pixels left after this segment. If more than 8 remain the gradients
// advance by the 8-pixel steps; otherwise they advance by (remaining - 1)
// single-pixel steps so the divide lands on the span's last pixel; if
// exactly one pixel remains no divide is started. %al holds the texel for
// destination offset 4 throughout the FP setup.
429 // start FDIV for end of next segment in flight, so it can overlap
431 movl counttemp,%ecx
432 cmpl $8,%ecx // more than one segment after this?
433 ja LSetupNotLast2 // yes
435 decl %ecx
436 jz LFDIVInFlight2 // if only one pixel, no need to start an FDIV
437 movl %ecx,spancountminus1
438 fildl spancountminus1
// The stack comments below list only the temporaries; 1/z | t/z | s/z sit
// beneath them and receive the sums.
440 flds C(d_zistepu) // C(d_zistepu) | spancountminus1
441 fmul %st(1),%st(0) // C(d_zistepu)*scm1 | scm1
442 flds C(d_tdivzstepu) // C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
443 fmul %st(2),%st(0) // C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
444 fxch %st(1) // C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1
445 faddp %st(0),%st(3) // C(d_tdivzstepu)*scm1 | scm1
446 fxch %st(1) // scm1 | C(d_tdivzstepu)*scm1
447 fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
448 fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
449 faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1
450 flds fp_64k // 64k | C(d_sdivzstepu)*scm1
451 fxch %st(1) // C(d_sdivzstepu)*scm1 | 64k
452 faddp %st(0),%st(4) // 64k
454 fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
455 // overlap
456 jmp LFDIVInFlight2
458 .align 4
459 LSetupNotLast2:
460 fadds zi8stepu
461 fxch %st(2)
462 fadds sdivz8stepu
463 fxch %st(2)
464 flds tdivz8stepu
465 faddp %st(0),%st(2)
466 flds fp_64k
467 fdiv %st(1),%st(0) // z = 1/1/z
468 // this is what we've gone to all this trouble to
469 // overlap
470 LFDIVInFlight2:
471 movl %ecx,counttemp // remaining count if above 8, otherwise remaining - 1
473 addl tstep,%edx
474 sbbl %ecx,%ecx
475 movb %al,4(%edi)
476 addl %ebp,%ebx
477 movb (%esi),%al
478 adcl advancetable+4(,%ecx,4),%esi
480 addl tstep,%edx
481 sbbl %ecx,%ecx
482 movb %al,5(%edi)
483 addl %ebp,%ebx
484 movb (%esi),%al
485 adcl advancetable+4(,%ecx,4),%esi
487 addl tstep,%edx
488 sbbl %ecx,%ecx
489 movb %al,6(%edi)
490 addl %ebp,%ebx
491 movb (%esi),%al
492 adcl advancetable+4(,%ecx,4),%esi
// Segment finished: advance the destination, save the running fractions,
// and make the end-of-segment s/t the start values of the next segment.
494 addl $8,%edi
495 movl %edx,tfracf
496 movl snext,%edx
497 movl %ebx,sfracf
498 movl tnext,%ebx
499 movl %edx,s
500 movl %ebx,t
502 movl counttemp,%ecx // retrieve count
505 // determine whether last segment or not
507 cmpl $8,%ecx // are there multiple segments remaining?
508 movb %al,-1(%edi) // eighth pixel of the segment; mov keeps the cmpl flags
509 ja LNotLastSegment // yes
// LLastSegment: draws the final 1..8 pixels of a span. On entry %ecx is the
// number of steps left (pixels - 1), %esi the source pointer and %edi the
// destination pointer. With zero steps only the final pixel is written
// (LNoSteps). Otherwise s and t at the last pixel come from the pending
// divide, the per-pixel steps are derived from them, and control jumps into
// the unrolled tail at the entry point that matches the pixel count.
512 // last segment of scan
514 LLastSegment:
517 // advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
518 // get there. The number of pixels left is variable, and we want to land on the
519 // last pixel, not step one past it, so we can't run into arithmetic problems
521 testl %ecx,%ecx
522 jz LNoSteps // just draw the last pixel and we're done
524 // pick up after the FDIV that was left in flight previously
527 fld %st(0) // duplicate it
528 fmul %st(4),%st(0) // s = s/z * z
529 fxch %st(1)
530 fmul %st(3),%st(0) // t = t/z * z
531 fxch %st(1)
532 fistpl snext
533 fistpl tnext
535 movb (%esi),%al // load first texel in segment
536 movl C(tadjust),%ebx
537 movb %al,(%edi) // store first pixel in segment
538 movl C(sadjust),%eax
540 addl snext,%eax
541 addl tnext,%ebx
543 movl C(bbextents),%ebp
544 movl C(bbextentt),%edx
// clamp end-of-span s (%eax) and t (%ebx) to [2048, extent]
546 cmpl $2048,%eax
547 jl LClampLow4
548 cmpl %ebp,%eax
549 ja LClampHigh4
550 LClampReentry4:
551 movl %eax,snext
553 cmpl $2048,%ebx
554 jl LClampLow5
555 cmpl %edx,%ebx
556 ja LClampHigh5
557 LClampReentry5:
559 cmpl $1,%ecx // don't bother
560 je LOnlyOneStep // if two pixels in segment, there's only one step,
561 // of the segment length
562 subl s,%eax
563 subl t,%ebx
565 addl %eax,%eax // convert to 15.17 format so multiply by 1.31
566 addl %ebx,%ebx // reciprocal yields 16.48
// One-operand imull leaves the 64-bit product in %edx:%eax; only the high
// half in %edx is used as the step. %ecx is at least 2 here, so the -8 bias
// makes the first table entry correspond to two steps.
568 imull reciprocal_table-8(,%ecx,4) // sstep = (snext - s) / (spancount-1)
569 movl %edx,%ebp
571 movl %ebx,%eax
572 imull reciprocal_table-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1)
// At LSetEntryvec: %ebp = sstep, %edx = tstep, %ecx = number of steps.
574 LSetEntryvec:
576 // set up advancetable
578 movl entryvec_table(,%ecx,4),%ebx
579 movl %edx,%eax
580 movl %ebx,jumptemp // entry point, used by the indirect jmp below
581 movl %ebp,%ecx
582 sarl $16,%edx // tstep >>= 16;
583 movl C(cachewidth),%ebx
584 sarl $16,%ecx // sstep >>= 16;
585 imull %ebx,%edx
587 addl %ecx,%edx // add in sstep
588 // (tstep >> 16) * cachewidth + (sstep >> 16);
589 movl tfracf,%ecx
590 movl %edx,advancetable+4 // advance base in t
591 addl %ebx,%edx // ((tstep >> 16) + 1) * cachewidth +
592 // (sstep >> 16);
593 shll $16,%ebp // left-justify sstep fractional part
594 movl sfracf,%ebx
595 shll $16,%eax // left-justify tstep fractional part
596 movl %edx,advancetable // advance extra in t
// Take the first step here, so each entry point starts with %esi already
// on the second texel, %eax = tstep fraction, %ebp = sstep fraction,
// %edx / %ebx = running t / s fractions.
598 movl %eax,tstep
599 movl %ecx,%edx
600 addl %eax,%edx
601 sbbl %ecx,%ecx
602 addl %ebp,%ebx
603 adcl advancetable+4(,%ecx,4),%esi
605 jmp *jumptemp // jump to the number-of-pixels handler
607 //----------------------------------------
// LNoSteps: exactly one pixel is left. Load its texel and bias %edi by -7
// so that the fixed "7(%edi)" store in LEndSpan writes to the current
// destination pixel.
609 LNoSteps:
610 movb (%esi),%al // load first texel in segment
611 subl $7,%edi // adjust for hardwired offset
612 jmp LEndSpan
// LOnlyOneStep: two pixels are left, so the single step is the whole s/t
// delta and no reciprocal multiply is needed. Leaves sstep in %ebp and
// tstep in %edx, as LSetEntryvec expects.
615 LOnlyOneStep:
616 subl s,%eax
617 subl t,%ebx
618 movl %eax,%ebp
619 movl %ebx,%edx
620 jmp LSetEntryvec
622 //----------------------------------------
// Entry points for the last segment, reached through "jmp *jumptemp" with a
// value loaded from entryvec_table (defined outside this file section).
// EntryN_8 handles a last segment of N pixels. The first pixel has already
// been stored at (%edi) and %esi already points to the second texel. Each
// entry biases %edi so that the fixed store offsets in the shared unrolled
// tail, which ends with the 7(%edi) store in LEndSpan, land on the correct
// pixels, then joins the tail at the matching LLEntryN_8 label.
// Register state on entry: %eax = tstep fraction, %ebp = sstep fraction,
// %edx = running t fraction, %ebx = running s fraction. %al is overwritten
// by the first texel load, so later steps read tstep from memory.
624 .globl Entry2_8
625 Entry2_8:
626 subl $6,%edi // adjust for hardwired offsets
627 movb (%esi),%al
628 jmp LLEntry2_8
630 //----------------------------------------
632 .globl Entry3_8
633 Entry3_8:
634 subl $5,%edi // adjust for hardwired offsets
635 addl %eax,%edx
636 movb (%esi),%al
637 sbbl %ecx,%ecx
638 addl %ebp,%ebx
639 adcl advancetable+4(,%ecx,4),%esi
640 jmp LLEntry3_8
642 //----------------------------------------
644 .globl Entry4_8
645 Entry4_8:
646 subl $4,%edi // adjust for hardwired offsets
647 addl %eax,%edx
648 movb (%esi),%al
649 sbbl %ecx,%ecx
650 addl %ebp,%ebx
651 adcl advancetable+4(,%ecx,4),%esi
652 addl tstep,%edx // carry from this add is consumed by the sbbl at the target
653 jmp LLEntry4_8
655 //----------------------------------------
657 .globl Entry5_8
658 Entry5_8:
659 subl $3,%edi // adjust for hardwired offsets
660 addl %eax,%edx
661 movb (%esi),%al
662 sbbl %ecx,%ecx
663 addl %ebp,%ebx
664 adcl advancetable+4(,%ecx,4),%esi
665 addl tstep,%edx
666 jmp LLEntry5_8
668 //----------------------------------------
670 .globl Entry6_8
671 Entry6_8:
672 subl $2,%edi // adjust for hardwired offsets
673 addl %eax,%edx
674 movb (%esi),%al
675 sbbl %ecx,%ecx
676 addl %ebp,%ebx
677 adcl advancetable+4(,%ecx,4),%esi
678 addl tstep,%edx
679 jmp LLEntry6_8
681 //----------------------------------------
683 .globl Entry7_8
684 Entry7_8:
685 decl %edi // adjust for hardwired offsets
686 addl %eax,%edx
687 movb (%esi),%al
688 sbbl %ecx,%ecx
689 addl %ebp,%ebx
690 adcl advancetable+4(,%ecx,4),%esi
691 addl tstep,%edx
692 jmp LLEntry7_8
694 //----------------------------------------
// Entry8_8 needs no %edi bias and falls straight through the whole tail.
// Every LLEntryN_8 label from 4 upward is entered with the carry flag
// holding the result of the preceding "addl tstep,%edx".
696 .globl Entry8_8
697 Entry8_8:
698 addl %eax,%edx
699 movb (%esi),%al
700 sbbl %ecx,%ecx
701 addl %ebp,%ebx
702 adcl advancetable+4(,%ecx,4),%esi
704 addl tstep,%edx
705 sbbl %ecx,%ecx
706 movb %al,1(%edi)
707 addl %ebp,%ebx
708 movb (%esi),%al
709 adcl advancetable+4(,%ecx,4),%esi
710 addl tstep,%edx
711 LLEntry7_8:
712 sbbl %ecx,%ecx
713 movb %al,2(%edi)
714 addl %ebp,%ebx
715 movb (%esi),%al
716 adcl advancetable+4(,%ecx,4),%esi
717 addl tstep,%edx
718 LLEntry6_8:
719 sbbl %ecx,%ecx
720 movb %al,3(%edi)
721 addl %ebp,%ebx
722 movb (%esi),%al
723 adcl advancetable+4(,%ecx,4),%esi
724 addl tstep,%edx
725 LLEntry5_8:
726 sbbl %ecx,%ecx
727 movb %al,4(%edi)
728 addl %ebp,%ebx
729 movb (%esi),%al
730 adcl advancetable+4(,%ecx,4),%esi
731 addl tstep,%edx
732 LLEntry4_8:
733 sbbl %ecx,%ecx
734 movb %al,5(%edi)
735 addl %ebp,%ebx
736 movb (%esi),%al
737 adcl advancetable+4(,%ecx,4),%esi
738 LLEntry3_8:
739 movb %al,6(%edi)
740 movb (%esi),%al
// falls through to LEndSpan with the final texel in %al
741 LLEntry2_8:
// LEndSpan: common end of every span. On entry %al holds the span's final
// texel and %edi is biased so that 7(%edi) is the final destination pixel.
// Pops the three per-span values left on the FP stack, stores the final
// pixel, and either loops to the next span descriptor or restores the saved
// registers and returns to the caller.
743 LEndSpan:
746 // clear s/z, t/z, 1/z from FP stack
748 fstp %st(0)
749 fstp %st(0)
750 fstp %st(0)
752 movl pspantemp,%ebx // restore spans pointer
753 movl espan_t_pnext(%ebx),%ebx // point to next span
754 testl %ebx,%ebx // any more spans?
755 movb %al,7(%edi) // store the final pixel; mov keeps the testl flags
756 jnz LSpanLoop // more spans
758 popl %ebx // restore register variables
759 popl %esi
760 popl %edi
761 popl %ebp // restore the caller's stack frame
762 ret // without this, execution falls through into LClamp below
764 //----------------------------------------------------------------------
765 // 8-bpp horizontal span z drawing code for polygons, with no transparency.
767 // Assumes there is at least one span in pzspans, and that every span
768 // contains at least one pixel
769 //----------------------------------------------------------------------
771 .text
// Out-of-line clamp stubs for D_DrawZSpans, reached when the fcoms against
// float_point5 reports 1/z greater than 0.5. Each stub substitutes the
// fixed value 0x40000000 for izi (presumably 0.5 scaled by 2**31 - confirm
// against Float2ToThe31nd), zeroes the step in %ebx so the value stays
// constant along the span, pops the unconverted 1/z from the FP stack, and
// rejoins the drawing code.
// NOTE(review): %ebx is loaded from izistep once before the span loop and is
// not reloaded after these stubs zero it, so every later span in the same
// call also steps by 0 - confirm this is intended.
773 // z-clamp on a non-negative gradient span
774 LClamp:
775 movl $0x40000000,%edx
776 xorl %ebx,%ebx
777 fstp %st(0)
778 jmp LZDraw
780 // z-clamp on a negative gradient span
781 LClampNeg:
782 movl $0x40000000,%edx
783 xorl %ebx,%ebx
784 fstp %st(0)
785 jmp LZDrawNeg
// pzspans is the offset from %esp to the first stack argument once the four
// registers below have been pushed: 16 bytes of pushes plus 4 bytes,
// presumably the return address.
788 #define pzspans 4+16
790 .globl C(D_DrawZSpans)
// D_DrawZSpans: fills the z-buffer for a linked list of horizontal spans.
// The single stack argument points to the first span descriptor; the list is
// walked through espan_t_pnext until it is null. For each pixel the upper
// 16 bits of a 32-bit fixed-point 1/z value are stored as one 16-bit word.
// %ebp, %edi, %esi and %ebx are saved here and restored at LFDone.
791 C(D_DrawZSpans):
792 pushl %ebp // preserve caller's stack frame
793 pushl %edi
794 pushl %esi // preserve register variables
795 pushl %ebx
797 flds C(d_zistepu)
798 movl C(d_zistepu),%eax // raw bit pattern of the float, for the integer test
799 movl pzspans(%esp),%esi
800 testl %eax,%eax
// NOTE(review): this branches when the bit pattern is zero, not when the
// value is negative, despite the target's name - confirm intended.
801 jz LFNegSpan
803 fmuls Float2ToThe31nd
804 fistpl izistep // note: we are relying on FP exceptions being turned
805 // off here to avoid range problems
806 movl izistep,%ebx // remains loaded for all spans
// LFSpanLoop: per-span loop for the path that adds izistep. Computes the
// starting 1/z and the destination address, converts 1/z to fixed point
// (or takes the LClamp stub), then writes one 16-bit value per pixel:
// an optional leading word to reach dword alignment, pairs of words packed
// into dwords, and an optional trailing word.
808 LFSpanLoop:
809 // set up the initial 1/z value
810 fildl espan_t_v(%esi)
811 fildl espan_t_u(%esi)
812 movl espan_t_v(%esi),%ecx
813 movl C(d_pzbuffer),%edi
814 fmuls C(d_zistepu)
815 fxch %st(1)
816 fmuls C(d_zistepv)
817 fxch %st(1)
818 fadds C(d_ziorigin)
819 imull C(d_zrowbytes),%ecx
820 faddp %st(0),%st(1) // 1/z = d_ziorigin + u*d_zistepu + v*d_zistepv
822 // clamp if z is nearer than 2 (1/z > 0.5)
823 fcoms float_point5
824 addl %ecx,%edi
825 movl espan_t_u(%esi),%edx
826 addl %edx,%edx // word count
827 movl espan_t_count(%esi),%ecx
828 addl %edx,%edi // pdest = &pdestspan[scans->u];
829 pushl %esi // preserve spans pointer
830 fnstsw %ax
831 testb $0x45,%ah // C3, C2 and C0 all clear means 1/z > 0.5
832 jz LClamp
834 fmuls Float2ToThe31nd
835 fistpl izi // note: we are relying on FP exceptions being turned
836 // off here to avoid problems when the span is closer
837 // than 1/(2**31)
838 movl izi,%edx
840 // at this point:
841 // %ebx = izistep
842 // %ecx = count
843 // %edx = izi
844 // %edi = pdest
846 LZDraw:
848 // do a single pixel up front, if necessary to dword align the destination
849 testl $2,%edi
850 jz LFMiddle
851 movl %edx,%eax
852 addl %ebx,%edx
853 shrl $16,%eax // keep the upper 16 bits of izi
854 decl %ecx
855 movw %ax,(%edi)
856 addl $2,%edi
858 // do middle a pair of aligned dwords at a time
859 LFMiddle:
860 pushl %ecx // saved so the odd trailing pixel can be detected at LFLast
861 shrl $1,%ecx // count / 2
862 jz LFLast // no aligned dwords to do
863 shrl $1,%ecx // (count / 2) / 2
864 jnc LFMiddleLoop // even number of aligned dwords to do
// odd number of dwords: write one dword (two pixels) before the paired loop
866 movl %edx,%eax
867 addl %ebx,%edx
868 shrl $16,%eax // first pixel goes in the low word
869 movl %edx,%esi
870 addl %ebx,%edx
871 andl $0xFFFF0000,%esi // second pixel goes in the high word
872 orl %esi,%eax
873 movl %eax,(%edi)
874 addl $4,%edi
875 andl %ecx,%ecx // sets the zero flag if no dword pairs remain
876 jz LFLast
// each iteration writes two dwords, i.e. four pixels
878 LFMiddleLoop:
879 movl %edx,%eax
880 addl %ebx,%edx
881 shrl $16,%eax
882 movl %edx,%esi
883 addl %ebx,%edx
884 andl $0xFFFF0000,%esi
885 orl %esi,%eax
886 movl %edx,%ebp
887 movl %eax,(%edi)
888 addl %ebx,%edx
889 shrl $16,%ebp
890 movl %edx,%esi
891 addl %ebx,%edx
892 andl $0xFFFF0000,%esi
893 orl %esi,%ebp
894 movl %ebp,4(%edi) // FIXME: eliminate register contention
895 addl $8,%edi
897 decl %ecx
898 jnz LFMiddleLoop
900 LFLast:
901 popl %ecx // retrieve count
902 popl %esi // retrieve span pointer
904 // do the last, unaligned pixel, if there is one
905 andl $1,%ecx // is there an odd pixel left to do?
906 jz LFSpanDone // no
907 shrl $16,%edx
908 movw %dx,(%edi) // do the final pixel's z
910 LFSpanDone:
911 movl espan_t_pnext(%esi),%esi
912 testl %esi,%esi
913 jnz LFSpanLoop
915 jmp LFDone
// LFNegSpan: variant of the span loop that stores izistep multiplied by
// FloatMinus2ToThe31nd and subtracts it per pixel instead of adding it.
// Entered with d_zistepu still on the FP stack. Apart from subl replacing
// addl, the structure matches the LFSpanLoop path.
917 LFNegSpan:
918 fmuls FloatMinus2ToThe31nd
919 fistpl izistep // note: we are relying on FP exceptions being turned
920 // off here to avoid range problems
921 movl izistep,%ebx // remains loaded for all spans
923 LFNegSpanLoop:
924 // set up the initial 1/z value
925 fildl espan_t_v(%esi)
926 fildl espan_t_u(%esi)
927 movl espan_t_v(%esi),%ecx
928 movl C(d_pzbuffer),%edi
929 fmuls C(d_zistepu)
930 fxch %st(1)
931 fmuls C(d_zistepv)
932 fxch %st(1)
933 fadds C(d_ziorigin)
934 imull C(d_zrowbytes),%ecx
935 faddp %st(0),%st(1) // 1/z = d_ziorigin + u*d_zistepu + v*d_zistepv
937 // clamp if z is nearer than 2 (1/z > 0.5)
938 fcoms float_point5
939 addl %ecx,%edi
940 movl espan_t_u(%esi),%edx
941 addl %edx,%edx // word count
942 movl espan_t_count(%esi),%ecx
943 addl %edx,%edi // pdest = &pdestspan[scans->u];
944 pushl %esi // preserve spans pointer
945 fnstsw %ax
946 testb $0x45,%ah // C3, C2 and C0 all clear means 1/z > 0.5
947 jz LClampNeg
949 fmuls Float2ToThe31nd
950 fistpl izi // note: we are relying on FP exceptions being turned
951 // off here to avoid problems when the span is closer
952 // than 1/(2**31)
953 movl izi,%edx
955 // at this point:
956 // %ebx = izistep
957 // %ecx = count
958 // %edx = izi
959 // %edi = pdest
961 LZDrawNeg:
963 // do a single pixel up front, if necessary to dword align the destination
964 testl $2,%edi
965 jz LFNegMiddle
966 movl %edx,%eax
967 subl %ebx,%edx
968 shrl $16,%eax // keep the upper 16 bits of izi
969 decl %ecx
970 movw %ax,(%edi)
971 addl $2,%edi
973 // do middle a pair of aligned dwords at a time
974 LFNegMiddle:
975 pushl %ecx // saved so the odd trailing pixel can be detected at LFNegLast
976 shrl $1,%ecx // count / 2
977 jz LFNegLast // no aligned dwords to do
978 shrl $1,%ecx // (count / 2) / 2
979 jnc LFNegMiddleLoop // even number of aligned dwords to do
// odd number of dwords: write one dword (two pixels) before the paired loop
981 movl %edx,%eax
982 subl %ebx,%edx
983 shrl $16,%eax // first pixel goes in the low word
984 movl %edx,%esi
985 subl %ebx,%edx
986 andl $0xFFFF0000,%esi // second pixel goes in the high word
987 orl %esi,%eax
988 movl %eax,(%edi)
989 addl $4,%edi
990 andl %ecx,%ecx // sets the zero flag if no dword pairs remain
991 jz LFNegLast
// each iteration writes two dwords, i.e. four pixels
993 LFNegMiddleLoop:
994 movl %edx,%eax
995 subl %ebx,%edx
996 shrl $16,%eax
997 movl %edx,%esi
998 subl %ebx,%edx
999 andl $0xFFFF0000,%esi
1000 orl %esi,%eax
1001 movl %edx,%ebp
1002 movl %eax,(%edi)
1003 subl %ebx,%edx
1004 shrl $16,%ebp
1005 movl %edx,%esi
1006 subl %ebx,%edx
1007 andl $0xFFFF0000,%esi
1008 orl %esi,%ebp
1009 movl %ebp,4(%edi) // FIXME: eliminate register contention
1010 addl $8,%edi
1012 decl %ecx
1013 jnz LFNegMiddleLoop
1015 LFNegLast:
1016 popl %ecx // retrieve count
1017 popl %esi // retrieve span pointer
1019 // do the last, unaligned pixel, if there is one
1020 andl $1,%ecx // is there an odd pixel left to do?
1021 jz LFNegSpanDone // no
1022 shrl $16,%edx
1023 movw %dx,(%edi) // do the final pixel's z
1025 LFNegSpanDone:
1026 movl espan_t_pnext(%esi),%esi
1027 testl %esi,%esi
1028 jnz LFNegSpanLoop
// LFDone: common exit of D_DrawZSpans. Restores the four registers saved in
// the prologue, in reverse order of the pushes, and returns to the caller.
1030 LFDone:
1031 popl %ebx // restore register variables
1032 popl %esi
1033 popl %edi
1034 popl %ebp // restore the caller's stack frame
1035 ret // without this, execution runs past the end of the function
1037 #endif // id386