beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86 / pentium4 / sse2 / bdiv_dbm1c.asm
blob354300e4de1b01fe0742a325d7a166e7fb61ff65
1 dnl Intel Atom mpn_bdiv_dbm1c.
3 dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
5 dnl Copyright 2011 Free Software Foundation, Inc.
7 dnl This file is part of the GNU MP Library.
8 dnl
9 dnl The GNU MP Library is free software; you can redistribute it and/or modify
10 dnl it under the terms of either:
11 dnl
12 dnl * the GNU Lesser General Public License as published by the Free
13 dnl Software Foundation; either version 3 of the License, or (at your
14 dnl option) any later version.
15 dnl
16 dnl or
17 dnl
18 dnl * the GNU General Public License as published by the Free Software
19 dnl Foundation; either version 2 of the License, or (at your option) any
20 dnl later version.
21 dnl
22 dnl or both in parallel, as here.
23 dnl
24 dnl The GNU MP Library is distributed in the hope that it will be useful, but
25 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
27 dnl for more details.
28 dnl
29 dnl You should have received copies of the GNU General Public License and the
30 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
31 dnl see https://www.gnu.org/licenses/.
33 include(`../config.m4')
35 C cycles/limb
36 C cycles/limb
37 C P5 -
38 C P6 model 0-8,10-12 -
39 C P6 model 9 (Banias) 9.75
40 C P6 model 13 (Dothan)
41 C P4 model 0 (Willamette)
42 C P4 model 1 (?)
43 C P4 model 2 (Northwood) 8.25
44 C P4 model 3 (Prescott)
45 C P4 model 4 (Nocona)
46 C Intel Atom 8
47 C AMD K6 -
48 C AMD K7 -
49 C AMD K8
50 C AMD K10
52 C TODO: This code was optimised for atom-32; consider moving it back to the atom
53 C dir (atom currently grabs this code), and write a 4-way version (7 c/l).
dnl  Argument slots, listed by ascending frame offset.
defframe(PARAM_DST,    4)
defframe(PARAM_SRC,    8)
defframe(PARAM_SIZE,  12)
defframe(PARAM_MUL,   16)
defframe(PARAM_CARRY, 20)

dnl  The register save areas alias two of the argument slots, so saving
dnl  rp and up re-uses parameter space instead of needing extra stack.
define(`SAVE_UP', `PARAM_SIZE')
define(`SAVE_RP', `PARAM_MUL')

dnl  Symbolic register names used by the routine.
define(`cy',  `%eax')	C contains the return value
define(`n',   `%ecx')
define(`reg', `%edx')
define(`up',  `%esi')
define(`rp',  `%edi')
C mpn_bdiv_dbm1c
C
C Stack arguments, by ascending offset (see the PARAM_ defframes):
C	dst, src, size, mul, carry
C
C With cy starting as the carry argument, each source limb is handled as
C	p      = src[i] * mul		(64-bit product from pmuludq)
C	cy     = cy - low32(p)		(sub, sets the borrow)
C	dst[i] = cy
C	cy     = cy - high32(p) - borrow	(sbb)
C and the final cy is returned in eax.
C
C The loop handles two limbs per pass and the last limb is always finished
C by the tail at L(eq1).  size is assumed to be >= 1: a size of 0 takes the
C same path as size 1, so src[0] is read and dst[0] is written.
C
C esi and edi are saved in argument slots that have already been read
C (SAVE_UP, SAVE_RP) and restored before returning.  mm0, mm1 and mm7 are
C used as scratch; emms is executed before the return.

ASM_START()
	TEXT
	ALIGN(16)
deflit(`FRAME',0)

PROLOGUE(mpn_bdiv_dbm1c)
	mov	PARAM_SIZE, n		C size
	mov	up, SAVE_UP		C size slot already read, park esi there
	mov	PARAM_SRC, up
	movd	PARAM_MUL, %mm7		C mm7 = multiplier for every pmuludq
	mov	rp, SAVE_RP		C mul slot already read, park edi there
	mov	PARAM_DST, rp

	movd	(up), %mm0
	pmuludq	%mm7, %mm0		C mm0 = src[0] * mul
	shr	n			C n = size/2, CF = low bit of size
	mov	PARAM_CARRY, cy		C mov keeps ZF and CF from the shr
	jz	L(eq1)			C size/2 is 0, only the limb in mm0 to do

	movd	4(up), %mm1
	jc	L(odd)			C odd size, enter the loop at its midpoint

	C Even size: peel limb 0 here so that an odd number of limbs is left
	C for the loop and the tail.
	lea	4(up), up
	pmuludq	%mm7, %mm1
	movd	%mm0, reg		C reg = low half of product 0
	psrlq	$32, %mm0
	sub	reg, cy
	movd	%mm0, reg		C reg = high half, consumed by a later sbb
	movq	%mm1, %mm0		C product 1 becomes the pending product
	dec	n			C dec keeps CF, borrow from sub survives
	mov	cy, (rp)
	lea	4(rp), rp
	jz	L(end)			C size was 2, finish with the tail

C	ALIGN(16)
L(top):	movd	4(up), %mm1
	sbb	reg, cy			C high half and borrow of the previous limb
L(odd):	movd	%mm0, reg		C first limb of the pair, product in mm0
	psrlq	$32, %mm0
	pmuludq	%mm7, %mm1
	sub	reg, cy
	lea	8(up), up
	movd	%mm0, reg
	movd	(up), %mm0		C fetch the limb after this pair
	mov	cy, (rp)
	sbb	reg, cy
	movd	%mm1, reg		C second limb of the pair, product in mm1
	psrlq	$32, %mm1
	sub	reg, cy
	movd	%mm1, reg
	pmuludq	%mm7, %mm0
	dec	n			C sets ZF for jnz, leaves CF for the sbb
	mov	cy, 4(rp)
	lea	8(rp), rp
	jnz	L(top)

L(end):	sbb	reg, cy			C high half and borrow of the previous limb

L(eq1):	movd	%mm0, reg		C tail: last limb, product already in mm0
	psrlq	$32, %mm0
	mov	SAVE_UP, up		C restore esi
	sub	reg, cy
	movd	%mm0, reg
	emms				C reset MMX state before returning
	mov	cy, (rp)
	sbb	reg, cy			C cy now holds the return value

	mov	SAVE_RP, rp		C restore edi
	ret				C EPILOGUE emits no return instruction
EPILOGUE()
ASM_END()