beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86_64 / div_qr_2n_pi1.asm
blob5e59a0ac5d6482e5ab00303b579c55aa388267a0
1 dnl x86-64 mpn_div_qr_2n_pi1
2 dnl -- Divide an mpn number by a normalized 2-limb number,
3 dnl using a single-limb inverse.
5 dnl Copyright 2007, 2008, 2010-2012 Free Software Foundation, Inc.
7 dnl This file is part of the GNU MP Library.
8 dnl
9 dnl The GNU MP Library is free software; you can redistribute it and/or modify
10 dnl it under the terms of either:
11 dnl
12 dnl * the GNU Lesser General Public License as published by the Free
13 dnl Software Foundation; either version 3 of the License, or (at your
14 dnl option) any later version.
15 dnl
16 dnl or
17 dnl
18 dnl * the GNU General Public License as published by the Free Software
19 dnl Foundation; either version 2 of the License, or (at your option) any
20 dnl later version.
21 dnl
22 dnl or both in parallel, as here.
23 dnl
24 dnl The GNU MP Library is distributed in the hope that it will be useful, but
25 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
27 dnl for more details.
28 dnl
29 dnl You should have received copies of the GNU General Public License and the
30 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
31 dnl see https://www.gnu.org/licenses/.
33 include(`../config.m4')
36 C c/l
37 C INPUT PARAMETERS
38 define(`qp', `%rdi') C quotient pointer; the loop stores one limb per iteration at (qp, un, 8)
39 define(`rp', `%rsi') C remainder pointer; two limbs are written at (rp) and 8(rp) on exit
40 define(`up_param', `%rdx') C dividend pointer as passed in; copied to up on entry
41 define(`un', `%rcx') C dividend limb count on entry; reused as the descending limb index
42 define(`d1', `%r8') C high limb of the divisor
43 define(`d0', `%r9') C low limb of the divisor
44 define(`di_param', `8(%rsp)') C stack argument holding the inverse; redefined to 72(%rsp) under IFDOS
45 C Registers used inside the function body.
46 define(`di', `%r10') C register copy of di_param
47 define(`up', `%r11') C register copy of up_param
48 define(`u2', `%rbx') C high limb of the running remainder
49 define(`u1', `%r12') C low limb of the running remainder
50 define(`t1', `%r13') C scratch; holds the quotient limb q inside the loop
51 define(`t0', `%r14') C scratch; holds q0 inside the loop
52 define(`md1', `%r15') C -d1, set up once before the loop
54 C TODO
55 C * Store qh in the same stack slot as di_param, instead of pushing
56 C it. (we could put it in register %rbp, but then we would need to
57 C save and restore that instead, which doesn't seem like a win).
59 ABI_SUPPORT(DOS64)
60 ABI_SUPPORT(STD64)
61 C mpn_div_qr_2n_pi1: divide {up,un} by d1:d0 using the inverse di; un-2 quotient limbs go to qp, two remainder limbs to rp, and the top quotient limb qh is returned in %rax.
62 ASM_START()
63 TEXT
64 ALIGN(16)
65 PROLOGUE(mpn_div_qr_2n_pi1)
66 FUNC_ENTRY(4)
67 IFDOS(` mov 56(%rsp), %r8 ')
68 IFDOS(` mov 64(%rsp), %r9 ')
69 IFDOS(`define(`di_param', `72(%rsp)')')
70 mov di_param, di C read the stack argument before the pushes below change its offset from %rsp
71 mov up_param, up C free %rdx, which the mul instructions in the loop overwrite
72 push %r15 C save the callee-saved registers that back md1, t0, t1, u1 and u2
73 push %r14
74 push %r13
75 push %r12
76 push %rbx
77 C Load the two most significant dividend limbs into u2:u1 (so un >= 2 is assumed).
78 mov -16(up, un, 8), u1
79 mov -8(up, un, 8), u2
80 C If u2:u1 >= d1:d0 replace it by the difference and set qh = 1, otherwise keep it and set qh = 0.
81 mov u1, t0
82 mov u2, t1
83 sub d0, t0
84 sbb d1, t1 C carry set iff u2:u1 < d1:d0
85 cmovnc t0, u1 C cmov does not touch the flags, so carry survives to the sbb below
86 cmovnc t1, u2
87 C push qh which is !carry
88 sbb %rax, %rax C %rax = -carry
89 inc %rax C %rax = 1 - carry
90 push %rax
91 lea -2(un), un C un = number of quotient limbs still to produce
92 mov d1, md1
93 neg md1 C md1 = -d1, lets the loop form u1 - q*d1 with imul + lea
94 C The index is decremented at L(next), so enter the loop through its bottom test.
95 jmp L(next)
97 ALIGN(16)
98 L(loop):
99 C udiv_qr_3by2 (q,u2,u1,u2,u1,n0, d1,d0,di)
100 C Based on the optimized divrem_2.asm code.
101 C On entry u2:u1 is the running remainder; on exit t1 is the quotient limb and u2:u1 the new remainder.
102 mov di, %rax
103 mul u2 C %rdx:%rax = di * u2
104 mov u1, t0
105 add %rax, t0 C q0 in t0
106 adc u2, %rdx C (q, q0) = di*u2 + u2:u1
107 mov %rdx, t1 C q in t1
108 imul md1, %rdx C %rdx = low limb of -q*d1
109 mov d0, %rax
110 lea (%rdx, u1), u2 C u2 = u1 - q*d1, flags untouched
111 mul t1 C %rdx:%rax = d0 * q
112 mov (up, un, 8), u1 C next dividend limb becomes the new low limb
113 sub d0, u1
114 sbb d1, u2 C u2:u1 -= d1:d0
115 sub %rax, u1
116 sbb %rdx, u2 C u2:u1 -= d0*q
117 xor R32(%rax), R32(%rax)
118 xor R32(%rdx), R32(%rdx) C %rdx:%rax = 0 unless the cmovs below load d1:d0
119 cmp t0, u2 C carry set iff u2 < q0
120 cmovnc d0, %rax
121 cmovnc d1, %rdx C %rdx:%rax = d1:d0 when u2 >= q0
122 adc $0, t1 C q += 1 when u2 < q0
124 add %rax, u1
125 adc %rdx, u2 C add d1:d0 back when u2 >= q0
126 cmp d1, u2
127 jae L(fix) C remainder may still be >= d1:d0, handle out of line
128 L(bck):
129 mov t1, (qp, un, 8) C store the quotient limb for this position
130 L(next):
131 sub $1, un
132 jnc L(loop) C borrow out of the index means all limbs are done
133 L(end):
134 mov u2, 8(rp) C high remainder limb
135 mov u1, (rp) C low remainder limb
137 C qh on stack
138 pop %rax
139 C Restore the callee-saved registers in reverse push order.
140 pop %rbx
141 pop %r12
142 pop %r13
143 pop %r14
144 pop %r15
145 FUNC_EXIT()
146 ret C explicit return (FUNC_EXIT is assumed not to emit one); without it control falls into L(fix)
148 L(fix): C Unlikely update. u2 >= d1
149 seta %dl C %dl = (u2 > d1), from the cmp that preceded the jae
150 cmp d0, u1
151 setae %al C %al = (u1 >= d0)
152 orb %dl, %al C "orb" form to placate Sun tools
153 je L(bck) C u2 == d1 and u1 < d0: remainder is already below d1:d0
154 inc t1
155 sub d0, u1
156 sbb d1, u2
157 jmp L(bck)
158 EPILOGUE()