beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / x86 / bdiv_dbm1c.asm
blob0288c475cdc6073135e8d13d49813d7e5f84d15a
1 dnl x86 mpn_bdiv_dbm1.
3 dnl Copyright 2008, 2011 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
6 dnl
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of either:
9 dnl
10 dnl * the GNU Lesser General Public License as published by the Free
11 dnl Software Foundation; either version 3 of the License, or (at your
12 dnl option) any later version.
13 dnl
14 dnl or
15 dnl
16 dnl * the GNU General Public License as published by the Free Software
17 dnl Foundation; either version 2 of the License, or (at your option) any
18 dnl later version.
19 dnl
20 dnl or both in parallel, as here.
21 dnl
22 dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 dnl for more details.
26 dnl
27 dnl You should have received copies of the GNU General Public License and the
28 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 dnl see https://www.gnu.org/licenses/.
31 include(`../config.m4')
33 C cycles/limb
34 C P5
35 C P6 model 0-8,10-12
36 C P6 model 9 (Banias)
37 C P6 model 13 (Dothan) 5.1
38 C P4 model 0 (Willamette)
39 C P4 model 1 (?)
40 C P4 model 2 (Northwood) 13.67
41 C P4 model 3 (Prescott)
42 C P4 model 4 (Nocona)
43 C Intel Atom
44 C AMD K6
45 C AMD K7 3.5
46 C AMD K8
47 C AMD K10
50 C TODO
51 C * Optimize for more x86 processors
C mpn_bdiv_dbm1c
C
C Arguments are taken from the stack (32-bit x86); offsets are relative to
C %esp on entry:
C    4(%esp)  qp   destination limb pointer
C    8(%esp)  ap   source limb pointer
C   12(%esp)  n    limb count; limb 0 is processed unconditionally, so the
C                  caller must pass n >= 1
C   16(%esp)  d    32-bit multiplier
C   20(%esp)  h    initial value of the running accumulator (the name h is
C                  an assumption taken from the usual C prototype -- confirm
C                  against the declaration in gmp-impl.h)
C
C For i = 0 .. n-1, with hi:lo the 64-bit product ap[i] * d:
C   acc    = acc - lo            (borrow left in CF)
C   qp[i]  = acc
C   acc    = acc - hi - borrow
C The final acc is returned in %eax.
C
C Register roles after the prologue:
C   %ecx  d                      %ebx  acc
C   %esi  ap cursor (biased)     %edi  qp cursor (biased)
C   %ebp  limb counter           %eax, %edx  product lo, hi
C %esi, %edi, %ebp and %ebx are saved and restored; %eax, %ecx, %edx and the
C flags are clobbered.
C
C The main loop handles four limbs per pass.  Limb 0 is done up front, then
C n mod 4 selects the loop entry point and the pointers are biased so that
C the fixed displacements inside the loop land on limb 1.

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_bdiv_dbm1c)
	C Each push moves the remaining arguments up by 4 bytes, hence the
	C changing displacements below.
	mov	16(%esp), %ecx		C d
	push	%esi
	mov	12(%esp), %esi		C ap
	push	%edi
	mov	12(%esp), %edi		C qp
	push	%ebp
	mov	24(%esp), %ebp		C n
	push	%ebx

	C Limb 0, handled before any dispatch on n.
	mov	(%esi), %eax
	mul	%ecx			C edx:eax = ap[0] * d
	mov	36(%esp), %ebx		C acc = fifth argument
	sub	%eax, %ebx
	mov	%ebx, (%edi)		C qp[0]; mov leaves CF intact for the sbb
	sbb	%edx, %ebx

	C Dispatch on n mod 4.
	mov	%ebp, %eax
	and	$3, %eax
	jz	L(b0)			C n mod 4 == 0
	cmp	$2, %eax
	jc	L(b1)			C n mod 4 == 1 (below 2, and not 0)
	jz	L(b2)			C n mod 4 == 2
					C otherwise n mod 4 == 3

	C n mod 4 == 3: bias so that L(3) reads ap[1] at 12(%esi) and writes
	C qp[1] at -4(%edi).
L(b3):	lea	-8(%esi), %esi
	lea	8(%edi), %edi
	add	$-3, %ebp
	jmp	L(3)

	C n mod 4 == 0: preload ap[1]; L(0) writes qp[1] at -8(%edi).
L(b0):	mov	4(%esi), %eax
	lea	-4(%esi), %esi
	lea	12(%edi), %edi
	add	$-4, %ebp
	jmp	L(0)

	C n mod 4 == 2: preload ap[1]; L(2) writes qp[1] at 0(%edi).
L(b2):	mov	4(%esi), %eax
	lea	4(%esi), %esi
	lea	4(%edi), %edi
	add	$-2, %ebp
	jmp	L(2)

	C Main loop, four limbs per pass.  Loads for the next limb and the
	C lea pointer updates are interleaved with the sub/sbb pairs; neither
	C mov nor lea alters CF, so the borrow from each sub reaches its sbb.
	ALIGN(8)
L(top):	mov	4(%esi), %eax
	mul	%ecx
	lea	16(%edi), %edi		C advance qp cursor by four limbs
	sub	%eax, %ebx
	mov	8(%esi), %eax		C load next source limb early
	mov	%ebx, -12(%edi)
	sbb	%edx, %ebx
L(0):	mul	%ecx
	sub	%eax, %ebx
	mov	%ebx, -8(%edi)
	sbb	%edx, %ebx
L(3):	mov	12(%esi), %eax
	mul	%ecx
	sub	%eax, %ebx
	mov	%ebx, -4(%edi)
	mov	16(%esi), %eax		C load next source limb early
	lea	16(%esi), %esi		C advance ap cursor by four limbs
	sbb	%edx, %ebx
L(2):	mul	%ecx
	sub	%eax, %ebx
	mov	%ebx, 0(%edi)
	sbb	%edx, %ebx
	C n mod 4 == 1 enters here with unbiased pointers and nothing more to
	C do before the loop test.
L(b1):	add	$-4, %ebp
	jns	L(top)			C loop while the counter stays non-negative

	mov	%ebx, %eax		C return value = final acc
	pop	%ebx
	pop	%ebp
	pop	%edi
	pop	%esi
	C Explicit return.  Without it execution runs past the end of the
	C function into whatever follows in the text section.  (EPILOGUE is
	C presumed to emit only end-of-symbol bookkeeping, not a return --
	C see its definition in asm-defs.m4.)
	ret
EPILOGUE()