target-alpha: Add vector implementation for CMPBGE
[qemu.git] / target-alpha / int_helper.c
blob29e927f53fe3ae94d67137a9affd18b4b34fb33c
/*
 *  Helpers for integer and multimedia instructions.
 *
 *  Copyright (c) 2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
20 #include "cpu.h"
21 #include "exec/helper-proto.h"
22 #include "qemu/host-utils.h"
/* Population-count helper: hands ARG to ctpop64() and returns its result.  */
uint64_t helper_ctpop(uint64_t arg)
{
    return ctpop64(arg);
}
/* Count-leading-zeros helper: hands ARG to clz64() and returns its result.  */
uint64_t helper_ctlz(uint64_t arg)
{
    return clz64(arg);
}
/* Count-trailing-zeros helper: hands ARG to ctz64() and returns its result.  */
uint64_t helper_cttz(uint64_t arg)
{
    return ctz64(arg);
}
/*
 * Keep the bytes of VAL selected by MSKB and clear all the others.
 *
 * Bit i of MSKB (i = 0..7) selects byte i of VAL, i.e. bits 8*i+7..8*i.
 * Bits of MSKB above bit 7 take no part in the result.
 */
uint64_t helper_zapnot(uint64_t val, uint64_t mskb)
{
    uint64_t mask;

    /* Negating the isolated selector bit yields either 0 or a value with
       every bit from that position upwards set.  Selector bit i always
       sits below byte lane i, so the AND gives the full lane or nothing.  */
    mask  = -(mskb & 0x01) & 0x00000000000000ffull;
    mask |= -(mskb & 0x02) & 0x000000000000ff00ull;
    mask |= -(mskb & 0x04) & 0x0000000000ff0000ull;
    mask |= -(mskb & 0x08) & 0x00000000ff000000ull;
    mask |= -(mskb & 0x10) & 0x000000ff00000000ull;
    mask |= -(mskb & 0x20) & 0x0000ff0000000000ull;
    mask |= -(mskb & 0x40) & 0x00ff000000000000ull;
    mask |= -(mskb & 0x80) & 0xff00000000000000ull;

    return val & mask;
}
/*
 * Clear the bytes of VAL selected by MASK and keep all the others.
 * Implemented as helper_zapnot() with the byte selector inverted.
 */
uint64_t helper_zap(uint64_t val, uint64_t mask)
{
    return helper_zapnot(val, ~mask);
}
/*
 * Byte-wise unsigned "greater than or equal" compare.
 *
 * Bit i of the result (i = 0..7) is set when byte i of OP1 is >= byte i
 * of OP2, both taken as unsigned 8-bit values.  Bits 8..63 of the result
 * are always zero.
 */
uint64_t helper_cmpbge(uint64_t op1, uint64_t op2)
{
#if defined(__SSE2__)
    uint64_t r;

    /* The cmpbge instruction is heavily used in the implementation of
       every string function on Alpha.  We can do much better than either
       the default loop below, or even an unrolled version by using the
       native vector support.  */
    {
        typedef uint64_t Q __attribute__((vector_size(16)));
        typedef uint8_t B __attribute__((vector_size(16)));

        /* Only element 0 carries data; the zero-filled element 1 is
           compared as well but never read back.  */
        Q q1 = (Q){ op1, 0 };
        Q q2 = (Q){ op2, 0 };

        /* Every byte lane becomes all-ones where op1 >= op2, else zero.  */
        q1 = (Q)((B)q1 >= (B)q2);

        r = q1[0];
    }

    /* Select only one bit from each byte.  */
    r &= 0x0101010101010101ull;

    /* Collect the bits into the bottom byte.  */
    /* .......A.......B.......C.......D.......E.......F.......G.......H */
    r |= r >> (8 - 1);

    /* .......A......AB......BC......CD......DE......EF......FG......GH */
    r |= r >> (16 - 2);

    /* .......A......AB.....ABC....ABCD....BCDE....CDEF....DEFG....EFGH */
    r |= r >> (32 - 4);

    /* .......A......AB.....ABC....ABCD...ABCDE..ABCDEF.ABCDEFGABCDEFGH */
    /* Return only the low 8 bits.  */
    return r & 0xff;
#else
    uint8_t opa, opb, res;
    int i;

    /* Portable fallback: compare one byte lane per iteration.  */
    res = 0;
    for (i = 0; i < 8; i++) {
        opa = op1 >> (i * 8);
        opb = op2 >> (i * 8);
        if (opa >= opb) {
            res |= 1 << i;
        }
    }
    return res;
#endif
}
/*
 * Byte-wise unsigned minimum: byte i of the result is the smaller of
 * byte i of OP1 and byte i of OP2, compared as unsigned 8-bit values.
 */
uint64_t helper_minub8(uint64_t op1, uint64_t op2)
{
    uint64_t res = 0;
    uint8_t opa, opb, opr;
    int i;

    for (i = 0; i < 8; ++i) {
        /* The narrowing assignment keeps only the low byte of the shift.  */
        opa = op1 >> (i * 8);
        opb = op2 >> (i * 8);
        opr = opa < opb ? opa : opb;
        res |= (uint64_t)opr << (i * 8);
    }
    return res;
}
/*
 * Byte-wise signed minimum: byte i of the result is the smaller of
 * byte i of OP1 and byte i of OP2, compared as signed 8-bit values.
 */
uint64_t helper_minsb8(uint64_t op1, uint64_t op2)
{
    uint64_t res = 0;
    int8_t opa, opb;
    uint8_t opr;
    int i;

    for (i = 0; i < 8; ++i) {
        opa = op1 >> (i * 8);
        opb = op2 >> (i * 8);
        /* Store the winner as unsigned so that widening it below does
           not sign-extend into the neighbouring byte lanes.  */
        opr = opa < opb ? opa : opb;
        res |= (uint64_t)opr << (i * 8);
    }
    return res;
}
/*
 * Word-wise unsigned minimum: 16-bit word i of the result (i = 0..3) is
 * the smaller of word i of OP1 and word i of OP2, compared as unsigned.
 */
uint64_t helper_minuw4(uint64_t op1, uint64_t op2)
{
    uint64_t res = 0;
    uint16_t opa, opb, opr;
    int i;

    for (i = 0; i < 4; ++i) {
        /* The narrowing assignment keeps only the low word of the shift.  */
        opa = op1 >> (i * 16);
        opb = op2 >> (i * 16);
        opr = opa < opb ? opa : opb;
        res |= (uint64_t)opr << (i * 16);
    }
    return res;
}
/*
 * Word-wise signed minimum: 16-bit word i of the result (i = 0..3) is
 * the smaller of word i of OP1 and word i of OP2, compared as signed
 * 16-bit values.
 */
uint64_t helper_minsw4(uint64_t op1, uint64_t op2)
{
    uint64_t res = 0;
    int16_t opa, opb;
    uint16_t opr;
    int i;

    for (i = 0; i < 4; ++i) {
        opa = op1 >> (i * 16);
        opb = op2 >> (i * 16);
        /* Store the winner as unsigned so that widening it below does
           not sign-extend into the neighbouring word lanes.  */
        opr = opa < opb ? opa : opb;
        res |= (uint64_t)opr << (i * 16);
    }
    return res;
}
/*
 * Byte-wise unsigned maximum: byte i of the result is the larger of
 * byte i of OP1 and byte i of OP2, compared as unsigned 8-bit values.
 */
uint64_t helper_maxub8(uint64_t op1, uint64_t op2)
{
    uint64_t res = 0;
    uint8_t opa, opb, opr;
    int i;

    for (i = 0; i < 8; ++i) {
        /* The narrowing assignment keeps only the low byte of the shift.  */
        opa = op1 >> (i * 8);
        opb = op2 >> (i * 8);
        opr = opa > opb ? opa : opb;
        res |= (uint64_t)opr << (i * 8);
    }
    return res;
}
/*
 * Byte-wise signed maximum: byte i of the result is the larger of
 * byte i of OP1 and byte i of OP2, compared as signed 8-bit values.
 */
uint64_t helper_maxsb8(uint64_t op1, uint64_t op2)
{
    uint64_t res = 0;
    int8_t opa, opb;
    uint8_t opr;
    int i;

    for (i = 0; i < 8; ++i) {
        opa = op1 >> (i * 8);
        opb = op2 >> (i * 8);
        /* Store the winner as unsigned so that widening it below does
           not sign-extend into the neighbouring byte lanes.  */
        opr = opa > opb ? opa : opb;
        res |= (uint64_t)opr << (i * 8);
    }
    return res;
}
/*
 * Word-wise unsigned maximum: 16-bit word i of the result (i = 0..3) is
 * the larger of word i of OP1 and word i of OP2, compared as unsigned.
 */
uint64_t helper_maxuw4(uint64_t op1, uint64_t op2)
{
    uint64_t res = 0;
    uint16_t opa, opb, opr;
    int i;

    for (i = 0; i < 4; ++i) {
        /* The narrowing assignment keeps only the low word of the shift.  */
        opa = op1 >> (i * 16);
        opb = op2 >> (i * 16);
        opr = opa > opb ? opa : opb;
        res |= (uint64_t)opr << (i * 16);
    }
    return res;
}
/*
 * Word-wise signed maximum: 16-bit word i of the result (i = 0..3) is
 * the larger of word i of OP1 and word i of OP2, compared as signed
 * 16-bit values.
 */
uint64_t helper_maxsw4(uint64_t op1, uint64_t op2)
{
    uint64_t res = 0;
    int16_t opa, opb;
    uint16_t opr;
    int i;

    for (i = 0; i < 4; ++i) {
        opa = op1 >> (i * 16);
        opb = op2 >> (i * 16);
        /* Store the winner as unsigned so that widening it below does
           not sign-extend into the neighbouring word lanes.  */
        opr = opa > opb ? opa : opb;
        res |= (uint64_t)opr << (i * 16);
    }
    return res;
}
/*
 * Sum of absolute byte differences: for each of the eight byte lanes,
 * take |byte i of OP1 - byte i of OP2| on unsigned 8-bit values and
 * return the total of the eight differences.
 */
uint64_t helper_perr(uint64_t op1, uint64_t op2)
{
    uint64_t res = 0;
    uint8_t opa, opb, opr;
    int i;

    for (i = 0; i < 8; ++i) {
        opa = op1 >> (i * 8);
        opb = op2 >> (i * 8);
        /* Always subtract the smaller from the larger so the difference
           fits in an unsigned byte.  */
        if (opa >= opb) {
            opr = opa - opb;
        } else {
            opr = opb - opa;
        }
        res += opr;
    }
    return res;
}
/*
 * Pack the low byte of each 32-bit half of OP1 into the low 16 bits of
 * the result: bits 7..0 stay in place and bits 39..32 move to bits 15..8.
 * All other result bits are zero.
 */
uint64_t helper_pklb(uint64_t op1)
{
    return (op1 & 0xffull) | ((op1 >> 24) & 0xff00ull);
}
/*
 * Pack the low byte of each 16-bit word of OP1 into the low 32 bits of
 * the result: the low byte of word i (i = 0..3) lands in result byte i.
 * The upper 32 bits of the result are zero.
 */
uint64_t helper_pkwb(uint64_t op1)
{
    return ((op1 & 0xffull)
            | ((op1 >> 8) & 0xff00ull)
            | ((op1 >> 16) & 0xff0000ull)
            | ((op1 >> 24) & 0xff000000ull));
}
/*
 * Spread the two low bytes of OP1 across the 32-bit halves of the
 * result: byte 0 stays at bits 7..0 and byte 1 moves to bits 39..32.
 * All other result bits are zero.
 */
uint64_t helper_unpkbl(uint64_t op1)
{
    return (op1 & 0xffull) | ((op1 & 0xff00ull) << 24);
}
/*
 * Spread the four low bytes of OP1 across the 16-bit words of the
 * result: byte i of OP1 (i = 0..3) becomes the low byte of word i.
 * The high byte of every result word is zero.
 */
uint64_t helper_unpkbw(uint64_t op1)
{
    return ((op1 & 0xffull)
            | ((op1 & 0xff00ull) << 8)
            | ((op1 & 0xff0000ull) << 16)
            | ((op1 & 0xff000000ull) << 24));
}
283 void helper_check_overflow(CPUAlphaState *env, uint64_t op1, uint64_t op2)
285 if (unlikely(op1 != op2)) {
286 arith_excp(env, GETPC(), EXC_M_IOV, 0);