target/sparc/vis_helper.c

   1 /*
   2  * VIS op helpers
   3  *
   4  *  Copyright (c) 2003-2005 Fabrice Bellard
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "qemu/osdep.h"
  21 #include "cpu.h"
  22 #include "exec/helper-proto.h"
  23
  24 /* This function uses non-native bit order */
  25 #define GET_FIELD(X, FROM, TO)                                  \
  26     ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1))
  27
  28 /* This function uses the order in the manuals, i.e. bit 0 is 2^0 */
  29 #define GET_FIELD_SP(X, FROM, TO)               \
  30     GET_FIELD(X, 63 - (TO), 63 - (FROM))
  31
  32 target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
  33 {
  34     return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) |
  35         (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) |
  36         (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) |
  37         (GET_FIELD_SP(pixel_addr, 56, 59) << 13) |
  38         (GET_FIELD_SP(pixel_addr, 35, 38) << 9) |
  39         (GET_FIELD_SP(pixel_addr, 13, 16) << 5) |
  40         (((pixel_addr >> 55) & 1) << 4) |
  41         (GET_FIELD_SP(pixel_addr, 33, 34) << 2) |
  42         GET_FIELD_SP(pixel_addr, 11, 12);
  43 }
  44
  45 #if HOST_BIG_ENDIAN
  46 #define VIS_B64(n) b[7 - (n)]
  47 #define VIS_W64(n) w[3 - (n)]
  48 #define VIS_SW64(n) sw[3 - (n)]
  49 #define VIS_L64(n) l[1 - (n)]
  50 #define VIS_B32(n) b[3 - (n)]
  51 #define VIS_W32(n) w[1 - (n)]
  52 #else
  53 #define VIS_B64(n) b[n]
  54 #define VIS_W64(n) w[n]
  55 #define VIS_SW64(n) sw[n]
  56 #define VIS_L64(n) l[n]
  57 #define VIS_B32(n) b[n]
  58 #define VIS_W32(n) w[n]
  59 #endif
  60
  61 typedef union {
  62     uint8_t b[8];
  63     uint16_t w[4];
  64     int16_t sw[4];
  65     uint32_t l[2];
  66     uint64_t ll;
  67     float64 d;
  68 } VIS64;
  69
  70 typedef union {
  71     uint8_t b[4];
  72     uint16_t w[2];
  73     uint32_t l;
  74     float32 f;
  75 } VIS32;
  76
  77 uint64_t helper_fpmerge(uint64_t src1, uint64_t src2)
  78 {
  79     VIS64 s, d;
  80
  81     s.ll = src1;
  82     d.ll = src2;
  83
  84     /* Reverse calculation order to handle overlap */
  85     d.VIS_B64(7) = s.VIS_B64(3);
  86     d.VIS_B64(6) = d.VIS_B64(3);
  87     d.VIS_B64(5) = s.VIS_B64(2);
  88     d.VIS_B64(4) = d.VIS_B64(2);
  89     d.VIS_B64(3) = s.VIS_B64(1);
  90     d.VIS_B64(2) = d.VIS_B64(1);
  91     d.VIS_B64(1) = s.VIS_B64(0);
  92     /* d.VIS_B64(0) = d.VIS_B64(0); */
  93
  94     return d.ll;
  95 }
  96
  97 uint64_t helper_fmul8x16(uint64_t src1, uint64_t src2)
  98 {
  99     VIS64 s, d;
 100     uint32_t tmp;
 101
 102     s.ll = src1;
 103     d.ll = src2;
 104
 105 #define PMUL(r)                                                 \
 106     tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B64(r);       \
 107     if ((tmp & 0xff) > 0x7f) {                                  \
 108         tmp += 0x100;                                           \
 109     }                                                           \
 110     d.VIS_W64(r) = tmp >> 8;
 111
 112     PMUL(0);
 113     PMUL(1);
 114     PMUL(2);
 115     PMUL(3);
 116 #undef PMUL
 117
 118     return d.ll;
 119 }
 120
 121 uint64_t helper_fmul8x16al(uint64_t src1, uint64_t src2)
 122 {
 123     VIS64 s, d;
 124     uint32_t tmp;
 125
 126     s.ll = src1;
 127     d.ll = src2;
 128
 129 #define PMUL(r)                                                 \
 130     tmp = (int32_t)d.VIS_SW64(1) * (int32_t)s.VIS_B64(r);       \
 131     if ((tmp & 0xff) > 0x7f) {                                  \
 132         tmp += 0x100;                                           \
 133     }                                                           \
 134     d.VIS_W64(r) = tmp >> 8;
 135
 136     PMUL(0);
 137     PMUL(1);
 138     PMUL(2);
 139     PMUL(3);
 140 #undef PMUL
 141
 142     return d.ll;
 143 }
 144
 145 uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2)
 146 {
 147     VIS64 s, d;
 148     uint32_t tmp;
 149
 150     s.ll = src1;
 151     d.ll = src2;
 152
 153 #define PMUL(r)                                                 \
 154     tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r);       \
 155     if ((tmp & 0xff) > 0x7f) {                                  \
 156         tmp += 0x100;                                           \
 157     }                                                           \
 158     d.VIS_W64(r) = tmp >> 8;
 159
 160     PMUL(0);
 161     PMUL(1);
 162     PMUL(2);
 163     PMUL(3);
 164 #undef PMUL
 165
 166     return d.ll;
 167 }
 168
 169 uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
 170 {
 171     VIS64 s, d;
 172     uint32_t tmp;
 173
 174     s.ll = src1;
 175     d.ll = src2;
 176
 177 #define PMUL(r)                                                         \
 178     tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
 179     if ((tmp & 0xff) > 0x7f) {                                          \
 180         tmp += 0x100;                                                   \
 181     }                                                                   \
 182     d.VIS_W64(r) = tmp >> 8;
 183
 184     PMUL(0);
 185     PMUL(1);
 186     PMUL(2);
 187     PMUL(3);
 188 #undef PMUL
 189
 190     return d.ll;
 191 }
 192
 193 uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2)
 194 {
 195     VIS64 s, d;
 196     uint32_t tmp;
 197
 198     s.ll = src1;
 199     d.ll = src2;
 200
 201 #define PMUL(r)                                                         \
 202     tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
 203     if ((tmp & 0xff) > 0x7f) {                                          \
 204         tmp += 0x100;                                                   \
 205     }                                                                   \
 206     d.VIS_W64(r) = tmp >> 8;
 207
 208     PMUL(0);
 209     PMUL(1);
 210     PMUL(2);
 211     PMUL(3);
 212 #undef PMUL
 213
 214     return d.ll;
 215 }
 216
 217 uint64_t helper_fmuld8sux16(uint64_t src1, uint64_t src2)
 218 {
 219     VIS64 s, d;
 220     uint32_t tmp;
 221
 222     s.ll = src1;
 223     d.ll = src2;
 224
 225 #define PMUL(r)                                                         \
 226     tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
 227     if ((tmp & 0xff) > 0x7f) {                                          \
 228         tmp += 0x100;                                                   \
 229     }                                                                   \
 230     d.VIS_L64(r) = tmp;
 231
 232     /* Reverse calculation order to handle overlap */
 233     PMUL(1);
 234     PMUL(0);
 235 #undef PMUL
 236
 237     return d.ll;
 238 }
 239
 240 uint64_t helper_fmuld8ulx16(uint64_t src1, uint64_t src2)
 241 {
 242     VIS64 s, d;
 243     uint32_t tmp;
 244
 245     s.ll = src1;
 246     d.ll = src2;
 247
 248 #define PMUL(r)                                                         \
 249     tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
 250     if ((tmp & 0xff) > 0x7f) {                                          \
 251         tmp += 0x100;                                                   \
 252     }                                                                   \
 253     d.VIS_L64(r) = tmp;
 254
 255     /* Reverse calculation order to handle overlap */
 256     PMUL(1);
 257     PMUL(0);
 258 #undef PMUL
 259
 260     return d.ll;
 261 }
 262
 263 uint64_t helper_fexpand(uint64_t src1, uint64_t src2)
 264 {
 265     VIS32 s;
 266     VIS64 d;
 267
 268     s.l = (uint32_t)src1;
 269     d.ll = src2;
 270     d.VIS_W64(0) = s.VIS_B32(0) << 4;
 271     d.VIS_W64(1) = s.VIS_B32(1) << 4;
 272     d.VIS_W64(2) = s.VIS_B32(2) << 4;
 273     d.VIS_W64(3) = s.VIS_B32(3) << 4;
 274
 275     return d.ll;
 276 }
 277
 278 #define VIS_CMPHELPER(name, F)                                    \
 279     uint64_t name##16(uint64_t src1, uint64_t src2)               \
 280     {                                                             \
 281         VIS64 s, d;                                               \
 282                                                                   \
 283         s.ll = src1;                                              \
 284         d.ll = src2;                                              \
 285                                                                   \
 286         d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0;     \
 287         d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0;    \
 288         d.VIS_W64(0) |= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0;    \
 289         d.VIS_W64(0) |= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0;    \
 290         d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0;           \
 291                                                                   \
 292         return d.ll;                                              \
 293     }                                                             \
 294                                                                   \
 295     uint64_t name##32(uint64_t src1, uint64_t src2)               \
 296     {                                                             \
 297         VIS64 s, d;                                               \
 298                                                                   \
 299         s.ll = src1;                                              \
 300         d.ll = src2;                                              \
 301                                                                   \
 302         d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0;     \
 303         d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0;    \
 304         d.VIS_L64(1) = 0;                                         \
 305                                                                   \
 306         return d.ll;                                              \
 307     }
 308
 309 #define FCMPGT(a, b) ((a) > (b))
 310 #define FCMPEQ(a, b) ((a) == (b))
 311 #define FCMPLE(a, b) ((a) <= (b))
 312 #define FCMPNE(a, b) ((a) != (b))
 313
 314 VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
 315 VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
 316 VIS_CMPHELPER(helper_fcmple, FCMPLE)
 317 VIS_CMPHELPER(helper_fcmpne, FCMPNE)
 318
 319 uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2)
 320 {
 321     int i;
 322     for (i = 0; i < 8; i++) {
 323         int s1, s2;
 324
 325         s1 = (src1 >> (56 - (i * 8))) & 0xff;
 326         s2 = (src2 >> (56 - (i * 8))) & 0xff;
 327
 328         /* Absolute value of difference. */
 329         s1 -= s2;
 330         if (s1 < 0) {
 331             s1 = -s1;
 332         }
 333
 334         sum += s1;
 335     }
 336
 337     return sum;
 338 }
 339
 340 uint32_t helper_fpack16(uint64_t gsr, uint64_t rs2)
 341 {
 342     int scale = (gsr >> 3) & 0xf;
 343     uint32_t ret = 0;
 344     int byte;
 345
 346     for (byte = 0; byte < 4; byte++) {
 347         uint32_t val;
 348         int16_t src = rs2 >> (byte * 16);
 349         int32_t scaled = src << scale;
 350         int32_t from_fixed = scaled >> 7;
 351
 352         val = (from_fixed < 0 ?  0 :
 353                from_fixed > 255 ?  255 : from_fixed);
 354
 355         ret |= val << (8 * byte);
 356     }
 357
 358     return ret;
 359 }
 360
 361 uint64_t helper_fpack32(uint64_t gsr, uint64_t rs1, uint64_t rs2)
 362 {
 363     int scale = (gsr >> 3) & 0x1f;
 364     uint64_t ret = 0;
 365     int word;
 366
 367     ret = (rs1 << 8) & ~(0x000000ff000000ffULL);
 368     for (word = 0; word < 2; word++) {
 369         uint64_t val;
 370         int32_t src = rs2 >> (word * 32);
 371         int64_t scaled = (int64_t)src << scale;
 372         int64_t from_fixed = scaled >> 23;
 373
 374         val = (from_fixed < 0 ? 0 :
 375                (from_fixed > 255) ? 255 : from_fixed);
 376
 377         ret |= val << (32 * word);
 378     }
 379
 380     return ret;
 381 }
 382
 383 uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2)
 384 {
 385     int scale = (gsr >> 3) & 0x1f;
 386     uint32_t ret = 0;
 387     int word;
 388
 389     for (word = 0; word < 2; word++) {
 390         uint32_t val;
 391         int32_t src = rs2 >> (word * 32);
 392         int64_t scaled = (int64_t)src << scale;
 393         int64_t from_fixed = scaled >> 16;
 394
 395         val = (from_fixed < -32768 ? -32768 :
 396                from_fixed > 32767 ?  32767 : from_fixed);
 397
 398         ret |= (val & 0xffff) << (word * 16);
 399     }
 400
 401     return ret;
 402 }
 403
 404 uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2)
 405 {
 406     union {
 407         uint64_t ll[2];
 408         uint8_t b[16];
 409     } s;
 410     VIS64 r;
 411     uint32_t i, mask, host;
 412
 413     /* Set up S such that we can index across all of the bytes.  */
 414 #if HOST_BIG_ENDIAN
 415     s.ll[0] = src1;
 416     s.ll[1] = src2;
 417     host = 0;
 418 #else
 419     s.ll[1] = src1;
 420     s.ll[0] = src2;
 421     host = 15;
 422 #endif
 423     mask = gsr >> 32;
 424
 425     for (i = 0; i < 8; ++i) {
 426         unsigned e = (mask >> (28 - i*4)) & 0xf;
 427         r.VIS_B64(i) = s.b[e ^ host];
 428     }
 429
 430     return r.ll;
 431 }