target/sparc/vis_helper.c

   1 /*
   2  * VIS op helpers
   3  *
   4  *  Copyright (c) 2003-2005 Fabrice Bellard
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "qemu/osdep.h"
  21 #include "cpu.h"
  22 #include "exec/helper-proto.h"
  23
/*
 * Extract the inclusive bit field FROM..TO of the 64-bit value X and return
 * it right-aligned.  Bits are numbered in non-native order: bit 0 is the most
 * significant bit (2^63) and bit 63 the least significant one.
 *
 * The field width is TO - FROM + 1; an empty field (FROM == TO + 1) yields 0.
 * FROM must not exceed TO + 1 and the width must stay below 64, otherwise the
 * shift count used to build the mask is out of range.
 */
#define GET_FIELD(X, FROM, TO)                                  \
    ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1))

/*
 * Same extraction using the bit order found in the manuals, i.e. bit 0 is
 * 2^0.  FROM is the least significant and TO the most significant bit of the
 * field; the bounds are mirrored and handed to GET_FIELD().
 */
#define GET_FIELD_SP(X, FROM, TO)               \
    GET_FIELD(X, 63 - (TO), 63 - (FROM))
  31
  32 target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
  33 {
  34     return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) |
  35         (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) |
  36         (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) |
  37         (GET_FIELD_SP(pixel_addr, 56, 59) << 13) |
  38         (GET_FIELD_SP(pixel_addr, 35, 38) << 9) |
  39         (GET_FIELD_SP(pixel_addr, 13, 16) << 5) |
  40         (((pixel_addr >> 55) & 1) << 4) |
  41         (GET_FIELD_SP(pixel_addr, 33, 34) << 2) |
  42         GET_FIELD_SP(pixel_addr, 11, 12);
  43 }
  44
/*
 * Lane accessors for the VIS64 / VIS32 unions.
 *
 * Each macro expands to a union member subscript, so it is used as
 * "value.VIS_W64(n)".  Lane n is always counted from the least significant
 * end of the integer value: on big-endian hosts the array index is mirrored,
 * on little-endian hosts it is used as is.
 *
 *   VIS_B64 / VIS_W64 / VIS_SW64 / VIS_L64: 8-, 16-, signed 16- and 32-bit
 *                                           lanes of a VIS64
 *   VIS_B32 / VIS_W32:                      8- and 16-bit lanes of a VIS32
 */
#ifdef HOST_WORDS_BIGENDIAN
#define VIS_B64(n) b[7 - (n)]
#define VIS_W64(n) w[3 - (n)]
#define VIS_SW64(n) sw[3 - (n)]
#define VIS_L64(n) l[1 - (n)]
#define VIS_B32(n) b[3 - (n)]
#define VIS_W32(n) w[1 - (n)]
#else
#define VIS_B64(n) b[n]
#define VIS_W64(n) w[n]
#define VIS_SW64(n) sw[n]
#define VIS_L64(n) l[n]
#define VIS_B32(n) b[n]
#define VIS_W32(n) w[n]
#endif
  60
/*
 * A 64-bit VIS operand viewed as packed lanes.  All members alias the same
 * eight bytes; index the arrays through the VIS_*64() accessors so that the
 * lane numbering does not depend on host byte order.
 */
typedef union {
    uint8_t b[8];       /* eight unsigned 8-bit lanes */
    uint16_t w[4];      /* four unsigned 16-bit lanes */
    int16_t sw[4];      /* the same four 16-bit lanes, read as signed */
    uint32_t l[2];      /* two unsigned 32-bit lanes */
    uint64_t ll;        /* the whole 64-bit value */
    float64 d;          /* float64 view of the same bits */
} VIS64;
  69
/*
 * A 32-bit VIS operand viewed as packed lanes.  All members alias the same
 * four bytes; index the arrays through the VIS_*32() accessors so that the
 * lane numbering does not depend on host byte order.
 */
typedef union {
    uint8_t b[4];       /* four unsigned 8-bit lanes */
    uint16_t w[2];      /* two unsigned 16-bit lanes */
    uint32_t l;         /* the whole 32-bit value */
    float32 f;          /* float32 view of the same bits */
} VIS32;
  76
  77 uint64_t helper_fpmerge(uint64_t src1, uint64_t src2)
  78 {
  79     VIS64 s, d;
  80
  81     s.ll = src1;
  82     d.ll = src2;
  83
  84     /* Reverse calculation order to handle overlap */
  85     d.VIS_B64(7) = s.VIS_B64(3);
  86     d.VIS_B64(6) = d.VIS_B64(3);
  87     d.VIS_B64(5) = s.VIS_B64(2);
  88     d.VIS_B64(4) = d.VIS_B64(2);
  89     d.VIS_B64(3) = s.VIS_B64(1);
  90     d.VIS_B64(2) = d.VIS_B64(1);
  91     d.VIS_B64(1) = s.VIS_B64(0);
  92     /* d.VIS_B64(0) = d.VIS_B64(0); */
  93
  94     return d.ll;
  95 }
  96
  97 uint64_t helper_fmul8x16(uint64_t src1, uint64_t src2)
  98 {
  99     VIS64 s, d;
 100     uint32_t tmp;
 101
 102     s.ll = src1;
 103     d.ll = src2;
 104
 105 #define PMUL(r)                                                 \
 106     tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B64(r);       \
 107     if ((tmp & 0xff) > 0x7f) {                                  \
 108         tmp += 0x100;                                           \
 109     }                                                           \
 110     d.VIS_W64(r) = tmp >> 8;
 111
 112     PMUL(0);
 113     PMUL(1);
 114     PMUL(2);
 115     PMUL(3);
 116 #undef PMUL
 117
 118     return d.ll;
 119 }
 120
 121 uint64_t helper_fmul8x16al(uint64_t src1, uint64_t src2)
 122 {
 123     VIS64 s, d;
 124     uint32_t tmp;
 125
 126     s.ll = src1;
 127     d.ll = src2;
 128
 129 #define PMUL(r)                                                 \
 130     tmp = (int32_t)d.VIS_SW64(1) * (int32_t)s.VIS_B64(r);       \
 131     if ((tmp & 0xff) > 0x7f) {                                  \
 132         tmp += 0x100;                                           \
 133     }                                                           \
 134     d.VIS_W64(r) = tmp >> 8;
 135
 136     PMUL(0);
 137     PMUL(1);
 138     PMUL(2);
 139     PMUL(3);
 140 #undef PMUL
 141
 142     return d.ll;
 143 }
 144
 145 uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2)
 146 {
 147     VIS64 s, d;
 148     uint32_t tmp;
 149
 150     s.ll = src1;
 151     d.ll = src2;
 152
 153 #define PMUL(r)                                                 \
 154     tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r);       \
 155     if ((tmp & 0xff) > 0x7f) {                                  \
 156         tmp += 0x100;                                           \
 157     }                                                           \
 158     d.VIS_W64(r) = tmp >> 8;
 159
 160     PMUL(0);
 161     PMUL(1);
 162     PMUL(2);
 163     PMUL(3);
 164 #undef PMUL
 165
 166     return d.ll;
 167 }
 168
 169 uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
 170 {
 171     VIS64 s, d;
 172     uint32_t tmp;
 173
 174     s.ll = src1;
 175     d.ll = src2;
 176
 177 #define PMUL(r)                                                         \
 178     tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
 179     if ((tmp & 0xff) > 0x7f) {                                          \
 180         tmp += 0x100;                                                   \
 181     }                                                                   \
 182     d.VIS_W64(r) = tmp >> 8;
 183
 184     PMUL(0);
 185     PMUL(1);
 186     PMUL(2);
 187     PMUL(3);
 188 #undef PMUL
 189
 190     return d.ll;
 191 }
 192
 193 uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2)
 194 {
 195     VIS64 s, d;
 196     uint32_t tmp;
 197
 198     s.ll = src1;
 199     d.ll = src2;
 200
 201 #define PMUL(r)                                                         \
 202     tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
 203     if ((tmp & 0xff) > 0x7f) {                                          \
 204         tmp += 0x100;                                                   \
 205     }                                                                   \
 206     d.VIS_W64(r) = tmp >> 8;
 207
 208     PMUL(0);
 209     PMUL(1);
 210     PMUL(2);
 211     PMUL(3);
 212 #undef PMUL
 213
 214     return d.ll;
 215 }
 216
 217 uint64_t helper_fmuld8sux16(uint64_t src1, uint64_t src2)
 218 {
 219     VIS64 s, d;
 220     uint32_t tmp;
 221
 222     s.ll = src1;
 223     d.ll = src2;
 224
 225 #define PMUL(r)                                                         \
 226     tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
 227     if ((tmp & 0xff) > 0x7f) {                                          \
 228         tmp += 0x100;                                                   \
 229     }                                                                   \
 230     d.VIS_L64(r) = tmp;
 231
 232     /* Reverse calculation order to handle overlap */
 233     PMUL(1);
 234     PMUL(0);
 235 #undef PMUL
 236
 237     return d.ll;
 238 }
 239
 240 uint64_t helper_fmuld8ulx16(uint64_t src1, uint64_t src2)
 241 {
 242     VIS64 s, d;
 243     uint32_t tmp;
 244
 245     s.ll = src1;
 246     d.ll = src2;
 247
 248 #define PMUL(r)                                                         \
 249     tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
 250     if ((tmp & 0xff) > 0x7f) {                                          \
 251         tmp += 0x100;                                                   \
 252     }                                                                   \
 253     d.VIS_L64(r) = tmp;
 254
 255     /* Reverse calculation order to handle overlap */
 256     PMUL(1);
 257     PMUL(0);
 258 #undef PMUL
 259
 260     return d.ll;
 261 }
 262
 263 uint64_t helper_fexpand(uint64_t src1, uint64_t src2)
 264 {
 265     VIS32 s;
 266     VIS64 d;
 267
 268     s.l = (uint32_t)src1;
 269     d.ll = src2;
 270     d.VIS_W64(0) = s.VIS_B32(0) << 4;
 271     d.VIS_W64(1) = s.VIS_B32(1) << 4;
 272     d.VIS_W64(2) = s.VIS_B32(2) << 4;
 273     d.VIS_W64(3) = s.VIS_B32(3) << 4;
 274
 275     return d.ll;
 276 }
 277
 278 #define VIS_HELPER(name, F)                             \
 279     uint64_t name##16(uint64_t src1, uint64_t src2)     \
 280     {                                                   \
 281         VIS64 s, d;                                     \
 282                                                         \
 283         s.ll = src1;                                    \
 284         d.ll = src2;                                    \
 285                                                         \
 286         d.VIS_W64(0) = F(d.VIS_W64(0), s.VIS_W64(0));   \
 287         d.VIS_W64(1) = F(d.VIS_W64(1), s.VIS_W64(1));   \
 288         d.VIS_W64(2) = F(d.VIS_W64(2), s.VIS_W64(2));   \
 289         d.VIS_W64(3) = F(d.VIS_W64(3), s.VIS_W64(3));   \
 290                                                         \
 291         return d.ll;                                    \
 292     }                                                   \
 293                                                         \
 294     uint32_t name##16s(uint32_t src1, uint32_t src2)    \
 295     {                                                   \
 296         VIS32 s, d;                                     \
 297                                                         \
 298         s.l = src1;                                     \
 299         d.l = src2;                                     \
 300                                                         \
 301         d.VIS_W32(0) = F(d.VIS_W32(0), s.VIS_W32(0));   \
 302         d.VIS_W32(1) = F(d.VIS_W32(1), s.VIS_W32(1));   \
 303                                                         \
 304         return d.l;                                     \
 305     }                                                   \
 306                                                         \
 307     uint64_t name##32(uint64_t src1, uint64_t src2)     \
 308     {                                                   \
 309         VIS64 s, d;                                     \
 310                                                         \
 311         s.ll = src1;                                    \
 312         d.ll = src2;                                    \
 313                                                         \
 314         d.VIS_L64(0) = F(d.VIS_L64(0), s.VIS_L64(0));   \
 315         d.VIS_L64(1) = F(d.VIS_L64(1), s.VIS_L64(1));   \
 316                                                         \
 317         return d.ll;                                    \
 318     }                                                   \
 319                                                         \
 320     uint32_t name##32s(uint32_t src1, uint32_t src2)    \
 321     {                                                   \
 322         VIS32 s, d;                                     \
 323                                                         \
 324         s.l = src1;                                     \
 325         d.l = src2;                                     \
 326                                                         \
 327         d.l = F(d.l, s.l);                              \
 328                                                         \
 329         return d.l;                                     \
 330     }
 331
 332 #define FADD(a, b) ((a) + (b))
 333 #define FSUB(a, b) ((a) - (b))
 334 VIS_HELPER(helper_fpadd, FADD)
 335 VIS_HELPER(helper_fpsub, FSUB)
 336
 337 #define VIS_CMPHELPER(name, F)                                    \
 338     uint64_t name##16(uint64_t src1, uint64_t src2)               \
 339     {                                                             \
 340         VIS64 s, d;                                               \
 341                                                                   \
 342         s.ll = src1;                                              \
 343         d.ll = src2;                                              \
 344                                                                   \
 345         d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0;     \
 346         d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0;    \
 347         d.VIS_W64(0) |= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0;    \
 348         d.VIS_W64(0) |= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0;    \
 349         d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0;           \
 350                                                                   \
 351         return d.ll;                                              \
 352     }                                                             \
 353                                                                   \
 354     uint64_t name##32(uint64_t src1, uint64_t src2)               \
 355     {                                                             \
 356         VIS64 s, d;                                               \
 357                                                                   \
 358         s.ll = src1;                                              \
 359         d.ll = src2;                                              \
 360                                                                   \
 361         d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0;     \
 362         d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0;    \
 363         d.VIS_L64(1) = 0;                                         \
 364                                                                   \
 365         return d.ll;                                              \
 366     }
 367
 368 #define FCMPGT(a, b) ((a) > (b))
 369 #define FCMPEQ(a, b) ((a) == (b))
 370 #define FCMPLE(a, b) ((a) <= (b))
 371 #define FCMPNE(a, b) ((a) != (b))
 372
 373 VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
 374 VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
 375 VIS_CMPHELPER(helper_fcmple, FCMPLE)
 376 VIS_CMPHELPER(helper_fcmpne, FCMPNE)
 377
 378 uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2)
 379 {
 380     int i;
 381     for (i = 0; i < 8; i++) {
 382         int s1, s2;
 383
 384         s1 = (src1 >> (56 - (i * 8))) & 0xff;
 385         s2 = (src2 >> (56 - (i * 8))) & 0xff;
 386
 387         /* Absolute value of difference. */
 388         s1 -= s2;
 389         if (s1 < 0) {
 390             s1 = -s1;
 391         }
 392
 393         sum += s1;
 394     }
 395
 396     return sum;
 397 }
 398
 399 uint32_t helper_fpack16(uint64_t gsr, uint64_t rs2)
 400 {
 401     int scale = (gsr >> 3) & 0xf;
 402     uint32_t ret = 0;
 403     int byte;
 404
 405     for (byte = 0; byte < 4; byte++) {
 406         uint32_t val;
 407         int16_t src = rs2 >> (byte * 16);
 408         int32_t scaled = src << scale;
 409         int32_t from_fixed = scaled >> 7;
 410
 411         val = (from_fixed < 0 ?  0 :
 412                from_fixed > 255 ?  255 : from_fixed);
 413
 414         ret |= val << (8 * byte);
 415     }
 416
 417     return ret;
 418 }
 419
 420 uint64_t helper_fpack32(uint64_t gsr, uint64_t rs1, uint64_t rs2)
 421 {
 422     int scale = (gsr >> 3) & 0x1f;
 423     uint64_t ret = 0;
 424     int word;
 425
 426     ret = (rs1 << 8) & ~(0x000000ff000000ffULL);
 427     for (word = 0; word < 2; word++) {
 428         uint64_t val;
 429         int32_t src = rs2 >> (word * 32);
 430         int64_t scaled = (int64_t)src << scale;
 431         int64_t from_fixed = scaled >> 23;
 432
 433         val = (from_fixed < 0 ? 0 :
 434                (from_fixed > 255) ? 255 : from_fixed);
 435
 436         ret |= val << (32 * word);
 437     }
 438
 439     return ret;
 440 }
 441
 442 uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2)
 443 {
 444     int scale = (gsr >> 3) & 0x1f;
 445     uint32_t ret = 0;
 446     int word;
 447
 448     for (word = 0; word < 2; word++) {
 449         uint32_t val;
 450         int32_t src = rs2 >> (word * 32);
 451         int64_t scaled = (int64_t)src << scale;
 452         int64_t from_fixed = scaled >> 16;
 453
 454         val = (from_fixed < -32768 ? -32768 :
 455                from_fixed > 32767 ?  32767 : from_fixed);
 456
 457         ret |= (val & 0xffff) << (word * 16);
 458     }
 459
 460     return ret;
 461 }
 462
 463 uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2)
 464 {
 465     union {
 466         uint64_t ll[2];
 467         uint8_t b[16];
 468     } s;
 469     VIS64 r;
 470     uint32_t i, mask, host;
 471
 472     /* Set up S such that we can index across all of the bytes.  */
 473 #ifdef HOST_WORDS_BIGENDIAN
 474     s.ll[0] = src1;
 475     s.ll[1] = src2;
 476     host = 0;
 477 #else
 478     s.ll[1] = src1;
 479     s.ll[0] = src2;
 480     host = 15;
 481 #endif
 482     mask = gsr >> 32;
 483
 484     for (i = 0; i < 8; ++i) {
 485         unsigned e = (mask >> (28 - i*4)) & 0xf;
 486         r.VIS_B64(i) = s.b[e ^ host];
 487     }
 488
 489     return r.ll;
 490 }