target/hexagon/arch.c

   1 /*
   2  *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
   3  *
   4  *  This program is free software; you can redistribute it and/or modify
   5  *  it under the terms of the GNU General Public License as published by
   6  *  the Free Software Foundation; either version 2 of the License, or
   7  *  (at your option) any later version.
   8  *
   9  *  This program is distributed in the hope that it will be useful,
  10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  *  GNU General Public License for more details.
  13  *
  14  *  You should have received a copy of the GNU General Public License
  15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  16  */
  17
  18 #include "qemu/osdep.h"
  19 #include "fpu/softfloat.h"
  20 #include "cpu.h"
  21 #include "fma_emu.h"
  22 #include "arch.h"
  23 #include "macros.h"
  24
   /*
    * Single-precision layout constants used by the reciprocal fixup code in
    * this file: the exponent bias, an upper bound on the biased exponent used
    * by the "near quotient overflow" test, and the number of mantissa bits.
    */
   25 #define SF_BIAS        127
   26 #define SF_MAXEXP      254
   27 #define SF_MANTBITS    23
   /*
    * NaN value written by the fixup helpers in this file: all 32 bits set,
    * so bit 22 (the bit those helpers test before raising "invalid") is set.
    */
   28 #define float32_nan    make_float32(0xffffffff)
  29
  30 /*
  31  * These three tables are used by the cabacdecbin instruction
  32  */
   /*
    * 64 rows of four byte-sized entries.  Within each row the values are
    * non-decreasing from column 0 to column 3, and the rows shrink from
    * {128, 176, 208, 240} down to the final {2, 2, 2, 2}.
    *
    * NOTE(review): presumably indexed as [probability state][quantized range]
    * and holding the range assigned to the least-probable symbol, like the
    * H.264 CABAC rangeTabLPS table -- confirm against the cabacdecbin
    * implementation, which is not part of this file.
    */
   33 const uint8_t rLPS_table_64x4[64][4] = {
   34     {128, 176, 208, 240},
   35     {128, 167, 197, 227},
   36     {128, 158, 187, 216},
   37     {123, 150, 178, 205},
   38     {116, 142, 169, 195},
   39     {111, 135, 160, 185},
   40     {105, 128, 152, 175},
   41     {100, 122, 144, 166},
   42     {95, 116, 137, 158},
   43     {90, 110, 130, 150},
   44     {85, 104, 123, 142},
   45     {81, 99, 117, 135},
   46     {77, 94, 111, 128},
   47     {73, 89, 105, 122},
   48     {69, 85, 100, 116},
   49     {66, 80, 95, 110},
   50     {62, 76, 90, 104},
   51     {59, 72, 86, 99},
   52     {56, 69, 81, 94},
   53     {53, 65, 77, 89},
   54     {51, 62, 73, 85},
   55     {48, 59, 69, 80},
   56     {46, 56, 66, 76},
   57     {43, 53, 63, 72},
   58     {41, 50, 59, 69},
   59     {39, 48, 56, 65},
   60     {37, 45, 54, 62},
   61     {35, 43, 51, 59},
   62     {33, 41, 48, 56},
   63     {32, 39, 46, 53},
   64     {30, 37, 43, 50},
   65     {29, 35, 41, 48},
   66     {27, 33, 39, 45},
   67     {26, 31, 37, 43},
   68     {24, 30, 35, 41},
   69     {23, 28, 33, 39},
   70     {22, 27, 32, 37},
   71     {21, 26, 30, 35},
   72     {20, 24, 29, 33},
   73     {19, 23, 27, 31},
   74     {18, 22, 26, 30},
   75     {17, 21, 25, 28},
   76     {16, 20, 23, 27},
   77     {15, 19, 22, 25},
   78     {14, 18, 21, 24},
   79     {14, 17, 20, 23},
   80     {13, 16, 19, 22},
   81     {12, 15, 18, 21},
   82     {12, 14, 17, 20},
   83     {11, 14, 16, 19},
   84     {11, 13, 15, 18},
   85     {10, 12, 15, 17},
   86     {10, 12, 14, 16},
   87     {9, 11, 13, 15},
   88     {9, 11, 12, 14},
   89     {8, 10, 12, 14},
   90     {8, 9, 11, 13},
   91     {7, 9, 11, 12},
   92     {7, 9, 10, 12},
   93     {7, 8, 10, 11},
   94     {6, 8, 9, 11},
   95     {6, 7, 9, 10},
   96     {6, 7, 8, 9},
   97     {2, 2, 2, 2}
   98 };
  99
  /*
   * 64-entry state transition table.  Entry i holds i + 1 for i in 0..61;
   * entries 62 and 63 hold 62 and 63, i.e. those two states map to themselves.
   *
   * NOTE(review): presumably the state entered after decoding a most-probable
   * symbol -- confirm against the cabacdecbin implementation.
   */
  100 const uint8_t AC_next_state_MPS_64[64] = {
  101     1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
  102     11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
  103     21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
  104     31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
  105     41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
  106     51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
  107     61, 62, 62, 63
  108 };
 109
 110
  /*
   * 64-entry state transition table.  No entry is larger than its own index,
   * so a lookup never moves to a higher-numbered state; entry 63 holds 63
   * (that state maps to itself).
   *
   * NOTE(review): presumably the state entered after decoding a
   * least-probable symbol -- confirm against the cabacdecbin implementation.
   */
  111 const uint8_t AC_next_state_LPS_64[64] = {
  112     0, 0, 1, 2, 2, 4, 4, 5, 6, 7,
  113     8, 9, 9, 11, 11, 12, 13, 13, 15, 15,
  114     16, 16, 18, 18, 19, 19, 21, 21, 22, 22,
  115     23, 24, 24, 25, 26, 26, 27, 27, 28, 29,
  116     29, 30, 30, 30, 31, 32, 32, 33, 33, 33,
  117     34, 34, 35, 35, 35, 36, 36, 36, 37, 37,
  118     37, 38, 38, 63
  119 };
 120
  /*
   * 64-bit masks used by interleave() and deinterleave().  Each one keeps the
   * lower half of every aligned group of 2, 4, 8, 16, 32 or 64 bits:
   * BITS keeps every even-numbered bit, PAIR the low two bits of each nibble,
   * NYBL the low nibble of each byte, BYTE the low byte of each halfword,
   * HALF the low halfword of each word, and WORD the low 32-bit word.
   */
  121 #define BITS_MASK_8 0x5555555555555555ULL
  122 #define PAIR_MASK_8 0x3333333333333333ULL
  123 #define NYBL_MASK_8 0x0f0f0f0f0f0f0f0fULL
  124 #define BYTE_MASK_8 0x00ff00ff00ff00ffULL
  125 #define HALF_MASK_8 0x0000ffff0000ffffULL
  126 #define WORD_MASK_8 0x00000000ffffffffULL
 127
 128 uint64_t interleave(uint32_t odd, uint32_t even)
 129 {
 130     /* Convert to long long */
 131     uint64_t myodd = odd;
 132     uint64_t myeven = even;
 133     /* First, spread bits out */
 134     myodd = (myodd | (myodd << 16)) & HALF_MASK_8;
 135     myeven = (myeven | (myeven << 16)) & HALF_MASK_8;
 136     myodd = (myodd | (myodd << 8)) & BYTE_MASK_8;
 137     myeven = (myeven | (myeven << 8)) & BYTE_MASK_8;
 138     myodd = (myodd | (myodd << 4)) & NYBL_MASK_8;
 139     myeven = (myeven | (myeven << 4)) & NYBL_MASK_8;
 140     myodd = (myodd | (myodd << 2)) & PAIR_MASK_8;
 141     myeven = (myeven | (myeven << 2)) & PAIR_MASK_8;
 142     myodd = (myodd | (myodd << 1)) & BITS_MASK_8;
 143     myeven = (myeven | (myeven << 1)) & BITS_MASK_8;
 144     /* Now OR together */
 145     return myeven | (myodd << 1);
 146 }
 147
 148 uint64_t deinterleave(uint64_t src)
 149 {
 150     /* Get odd and even bits */
 151     uint64_t myodd = ((src >> 1) & BITS_MASK_8);
 152     uint64_t myeven = (src & BITS_MASK_8);
 153
 154     /* Unspread bits */
 155     myeven = (myeven | (myeven >> 1)) & PAIR_MASK_8;
 156     myodd = (myodd | (myodd >> 1)) & PAIR_MASK_8;
 157     myeven = (myeven | (myeven >> 2)) & NYBL_MASK_8;
 158     myodd = (myodd | (myodd >> 2)) & NYBL_MASK_8;
 159     myeven = (myeven | (myeven >> 4)) & BYTE_MASK_8;
 160     myodd = (myodd | (myodd >> 4)) & BYTE_MASK_8;
 161     myeven = (myeven | (myeven >> 8)) & HALF_MASK_8;
 162     myodd = (myodd | (myodd >> 8)) & HALF_MASK_8;
 163     myeven = (myeven | (myeven >> 16)) & WORD_MASK_8;
 164     myodd = (myodd | (myodd >> 16)) & WORD_MASK_8;
 165
 166     /* Return odd bits in upper half */
 167     return myeven | (myodd << 32);
 168 }
 169
 170 int32_t conv_round(int32_t a, int n)
 171 {
 172     int64_t val;
 173
 174     if (n == 0) {
 175         val = a;
 176     } else if ((a & ((1 << (n - 1)) - 1)) == 0) {    /* N-1..0 all zero? */
 177         /* Add LSB from int part */
 178         val = ((fSE32_64(a)) + (int64_t) (((uint32_t) ((1 << n) & a)) >> 1));
 179     } else {
 180         val = ((fSE32_64(a)) + (1 << (n - 1)));
 181     }
 182
 183     val = val >> n;
 184     return (int32_t)val;
 185 }
 186
 187 /* Floating Point Stuff */
 188
  /*
   * Softfloat rounding mode for each value of the USR_FPRND register field;
   * arch_fpop_start() indexes this table with that field.  The four entries
   * are, in index order 0..3: nearest-even, toward zero, down, up.
   */
  189 static const FloatRoundMode softfloat_roundingmodes[] = {
  190     float_round_nearest_even,
  191     float_round_to_zero,
  192     float_round_down,
  193     float_round_up,
  194 };
 195
 196 void arch_fpop_start(CPUHexagonState *env)
 197 {
 198     set_float_exception_flags(0, &env->fp_status);
 199     set_float_rounding_mode(
 200         softfloat_roundingmodes[fREAD_REG_FIELD(USR, USR_FPRND)],
 201         &env->fp_status);
 202 }
 203
  204 #ifdef CONFIG_USER_ONLY
  205 /*
  206  * Hexagon Linux kernel only sets the relevant bits in USR (user status
  207  * register).  The exception isn't raised to user mode, so we don't
  208  * model it in qemu user mode.
       *
       * RAISE_FP_EXCEPTION therefore expands to an empty statement here.  It is
       * only defined in this file when CONFIG_USER_ONLY is set, while
       * SOFTFLOAT_TEST_FLAG uses it unconditionally, so any other configuration
       * has to supply its own definition.
  209  */
  210 #define RAISE_FP_EXCEPTION   do {} while (0)
  211 #endif
 212
  /*
   * Propagate one softfloat exception flag into the USR register.
   *
   * FLAG is a softfloat float_flag_* bit; MYF and MYE are USR field name
   * suffixes (pasted onto "USR_") naming the flag field and the matching
   * enable field.
   *
   * If FLAG is set in the local variable "flags" and the USR flag field is
   * currently 0, the field is set to 1 and, when the enable field is non-zero,
   * RAISE_FP_EXCEPTION is invoked.  A flag field that is already 1 is left
   * alone and raises nothing.
   *
   * The expansion refers to "flags" by name, so it must be in scope at the
   * point of use, along with whatever GET_USR_FIELD and SET_USR_FIELD need.
   */
  213 #define SOFTFLOAT_TEST_FLAG(FLAG, MYF, MYE) \
  214     do { \
  215         if (flags & FLAG) { \
  216             if (GET_USR_FIELD(USR_##MYF) == 0) { \
  217                 SET_USR_FIELD(USR_##MYF, 1); \
  218                 if (GET_USR_FIELD(USR_##MYE)) { \
  219                     RAISE_FP_EXCEPTION; \
  220                 } \
  221             } \
  222         } \
  223     } while (0)
 224
 225 void arch_fpop_end(CPUHexagonState *env)
 226 {
 227     const bool pkt_need_commit = true;
 228     int flags = get_float_exception_flags(&env->fp_status);
 229     if (flags != 0) {
 230         SOFTFLOAT_TEST_FLAG(float_flag_inexact, FPINPF, FPINPE);
 231         SOFTFLOAT_TEST_FLAG(float_flag_divbyzero, FPDBZF, FPDBZE);
 232         SOFTFLOAT_TEST_FLAG(float_flag_invalid, FPINVF, FPINVE);
 233         SOFTFLOAT_TEST_FLAG(float_flag_overflow, FPOVFF, FPOVFE);
 234         SOFTFLOAT_TEST_FLAG(float_flag_underflow, FPUNFF, FPUNFE);
 235     }
 236 }
 237
 238 int arch_sf_recip_common(float32 *Rs, float32 *Rt, float32 *Rd, int *adjust,
 239                          float_status *fp_status)
 240 {
 241     int n_exp;
 242     int d_exp;
 243     int ret = 0;
 244     float32 RsV, RtV, RdV;
 245     int PeV = 0;
 246     RsV = *Rs;
 247     RtV = *Rt;
 248     if (float32_is_any_nan(RsV) && float32_is_any_nan(RtV)) {
 249         if (extract32(RsV & RtV, 22, 1) == 0) {
 250             float_raise(float_flag_invalid, fp_status);
 251         }
 252         RdV = RsV = RtV = float32_nan;
 253     } else if (float32_is_any_nan(RsV)) {
 254         if (extract32(RsV, 22, 1) == 0) {
 255             float_raise(float_flag_invalid, fp_status);
 256         }
 257         RdV = RsV = RtV = float32_nan;
 258     } else if (float32_is_any_nan(RtV)) {
 259         /* or put NaN in num/den fixup? */
 260         if (extract32(RtV, 22, 1) == 0) {
 261             float_raise(float_flag_invalid, fp_status);
 262         }
 263         RdV = RsV = RtV = float32_nan;
 264     } else if (float32_is_infinity(RsV) && float32_is_infinity(RtV)) {
 265         /* or put Inf in num fixup? */
 266         RdV = RsV = RtV = float32_nan;
 267         float_raise(float_flag_invalid, fp_status);
 268     } else if (float32_is_zero(RsV) && float32_is_zero(RtV)) {
 269         /* or put zero in num fixup? */
 270         RdV = RsV = RtV = float32_nan;
 271         float_raise(float_flag_invalid, fp_status);
 272     } else if (float32_is_zero(RtV)) {
 273         /* or put Inf in num fixup? */
 274         uint8_t RsV_sign = float32_is_neg(RsV);
 275         uint8_t RtV_sign = float32_is_neg(RtV);
 276         /* Check that RsV is NOT infinite before we overwrite it */
 277         if (!float32_is_infinity(RsV)) {
 278             float_raise(float_flag_divbyzero, fp_status);
 279         }
 280         RsV = infinite_float32(RsV_sign ^ RtV_sign);
 281         RtV = float32_one;
 282         RdV = float32_one;
 283     } else if (float32_is_infinity(RtV)) {
 284         RsV = make_float32(0x80000000 & (RsV ^ RtV));
 285         RtV = float32_one;
 286         RdV = float32_one;
 287     } else if (float32_is_zero(RsV)) {
 288         /* Does this just work itself out? */
 289         /* No, 0/Inf causes problems. */
 290         RsV = make_float32(0x80000000 & (RsV ^ RtV));
 291         RtV = float32_one;
 292         RdV = float32_one;
 293     } else if (float32_is_infinity(RsV)) {
 294         uint8_t RsV_sign = float32_is_neg(RsV);
 295         uint8_t RtV_sign = float32_is_neg(RtV);
 296         RsV = infinite_float32(RsV_sign ^ RtV_sign);
 297         RtV = float32_one;
 298         RdV = float32_one;
 299     } else {
 300         PeV = 0x00;
 301         /* Basic checks passed */
 302         n_exp = float32_getexp_raw(RsV);
 303         d_exp = float32_getexp_raw(RtV);
 304         if ((n_exp - d_exp + SF_BIAS) <= SF_MANTBITS) {
 305             /* Near quotient underflow / inexact Q */
 306             PeV = 0x80;
 307             RtV = float32_scalbn(RtV, -64, fp_status);
 308             RsV = float32_scalbn(RsV, 64, fp_status);
 309         } else if ((n_exp - d_exp + SF_BIAS) > (SF_MAXEXP - 24)) {
 310             /* Near quotient overflow */
 311             PeV = 0x40;
 312             RtV = float32_scalbn(RtV, 32, fp_status);
 313             RsV = float32_scalbn(RsV, -32, fp_status);
 314         } else if (n_exp <= SF_MANTBITS + 2) {
 315             RtV = float32_scalbn(RtV, 64, fp_status);
 316             RsV = float32_scalbn(RsV, 64, fp_status);
 317         } else if (d_exp <= 1) {
 318             RtV = float32_scalbn(RtV, 32, fp_status);
 319             RsV = float32_scalbn(RsV, 32, fp_status);
 320         } else if (d_exp > 252) {
 321             RtV = float32_scalbn(RtV, -32, fp_status);
 322             RsV = float32_scalbn(RsV, -32, fp_status);
 323         }
 324         RdV = 0;
 325         ret = 1;
 326     }
 327     *Rs = RsV;
 328     *Rt = RtV;
 329     *Rd = RdV;
 330     *adjust = PeV;
 331     return ret;
 332 }
 333
 334 int arch_sf_invsqrt_common(float32 *Rs, float32 *Rd, int *adjust,
 335                            float_status *fp_status)
 336 {
 337     float32 RsV, RdV;
 338     int PeV = 0;
 339     int r_exp;
 340     int ret = 0;
 341     RsV = *Rs;
 342     if (float32_is_any_nan(RsV)) {
 343         if (extract32(RsV, 22, 1) == 0) {
 344             float_raise(float_flag_invalid, fp_status);
 345         }
 346         RdV = RsV = float32_nan;
 347     } else if (float32_lt(RsV, float32_zero, fp_status)) {
 348         /* Negative nonzero values are NaN */
 349         float_raise(float_flag_invalid, fp_status);
 350         RsV = float32_nan;
 351         RdV = float32_nan;
 352     } else if (float32_is_infinity(RsV)) {
 353         /* or put Inf in num fixup? */
 354         RsV = infinite_float32(1);
 355         RdV = infinite_float32(1);
 356     } else if (float32_is_zero(RsV)) {
 357         /* or put zero in num fixup? */
 358         RdV = float32_one;
 359     } else {
 360         PeV = 0x00;
 361         /* Basic checks passed */
 362         r_exp = float32_getexp(RsV);
 363         if (r_exp <= 24) {
 364             RsV = float32_scalbn(RsV, 64, fp_status);
 365             PeV = 0xe0;
 366         }
 367         RdV = 0;
 368         ret = 1;
 369     }
 370     *Rs = RsV;
 371     *Rd = RdV;
 372     *adjust = PeV;
 373     return ret;
 374 }
 375
  /*
   * 128-entry byte table whose values fall monotonically from 0xfe at index 0
   * to 0x00 at index 127.
   *
   * NOTE(review): presumably the mantissa seed table for the reciprocal
   * estimate, indexed by leading mantissa bits of the operand -- the consumer
   * is not in this file, confirm there.
   */
  376 const uint8_t recip_lookup_table[128] = {
  377     0x0fe, 0x0fa, 0x0f6, 0x0f2, 0x0ef, 0x0eb, 0x0e7, 0x0e4,
  378     0x0e0, 0x0dd, 0x0d9, 0x0d6, 0x0d2, 0x0cf, 0x0cc, 0x0c9,
  379     0x0c6, 0x0c2, 0x0bf, 0x0bc, 0x0b9, 0x0b6, 0x0b3, 0x0b1,
  380     0x0ae, 0x0ab, 0x0a8, 0x0a5, 0x0a3, 0x0a0, 0x09d, 0x09b,
  381     0x098, 0x096, 0x093, 0x091, 0x08e, 0x08c, 0x08a, 0x087,
  382     0x085, 0x083, 0x080, 0x07e, 0x07c, 0x07a, 0x078, 0x075,
  383     0x073, 0x071, 0x06f, 0x06d, 0x06b, 0x069, 0x067, 0x065,
  384     0x063, 0x061, 0x05f, 0x05e, 0x05c, 0x05a, 0x058, 0x056,
  385     0x054, 0x053, 0x051, 0x04f, 0x04e, 0x04c, 0x04a, 0x049,
  386     0x047, 0x045, 0x044, 0x042, 0x040, 0x03f, 0x03d, 0x03c,
  387     0x03a, 0x039, 0x037, 0x036, 0x034, 0x033, 0x032, 0x030,
  388     0x02f, 0x02d, 0x02c, 0x02b, 0x029, 0x028, 0x027, 0x025,
  389     0x024, 0x023, 0x021, 0x020, 0x01f, 0x01e, 0x01c, 0x01b,
  390     0x01a, 0x019, 0x017, 0x016, 0x015, 0x014, 0x013, 0x012,
  391     0x011, 0x00f, 0x00e, 0x00d, 0x00c, 0x00b, 0x00a, 0x009,
  392     0x008, 0x007, 0x006, 0x005, 0x004, 0x003, 0x002, 0x000,
  393 };
 394
  /*
   * 128-entry byte table made of two descending runs: indices 0..63 fall from
   * 0x69 to 0x01 and indices 64..127 fall from 0xfe to 0x6b.
   *
   * NOTE(review): presumably the mantissa seed table for the inverse square
   * root estimate, with the two halves selected by the low exponent bit --
   * the consumer is not in this file, confirm there.
   */
  395 const uint8_t invsqrt_lookup_table[128] = {
  396     0x069, 0x066, 0x063, 0x061, 0x05e, 0x05b, 0x059, 0x057,
  397     0x054, 0x052, 0x050, 0x04d, 0x04b, 0x049, 0x047, 0x045,
  398     0x043, 0x041, 0x03f, 0x03d, 0x03b, 0x039, 0x037, 0x036,
  399     0x034, 0x032, 0x030, 0x02f, 0x02d, 0x02c, 0x02a, 0x028,
  400     0x027, 0x025, 0x024, 0x022, 0x021, 0x01f, 0x01e, 0x01d,
  401     0x01b, 0x01a, 0x019, 0x017, 0x016, 0x015, 0x014, 0x012,
  402     0x011, 0x010, 0x00f, 0x00d, 0x00c, 0x00b, 0x00a, 0x009,
  403     0x008, 0x007, 0x006, 0x005, 0x004, 0x003, 0x002, 0x001,
  404     0x0fe, 0x0fa, 0x0f6, 0x0f3, 0x0ef, 0x0eb, 0x0e8, 0x0e4,
  405     0x0e1, 0x0de, 0x0db, 0x0d7, 0x0d4, 0x0d1, 0x0ce, 0x0cb,
  406     0x0c9, 0x0c6, 0x0c3, 0x0c0, 0x0be, 0x0bb, 0x0b8, 0x0b6,
  407     0x0b3, 0x0b1, 0x0af, 0x0ac, 0x0aa, 0x0a8, 0x0a5, 0x0a3,
  408     0x0a1, 0x09f, 0x09d, 0x09b, 0x099, 0x097, 0x095, 0x093,
  409     0x091, 0x08f, 0x08d, 0x08b, 0x089, 0x087, 0x086, 0x084,
  410     0x082, 0x080, 0x07f, 0x07d, 0x07b, 0x07a, 0x078, 0x077,
  411     0x075, 0x074, 0x072, 0x071, 0x06f, 0x06e, 0x06c, 0x06b,
  412 };