arch/arm/vfp/vfpsingle.c

   1 /*
   2  *  linux/arch/arm/vfp/vfpsingle.c
   3  *
    4  * This code is derived in part from John R. Hauser's SoftFloat library, which
   5  * carries the following notice:
   6  *
   7  * ===========================================================================
   8  * This C source file is part of the SoftFloat IEC/IEEE Floating-point
   9  * Arithmetic Package, Release 2.
  10  *
  11  * Written by John R. Hauser.  This work was made possible in part by the
  12  * International Computer Science Institute, located at Suite 600, 1947 Center
  13  * Street, Berkeley, California 94704.  Funding was partially provided by the
  14  * National Science Foundation under grant MIP-9311980.  The original version
  15  * of this code was written as part of a project to build a fixed-point vector
  16  * processor in collaboration with the University of California at Berkeley,
  17  * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  18  * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  19  * arithmetic/softfloat.html'.
  20  *
  21  * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  22  * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  23  * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  24  * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  25  * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  26  *
  27  * Derivative works are acceptable, even for commercial purposes, so long as
  28  * (1) they include prominent notice that the work is derivative, and (2) they
  29  * include prominent notice akin to these three paragraphs for those parts of
  30  * this code that are retained.
  31  * ===========================================================================
  32  */
  33 #include <linux/kernel.h>
  34 #include <linux/bitops.h>
  35 #include <asm/ptrace.h>
  36 #include <asm/vfp.h>
  37
  38 #include "vfpinstr.h"
  39 #include "vfp.h"
  40
  41 static struct vfp_single vfp_single_default_qnan = {
  42         .exponent       = 255,
  43         .sign           = 0,
  44         .significand    = VFP_SINGLE_SIGNIFICAND_QNAN,
  45 };
  46
  47 static void vfp_single_dump(const char *str, struct vfp_single *s)
  48 {
  49         pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n",
  50                  str, s->sign != 0, s->exponent, s->significand);
  51 }
  52
  53 static void vfp_single_normalise_denormal(struct vfp_single *vs)
  54 {
  55         int bits = 31 - fls(vs->significand);
  56
  57         vfp_single_dump("normalise_denormal: in", vs);
  58
  59         if (bits) {
  60                 vs->exponent -= bits - 1;
  61                 vs->significand <<= bits;
  62         }
  63
  64         vfp_single_dump("normalise_denormal: out", vs);
  65 }
  66
  67 #ifndef DEBUG
  68 #define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except)
  69 u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions)
  70 #else
  71 u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func)
  72 #endif
  73 {
  74         u32 significand, incr, rmode;
  75         int exponent, shift, underflow;
  76
  77         vfp_single_dump("pack: in", vs);
  78
  79         /*
  80          * Infinities and NaNs are a special case.
  81          */
  82         if (vs->exponent == 255 && (vs->significand == 0 || exceptions))
  83                 goto pack;
  84
  85         /*
  86          * Special-case zero.
  87          */
  88         if (vs->significand == 0) {
  89                 vs->exponent = 0;
  90                 goto pack;
  91         }
  92
  93         exponent = vs->exponent;
  94         significand = vs->significand;
  95
  96         /*
  97          * Normalise first.  Note that we shift the significand up to
  98          * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least
  99          * significant bit.
 100          */
 101         shift = 32 - fls(significand);
 102         if (shift < 32 && shift) {
 103                 exponent -= shift;
 104                 significand <<= shift;
 105         }
 106
 107 #ifdef DEBUG
 108         vs->exponent = exponent;
 109         vs->significand = significand;
 110         vfp_single_dump("pack: normalised", vs);
 111 #endif
 112
 113         /*
 114          * Tiny number?
 115          */
 116         underflow = exponent < 0;
 117         if (underflow) {
 118                 significand = vfp_shiftright32jamming(significand, -exponent);
 119                 exponent = 0;
 120 #ifdef DEBUG
 121                 vs->exponent = exponent;
 122                 vs->significand = significand;
 123                 vfp_single_dump("pack: tiny number", vs);
 124 #endif
 125                 if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1)))
 126                         underflow = 0;
 127         }
 128
 129         /*
 130          * Select rounding increment.
 131          */
 132         incr = 0;
 133         rmode = fpscr & FPSCR_RMODE_MASK;
 134
 135         if (rmode == FPSCR_ROUND_NEAREST) {
 136                 incr = 1 << VFP_SINGLE_LOW_BITS;
 137                 if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0)
 138                         incr -= 1;
 139         } else if (rmode == FPSCR_ROUND_TOZERO) {
 140                 incr = 0;
 141         } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0))
 142                 incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1;
 143
 144         pr_debug("VFP: rounding increment = 0x%08x\n", incr);
 145
 146         /*
 147          * Is our rounding going to overflow?
 148          */
 149         if ((significand + incr) < significand) {
 150                 exponent += 1;
 151                 significand = (significand >> 1) | (significand & 1);
 152                 incr >>= 1;
 153 #ifdef DEBUG
 154                 vs->exponent = exponent;
 155                 vs->significand = significand;
 156                 vfp_single_dump("pack: overflow", vs);
 157 #endif
 158         }
 159
 160         /*
 161          * If any of the low bits (which will be shifted out of the
 162          * number) are non-zero, the result is inexact.
 163          */
 164         if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))
 165                 exceptions |= FPSCR_IXC;
 166
 167         /*
 168          * Do our rounding.
 169          */
 170         significand += incr;
 171
 172         /*
 173          * Infinity?
 174          */
 175         if (exponent >= 254) {
 176                 exceptions |= FPSCR_OFC | FPSCR_IXC;
 177                 if (incr == 0) {
 178                         vs->exponent = 253;
 179                         vs->significand = 0x7fffffff;
 180                 } else {
 181                         vs->exponent = 255;             /* infinity */
 182                         vs->significand = 0;
 183                 }
 184         } else {
 185                 if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0)
 186                         exponent = 0;
 187                 if (exponent || significand > 0x80000000)
 188                         underflow = 0;
 189                 if (underflow)
 190                         exceptions |= FPSCR_UFC;
 191                 vs->exponent = exponent;
 192                 vs->significand = significand >> 1;
 193         }
 194
 195  pack:
 196         vfp_single_dump("pack: final", vs);
 197         {
 198                 s32 d = vfp_single_pack(vs);
 199                 pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func,
 200                          sd, d, exceptions);
 201                 vfp_put_float(sd, d);
 202         }
 203
 204         return exceptions & ~VFP_NAN_FLAG;
 205 }
 206
 207 /*
 208  * Propagate the NaN, setting exceptions if it is signalling.
 209  * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
 210  */
 211 static u32
 212 vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn,
 213                   struct vfp_single *vsm, u32 fpscr)
 214 {
 215         struct vfp_single *nan;
 216         int tn, tm = 0;
 217
 218         tn = vfp_single_type(vsn);
 219
 220         if (vsm)
 221                 tm = vfp_single_type(vsm);
 222
 223         if (fpscr & FPSCR_DEFAULT_NAN)
 224                 /*
 225                  * Default NaN mode - always returns a quiet NaN
 226                  */
 227                 nan = &vfp_single_default_qnan;
 228         else {
 229                 /*
 230                  * Contemporary mode - select the first signalling
 231                  * NAN, or if neither are signalling, the first
 232                  * quiet NAN.
 233                  */
 234                 if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
 235                         nan = vsn;
 236                 else
 237                         nan = vsm;
 238                 /*
 239                  * Make the NaN quiet.
 240                  */
 241                 nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
 242         }
 243
 244         *vsd = *nan;
 245
 246         /*
 247          * If one was a signalling NAN, raise invalid operation.
 248          */
 249         return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
 250 }
 251
 252
 253 /*
 254  * Extended operations
 255  */
 256 static u32 vfp_single_fabs(int sd, int unused, s32 m, u32 fpscr)
 257 {
 258         vfp_put_float(sd, vfp_single_packed_abs(m));
 259         return 0;
 260 }
 261
 262 static u32 vfp_single_fcpy(int sd, int unused, s32 m, u32 fpscr)
 263 {
 264         vfp_put_float(sd, m);
 265         return 0;
 266 }
 267
 268 static u32 vfp_single_fneg(int sd, int unused, s32 m, u32 fpscr)
 269 {
 270         vfp_put_float(sd, vfp_single_packed_negate(m));
 271         return 0;
 272 }
 273
 274 static const u16 sqrt_oddadjust[] = {
 275         0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0,
 276         0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67
 277 };
 278
 279 static const u16 sqrt_evenadjust[] = {
 280         0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e,
 281         0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002
 282 };
 283
 284 u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand)
 285 {
 286         int index;
 287         u32 z, a;
 288
 289         if ((significand & 0xc0000000) != 0x40000000) {
 290                 printk(KERN_WARNING "VFP: estimate_sqrt: invalid significand\n");
 291         }
 292
 293         a = significand << 1;
 294         index = (a >> 27) & 15;
 295         if (exponent & 1) {
 296                 z = 0x4000 + (a >> 17) - sqrt_oddadjust[index];
 297                 z = ((a / z) << 14) + (z << 15);
 298                 a >>= 1;
 299         } else {
 300                 z = 0x8000 + (a >> 17) - sqrt_evenadjust[index];
 301                 z = a / z + z;
 302                 z = (z >= 0x20000) ? 0xffff8000 : (z << 15);
 303                 if (z <= a)
 304                         return (s32)a >> 1;
 305         }
 306         return (u32)(((u64)a << 31) / z) + (z >> 1);
 307 }
 308
 309 static u32 vfp_single_fsqrt(int sd, int unused, s32 m, u32 fpscr)
 310 {
 311         struct vfp_single vsm, vsd;
 312         int ret, tm;
 313
 314         vfp_single_unpack(&vsm, m);
 315         tm = vfp_single_type(&vsm);
 316         if (tm & (VFP_NAN|VFP_INFINITY)) {
 317                 struct vfp_single *vsp = &vsd;
 318
 319                 if (tm & VFP_NAN)
 320                         ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr);
 321                 else if (vsm.sign == 0) {
 322  sqrt_copy:
 323                         vsp = &vsm;
 324                         ret = 0;
 325                 } else {
 326  sqrt_invalid:
 327                         vsp = &vfp_single_default_qnan;
 328                         ret = FPSCR_IOC;
 329                 }
 330                 vfp_put_float(sd, vfp_single_pack(vsp));
 331                 return ret;
 332         }
 333
 334         /*
 335          * sqrt(+/- 0) == +/- 0
 336          */
 337         if (tm & VFP_ZERO)
 338                 goto sqrt_copy;
 339
 340         /*
 341          * Normalise a denormalised number
 342          */
 343         if (tm & VFP_DENORMAL)
 344                 vfp_single_normalise_denormal(&vsm);
 345
 346         /*
 347          * sqrt(<0) = invalid
 348          */
 349         if (vsm.sign)
 350                 goto sqrt_invalid;
 351
 352         vfp_single_dump("sqrt", &vsm);
 353
 354         /*
 355          * Estimate the square root.
 356          */
 357         vsd.sign = 0;
 358         vsd.exponent = ((vsm.exponent - 127) >> 1) + 127;
 359         vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2;
 360
 361         vfp_single_dump("sqrt estimate", &vsd);
 362
 363         /*
 364          * And now adjust.
 365          */
 366         if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) {
 367                 if (vsd.significand < 2) {
 368                         vsd.significand = 0xffffffff;
 369                 } else {
 370                         u64 term;
 371                         s64 rem;
 372                         vsm.significand <<= !(vsm.exponent & 1);
 373                         term = (u64)vsd.significand * vsd.significand;
 374                         rem = ((u64)vsm.significand << 32) - term;
 375
 376                         pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem);
 377
 378                         while (rem < 0) {
 379                                 vsd.significand -= 1;
 380                                 rem += ((u64)vsd.significand << 1) | 1;
 381                         }
 382                         vsd.significand |= rem != 0;
 383                 }
 384         }
 385         vsd.significand = vfp_shiftright32jamming(vsd.significand, 1);
 386
 387         return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fsqrt");
 388 }
 389
 390 /*
 391  * Equal        := ZC
 392  * Less than    := N
 393  * Greater than := C
 394  * Unordered    := CV
 395  */
 396 static u32 vfp_compare(int sd, int signal_on_qnan, s32 m, u32 fpscr)
 397 {
 398         s32 d;
 399         u32 ret = 0;
 400
 401         d = vfp_get_float(sd);
 402         if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) {
 403                 ret |= FPSCR_C | FPSCR_V;
 404                 if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
 405                         /*
 406                          * Signalling NaN, or signalling on quiet NaN
 407                          */
 408                         ret |= FPSCR_IOC;
 409         }
 410
 411         if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) {
 412                 ret |= FPSCR_C | FPSCR_V;
 413                 if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
 414                         /*
 415                          * Signalling NaN, or signalling on quiet NaN
 416                          */
 417                         ret |= FPSCR_IOC;
 418         }
 419
 420         if (ret == 0) {
 421                 if (d == m || vfp_single_packed_abs(d | m) == 0) {
 422                         /*
 423                          * equal
 424                          */
 425                         ret |= FPSCR_Z | FPSCR_C;
 426                 } else if (vfp_single_packed_sign(d ^ m)) {
 427                         /*
 428                          * different signs
 429                          */
 430                         if (vfp_single_packed_sign(d))
 431                                 /*
 432                                  * d is negative, so d < m
 433                                  */
 434                                 ret |= FPSCR_N;
 435                         else
 436                                 /*
 437                                  * d is positive, so d > m
 438                                  */
 439                                 ret |= FPSCR_C;
 440                 } else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) {
 441                         /*
 442                          * d < m
 443                          */
 444                         ret |= FPSCR_N;
 445                 } else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) {
 446                         /*
 447                          * d > m
 448                          */
 449                         ret |= FPSCR_C;
 450                 }
 451         }
 452         return ret;
 453 }
 454
 455 static u32 vfp_single_fcmp(int sd, int unused, s32 m, u32 fpscr)
 456 {
 457         return vfp_compare(sd, 0, m, fpscr);
 458 }
 459
 460 static u32 vfp_single_fcmpe(int sd, int unused, s32 m, u32 fpscr)
 461 {
 462         return vfp_compare(sd, 1, m, fpscr);
 463 }
 464
 465 static u32 vfp_single_fcmpz(int sd, int unused, s32 m, u32 fpscr)
 466 {
 467         return vfp_compare(sd, 0, 0, fpscr);
 468 }
 469
 470 static u32 vfp_single_fcmpez(int sd, int unused, s32 m, u32 fpscr)
 471 {
 472         return vfp_compare(sd, 1, 0, fpscr);
 473 }
 474
 475 static u32 vfp_single_fcvtd(int dd, int unused, s32 m, u32 fpscr)
 476 {
 477         struct vfp_single vsm;
 478         struct vfp_double vdd;
 479         int tm;
 480         u32 exceptions = 0;
 481
 482         vfp_single_unpack(&vsm, m);
 483
 484         tm = vfp_single_type(&vsm);
 485
 486         /*
 487          * If we have a signalling NaN, signal invalid operation.
 488          */
 489         if (tm == VFP_SNAN)
 490                 exceptions = FPSCR_IOC;
 491
 492         if (tm & VFP_DENORMAL)
 493                 vfp_single_normalise_denormal(&vsm);
 494
 495         vdd.sign = vsm.sign;
 496         vdd.significand = (u64)vsm.significand << 32;
 497
 498         /*
 499          * If we have an infinity or NaN, the exponent must be 2047.
 500          */
 501         if (tm & (VFP_INFINITY|VFP_NAN)) {
 502                 vdd.exponent = 2047;
 503                 if (tm & VFP_NAN)
 504                         vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
 505                 goto pack_nan;
 506         } else if (tm & VFP_ZERO)
 507                 vdd.exponent = 0;
 508         else
 509                 vdd.exponent = vsm.exponent + (1023 - 127);
 510
 511         /*
 512          * Technically, if bit 0 of dd is set, this is an invalid
 513          * instruction.  However, we ignore this for efficiency.
 514          */
 515         return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fcvtd");
 516
 517  pack_nan:
 518         vfp_put_double(dd, vfp_double_pack(&vdd));
 519         return exceptions;
 520 }
 521
 522 static u32 vfp_single_fuito(int sd, int unused, s32 m, u32 fpscr)
 523 {
 524         struct vfp_single vs;
 525
 526         vs.sign = 0;
 527         vs.exponent = 127 + 31 - 1;
 528         vs.significand = (u32)m;
 529
 530         return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fuito");
 531 }
 532
 533 static u32 vfp_single_fsito(int sd, int unused, s32 m, u32 fpscr)
 534 {
 535         struct vfp_single vs;
 536
 537         vs.sign = (m & 0x80000000) >> 16;
 538         vs.exponent = 127 + 31 - 1;
 539         vs.significand = vs.sign ? -m : m;
 540
 541         return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fsito");
 542 }
 543
 544 static u32 vfp_single_ftoui(int sd, int unused, s32 m, u32 fpscr)
 545 {
 546         struct vfp_single vsm;
 547         u32 d, exceptions = 0;
 548         int rmode = fpscr & FPSCR_RMODE_MASK;
 549         int tm;
 550
 551         vfp_single_unpack(&vsm, m);
 552         vfp_single_dump("VSM", &vsm);
 553
 554         /*
 555          * Do we have a denormalised number?
 556          */
 557         tm = vfp_single_type(&vsm);
 558         if (tm & VFP_DENORMAL)
 559                 exceptions |= FPSCR_IDC;
 560
 561         if (tm & VFP_NAN)
 562                 vsm.sign = 0;
 563
 564         if (vsm.exponent >= 127 + 32) {
 565                 d = vsm.sign ? 0 : 0xffffffff;
 566                 exceptions = FPSCR_IOC;
 567         } else if (vsm.exponent >= 127 - 1) {
 568                 int shift = 127 + 31 - vsm.exponent;
 569                 u32 rem, incr = 0;
 570
 571                 /*
 572                  * 2^0 <= m < 2^32-2^8
 573                  */
 574                 d = (vsm.significand << 1) >> shift;
 575                 rem = vsm.significand << (33 - shift);
 576
 577                 if (rmode == FPSCR_ROUND_NEAREST) {
 578                         incr = 0x80000000;
 579                         if ((d & 1) == 0)
 580                                 incr -= 1;
 581                 } else if (rmode == FPSCR_ROUND_TOZERO) {
 582                         incr = 0;
 583                 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
 584                         incr = ~0;
 585                 }
 586
 587                 if ((rem + incr) < rem) {
 588                         if (d < 0xffffffff)
 589                                 d += 1;
 590                         else
 591                                 exceptions |= FPSCR_IOC;
 592                 }
 593
 594                 if (d && vsm.sign) {
 595                         d = 0;
 596                         exceptions |= FPSCR_IOC;
 597                 } else if (rem)
 598                         exceptions |= FPSCR_IXC;
 599         } else {
 600                 d = 0;
 601                 if (vsm.exponent | vsm.significand) {
 602                         exceptions |= FPSCR_IXC;
 603                         if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
 604                                 d = 1;
 605                         else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) {
 606                                 d = 0;
 607                                 exceptions |= FPSCR_IOC;
 608                         }
 609                 }
 610         }
 611
 612         pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
 613
 614         vfp_put_float(sd, d);
 615
 616         return exceptions;
 617 }
 618
 619 static u32 vfp_single_ftouiz(int sd, int unused, s32 m, u32 fpscr)
 620 {
 621         return vfp_single_ftoui(sd, unused, m, FPSCR_ROUND_TOZERO);
 622 }
 623
 624 static u32 vfp_single_ftosi(int sd, int unused, s32 m, u32 fpscr)
 625 {
 626         struct vfp_single vsm;
 627         u32 d, exceptions = 0;
 628         int rmode = fpscr & FPSCR_RMODE_MASK;
 629
 630         vfp_single_unpack(&vsm, m);
 631         vfp_single_dump("VSM", &vsm);
 632
 633         /*
 634          * Do we have a denormalised number?
 635          */
 636         if (vfp_single_type(&vsm) & VFP_DENORMAL)
 637                 exceptions |= FPSCR_IDC;
 638
 639         if (vsm.exponent >= 127 + 32) {
 640                 /*
 641                  * m >= 2^31-2^7: invalid
 642                  */
 643                 d = 0x7fffffff;
 644                 if (vsm.sign)
 645                         d = ~d;
 646                 exceptions |= FPSCR_IOC;
 647         } else if (vsm.exponent >= 127 - 1) {
 648                 int shift = 127 + 31 - vsm.exponent;
 649                 u32 rem, incr = 0;
 650
 651                 /* 2^0 <= m <= 2^31-2^7 */
 652                 d = (vsm.significand << 1) >> shift;
 653                 rem = vsm.significand << (33 - shift);
 654
 655                 if (rmode == FPSCR_ROUND_NEAREST) {
 656                         incr = 0x80000000;
 657                         if ((d & 1) == 0)
 658                                 incr -= 1;
 659                 } else if (rmode == FPSCR_ROUND_TOZERO) {
 660                         incr = 0;
 661                 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
 662                         incr = ~0;
 663                 }
 664
 665                 if ((rem + incr) < rem && d < 0xffffffff)
 666                         d += 1;
 667                 if (d > 0x7fffffff + (vsm.sign != 0)) {
 668                         d = 0x7fffffff + (vsm.sign != 0);
 669                         exceptions |= FPSCR_IOC;
 670                 } else if (rem)
 671                         exceptions |= FPSCR_IXC;
 672
 673                 if (vsm.sign)
 674                         d = -d;
 675         } else {
 676                 d = 0;
 677                 if (vsm.exponent | vsm.significand) {
 678                         exceptions |= FPSCR_IXC;
 679                         if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
 680                                 d = 1;
 681                         else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign)
 682                                 d = -1;
 683                 }
 684         }
 685
 686         pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
 687
 688         vfp_put_float(sd, (s32)d);
 689
 690         return exceptions;
 691 }
 692
 693 static u32 vfp_single_ftosiz(int sd, int unused, s32 m, u32 fpscr)
 694 {
 695         return vfp_single_ftosi(sd, unused, m, FPSCR_ROUND_TOZERO);
 696 }
 697
 698 static u32 (* const fop_extfns[32])(int sd, int unused, s32 m, u32 fpscr) = {
 699         [FEXT_TO_IDX(FEXT_FCPY)]        = vfp_single_fcpy,
 700         [FEXT_TO_IDX(FEXT_FABS)]        = vfp_single_fabs,
 701         [FEXT_TO_IDX(FEXT_FNEG)]        = vfp_single_fneg,
 702         [FEXT_TO_IDX(FEXT_FSQRT)]       = vfp_single_fsqrt,
 703         [FEXT_TO_IDX(FEXT_FCMP)]        = vfp_single_fcmp,
 704         [FEXT_TO_IDX(FEXT_FCMPE)]       = vfp_single_fcmpe,
 705         [FEXT_TO_IDX(FEXT_FCMPZ)]       = vfp_single_fcmpz,
 706         [FEXT_TO_IDX(FEXT_FCMPEZ)]      = vfp_single_fcmpez,
 707         [FEXT_TO_IDX(FEXT_FCVT)]        = vfp_single_fcvtd,
 708         [FEXT_TO_IDX(FEXT_FUITO)]       = vfp_single_fuito,
 709         [FEXT_TO_IDX(FEXT_FSITO)]       = vfp_single_fsito,
 710         [FEXT_TO_IDX(FEXT_FTOUI)]       = vfp_single_ftoui,
 711         [FEXT_TO_IDX(FEXT_FTOUIZ)]      = vfp_single_ftouiz,
 712         [FEXT_TO_IDX(FEXT_FTOSI)]       = vfp_single_ftosi,
 713         [FEXT_TO_IDX(FEXT_FTOSIZ)]      = vfp_single_ftosiz,
 714 };
 715
 716
 717
 718
 719
 720 static u32
 721 vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn,
 722                           struct vfp_single *vsm, u32 fpscr)
 723 {
 724         struct vfp_single *vsp;
 725         u32 exceptions = 0;
 726         int tn, tm;
 727
 728         tn = vfp_single_type(vsn);
 729         tm = vfp_single_type(vsm);
 730
 731         if (tn & tm & VFP_INFINITY) {
 732                 /*
 733                  * Two infinities.  Are they different signs?
 734                  */
 735                 if (vsn->sign ^ vsm->sign) {
 736                         /*
 737                          * different signs -> invalid
 738                          */
 739                         exceptions = FPSCR_IOC;
 740                         vsp = &vfp_single_default_qnan;
 741                 } else {
 742                         /*
 743                          * same signs -> valid
 744                          */
 745                         vsp = vsn;
 746                 }
 747         } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
 748                 /*
 749                  * One infinity and one number -> infinity
 750                  */
 751                 vsp = vsn;
 752         } else {
 753                 /*
 754                  * 'n' is a NaN of some type
 755                  */
 756                 return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
 757         }
 758         *vsd = *vsp;
 759         return exceptions;
 760 }
 761
 762 static u32
 763 vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn,
 764                struct vfp_single *vsm, u32 fpscr)
 765 {
 766         u32 exp_diff, m_sig;
 767
 768         if (vsn->significand & 0x80000000 ||
 769             vsm->significand & 0x80000000) {
 770                 pr_info("VFP: bad FP values in %s\n", __func__);
 771                 vfp_single_dump("VSN", vsn);
 772                 vfp_single_dump("VSM", vsm);
 773         }
 774
 775         /*
 776          * Ensure that 'n' is the largest magnitude number.  Note that
 777          * if 'n' and 'm' have equal exponents, we do not swap them.
 778          * This ensures that NaN propagation works correctly.
 779          */
 780         if (vsn->exponent < vsm->exponent) {
 781                 struct vfp_single *t = vsn;
 782                 vsn = vsm;
 783                 vsm = t;
 784         }
 785
 786         /*
 787          * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
 788          * infinity or a NaN here.
 789          */
 790         if (vsn->exponent == 255)
 791                 return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr);
 792
 793         /*
 794          * We have two proper numbers, where 'vsn' is the larger magnitude.
 795          *
 796          * Copy 'n' to 'd' before doing the arithmetic.
 797          */
 798         *vsd = *vsn;
 799
 800         /*
 801          * Align both numbers.
 802          */
 803         exp_diff = vsn->exponent - vsm->exponent;
 804         m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff);
 805
 806         /*
 807          * If the signs are different, we are really subtracting.
 808          */
 809         if (vsn->sign ^ vsm->sign) {
 810                 m_sig = vsn->significand - m_sig;
 811                 if ((s32)m_sig < 0) {
 812                         vsd->sign = vfp_sign_negate(vsd->sign);
 813                         m_sig = -m_sig;
 814                 } else if (m_sig == 0) {
 815                         vsd->sign = (fpscr & FPSCR_RMODE_MASK) ==
 816                                       FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
 817                 }
 818         } else {
 819                 m_sig = vsn->significand + m_sig;
 820         }
 821         vsd->significand = m_sig;
 822
 823         return 0;
 824 }
 825
 826 static u32
 827 vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr)
 828 {
 829         vfp_single_dump("VSN", vsn);
 830         vfp_single_dump("VSM", vsm);
 831
 832         /*
 833          * Ensure that 'n' is the largest magnitude number.  Note that
 834          * if 'n' and 'm' have equal exponents, we do not swap them.
 835          * This ensures that NaN propagation works correctly.
 836          */
 837         if (vsn->exponent < vsm->exponent) {
 838                 struct vfp_single *t = vsn;
 839                 vsn = vsm;
 840                 vsm = t;
 841                 pr_debug("VFP: swapping M <-> N\n");
 842         }
 843
 844         vsd->sign = vsn->sign ^ vsm->sign;
 845
 846         /*
 847          * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
 848          */
 849         if (vsn->exponent == 255) {
 850                 if (vsn->significand || (vsm->exponent == 255 && vsm->significand))
 851                         return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
 852                 if ((vsm->exponent | vsm->significand) == 0) {
 853                         *vsd = vfp_single_default_qnan;
 854                         return FPSCR_IOC;
 855                 }
 856                 vsd->exponent = vsn->exponent;
 857                 vsd->significand = 0;
 858                 return 0;
 859         }
 860
 861         /*
 862          * If 'm' is zero, the result is always zero.  In this case,
 863          * 'n' may be zero or a number, but it doesn't matter which.
 864          */
 865         if ((vsm->exponent | vsm->significand) == 0) {
 866                 vsd->exponent = 0;
 867                 vsd->significand = 0;
 868                 return 0;
 869         }
 870
 871         /*
 872          * We add 2 to the destination exponent for the same reason as
 873          * the addition case - though this time we have +1 from each
 874          * input operand.
 875          */
 876         vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2;
 877         vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand);
 878
 879         vfp_single_dump("VSD", vsd);
 880         return 0;
 881 }
 882
 883 #define NEG_MULTIPLY    (1 << 0)
 884 #define NEG_SUBTRACT    (1 << 1)
 885
 886 static u32
 887 vfp_single_multiply_accumulate(int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func)
 888 {
 889         struct vfp_single vsd, vsp, vsn, vsm;
 890         u32 exceptions;
 891         s32 v;
 892
 893         v = vfp_get_float(sn);
 894         pr_debug("VFP: s%u = %08x\n", sn, v);
 895         vfp_single_unpack(&vsn, v);
 896         if (vsn.exponent == 0 && vsn.significand)
 897                 vfp_single_normalise_denormal(&vsn);
 898
 899         vfp_single_unpack(&vsm, m);
 900         if (vsm.exponent == 0 && vsm.significand)
 901                 vfp_single_normalise_denormal(&vsm);
 902
 903         exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr);
 904         if (negate & NEG_MULTIPLY)
 905                 vsp.sign = vfp_sign_negate(vsp.sign);
 906
 907         v = vfp_get_float(sd);
 908         pr_debug("VFP: s%u = %08x\n", sd, v);
 909         vfp_single_unpack(&vsn, v);
 910         if (negate & NEG_SUBTRACT)
 911                 vsn.sign = vfp_sign_negate(vsn.sign);
 912
 913         exceptions |= vfp_single_add(&vsd, &vsn, &vsp, fpscr);
 914
 915         return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, func);
 916 }
 917
 918 /*
 919  * Standard operations
 920  */
 921
 922 /*
 923  * sd = sd + (sn * sm)
 924  */
 925 static u32 vfp_single_fmac(int sd, int sn, s32 m, u32 fpscr)
 926 {
 927         return vfp_single_multiply_accumulate(sd, sn, m, fpscr, 0, "fmac");
 928 }
 929
 930 /*
 931  * sd = sd - (sn * sm)
 932  */
 933 static u32 vfp_single_fnmac(int sd, int sn, s32 m, u32 fpscr)
 934 {
 935         return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac");
 936 }
 937
 938 /*
 939  * sd = -sd + (sn * sm)
 940  */
 941 static u32 vfp_single_fmsc(int sd, int sn, s32 m, u32 fpscr)
 942 {
 943         return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc");
 944 }
 945
 946 /*
 947  * sd = -sd - (sn * sm)
 948  */
 949 static u32 vfp_single_fnmsc(int sd, int sn, s32 m, u32 fpscr)
 950 {
 951         return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
 952 }
 953
 954 /*
 955  * sd = sn * sm
 956  */
 957 static u32 vfp_single_fmul(int sd, int sn, s32 m, u32 fpscr)
 958 {
 959         struct vfp_single vsd, vsn, vsm;
 960         u32 exceptions;
 961         s32 n = vfp_get_float(sn);
 962
 963         pr_debug("VFP: s%u = %08x\n", sn, n);
 964
 965         vfp_single_unpack(&vsn, n);
 966         if (vsn.exponent == 0 && vsn.significand)
 967                 vfp_single_normalise_denormal(&vsn);
 968
 969         vfp_single_unpack(&vsm, m);
 970         if (vsm.exponent == 0 && vsm.significand)
 971                 vfp_single_normalise_denormal(&vsm);
 972
 973         exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
 974         return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fmul");
 975 }
 976
 977 /*
 978  * sd = -(sn * sm)
 979  */
 980 static u32 vfp_single_fnmul(int sd, int sn, s32 m, u32 fpscr)
 981 {
 982         struct vfp_single vsd, vsn, vsm;
 983         u32 exceptions;
 984         s32 n = vfp_get_float(sn);
 985
 986         pr_debug("VFP: s%u = %08x\n", sn, n);
 987
 988         vfp_single_unpack(&vsn, n);
 989         if (vsn.exponent == 0 && vsn.significand)
 990                 vfp_single_normalise_denormal(&vsn);
 991
 992         vfp_single_unpack(&vsm, m);
 993         if (vsm.exponent == 0 && vsm.significand)
 994                 vfp_single_normalise_denormal(&vsm);
 995
 996         exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
 997         vsd.sign = vfp_sign_negate(vsd.sign);
 998         return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fnmul");
 999 }
1000
1001 /*
1002  * sd = sn + sm
1003  */
1004 static u32 vfp_single_fadd(int sd, int sn, s32 m, u32 fpscr)
1005 {
1006         struct vfp_single vsd, vsn, vsm;
1007         u32 exceptions;
1008         s32 n = vfp_get_float(sn);
1009
1010         pr_debug("VFP: s%u = %08x\n", sn, n);
1011
1012         /*
1013          * Unpack and normalise denormals.
1014          */
1015         vfp_single_unpack(&vsn, n);
1016         if (vsn.exponent == 0 && vsn.significand)
1017                 vfp_single_normalise_denormal(&vsn);
1018
1019         vfp_single_unpack(&vsm, m);
1020         if (vsm.exponent == 0 && vsm.significand)
1021                 vfp_single_normalise_denormal(&vsm);
1022
1023         exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr);
1024
1025         return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fadd");
1026 }
1027
1028 /*
1029  * sd = sn - sm
1030  */
1031 static u32 vfp_single_fsub(int sd, int sn, s32 m, u32 fpscr)
1032 {
1033         /*
1034          * Subtraction is addition with one sign inverted.
1035          */
1036         return vfp_single_fadd(sd, sn, vfp_single_packed_negate(m), fpscr);
1037 }
1038
1039 /*
1040  * sd = sn / sm
1041  */
1042 static u32 vfp_single_fdiv(int sd, int sn, s32 m, u32 fpscr)
1043 {
1044         struct vfp_single vsd, vsn, vsm;
1045         u32 exceptions = 0;
1046         s32 n = vfp_get_float(sn);
1047         int tm, tn;
1048
1049         pr_debug("VFP: s%u = %08x\n", sn, n);
1050
1051         vfp_single_unpack(&vsn, n);
1052         vfp_single_unpack(&vsm, m);
1053
1054         vsd.sign = vsn.sign ^ vsm.sign;
1055
1056         tn = vfp_single_type(&vsn);
1057         tm = vfp_single_type(&vsm);
1058
1059         /*
1060          * Is n a NAN?
1061          */
1062         if (tn & VFP_NAN)
1063                 goto vsn_nan;
1064
1065         /*
1066          * Is m a NAN?
1067          */
1068         if (tm & VFP_NAN)
1069                 goto vsm_nan;
1070
1071         /*
1072          * If n and m are infinity, the result is invalid
1073          * If n and m are zero, the result is invalid
1074          */
1075         if (tm & tn & (VFP_INFINITY|VFP_ZERO))
1076                 goto invalid;
1077
1078         /*
1079          * If n is infinity, the result is infinity
1080          */
1081         if (tn & VFP_INFINITY)
1082                 goto infinity;
1083
1084         /*
1085          * If m is zero, raise div0 exception
1086          */
1087         if (tm & VFP_ZERO)
1088                 goto divzero;
1089
1090         /*
1091          * If m is infinity, or n is zero, the result is zero
1092          */
1093         if (tm & VFP_INFINITY || tn & VFP_ZERO)
1094                 goto zero;
1095
1096         if (tn & VFP_DENORMAL)
1097                 vfp_single_normalise_denormal(&vsn);
1098         if (tm & VFP_DENORMAL)
1099                 vfp_single_normalise_denormal(&vsm);
1100
1101         /*
1102          * Ok, we have two numbers, we can perform division.
1103          */
1104         vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1;
1105         vsm.significand <<= 1;
1106         if (vsm.significand <= (2 * vsn.significand)) {
1107                 vsn.significand >>= 1;
1108                 vsd.exponent++;
1109         }
1110         vsd.significand = ((u64)vsn.significand << 32) / vsm.significand;
1111         if ((vsd.significand & 0x3f) == 0)
1112                 vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32);
1113
1114         return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fdiv");
1115
1116  vsn_nan:
1117         exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr);
1118  pack:
1119         vfp_put_float(sd, vfp_single_pack(&vsd));
1120         return exceptions;
1121
1122  vsm_nan:
1123         exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr);
1124         goto pack;
1125
1126  zero:
1127         vsd.exponent = 0;
1128         vsd.significand = 0;
1129         goto pack;
1130
1131  divzero:
1132         exceptions = FPSCR_DZC;
1133  infinity:
1134         vsd.exponent = 255;
1135         vsd.significand = 0;
1136         goto pack;
1137
1138  invalid:
1139         vfp_put_float(sd, vfp_single_pack(&vfp_single_default_qnan));
1140         return FPSCR_IOC;
1141 }
1142
1143 static u32 (* const fop_fns[16])(int sd, int sn, s32 m, u32 fpscr) = {
1144         [FOP_TO_IDX(FOP_FMAC)]  = vfp_single_fmac,
1145         [FOP_TO_IDX(FOP_FNMAC)] = vfp_single_fnmac,
1146         [FOP_TO_IDX(FOP_FMSC)]  = vfp_single_fmsc,
1147         [FOP_TO_IDX(FOP_FNMSC)] = vfp_single_fnmsc,
1148         [FOP_TO_IDX(FOP_FMUL)]  = vfp_single_fmul,
1149         [FOP_TO_IDX(FOP_FNMUL)] = vfp_single_fnmul,
1150         [FOP_TO_IDX(FOP_FADD)]  = vfp_single_fadd,
1151         [FOP_TO_IDX(FOP_FSUB)]  = vfp_single_fsub,
1152         [FOP_TO_IDX(FOP_FDIV)]  = vfp_single_fdiv,
1153 };
1154
1155 #define FREG_BANK(x)    ((x) & 0x18)
1156 #define FREG_IDX(x)     ((x) & 7)
1157
1158 u32 vfp_single_cpdo(u32 inst, u32 fpscr)
1159 {
1160         u32 op = inst & FOP_MASK;
1161         u32 exceptions = 0;
1162         unsigned int sd = vfp_get_sd(inst);
1163         unsigned int sn = vfp_get_sn(inst);
1164         unsigned int sm = vfp_get_sm(inst);
1165         unsigned int vecitr, veclen, vecstride;
1166         u32 (*fop)(int, int, s32, u32);
1167
1168         veclen = fpscr & FPSCR_LENGTH_MASK;
1169         vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK);
1170
1171         /*
1172          * If destination bank is zero, vector length is always '1'.
1173          * ARM DDI0100F C5.1.3, C5.3.2.
1174          */
1175         if (FREG_BANK(sd) == 0)
1176                 veclen = 0;
1177
1178         pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
1179                  (veclen >> FPSCR_LENGTH_BIT) + 1);
1180
1181         fop = (op == FOP_EXT) ? fop_extfns[sn] : fop_fns[FOP_TO_IDX(op)];
1182         if (!fop)
1183                 goto invalid;
1184
1185         for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
1186                 s32 m = vfp_get_float(sm);
1187                 u32 except;
1188
1189                 if (op == FOP_EXT)
1190                         pr_debug("VFP: itr%d (s%u) = op[%u] (s%u=%08x)\n",
1191                                  vecitr >> FPSCR_LENGTH_BIT, sd, sn, sm, m);
1192                 else
1193                         pr_debug("VFP: itr%d (s%u) = (s%u) op[%u] (s%u=%08x)\n",
1194                                  vecitr >> FPSCR_LENGTH_BIT, sd, sn,
1195                                  FOP_TO_IDX(op), sm, m);
1196
1197                 except = fop(sd, sn, m, fpscr);
1198                 pr_debug("VFP: itr%d: exceptions=%08x\n",
1199                          vecitr >> FPSCR_LENGTH_BIT, except);
1200
1201                 exceptions |= except;
1202
1203                 /*
1204                  * This ensures that comparisons only operate on scalars;
1205                  * comparisons always return with one FPSCR status bit set.
1206                  */
1207                 if (except & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V))
1208                         break;
1209
1210                 /*
1211                  * CHECK: It appears to be undefined whether we stop when
1212                  * we encounter an exception.  We continue.
1213                  */
1214
1215                 sd = FREG_BANK(sd) + ((FREG_IDX(sd) + vecstride) & 7);
1216                 sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7);
1217                 if (FREG_BANK(sm) != 0)
1218                         sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7);
1219         }
1220         return exceptions;
1221
1222  invalid:
1223         return (u32)-1;
1224 }