arch/sh/kernel/cpu/sh4/fpu.c

   1 /*
   2  * Save/restore floating point context for signal handlers.
   3  *
   4  * This file is subject to the terms and conditions of the GNU General Public
   5  * License.  See the file "COPYING" in the main directory of this archive
   6  * for more details.
   7  *
   8  * Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
   9  * Copyright (C) 2006  ST Microelectronics Ltd. (denorm support)
  10  *
  11  * FIXME! These routines have not been tested for big endian case.
  12  */
  13 #include <linux/sched.h>
  14 #include <linux/signal.h>
  15 #include <linux/io.h>
  16 #include <asm/cpu/fpu.h>
  17 #include <asm/processor.h>
  18 #include <asm/system.h>
  19 #include <asm/fpu.h>
  20
  21 /* The PR (precision) bit in the FP Status Register must be clear when
  22  * an frchg instruction is executed, otherwise the instruction is undefined.
  23  * Executing frchg with PR set causes a trap on some SH4 implementations.
  24  */
  25
  26 #define FPSCR_RCHG 0x00000000
  27 extern unsigned long long float64_div(unsigned long long a,
  28                                       unsigned long long b);
  29 extern unsigned long int float32_div(unsigned long int a, unsigned long int b);
  30 extern unsigned long long float64_mul(unsigned long long a,
  31                                       unsigned long long b);
  32 extern unsigned long int float32_mul(unsigned long int a, unsigned long int b);
  33 extern unsigned long long float64_add(unsigned long long a,
  34                                       unsigned long long b);
  35 extern unsigned long int float32_add(unsigned long int a, unsigned long int b);
  36 extern unsigned long long float64_sub(unsigned long long a,
  37                                       unsigned long long b);
  38 extern unsigned long int float32_sub(unsigned long int a, unsigned long int b);
  39
  40 static unsigned int fpu_exception_flags;
  41
  42 /*
  43  * Save FPU registers onto task structure.
  44  * Assume called with FPU enabled (SR.FD=0).
  45  */
  46 void save_fpu(struct task_struct *tsk, struct pt_regs *regs)
  47 {
  48         unsigned long dummy;
  49
  50         clear_tsk_thread_flag(tsk, TIF_USEDFPU);
  51         enable_fpu();
  52         asm volatile ("sts.l    fpul, @-%0\n\t"
  53                       "sts.l    fpscr, @-%0\n\t"
  54                       "lds      %2, fpscr\n\t"
  55                       "frchg\n\t"
  56                       "fmov.s   fr15, @-%0\n\t"
  57                       "fmov.s   fr14, @-%0\n\t"
  58                       "fmov.s   fr13, @-%0\n\t"
  59                       "fmov.s   fr12, @-%0\n\t"
  60                       "fmov.s   fr11, @-%0\n\t"
  61                       "fmov.s   fr10, @-%0\n\t"
  62                       "fmov.s   fr9, @-%0\n\t"
  63                       "fmov.s   fr8, @-%0\n\t"
  64                       "fmov.s   fr7, @-%0\n\t"
  65                       "fmov.s   fr6, @-%0\n\t"
  66                       "fmov.s   fr5, @-%0\n\t"
  67                       "fmov.s   fr4, @-%0\n\t"
  68                       "fmov.s   fr3, @-%0\n\t"
  69                       "fmov.s   fr2, @-%0\n\t"
  70                       "fmov.s   fr1, @-%0\n\t"
  71                       "fmov.s   fr0, @-%0\n\t"
  72                       "frchg\n\t"
  73                       "fmov.s   fr15, @-%0\n\t"
  74                       "fmov.s   fr14, @-%0\n\t"
  75                       "fmov.s   fr13, @-%0\n\t"
  76                       "fmov.s   fr12, @-%0\n\t"
  77                       "fmov.s   fr11, @-%0\n\t"
  78                       "fmov.s   fr10, @-%0\n\t"
  79                       "fmov.s   fr9, @-%0\n\t"
  80                       "fmov.s   fr8, @-%0\n\t"
  81                       "fmov.s   fr7, @-%0\n\t"
  82                       "fmov.s   fr6, @-%0\n\t"
  83                       "fmov.s   fr5, @-%0\n\t"
  84                       "fmov.s   fr4, @-%0\n\t"
  85                       "fmov.s   fr3, @-%0\n\t"
  86                       "fmov.s   fr2, @-%0\n\t"
  87                       "fmov.s   fr1, @-%0\n\t"
  88                       "fmov.s   fr0, @-%0\n\t"
  89                       "lds      %3, fpscr\n\t":"=r" (dummy)
  90                       :"0"((char *)(&tsk->thread.fpu.hard.status)),
  91                       "r"(FPSCR_RCHG), "r"(FPSCR_INIT)
  92                       :"memory");
  93
  94         disable_fpu();
  95         release_fpu(regs);
  96 }
  97
  98 static void restore_fpu(struct task_struct *tsk)
  99 {
 100         unsigned long dummy;
 101
 102         enable_fpu();
 103         asm volatile ("lds      %2, fpscr\n\t"
 104                       "fmov.s   @%0+, fr0\n\t"
 105                       "fmov.s   @%0+, fr1\n\t"
 106                       "fmov.s   @%0+, fr2\n\t"
 107                       "fmov.s   @%0+, fr3\n\t"
 108                       "fmov.s   @%0+, fr4\n\t"
 109                       "fmov.s   @%0+, fr5\n\t"
 110                       "fmov.s   @%0+, fr6\n\t"
 111                       "fmov.s   @%0+, fr7\n\t"
 112                       "fmov.s   @%0+, fr8\n\t"
 113                       "fmov.s   @%0+, fr9\n\t"
 114                       "fmov.s   @%0+, fr10\n\t"
 115                       "fmov.s   @%0+, fr11\n\t"
 116                       "fmov.s   @%0+, fr12\n\t"
 117                       "fmov.s   @%0+, fr13\n\t"
 118                       "fmov.s   @%0+, fr14\n\t"
 119                       "fmov.s   @%0+, fr15\n\t"
 120                       "frchg\n\t"
 121                       "fmov.s   @%0+, fr0\n\t"
 122                       "fmov.s   @%0+, fr1\n\t"
 123                       "fmov.s   @%0+, fr2\n\t"
 124                       "fmov.s   @%0+, fr3\n\t"
 125                       "fmov.s   @%0+, fr4\n\t"
 126                       "fmov.s   @%0+, fr5\n\t"
 127                       "fmov.s   @%0+, fr6\n\t"
 128                       "fmov.s   @%0+, fr7\n\t"
 129                       "fmov.s   @%0+, fr8\n\t"
 130                       "fmov.s   @%0+, fr9\n\t"
 131                       "fmov.s   @%0+, fr10\n\t"
 132                       "fmov.s   @%0+, fr11\n\t"
 133                       "fmov.s   @%0+, fr12\n\t"
 134                       "fmov.s   @%0+, fr13\n\t"
 135                       "fmov.s   @%0+, fr14\n\t"
 136                       "fmov.s   @%0+, fr15\n\t"
 137                       "frchg\n\t"
 138                       "lds.l    @%0+, fpscr\n\t"
 139                       "lds.l    @%0+, fpul\n\t"
 140                       :"=r" (dummy)
 141                       :"0"(&tsk->thread.fpu), "r"(FPSCR_RCHG)
 142                       :"memory");
 143         disable_fpu();
 144 }
 145
 146 /*
 147  * Load the FPU with signalling NANS.  This bit pattern we're using
 148  * has the property that no matter wether considered as single or as
 149  * double precision represents signaling NANS.
 150  */
 151
 152 static void fpu_init(void)
 153 {
 154         enable_fpu();
 155         asm volatile (  "lds    %0, fpul\n\t"
 156                         "lds    %1, fpscr\n\t"
 157                         "fsts   fpul, fr0\n\t"
 158                         "fsts   fpul, fr1\n\t"
 159                         "fsts   fpul, fr2\n\t"
 160                         "fsts   fpul, fr3\n\t"
 161                         "fsts   fpul, fr4\n\t"
 162                         "fsts   fpul, fr5\n\t"
 163                         "fsts   fpul, fr6\n\t"
 164                         "fsts   fpul, fr7\n\t"
 165                         "fsts   fpul, fr8\n\t"
 166                         "fsts   fpul, fr9\n\t"
 167                         "fsts   fpul, fr10\n\t"
 168                         "fsts   fpul, fr11\n\t"
 169                         "fsts   fpul, fr12\n\t"
 170                         "fsts   fpul, fr13\n\t"
 171                         "fsts   fpul, fr14\n\t"
 172                         "fsts   fpul, fr15\n\t"
 173                         "frchg\n\t"
 174                         "fsts   fpul, fr0\n\t"
 175                         "fsts   fpul, fr1\n\t"
 176                         "fsts   fpul, fr2\n\t"
 177                         "fsts   fpul, fr3\n\t"
 178                         "fsts   fpul, fr4\n\t"
 179                         "fsts   fpul, fr5\n\t"
 180                         "fsts   fpul, fr6\n\t"
 181                         "fsts   fpul, fr7\n\t"
 182                         "fsts   fpul, fr8\n\t"
 183                         "fsts   fpul, fr9\n\t"
 184                         "fsts   fpul, fr10\n\t"
 185                         "fsts   fpul, fr11\n\t"
 186                         "fsts   fpul, fr12\n\t"
 187                         "fsts   fpul, fr13\n\t"
 188                         "fsts   fpul, fr14\n\t"
 189                         "fsts   fpul, fr15\n\t"
 190                         "frchg\n\t"
 191                         "lds    %2, fpscr\n\t"
 192                         :       /* no output */
 193                         :"r" (0), "r"(FPSCR_RCHG), "r"(FPSCR_INIT));
 194         disable_fpu();
 195 }
 196
 197 /**
 198  *      denormal_to_double - Given denormalized float number,
 199  *                           store double float
 200  *
 201  *      @fpu: Pointer to sh_fpu_hard structure
 202  *      @n: Index to FP register
 203  */
 204 static void denormal_to_double(struct sh_fpu_hard_struct *fpu, int n)
 205 {
 206         unsigned long du, dl;
 207         unsigned long x = fpu->fpul;
 208         int exp = 1023 - 126;
 209
 210         if (x != 0 && (x & 0x7f800000) == 0) {
 211                 du = (x & 0x80000000);
 212                 while ((x & 0x00800000) == 0) {
 213                         x <<= 1;
 214                         exp--;
 215                 }
 216                 x &= 0x007fffff;
 217                 du |= (exp << 20) | (x >> 3);
 218                 dl = x << 29;
 219
 220                 fpu->fp_regs[n] = du;
 221                 fpu->fp_regs[n + 1] = dl;
 222         }
 223 }
 224
 225 /**
 226  *      ieee_fpe_handler - Handle denormalized number exception
 227  *
 228  *      @regs: Pointer to register structure
 229  *
 230  *      Returns 1 when it's handled (should not cause exception).
 231  */
 232 static int ieee_fpe_handler(struct pt_regs *regs)
 233 {
 234         unsigned short insn = *(unsigned short *)regs->pc;
 235         unsigned short finsn;
 236         unsigned long nextpc;
 237         int nib[4] = {
 238                 (insn >> 12) & 0xf,
 239                 (insn >> 8) & 0xf,
 240                 (insn >> 4) & 0xf,
 241                 insn & 0xf
 242         };
 243
 244         if (nib[0] == 0xb || (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb))
 245                 regs->pr = regs->pc + 4;  /* bsr & jsr */
 246
 247         if (nib[0] == 0xa || nib[0] == 0xb) {
 248                 /* bra & bsr */
 249                 nextpc = regs->pc + 4 + ((short)((insn & 0xfff) << 4) >> 3);
 250                 finsn = *(unsigned short *)(regs->pc + 2);
 251         } else if (nib[0] == 0x8 && nib[1] == 0xd) {
 252                 /* bt/s */
 253                 if (regs->sr & 1)
 254                         nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
 255                 else
 256                         nextpc = regs->pc + 4;
 257                 finsn = *(unsigned short *)(regs->pc + 2);
 258         } else if (nib[0] == 0x8 && nib[1] == 0xf) {
 259                 /* bf/s */
 260                 if (regs->sr & 1)
 261                         nextpc = regs->pc + 4;
 262                 else
 263                         nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
 264                 finsn = *(unsigned short *)(regs->pc + 2);
 265         } else if (nib[0] == 0x4 && nib[3] == 0xb &&
 266                    (nib[2] == 0x0 || nib[2] == 0x2)) {
 267                 /* jmp & jsr */
 268                 nextpc = regs->regs[nib[1]];
 269                 finsn = *(unsigned short *)(regs->pc + 2);
 270         } else if (nib[0] == 0x0 && nib[3] == 0x3 &&
 271                    (nib[2] == 0x0 || nib[2] == 0x2)) {
 272                 /* braf & bsrf */
 273                 nextpc = regs->pc + 4 + regs->regs[nib[1]];
 274                 finsn = *(unsigned short *)(regs->pc + 2);
 275         } else if (insn == 0x000b) {
 276                 /* rts */
 277                 nextpc = regs->pr;
 278                 finsn = *(unsigned short *)(regs->pc + 2);
 279         } else {
 280                 nextpc = regs->pc + instruction_size(insn);
 281                 finsn = insn;
 282         }
 283
 284         if ((finsn & 0xf1ff) == 0xf0ad) {
 285                 /* fcnvsd */
 286                 struct task_struct *tsk = current;
 287
 288                 save_fpu(tsk, regs);
 289                 if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR))
 290                         /* FPU error */
 291                         denormal_to_double(&tsk->thread.fpu.hard,
 292                                            (finsn >> 8) & 0xf);
 293                 else
 294                         return 0;
 295
 296                 regs->pc = nextpc;
 297                 return 1;
 298         } else if ((finsn & 0xf00f) == 0xf002) {
 299                 /* fmul */
 300                 struct task_struct *tsk = current;
 301                 int fpscr;
 302                 int n, m, prec;
 303                 unsigned int hx, hy;
 304
 305                 n = (finsn >> 8) & 0xf;
 306                 m = (finsn >> 4) & 0xf;
 307                 hx = tsk->thread.fpu.hard.fp_regs[n];
 308                 hy = tsk->thread.fpu.hard.fp_regs[m];
 309                 fpscr = tsk->thread.fpu.hard.fpscr;
 310                 prec = fpscr & FPSCR_DBL_PRECISION;
 311
 312                 if ((fpscr & FPSCR_CAUSE_ERROR)
 313                     && (prec && ((hx & 0x7fffffff) < 0x00100000
 314                                  || (hy & 0x7fffffff) < 0x00100000))) {
 315                         long long llx, lly;
 316
 317                         /* FPU error because of denormal (doubles) */
 318                         llx = ((long long)hx << 32)
 319                             | tsk->thread.fpu.hard.fp_regs[n + 1];
 320                         lly = ((long long)hy << 32)
 321                             | tsk->thread.fpu.hard.fp_regs[m + 1];
 322                         llx = float64_mul(llx, lly);
 323                         tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
 324                         tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
 325                 } else if ((fpscr & FPSCR_CAUSE_ERROR)
 326                            && (!prec && ((hx & 0x7fffffff) < 0x00800000
 327                                          || (hy & 0x7fffffff) < 0x00800000))) {
 328                         /* FPU error because of denormal (floats) */
 329                         hx = float32_mul(hx, hy);
 330                         tsk->thread.fpu.hard.fp_regs[n] = hx;
 331                 } else
 332                         return 0;
 333
 334                 regs->pc = nextpc;
 335                 return 1;
 336         } else if ((finsn & 0xf00e) == 0xf000) {
 337                 /* fadd, fsub */
 338                 struct task_struct *tsk = current;
 339                 int fpscr;
 340                 int n, m, prec;
 341                 unsigned int hx, hy;
 342
 343                 n = (finsn >> 8) & 0xf;
 344                 m = (finsn >> 4) & 0xf;
 345                 hx = tsk->thread.fpu.hard.fp_regs[n];
 346                 hy = tsk->thread.fpu.hard.fp_regs[m];
 347                 fpscr = tsk->thread.fpu.hard.fpscr;
 348                 prec = fpscr & FPSCR_DBL_PRECISION;
 349
 350                 if ((fpscr & FPSCR_CAUSE_ERROR)
 351                     && (prec && ((hx & 0x7fffffff) < 0x00100000
 352                                  || (hy & 0x7fffffff) < 0x00100000))) {
 353                         long long llx, lly;
 354
 355                         /* FPU error because of denormal (doubles) */
 356                         llx = ((long long)hx << 32)
 357                             | tsk->thread.fpu.hard.fp_regs[n + 1];
 358                         lly = ((long long)hy << 32)
 359                             | tsk->thread.fpu.hard.fp_regs[m + 1];
 360                         if ((finsn & 0xf00f) == 0xf000)
 361                                 llx = float64_add(llx, lly);
 362                         else
 363                                 llx = float64_sub(llx, lly);
 364                         tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
 365                         tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
 366                 } else if ((fpscr & FPSCR_CAUSE_ERROR)
 367                            && (!prec && ((hx & 0x7fffffff) < 0x00800000
 368                                          || (hy & 0x7fffffff) < 0x00800000))) {
 369                         /* FPU error because of denormal (floats) */
 370                         if ((finsn & 0xf00f) == 0xf000)
 371                                 hx = float32_add(hx, hy);
 372                         else
 373                                 hx = float32_sub(hx, hy);
 374                         tsk->thread.fpu.hard.fp_regs[n] = hx;
 375                 } else
 376                         return 0;
 377
 378                 regs->pc = nextpc;
 379                 return 1;
 380         } else if ((finsn & 0xf003) == 0xf003) {
 381                 /* fdiv */
 382                 struct task_struct *tsk = current;
 383                 int fpscr;
 384                 int n, m, prec;
 385                 unsigned int hx, hy;
 386
 387                 n = (finsn >> 8) & 0xf;
 388                 m = (finsn >> 4) & 0xf;
 389                 hx = tsk->thread.fpu.hard.fp_regs[n];
 390                 hy = tsk->thread.fpu.hard.fp_regs[m];
 391                 fpscr = tsk->thread.fpu.hard.fpscr;
 392                 prec = fpscr & FPSCR_DBL_PRECISION;
 393
 394                 if ((fpscr & FPSCR_CAUSE_ERROR)
 395                     && (prec && ((hx & 0x7fffffff) < 0x00100000
 396                                  || (hy & 0x7fffffff) < 0x00100000))) {
 397                         long long llx, lly;
 398
 399                         /* FPU error because of denormal (doubles) */
 400                         llx = ((long long)hx << 32)
 401                             | tsk->thread.fpu.hard.fp_regs[n + 1];
 402                         lly = ((long long)hy << 32)
 403                             | tsk->thread.fpu.hard.fp_regs[m + 1];
 404
 405                         llx = float64_div(llx, lly);
 406
 407                         tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
 408                         tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
 409                 } else if ((fpscr & FPSCR_CAUSE_ERROR)
 410                            && (!prec && ((hx & 0x7fffffff) < 0x00800000
 411                                          || (hy & 0x7fffffff) < 0x00800000))) {
 412                         /* FPU error because of denormal (floats) */
 413                         hx = float32_div(hx, hy);
 414                         tsk->thread.fpu.hard.fp_regs[n] = hx;
 415                 } else
 416                         return 0;
 417
 418                 regs->pc = nextpc;
 419                 return 1;
 420         }
 421
 422         return 0;
 423 }
 424
 425 void float_raise(unsigned int flags)
 426 {
 427         fpu_exception_flags |= flags;
 428 }
 429
 430 int float_rounding_mode(void)
 431 {
 432         struct task_struct *tsk = current;
 433         int roundingMode = FPSCR_ROUNDING_MODE(tsk->thread.fpu.hard.fpscr);
 434         return roundingMode;
 435 }
 436
 437 BUILD_TRAP_HANDLER(fpu_error)
 438 {
 439         struct task_struct *tsk = current;
 440         TRAP_HANDLER_DECL;
 441
 442         save_fpu(tsk, regs);
 443         fpu_exception_flags = 0;
 444         if (ieee_fpe_handler(regs)) {
 445                 tsk->thread.fpu.hard.fpscr &=
 446                     ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
 447                 tsk->thread.fpu.hard.fpscr |= fpu_exception_flags;
 448                 /* Set the FPSCR flag as well as cause bits - simply
 449                  * replicate the cause */
 450                 tsk->thread.fpu.hard.fpscr |= (fpu_exception_flags >> 10);
 451                 grab_fpu(regs);
 452                 restore_fpu(tsk);
 453                 set_tsk_thread_flag(tsk, TIF_USEDFPU);
 454                 if ((((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) &
 455                      (fpu_exception_flags >> 2)) == 0) {
 456                         return;
 457                 }
 458         }
 459
 460         force_sig(SIGFPE, tsk);
 461 }
 462
 463 BUILD_TRAP_HANDLER(fpu_state_restore)
 464 {
 465         struct task_struct *tsk = current;
 466         TRAP_HANDLER_DECL;
 467
 468         grab_fpu(regs);
 469         if (!user_mode(regs)) {
 470                 printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
 471                 return;
 472         }
 473
 474         if (used_math()) {
 475                 /* Using the FPU again.  */
 476                 restore_fpu(tsk);
 477         } else {
 478                 /* First time FPU user.  */
 479                 fpu_init();
 480                 set_used_math();
 481         }
 482         set_tsk_thread_flag(tsk, TIF_USEDFPU);
 483 }