hw/misc/milkymist-pfpu.c

   1 /*
   2  *  QEMU model of the Milkymist programmable FPU.
   3  *
   4  *  Copyright (c) 2010 Michael Walle <michael@walle.cc>
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  *
  19  *
  20  * Specification available at:
  21  *   http://milkymist.walle.cc/socdoc/pfpu.pdf
  22  *
  23  */
  24
  25 #include "qemu/osdep.h"
  26 #include "hw/irq.h"
  27 #include "hw/sysbus.h"
  28 #include "migration/vmstate.h"
  29 #include "trace.h"
  30 #include "qemu/log.h"
  31 #include "qemu/module.h"
  32 #include "qemu/error-report.h"
  33 #include <math.h>
  34 #include "qom/object.h"
  35
  36 /* #define TRACE_EXEC */
  37
  38 #ifdef TRACE_EXEC
  39 #    define D_EXEC(x) x
  40 #else
  41 #    define D_EXEC(x)
  42 #endif
  43
  44 enum {
  45     R_CTL = 0,
  46     R_MESHBASE,
  47     R_HMESHLAST,
  48     R_VMESHLAST,
  49     R_CODEPAGE,
  50     R_VERTICES,
  51     R_COLLISIONS,
  52     R_STRAYWRITES,
  53     R_LASTDMA,
  54     R_PC,
  55     R_DREGBASE,
  56     R_CODEBASE,
  57     R_MAX
  58 };
  59
  60 enum {
  61     CTL_START_BUSY = (1<<0),
  62 };
  63
  64 enum {
  65     OP_NOP = 0,
  66     OP_FADD,
  67     OP_FSUB,
  68     OP_FMUL,
  69     OP_FABS,
  70     OP_F2I,
  71     OP_I2F,
  72     OP_VECTOUT,
  73     OP_SIN,
  74     OP_COS,
  75     OP_ABOVE,
  76     OP_EQUAL,
  77     OP_COPY,
  78     OP_IF,
  79     OP_TSIGN,
  80     OP_QUAKE,
  81 };
  82
  83 enum {
  84     GPR_X = 0,
  85     GPR_Y = 1,
  86     GPR_FLAGS = 2,
  87 };
  88
  89 enum {
  90     LATENCY_FADD = 5,
  91     LATENCY_FSUB = 5,
  92     LATENCY_FMUL = 7,
  93     LATENCY_FABS = 2,
  94     LATENCY_F2I = 2,
  95     LATENCY_I2F = 3,
  96     LATENCY_VECTOUT = 0,
  97     LATENCY_SIN = 4,
  98     LATENCY_COS = 4,
  99     LATENCY_ABOVE = 2,
 100     LATENCY_EQUAL = 2,
 101     LATENCY_COPY = 2,
 102     LATENCY_IF = 2,
 103     LATENCY_TSIGN = 2,
 104     LATENCY_QUAKE = 2,
 105     MAX_LATENCY = 7
 106 };
 107
 108 #define GPR_BEGIN       0x100
 109 #define GPR_END         0x17f
 110 #define MICROCODE_BEGIN 0x200
 111 #define MICROCODE_END   0x3ff
 112 #define MICROCODE_WORDS 2048
 113
 114 #define REINTERPRET_CAST(type, val) (*((type *)&(val)))
 115
 116 #ifdef TRACE_EXEC
 117 static const char *opcode_to_str[] = {
 118     "NOP", "FADD", "FSUB", "FMUL", "FABS", "F2I", "I2F", "VECTOUT",
 119     "SIN", "COS", "ABOVE", "EQUAL", "COPY", "IF", "TSIGN", "QUAKE",
 120 };
 121 #endif
 122
 123 #define TYPE_MILKYMIST_PFPU "milkymist-pfpu"
 124 typedef struct MilkymistPFPUState MilkymistPFPUState;
 125 DECLARE_INSTANCE_CHECKER(MilkymistPFPUState, MILKYMIST_PFPU,
 126                          TYPE_MILKYMIST_PFPU)
 127
 128 struct MilkymistPFPUState {
 129     SysBusDevice parent_obj;
 130
 131     MemoryRegion regs_region;
 132     Chardev *chr;
 133     qemu_irq irq;
 134
 135     uint32_t regs[R_MAX];
 136     uint32_t gp_regs[128];
 137     uint32_t microcode[MICROCODE_WORDS];
 138
 139     int output_queue_pos;
 140     uint32_t output_queue[MAX_LATENCY];
 141 };
 142
 143 static inline uint32_t
 144 get_dma_address(uint32_t base, uint32_t x, uint32_t y)
 145 {
 146     return base + 8 * (128 * y + x);
 147 }
 148
 149 static inline void
 150 output_queue_insert(MilkymistPFPUState *s, uint32_t val, int pos)
 151 {
 152     s->output_queue[(s->output_queue_pos + pos) % MAX_LATENCY] = val;
 153 }
 154
 155 static inline uint32_t
 156 output_queue_remove(MilkymistPFPUState *s)
 157 {
 158     return s->output_queue[s->output_queue_pos];
 159 }
 160
 161 static inline void
 162 output_queue_advance(MilkymistPFPUState *s)
 163 {
 164     s->output_queue[s->output_queue_pos] = 0;
 165     s->output_queue_pos = (s->output_queue_pos + 1) % MAX_LATENCY;
 166 }
 167
 168 static int pfpu_decode_insn(MilkymistPFPUState *s)
 169 {
 170     uint32_t pc = s->regs[R_PC];
 171     uint32_t insn = s->microcode[pc];
 172     uint32_t reg_a = (insn >> 18) & 0x7f;
 173     uint32_t reg_b = (insn >> 11) & 0x7f;
 174     uint32_t op = (insn >> 7) & 0xf;
 175     uint32_t reg_d = insn & 0x7f;
 176     uint32_t r = 0;
 177     int latency = 0;
 178
 179     switch (op) {
 180     case OP_NOP:
 181         break;
 182     case OP_FADD:
 183     {
 184         float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 185         float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 186         float t = a + b;
 187         r = REINTERPRET_CAST(uint32_t, t);
 188         latency = LATENCY_FADD;
 189         D_EXEC(qemu_log("ADD a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 190     } break;
 191     case OP_FSUB:
 192     {
 193         float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 194         float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 195         float t = a - b;
 196         r = REINTERPRET_CAST(uint32_t, t);
 197         latency = LATENCY_FSUB;
 198         D_EXEC(qemu_log("SUB a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 199     } break;
 200     case OP_FMUL:
 201     {
 202         float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 203         float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 204         float t = a * b;
 205         r = REINTERPRET_CAST(uint32_t, t);
 206         latency = LATENCY_FMUL;
 207         D_EXEC(qemu_log("MUL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 208     } break;
 209     case OP_FABS:
 210     {
 211         float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 212         float t = fabsf(a);
 213         r = REINTERPRET_CAST(uint32_t, t);
 214         latency = LATENCY_FABS;
 215         D_EXEC(qemu_log("ABS a=%f t=%f, r=%08x\n", a, t, r));
 216     } break;
 217     case OP_F2I:
 218     {
 219         float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 220         int32_t t = a;
 221         r = REINTERPRET_CAST(uint32_t, t);
 222         latency = LATENCY_F2I;
 223         D_EXEC(qemu_log("F2I a=%f t=%d, r=%08x\n", a, t, r));
 224     } break;
 225     case OP_I2F:
 226     {
 227         int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
 228         float t = a;
 229         r = REINTERPRET_CAST(uint32_t, t);
 230         latency = LATENCY_I2F;
 231         D_EXEC(qemu_log("I2F a=%08x t=%f, r=%08x\n", a, t, r));
 232     } break;
 233     case OP_VECTOUT:
 234     {
 235         uint32_t a = cpu_to_be32(s->gp_regs[reg_a]);
 236         uint32_t b = cpu_to_be32(s->gp_regs[reg_b]);
 237         hwaddr dma_ptr =
 238             get_dma_address(s->regs[R_MESHBASE],
 239                     s->gp_regs[GPR_X], s->gp_regs[GPR_Y]);
 240         cpu_physical_memory_write(dma_ptr, &a, 4);
 241         cpu_physical_memory_write(dma_ptr + 4, &b, 4);
 242         s->regs[R_LASTDMA] = dma_ptr + 4;
 243         D_EXEC(qemu_log("VECTOUT a=%08x b=%08x dma=%08x\n", a, b, dma_ptr));
 244         trace_milkymist_pfpu_vectout(a, b, dma_ptr);
 245     } break;
 246     case OP_SIN:
 247     {
 248         int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
 249         float t = sinf(a * (1.0f / (M_PI * 4096.0f)));
 250         r = REINTERPRET_CAST(uint32_t, t);
 251         latency = LATENCY_SIN;
 252         D_EXEC(qemu_log("SIN a=%d t=%f, r=%08x\n", a, t, r));
 253     } break;
 254     case OP_COS:
 255     {
 256         int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
 257         float t = cosf(a * (1.0f / (M_PI * 4096.0f)));
 258         r = REINTERPRET_CAST(uint32_t, t);
 259         latency = LATENCY_COS;
 260         D_EXEC(qemu_log("COS a=%d t=%f, r=%08x\n", a, t, r));
 261     } break;
 262     case OP_ABOVE:
 263     {
 264         float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 265         float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 266         float t = (a > b) ? 1.0f : 0.0f;
 267         r = REINTERPRET_CAST(uint32_t, t);
 268         latency = LATENCY_ABOVE;
 269         D_EXEC(qemu_log("ABOVE a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 270     } break;
 271     case OP_EQUAL:
 272     {
 273         float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 274         float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 275         float t = (a == b) ? 1.0f : 0.0f;
 276         r = REINTERPRET_CAST(uint32_t, t);
 277         latency = LATENCY_EQUAL;
 278         D_EXEC(qemu_log("EQUAL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 279     } break;
 280     case OP_COPY:
 281     {
 282         r = s->gp_regs[reg_a];
 283         latency = LATENCY_COPY;
 284         D_EXEC(qemu_log("COPY"));
 285     } break;
 286     case OP_IF:
 287     {
 288         float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 289         float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 290         uint32_t f = s->gp_regs[GPR_FLAGS];
 291         float t = (f != 0) ? a : b;
 292         r = REINTERPRET_CAST(uint32_t, t);
 293         latency = LATENCY_IF;
 294         D_EXEC(qemu_log("IF f=%u a=%f b=%f t=%f, r=%08x\n", f, a, b, t, r));
 295     } break;
 296     case OP_TSIGN:
 297     {
 298         float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 299         float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 300         float t = (b < 0) ? -a : a;
 301         r = REINTERPRET_CAST(uint32_t, t);
 302         latency = LATENCY_TSIGN;
 303         D_EXEC(qemu_log("TSIGN a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 304     } break;
 305     case OP_QUAKE:
 306     {
 307         uint32_t a = s->gp_regs[reg_a];
 308         r = 0x5f3759df - (a >> 1);
 309         latency = LATENCY_QUAKE;
 310         D_EXEC(qemu_log("QUAKE a=%d r=%08x\n", a, r));
 311     } break;
 312
 313     default:
 314         error_report("milkymist_pfpu: unknown opcode %d", op);
 315         break;
 316     }
 317
 318     if (!reg_d) {
 319         D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d>\n",
 320                     s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
 321                     s->regs[R_PC] + latency));
 322     } else {
 323         D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d> -> R%03d\n",
 324                     s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
 325                     s->regs[R_PC] + latency, reg_d));
 326     }
 327
 328     if (op == OP_VECTOUT) {
 329         return 0;
 330     }
 331
 332     /* store output for this cycle */
 333     if (reg_d) {
 334         uint32_t val = output_queue_remove(s);
 335         D_EXEC(qemu_log("R%03d <- 0x%08x\n", reg_d, val));
 336         s->gp_regs[reg_d] = val;
 337     }
 338
 339     output_queue_advance(s);
 340
 341     /* store op output */
 342     if (op != OP_NOP) {
 343         output_queue_insert(s, r, latency-1);
 344     }
 345
 346     /* advance PC */
 347     s->regs[R_PC]++;
 348
 349     return 1;
 350 };
 351
 352 static void pfpu_start(MilkymistPFPUState *s)
 353 {
 354     int x, y;
 355     int i;
 356
 357     for (y = 0; y <= s->regs[R_VMESHLAST]; y++) {
 358         for (x = 0; x <= s->regs[R_HMESHLAST]; x++) {
 359             D_EXEC(qemu_log("\nprocessing x=%d y=%d\n", x, y));
 360
 361             /* set current position */
 362             s->gp_regs[GPR_X] = x;
 363             s->gp_regs[GPR_Y] = y;
 364
 365             /* run microcode on this position */
 366             i = 0;
 367             while (pfpu_decode_insn(s)) {
 368                 /* decode at most MICROCODE_WORDS instructions */
 369                 if (++i >= MICROCODE_WORDS) {
 370                     error_report("milkymist_pfpu: too many instructions "
 371                             "executed in microcode. No VECTOUT?");
 372                     break;
 373                 }
 374             }
 375
 376             /* reset pc for next run */
 377             s->regs[R_PC] = 0;
 378         }
 379     }
 380
 381     s->regs[R_VERTICES] = x * y;
 382
 383     trace_milkymist_pfpu_pulse_irq();
 384     qemu_irq_pulse(s->irq);
 385 }
 386
 387 static inline int get_microcode_address(MilkymistPFPUState *s, uint32_t addr)
 388 {
 389     return (512 * s->regs[R_CODEPAGE]) + addr - MICROCODE_BEGIN;
 390 }
 391
 392 static uint64_t pfpu_read(void *opaque, hwaddr addr,
 393                           unsigned size)
 394 {
 395     MilkymistPFPUState *s = opaque;
 396     uint32_t r = 0;
 397
 398     addr >>= 2;
 399     switch (addr) {
 400     case R_CTL:
 401     case R_MESHBASE:
 402     case R_HMESHLAST:
 403     case R_VMESHLAST:
 404     case R_CODEPAGE:
 405     case R_VERTICES:
 406     case R_COLLISIONS:
 407     case R_STRAYWRITES:
 408     case R_LASTDMA:
 409     case R_PC:
 410     case R_DREGBASE:
 411     case R_CODEBASE:
 412         r = s->regs[addr];
 413         break;
 414     case GPR_BEGIN ... GPR_END:
 415         r = s->gp_regs[addr - GPR_BEGIN];
 416         break;
 417     case MICROCODE_BEGIN ...  MICROCODE_END:
 418         r = s->microcode[get_microcode_address(s, addr)];
 419         break;
 420
 421     default:
 422         error_report("milkymist_pfpu: read access to unknown register 0x"
 423                 TARGET_FMT_plx, addr << 2);
 424         break;
 425     }
 426
 427     trace_milkymist_pfpu_memory_read(addr << 2, r);
 428
 429     return r;
 430 }
 431
 432 static void pfpu_write(void *opaque, hwaddr addr, uint64_t value,
 433                        unsigned size)
 434 {
 435     MilkymistPFPUState *s = opaque;
 436
 437     trace_milkymist_pfpu_memory_write(addr, value);
 438
 439     addr >>= 2;
 440     switch (addr) {
 441     case R_CTL:
 442         if (value & CTL_START_BUSY) {
 443             pfpu_start(s);
 444         }
 445         break;
 446     case R_MESHBASE:
 447     case R_HMESHLAST:
 448     case R_VMESHLAST:
 449     case R_CODEPAGE:
 450     case R_VERTICES:
 451     case R_COLLISIONS:
 452     case R_STRAYWRITES:
 453     case R_LASTDMA:
 454     case R_PC:
 455     case R_DREGBASE:
 456     case R_CODEBASE:
 457         s->regs[addr] = value;
 458         break;
 459     case GPR_BEGIN ...  GPR_END:
 460         s->gp_regs[addr - GPR_BEGIN] = value;
 461         break;
 462     case MICROCODE_BEGIN ...  MICROCODE_END:
 463         s->microcode[get_microcode_address(s, addr)] = value;
 464         break;
 465
 466     default:
 467         error_report("milkymist_pfpu: write access to unknown register 0x"
 468                 TARGET_FMT_plx, addr << 2);
 469         break;
 470     }
 471 }
 472
 473 static const MemoryRegionOps pfpu_mmio_ops = {
 474     .read = pfpu_read,
 475     .write = pfpu_write,
 476     .valid = {
 477         .min_access_size = 4,
 478         .max_access_size = 4,
 479     },
 480     .endianness = DEVICE_NATIVE_ENDIAN,
 481 };
 482
 483 static void milkymist_pfpu_reset(DeviceState *d)
 484 {
 485     MilkymistPFPUState *s = MILKYMIST_PFPU(d);
 486     int i;
 487
 488     for (i = 0; i < R_MAX; i++) {
 489         s->regs[i] = 0;
 490     }
 491     for (i = 0; i < 128; i++) {
 492         s->gp_regs[i] = 0;
 493     }
 494     for (i = 0; i < MICROCODE_WORDS; i++) {
 495         s->microcode[i] = 0;
 496     }
 497     s->output_queue_pos = 0;
 498     for (i = 0; i < MAX_LATENCY; i++) {
 499         s->output_queue[i] = 0;
 500     }
 501 }
 502
 503 static void milkymist_pfpu_realize(DeviceState *dev, Error **errp)
 504 {
 505     MilkymistPFPUState *s = MILKYMIST_PFPU(dev);
 506     SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
 507
 508     sysbus_init_irq(sbd, &s->irq);
 509
 510     memory_region_init_io(&s->regs_region, OBJECT(dev), &pfpu_mmio_ops, s,
 511             "milkymist-pfpu", MICROCODE_END * 4);
 512     sysbus_init_mmio(sbd, &s->regs_region);
 513 }
 514
 515 static const VMStateDescription vmstate_milkymist_pfpu = {
 516     .name = "milkymist-pfpu",
 517     .version_id = 1,
 518     .minimum_version_id = 1,
 519     .fields = (VMStateField[]) {
 520         VMSTATE_UINT32_ARRAY(regs, MilkymistPFPUState, R_MAX),
 521         VMSTATE_UINT32_ARRAY(gp_regs, MilkymistPFPUState, 128),
 522         VMSTATE_UINT32_ARRAY(microcode, MilkymistPFPUState, MICROCODE_WORDS),
 523         VMSTATE_INT32(output_queue_pos, MilkymistPFPUState),
 524         VMSTATE_UINT32_ARRAY(output_queue, MilkymistPFPUState, MAX_LATENCY),
 525         VMSTATE_END_OF_LIST()
 526     }
 527 };
 528
 529 static void milkymist_pfpu_class_init(ObjectClass *klass, void *data)
 530 {
 531     DeviceClass *dc = DEVICE_CLASS(klass);
 532
 533     dc->realize = milkymist_pfpu_realize;
 534     dc->reset = milkymist_pfpu_reset;
 535     dc->vmsd = &vmstate_milkymist_pfpu;
 536 }
 537
 538 static const TypeInfo milkymist_pfpu_info = {
 539     .name          = TYPE_MILKYMIST_PFPU,
 540     .parent        = TYPE_SYS_BUS_DEVICE,
 541     .instance_size = sizeof(MilkymistPFPUState),
 542     .class_init    = milkymist_pfpu_class_init,
 543 };
 544
 545 static void milkymist_pfpu_register_types(void)
 546 {
 547     type_register_static(&milkymist_pfpu_info);
 548 }
 549
 550 type_init(milkymist_pfpu_register_types)