hw/milkymist-pfpu.c

   1 /*
   2  *  QEMU model of the Milkymist programmable FPU.
   3  *
   4  *  Copyright (c) 2010 Michael Walle <michael@walle.cc>
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  *
  19  *
  20  * Specification available at:
  21  *   http://www.milkymist.org/socdoc/pfpu.pdf
  22  *
  23  */
  24
  25 #include "hw.h"
  26 #include "sysbus.h"
  27 #include "trace.h"
  28 #include "qemu-log.h"
  29 #include "qemu-error.h"
  30 #include <math.h>
  31
  32 /* #define TRACE_EXEC */
  33
  34 #ifdef TRACE_EXEC
  35 #    define D_EXEC(x) x
  36 #else
  37 #    define D_EXEC(x)
  38 #endif
  39
  40 enum {
  41     R_CTL = 0,
  42     R_MESHBASE,
  43     R_HMESHLAST,
  44     R_VMESHLAST,
  45     R_CODEPAGE,
  46     R_VERTICES,
  47     R_COLLISIONS,
  48     R_STRAYWRITES,
  49     R_LASTDMA,
  50     R_PC,
  51     R_DREGBASE,
  52     R_CODEBASE,
  53     R_MAX
  54 };
  55
  56 enum {
  57     CTL_START_BUSY = (1<<0),
  58 };
  59
  60 enum {
  61     OP_NOP = 0,
  62     OP_FADD,
  63     OP_FSUB,
  64     OP_FMUL,
  65     OP_FABS,
  66     OP_F2I,
  67     OP_I2F,
  68     OP_VECTOUT,
  69     OP_SIN,
  70     OP_COS,
  71     OP_ABOVE,
  72     OP_EQUAL,
  73     OP_COPY,
  74     OP_IF,
  75     OP_TSIGN,
  76     OP_QUAKE,
  77 };
  78
  79 enum {
  80     GPR_X = 0,
  81     GPR_Y = 1,
  82     GPR_FLAGS = 2,
  83 };
  84
  85 enum {
  86     LATENCY_FADD = 5,
  87     LATENCY_FSUB = 5,
  88     LATENCY_FMUL = 7,
  89     LATENCY_FABS = 2,
  90     LATENCY_F2I = 2,
  91     LATENCY_I2F = 3,
  92     LATENCY_VECTOUT = 0,
  93     LATENCY_SIN = 4,
  94     LATENCY_COS = 4,
  95     LATENCY_ABOVE = 2,
  96     LATENCY_EQUAL = 2,
  97     LATENCY_COPY = 2,
  98     LATENCY_IF = 2,
  99     LATENCY_TSIGN = 2,
 100     LATENCY_QUAKE = 2,
 101     MAX_LATENCY = 7
 102 };
 103
 104 #define GPR_BEGIN       0x100
 105 #define GPR_END         0x17f
 106 #define MICROCODE_BEGIN 0x200
 107 #define MICROCODE_END   0x3ff
 108 #define MICROCODE_WORDS 2048
 109
 110 #define REINTERPRET_CAST(type, val) (*((type *)&(val)))
 111
 112 #ifdef TRACE_EXEC
 113 static const char *opcode_to_str[] = {
 114     "NOP", "FADD", "FSUB", "FMUL", "FABS", "F2I", "I2F", "VECTOUT",
 115     "SIN", "COS", "ABOVE", "EQUAL", "COPY", "IF", "TSIGN", "QUAKE",
 116 };
 117 #endif
 118
 119 struct MilkymistPFPUState {
 120     SysBusDevice busdev;
 121     CharDriverState *chr;
 122     qemu_irq irq;
 123
 124     uint32_t regs[R_MAX];
 125     uint32_t gp_regs[128];
 126     uint32_t microcode[MICROCODE_WORDS];
 127
 128     int output_queue_pos;
 129     uint32_t output_queue[MAX_LATENCY];
 130 };
 131 typedef struct MilkymistPFPUState MilkymistPFPUState;
 132
 133 static inline target_phys_addr_t
 134 get_dma_address(uint32_t base, uint32_t x, uint32_t y)
 135 {
 136     return base + 8 * (128 * y + x);
 137 }
 138
 139 static inline void
 140 output_queue_insert(MilkymistPFPUState *s, uint32_t val, int pos)
 141 {
 142     s->output_queue[(s->output_queue_pos + pos) % MAX_LATENCY] = val;
 143 }
 144
 145 static inline uint32_t
 146 output_queue_remove(MilkymistPFPUState *s)
 147 {
 148     return s->output_queue[s->output_queue_pos];
 149 }
 150
 151 static inline void
 152 output_queue_advance(MilkymistPFPUState *s)
 153 {
 154     s->output_queue[s->output_queue_pos] = 0;
 155     s->output_queue_pos = (s->output_queue_pos + 1) % MAX_LATENCY;
 156 }
 157
 158 static int pfpu_decode_insn(MilkymistPFPUState *s)
 159 {
 160     uint32_t pc = s->regs[R_PC];
 161     uint32_t insn = s->microcode[pc];
 162     uint32_t reg_a = (insn >> 18) & 0x7f;
 163     uint32_t reg_b = (insn >> 11) & 0x7f;
 164     uint32_t op = (insn >> 7) & 0xf;
 165     uint32_t reg_d = insn & 0x7f;
 166     uint32_t r = 0;
 167     int latency = 0;
 168
 169     switch (op) {
 170     case OP_NOP:
 171         break;
 172     case OP_FADD:
 173     {
 174         float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 175         float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 176         float t = a + b;
 177         r = REINTERPRET_CAST(uint32_t, t);
 178         latency = LATENCY_FADD;
 179         D_EXEC(qemu_log("ADD a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 180     } break;
 181     case OP_FSUB:
 182     {
 183         float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 184         float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 185         float t = a - b;
 186         r = REINTERPRET_CAST(uint32_t, t);
 187         latency = LATENCY_FSUB;
 188         D_EXEC(qemu_log("SUB a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 189     } break;
 190     case OP_FMUL:
 191     {
 192         float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 193         float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 194         float t = a * b;
 195         r = REINTERPRET_CAST(uint32_t, t);
 196         latency = LATENCY_FMUL;
 197         D_EXEC(qemu_log("MUL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 198     } break;
 199     case OP_FABS:
 200     {
 201         float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 202         float t = fabsf(a);
 203         r = REINTERPRET_CAST(uint32_t, t);
 204         latency = LATENCY_FABS;
 205         D_EXEC(qemu_log("ABS a=%f t=%f, r=%08x\n", a, t, r));
 206     } break;
 207     case OP_F2I:
 208     {
 209         float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 210         int32_t t = a;
 211         r = REINTERPRET_CAST(uint32_t, t);
 212         latency = LATENCY_F2I;
 213         D_EXEC(qemu_log("F2I a=%f t=%d, r=%08x\n", a, t, r));
 214     } break;
 215     case OP_I2F:
 216     {
 217         int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
 218         float t = a;
 219         r = REINTERPRET_CAST(uint32_t, t);
 220         latency = LATENCY_I2F;
 221         D_EXEC(qemu_log("I2F a=%08x t=%f, r=%08x\n", a, t, r));
 222     } break;
 223     case OP_VECTOUT:
 224     {
 225         uint32_t a = cpu_to_be32(s->gp_regs[reg_a]);
 226         uint32_t b = cpu_to_be32(s->gp_regs[reg_b]);
 227         target_phys_addr_t dma_ptr =
 228             get_dma_address(s->regs[R_MESHBASE],
 229                     s->gp_regs[GPR_X], s->gp_regs[GPR_Y]);
 230         cpu_physical_memory_write(dma_ptr, (uint8_t *)&a, 4);
 231         cpu_physical_memory_write(dma_ptr + 4, (uint8_t *)&b, 4);
 232         s->regs[R_LASTDMA] = dma_ptr + 4;
 233         D_EXEC(qemu_log("VECTOUT a=%08x b=%08x dma=%08x\n", a, b, dma_ptr));
 234         trace_milkymist_pfpu_vectout(a, b, dma_ptr);
 235     } break;
 236     case OP_SIN:
 237     {
 238         int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
 239         float t = sinf(a * (1.0f / (M_PI * 4096.0f)));
 240         r = REINTERPRET_CAST(uint32_t, t);
 241         latency = LATENCY_SIN;
 242         D_EXEC(qemu_log("SIN a=%d t=%f, r=%08x\n", a, t, r));
 243     } break;
 244     case OP_COS:
 245     {
 246         int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
 247         float t = cosf(a * (1.0f / (M_PI * 4096.0f)));
 248         r = REINTERPRET_CAST(uint32_t, t);
 249         latency = LATENCY_COS;
 250         D_EXEC(qemu_log("COS a=%d t=%f, r=%08x\n", a, t, r));
 251     } break;
 252     case OP_ABOVE:
 253     {
 254         float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 255         float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 256         float t = (a > b) ? 1.0f : 0.0f;
 257         r = REINTERPRET_CAST(uint32_t, t);
 258         latency = LATENCY_ABOVE;
 259         D_EXEC(qemu_log("ABOVE a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 260     } break;
 261     case OP_EQUAL:
 262     {
 263         float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 264         float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 265         float t = (a == b) ? 1.0f : 0.0f;
 266         r = REINTERPRET_CAST(uint32_t, t);
 267         latency = LATENCY_EQUAL;
 268         D_EXEC(qemu_log("EQUAL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 269     } break;
 270     case OP_COPY:
 271     {
 272         r = s->gp_regs[reg_a];
 273         latency = LATENCY_COPY;
 274         D_EXEC(qemu_log("COPY"));
 275     } break;
 276     case OP_IF:
 277     {
 278         float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 279         float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 280         uint32_t f = s->gp_regs[GPR_FLAGS];
 281         float t = (f != 0) ? a : b;
 282         r = REINTERPRET_CAST(uint32_t, t);
 283         latency = LATENCY_IF;
 284         D_EXEC(qemu_log("IF f=%u a=%f b=%f t=%f, r=%08x\n", f, a, b, t, r));
 285     } break;
 286     case OP_TSIGN:
 287     {
 288         float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
 289         float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
 290         float t = (b < 0) ? -a : a;
 291         r = REINTERPRET_CAST(uint32_t, t);
 292         latency = LATENCY_TSIGN;
 293         D_EXEC(qemu_log("TSIGN a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
 294     } break;
 295     case OP_QUAKE:
 296     {
 297         uint32_t a = s->gp_regs[reg_a];
 298         r = 0x5f3759df - (a >> 1);
 299         latency = LATENCY_QUAKE;
 300         D_EXEC(qemu_log("QUAKE a=%d r=%08x\n", a, r));
 301     } break;
 302
 303     default:
 304         error_report("milkymist_pfpu: unknown opcode %d\n", op);
 305         break;
 306     }
 307
 308     if (!reg_d) {
 309         D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d>\n",
 310                     s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
 311                     s->regs[R_PC] + latency));
 312     } else {
 313         D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d> -> R%03d\n",
 314                     s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
 315                     s->regs[R_PC] + latency, reg_d));
 316     }
 317
 318     if (op == OP_VECTOUT) {
 319         return 0;
 320     }
 321
 322     /* store output for this cycle */
 323     if (reg_d) {
 324         uint32_t val = output_queue_remove(s);
 325         D_EXEC(qemu_log("R%03d <- 0x%08x\n", reg_d, val));
 326         s->gp_regs[reg_d] = val;
 327     }
 328
 329     output_queue_advance(s);
 330
 331     /* store op output */
 332     if (op != OP_NOP) {
 333         output_queue_insert(s, r, latency-1);
 334     }
 335
 336     /* advance PC */
 337     s->regs[R_PC]++;
 338
 339     return 1;
 340 };
 341
 342 static void pfpu_start(MilkymistPFPUState *s)
 343 {
 344     int x, y;
 345     int i;
 346
 347     for (y = 0; y <= s->regs[R_VMESHLAST]; y++) {
 348         for (x = 0; x <= s->regs[R_HMESHLAST]; x++) {
 349             D_EXEC(qemu_log("\nprocessing x=%d y=%d\n", x, y));
 350
 351             /* set current position */
 352             s->gp_regs[GPR_X] = x;
 353             s->gp_regs[GPR_Y] = y;
 354
 355             /* run microcode on this position */
 356             i = 0;
 357             while (pfpu_decode_insn(s)) {
 358                 /* decode at most MICROCODE_WORDS instructions */
 359                 if (i++ >= MICROCODE_WORDS) {
 360                     error_report("milkymist_pfpu: too many instructions "
 361                             "executed in microcode. No VECTOUT?\n");
 362                     break;
 363                 }
 364             }
 365
 366             /* reset pc for next run */
 367             s->regs[R_PC] = 0;
 368         }
 369     }
 370
 371     s->regs[R_VERTICES] = x * y;
 372
 373     trace_milkymist_pfpu_pulse_irq();
 374     qemu_irq_pulse(s->irq);
 375 }
 376
 377 static inline int get_microcode_address(MilkymistPFPUState *s, uint32_t addr)
 378 {
 379     return (512 * s->regs[R_CODEPAGE]) + addr - MICROCODE_BEGIN;
 380 }
 381
 382 static uint32_t pfpu_read(void *opaque, target_phys_addr_t addr)
 383 {
 384     MilkymistPFPUState *s = opaque;
 385     uint32_t r = 0;
 386
 387     addr >>= 2;
 388     switch (addr) {
 389     case R_CTL:
 390     case R_MESHBASE:
 391     case R_HMESHLAST:
 392     case R_VMESHLAST:
 393     case R_CODEPAGE:
 394     case R_VERTICES:
 395     case R_COLLISIONS:
 396     case R_STRAYWRITES:
 397     case R_LASTDMA:
 398     case R_PC:
 399     case R_DREGBASE:
 400     case R_CODEBASE:
 401         r = s->regs[addr];
 402         break;
 403     case GPR_BEGIN ... GPR_END:
 404         r = s->gp_regs[addr - GPR_BEGIN];
 405         break;
 406     case MICROCODE_BEGIN ...  MICROCODE_END:
 407         r = s->microcode[get_microcode_address(s, addr)];
 408         break;
 409
 410     default:
 411         error_report("milkymist_pfpu: read access to unknown register 0x"
 412                 TARGET_FMT_plx, addr << 2);
 413         break;
 414     }
 415
 416     trace_milkymist_pfpu_memory_read(addr << 2, r);
 417
 418     return r;
 419 }
 420
 421 static void
 422 pfpu_write(void *opaque, target_phys_addr_t addr, uint32_t value)
 423 {
 424     MilkymistPFPUState *s = opaque;
 425
 426     trace_milkymist_pfpu_memory_write(addr, value);
 427
 428     addr >>= 2;
 429     switch (addr) {
 430     case R_CTL:
 431         if (value & CTL_START_BUSY) {
 432             pfpu_start(s);
 433         }
 434         break;
 435     case R_MESHBASE:
 436     case R_HMESHLAST:
 437     case R_VMESHLAST:
 438     case R_CODEPAGE:
 439     case R_VERTICES:
 440     case R_COLLISIONS:
 441     case R_STRAYWRITES:
 442     case R_LASTDMA:
 443     case R_PC:
 444     case R_DREGBASE:
 445     case R_CODEBASE:
 446         s->regs[addr] = value;
 447         break;
 448     case GPR_BEGIN ...  GPR_END:
 449         s->gp_regs[addr - GPR_BEGIN] = value;
 450         break;
 451     case MICROCODE_BEGIN ...  MICROCODE_END:
 452         s->microcode[get_microcode_address(s, addr)] = value;
 453         break;
 454
 455     default:
 456         error_report("milkymist_pfpu: write access to unknown register 0x"
 457                 TARGET_FMT_plx, addr << 2);
 458         break;
 459     }
 460 }
 461
 462 static CPUReadMemoryFunc * const pfpu_read_fn[] = {
 463     NULL,
 464     NULL,
 465     &pfpu_read,
 466 };
 467
 468 static CPUWriteMemoryFunc * const pfpu_write_fn[] = {
 469     NULL,
 470     NULL,
 471     &pfpu_write,
 472 };
 473
 474 static void milkymist_pfpu_reset(DeviceState *d)
 475 {
 476     MilkymistPFPUState *s = container_of(d, MilkymistPFPUState, busdev.qdev);
 477     int i;
 478
 479     for (i = 0; i < R_MAX; i++) {
 480         s->regs[i] = 0;
 481     }
 482     for (i = 0; i < 128; i++) {
 483         s->gp_regs[i] = 0;
 484     }
 485     for (i = 0; i < MICROCODE_WORDS; i++) {
 486         s->microcode[i] = 0;
 487     }
 488     s->output_queue_pos = 0;
 489     for (i = 0; i < MAX_LATENCY; i++) {
 490         s->output_queue[i] = 0;
 491     }
 492 }
 493
 494 static int milkymist_pfpu_init(SysBusDevice *dev)
 495 {
 496     MilkymistPFPUState *s = FROM_SYSBUS(typeof(*s), dev);
 497     int pfpu_regs;
 498
 499     sysbus_init_irq(dev, &s->irq);
 500
 501     pfpu_regs = cpu_register_io_memory(pfpu_read_fn, pfpu_write_fn, s,
 502             DEVICE_NATIVE_ENDIAN);
 503     sysbus_init_mmio(dev, MICROCODE_END * 4, pfpu_regs);
 504
 505     return 0;
 506 }
 507
 508 static const VMStateDescription vmstate_milkymist_pfpu = {
 509     .name = "milkymist-pfpu",
 510     .version_id = 1,
 511     .minimum_version_id = 1,
 512     .minimum_version_id_old = 1,
 513     .fields      = (VMStateField[]) {
 514         VMSTATE_UINT32_ARRAY(regs, MilkymistPFPUState, R_MAX),
 515         VMSTATE_UINT32_ARRAY(gp_regs, MilkymistPFPUState, 128),
 516         VMSTATE_UINT32_ARRAY(microcode, MilkymistPFPUState, MICROCODE_WORDS),
 517         VMSTATE_INT32(output_queue_pos, MilkymistPFPUState),
 518         VMSTATE_UINT32_ARRAY(output_queue, MilkymistPFPUState, MAX_LATENCY),
 519         VMSTATE_END_OF_LIST()
 520     }
 521 };
 522
 523 static SysBusDeviceInfo milkymist_pfpu_info = {
 524     .init = milkymist_pfpu_init,
 525     .qdev.name  = "milkymist-pfpu",
 526     .qdev.size  = sizeof(MilkymistPFPUState),
 527     .qdev.vmsd  = &vmstate_milkymist_pfpu,
 528     .qdev.reset = milkymist_pfpu_reset,
 529 };
 530
 531 static void milkymist_pfpu_register(void)
 532 {
 533     sysbus_register_withprop(&milkymist_pfpu_info);
 534 }
 535
 536 device_init(milkymist_pfpu_register)