SPARC64: fix fault status overwritten on nonfaulting load
[qemu.git] / hw / milkymist-pfpu.c
blob306d1ce2878ab0f23563a78a2542b4169ef2a209
1 /*
2 * QEMU model of the Milkymist programmable FPU.
4 * Copyright (c) 2010 Michael Walle <michael@walle.cc>
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 * Specification available at:
21 * http://www.milkymist.org/socdoc/pfpu.pdf
25 #include "hw.h"
26 #include "sysbus.h"
27 #include "trace.h"
28 #include "qemu-log.h"
29 #include "qemu-error.h"
30 #include <math.h>
/* Uncomment to enable the qemu_log() statements wrapped in D_EXEC(). */
/* #define TRACE_EXEC */

/*
 * D_EXEC(stmt) expands to stmt when TRACE_EXEC is defined and to nothing
 * otherwise.
 */
#if defined(TRACE_EXEC)
#define D_EXEC(stmt) stmt
#else
#define D_EXEC(stmt)
#endif
/*
 * Word indices of the control/status registers stored in regs[].
 * R_MAX is the number of registers, not a register itself.
 */
enum {
    R_CTL         = 0,
    R_MESHBASE    = 1,
    R_HMESHLAST   = 2,
    R_VMESHLAST   = 3,
    R_CODEPAGE    = 4,
    R_VERTICES    = 5,
    R_COLLISIONS  = 6,
    R_STRAYWRITES = 7,
    R_LASTDMA     = 8,
    R_PC          = 9,
    R_DREGBASE    = 10,
    R_CODEBASE    = 11,
    R_MAX         = 12
};
/* Bit flags of the R_CTL register. */
enum {
    /* a write to R_CTL with this bit set starts a microcode run */
    CTL_START_BUSY = 0x1
};
/* Opcode values as found in the 4-bit opcode field of an instruction. */
enum {
    OP_NOP     = 0,
    OP_FADD    = 1,
    OP_FSUB    = 2,
    OP_FMUL    = 3,
    OP_FABS    = 4,
    OP_F2I     = 5,
    OP_I2F     = 6,
    OP_VECTOUT = 7,
    OP_SIN     = 8,
    OP_COS     = 9,
    OP_ABOVE   = 10,
    OP_EQUAL   = 11,
    OP_COPY    = 12,
    OP_IF      = 13,
    OP_TSIGN   = 14,
    OP_QUAKE   = 15
};
/* Indices into gp_regs[] of the registers with a fixed meaning. */
enum {
    GPR_X,      /* x position of the vertex being processed */
    GPR_Y,      /* y position of the vertex being processed */
    GPR_FLAGS   /* condition evaluated by OP_IF */
};
/*
 * Per-opcode result latency, used as the distance at which a result is
 * inserted into the output queue. MAX_LATENCY is the size of that queue.
 */
enum {
    /* arithmetic */
    LATENCY_FADD    = 5,
    LATENCY_FSUB    = 5,
    LATENCY_FMUL    = 7,
    LATENCY_FABS    = 2,

    /* integer <-> float conversion */
    LATENCY_F2I     = 2,
    LATENCY_I2F     = 3,

    /* vertex output, produces no register result */
    LATENCY_VECTOUT = 0,

    /* trigonometry */
    LATENCY_SIN     = 4,
    LATENCY_COS     = 4,

    /* comparison, move and selection */
    LATENCY_ABOVE   = 2,
    LATENCY_EQUAL   = 2,
    LATENCY_COPY    = 2,
    LATENCY_IF      = 2,
    LATENCY_TSIGN   = 2,
    LATENCY_QUAKE   = 2,

    MAX_LATENCY     = 7
};
/*
 * Word addresses (byte address >> 2) of the two register windows.
 * The GPR window spans 128 words, the microcode window 512 words.
 */
#define GPR_BEGIN        0x100
#define GPR_END          (GPR_BEGIN + 0x7f)
#define MICROCODE_BEGIN  0x200
#define MICROCODE_END    (MICROCODE_BEGIN + 0x1ff)

/* Total number of words in the microcode[] array. */
#define MICROCODE_WORDS  2048
/*
 * Reinterpret the bit pattern of val as a value of the given type.
 *
 * The bits are copied through a union rather than by dereferencing a casted
 * pointer, because accessing an object through a pointer to an incompatible
 * type violates the C aliasing rules. val is evaluated exactly once and must
 * have the same size as type; the result is an rvalue.
 */
#define REINTERPRET_CAST(type, val) \
    (((union { __typeof__(val) from; type to; }){ .from = (val) }).to)
#if defined(TRACE_EXEC)
/* Mnemonic of every opcode, indexed by opcode value; used for logging only. */
static const char *opcode_to_str[] = {
    [OP_NOP]     = "NOP",
    [OP_FADD]    = "FADD",
    [OP_FSUB]    = "FSUB",
    [OP_FMUL]    = "FMUL",
    [OP_FABS]    = "FABS",
    [OP_F2I]     = "F2I",
    [OP_I2F]     = "I2F",
    [OP_VECTOUT] = "VECTOUT",
    [OP_SIN]     = "SIN",
    [OP_COS]     = "COS",
    [OP_ABOVE]   = "ABOVE",
    [OP_EQUAL]   = "EQUAL",
    [OP_COPY]    = "COPY",
    [OP_IF]      = "IF",
    [OP_TSIGN]   = "TSIGN",
    [OP_QUAKE]   = "QUAKE",
};
#endif
119 struct MilkymistPFPUState {
120 SysBusDevice busdev;
121 CharDriverState *chr;
122 qemu_irq irq;
124 uint32_t regs[R_MAX];
125 uint32_t gp_regs[128];
126 uint32_t microcode[MICROCODE_WORDS];
128 int output_queue_pos;
129 uint32_t output_queue[MAX_LATENCY];
131 typedef struct MilkymistPFPUState MilkymistPFPUState;
133 static inline target_phys_addr_t
134 get_dma_address(uint32_t base, uint32_t x, uint32_t y)
136 return base + 8 * (128 * y + x);
139 static inline void
140 output_queue_insert(MilkymistPFPUState *s, uint32_t val, int pos)
142 s->output_queue[(s->output_queue_pos + pos) % MAX_LATENCY] = val;
145 static inline uint32_t
146 output_queue_remove(MilkymistPFPUState *s)
148 return s->output_queue[s->output_queue_pos];
151 static inline void
152 output_queue_advance(MilkymistPFPUState *s)
154 s->output_queue[s->output_queue_pos] = 0;
155 s->output_queue_pos = (s->output_queue_pos + 1) % MAX_LATENCY;
158 static int pfpu_decode_insn(MilkymistPFPUState *s)
160 uint32_t pc = s->regs[R_PC];
161 uint32_t insn = s->microcode[pc];
162 uint32_t reg_a = (insn >> 18) & 0x7f;
163 uint32_t reg_b = (insn >> 11) & 0x7f;
164 uint32_t op = (insn >> 7) & 0xf;
165 uint32_t reg_d = insn & 0x7f;
166 uint32_t r = 0;
167 int latency = 0;
169 switch (op) {
170 case OP_NOP:
171 break;
172 case OP_FADD:
174 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
175 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
176 float t = a + b;
177 r = REINTERPRET_CAST(uint32_t, t);
178 latency = LATENCY_FADD;
179 D_EXEC(qemu_log("ADD a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
180 } break;
181 case OP_FSUB:
183 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
184 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
185 float t = a - b;
186 r = REINTERPRET_CAST(uint32_t, t);
187 latency = LATENCY_FSUB;
188 D_EXEC(qemu_log("SUB a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
189 } break;
190 case OP_FMUL:
192 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
193 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
194 float t = a * b;
195 r = REINTERPRET_CAST(uint32_t, t);
196 latency = LATENCY_FMUL;
197 D_EXEC(qemu_log("MUL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
198 } break;
199 case OP_FABS:
201 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
202 float t = fabsf(a);
203 r = REINTERPRET_CAST(uint32_t, t);
204 latency = LATENCY_FABS;
205 D_EXEC(qemu_log("ABS a=%f t=%f, r=%08x\n", a, t, r));
206 } break;
207 case OP_F2I:
209 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
210 int32_t t = a;
211 r = REINTERPRET_CAST(uint32_t, t);
212 latency = LATENCY_F2I;
213 D_EXEC(qemu_log("F2I a=%f t=%d, r=%08x\n", a, t, r));
214 } break;
215 case OP_I2F:
217 int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
218 float t = a;
219 r = REINTERPRET_CAST(uint32_t, t);
220 latency = LATENCY_I2F;
221 D_EXEC(qemu_log("I2F a=%08x t=%f, r=%08x\n", a, t, r));
222 } break;
223 case OP_VECTOUT:
225 uint32_t a = cpu_to_be32(s->gp_regs[reg_a]);
226 uint32_t b = cpu_to_be32(s->gp_regs[reg_b]);
227 target_phys_addr_t dma_ptr =
228 get_dma_address(s->regs[R_MESHBASE],
229 s->gp_regs[GPR_X], s->gp_regs[GPR_Y]);
230 cpu_physical_memory_write(dma_ptr, (uint8_t *)&a, 4);
231 cpu_physical_memory_write(dma_ptr + 4, (uint8_t *)&b, 4);
232 s->regs[R_LASTDMA] = dma_ptr + 4;
233 D_EXEC(qemu_log("VECTOUT a=%08x b=%08x dma=%08x\n", a, b, dma_ptr));
234 trace_milkymist_pfpu_vectout(a, b, dma_ptr);
235 } break;
236 case OP_SIN:
238 int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
239 float t = sinf(a * (1.0f / (M_PI * 4096.0f)));
240 r = REINTERPRET_CAST(uint32_t, t);
241 latency = LATENCY_SIN;
242 D_EXEC(qemu_log("SIN a=%d t=%f, r=%08x\n", a, t, r));
243 } break;
244 case OP_COS:
246 int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
247 float t = cosf(a * (1.0f / (M_PI * 4096.0f)));
248 r = REINTERPRET_CAST(uint32_t, t);
249 latency = LATENCY_COS;
250 D_EXEC(qemu_log("COS a=%d t=%f, r=%08x\n", a, t, r));
251 } break;
252 case OP_ABOVE:
254 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
255 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
256 float t = (a > b) ? 1.0f : 0.0f;
257 r = REINTERPRET_CAST(uint32_t, t);
258 latency = LATENCY_ABOVE;
259 D_EXEC(qemu_log("ABOVE a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
260 } break;
261 case OP_EQUAL:
263 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
264 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
265 float t = (a == b) ? 1.0f : 0.0f;
266 r = REINTERPRET_CAST(uint32_t, t);
267 latency = LATENCY_EQUAL;
268 D_EXEC(qemu_log("EQUAL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
269 } break;
270 case OP_COPY:
272 r = s->gp_regs[reg_a];
273 latency = LATENCY_COPY;
274 D_EXEC(qemu_log("COPY"));
275 } break;
276 case OP_IF:
278 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
279 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
280 uint32_t f = s->gp_regs[GPR_FLAGS];
281 float t = (f != 0) ? a : b;
282 r = REINTERPRET_CAST(uint32_t, t);
283 latency = LATENCY_IF;
284 D_EXEC(qemu_log("IF f=%u a=%f b=%f t=%f, r=%08x\n", f, a, b, t, r));
285 } break;
286 case OP_TSIGN:
288 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
289 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
290 float t = (b < 0) ? -a : a;
291 r = REINTERPRET_CAST(uint32_t, t);
292 latency = LATENCY_TSIGN;
293 D_EXEC(qemu_log("TSIGN a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
294 } break;
295 case OP_QUAKE:
297 uint32_t a = s->gp_regs[reg_a];
298 r = 0x5f3759df - (a >> 1);
299 latency = LATENCY_QUAKE;
300 D_EXEC(qemu_log("QUAKE a=%d r=%08x\n", a, r));
301 } break;
303 default:
304 error_report("milkymist_pfpu: unknown opcode %d", op);
305 break;
308 if (!reg_d) {
309 D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d>\n",
310 s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
311 s->regs[R_PC] + latency));
312 } else {
313 D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d> -> R%03d\n",
314 s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
315 s->regs[R_PC] + latency, reg_d));
318 if (op == OP_VECTOUT) {
319 return 0;
322 /* store output for this cycle */
323 if (reg_d) {
324 uint32_t val = output_queue_remove(s);
325 D_EXEC(qemu_log("R%03d <- 0x%08x\n", reg_d, val));
326 s->gp_regs[reg_d] = val;
329 output_queue_advance(s);
331 /* store op output */
332 if (op != OP_NOP) {
333 output_queue_insert(s, r, latency-1);
336 /* advance PC */
337 s->regs[R_PC]++;
339 return 1;
342 static void pfpu_start(MilkymistPFPUState *s)
344 int x, y;
345 int i;
347 for (y = 0; y <= s->regs[R_VMESHLAST]; y++) {
348 for (x = 0; x <= s->regs[R_HMESHLAST]; x++) {
349 D_EXEC(qemu_log("\nprocessing x=%d y=%d\n", x, y));
351 /* set current position */
352 s->gp_regs[GPR_X] = x;
353 s->gp_regs[GPR_Y] = y;
355 /* run microcode on this position */
356 i = 0;
357 while (pfpu_decode_insn(s)) {
358 /* decode at most MICROCODE_WORDS instructions */
359 if (i++ >= MICROCODE_WORDS) {
360 error_report("milkymist_pfpu: too many instructions "
361 "executed in microcode. No VECTOUT?");
362 break;
366 /* reset pc for next run */
367 s->regs[R_PC] = 0;
371 s->regs[R_VERTICES] = x * y;
373 trace_milkymist_pfpu_pulse_irq();
374 qemu_irq_pulse(s->irq);
377 static inline int get_microcode_address(MilkymistPFPUState *s, uint32_t addr)
379 return (512 * s->regs[R_CODEPAGE]) + addr - MICROCODE_BEGIN;
382 static uint32_t pfpu_read(void *opaque, target_phys_addr_t addr)
384 MilkymistPFPUState *s = opaque;
385 uint32_t r = 0;
387 addr >>= 2;
388 switch (addr) {
389 case R_CTL:
390 case R_MESHBASE:
391 case R_HMESHLAST:
392 case R_VMESHLAST:
393 case R_CODEPAGE:
394 case R_VERTICES:
395 case R_COLLISIONS:
396 case R_STRAYWRITES:
397 case R_LASTDMA:
398 case R_PC:
399 case R_DREGBASE:
400 case R_CODEBASE:
401 r = s->regs[addr];
402 break;
403 case GPR_BEGIN ... GPR_END:
404 r = s->gp_regs[addr - GPR_BEGIN];
405 break;
406 case MICROCODE_BEGIN ... MICROCODE_END:
407 r = s->microcode[get_microcode_address(s, addr)];
408 break;
410 default:
411 error_report("milkymist_pfpu: read access to unknown register 0x"
412 TARGET_FMT_plx, addr << 2);
413 break;
416 trace_milkymist_pfpu_memory_read(addr << 2, r);
418 return r;
421 static void
422 pfpu_write(void *opaque, target_phys_addr_t addr, uint32_t value)
424 MilkymistPFPUState *s = opaque;
426 trace_milkymist_pfpu_memory_write(addr, value);
428 addr >>= 2;
429 switch (addr) {
430 case R_CTL:
431 if (value & CTL_START_BUSY) {
432 pfpu_start(s);
434 break;
435 case R_MESHBASE:
436 case R_HMESHLAST:
437 case R_VMESHLAST:
438 case R_CODEPAGE:
439 case R_VERTICES:
440 case R_COLLISIONS:
441 case R_STRAYWRITES:
442 case R_LASTDMA:
443 case R_PC:
444 case R_DREGBASE:
445 case R_CODEBASE:
446 s->regs[addr] = value;
447 break;
448 case GPR_BEGIN ... GPR_END:
449 s->gp_regs[addr - GPR_BEGIN] = value;
450 break;
451 case MICROCODE_BEGIN ... MICROCODE_END:
452 s->microcode[get_microcode_address(s, addr)] = value;
453 break;
455 default:
456 error_report("milkymist_pfpu: write access to unknown register 0x"
457 TARGET_FMT_plx, addr << 2);
458 break;
462 static CPUReadMemoryFunc * const pfpu_read_fn[] = {
463 NULL,
464 NULL,
465 &pfpu_read,
468 static CPUWriteMemoryFunc * const pfpu_write_fn[] = {
469 NULL,
470 NULL,
471 &pfpu_write,
474 static void milkymist_pfpu_reset(DeviceState *d)
476 MilkymistPFPUState *s = container_of(d, MilkymistPFPUState, busdev.qdev);
477 int i;
479 for (i = 0; i < R_MAX; i++) {
480 s->regs[i] = 0;
482 for (i = 0; i < 128; i++) {
483 s->gp_regs[i] = 0;
485 for (i = 0; i < MICROCODE_WORDS; i++) {
486 s->microcode[i] = 0;
488 s->output_queue_pos = 0;
489 for (i = 0; i < MAX_LATENCY; i++) {
490 s->output_queue[i] = 0;
494 static int milkymist_pfpu_init(SysBusDevice *dev)
496 MilkymistPFPUState *s = FROM_SYSBUS(typeof(*s), dev);
497 int pfpu_regs;
499 sysbus_init_irq(dev, &s->irq);
501 pfpu_regs = cpu_register_io_memory(pfpu_read_fn, pfpu_write_fn, s,
502 DEVICE_NATIVE_ENDIAN);
503 sysbus_init_mmio(dev, MICROCODE_END * 4, pfpu_regs);
505 return 0;
508 static const VMStateDescription vmstate_milkymist_pfpu = {
509 .name = "milkymist-pfpu",
510 .version_id = 1,
511 .minimum_version_id = 1,
512 .minimum_version_id_old = 1,
513 .fields = (VMStateField[]) {
514 VMSTATE_UINT32_ARRAY(regs, MilkymistPFPUState, R_MAX),
515 VMSTATE_UINT32_ARRAY(gp_regs, MilkymistPFPUState, 128),
516 VMSTATE_UINT32_ARRAY(microcode, MilkymistPFPUState, MICROCODE_WORDS),
517 VMSTATE_INT32(output_queue_pos, MilkymistPFPUState),
518 VMSTATE_UINT32_ARRAY(output_queue, MilkymistPFPUState, MAX_LATENCY),
519 VMSTATE_END_OF_LIST()
523 static SysBusDeviceInfo milkymist_pfpu_info = {
524 .init = milkymist_pfpu_init,
525 .qdev.name = "milkymist-pfpu",
526 .qdev.size = sizeof(MilkymistPFPUState),
527 .qdev.vmsd = &vmstate_milkymist_pfpu,
528 .qdev.reset = milkymist_pfpu_reset,
531 static void milkymist_pfpu_register(void)
533 sysbus_register_withprop(&milkymist_pfpu_info);
536 device_init(milkymist_pfpu_register)