hw/elf_ops: Fix a typo
[qemu/ar7.git] / hw / misc / milkymist-pfpu.c
blobe4ee209c1019b61994a39dd0fd41e75474be48e9
/*
 *  QEMU model of the Milkymist programmable FPU.
 *
 *  Copyright (c) 2010 Michael Walle <michael@walle.cc>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 *
 *
 * Specification available at:
 *   http://milkymist.walle.cc/socdoc/pfpu.pdf
 *
 */
25 #include "qemu/osdep.h"
26 #include "hw/irq.h"
27 #include "hw/sysbus.h"
28 #include "migration/vmstate.h"
29 #include "trace.h"
30 #include "qemu/log.h"
31 #include "qemu/module.h"
32 #include "qemu/error-report.h"
33 #include <math.h>
34 #include "qom/object.h"
/*
 * Uncomment the define below to compile in the per-instruction execution
 * log.  While TRACE_EXEC stays undefined, every D_EXEC(...) expands to
 * nothing, so its argument is neither compiled nor evaluated.
 */
/* #define TRACE_EXEC */

#ifdef TRACE_EXEC
# define D_EXEC(x) x
#else
# define D_EXEC(x)
#endif
/*
 * Word indices into MilkymistPFPUState.regs[].  The MMIO handlers obtain
 * them from the byte offset with "addr >> 2".  R_MAX is the number of
 * registers and sizes the regs[] array.
 */
enum {
    R_CTL = 0,
    R_MESHBASE,
    R_HMESHLAST,
    R_VMESHLAST,
    R_CODEPAGE,
    R_VERTICES,
    R_COLLISIONS,
    R_STRAYWRITES,
    R_LASTDMA,
    R_PC,
    R_DREGBASE,
    R_CODEBASE,
    R_MAX
};
/*
 * Bits of the R_CTL register.  A write to R_CTL with CTL_START_BUSY set
 * makes pfpu_write() run pfpu_start().
 */
enum {
    CTL_START_BUSY = (1 << 0),
};
/*
 * Opcode values of a microcode word, as extracted by pfpu_decode_insn()
 * with "(insn >> 7) & 0xf".  The sixteen enumerators cover every value
 * that 4-bit field can hold.
 */
enum {
    OP_NOP = 0,
    OP_FADD,
    OP_FSUB,
    OP_FMUL,
    OP_FABS,
    OP_F2I,
    OP_I2F,
    OP_VECTOUT,
    OP_SIN,
    OP_COS,
    OP_ABOVE,
    OP_EQUAL,
    OP_COPY,
    OP_IF,
    OP_TSIGN,
    OP_QUAKE,
};
/*
 * Indices into gp_regs[] that have a fixed role: pfpu_start() stores the
 * current mesh position in GPR_X / GPR_Y before running the microcode,
 * and OP_IF selects its result based on GPR_FLAGS.
 */
enum {
    GPR_X = 0,
    GPR_Y = 1,
    GPR_FLAGS = 2,
};
/*
 * Result latency of each opcode.  pfpu_decode_insn() queues a result
 * "latency - 1" slots ahead in the output queue; MAX_LATENCY is the
 * number of slots in that queue.
 */
enum {
    LATENCY_FADD = 5,
    LATENCY_FSUB = 5,
    LATENCY_FMUL = 7,
    LATENCY_FABS = 2,
    LATENCY_F2I = 2,
    LATENCY_I2F = 3,
    LATENCY_VECTOUT = 0,
    LATENCY_SIN = 4,
    LATENCY_COS = 4,
    LATENCY_ABOVE = 2,
    LATENCY_EQUAL = 2,
    LATENCY_COPY = 2,
    LATENCY_IF = 2,
    LATENCY_TSIGN = 2,
    LATENCY_QUAKE = 2,
    MAX_LATENCY = 7
};
/*
 * MMIO layout in 32-bit word indices (byte offset >> 2):
 *   GPR_BEGIN..GPR_END             - window onto gp_regs[] (128 words)
 *   MICROCODE_BEGIN..MICROCODE_END - 512-word window onto microcode[],
 *                                    offset by the page in R_CODEPAGE
 * MICROCODE_WORDS is the total size of microcode[].
 */
#define GPR_BEGIN       0x100
#define GPR_END         0x17f
#define MICROCODE_BEGIN 0x200
#define MICROCODE_END   0x3ff
#define MICROCODE_WORDS 2048

/*
 * Reread the storage of lvalue 'val' as 'type' (same size expected).
 * NOTE(review): this is pointer-cast type punning, which is only well
 * defined when the build disables strict-aliasing optimisations - confirm
 * the compiler flags before reusing this macro elsewhere.
 */
#define REINTERPRET_CAST(type, val) (*((type *)&(val)))
#ifdef TRACE_EXEC
/* Mnemonics for the execution log, indexed by the 4-bit opcode value. */
static const char *opcode_to_str[] = {
    "NOP", "FADD", "FSUB", "FMUL", "FABS", "F2I", "I2F", "VECTOUT",
    "SIN", "COS", "ABOVE", "EQUAL", "COPY", "IF", "TSIGN", "QUAKE",
};
#endif
123 #define TYPE_MILKYMIST_PFPU "milkymist-pfpu"
124 OBJECT_DECLARE_SIMPLE_TYPE(MilkymistPFPUState, MILKYMIST_PFPU)
126 struct MilkymistPFPUState {
127 SysBusDevice parent_obj;
129 MemoryRegion regs_region;
130 Chardev *chr;
131 qemu_irq irq;
133 uint32_t regs[R_MAX];
134 uint32_t gp_regs[128];
135 uint32_t microcode[MICROCODE_WORDS];
137 int output_queue_pos;
138 uint32_t output_queue[MAX_LATENCY];
/*
 * Address of the output slot for mesh position (x, y).
 *
 * Each position occupies 8 bytes and a row holds 128 positions.  The
 * arithmetic is done in uint32_t and therefore wraps modulo 2^32.
 */
static inline uint32_t
get_dma_address(uint32_t base, uint32_t x, uint32_t y)
{
    return base + 8 * (128 * y + x);
}
147 static inline void
148 output_queue_insert(MilkymistPFPUState *s, uint32_t val, int pos)
150 s->output_queue[(s->output_queue_pos + pos) % MAX_LATENCY] = val;
153 static inline uint32_t
154 output_queue_remove(MilkymistPFPUState *s)
156 return s->output_queue[s->output_queue_pos];
159 static inline void
160 output_queue_advance(MilkymistPFPUState *s)
162 s->output_queue[s->output_queue_pos] = 0;
163 s->output_queue_pos = (s->output_queue_pos + 1) % MAX_LATENCY;
166 static int pfpu_decode_insn(MilkymistPFPUState *s)
168 uint32_t pc = s->regs[R_PC];
169 uint32_t insn = s->microcode[pc];
170 uint32_t reg_a = (insn >> 18) & 0x7f;
171 uint32_t reg_b = (insn >> 11) & 0x7f;
172 uint32_t op = (insn >> 7) & 0xf;
173 uint32_t reg_d = insn & 0x7f;
174 uint32_t r = 0;
175 int latency = 0;
177 switch (op) {
178 case OP_NOP:
179 break;
180 case OP_FADD:
182 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
183 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
184 float t = a + b;
185 r = REINTERPRET_CAST(uint32_t, t);
186 latency = LATENCY_FADD;
187 D_EXEC(qemu_log("ADD a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
188 } break;
189 case OP_FSUB:
191 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
192 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
193 float t = a - b;
194 r = REINTERPRET_CAST(uint32_t, t);
195 latency = LATENCY_FSUB;
196 D_EXEC(qemu_log("SUB a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
197 } break;
198 case OP_FMUL:
200 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
201 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
202 float t = a * b;
203 r = REINTERPRET_CAST(uint32_t, t);
204 latency = LATENCY_FMUL;
205 D_EXEC(qemu_log("MUL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
206 } break;
207 case OP_FABS:
209 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
210 float t = fabsf(a);
211 r = REINTERPRET_CAST(uint32_t, t);
212 latency = LATENCY_FABS;
213 D_EXEC(qemu_log("ABS a=%f t=%f, r=%08x\n", a, t, r));
214 } break;
215 case OP_F2I:
217 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
218 int32_t t = a;
219 r = REINTERPRET_CAST(uint32_t, t);
220 latency = LATENCY_F2I;
221 D_EXEC(qemu_log("F2I a=%f t=%d, r=%08x\n", a, t, r));
222 } break;
223 case OP_I2F:
225 int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
226 float t = a;
227 r = REINTERPRET_CAST(uint32_t, t);
228 latency = LATENCY_I2F;
229 D_EXEC(qemu_log("I2F a=%08x t=%f, r=%08x\n", a, t, r));
230 } break;
231 case OP_VECTOUT:
233 uint32_t a = cpu_to_be32(s->gp_regs[reg_a]);
234 uint32_t b = cpu_to_be32(s->gp_regs[reg_b]);
235 hwaddr dma_ptr =
236 get_dma_address(s->regs[R_MESHBASE],
237 s->gp_regs[GPR_X], s->gp_regs[GPR_Y]);
238 cpu_physical_memory_write(dma_ptr, &a, 4);
239 cpu_physical_memory_write(dma_ptr + 4, &b, 4);
240 s->regs[R_LASTDMA] = dma_ptr + 4;
241 D_EXEC(qemu_log("VECTOUT a=%08x b=%08x dma=%08x\n", a, b, dma_ptr));
242 trace_milkymist_pfpu_vectout(a, b, dma_ptr);
243 } break;
244 case OP_SIN:
246 int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
247 float t = sinf(a * (1.0f / (M_PI * 4096.0f)));
248 r = REINTERPRET_CAST(uint32_t, t);
249 latency = LATENCY_SIN;
250 D_EXEC(qemu_log("SIN a=%d t=%f, r=%08x\n", a, t, r));
251 } break;
252 case OP_COS:
254 int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
255 float t = cosf(a * (1.0f / (M_PI * 4096.0f)));
256 r = REINTERPRET_CAST(uint32_t, t);
257 latency = LATENCY_COS;
258 D_EXEC(qemu_log("COS a=%d t=%f, r=%08x\n", a, t, r));
259 } break;
260 case OP_ABOVE:
262 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
263 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
264 float t = (a > b) ? 1.0f : 0.0f;
265 r = REINTERPRET_CAST(uint32_t, t);
266 latency = LATENCY_ABOVE;
267 D_EXEC(qemu_log("ABOVE a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
268 } break;
269 case OP_EQUAL:
271 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
272 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
273 float t = (a == b) ? 1.0f : 0.0f;
274 r = REINTERPRET_CAST(uint32_t, t);
275 latency = LATENCY_EQUAL;
276 D_EXEC(qemu_log("EQUAL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
277 } break;
278 case OP_COPY:
280 r = s->gp_regs[reg_a];
281 latency = LATENCY_COPY;
282 D_EXEC(qemu_log("COPY"));
283 } break;
284 case OP_IF:
286 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
287 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
288 uint32_t f = s->gp_regs[GPR_FLAGS];
289 float t = (f != 0) ? a : b;
290 r = REINTERPRET_CAST(uint32_t, t);
291 latency = LATENCY_IF;
292 D_EXEC(qemu_log("IF f=%u a=%f b=%f t=%f, r=%08x\n", f, a, b, t, r));
293 } break;
294 case OP_TSIGN:
296 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
297 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
298 float t = (b < 0) ? -a : a;
299 r = REINTERPRET_CAST(uint32_t, t);
300 latency = LATENCY_TSIGN;
301 D_EXEC(qemu_log("TSIGN a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
302 } break;
303 case OP_QUAKE:
305 uint32_t a = s->gp_regs[reg_a];
306 r = 0x5f3759df - (a >> 1);
307 latency = LATENCY_QUAKE;
308 D_EXEC(qemu_log("QUAKE a=%d r=%08x\n", a, r));
309 } break;
311 default:
312 error_report("milkymist_pfpu: unknown opcode %d", op);
313 break;
316 if (!reg_d) {
317 D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d>\n",
318 s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
319 s->regs[R_PC] + latency));
320 } else {
321 D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d> -> R%03d\n",
322 s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
323 s->regs[R_PC] + latency, reg_d));
326 if (op == OP_VECTOUT) {
327 return 0;
330 /* store output for this cycle */
331 if (reg_d) {
332 uint32_t val = output_queue_remove(s);
333 D_EXEC(qemu_log("R%03d <- 0x%08x\n", reg_d, val));
334 s->gp_regs[reg_d] = val;
337 output_queue_advance(s);
339 /* store op output */
340 if (op != OP_NOP) {
341 output_queue_insert(s, r, latency-1);
344 /* advance PC */
345 s->regs[R_PC]++;
347 return 1;
350 static void pfpu_start(MilkymistPFPUState *s)
352 int x, y;
353 int i;
355 for (y = 0; y <= s->regs[R_VMESHLAST]; y++) {
356 for (x = 0; x <= s->regs[R_HMESHLAST]; x++) {
357 D_EXEC(qemu_log("\nprocessing x=%d y=%d\n", x, y));
359 /* set current position */
360 s->gp_regs[GPR_X] = x;
361 s->gp_regs[GPR_Y] = y;
363 /* run microcode on this position */
364 i = 0;
365 while (pfpu_decode_insn(s)) {
366 /* decode at most MICROCODE_WORDS instructions */
367 if (++i >= MICROCODE_WORDS) {
368 error_report("milkymist_pfpu: too many instructions "
369 "executed in microcode. No VECTOUT?");
370 break;
374 /* reset pc for next run */
375 s->regs[R_PC] = 0;
379 s->regs[R_VERTICES] = x * y;
381 trace_milkymist_pfpu_pulse_irq();
382 qemu_irq_pulse(s->irq);
385 static inline int get_microcode_address(MilkymistPFPUState *s, uint32_t addr)
387 return (512 * s->regs[R_CODEPAGE]) + addr - MICROCODE_BEGIN;
390 static uint64_t pfpu_read(void *opaque, hwaddr addr,
391 unsigned size)
393 MilkymistPFPUState *s = opaque;
394 uint32_t r = 0;
396 addr >>= 2;
397 switch (addr) {
398 case R_CTL:
399 case R_MESHBASE:
400 case R_HMESHLAST:
401 case R_VMESHLAST:
402 case R_CODEPAGE:
403 case R_VERTICES:
404 case R_COLLISIONS:
405 case R_STRAYWRITES:
406 case R_LASTDMA:
407 case R_PC:
408 case R_DREGBASE:
409 case R_CODEBASE:
410 r = s->regs[addr];
411 break;
412 case GPR_BEGIN ... GPR_END:
413 r = s->gp_regs[addr - GPR_BEGIN];
414 break;
415 case MICROCODE_BEGIN ... MICROCODE_END:
416 r = s->microcode[get_microcode_address(s, addr)];
417 break;
419 default:
420 error_report("milkymist_pfpu: read access to unknown register 0x"
421 TARGET_FMT_plx, addr << 2);
422 break;
425 trace_milkymist_pfpu_memory_read(addr << 2, r);
427 return r;
430 static void pfpu_write(void *opaque, hwaddr addr, uint64_t value,
431 unsigned size)
433 MilkymistPFPUState *s = opaque;
435 trace_milkymist_pfpu_memory_write(addr, value);
437 addr >>= 2;
438 switch (addr) {
439 case R_CTL:
440 if (value & CTL_START_BUSY) {
441 pfpu_start(s);
443 break;
444 case R_MESHBASE:
445 case R_HMESHLAST:
446 case R_VMESHLAST:
447 case R_CODEPAGE:
448 case R_VERTICES:
449 case R_COLLISIONS:
450 case R_STRAYWRITES:
451 case R_LASTDMA:
452 case R_PC:
453 case R_DREGBASE:
454 case R_CODEBASE:
455 s->regs[addr] = value;
456 break;
457 case GPR_BEGIN ... GPR_END:
458 s->gp_regs[addr - GPR_BEGIN] = value;
459 break;
460 case MICROCODE_BEGIN ... MICROCODE_END:
461 s->microcode[get_microcode_address(s, addr)] = value;
462 break;
464 default:
465 error_report("milkymist_pfpu: write access to unknown register 0x"
466 TARGET_FMT_plx, addr << 2);
467 break;
471 static const MemoryRegionOps pfpu_mmio_ops = {
472 .read = pfpu_read,
473 .write = pfpu_write,
474 .valid = {
475 .min_access_size = 4,
476 .max_access_size = 4,
478 .endianness = DEVICE_NATIVE_ENDIAN,
481 static void milkymist_pfpu_reset(DeviceState *d)
483 MilkymistPFPUState *s = MILKYMIST_PFPU(d);
484 int i;
486 for (i = 0; i < R_MAX; i++) {
487 s->regs[i] = 0;
489 for (i = 0; i < 128; i++) {
490 s->gp_regs[i] = 0;
492 for (i = 0; i < MICROCODE_WORDS; i++) {
493 s->microcode[i] = 0;
495 s->output_queue_pos = 0;
496 for (i = 0; i < MAX_LATENCY; i++) {
497 s->output_queue[i] = 0;
501 static void milkymist_pfpu_realize(DeviceState *dev, Error **errp)
503 MilkymistPFPUState *s = MILKYMIST_PFPU(dev);
504 SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
506 sysbus_init_irq(sbd, &s->irq);
508 memory_region_init_io(&s->regs_region, OBJECT(dev), &pfpu_mmio_ops, s,
509 "milkymist-pfpu", MICROCODE_END * 4);
510 sysbus_init_mmio(sbd, &s->regs_region);
513 static const VMStateDescription vmstate_milkymist_pfpu = {
514 .name = "milkymist-pfpu",
515 .version_id = 1,
516 .minimum_version_id = 1,
517 .fields = (VMStateField[]) {
518 VMSTATE_UINT32_ARRAY(regs, MilkymistPFPUState, R_MAX),
519 VMSTATE_UINT32_ARRAY(gp_regs, MilkymistPFPUState, 128),
520 VMSTATE_UINT32_ARRAY(microcode, MilkymistPFPUState, MICROCODE_WORDS),
521 VMSTATE_INT32(output_queue_pos, MilkymistPFPUState),
522 VMSTATE_UINT32_ARRAY(output_queue, MilkymistPFPUState, MAX_LATENCY),
523 VMSTATE_END_OF_LIST()
527 static void milkymist_pfpu_class_init(ObjectClass *klass, void *data)
529 DeviceClass *dc = DEVICE_CLASS(klass);
531 dc->realize = milkymist_pfpu_realize;
532 dc->reset = milkymist_pfpu_reset;
533 dc->vmsd = &vmstate_milkymist_pfpu;
536 static const TypeInfo milkymist_pfpu_info = {
537 .name = TYPE_MILKYMIST_PFPU,
538 .parent = TYPE_SYS_BUS_DEVICE,
539 .instance_size = sizeof(MilkymistPFPUState),
540 .class_init = milkymist_pfpu_class_init,
543 static void milkymist_pfpu_register_types(void)
545 type_register_static(&milkymist_pfpu_info);
548 type_init(milkymist_pfpu_register_types)