2 * QEMU model of the Milkymist programmable FPU.
4 * Copyright (c) 2010 Michael Walle <michael@walle.cc>
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 * Specification available at:
21 * http://milkymist.walle.cc/socdoc/pfpu.pdf
25 #include "qemu/osdep.h"
27 #include "hw/sysbus.h"
28 #include "migration/vmstate.h"
31 #include "qemu/module.h"
32 #include "qemu/error-report.h"
34 #include "qom/object.h"
36 /* #define TRACE_EXEC */
61 CTL_START_BUSY
= (1<<0),
108 #define GPR_BEGIN 0x100
109 #define GPR_END 0x17f
110 #define MICROCODE_BEGIN 0x200
111 #define MICROCODE_END 0x3ff
112 #define MICROCODE_WORDS 2048
/*
 * Reinterpret the object representation of 'val' as a value of 'type'
 * (no numeric conversion takes place).
 *
 * The pun goes through a union instead of a pointer cast: reading a union
 * member other than the one last stored is defined in C, whereas
 * dereferencing a pointer to an incompatible type breaks the strict
 * aliasing rule. The 'sizes_must_match' member turns a size mismatch
 * between 'type' and 'val' into a compile-time error (negative array size).
 *
 * The expansion is an rvalue; 'val' is evaluated exactly once.
 */
#define REINTERPRET_CAST(type, val)                                      \
    (((union {                                                           \
        __typeof__(val) from;                                            \
        type to;                                                         \
        char sizes_must_match[sizeof(type) == sizeof(val) ? 1 : -1];     \
    }){ .from = (val) }).to)
/*
 * Mnemonics used in execution traces, indexed by the 4-bit opcode field of
 * a microcode word (16 entries, one per possible opcode value).
 */
static const char *const opcode_to_str[] = {
    "NOP", "FADD", "FSUB", "FMUL", "FABS", "F2I", "I2F", "VECTOUT",
    "SIN", "COS", "ABOVE", "EQUAL", "COPY", "IF", "TSIGN", "QUAKE",
};
123 #define TYPE_MILKYMIST_PFPU "milkymist-pfpu"
124 typedef struct MilkymistPFPUState MilkymistPFPUState
;
125 DECLARE_INSTANCE_CHECKER(MilkymistPFPUState
, MILKYMIST_PFPU
,
/*
 * Per-device state of the PFPU model: the MMIO region, the control
 * register file, the general-purpose register file, the microcode store
 * and the queue that delays operation results.
 */
128 struct MilkymistPFPUState
{
129 SysBusDevice parent_obj
;
131 MemoryRegion regs_region
;
/* Control registers, indexed by the R_* constants. */
135 uint32_t regs
[R_MAX
];
/* General-purpose registers; addressed by the 7-bit register fields. */
136 uint32_t gp_regs
[128];
/* Microcode store; one 32-bit instruction word per entry. */
137 uint32_t microcode
[MICROCODE_WORDS
];
/* Index of the current slot in output_queue (used modulo MAX_LATENCY). */
139 int output_queue_pos
;
/* Circular buffer holding results until their latency has elapsed. */
140 uint32_t output_queue
[MAX_LATENCY
];
/*
 * Compute the address of mesh vertex (x, y) relative to 'base'.
 *
 * Vertices are addressed row by row with a fixed pitch of 128 vertices per
 * row; each vertex occupies 8 bytes (VECTOUT stores two 32-bit words).
 * All arithmetic is unsigned 32-bit and wraps on overflow.
 */
static inline uint32_t
get_dma_address(uint32_t base, uint32_t x, uint32_t y)
{
    enum {
        MESH_ROW_PITCH = 128,   /* vertices per mesh row */
        VERTEX_BYTES = 8,       /* bytes written per vertex */
    };

    return base + VERTEX_BYTES * (MESH_ROW_PITCH * y + x);
}
150 output_queue_insert(MilkymistPFPUState
*s
, uint32_t val
, int pos
)
152 s
->output_queue
[(s
->output_queue_pos
+ pos
) % MAX_LATENCY
] = val
;
155 static inline uint32_t
156 output_queue_remove(MilkymistPFPUState
*s
)
158 return s
->output_queue
[s
->output_queue_pos
];
162 output_queue_advance(MilkymistPFPUState
*s
)
164 s
->output_queue
[s
->output_queue_pos
] = 0;
165 s
->output_queue_pos
= (s
->output_queue_pos
+ 1) % MAX_LATENCY
;
/*
 * Decode and execute the microcode word at index regs[R_PC].
 *
 * Instruction word layout, as extracted below:
 *   bits 24..18  operand A register
 *   bits 17..11  operand B register
 *   bits 10..7   opcode
 *   bits  6..0   destination register
 *
 * Operation results are not written to a register directly: they are put
 * into the output queue 'latency - 1' slots ahead, while the value found
 * in the current slot is stored to gp_regs[reg_d].
 *
 * The return value is used by pfpu_start() as its loop condition.
 */
168 static int pfpu_decode_insn(MilkymistPFPUState
*s
)
170 uint32_t pc
= s
->regs
[R_PC
];
171 uint32_t insn
= s
->microcode
[pc
];
172 uint32_t reg_a
= (insn
>> 18) & 0x7f;
173 uint32_t reg_b
= (insn
>> 11) & 0x7f;
174 uint32_t op
= (insn
>> 7) & 0xf;
175 uint32_t reg_d
= insn
& 0x7f;
/*
 * In the operations below, register contents are reinterpreted bit for
 * bit (not numerically converted) as float or integer operands, and the
 * result 't' is reinterpreted back into the 32-bit value 'r'.
 */
184 float a
= REINTERPRET_CAST(float, s
->gp_regs
[reg_a
]);
185 float b
= REINTERPRET_CAST(float, s
->gp_regs
[reg_b
]);
187 r
= REINTERPRET_CAST(uint32_t, t
);
188 latency
= LATENCY_FADD
;
189 D_EXEC(qemu_log("ADD a=%f b=%f t=%f, r=%08x\n", a
, b
, t
, r
));
193 float a
= REINTERPRET_CAST(float, s
->gp_regs
[reg_a
]);
194 float b
= REINTERPRET_CAST(float, s
->gp_regs
[reg_b
]);
196 r
= REINTERPRET_CAST(uint32_t, t
);
197 latency
= LATENCY_FSUB
;
198 D_EXEC(qemu_log("SUB a=%f b=%f t=%f, r=%08x\n", a
, b
, t
, r
));
202 float a
= REINTERPRET_CAST(float, s
->gp_regs
[reg_a
]);
203 float b
= REINTERPRET_CAST(float, s
->gp_regs
[reg_b
]);
205 r
= REINTERPRET_CAST(uint32_t, t
);
206 latency
= LATENCY_FMUL
;
207 D_EXEC(qemu_log("MUL a=%f b=%f t=%f, r=%08x\n", a
, b
, t
, r
));
211 float a
= REINTERPRET_CAST(float, s
->gp_regs
[reg_a
]);
213 r
= REINTERPRET_CAST(uint32_t, t
);
214 latency
= LATENCY_FABS
;
215 D_EXEC(qemu_log("ABS a=%f t=%f, r=%08x\n", a
, t
, r
));
219 float a
= REINTERPRET_CAST(float, s
->gp_regs
[reg_a
]);
221 r
= REINTERPRET_CAST(uint32_t, t
);
222 latency
= LATENCY_F2I
;
223 D_EXEC(qemu_log("F2I a=%f t=%d, r=%08x\n", a
, t
, r
));
227 int32_t a
= REINTERPRET_CAST(int32_t, s
->gp_regs
[reg_a
]);
229 r
= REINTERPRET_CAST(uint32_t, t
);
230 latency
= LATENCY_I2F
;
231 D_EXEC(qemu_log("I2F a=%08x t=%f, r=%08x\n", a
, t
, r
));
/*
 * Vector output: both operands are passed through cpu_to_be32() and
 * written as two consecutive 32-bit words at the DMA address of the
 * vertex selected by gp_regs[GPR_X] / gp_regs[GPR_Y]. R_LASTDMA records
 * the address of the second word.
 */
235 uint32_t a
= cpu_to_be32(s
->gp_regs
[reg_a
]);
236 uint32_t b
= cpu_to_be32(s
->gp_regs
[reg_b
]);
238 get_dma_address(s
->regs
[R_MESHBASE
],
239 s
->gp_regs
[GPR_X
], s
->gp_regs
[GPR_Y
]);
240 cpu_physical_memory_write(dma_ptr
, &a
, 4);
241 cpu_physical_memory_write(dma_ptr
+ 4, &b
, 4);
242 s
->regs
[R_LASTDMA
] = dma_ptr
+ 4;
243 D_EXEC(qemu_log("VECTOUT a=%08x b=%08x dma=%08x\n", a
, b
, dma_ptr
));
244 trace_milkymist_pfpu_vectout(a
, b
, dma_ptr
);
/*
 * Sine/cosine take a signed integer operand which is scaled by
 * 1 / (M_PI * 4096) before being handed to sinf()/cosf().
 */
248 int32_t a
= REINTERPRET_CAST(int32_t, s
->gp_regs
[reg_a
]);
249 float t
= sinf(a
* (1.0f
/ (M_PI
* 4096.0f
)));
250 r
= REINTERPRET_CAST(uint32_t, t
);
251 latency
= LATENCY_SIN
;
252 D_EXEC(qemu_log("SIN a=%d t=%f, r=%08x\n", a
, t
, r
));
256 int32_t a
= REINTERPRET_CAST(int32_t, s
->gp_regs
[reg_a
]);
257 float t
= cosf(a
* (1.0f
/ (M_PI
* 4096.0f
)));
258 r
= REINTERPRET_CAST(uint32_t, t
);
259 latency
= LATENCY_COS
;
260 D_EXEC(qemu_log("COS a=%d t=%f, r=%08x\n", a
, t
, r
));
/* Comparisons produce the float constants 1.0f (true) or 0.0f (false). */
264 float a
= REINTERPRET_CAST(float, s
->gp_regs
[reg_a
]);
265 float b
= REINTERPRET_CAST(float, s
->gp_regs
[reg_b
]);
266 float t
= (a
> b
) ? 1.0f
: 0.0f
;
267 r
= REINTERPRET_CAST(uint32_t, t
);
268 latency
= LATENCY_ABOVE
;
269 D_EXEC(qemu_log("ABOVE a=%f b=%f t=%f, r=%08x\n", a
, b
, t
, r
));
273 float a
= REINTERPRET_CAST(float, s
->gp_regs
[reg_a
]);
274 float b
= REINTERPRET_CAST(float, s
->gp_regs
[reg_b
]);
275 float t
= (a
== b
) ? 1.0f
: 0.0f
;
276 r
= REINTERPRET_CAST(uint32_t, t
);
277 latency
= LATENCY_EQUAL
;
278 D_EXEC(qemu_log("EQUAL a=%f b=%f t=%f, r=%08x\n", a
, b
, t
, r
));
/* Copy passes the raw register bits through unchanged. */
282 r
= s
->gp_regs
[reg_a
];
283 latency
= LATENCY_COPY
;
284 D_EXEC(qemu_log("COPY"));
/* Select operand A if gp_regs[GPR_FLAGS] is non-zero, else operand B. */
288 float a
= REINTERPRET_CAST(float, s
->gp_regs
[reg_a
]);
289 float b
= REINTERPRET_CAST(float, s
->gp_regs
[reg_b
]);
290 uint32_t f
= s
->gp_regs
[GPR_FLAGS
];
291 float t
= (f
!= 0) ? a
: b
;
292 r
= REINTERPRET_CAST(uint32_t, t
);
293 latency
= LATENCY_IF
;
294 D_EXEC(qemu_log("IF f=%u a=%f b=%f t=%f, r=%08x\n", f
, a
, b
, t
, r
));
/* Negate operand A when operand B is below zero. */
298 float a
= REINTERPRET_CAST(float, s
->gp_regs
[reg_a
]);
299 float b
= REINTERPRET_CAST(float, s
->gp_regs
[reg_b
]);
300 float t
= (b
< 0) ? -a
: a
;
301 r
= REINTERPRET_CAST(uint32_t, t
);
302 latency
= LATENCY_TSIGN
;
303 D_EXEC(qemu_log("TSIGN a=%f b=%f t=%f, r=%08x\n", a
, b
, t
, r
));
/*
 * Pure integer computation on the raw register bits.
 * NOTE(review): presumably the initial guess of the well-known fast
 * inverse square root trick -- confirm against the PFPU specification.
 */
307 uint32_t a
= s
->gp_regs
[reg_a
];
308 r
= 0x5f3759df - (a
>> 1);
309 latency
= LATENCY_QUAKE
;
310 D_EXEC(qemu_log("QUAKE a=%d r=%08x\n", a
, r
));
314 error_report("milkymist_pfpu: unknown opcode %d", op
);
319 D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d>\n",
320 s
->regs
[R_PC
], opcode_to_str
[op
], reg_a
, reg_b
, latency
,
321 s
->regs
[R_PC
] + latency
));
323 D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d> -> R%03d\n",
324 s
->regs
[R_PC
], opcode_to_str
[op
], reg_a
, reg_b
, latency
,
325 s
->regs
[R_PC
] + latency
, reg_d
));
328 if (op
== OP_VECTOUT
) {
332 /* store output for this cycle */
334 uint32_t val
= output_queue_remove(s
);
335 D_EXEC(qemu_log("R%03d <- 0x%08x\n", reg_d
, val
));
336 s
->gp_regs
[reg_d
] = val
;
339 output_queue_advance(s
);
341 /* store op output */
343 output_queue_insert(s
, r
, latency
-1);
/*
 * Run the microcode program over the whole mesh.
 *
 * For every position with 0 <= y <= regs[R_VMESHLAST] and
 * 0 <= x <= regs[R_HMESHLAST], the coordinates are stored in
 * gp_regs[GPR_X] / gp_regs[GPR_Y] and pfpu_decode_insn() is called
 * repeatedly until it returns zero. Afterwards R_VERTICES is updated and
 * the device IRQ is pulsed.
 */
352 static void pfpu_start(MilkymistPFPUState
*s
)
357 for (y
= 0; y
<= s
->regs
[R_VMESHLAST
]; y
++) {
358 for (x
= 0; x
<= s
->regs
[R_HMESHLAST
]; x
++) {
359 D_EXEC(qemu_log("\nprocessing x=%d y=%d\n", x
, y
));
361 /* set current position */
362 s
->gp_regs
[GPR_X
] = x
;
363 s
->gp_regs
[GPR_Y
] = y
;
365 /* run microcode on this position */
367 while (pfpu_decode_insn(s
)) {
368 /* decode at most MICROCODE_WORDS instructions */
369 if (++i
>= MICROCODE_WORDS
) {
370 error_report("milkymist_pfpu: too many instructions "
371 "executed in microcode. No VECTOUT?");
376 /* reset pc for next run */
/*
 * Product of the two loop counters. NOTE(review): if this runs after both
 * loops have completed, x and y are one past their last values and the
 * product equals the number of mesh positions processed -- confirm the
 * placement and that no early exit changes this.
 */
381 s
->regs
[R_VERTICES
] = x
* y
;
383 trace_milkymist_pfpu_pulse_irq();
384 qemu_irq_pulse(s
->irq
);
387 static inline int get_microcode_address(MilkymistPFPUState
*s
, uint32_t addr
)
389 return (512 * s
->regs
[R_CODEPAGE
]) + addr
- MICROCODE_BEGIN
;
/*
 * MMIO read handler.
 *
 * Indices GPR_BEGIN..GPR_END return general-purpose registers, indices
 * MICROCODE_BEGIN..MICROCODE_END return the microcode word of the
 * currently selected code page; an unknown index is reported with
 * error_report(). The handling of the control registers is not visible
 * in this excerpt.
 *
 * NOTE(review): the trace and error paths print 'addr << 2', which
 * suggests 'addr' holds a word index at that point -- the conversion from
 * the byte offset is presumably done earlier in the function; confirm.
 */
392 static uint64_t pfpu_read(void *opaque
, hwaddr addr
,
395 MilkymistPFPUState
*s
= opaque
;
414 case GPR_BEGIN
... GPR_END
:
415 r
= s
->gp_regs
[addr
- GPR_BEGIN
];
417 case MICROCODE_BEGIN
... MICROCODE_END
:
418 r
= s
->microcode
[get_microcode_address(s
, addr
)];
422 error_report("milkymist_pfpu: read access to unknown register 0x"
423 TARGET_FMT_plx
, addr
<< 2);
427 trace_milkymist_pfpu_memory_read(addr
<< 2, r
);
/*
 * MMIO write handler.
 *
 * A write with CTL_START_BUSY set is tested for explicitly (the action
 * taken is not visible in this excerpt); other control registers store the
 * value in regs[]. Indices GPR_BEGIN..GPR_END update general-purpose
 * registers and MICROCODE_BEGIN..MICROCODE_END update the microcode word
 * of the currently selected code page. An unknown index is reported with
 * error_report().
 */
432 static void pfpu_write(void *opaque
, hwaddr addr
, uint64_t value
,
435 MilkymistPFPUState
*s
= opaque
;
/*
 * NOTE(review): this trace passes 'addr' as is, while the error path
 * below prints 'addr << 2' -- confirm both report the same unit.
 */
437 trace_milkymist_pfpu_memory_write(addr
, value
);
442 if (value
& CTL_START_BUSY
) {
457 s
->regs
[addr
] = value
;
459 case GPR_BEGIN
... GPR_END
:
460 s
->gp_regs
[addr
- GPR_BEGIN
] = value
;
462 case MICROCODE_BEGIN
... MICROCODE_END
:
463 s
->microcode
[get_microcode_address(s
, addr
)] = value
;
467 error_report("milkymist_pfpu: write access to unknown register 0x"
468 TARGET_FMT_plx
, addr
<< 2);
/*
 * Memory region callbacks for the register window: minimum and maximum
 * access size are both 4 bytes and the region uses the target's native
 * endianness.
 */
473 static const MemoryRegionOps pfpu_mmio_ops
= {
477 .min_access_size
= 4,
478 .max_access_size
= 4,
480 .endianness
= DEVICE_NATIVE_ENDIAN
,
/*
 * Device reset handler.
 *
 * Iterates over the control registers (R_MAX entries), the 128
 * general-purpose registers and the MICROCODE_WORDS microcode entries
 * (the loop bodies are not visible in this excerpt; presumably each entry
 * is cleared -- confirm), then rewinds the output queue position and
 * zeroes every output queue slot.
 */
483 static void milkymist_pfpu_reset(DeviceState
*d
)
485 MilkymistPFPUState
*s
= MILKYMIST_PFPU(d
);
488 for (i
= 0; i
< R_MAX
; i
++) {
491 for (i
= 0; i
< 128; i
++) {
494 for (i
= 0; i
< MICROCODE_WORDS
; i
++) {
497 s
->output_queue_pos
= 0;
498 for (i
= 0; i
< MAX_LATENCY
; i
++) {
499 s
->output_queue
[i
] = 0;
503 static void milkymist_pfpu_realize(DeviceState
*dev
, Error
**errp
)
505 MilkymistPFPUState
*s
= MILKYMIST_PFPU(dev
);
506 SysBusDevice
*sbd
= SYS_BUS_DEVICE(dev
);
508 sysbus_init_irq(sbd
, &s
->irq
);
510 memory_region_init_io(&s
->regs_region
, OBJECT(dev
), &pfpu_mmio_ops
, s
,
511 "milkymist-pfpu", MICROCODE_END
* 4);
512 sysbus_init_mmio(sbd
, &s
->regs_region
);
/*
 * Migration description of the device. The control registers, the
 * general-purpose registers, the microcode store and the output queue
 * (position and contents) are transferred.
 */
515 static const VMStateDescription vmstate_milkymist_pfpu
= {
516 .name
= "milkymist-pfpu",
518 .minimum_version_id
= 1,
519 .fields
= (VMStateField
[]) {
520 VMSTATE_UINT32_ARRAY(regs
, MilkymistPFPUState
, R_MAX
),
521 VMSTATE_UINT32_ARRAY(gp_regs
, MilkymistPFPUState
, 128),
522 VMSTATE_UINT32_ARRAY(microcode
, MilkymistPFPUState
, MICROCODE_WORDS
),
523 VMSTATE_INT32(output_queue_pos
, MilkymistPFPUState
),
524 VMSTATE_UINT32_ARRAY(output_queue
, MilkymistPFPUState
, MAX_LATENCY
),
525 VMSTATE_END_OF_LIST()
529 static void milkymist_pfpu_class_init(ObjectClass
*klass
, void *data
)
531 DeviceClass
*dc
= DEVICE_CLASS(klass
);
533 dc
->realize
= milkymist_pfpu_realize
;
534 dc
->reset
= milkymist_pfpu_reset
;
535 dc
->vmsd
= &vmstate_milkymist_pfpu
;
538 static const TypeInfo milkymist_pfpu_info
= {
539 .name
= TYPE_MILKYMIST_PFPU
,
540 .parent
= TYPE_SYS_BUS_DEVICE
,
541 .instance_size
= sizeof(MilkymistPFPUState
),
542 .class_init
= milkymist_pfpu_class_init
,
545 static void milkymist_pfpu_register_types(void)
547 type_register_static(&milkymist_pfpu_info
);
550 type_init(milkymist_pfpu_register_types
)