hw: set interrupts using pci irq wrappers
[qemu.git] / hw / net / e1000.c
bloba37a3dfbe4751726260d95bcba8c3cab418090a8
1 /*
2 * QEMU e1000 emulation
4 * Software developer's manual:
5 * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
7 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8 * Copyright (c) 2008 Qumranet
9 * Based on work done by:
10 * Copyright (c) 2007 Dan Aloni
11 * Copyright (c) 2004 Antony T Curtis
13 * This library is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU Lesser General Public
15 * License as published by the Free Software Foundation; either
16 * version 2 of the License, or (at your option) any later version.
18 * This library is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * Lesser General Public License for more details.
23 * You should have received a copy of the GNU Lesser General Public
24 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
28 #include "hw/hw.h"
29 #include "hw/pci/pci.h"
30 #include "net/net.h"
31 #include "net/checksum.h"
32 #include "hw/loader.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/dma.h"
36 #include "e1000_regs.h"
#define E1000_DEBUG

#ifdef E1000_DEBUG
/* One bit per debug category; flip bits in debugflags to enable output. */
enum {
    DEBUG_GENERAL, DEBUG_IO, DEBUG_MMIO, DEBUG_INTERRUPT,
    DEBUG_RX, DEBUG_TX, DEBUG_MDIC, DEBUG_EEPROM,
    DEBUG_UNKNOWN, DEBUG_TXSUM, DEBUG_TXERR, DEBUG_RXERR,
    DEBUG_RXFILTER, DEBUG_PHY, DEBUG_NOTYET,
};
#define DBGBIT(x)    (1 << DEBUG_##x)
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

#define DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#else
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif
#define IOPORT_SIZE       0x40
#define PNPMMIO_SIZE      0x20000
#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */

/* this is the size past which hardware will drop packets when setting LPE=0 */
#define MAXIMUM_ETHERNET_VLAN_SIZE 1522
/* this is the size past which hardware will drop packets when setting LPE=1 */
#define MAXIMUM_ETHERNET_LPE_SIZE 16384
68 * HW models:
69 * E1000_DEV_ID_82540EM works with Windows and Linux
70 * E1000_DEV_ID_82573L OK with windoze and Linux 2.6.22,
71 * appears to perform better than 82540EM, but breaks with Linux 2.6.18
72 * E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
73 * Others never tested
75 enum { E1000_DEVID = E1000_DEV_ID_82540EM };
78 * May need to specify additional MAC-to-PHY entries --
79 * Intel's Windows driver refuses to initialize unless they match
81 enum {
82 PHY_ID2_INIT = E1000_DEVID == E1000_DEV_ID_82573L ? 0xcc2 :
83 E1000_DEVID == E1000_DEV_ID_82544GC_COPPER ? 0xc30 :
84 /* default to E1000_DEV_ID_82540EM */ 0xc20
87 typedef struct E1000State_st {
88 /*< private >*/
89 PCIDevice parent_obj;
90 /*< public >*/
92 NICState *nic;
93 NICConf conf;
94 MemoryRegion mmio;
95 MemoryRegion io;
97 uint32_t mac_reg[0x8000];
98 uint16_t phy_reg[0x20];
99 uint16_t eeprom_data[64];
101 uint32_t rxbuf_size;
102 uint32_t rxbuf_min_shift;
103 struct e1000_tx {
104 unsigned char header[256];
105 unsigned char vlan_header[4];
106 /* Fields vlan and data must not be reordered or separated. */
107 unsigned char vlan[4];
108 unsigned char data[0x10000];
109 uint16_t size;
110 unsigned char sum_needed;
111 unsigned char vlan_needed;
112 uint8_t ipcss;
113 uint8_t ipcso;
114 uint16_t ipcse;
115 uint8_t tucss;
116 uint8_t tucso;
117 uint16_t tucse;
118 uint8_t hdr_len;
119 uint16_t mss;
120 uint32_t paylen;
121 uint16_t tso_frames;
122 char tse;
123 int8_t ip;
124 int8_t tcp;
125 char cptse; // current packet tse bit
126 } tx;
128 struct {
129 uint32_t val_in; // shifted in from guest driver
130 uint16_t bitnum_in;
131 uint16_t bitnum_out;
132 uint16_t reading;
133 uint32_t old_eecd;
134 } eecd_state;
136 QEMUTimer *autoneg_timer;
138 QEMUTimer *mit_timer; /* Mitigation timer. */
139 bool mit_timer_on; /* Mitigation timer is running. */
140 bool mit_irq_level; /* Tracks interrupt pin level. */
141 uint32_t mit_ide; /* Tracks E1000_TXD_CMD_IDE bit. */
143 /* Compatibility flags for migration to/from qemu 1.3.0 and older */
144 #define E1000_FLAG_AUTONEG_BIT 0
145 #define E1000_FLAG_MIT_BIT 1
146 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
147 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
148 uint32_t compat_flags;
149 } E1000State;
151 #define TYPE_E1000 "e1000"
153 #define E1000(obj) \
154 OBJECT_CHECK(E1000State, (obj), TYPE_E1000)
156 #define defreg(x) x = (E1000_##x>>2)
157 enum {
158 defreg(CTRL), defreg(EECD), defreg(EERD), defreg(GPRC),
159 defreg(GPTC), defreg(ICR), defreg(ICS), defreg(IMC),
160 defreg(IMS), defreg(LEDCTL), defreg(MANC), defreg(MDIC),
161 defreg(MPC), defreg(PBA), defreg(RCTL), defreg(RDBAH),
162 defreg(RDBAL), defreg(RDH), defreg(RDLEN), defreg(RDT),
163 defreg(STATUS), defreg(SWSM), defreg(TCTL), defreg(TDBAH),
164 defreg(TDBAL), defreg(TDH), defreg(TDLEN), defreg(TDT),
165 defreg(TORH), defreg(TORL), defreg(TOTH), defreg(TOTL),
166 defreg(TPR), defreg(TPT), defreg(TXDCTL), defreg(WUFC),
167 defreg(RA), defreg(MTA), defreg(CRCERRS),defreg(VFTA),
168 defreg(VET), defreg(RDTR), defreg(RADV), defreg(TADV),
169 defreg(ITR),
172 static void
173 e1000_link_down(E1000State *s)
175 s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
176 s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
179 static void
180 e1000_link_up(E1000State *s)
182 s->mac_reg[STATUS] |= E1000_STATUS_LU;
183 s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
186 static void
187 set_phy_ctrl(E1000State *s, int index, uint16_t val)
190 * QEMU 1.3 does not support link auto-negotiation emulation, so if we
191 * migrate during auto negotiation, after migration the link will be
192 * down.
194 if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
195 return;
197 if ((val & MII_CR_AUTO_NEG_EN) && (val & MII_CR_RESTART_AUTO_NEG)) {
198 e1000_link_down(s);
199 s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
200 DBGOUT(PHY, "Start link auto negotiation\n");
201 timer_mod(s->autoneg_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
205 static void
206 e1000_autoneg_timer(void *opaque)
208 E1000State *s = opaque;
209 if (!qemu_get_queue(s->nic)->link_down) {
210 e1000_link_up(s);
212 s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
213 DBGOUT(PHY, "Auto negotiation is completed\n");
216 static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
217 [PHY_CTRL] = set_phy_ctrl,
220 enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
222 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
223 static const char phy_regcap[0x20] = {
224 [PHY_STATUS] = PHY_R, [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
225 [PHY_ID1] = PHY_R, [M88E1000_PHY_SPEC_CTRL] = PHY_RW,
226 [PHY_CTRL] = PHY_RW, [PHY_1000T_CTRL] = PHY_RW,
227 [PHY_LP_ABILITY] = PHY_R, [PHY_1000T_STATUS] = PHY_R,
228 [PHY_AUTONEG_ADV] = PHY_RW, [M88E1000_RX_ERR_CNTR] = PHY_R,
229 [PHY_ID2] = PHY_R, [M88E1000_PHY_SPEC_STATUS] = PHY_R
232 static const uint16_t phy_reg_init[] = {
233 [PHY_CTRL] = 0x1140,
234 [PHY_STATUS] = 0x794d, /* link initially up with not completed autoneg */
235 [PHY_ID1] = 0x141, [PHY_ID2] = PHY_ID2_INIT,
236 [PHY_1000T_CTRL] = 0x0e00, [M88E1000_PHY_SPEC_CTRL] = 0x360,
237 [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60, [PHY_AUTONEG_ADV] = 0xde1,
238 [PHY_LP_ABILITY] = 0x1e0, [PHY_1000T_STATUS] = 0x3c00,
239 [M88E1000_PHY_SPEC_STATUS] = 0xac00,
242 static const uint32_t mac_reg_init[] = {
243 [PBA] = 0x00100030,
244 [LEDCTL] = 0x602,
245 [CTRL] = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
246 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
247 [STATUS] = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
248 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
249 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
250 E1000_STATUS_LU,
251 [MANC] = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
252 E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
253 E1000_MANC_RMCP_EN,
/* Helper function, *curr == 0 means the value is not set.
 * Fold @value into *curr, keeping the smallest nonzero delay seen so far. */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;             /* zero means "no delay requested" */
    }
    if (*curr == 0 || value < *curr) {
        *curr = value;
    }
}
265 static void
266 set_interrupt_cause(E1000State *s, int index, uint32_t val)
268 PCIDevice *d = PCI_DEVICE(s);
269 uint32_t pending_ints;
270 uint32_t mit_delay;
272 if (val && (E1000_DEVID >= E1000_DEV_ID_82547EI_MOBILE)) {
273 /* Only for 8257x */
274 val |= E1000_ICR_INT_ASSERTED;
276 s->mac_reg[ICR] = val;
279 * Make sure ICR and ICS registers have the same value.
280 * The spec says that the ICS register is write-only. However in practice,
281 * on real hardware ICS is readable, and for reads it has the same value as
282 * ICR (except that ICS does not have the clear on read behaviour of ICR).
284 * The VxWorks PRO/1000 driver uses this behaviour.
286 s->mac_reg[ICS] = val;
288 pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
289 if (!s->mit_irq_level && pending_ints) {
291 * Here we detect a potential raising edge. We postpone raising the
292 * interrupt line if we are inside the mitigation delay window
293 * (s->mit_timer_on == 1).
294 * We provide a partial implementation of interrupt mitigation,
295 * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
296 * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
297 * RADV; relative timers based on TIDV and RDTR are not implemented.
299 if (s->mit_timer_on) {
300 return;
302 if (s->compat_flags & E1000_FLAG_MIT) {
303 /* Compute the next mitigation delay according to pending
304 * interrupts and the current values of RADV (provided
305 * RDTR!=0), TADV and ITR.
306 * Then rearm the timer.
308 mit_delay = 0;
309 if (s->mit_ide &&
310 (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
311 mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
313 if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
314 mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
316 mit_update_delay(&mit_delay, s->mac_reg[ITR]);
318 if (mit_delay) {
319 s->mit_timer_on = 1;
320 timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
321 mit_delay * 256);
323 s->mit_ide = 0;
327 s->mit_irq_level = (pending_ints != 0);
328 pci_set_irq(d, s->mit_irq_level);
331 static void
332 e1000_mit_timer(void *opaque)
334 E1000State *s = opaque;
336 s->mit_timer_on = 0;
337 /* Call set_interrupt_cause to update the irq level (if necessary). */
338 set_interrupt_cause(s, 0, s->mac_reg[ICR]);
341 static void
342 set_ics(E1000State *s, int index, uint32_t val)
344 DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
345 s->mac_reg[IMS]);
346 set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
349 static int
350 rxbufsize(uint32_t v)
352 v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
353 E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
354 E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
355 switch (v) {
356 case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
357 return 16384;
358 case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
359 return 8192;
360 case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
361 return 4096;
362 case E1000_RCTL_SZ_1024:
363 return 1024;
364 case E1000_RCTL_SZ_512:
365 return 512;
366 case E1000_RCTL_SZ_256:
367 return 256;
369 return 2048;
372 static void e1000_reset(void *opaque)
374 E1000State *d = opaque;
375 uint8_t *macaddr = d->conf.macaddr.a;
376 int i;
378 timer_del(d->autoneg_timer);
379 timer_del(d->mit_timer);
380 d->mit_timer_on = 0;
381 d->mit_irq_level = 0;
382 d->mit_ide = 0;
383 memset(d->phy_reg, 0, sizeof d->phy_reg);
384 memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
385 memset(d->mac_reg, 0, sizeof d->mac_reg);
386 memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
387 d->rxbuf_min_shift = 1;
388 memset(&d->tx, 0, sizeof d->tx);
390 if (qemu_get_queue(d->nic)->link_down) {
391 e1000_link_down(d);
394 /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
395 d->mac_reg[RA] = 0;
396 d->mac_reg[RA + 1] = E1000_RAH_AV;
397 for (i = 0; i < 4; i++) {
398 d->mac_reg[RA] |= macaddr[i] << (8 * i);
399 d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
403 static void
404 set_ctrl(E1000State *s, int index, uint32_t val)
406 /* RST is self clearing */
407 s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
410 static void
411 set_rx_control(E1000State *s, int index, uint32_t val)
413 s->mac_reg[RCTL] = val;
414 s->rxbuf_size = rxbufsize(val);
415 s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
416 DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
417 s->mac_reg[RCTL]);
418 qemu_flush_queued_packets(qemu_get_queue(s->nic));
421 static void
422 set_mdic(E1000State *s, int index, uint32_t val)
424 uint32_t data = val & E1000_MDIC_DATA_MASK;
425 uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
427 if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
428 val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
429 else if (val & E1000_MDIC_OP_READ) {
430 DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
431 if (!(phy_regcap[addr] & PHY_R)) {
432 DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
433 val |= E1000_MDIC_ERROR;
434 } else
435 val = (val ^ data) | s->phy_reg[addr];
436 } else if (val & E1000_MDIC_OP_WRITE) {
437 DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
438 if (!(phy_regcap[addr] & PHY_W)) {
439 DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
440 val |= E1000_MDIC_ERROR;
441 } else {
442 if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
443 phyreg_writeops[addr](s, index, data);
445 s->phy_reg[addr] = data;
448 s->mac_reg[MDIC] = val | E1000_MDIC_READY;
450 if (val & E1000_MDIC_INT_EN) {
451 set_ics(s, 0, E1000_ICR_MDAC);
455 static uint32_t
456 get_eecd(E1000State *s, int index)
458 uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
460 DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
461 s->eecd_state.bitnum_out, s->eecd_state.reading);
462 if (!s->eecd_state.reading ||
463 ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
464 ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
465 ret |= E1000_EECD_DO;
466 return ret;
469 static void
470 set_eecd(E1000State *s, int index, uint32_t val)
472 uint32_t oldval = s->eecd_state.old_eecd;
474 s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
475 E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
476 if (!(E1000_EECD_CS & val)) // CS inactive; nothing to do
477 return;
478 if (E1000_EECD_CS & (val ^ oldval)) { // CS rise edge; reset state
479 s->eecd_state.val_in = 0;
480 s->eecd_state.bitnum_in = 0;
481 s->eecd_state.bitnum_out = 0;
482 s->eecd_state.reading = 0;
484 if (!(E1000_EECD_SK & (val ^ oldval))) // no clock edge
485 return;
486 if (!(E1000_EECD_SK & val)) { // falling edge
487 s->eecd_state.bitnum_out++;
488 return;
490 s->eecd_state.val_in <<= 1;
491 if (val & E1000_EECD_DI)
492 s->eecd_state.val_in |= 1;
493 if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
494 s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
495 s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
496 EEPROM_READ_OPCODE_MICROWIRE);
498 DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
499 s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
500 s->eecd_state.reading);
503 static uint32_t
504 flash_eerd_read(E1000State *s, int x)
506 unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
508 if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
509 return (s->mac_reg[EERD]);
511 if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
512 return (E1000_EEPROM_RW_REG_DONE | r);
514 return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
515 E1000_EEPROM_RW_REG_DONE | r);
/* Compute an Internet checksum over data[css..n) (bounded by cse when
 * nonzero) and store it big-endian at data[sloc]. */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t sum;

    if (cse && cse < n) {
        n = cse + 1;        /* cse is the inclusive end offset */
    }
    if (sloc < n - 1) {
        sum = net_checksum_add(n - css, data + css);
        cpu_to_be16wu((uint16_t *)(data + sloc),
                      net_checksum_finish(sum));
    }
}
532 static inline int
533 vlan_enabled(E1000State *s)
535 return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
538 static inline int
539 vlan_rx_filter_enabled(E1000State *s)
541 return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
544 static inline int
545 is_vlan_packet(E1000State *s, const uint8_t *buf)
547 return (be16_to_cpup((uint16_t *)(buf + 12)) ==
548 le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
551 static inline int
552 is_vlan_txd(uint32_t txd_lower)
554 return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
557 /* FCS aka Ethernet CRC-32. We don't get it from backends and can't
558 * fill it in, just pad descriptor length by 4 bytes unless guest
559 * told us to strip it off the packet. */
560 static inline int
561 fcs_len(E1000State *s)
563 return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
566 static void
567 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
569 NetClientState *nc = qemu_get_queue(s->nic);
570 if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
571 nc->info->receive(nc, buf, size);
572 } else {
573 qemu_send_packet(nc, buf, size);
577 static void
578 xmit_seg(E1000State *s)
580 uint16_t len, *sp;
581 unsigned int frames = s->tx.tso_frames, css, sofar, n;
582 struct e1000_tx *tp = &s->tx;
584 if (tp->tse && tp->cptse) {
585 css = tp->ipcss;
586 DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
587 frames, tp->size, css);
588 if (tp->ip) { // IPv4
589 cpu_to_be16wu((uint16_t *)(tp->data+css+2),
590 tp->size - css);
591 cpu_to_be16wu((uint16_t *)(tp->data+css+4),
592 be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
593 } else // IPv6
594 cpu_to_be16wu((uint16_t *)(tp->data+css+4),
595 tp->size - css);
596 css = tp->tucss;
597 len = tp->size - css;
598 DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
599 if (tp->tcp) {
600 sofar = frames * tp->mss;
601 cpu_to_be32wu((uint32_t *)(tp->data+css+4), // seq
602 be32_to_cpupu((uint32_t *)(tp->data+css+4))+sofar);
603 if (tp->paylen - sofar > tp->mss)
604 tp->data[css + 13] &= ~9; // PSH, FIN
605 } else // UDP
606 cpu_to_be16wu((uint16_t *)(tp->data+css+4), len);
607 if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
608 unsigned int phsum;
609 // add pseudo-header length before checksum calculation
610 sp = (uint16_t *)(tp->data + tp->tucso);
611 phsum = be16_to_cpup(sp) + len;
612 phsum = (phsum >> 16) + (phsum & 0xffff);
613 cpu_to_be16wu(sp, phsum);
615 tp->tso_frames++;
618 if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
619 putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
620 if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
621 putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
622 if (tp->vlan_needed) {
623 memmove(tp->vlan, tp->data, 4);
624 memmove(tp->data, tp->data + 4, 8);
625 memcpy(tp->data + 8, tp->vlan_header, 4);
626 e1000_send_packet(s, tp->vlan, tp->size + 4);
627 } else
628 e1000_send_packet(s, tp->data, tp->size);
629 s->mac_reg[TPT]++;
630 s->mac_reg[GPTC]++;
631 n = s->mac_reg[TOTL];
632 if ((s->mac_reg[TOTL] += s->tx.size) < n)
633 s->mac_reg[TOTH]++;
636 static void
637 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
639 PCIDevice *d = PCI_DEVICE(s);
640 uint32_t txd_lower = le32_to_cpu(dp->lower.data);
641 uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
642 unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
643 unsigned int msh = 0xfffff;
644 uint64_t addr;
645 struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
646 struct e1000_tx *tp = &s->tx;
648 s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
649 if (dtype == E1000_TXD_CMD_DEXT) { // context descriptor
650 op = le32_to_cpu(xp->cmd_and_length);
651 tp->ipcss = xp->lower_setup.ip_fields.ipcss;
652 tp->ipcso = xp->lower_setup.ip_fields.ipcso;
653 tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
654 tp->tucss = xp->upper_setup.tcp_fields.tucss;
655 tp->tucso = xp->upper_setup.tcp_fields.tucso;
656 tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
657 tp->paylen = op & 0xfffff;
658 tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
659 tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
660 tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
661 tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
662 tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
663 tp->tso_frames = 0;
664 if (tp->tucso == 0) { // this is probably wrong
665 DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
666 tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
668 return;
669 } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
670 // data descriptor
671 if (tp->size == 0) {
672 tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
674 tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
675 } else {
676 // legacy descriptor
677 tp->cptse = 0;
680 if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
681 (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
682 tp->vlan_needed = 1;
683 cpu_to_be16wu((uint16_t *)(tp->vlan_header),
684 le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
685 cpu_to_be16wu((uint16_t *)(tp->vlan_header + 2),
686 le16_to_cpu(dp->upper.fields.special));
689 addr = le64_to_cpu(dp->buffer_addr);
690 if (tp->tse && tp->cptse) {
691 msh = tp->hdr_len + tp->mss;
692 do {
693 bytes = split_size;
694 if (tp->size + bytes > msh)
695 bytes = msh - tp->size;
697 bytes = MIN(sizeof(tp->data) - tp->size, bytes);
698 pci_dma_read(d, addr, tp->data + tp->size, bytes);
699 sz = tp->size + bytes;
700 if (sz >= tp->hdr_len && tp->size < tp->hdr_len) {
701 memmove(tp->header, tp->data, tp->hdr_len);
703 tp->size = sz;
704 addr += bytes;
705 if (sz == msh) {
706 xmit_seg(s);
707 memmove(tp->data, tp->header, tp->hdr_len);
708 tp->size = tp->hdr_len;
710 } while (split_size -= bytes);
711 } else if (!tp->tse && tp->cptse) {
712 // context descriptor TSE is not set, while data descriptor TSE is set
713 DBGOUT(TXERR, "TCP segmentation error\n");
714 } else {
715 split_size = MIN(sizeof(tp->data) - tp->size, split_size);
716 pci_dma_read(d, addr, tp->data + tp->size, split_size);
717 tp->size += split_size;
720 if (!(txd_lower & E1000_TXD_CMD_EOP))
721 return;
722 if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) {
723 xmit_seg(s);
725 tp->tso_frames = 0;
726 tp->sum_needed = 0;
727 tp->vlan_needed = 0;
728 tp->size = 0;
729 tp->cptse = 0;
732 static uint32_t
733 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
735 PCIDevice *d = PCI_DEVICE(s);
736 uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
738 if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
739 return 0;
740 txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
741 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
742 dp->upper.data = cpu_to_le32(txd_upper);
743 pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
744 &dp->upper, sizeof(dp->upper));
745 return E1000_ICR_TXDW;
748 static uint64_t tx_desc_base(E1000State *s)
750 uint64_t bah = s->mac_reg[TDBAH];
751 uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
753 return (bah << 32) + bal;
756 static void
757 start_xmit(E1000State *s)
759 PCIDevice *d = PCI_DEVICE(s);
760 dma_addr_t base;
761 struct e1000_tx_desc desc;
762 uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
764 if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
765 DBGOUT(TX, "tx disabled\n");
766 return;
769 while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
770 base = tx_desc_base(s) +
771 sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
772 pci_dma_read(d, base, &desc, sizeof(desc));
774 DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
775 (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
776 desc.upper.data);
778 process_tx_desc(s, &desc);
779 cause |= txdesc_writeback(s, base, &desc);
781 if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
782 s->mac_reg[TDH] = 0;
784 * the following could happen only if guest sw assigns
785 * bogus values to TDT/TDLEN.
786 * there's nothing too intelligent we could do about this.
788 if (s->mac_reg[TDH] == tdh_start) {
789 DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
790 tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
791 break;
794 set_ics(s, 0, cause);
797 static int
798 receive_filter(E1000State *s, const uint8_t *buf, int size)
800 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
801 static const int mta_shift[] = {4, 3, 2, 0};
802 uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;
804 if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
805 uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
806 uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
807 ((vid >> 5) & 0x7f));
808 if ((vfta & (1 << (vid & 0x1f))) == 0)
809 return 0;
812 if (rctl & E1000_RCTL_UPE) // promiscuous
813 return 1;
815 if ((buf[0] & 1) && (rctl & E1000_RCTL_MPE)) // promiscuous mcast
816 return 1;
818 if ((rctl & E1000_RCTL_BAM) && !memcmp(buf, bcast, sizeof bcast))
819 return 1;
821 for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
822 if (!(rp[1] & E1000_RAH_AV))
823 continue;
824 ra[0] = cpu_to_le32(rp[0]);
825 ra[1] = cpu_to_le32(rp[1]);
826 if (!memcmp(buf, (uint8_t *)ra, 6)) {
827 DBGOUT(RXFILTER,
828 "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
829 (int)(rp - s->mac_reg - RA)/2,
830 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
831 return 1;
834 DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
835 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
837 f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
838 f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
839 if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f)))
840 return 1;
841 DBGOUT(RXFILTER,
842 "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
843 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
844 (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
845 s->mac_reg[MTA + (f >> 5)]);
847 return 0;
850 static void
851 e1000_set_link_status(NetClientState *nc)
853 E1000State *s = qemu_get_nic_opaque(nc);
854 uint32_t old_status = s->mac_reg[STATUS];
856 if (nc->link_down) {
857 e1000_link_down(s);
858 } else {
859 e1000_link_up(s);
862 if (s->mac_reg[STATUS] != old_status)
863 set_ics(s, 0, E1000_ICR_LSC);
866 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
868 int bufs;
869 /* Fast-path short packets */
870 if (total_size <= s->rxbuf_size) {
871 return s->mac_reg[RDH] != s->mac_reg[RDT];
873 if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
874 bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
875 } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
876 bufs = s->mac_reg[RDLEN] / sizeof(struct e1000_rx_desc) +
877 s->mac_reg[RDT] - s->mac_reg[RDH];
878 } else {
879 return false;
881 return total_size <= bufs * s->rxbuf_size;
884 static int
885 e1000_can_receive(NetClientState *nc)
887 E1000State *s = qemu_get_nic_opaque(nc);
889 return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
890 (s->mac_reg[RCTL] & E1000_RCTL_EN) && e1000_has_rxbufs(s, 1);
893 static uint64_t rx_desc_base(E1000State *s)
895 uint64_t bah = s->mac_reg[RDBAH];
896 uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
898 return (bah << 32) + bal;
901 static ssize_t
902 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
904 E1000State *s = qemu_get_nic_opaque(nc);
905 PCIDevice *d = PCI_DEVICE(s);
906 struct e1000_rx_desc desc;
907 dma_addr_t base;
908 unsigned int n, rdt;
909 uint32_t rdh_start;
910 uint16_t vlan_special = 0;
911 uint8_t vlan_status = 0, vlan_offset = 0;
912 uint8_t min_buf[MIN_BUF_SIZE];
913 size_t desc_offset;
914 size_t desc_size;
915 size_t total_size;
917 if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
918 return -1;
921 if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
922 return -1;
925 /* Pad to minimum Ethernet frame length */
926 if (size < sizeof(min_buf)) {
927 memcpy(min_buf, buf, size);
928 memset(&min_buf[size], 0, sizeof(min_buf) - size);
929 buf = min_buf;
930 size = sizeof(min_buf);
933 /* Discard oversized packets if !LPE and !SBP. */
934 if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
935 (size > MAXIMUM_ETHERNET_VLAN_SIZE
936 && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
937 && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
938 return size;
941 if (!receive_filter(s, buf, size))
942 return size;
944 if (vlan_enabled(s) && is_vlan_packet(s, buf)) {
945 vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(buf + 14)));
946 memmove((uint8_t *)buf + 4, buf, 12);
947 vlan_status = E1000_RXD_STAT_VP;
948 vlan_offset = 4;
949 size -= 4;
952 rdh_start = s->mac_reg[RDH];
953 desc_offset = 0;
954 total_size = size + fcs_len(s);
955 if (!e1000_has_rxbufs(s, total_size)) {
956 set_ics(s, 0, E1000_ICS_RXO);
957 return -1;
959 do {
960 desc_size = total_size - desc_offset;
961 if (desc_size > s->rxbuf_size) {
962 desc_size = s->rxbuf_size;
964 base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
965 pci_dma_read(d, base, &desc, sizeof(desc));
966 desc.special = vlan_special;
967 desc.status |= (vlan_status | E1000_RXD_STAT_DD);
968 if (desc.buffer_addr) {
969 if (desc_offset < size) {
970 size_t copy_size = size - desc_offset;
971 if (copy_size > s->rxbuf_size) {
972 copy_size = s->rxbuf_size;
974 pci_dma_write(d, le64_to_cpu(desc.buffer_addr),
975 buf + desc_offset + vlan_offset, copy_size);
977 desc_offset += desc_size;
978 desc.length = cpu_to_le16(desc_size);
979 if (desc_offset >= total_size) {
980 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
981 } else {
982 /* Guest zeroing out status is not a hardware requirement.
983 Clear EOP in case guest didn't do it. */
984 desc.status &= ~E1000_RXD_STAT_EOP;
986 } else { // as per intel docs; skip descriptors with null buf addr
987 DBGOUT(RX, "Null RX descriptor!!\n");
989 pci_dma_write(d, base, &desc, sizeof(desc));
991 if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
992 s->mac_reg[RDH] = 0;
993 /* see comment in start_xmit; same here */
994 if (s->mac_reg[RDH] == rdh_start) {
995 DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
996 rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
997 set_ics(s, 0, E1000_ICS_RXO);
998 return -1;
1000 } while (desc_offset < total_size);
1002 s->mac_reg[GPRC]++;
1003 s->mac_reg[TPR]++;
1004 /* TOR - Total Octets Received:
1005 * This register includes bytes received in a packet from the <Destination
1006 * Address> field through the <CRC> field, inclusively.
1008 n = s->mac_reg[TORL] + size + /* Always include FCS length. */ 4;
1009 if (n < s->mac_reg[TORL])
1010 s->mac_reg[TORH]++;
1011 s->mac_reg[TORL] = n;
1013 n = E1000_ICS_RXT0;
1014 if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1015 rdt += s->mac_reg[RDLEN] / sizeof(desc);
1016 if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1017 s->rxbuf_min_shift)
1018 n |= E1000_ICS_RXDMT0;
1020 set_ics(s, 0, n);
1022 return size;
1025 static uint32_t
1026 mac_readreg(E1000State *s, int index)
1028 return s->mac_reg[index];
1031 static uint32_t
1032 mac_icr_read(E1000State *s, int index)
1034 uint32_t ret = s->mac_reg[ICR];
1036 DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1037 set_interrupt_cause(s, 0, 0);
1038 return ret;
1041 static uint32_t
1042 mac_read_clr4(E1000State *s, int index)
1044 uint32_t ret = s->mac_reg[index];
1046 s->mac_reg[index] = 0;
1047 return ret;
1050 static uint32_t
1051 mac_read_clr8(E1000State *s, int index)
1053 uint32_t ret = s->mac_reg[index];
1055 s->mac_reg[index] = 0;
1056 s->mac_reg[index-1] = 0;
1057 return ret;
1060 static void
1061 mac_writereg(E1000State *s, int index, uint32_t val)
1063 s->mac_reg[index] = val;
1066 static void
1067 set_rdt(E1000State *s, int index, uint32_t val)
1069 s->mac_reg[index] = val & 0xffff;
1070 if (e1000_has_rxbufs(s, 1)) {
1071 qemu_flush_queued_packets(qemu_get_queue(s->nic));
1075 static void
1076 set_16bit(E1000State *s, int index, uint32_t val)
1078 s->mac_reg[index] = val & 0xffff;
1081 static void
1082 set_dlen(E1000State *s, int index, uint32_t val)
1084 s->mac_reg[index] = val & 0xfff80;
1087 static void
1088 set_tctl(E1000State *s, int index, uint32_t val)
1090 s->mac_reg[index] = val;
1091 s->mac_reg[TDT] &= 0xffff;
1092 start_xmit(s);
1095 static void
1096 set_icr(E1000State *s, int index, uint32_t val)
1098 DBGOUT(INTERRUPT, "set_icr %x\n", val);
1099 set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1102 static void
1103 set_imc(E1000State *s, int index, uint32_t val)
1105 s->mac_reg[IMS] &= ~val;
1106 set_ics(s, 0, 0);
1109 static void
1110 set_ims(E1000State *s, int index, uint32_t val)
1112 s->mac_reg[IMS] |= val;
1113 set_ics(s, 0, 0);
#define getreg(x)	[x] = mac_readreg

/*
 * MMIO read dispatch table, indexed by register offset / 4.  Most
 * registers use the plain mac_readreg accessor; statistics counters are
 * read-to-clear (mac_read_clr4/mac_read_clr8) and ICR/EECD/EERD have
 * dedicated handlers.  NULL entries are unknown registers.
 */
static uint32_t (*macreg_readops[])(E1000State *, int) = {
    getreg(PBA),	getreg(RCTL),	getreg(TDH),	getreg(TXDCTL),
    getreg(WUFC),	getreg(TDT),	getreg(CTRL),	getreg(LEDCTL),
    getreg(MANC),	getreg(MDIC),	getreg(SWSM),	getreg(STATUS),
    getreg(TORL),	getreg(TOTL),	getreg(IMS),	getreg(TCTL),
    getreg(RDH),	getreg(RDT),	getreg(VET),	getreg(ICS),
    getreg(TDBAL),	getreg(TDBAH),	getreg(RDBAH),	getreg(RDBAL),
    getreg(TDLEN),	getreg(RDLEN),	getreg(RDTR),	getreg(RADV),
    getreg(TADV),	getreg(ITR),

    [TOTH] = mac_read_clr8,	[TORH] = mac_read_clr8,	[GPRC] = mac_read_clr4,
    [GPTC] = mac_read_clr4,	[TPR] = mac_read_clr4,	[TPT] = mac_read_clr4,
    [ICR] = mac_icr_read,	[EECD] = get_eecd,	[EERD] = flash_eerd_read,
    [CRCERRS ... MPC] = &mac_readreg,
    [RA ... RA+31] = &mac_readreg,
    [MTA ... MTA+127] = &mac_readreg,
    [VFTA ... VFTA+127] = &mac_readreg,
};
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
#define putreg(x)	[x] = mac_writereg

/*
 * MMIO write dispatch table, indexed by register offset / 4.  Registers
 * with side effects (ring lengths, tails, interrupt mask/cause, MDIC,
 * EECD, RCTL, CTRL) get dedicated handlers; the rest use the plain
 * mac_writereg store.  NULL entries are read-only or unknown.
 */
static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
    putreg(PBA),	putreg(EERD),	putreg(SWSM),	putreg(WUFC),
    putreg(TDBAL),	putreg(TDBAH),	putreg(TXDCTL),	putreg(RDBAH),
    putreg(RDBAL),	putreg(LEDCTL), putreg(VET),
    [TDLEN] = set_dlen,	[RDLEN] = set_dlen,	[TCTL] = set_tctl,
    [TDT] = set_tctl,	[MDIC] = set_mdic,	[ICS] = set_ics,
    [TDH] = set_16bit,	[RDH] = set_16bit,	[RDT] = set_rdt,
    [IMC] = set_imc,	[IMS] = set_ims,	[ICR] = set_icr,
    [EECD] = set_eecd,	[RCTL] = set_rx_control, [CTRL] = set_ctrl,
    [RDTR] = set_16bit, [RADV] = set_16bit,	[TADV] = set_16bit,
    [ITR] = set_16bit,
    [RA ... RA+31] = &mac_writereg,
    [MTA ... MTA+127] = &mac_writereg,
    [VFTA ... VFTA+127] = &mac_writereg,
};

enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1156 static void
1157 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1158 unsigned size)
1160 E1000State *s = opaque;
1161 unsigned int index = (addr & 0x1ffff) >> 2;
1163 if (index < NWRITEOPS && macreg_writeops[index]) {
1164 macreg_writeops[index](s, index, val);
1165 } else if (index < NREADOPS && macreg_readops[index]) {
1166 DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n", index<<2, val);
1167 } else {
1168 DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1169 index<<2, val);
1173 static uint64_t
1174 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1176 E1000State *s = opaque;
1177 unsigned int index = (addr & 0x1ffff) >> 2;
1179 if (index < NREADOPS && macreg_readops[index])
1181 return macreg_readops[index](s, index);
1183 DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1184 return 0;
/* MMIO region ops: all accesses are implemented as 32-bit, little endian. */
static const MemoryRegionOps e1000_mmio_ops = {
    .read = e1000_mmio_read,
    .write = e1000_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
1197 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1198 unsigned size)
1200 E1000State *s = opaque;
1202 (void)s;
1203 return 0;
1206 static void e1000_io_write(void *opaque, hwaddr addr,
1207 uint64_t val, unsigned size)
1209 E1000State *s = opaque;
1211 (void)s;
/* Stub I/O port region ops (reads return 0, writes are ignored). */
static const MemoryRegionOps e1000_io_ops = {
    .read = e1000_io_read,
    .write = e1000_io_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
/* vmstate field predicate: true only for version-1 migration streams. */
static bool is_version_1(void *opaque, int version_id)
{
    return version_id == 1;
}
/*
 * Pre-migration hook: fold pending timer-driven state into the
 * registers so the destination can reconstruct it.
 */
static void e1000_pre_save(void *opaque)
{
    E1000State *s = opaque;
    NetClientState *nc = qemu_get_queue(s->nic);

    /* If the mitigation timer is active, emulate a timeout now. */
    if (s->mit_timer_on) {
        e1000_mit_timer(s);
    }

    /* Nothing more to do unless auto-negotiation emulation is enabled. */
    if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
        return;
    }

    /*
     * If link is down and auto-negotiation is ongoing, complete
     * auto-negotiation immediately. This allows us to look at
     * MII_SR_AUTONEG_COMPLETE to infer link status on load.
     */
    if (nc->link_down &&
        s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN &&
        s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG) {
        s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
    }
}
/*
 * Post-migration hook: sanitize mitigation state for machine types that
 * predate it, and re-derive the backend link state from the migrated
 * registers.
 */
static int e1000_post_load(void *opaque, int version_id)
{
    E1000State *s = opaque;
    NetClientState *nc = qemu_get_queue(s->nic);

    if (!(s->compat_flags & E1000_FLAG_MIT)) {
        /* Mitigation disabled by compat flag: neutralize its registers. */
        s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
            s->mac_reg[TADV] = 0;
        s->mit_irq_level = false;
    }
    s->mit_ide = 0;
    s->mit_timer_on = false;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in mac_reg[STATUS].
     * Alternatively, restart link negotiation if it was in progress. */
    nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;

    if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
        return 0;
    }

    /* Auto-negotiation was in flight at save time: bring the link up
     * and let the autoneg timer finish the job shortly after resume. */
    if (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN &&
        s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG &&
        !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
        nc->link_down = false;
        timer_mod(s->autoneg_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
    }

    return 0;
}
1283 static bool e1000_mit_state_needed(void *opaque)
1285 E1000State *s = opaque;
1287 return s->compat_flags & E1000_FLAG_MIT;
/*
 * Migration subsection carrying interrupt-mitigation state; only sent
 * when e1000_mit_state_needed() reports the feature enabled.
 */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
/*
 * Main migration descriptor: PCI config space, EEPROM/PHY state, the
 * in-flight TX context and the guest-visible MAC register file.
 */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField []) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base. */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        /* EEPROM bit-banging engine state. */
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        /* Transmit offload context and partially-assembled frame. */
        VMSTATE_UINT8(tx.ipcss, E1000State),
        VMSTATE_UINT8(tx.ipcso, E1000State),
        VMSTATE_UINT16(tx.ipcse, E1000State),
        VMSTATE_UINT8(tx.tucss, E1000State),
        VMSTATE_UINT8(tx.tucso, E1000State),
        VMSTATE_UINT16(tx.tucse, E1000State),
        VMSTATE_UINT32(tx.paylen, E1000State),
        VMSTATE_UINT8(tx.hdr_len, E1000State),
        VMSTATE_UINT16(tx.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(tx.ip, E1000State),
        VMSTATE_INT8(tx.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        /* Guest-visible MAC registers, one field per migrated register. */
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (VMStateSubsection[]) {
        {
            .vmsd = &vmstate_e1000_mit_state,
            .needed = e1000_mit_state_needed,
        }, {
            /* empty */
        }
    }
};
/*
 * Default EEPROM image (64 16-bit words).  Words 0-2 (MAC address) and
 * the checksum word are patched at init time in pci_e1000_init().
 */
static const uint16_t e1000_eeprom_template[64] = {
    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000, 0x0000, 0x0000,
    0x3000, 0x1000, 0x6403, E1000_DEVID, 0x8086, E1000_DEVID, 0x8086, 0x3040,
    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8, 0x0000, 0x2700,
    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000, 0xc000, 0x0706,
    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff, 0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff, 0xffff, 0xffff,
    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff, 0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff, 0xffff, 0x0000,
};
1404 /* PCI interface */
/*
 * Create the MMIO and I/O-port memory regions.  MMIO write coalescing
 * is enabled for the whole BAR except for one 4-byte window around each
 * register in excluded_regs, whose write side effects must be handled
 * immediately.  PNPMMIO_SIZE acts as the sentinel terminating the list.
 */
static void
e1000_mmio_setup(E1000State *d)
{
    int i;
    const uint32_t excluded_regs[] = {
        E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
        E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
    };

    memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
                          "e1000-mmio", PNPMMIO_SIZE);
    /* Coalesce from the start of the BAR up to the first excluded reg, */
    memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
    /* ...then each gap between consecutive excluded registers. */
    for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
        memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
                                     excluded_regs[i+1] - excluded_regs[i] - 4);
    memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
}
1424 static void
1425 e1000_cleanup(NetClientState *nc)
1427 E1000State *s = qemu_get_nic_opaque(nc);
1429 s->nic = NULL;
/*
 * Device teardown: stop and free both timers, destroy the memory
 * regions and delete the NIC backend.
 */
static void
pci_e1000_uninit(PCIDevice *dev)
{
    E1000State *d = E1000(dev);

    timer_del(d->autoneg_timer);
    timer_free(d->autoneg_timer);
    timer_del(d->mit_timer);
    timer_free(d->mit_timer);
    memory_region_destroy(&d->mmio);
    memory_region_destroy(&d->io);
    qemu_del_nic(d->nic);
}
/* Backend callbacks wiring this device into the QEMU net layer. */
static NetClientInfo net_e1000_info = {
    .type = NET_CLIENT_OPTIONS_KIND_NIC,
    .size = sizeof(NICState),
    .can_receive = e1000_can_receive,
    .receive = e1000_receive,
    .cleanup = e1000_cleanup,
    .link_status_changed = e1000_set_link_status,
};
1455 static int pci_e1000_init(PCIDevice *pci_dev)
1457 DeviceState *dev = DEVICE(pci_dev);
1458 E1000State *d = E1000(pci_dev);
1459 uint8_t *pci_conf;
1460 uint16_t checksum = 0;
1461 int i;
1462 uint8_t *macaddr;
1464 pci_conf = pci_dev->config;
1466 /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1467 pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1469 pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1471 e1000_mmio_setup(d);
1473 pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1475 pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1477 memmove(d->eeprom_data, e1000_eeprom_template,
1478 sizeof e1000_eeprom_template);
1479 qemu_macaddr_default_if_unset(&d->conf.macaddr);
1480 macaddr = d->conf.macaddr.a;
1481 for (i = 0; i < 3; i++)
1482 d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
1483 for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
1484 checksum += d->eeprom_data[i];
1485 checksum = (uint16_t) EEPROM_SUM - checksum;
1486 d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;
1488 d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1489 object_get_typename(OBJECT(d)), dev->id, d);
1491 qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1493 add_boot_device_path(d->conf.bootindex, dev, "/ethernet-phy@0");
1495 d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1496 d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1498 return 0;
1501 static void qdev_e1000_reset(DeviceState *dev)
1503 E1000State *d = E1000(dev);
1504 e1000_reset(d);
/*
 * User-configurable device properties; both compat bits default to on
 * and exist so old machine types can turn the features off.
 */
static Property e1000_properties[] = {
    DEFINE_NIC_PROPERTIES(E1000State, conf),
    DEFINE_PROP_BIT("autonegotiation", E1000State,
                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
    DEFINE_PROP_BIT("mitigation", E1000State,
                    compat_flags, E1000_FLAG_MIT_BIT, true),
    DEFINE_PROP_END_OF_LIST(),
};
1516 static void e1000_class_init(ObjectClass *klass, void *data)
1518 DeviceClass *dc = DEVICE_CLASS(klass);
1519 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1521 k->init = pci_e1000_init;
1522 k->exit = pci_e1000_uninit;
1523 k->romfile = "efi-e1000.rom";
1524 k->vendor_id = PCI_VENDOR_ID_INTEL;
1525 k->device_id = E1000_DEVID;
1526 k->revision = 0x03;
1527 k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1528 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1529 dc->desc = "Intel Gigabit Ethernet";
1530 dc->reset = qdev_e1000_reset;
1531 dc->vmsd = &vmstate_e1000;
1532 dc->props = e1000_properties;
/* QOM type registration record for the e1000 PCI device. */
static const TypeInfo e1000_info = {
    .name = TYPE_E1000,
    .parent = TYPE_PCI_DEVICE,
    .instance_size = sizeof(E1000State),
    .class_init = e1000_class_init,
};
/* Register the e1000 type with QOM at module load time. */
static void e1000_register_types(void)
{
    type_register_static(&e1000_info);
}

type_init(e1000_register_types)