e1000: add bootindex to qom property
[qemu/qmp-unstable.git] / hw / net / e1000.c
blob0edbfa6b8a622658aa4de98cd775e57ab99fdd97
1 /*
2 * QEMU e1000 emulation
4 * Software developer's manual:
5 * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
7 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
8 * Copyright (c) 2008 Qumranet
9 * Based on work done by:
10 * Copyright (c) 2007 Dan Aloni
11 * Copyright (c) 2004 Antony T Curtis
13 * This library is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU Lesser General Public
15 * License as published by the Free Software Foundation; either
16 * version 2 of the License, or (at your option) any later version.
18 * This library is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * Lesser General Public License for more details.
23 * You should have received a copy of the GNU Lesser General Public
24 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
28 #include "hw/hw.h"
29 #include "hw/pci/pci.h"
30 #include "net/net.h"
31 #include "net/checksum.h"
32 #include "hw/loader.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/dma.h"
35 #include "qemu/iov.h"
37 #include "e1000_regs.h"
/* Compile-time switch for the debug tracing below. */
#define E1000_DEBUG

#ifdef E1000_DEBUG
/* One bit per trace category; see DBGBIT()/DBGOUT() below. */
enum {
    DEBUG_GENERAL, DEBUG_IO, DEBUG_MMIO, DEBUG_INTERRUPT,
    DEBUG_RX, DEBUG_TX, DEBUG_MDIC, DEBUG_EEPROM,
    DEBUG_UNKNOWN, DEBUG_TXSUM, DEBUG_TXERR, DEBUG_RXERR,
    DEBUG_RXFILTER, DEBUG_PHY, DEBUG_NOTYET,
};
#define DBGBIT(x)    (1<<DEBUG_##x)
/* Default: only TX errors and general messages are printed. */
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

/* Print to stderr iff the category 'what' is enabled in debugflags. */
#define DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#else
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif
#define IOPORT_SIZE       0x40      /* size of the PIO BAR */
#define PNPMMIO_SIZE      0x20000   /* size of the MMIO BAR */
#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */

/* this is the size past which hardware will drop packets when setting LPE=0 */
#define MAXIMUM_ETHERNET_VLAN_SIZE 1522
/* this is the size past which hardware will drop packets when setting LPE=1 */
#define MAXIMUM_ETHERNET_LPE_SIZE 16384

/* Ethernet header (14) plus one VLAN tag (4). */
#define MAXIMUM_ETHERNET_HDR_LEN (14+4)
71 * HW models:
72 * E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
73 * E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
74 * E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
75 * Others never tested
/*
 * Per-device emulation state.  NOTE(review): the tx sub-struct's field order
 * is load-bearing (vlan and data must stay adjacent, see comment below) and
 * several fields are migrated — do not reorder without auditing vmstate.
 */
typedef struct E1000State_st {
    /*< private >*/
    PCIDevice parent_obj;
    /*< public >*/

    NICState *nic;
    NICConf conf;
    MemoryRegion mmio;   /* register MMIO window */
    MemoryRegion io;     /* legacy ioport window */

    uint32_t mac_reg[0x8000];   /* MAC registers, indexed by dword offset */
    uint16_t phy_reg[0x20];     /* MII PHY registers */
    uint16_t eeprom_data[64];   /* microwire EEPROM contents */

    uint32_t rxbuf_size;        /* per-descriptor RX buffer size from RCTL */
    uint32_t rxbuf_min_shift;   /* RXDMT0 threshold shift from RCTL */
    struct e1000_tx {
        unsigned char header[256];      /* saved TSO header prototype */
        unsigned char vlan_header[4];   /* tag inserted on VLE transmits */
        /* Fields vlan and data must not be reordered or separated. */
        unsigned char vlan[4];
        unsigned char data[0x10000];
        uint16_t size;              /* bytes accumulated in data[] */
        unsigned char sum_needed;   /* POPTS bits from the data descriptor */
        unsigned char vlan_needed;
        /* checksum context from the most recent context descriptor: */
        uint8_t ipcss;
        uint8_t ipcso;
        uint16_t ipcse;
        uint8_t tucss;
        uint8_t tucso;
        uint16_t tucse;
        uint8_t hdr_len;
        uint16_t mss;
        uint32_t paylen;
        uint16_t tso_frames;    /* segments emitted for the current TSO job */
        char tse;               /* TSE bit from the context descriptor */
        int8_t ip;
        int8_t tcp;
        char cptse;     // current packet tse bit
    } tx;

    struct {
        uint32_t val_in;    // shifted in from guest driver
        uint16_t bitnum_in;
        uint16_t bitnum_out;
        uint16_t reading;
        uint32_t old_eecd;
    } eecd_state;

    QEMUTimer *autoneg_timer;

    QEMUTimer *mit_timer;      /* Mitigation timer. */
    bool mit_timer_on;         /* Mitigation timer is running. */
    bool mit_irq_level;        /* Tracks interrupt pin level. */
    uint32_t mit_ide;          /* Tracks E1000_TXD_CMD_IDE bit. */

    /* Compatibility flags for migration to/from qemu 1.3.0 and older */
#define E1000_FLAG_AUTONEG_BIT 0
#define E1000_FLAG_MIT_BIT 1
#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
    uint32_t compat_flags;
} E1000State;
/* Class data shared by the e1000 device variants: only the PHY ID differs. */
typedef struct E1000BaseClass {
    PCIDeviceClass parent_class;
    uint16_t phy_id2;   /* value loaded into PHY_ID2 on reset */
} E1000BaseClass;

#define TYPE_E1000_BASE "e1000-base"

/* Standard QOM cast helpers for the e1000 family. */
#define E1000(obj) \
    OBJECT_CHECK(E1000State, (obj), TYPE_E1000_BASE)

#define E1000_DEVICE_CLASS(klass) \
     OBJECT_CLASS_CHECK(E1000BaseClass, (klass), TYPE_E1000_BASE)
#define E1000_DEVICE_GET_CLASS(obj) \
    OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE)
/* Map each E1000_<reg> byte offset to a dword index into mac_reg[]. */
#define defreg(x)    x = (E1000_##x>>2)
enum {
    defreg(CTRL),    defreg(EECD),    defreg(EERD),    defreg(GPRC),
    defreg(GPTC),    defreg(ICR),     defreg(ICS),     defreg(IMC),
    defreg(IMS),     defreg(LEDCTL),  defreg(MANC),    defreg(MDIC),
    defreg(MPC),     defreg(PBA),     defreg(RCTL),    defreg(RDBAH),
    defreg(RDBAL),   defreg(RDH),     defreg(RDLEN),   defreg(RDT),
    defreg(STATUS),  defreg(SWSM),    defreg(TCTL),    defreg(TDBAH),
    defreg(TDBAL),   defreg(TDH),     defreg(TDLEN),   defreg(TDT),
    defreg(TORH),    defreg(TORL),    defreg(TOTH),    defreg(TOTL),
    defreg(TPR),     defreg(TPT),     defreg(TXDCTL),  defreg(WUFC),
    defreg(RA),      defreg(MTA),     defreg(CRCERRS), defreg(VFTA),
    defreg(VET),     defreg(RDTR),    defreg(RADV),    defreg(TADV),
    defreg(ITR),
};
173 static void
174 e1000_link_down(E1000State *s)
176 s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
177 s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
178 s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
179 s->phy_reg[PHY_LP_ABILITY] &= ~MII_LPAR_LPACK;
182 static void
183 e1000_link_up(E1000State *s)
185 s->mac_reg[STATUS] |= E1000_STATUS_LU;
186 s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
189 static bool
190 have_autoneg(E1000State *s)
192 return (s->compat_flags & E1000_FLAG_AUTONEG) &&
193 (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
196 static void
197 set_phy_ctrl(E1000State *s, int index, uint16_t val)
199 /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
200 s->phy_reg[PHY_CTRL] = val & ~(0x3f |
201 MII_CR_RESET |
202 MII_CR_RESTART_AUTO_NEG);
205 * QEMU 1.3 does not support link auto-negotiation emulation, so if we
206 * migrate during auto negotiation, after migration the link will be
207 * down.
209 if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
210 e1000_link_down(s);
211 DBGOUT(PHY, "Start link auto negotiation\n");
212 timer_mod(s->autoneg_timer,
213 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
/* Per-register PHY write hooks; NULL entries fall back to a plain store. */
static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
    [PHY_CTRL] = set_phy_ctrl,
};

enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };

/* Read/write capability of each PHY register, enforced by set_mdic(). */
enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
static const char phy_regcap[0x20] = {
    [PHY_STATUS]      = PHY_R,  [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
    [PHY_ID1]         = PHY_R,  [M88E1000_PHY_SPEC_CTRL]     = PHY_RW,
    [PHY_CTRL]        = PHY_RW, [PHY_1000T_CTRL]             = PHY_RW,
    [PHY_LP_ABILITY]  = PHY_R,  [PHY_1000T_STATUS]           = PHY_R,
    [PHY_AUTONEG_ADV] = PHY_RW, [M88E1000_RX_ERR_CNTR]       = PHY_R,
    [PHY_ID2]         = PHY_R,  [M88E1000_PHY_SPEC_STATUS]   = PHY_R,
    [PHY_AUTONEG_EXP] = PHY_R,
};
/* PHY_ID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
/* Reset values for the PHY registers, copied in by e1000_reset(). */
static const uint16_t phy_reg_init[] = {
    [PHY_CTRL]   = MII_CR_SPEED_SELECT_MSB |
                   MII_CR_FULL_DUPLEX |
                   MII_CR_AUTO_NEG_EN,

    [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
                   MII_SR_LINK_STATUS |   /* link initially up */
                   MII_SR_AUTONEG_CAPS |
                   /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
                   MII_SR_PREAMBLE_SUPPRESS |
                   MII_SR_EXTENDED_STATUS |
                   MII_SR_10T_HD_CAPS |
                   MII_SR_10T_FD_CAPS |
                   MII_SR_100X_HD_CAPS |
                   MII_SR_100X_FD_CAPS,

    [PHY_ID1] = 0x141,
    /* [PHY_ID2] configured per DevId, from e1000_reset() */
    [PHY_AUTONEG_ADV] = 0xde1,
    [PHY_LP_ABILITY] = 0x1e0,
    [PHY_1000T_CTRL] = 0x0e00,
    [PHY_1000T_STATUS] = 0x3c00,
    [M88E1000_PHY_SPEC_CTRL] = 0x360,
    [M88E1000_PHY_SPEC_STATUS] = 0xac00,
    [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
};
/* Reset values for the MAC registers, copied in by e1000_reset(). */
static const uint32_t mac_reg_init[] = {
    [PBA]     = 0x00100030,
    [LEDCTL]  = 0x602,
    [CTRL]    = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
                E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
    /* Link initially up at 1000 Mb/s full duplex. */
    [STATUS]  = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
                E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
                E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
                E1000_STATUS_LU,
    [MANC]    = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
                E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
                E1000_MANC_RMCP_EN,
};
/* Helper function, *curr == 0 means the value is not set */
/* Tighten *curr to 'value' when 'value' is a smaller non-zero delay. */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;
    }
    if (*curr == 0 || value < *curr) {
        *curr = value;
    }
}
/*
 * Latch the interrupt cause 'val' into ICR/ICS and drive the PCI INTx line,
 * honouring the (partial) interrupt-mitigation emulation.
 */
static void
set_interrupt_cause(E1000State *s, int index, uint32_t val)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t pending_ints;
    uint32_t mit_delay;

    s->mac_reg[ICR] = val;

    /*
     * Make sure ICR and ICS registers have the same value.
     * The spec says that the ICS register is write-only.  However in practice,
     * on real hardware ICS is readable, and for reads it has the same value as
     * ICR (except that ICS does not have the clear on read behaviour of ICR).
     *
     * The VxWorks PRO/1000 driver uses this behaviour.
     */
    s->mac_reg[ICS] = val;

    pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
    if (!s->mit_irq_level && pending_ints) {
        /*
         * Here we detect a potential raising edge. We postpone raising the
         * interrupt line if we are inside the mitigation delay window
         * (s->mit_timer_on == 1).
         * We provide a partial implementation of interrupt mitigation,
         * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
         * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
         * RADV; relative timers based on TIDV and RDTR are not implemented.
         */
        if (s->mit_timer_on) {
            return;   /* e1000_mit_timer() re-runs this when the window ends */
        }
        if (s->compat_flags & E1000_FLAG_MIT) {
            /* Compute the next mitigation delay according to pending
             * interrupts and the current values of RADV (provided
             * RDTR!=0), TADV and ITR.
             * Then rearm the timer.
             */
            mit_delay = 0;
            if (s->mit_ide &&
                    (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
                mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
            }
            if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
                mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
            }
            mit_update_delay(&mit_delay, s->mac_reg[ITR]);

            if (mit_delay) {
                s->mit_timer_on = 1;
                /* mit_delay is in 256ns units (see comment above). */
                timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                          mit_delay * 256);
            }
            s->mit_ide = 0;
        }
    }

    s->mit_irq_level = (pending_ints != 0);
    pci_set_irq(d, s->mit_irq_level);
}
347 static void
348 e1000_mit_timer(void *opaque)
350 E1000State *s = opaque;
352 s->mit_timer_on = 0;
353 /* Call set_interrupt_cause to update the irq level (if necessary). */
354 set_interrupt_cause(s, 0, s->mac_reg[ICR]);
357 static void
358 set_ics(E1000State *s, int index, uint32_t val)
360 DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
361 s->mac_reg[IMS]);
362 set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
365 static void
366 e1000_autoneg_timer(void *opaque)
368 E1000State *s = opaque;
369 if (!qemu_get_queue(s->nic)->link_down) {
370 e1000_link_up(s);
371 s->phy_reg[PHY_LP_ABILITY] |= MII_LPAR_LPACK;
372 s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
373 DBGOUT(PHY, "Auto negotiation is completed\n");
374 set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
378 static int
379 rxbufsize(uint32_t v)
381 v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
382 E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
383 E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
384 switch (v) {
385 case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
386 return 16384;
387 case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
388 return 8192;
389 case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
390 return 4096;
391 case E1000_RCTL_SZ_1024:
392 return 1024;
393 case E1000_RCTL_SZ_512:
394 return 512;
395 case E1000_RCTL_SZ_256:
396 return 256;
398 return 2048;
/*
 * Device reset: cancel timers, restore PHY/MAC registers to their reset
 * values, and pre-load RAL/RAH from the configured MAC address.
 */
static void e1000_reset(void *opaque)
{
    E1000State *d = opaque;
    E1000BaseClass *edc = E1000_DEVICE_GET_CLASS(d);
    uint8_t *macaddr = d->conf.macaddr.a;
    int i;

    timer_del(d->autoneg_timer);
    timer_del(d->mit_timer);
    d->mit_timer_on = 0;
    d->mit_irq_level = 0;
    d->mit_ide = 0;
    memset(d->phy_reg, 0, sizeof d->phy_reg);
    memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
    /* PHY_ID2 is the one register that differs between device variants. */
    d->phy_reg[PHY_ID2] = edc->phy_id2;
    memset(d->mac_reg, 0, sizeof d->mac_reg);
    memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
    d->rxbuf_min_shift = 1;
    memset(&d->tx, 0, sizeof d->tx);

    /* Registers default to link-up; undo that if the backend link is down. */
    if (qemu_get_queue(d->nic)->link_down) {
        e1000_link_down(d);
    }

    /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
    d->mac_reg[RA] = 0;
    d->mac_reg[RA + 1] = E1000_RAH_AV;
    for (i = 0; i < 4; i++) {
        d->mac_reg[RA] |= macaddr[i] << (8 * i);
        d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
    }
    qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
}
435 static void
436 set_ctrl(E1000State *s, int index, uint32_t val)
438 /* RST is self clearing */
439 s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
442 static void
443 set_rx_control(E1000State *s, int index, uint32_t val)
445 s->mac_reg[RCTL] = val;
446 s->rxbuf_size = rxbufsize(val);
447 s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
448 DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
449 s->mac_reg[RCTL]);
450 qemu_flush_queued_packets(qemu_get_queue(s->nic));
/*
 * MDIC write handler: performs the requested MDIO read/write against the
 * emulated PHY.  Note 'val' is reused as the result register: error bits
 * are OR-ed into it and the final value (plus READY) lands in mac_reg[MDIC].
 */
static void
set_mdic(E1000State *s, int index, uint32_t val)
{
    uint32_t data = val & E1000_MDIC_DATA_MASK;
    uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);

    if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
        val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
    else if (val & E1000_MDIC_OP_READ) {
        DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
        if (!(phy_regcap[addr] & PHY_R)) {
            DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else
            /* (val ^ data) clears the data field, then the PHY value goes in */
            val = (val ^ data) | s->phy_reg[addr];
    } else if (val & E1000_MDIC_OP_WRITE) {
        DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
        if (!(phy_regcap[addr] & PHY_W)) {
            DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
            val |= E1000_MDIC_ERROR;
        } else {
            /* Registers with a write hook get it; others store directly. */
            if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
                phyreg_writeops[addr](s, index, data);
            } else {
                s->phy_reg[addr] = data;
            }
        }
    }
    s->mac_reg[MDIC] = val | E1000_MDIC_READY;

    if (val & E1000_MDIC_INT_EN) {
        set_ics(s, 0, E1000_ICR_MDAC);
    }
}
/*
 * EECD read handler: present the next serial EEPROM output bit (DO) along
 * with the sticky control bits the guest last wrote.
 */
static uint32_t
get_eecd(E1000State *s, int index)
{
    uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;

    DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
           s->eecd_state.bitnum_out, s->eecd_state.reading);
    /*
     * When not in a read op, DO idles high.  During a read, bitnum_out
     * selects word (>>4) and bit within the word (MSB first, hence ^ 0xf).
     */
    if (!s->eecd_state.reading ||
        ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
          ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
        ret |= E1000_EECD_DO;
    return ret;
}
/*
 * EECD write handler: bit-bang state machine for the microwire EEPROM.
 * Tracks CS and SK edges; after 9 bits shifted in (start + opcode + address)
 * a read op positions bitnum_out at the start of the addressed word.
 */
static void
set_eecd(E1000State *s, int index, uint32_t val)
{
    uint32_t oldval = s->eecd_state.old_eecd;

    s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
            E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
    if (!(E1000_EECD_CS & val))              // CS inactive; nothing to do
        return;
    if (E1000_EECD_CS & (val ^ oldval)) {    // CS rise edge; reset state
        s->eecd_state.val_in = 0;
        s->eecd_state.bitnum_in = 0;
        s->eecd_state.bitnum_out = 0;
        s->eecd_state.reading = 0;
    }
    if (!(E1000_EECD_SK & (val ^ oldval)))   // no clock edge
        return;
    if (!(E1000_EECD_SK & val)) {            // falling edge
        s->eecd_state.bitnum_out++;
        return;
    }
    /* Rising clock edge: shift DI into val_in. */
    s->eecd_state.val_in <<= 1;
    if (val & E1000_EECD_DI)
        s->eecd_state.val_in |= 1;
    if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
        /* -1 because the next falling edge pre-increments bitnum_out. */
        s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
        s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
            EEPROM_READ_OPCODE_MICROWIRE);
    }
    DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
           s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
           s->eecd_state.reading);
}
536 static uint32_t
537 flash_eerd_read(E1000State *s, int x)
539 unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
541 if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
542 return (s->mac_reg[EERD]);
544 if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
545 return (E1000_EEPROM_RW_REG_DONE | r);
547 return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
548 E1000_EEPROM_RW_REG_DONE | r);
551 static void
552 putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
554 uint32_t sum;
556 if (cse && cse < n)
557 n = cse + 1;
558 if (sloc < n-1) {
559 sum = net_checksum_add(n-css, data+css);
560 stw_be_p(data + sloc, net_checksum_finish(sum));
564 static inline int
565 vlan_enabled(E1000State *s)
567 return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
570 static inline int
571 vlan_rx_filter_enabled(E1000State *s)
573 return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
576 static inline int
577 is_vlan_packet(E1000State *s, const uint8_t *buf)
579 return (be16_to_cpup((uint16_t *)(buf + 12)) ==
580 le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
583 static inline int
584 is_vlan_txd(uint32_t txd_lower)
586 return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
589 /* FCS aka Ethernet CRC-32. We don't get it from backends and can't
590 * fill it in, just pad descriptor length by 4 bytes unless guest
591 * told us to strip it off the packet. */
592 static inline int
593 fcs_len(E1000State *s)
595 return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
598 static void
599 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
601 NetClientState *nc = qemu_get_queue(s->nic);
602 if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
603 nc->info->receive(nc, buf, size);
604 } else {
605 qemu_send_packet(nc, buf, size);
/*
 * Emit one segment from s->tx: patch TSO headers (IP length/id, TCP seq and
 * flags, pseudo-header length), apply requested checksums, insert the VLAN
 * tag if needed, send the frame, and bump the TX statistics counters.
 */
static void
xmit_seg(E1000State *s)
{
    uint16_t len, *sp;
    unsigned int frames = s->tx.tso_frames, css, sofar, n;
    struct e1000_tx *tp = &s->tx;

    if (tp->tse && tp->cptse) {
        css = tp->ipcss;
        DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
               frames, tp->size, css);
        if (tp->ip) {       // IPv4
            /* Total-length field, then bump the IP id by the segment count. */
            stw_be_p(tp->data+css+2, tp->size - css);
            stw_be_p(tp->data+css+4,
                     be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
        } else {            // IPv6
            stw_be_p(tp->data+css+4, tp->size - css);
        }
        css = tp->tucss;
        len = tp->size - css;
        DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
        if (tp->tcp) {
            /* Advance the TCP sequence number by the payload already sent. */
            sofar = frames * tp->mss;
            stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
            if (tp->paylen - sofar > tp->mss)
                tp->data[css + 13] &= ~9;    // PSH, FIN
        } else {    // UDP
            stw_be_p(tp->data+css+4, len);
        }
        if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
            unsigned int phsum;
            // add pseudo-header length before checksum calculation
            sp = (uint16_t *)(tp->data + tp->tucso);
            phsum = be16_to_cpup(sp) + len;
            phsum = (phsum >> 16) + (phsum & 0xffff);
            stw_be_p(sp, phsum);
        }
        tp->tso_frames++;
    }

    if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
        putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
    if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
        putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
    if (tp->vlan_needed) {
        /* Shift the MAC header down and splice the VLAN tag in at offset 12
         * (relies on tp->vlan immediately preceding tp->data). */
        memmove(tp->vlan, tp->data, 4);
        memmove(tp->data, tp->data + 4, 8);
        memcpy(tp->data + 8, tp->vlan_header, 4);
        e1000_send_packet(s, tp->vlan, tp->size + 4);
    } else
        e1000_send_packet(s, tp->data, tp->size);
    s->mac_reg[TPT]++;
    s->mac_reg[GPTC]++;
    /* 64-bit TOTL/TOTH octet counter with manual carry. */
    n = s->mac_reg[TOTL];
    if ((s->mac_reg[TOTL] += s->tx.size) < n)
        s->mac_reg[TOTH]++;
}
/*
 * Process one TX descriptor: context descriptors update the checksum/TSO
 * context; data and legacy descriptors DMA payload into s->tx.data, emitting
 * TSO segments as each (header + mss) chunk fills, and finish the packet on
 * EOP.
 */
static void
process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
    unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
    unsigned int msh = 0xfffff;
    uint64_t addr;
    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
    struct e1000_tx *tp = &s->tx;

    s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
    if (dtype == E1000_TXD_CMD_DEXT) {    // context descriptor
        /* Latch the checksum/TSO parameters for subsequent data descs. */
        op = le32_to_cpu(xp->cmd_and_length);
        tp->ipcss = xp->lower_setup.ip_fields.ipcss;
        tp->ipcso = xp->lower_setup.ip_fields.ipcso;
        tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
        tp->tucss = xp->upper_setup.tcp_fields.tucss;
        tp->tucso = xp->upper_setup.tcp_fields.tucso;
        tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
        tp->paylen = op & 0xfffff;
        tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
        tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
        tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
        tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
        tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
        tp->tso_frames = 0;
        if (tp->tucso == 0) {    // this is probably wrong
            DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
            tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
        }
        return;
    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
        // data descriptor
        if (tp->size == 0) {
            tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
        }
        tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
    } else {
        // legacy descriptor
        tp->cptse = 0;
    }

    if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
        (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
        tp->vlan_needed = 1;
        stw_be_p(tp->vlan_header,
                 le16_to_cpup((uint16_t *)(s->mac_reg + VET)));
        stw_be_p(tp->vlan_header + 2,
                 le16_to_cpu(dp->upper.fields.special));
    }

    addr = le64_to_cpu(dp->buffer_addr);
    if (tp->tse && tp->cptse) {
        /* TSO: accumulate up to header+mss, segmenting as each fills. */
        msh = tp->hdr_len + tp->mss;
        do {
            bytes = split_size;
            if (tp->size + bytes > msh)
                bytes = msh - tp->size;

            bytes = MIN(sizeof(tp->data) - tp->size, bytes);
            pci_dma_read(d, addr, tp->data + tp->size, bytes);
            sz = tp->size + bytes;
            if (sz >= tp->hdr_len && tp->size < tp->hdr_len) {
                /* Header just completed; save it for later segments. */
                memmove(tp->header, tp->data, tp->hdr_len);
            }
            tp->size = sz;
            addr += bytes;
            if (sz == msh) {
                xmit_seg(s);
                /* Re-seed the buffer with the saved header. */
                memmove(tp->data, tp->header, tp->hdr_len);
                tp->size = tp->hdr_len;
            }
        } while (split_size -= bytes);
    } else if (!tp->tse && tp->cptse) {
        // context descriptor TSE is not set, while data descriptor TSE is set
        DBGOUT(TXERR, "TCP segmentation error\n");
    } else {
        split_size = MIN(sizeof(tp->data) - tp->size, split_size);
        pci_dma_read(d, addr, tp->data + tp->size, split_size);
        tp->size += split_size;
    }

    if (!(txd_lower & E1000_TXD_CMD_EOP))
        return;
    /* On EOP, flush anything buffered (unless only a partial TSO header). */
    if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) {
        xmit_seg(s);
    }
    tp->tso_frames = 0;
    tp->sum_needed = 0;
    tp->vlan_needed = 0;
    tp->size = 0;
    tp->cptse = 0;
}
761 static uint32_t
762 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
764 PCIDevice *d = PCI_DEVICE(s);
765 uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
767 if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
768 return 0;
769 txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
770 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
771 dp->upper.data = cpu_to_le32(txd_upper);
772 pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
773 &dp->upper, sizeof(dp->upper));
774 return E1000_ICR_TXDW;
777 static uint64_t tx_desc_base(E1000State *s)
779 uint64_t bah = s->mac_reg[TDBAH];
780 uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
782 return (bah << 32) + bal;
/*
 * Drain the TX ring from TDH to TDT, processing each descriptor and doing
 * requested writebacks; raises the accumulated interrupt causes at the end.
 */
static void
start_xmit(E1000State *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    dma_addr_t base;
    struct e1000_tx_desc desc;
    uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;

    if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
        DBGOUT(TX, "tx disabled\n");
        return;
    }

    while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
        base = tx_desc_base(s) +
               sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
        pci_dma_read(d, base, &desc, sizeof(desc));

        DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
               (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
               desc.upper.data);

        process_tx_desc(s, &desc);
        cause |= txdesc_writeback(s, base, &desc);

        /* Advance TDH, wrapping at the end of the ring. */
        if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
            s->mac_reg[TDH] = 0;
        /*
         * the following could happen only if guest sw assigns
         * bogus values to TDT/TDLEN.
         * there's nothing too intelligent we could do about this.
         */
        if (s->mac_reg[TDH] == tdh_start) {
            DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
                   tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
            break;
        }
    }
    set_ics(s, 0, cause);
}
/*
 * RX filtering: returns 1 to accept the frame, 0 to drop it.
 * Order follows the hardware: VLAN filter first, then unicast/multicast
 * promiscuous, broadcast, exact RA matches, and finally the MTA hash.
 */
static int
receive_filter(E1000State *s, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const int mta_shift[] = {4, 3, 2, 0};
    uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;

    if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
        /* Look up the VLAN id in the 4096-bit VFTA bitmap. */
        uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
        uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
                                     ((vid >> 5) & 0x7f));
        if ((vfta & (1 << (vid & 0x1f))) == 0)
            return 0;
    }

    if (rctl & E1000_RCTL_UPE)          // promiscuous
        return 1;

    if ((buf[0] & 1) && (rctl & E1000_RCTL_MPE))    // promiscuous mcast
        return 1;

    if ((rctl & E1000_RCTL_BAM) && !memcmp(buf, bcast, sizeof bcast))
        return 1;

    /* Exact match against the 16 RAL/RAH receive-address pairs. */
    for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
        if (!(rp[1] & E1000_RAH_AV))
            continue;
        ra[0] = cpu_to_le32(rp[0]);
        ra[1] = cpu_to_le32(rp[1]);
        if (!memcmp(buf, (uint8_t *)ra, 6)) {
            DBGOUT(RXFILTER,
                   "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
                   (int)(rp - s->mac_reg - RA)/2,
                   buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
            return 1;
        }
    }
    DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);

    /* Multicast table array lookup: RCTL.MO selects which 12 address bits. */
    f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
    f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
    if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f)))
        return 1;
    DBGOUT(RXFILTER,
           "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
           buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
           (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
           s->mac_reg[MTA + (f >> 5)]);

    return 0;
}
879 static void
880 e1000_set_link_status(NetClientState *nc)
882 E1000State *s = qemu_get_nic_opaque(nc);
883 uint32_t old_status = s->mac_reg[STATUS];
885 if (nc->link_down) {
886 e1000_link_down(s);
887 } else {
888 if (have_autoneg(s) &&
889 !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
890 /* emulate auto-negotiation if supported */
891 timer_mod(s->autoneg_timer,
892 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
893 } else {
894 e1000_link_up(s);
898 if (s->mac_reg[STATUS] != old_status)
899 set_ics(s, 0, E1000_ICR_LSC);
/*
 * True when the RX ring has enough free descriptors to hold total_size
 * bytes.  RDH == RDT means the ring is empty (no buffers available).
 */
static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
{
    int bufs;
    /* Fast-path short packets */
    if (total_size <= s->rxbuf_size) {
        return s->mac_reg[RDH] != s->mac_reg[RDT];
    }
    if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
        bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
    } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
        /* Head past tail: free span wraps around the end of the ring. */
        bufs = s->mac_reg[RDLEN] / sizeof(struct e1000_rx_desc) +
            s->mac_reg[RDT] - s->mac_reg[RDH];
    } else {
        return false;
    }
    return total_size <= bufs * s->rxbuf_size;
}
920 static int
921 e1000_can_receive(NetClientState *nc)
923 E1000State *s = qemu_get_nic_opaque(nc);
925 return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
926 (s->mac_reg[RCTL] & E1000_RCTL_EN) && e1000_has_rxbufs(s, 1);
929 static uint64_t rx_desc_base(E1000State *s)
931 uint64_t bah = s->mac_reg[RDBAH];
932 uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
934 return (bah << 32) + bal;
937 static ssize_t
938 e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
940 E1000State *s = qemu_get_nic_opaque(nc);
941 PCIDevice *d = PCI_DEVICE(s);
942 struct e1000_rx_desc desc;
943 dma_addr_t base;
944 unsigned int n, rdt;
945 uint32_t rdh_start;
946 uint16_t vlan_special = 0;
947 uint8_t vlan_status = 0;
948 uint8_t min_buf[MIN_BUF_SIZE];
949 struct iovec min_iov;
950 uint8_t *filter_buf = iov->iov_base;
951 size_t size = iov_size(iov, iovcnt);
952 size_t iov_ofs = 0;
953 size_t desc_offset;
954 size_t desc_size;
955 size_t total_size;
957 if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
958 return -1;
961 if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
962 return -1;
965 /* Pad to minimum Ethernet frame length */
966 if (size < sizeof(min_buf)) {
967 iov_to_buf(iov, iovcnt, 0, min_buf, size);
968 memset(&min_buf[size], 0, sizeof(min_buf) - size);
969 min_iov.iov_base = filter_buf = min_buf;
970 min_iov.iov_len = size = sizeof(min_buf);
971 iovcnt = 1;
972 iov = &min_iov;
973 } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
974 /* This is very unlikely, but may happen. */
975 iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
976 filter_buf = min_buf;
979 /* Discard oversized packets if !LPE and !SBP. */
980 if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
981 (size > MAXIMUM_ETHERNET_VLAN_SIZE
982 && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
983 && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
984 return size;
987 if (!receive_filter(s, filter_buf, size)) {
988 return size;
991 if (vlan_enabled(s) && is_vlan_packet(s, filter_buf)) {
992 vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(filter_buf
993 + 14)));
994 iov_ofs = 4;
995 if (filter_buf == iov->iov_base) {
996 memmove(filter_buf + 4, filter_buf, 12);
997 } else {
998 iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
999 while (iov->iov_len <= iov_ofs) {
1000 iov_ofs -= iov->iov_len;
1001 iov++;
1004 vlan_status = E1000_RXD_STAT_VP;
1005 size -= 4;
1008 rdh_start = s->mac_reg[RDH];
1009 desc_offset = 0;
1010 total_size = size + fcs_len(s);
1011 if (!e1000_has_rxbufs(s, total_size)) {
1012 set_ics(s, 0, E1000_ICS_RXO);
1013 return -1;
1015 do {
1016 desc_size = total_size - desc_offset;
1017 if (desc_size > s->rxbuf_size) {
1018 desc_size = s->rxbuf_size;
1020 base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
1021 pci_dma_read(d, base, &desc, sizeof(desc));
1022 desc.special = vlan_special;
1023 desc.status |= (vlan_status | E1000_RXD_STAT_DD);
1024 if (desc.buffer_addr) {
1025 if (desc_offset < size) {
1026 size_t iov_copy;
1027 hwaddr ba = le64_to_cpu(desc.buffer_addr);
1028 size_t copy_size = size - desc_offset;
1029 if (copy_size > s->rxbuf_size) {
1030 copy_size = s->rxbuf_size;
1032 do {
1033 iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
1034 pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
1035 copy_size -= iov_copy;
1036 ba += iov_copy;
1037 iov_ofs += iov_copy;
1038 if (iov_ofs == iov->iov_len) {
1039 iov++;
1040 iov_ofs = 0;
1042 } while (copy_size);
1044 desc_offset += desc_size;
1045 desc.length = cpu_to_le16(desc_size);
1046 if (desc_offset >= total_size) {
1047 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
1048 } else {
1049 /* Guest zeroing out status is not a hardware requirement.
1050 Clear EOP in case guest didn't do it. */
1051 desc.status &= ~E1000_RXD_STAT_EOP;
1053 } else { // as per intel docs; skip descriptors with null buf addr
1054 DBGOUT(RX, "Null RX descriptor!!\n");
1056 pci_dma_write(d, base, &desc, sizeof(desc));
1058 if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
1059 s->mac_reg[RDH] = 0;
1060 /* see comment in start_xmit; same here */
1061 if (s->mac_reg[RDH] == rdh_start) {
1062 DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
1063 rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
1064 set_ics(s, 0, E1000_ICS_RXO);
1065 return -1;
1067 } while (desc_offset < total_size);
1069 s->mac_reg[GPRC]++;
1070 s->mac_reg[TPR]++;
1071 /* TOR - Total Octets Received:
1072 * This register includes bytes received in a packet from the <Destination
1073 * Address> field through the <CRC> field, inclusively.
1075 n = s->mac_reg[TORL] + size + /* Always include FCS length. */ 4;
1076 if (n < s->mac_reg[TORL])
1077 s->mac_reg[TORH]++;
1078 s->mac_reg[TORL] = n;
1080 n = E1000_ICS_RXT0;
1081 if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1082 rdt += s->mac_reg[RDLEN] / sizeof(desc);
1083 if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1084 s->rxbuf_min_shift)
1085 n |= E1000_ICS_RXDMT0;
1087 set_ics(s, 0, n);
1089 return size;
1092 static ssize_t
1093 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1095 const struct iovec iov = {
1096 .iov_base = (uint8_t *)buf,
1097 .iov_len = size
1100 return e1000_receive_iov(nc, &iov, 1);
1103 static uint32_t
1104 mac_readreg(E1000State *s, int index)
1106 return s->mac_reg[index];
1109 static uint32_t
1110 mac_icr_read(E1000State *s, int index)
1112 uint32_t ret = s->mac_reg[ICR];
1114 DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1115 set_interrupt_cause(s, 0, 0);
1116 return ret;
1119 static uint32_t
1120 mac_read_clr4(E1000State *s, int index)
1122 uint32_t ret = s->mac_reg[index];
1124 s->mac_reg[index] = 0;
1125 return ret;
1128 static uint32_t
1129 mac_read_clr8(E1000State *s, int index)
1131 uint32_t ret = s->mac_reg[index];
1133 s->mac_reg[index] = 0;
1134 s->mac_reg[index-1] = 0;
1135 return ret;
1138 static void
1139 mac_writereg(E1000State *s, int index, uint32_t val)
1141 uint32_t macaddr[2];
1143 s->mac_reg[index] = val;
1145 if (index == RA + 1) {
1146 macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1147 macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1148 qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1152 static void
1153 set_rdt(E1000State *s, int index, uint32_t val)
1155 s->mac_reg[index] = val & 0xffff;
1156 if (e1000_has_rxbufs(s, 1)) {
1157 qemu_flush_queued_packets(qemu_get_queue(s->nic));
1161 static void
1162 set_16bit(E1000State *s, int index, uint32_t val)
1164 s->mac_reg[index] = val & 0xffff;
1167 static void
1168 set_dlen(E1000State *s, int index, uint32_t val)
1170 s->mac_reg[index] = val & 0xfff80;
1173 static void
1174 set_tctl(E1000State *s, int index, uint32_t val)
1176 s->mac_reg[index] = val;
1177 s->mac_reg[TDT] &= 0xffff;
1178 start_xmit(s);
1181 static void
1182 set_icr(E1000State *s, int index, uint32_t val)
1184 DBGOUT(INTERRUPT, "set_icr %x\n", val);
1185 set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1188 static void
1189 set_imc(E1000State *s, int index, uint32_t val)
1191 s->mac_reg[IMS] &= ~val;
1192 set_ics(s, 0, 0);
1195 static void
1196 set_ims(E1000State *s, int index, uint32_t val)
1198 s->mac_reg[IMS] |= val;
1199 set_ics(s, 0, 0);
#define getreg(x)    [x] = mac_readreg

/* Dispatch table for MMIO register reads, indexed by register offset / 4.
 * Plain registers go through mac_readreg; statistics counters use the
 * read-to-clear helpers; ICR uses its acknowledge-on-read accessor.
 * NULL entries fall through to the "unknown read" path in
 * e1000_mmio_read(). */
static uint32_t (*macreg_readops[])(E1000State *, int) = {
    getreg(PBA),      getreg(RCTL),     getreg(TDH),      getreg(TXDCTL),
    getreg(WUFC),     getreg(TDT),      getreg(CTRL),     getreg(LEDCTL),
    getreg(MANC),     getreg(MDIC),     getreg(SWSM),     getreg(STATUS),
    getreg(TORL),     getreg(TOTL),     getreg(IMS),      getreg(TCTL),
    getreg(RDH),      getreg(RDT),      getreg(VET),      getreg(ICS),
    getreg(TDBAL),    getreg(TDBAH),    getreg(RDBAH),    getreg(RDBAL),
    getreg(TDLEN),    getreg(RDLEN),    getreg(RDTR),     getreg(RADV),
    getreg(TADV),     getreg(ITR),

    [TOTH] = mac_read_clr8,    [TORH] = mac_read_clr8,    [GPRC] = mac_read_clr4,
    [GPTC] = mac_read_clr4,    [TPR] = mac_read_clr4,     [TPT] = mac_read_clr4,
    [ICR] = mac_icr_read,      [EECD] = get_eecd,         [EERD] = flash_eerd_read,
    [CRCERRS ... MPC] = &mac_readreg,     /* statistics register block */
    [RA ... RA+31] = &mac_readreg,        /* receive address array */
    [MTA ... MTA+127] = &mac_readreg,     /* multicast table array */
    [VFTA ... VFTA+127] = &mac_readreg,   /* VLAN filter table array */
};
enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
#define putreg(x)    [x] = mac_writereg

/* Dispatch table for MMIO register writes, indexed by register offset / 4.
 * Registers with write side effects (ring pointers, interrupt control,
 * EEPROM/MDIO access, TX/RX enable) get dedicated handlers; the rest use
 * the plain mac_writereg store.  NULL entries are treated as read-only
 * or unknown by e1000_mmio_write(). */
static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
    putreg(PBA),      putreg(EERD),     putreg(SWSM),     putreg(WUFC),
    putreg(TDBAL),    putreg(TDBAH),    putreg(TXDCTL),   putreg(RDBAH),
    putreg(RDBAL),    putreg(LEDCTL),   putreg(VET),
    [TDLEN] = set_dlen,    [RDLEN] = set_dlen,    [TCTL] = set_tctl,
    [TDT] = set_tctl,      [MDIC] = set_mdic,     [ICS] = set_ics,
    [TDH] = set_16bit,     [RDH] = set_16bit,     [RDT] = set_rdt,
    [IMC] = set_imc,       [IMS] = set_ims,       [ICR] = set_icr,
    [EECD] = set_eecd,     [RCTL] = set_rx_control, [CTRL] = set_ctrl,
    [RDTR] = set_16bit,    [RADV] = set_16bit,    [TADV] = set_16bit,
    [ITR] = set_16bit,
    [RA ... RA+31] = &mac_writereg,       /* receive address array */
    [MTA ... MTA+127] = &mac_writereg,    /* multicast table array */
    [VFTA ... VFTA+127] = &mac_writereg,  /* VLAN filter table array */
};

enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1242 static void
1243 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1244 unsigned size)
1246 E1000State *s = opaque;
1247 unsigned int index = (addr & 0x1ffff) >> 2;
1249 if (index < NWRITEOPS && macreg_writeops[index]) {
1250 macreg_writeops[index](s, index, val);
1251 } else if (index < NREADOPS && macreg_readops[index]) {
1252 DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n", index<<2, val);
1253 } else {
1254 DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1255 index<<2, val);
1259 static uint64_t
1260 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1262 E1000State *s = opaque;
1263 unsigned int index = (addr & 0x1ffff) >> 2;
1265 if (index < NREADOPS && macreg_readops[index])
1267 return macreg_readops[index](s, index);
1269 DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1270 return 0;
/* Memory region ops for the MMIO BAR: the implementation only handles
 * aligned 32-bit accesses, so the memory core splits/combines other
 * sizes accordingly. */
static const MemoryRegionOps e1000_mmio_ops = {
    .read = e1000_mmio_read,
    .write = e1000_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
1283 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1284 unsigned size)
1286 E1000State *s = opaque;
1288 (void)s;
1289 return 0;
1292 static void e1000_io_write(void *opaque, hwaddr addr,
1293 uint64_t val, unsigned size)
1295 E1000State *s = opaque;
1297 (void)s;
/* Memory region ops for the (stub) I/O-port BAR. */
static const MemoryRegionOps e1000_io_ops = {
    .read = e1000_io_read,
    .write = e1000_io_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
};
1306 static bool is_version_1(void *opaque, int version_id)
1308 return version_id == 1;
1311 static void e1000_pre_save(void *opaque)
1313 E1000State *s = opaque;
1314 NetClientState *nc = qemu_get_queue(s->nic);
1316 /* If the mitigation timer is active, emulate a timeout now. */
1317 if (s->mit_timer_on) {
1318 e1000_mit_timer(s);
1322 * If link is down and auto-negotiation is supported and ongoing,
1323 * complete auto-negotiation immediately. This allows us to look
1324 * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1326 if (nc->link_down && have_autoneg(s)) {
1327 s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1331 static int e1000_post_load(void *opaque, int version_id)
1333 E1000State *s = opaque;
1334 NetClientState *nc = qemu_get_queue(s->nic);
1336 if (!(s->compat_flags & E1000_FLAG_MIT)) {
1337 s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1338 s->mac_reg[TADV] = 0;
1339 s->mit_irq_level = false;
1341 s->mit_ide = 0;
1342 s->mit_timer_on = false;
1344 /* nc.link_down can't be migrated, so infer link_down according
1345 * to link status bit in mac_reg[STATUS].
1346 * Alternatively, restart link negotiation if it was in progress. */
1347 nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1349 if (have_autoneg(s) &&
1350 !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1351 nc->link_down = false;
1352 timer_mod(s->autoneg_timer,
1353 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1356 return 0;
1359 static bool e1000_mit_state_needed(void *opaque)
1361 E1000State *s = opaque;
1363 return s->compat_flags & E1000_FLAG_MIT;
/* Optional migration subsection carrying interrupt-mitigation state;
 * sent only when e1000_mit_state_needed() returns true. */
static const VMStateDescription vmstate_e1000_mit_state = {
    .name = "e1000/mit_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(mac_reg[RDTR], E1000State),
        VMSTATE_UINT32(mac_reg[RADV], E1000State),
        VMSTATE_UINT32(mac_reg[TADV], E1000State),
        VMSTATE_UINT32(mac_reg[ITR], E1000State),
        VMSTATE_BOOL(mit_irq_level, E1000State),
        VMSTATE_END_OF_LIST()
    }
};
/* Main migration descriptor.  Field order is part of the wire format:
 * do not reorder or remove entries; new optional state belongs in a
 * subsection (see vmstate_e1000_mit_state). */
static const VMStateDescription vmstate_e1000 = {
    .name = "e1000",
    .version_id = 2,
    .minimum_version_id = 1,
    .pre_save = e1000_pre_save,
    .post_load = e1000_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, E1000State),
        VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
        VMSTATE_UNUSED(4), /* Was mmio_base. */
        VMSTATE_UINT32(rxbuf_size, E1000State),
        VMSTATE_UINT32(rxbuf_min_shift, E1000State),
        /* EEPROM bit-bang engine state */
        VMSTATE_UINT32(eecd_state.val_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
        VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
        VMSTATE_UINT16(eecd_state.reading, E1000State),
        VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
        /* In-flight transmit / TSO context */
        VMSTATE_UINT8(tx.ipcss, E1000State),
        VMSTATE_UINT8(tx.ipcso, E1000State),
        VMSTATE_UINT16(tx.ipcse, E1000State),
        VMSTATE_UINT8(tx.tucss, E1000State),
        VMSTATE_UINT8(tx.tucso, E1000State),
        VMSTATE_UINT16(tx.tucse, E1000State),
        VMSTATE_UINT32(tx.paylen, E1000State),
        VMSTATE_UINT8(tx.hdr_len, E1000State),
        VMSTATE_UINT16(tx.mss, E1000State),
        VMSTATE_UINT16(tx.size, E1000State),
        VMSTATE_UINT16(tx.tso_frames, E1000State),
        VMSTATE_UINT8(tx.sum_needed, E1000State),
        VMSTATE_INT8(tx.ip, E1000State),
        VMSTATE_INT8(tx.tcp, E1000State),
        VMSTATE_BUFFER(tx.header, E1000State),
        VMSTATE_BUFFER(tx.data, E1000State),
        VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
        VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
        /* Individual MAC registers */
        VMSTATE_UINT32(mac_reg[CTRL], E1000State),
        VMSTATE_UINT32(mac_reg[EECD], E1000State),
        VMSTATE_UINT32(mac_reg[EERD], E1000State),
        VMSTATE_UINT32(mac_reg[GPRC], E1000State),
        VMSTATE_UINT32(mac_reg[GPTC], E1000State),
        VMSTATE_UINT32(mac_reg[ICR], E1000State),
        VMSTATE_UINT32(mac_reg[ICS], E1000State),
        VMSTATE_UINT32(mac_reg[IMC], E1000State),
        VMSTATE_UINT32(mac_reg[IMS], E1000State),
        VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[MANC], E1000State),
        VMSTATE_UINT32(mac_reg[MDIC], E1000State),
        VMSTATE_UINT32(mac_reg[MPC], E1000State),
        VMSTATE_UINT32(mac_reg[PBA], E1000State),
        VMSTATE_UINT32(mac_reg[RCTL], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[RDH], E1000State),
        VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[RDT], E1000State),
        VMSTATE_UINT32(mac_reg[STATUS], E1000State),
        VMSTATE_UINT32(mac_reg[SWSM], E1000State),
        VMSTATE_UINT32(mac_reg[TCTL], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
        VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
        VMSTATE_UINT32(mac_reg[TDH], E1000State),
        VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
        VMSTATE_UINT32(mac_reg[TDT], E1000State),
        VMSTATE_UINT32(mac_reg[TORH], E1000State),
        VMSTATE_UINT32(mac_reg[TORL], E1000State),
        VMSTATE_UINT32(mac_reg[TOTH], E1000State),
        VMSTATE_UINT32(mac_reg[TOTL], E1000State),
        VMSTATE_UINT32(mac_reg[TPR], E1000State),
        VMSTATE_UINT32(mac_reg[TPT], E1000State),
        VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
        VMSTATE_UINT32(mac_reg[WUFC], E1000State),
        VMSTATE_UINT32(mac_reg[VET], E1000State),
        /* Register arrays: receive addresses, multicast table,
         * VLAN filter table */
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
        VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (VMStateSubsection[]) {
        {
            .vmsd = &vmstate_e1000_mit_state,
            .needed = e1000_mit_state_needed,
        }, {
            /* empty */
        }
    }
};
/*
 * EEPROM contents documented in Tables 5-2 and 5-3, pp. 98-102.
 * Note: A valid DevId will be inserted during pci_e1000_init().
 * Words 0-2 are overwritten with the configured MAC address and the
 * checksum word is recomputed there as well.
 */
static const uint16_t e1000_eeprom_template[64] = {
    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
};
1482 /* PCI interface */
1484 static void
1485 e1000_mmio_setup(E1000State *d)
1487 int i;
1488 const uint32_t excluded_regs[] = {
1489 E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1490 E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1493 memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1494 "e1000-mmio", PNPMMIO_SIZE);
1495 memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1496 for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1497 memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1498 excluded_regs[i+1] - excluded_regs[i] - 4);
1499 memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1502 static void
1503 e1000_cleanup(NetClientState *nc)
1505 E1000State *s = qemu_get_nic_opaque(nc);
1507 s->nic = NULL;
/* Device teardown: stop and release both timers, then unregister the
 * NIC backend.  Mirrors the allocations made in pci_e1000_init(). */
static void
pci_e1000_uninit(PCIDevice *dev)
{
    E1000State *d = E1000(dev);

    timer_del(d->autoneg_timer);
    timer_free(d->autoneg_timer);
    timer_del(d->mit_timer);
    timer_free(d->mit_timer);
    qemu_del_nic(d->nic);
}
/* Callbacks the net core uses to drive this NIC model. */
static NetClientInfo net_e1000_info = {
    .type = NET_CLIENT_OPTIONS_KIND_NIC,
    .size = sizeof(NICState),
    .can_receive = e1000_can_receive,
    .receive = e1000_receive,
    .receive_iov = e1000_receive_iov,
    .cleanup = e1000_cleanup,
    .link_status_changed = e1000_set_link_status,
};
1532 static int pci_e1000_init(PCIDevice *pci_dev)
1534 DeviceState *dev = DEVICE(pci_dev);
1535 E1000State *d = E1000(pci_dev);
1536 PCIDeviceClass *pdc = PCI_DEVICE_GET_CLASS(pci_dev);
1537 uint8_t *pci_conf;
1538 uint16_t checksum = 0;
1539 int i;
1540 uint8_t *macaddr;
1542 pci_conf = pci_dev->config;
1544 /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1545 pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1547 pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1549 e1000_mmio_setup(d);
1551 pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1553 pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1555 memmove(d->eeprom_data, e1000_eeprom_template,
1556 sizeof e1000_eeprom_template);
1557 qemu_macaddr_default_if_unset(&d->conf.macaddr);
1558 macaddr = d->conf.macaddr.a;
1559 for (i = 0; i < 3; i++)
1560 d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
1561 d->eeprom_data[11] = d->eeprom_data[13] = pdc->device_id;
1562 for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
1563 checksum += d->eeprom_data[i];
1564 checksum = (uint16_t) EEPROM_SUM - checksum;
1565 d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;
1567 d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1568 object_get_typename(OBJECT(d)), dev->id, d);
1570 qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1572 add_boot_device_path(d->conf.bootindex, dev, "/ethernet-phy@0");
1574 d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1575 d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1577 return 0;
1580 static void qdev_e1000_reset(DeviceState *dev)
1582 E1000State *d = E1000(dev);
1583 e1000_reset(d);
/* qdev properties: standard NIC configuration plus two compat flags
 * ("autonegotiation", "mitigation") that older machine types may
 * disable for migration compatibility. */
static Property e1000_properties[] = {
    DEFINE_NIC_PROPERTIES(E1000State, conf),
    DEFINE_PROP_BIT("autonegotiation", E1000State,
                    compat_flags, E1000_FLAG_AUTONEG_BIT, true),
    DEFINE_PROP_BIT("mitigation", E1000State,
                    compat_flags, E1000_FLAG_MIT_BIT, true),
    DEFINE_PROP_END_OF_LIST(),
};
/* Per-variant identification data used as class_data when registering
 * the concrete e1000 device types (see e1000_devices[]). */
typedef struct E1000Info {
    const char *name;     /* QOM type name of the variant */
    uint16_t   device_id; /* PCI device ID */
    uint8_t    revision;  /* PCI revision ID */
    uint16_t   phy_id2;   /* PHY identifier register 2 value */
} E1000Info;
1602 static void e1000_class_init(ObjectClass *klass, void *data)
1604 DeviceClass *dc = DEVICE_CLASS(klass);
1605 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1606 E1000BaseClass *e = E1000_DEVICE_CLASS(klass);
1607 const E1000Info *info = data;
1609 k->init = pci_e1000_init;
1610 k->exit = pci_e1000_uninit;
1611 k->romfile = "efi-e1000.rom";
1612 k->vendor_id = PCI_VENDOR_ID_INTEL;
1613 k->device_id = info->device_id;
1614 k->revision = info->revision;
1615 e->phy_id2 = info->phy_id2;
1616 k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1617 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1618 dc->desc = "Intel Gigabit Ethernet";
1619 dc->reset = qdev_e1000_reset;
1620 dc->vmsd = &vmstate_e1000;
1621 dc->props = e1000_properties;
/* Instance init: expose "bootindex" as a writable QOM property backed
 * by conf.bootindex; the property setter handles boot-order
 * (de)registration for the "/ethernet-phy@0" device path. */
static void e1000_instance_init(Object *obj)
{
    E1000State *n = E1000(obj);
    device_add_bootindex_property(obj, &n->conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(n), NULL);
}
/* Abstract base type shared by all e1000 variants; concrete types are
 * registered from e1000_devices[] in e1000_register_types(). */
static const TypeInfo e1000_base_info = {
    .name          = TYPE_E1000_BASE,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(E1000State),
    .instance_init = e1000_instance_init,
    .class_size    = sizeof(E1000BaseClass),
    .abstract      = true,
};
/* The emulated chip variants; each entry becomes one concrete QOM type
 * with matching PCI device ID and PHY ID2 value. */
static const E1000Info e1000_devices[] = {
    {
        .name      = "e1000-82540em",
        .device_id = E1000_DEV_ID_82540EM,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
    },
    {
        .name      = "e1000-82544gc",
        .device_id = E1000_DEV_ID_82544GC_COPPER,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_82544x,
    },
    {
        .name      = "e1000-82545em",
        .device_id = E1000_DEV_ID_82545EM_COPPER,
        .revision  = 0x03,
        .phy_id2   = E1000_PHY_ID2_8254xx_DEFAULT,
    },
};
/* Back-compat alias: plain "e1000" is the 82540EM variant. */
static const TypeInfo e1000_default_info = {
    .name   = "e1000",
    .parent = "e1000-82540em",
};
1667 static void e1000_register_types(void)
1669 int i;
1671 type_register_static(&e1000_base_info);
1672 for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1673 const E1000Info *info = &e1000_devices[i];
1674 TypeInfo type_info = {};
1676 type_info.name = info->name;
1677 type_info.parent = TYPE_E1000_BASE;
1678 type_info.class_data = (void *)info;
1679 type_info.class_init = e1000_class_init;
1680 type_info.instance_init = e1000_instance_init;
1682 type_register(&type_info);
1684 type_register_static(&e1000_default_info);
1687 type_init(e1000_register_types)