e1000: fix hang of win2k12 shutdown with flood ping
[qemu/kevin.git] / hw / net / e1000.c
blob153124485de2ebb8782fe468248446fda9077b1f
/*
 * QEMU e1000 emulation
 *
 * Software developer's manual:
 * http://download.intel.com/design/network/manuals/8254x_GBe_SDM.pdf
 *
 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
 * Copyright (c) 2008 Qumranet
 * Based on work done by:
 * Copyright (c) 2007 Dan Aloni
 * Copyright (c) 2004 Antony T Curtis
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

28 #include "hw/hw.h"
29 #include "hw/pci/pci.h"
30 #include "net/net.h"
31 #include "net/checksum.h"
32 #include "hw/loader.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/dma.h"
35 #include "qemu/iov.h"
36 #include "qemu/range.h"
38 #include "e1000_regs.h"
40 #define E1000_DEBUG
#ifdef E1000_DEBUG
/* Debug categories; DBGBIT() turns a category name into its flag bit. */
enum {
    DEBUG_GENERAL,
    DEBUG_IO,
    DEBUG_MMIO,
    DEBUG_INTERRUPT,
    DEBUG_RX,
    DEBUG_TX,
    DEBUG_MDIC,
    DEBUG_EEPROM,
    DEBUG_UNKNOWN,
    DEBUG_TXSUM,
    DEBUG_TXERR,
    DEBUG_RXERR,
    DEBUG_RXFILTER,
    DEBUG_PHY,
    DEBUG_NOTYET,
};
#define DBGBIT(x)    (1<<DEBUG_##x)

/* Categories that are printed; only TXERR and GENERAL are on by default. */
static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL);

/* Print a message to stderr when the category `what` is enabled. */
#define DBGOUT(what, fmt, ...) do { \
    if (debugflags & DBGBIT(what)) \
        fprintf(stderr, "e1000: " fmt, ## __VA_ARGS__); \
    } while (0)
#else
/* Debugging compiled out: DBGOUT expands to an empty statement. */
#define DBGOUT(what, fmt, ...) do {} while (0)
#endif

#define IOPORT_SIZE       0x40
#define PNPMMIO_SIZE      0x20000
#define MIN_BUF_SIZE      60 /* Min. octets in an ethernet frame sans FCS */

/* this is the size past which hardware will drop packets when setting LPE=0 */
#define MAXIMUM_ETHERNET_VLAN_SIZE 1522
/* this is the size past which hardware will drop packets when setting LPE=1 */
#define MAXIMUM_ETHERNET_LPE_SIZE 16384

/* NOTE(review): presumably 14-byte Ethernet header + 4-byte VLAN tag */
#define MAXIMUM_ETHERNET_HDR_LEN (14+4)

72 * HW models:
73 * E1000_DEV_ID_82540EM works with Windows, Linux, and OS X <= 10.8
74 * E1000_DEV_ID_82544GC_COPPER appears to work; not well tested
75 * E1000_DEV_ID_82545EM_COPPER works with Linux and OS X >= 10.6
76 * Others never tested
79 typedef struct E1000State_st {
80 /*< private >*/
81 PCIDevice parent_obj;
82 /*< public >*/
84 NICState *nic;
85 NICConf conf;
86 MemoryRegion mmio;
87 MemoryRegion io;
89 uint32_t mac_reg[0x8000];
90 uint16_t phy_reg[0x20];
91 uint16_t eeprom_data[64];
93 uint32_t rxbuf_size;
94 uint32_t rxbuf_min_shift;
95 struct e1000_tx {
96 unsigned char header[256];
97 unsigned char vlan_header[4];
98 /* Fields vlan and data must not be reordered or separated. */
99 unsigned char vlan[4];
100 unsigned char data[0x10000];
101 uint16_t size;
102 unsigned char sum_needed;
103 unsigned char vlan_needed;
104 uint8_t ipcss;
105 uint8_t ipcso;
106 uint16_t ipcse;
107 uint8_t tucss;
108 uint8_t tucso;
109 uint16_t tucse;
110 uint8_t hdr_len;
111 uint16_t mss;
112 uint32_t paylen;
113 uint16_t tso_frames;
114 char tse;
115 int8_t ip;
116 int8_t tcp;
117 char cptse; // current packet tse bit
118 } tx;
120 struct {
121 uint32_t val_in; // shifted in from guest driver
122 uint16_t bitnum_in;
123 uint16_t bitnum_out;
124 uint16_t reading;
125 uint32_t old_eecd;
126 } eecd_state;
128 QEMUTimer *autoneg_timer;
130 QEMUTimer *mit_timer; /* Mitigation timer. */
131 bool mit_timer_on; /* Mitigation timer is running. */
132 bool mit_irq_level; /* Tracks interrupt pin level. */
133 uint32_t mit_ide; /* Tracks E1000_TXD_CMD_IDE bit. */
135 /* Compatibility flags for migration to/from qemu 1.3.0 and older */
136 #define E1000_FLAG_AUTONEG_BIT 0
137 #define E1000_FLAG_MIT_BIT 1
138 #define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
139 #define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
140 uint32_t compat_flags;
141 } E1000State;
143 typedef struct E1000BaseClass {
144 PCIDeviceClass parent_class;
145 uint16_t phy_id2;
146 } E1000BaseClass;
148 #define TYPE_E1000_BASE "e1000-base"
150 #define E1000(obj) \
151 OBJECT_CHECK(E1000State, (obj), TYPE_E1000_BASE)
153 #define E1000_DEVICE_CLASS(klass) \
154 OBJECT_CLASS_CHECK(E1000BaseClass, (klass), TYPE_E1000_BASE)
155 #define E1000_DEVICE_GET_CLASS(obj) \
156 OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE)
158 #define defreg(x) x = (E1000_##x>>2)
159 enum {
160 defreg(CTRL), defreg(EECD), defreg(EERD), defreg(GPRC),
161 defreg(GPTC), defreg(ICR), defreg(ICS), defreg(IMC),
162 defreg(IMS), defreg(LEDCTL), defreg(MANC), defreg(MDIC),
163 defreg(MPC), defreg(PBA), defreg(RCTL), defreg(RDBAH),
164 defreg(RDBAL), defreg(RDH), defreg(RDLEN), defreg(RDT),
165 defreg(STATUS), defreg(SWSM), defreg(TCTL), defreg(TDBAH),
166 defreg(TDBAL), defreg(TDH), defreg(TDLEN), defreg(TDT),
167 defreg(TORH), defreg(TORL), defreg(TOTH), defreg(TOTL),
168 defreg(TPR), defreg(TPT), defreg(TXDCTL), defreg(WUFC),
169 defreg(RA), defreg(MTA), defreg(CRCERRS),defreg(VFTA),
170 defreg(VET), defreg(RDTR), defreg(RADV), defreg(TADV),
171 defreg(ITR),
174 static void
175 e1000_link_down(E1000State *s)
177 s->mac_reg[STATUS] &= ~E1000_STATUS_LU;
178 s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS;
179 s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE;
180 s->phy_reg[PHY_LP_ABILITY] &= ~MII_LPAR_LPACK;
183 static void
184 e1000_link_up(E1000State *s)
186 s->mac_reg[STATUS] |= E1000_STATUS_LU;
187 s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
189 /* E1000_STATUS_LU is tested by e1000_can_receive() */
190 qemu_flush_queued_packets(qemu_get_queue(s->nic));
193 static bool
194 have_autoneg(E1000State *s)
196 return (s->compat_flags & E1000_FLAG_AUTONEG) &&
197 (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN);
200 static void
201 set_phy_ctrl(E1000State *s, int index, uint16_t val)
203 /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
204 s->phy_reg[PHY_CTRL] = val & ~(0x3f |
205 MII_CR_RESET |
206 MII_CR_RESTART_AUTO_NEG);
209 * QEMU 1.3 does not support link auto-negotiation emulation, so if we
210 * migrate during auto negotiation, after migration the link will be
211 * down.
213 if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) {
214 e1000_link_down(s);
215 DBGOUT(PHY, "Start link auto negotiation\n");
216 timer_mod(s->autoneg_timer,
217 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
221 static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = {
222 [PHY_CTRL] = set_phy_ctrl,
225 enum { NPHYWRITEOPS = ARRAY_SIZE(phyreg_writeops) };
227 enum { PHY_R = 1, PHY_W = 2, PHY_RW = PHY_R | PHY_W };
228 static const char phy_regcap[0x20] = {
229 [PHY_STATUS] = PHY_R, [M88E1000_EXT_PHY_SPEC_CTRL] = PHY_RW,
230 [PHY_ID1] = PHY_R, [M88E1000_PHY_SPEC_CTRL] = PHY_RW,
231 [PHY_CTRL] = PHY_RW, [PHY_1000T_CTRL] = PHY_RW,
232 [PHY_LP_ABILITY] = PHY_R, [PHY_1000T_STATUS] = PHY_R,
233 [PHY_AUTONEG_ADV] = PHY_RW, [M88E1000_RX_ERR_CNTR] = PHY_R,
234 [PHY_ID2] = PHY_R, [M88E1000_PHY_SPEC_STATUS] = PHY_R,
235 [PHY_AUTONEG_EXP] = PHY_R,
238 /* PHY_ID2 documented in 8254x_GBe_SDM.pdf, pp. 250 */
239 static const uint16_t phy_reg_init[] = {
240 [PHY_CTRL] = MII_CR_SPEED_SELECT_MSB |
241 MII_CR_FULL_DUPLEX |
242 MII_CR_AUTO_NEG_EN,
244 [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
245 MII_SR_LINK_STATUS | /* link initially up */
246 MII_SR_AUTONEG_CAPS |
247 /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
248 MII_SR_PREAMBLE_SUPPRESS |
249 MII_SR_EXTENDED_STATUS |
250 MII_SR_10T_HD_CAPS |
251 MII_SR_10T_FD_CAPS |
252 MII_SR_100X_HD_CAPS |
253 MII_SR_100X_FD_CAPS,
255 [PHY_ID1] = 0x141,
256 /* [PHY_ID2] configured per DevId, from e1000_reset() */
257 [PHY_AUTONEG_ADV] = 0xde1,
258 [PHY_LP_ABILITY] = 0x1e0,
259 [PHY_1000T_CTRL] = 0x0e00,
260 [PHY_1000T_STATUS] = 0x3c00,
261 [M88E1000_PHY_SPEC_CTRL] = 0x360,
262 [M88E1000_PHY_SPEC_STATUS] = 0xac00,
263 [M88E1000_EXT_PHY_SPEC_CTRL] = 0x0d60,
266 static const uint32_t mac_reg_init[] = {
267 [PBA] = 0x00100030,
268 [LEDCTL] = 0x602,
269 [CTRL] = E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
270 E1000_CTRL_SPD_1000 | E1000_CTRL_SLU,
271 [STATUS] = 0x80000000 | E1000_STATUS_GIO_MASTER_ENABLE |
272 E1000_STATUS_ASDV | E1000_STATUS_MTXCKOK |
273 E1000_STATUS_SPEED_1000 | E1000_STATUS_FD |
274 E1000_STATUS_LU,
275 [MANC] = E1000_MANC_EN_MNG2HOST | E1000_MANC_RCV_TCO_EN |
276 E1000_MANC_ARP_EN | E1000_MANC_0298_EN |
277 E1000_MANC_RMCP_EN,
/*
 * Keep the smallest non-zero delay seen so far in *curr.
 * *curr == 0 means no delay has been set yet; a zero value is ignored.
 */
static inline void
mit_update_delay(uint32_t *curr, uint32_t value)
{
    if (value == 0) {
        return;
    }
    if (*curr != 0 && *curr <= value) {
        return;
    }
    *curr = value;
}

289 static void
290 set_interrupt_cause(E1000State *s, int index, uint32_t val)
292 PCIDevice *d = PCI_DEVICE(s);
293 uint32_t pending_ints;
294 uint32_t mit_delay;
296 s->mac_reg[ICR] = val;
299 * Make sure ICR and ICS registers have the same value.
300 * The spec says that the ICS register is write-only. However in practice,
301 * on real hardware ICS is readable, and for reads it has the same value as
302 * ICR (except that ICS does not have the clear on read behaviour of ICR).
304 * The VxWorks PRO/1000 driver uses this behaviour.
306 s->mac_reg[ICS] = val;
308 pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
309 if (!s->mit_irq_level && pending_ints) {
311 * Here we detect a potential raising edge. We postpone raising the
312 * interrupt line if we are inside the mitigation delay window
313 * (s->mit_timer_on == 1).
314 * We provide a partial implementation of interrupt mitigation,
315 * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
316 * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
317 * RADV; relative timers based on TIDV and RDTR are not implemented.
319 if (s->mit_timer_on) {
320 return;
322 if (s->compat_flags & E1000_FLAG_MIT) {
323 /* Compute the next mitigation delay according to pending
324 * interrupts and the current values of RADV (provided
325 * RDTR!=0), TADV and ITR.
326 * Then rearm the timer.
328 mit_delay = 0;
329 if (s->mit_ide &&
330 (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
331 mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
333 if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
334 mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
336 mit_update_delay(&mit_delay, s->mac_reg[ITR]);
338 if (mit_delay) {
339 s->mit_timer_on = 1;
340 timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
341 mit_delay * 256);
343 s->mit_ide = 0;
347 s->mit_irq_level = (pending_ints != 0);
348 pci_set_irq(d, s->mit_irq_level);
351 static void
352 e1000_mit_timer(void *opaque)
354 E1000State *s = opaque;
356 s->mit_timer_on = 0;
357 /* Call set_interrupt_cause to update the irq level (if necessary). */
358 set_interrupt_cause(s, 0, s->mac_reg[ICR]);
361 static void
362 set_ics(E1000State *s, int index, uint32_t val)
364 DBGOUT(INTERRUPT, "set_ics %x, ICR %x, IMR %x\n", val, s->mac_reg[ICR],
365 s->mac_reg[IMS]);
366 set_interrupt_cause(s, 0, val | s->mac_reg[ICR]);
369 static void
370 e1000_autoneg_timer(void *opaque)
372 E1000State *s = opaque;
373 if (!qemu_get_queue(s->nic)->link_down) {
374 e1000_link_up(s);
375 s->phy_reg[PHY_LP_ABILITY] |= MII_LPAR_LPACK;
376 s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
377 DBGOUT(PHY, "Auto negotiation is completed\n");
378 set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */
382 static int
383 rxbufsize(uint32_t v)
385 v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
386 E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
387 E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256;
388 switch (v) {
389 case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
390 return 16384;
391 case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
392 return 8192;
393 case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
394 return 4096;
395 case E1000_RCTL_SZ_1024:
396 return 1024;
397 case E1000_RCTL_SZ_512:
398 return 512;
399 case E1000_RCTL_SZ_256:
400 return 256;
402 return 2048;
405 static void e1000_reset(void *opaque)
407 E1000State *d = opaque;
408 E1000BaseClass *edc = E1000_DEVICE_GET_CLASS(d);
409 uint8_t *macaddr = d->conf.macaddr.a;
410 int i;
412 timer_del(d->autoneg_timer);
413 timer_del(d->mit_timer);
414 d->mit_timer_on = 0;
415 d->mit_irq_level = 0;
416 d->mit_ide = 0;
417 memset(d->phy_reg, 0, sizeof d->phy_reg);
418 memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
419 d->phy_reg[PHY_ID2] = edc->phy_id2;
420 memset(d->mac_reg, 0, sizeof d->mac_reg);
421 memmove(d->mac_reg, mac_reg_init, sizeof mac_reg_init);
422 d->rxbuf_min_shift = 1;
423 memset(&d->tx, 0, sizeof d->tx);
425 if (qemu_get_queue(d->nic)->link_down) {
426 e1000_link_down(d);
429 /* Throttle interrupts to prevent guest (e.g Win 2012) from
430 * reinjecting interrupts endlessly. TODO: fix non ITR case.
432 d->mac_reg[ITR] = 250;
434 /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */
435 d->mac_reg[RA] = 0;
436 d->mac_reg[RA + 1] = E1000_RAH_AV;
437 for (i = 0; i < 4; i++) {
438 d->mac_reg[RA] |= macaddr[i] << (8 * i);
439 d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0;
441 qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
444 static void
445 set_ctrl(E1000State *s, int index, uint32_t val)
447 /* RST is self clearing */
448 s->mac_reg[CTRL] = val & ~E1000_CTRL_RST;
451 static void
452 set_rx_control(E1000State *s, int index, uint32_t val)
454 s->mac_reg[RCTL] = val;
455 s->rxbuf_size = rxbufsize(val);
456 s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
457 DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
458 s->mac_reg[RCTL]);
459 qemu_flush_queued_packets(qemu_get_queue(s->nic));
462 static void
463 set_mdic(E1000State *s, int index, uint32_t val)
465 uint32_t data = val & E1000_MDIC_DATA_MASK;
466 uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
468 if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) // phy #
469 val = s->mac_reg[MDIC] | E1000_MDIC_ERROR;
470 else if (val & E1000_MDIC_OP_READ) {
471 DBGOUT(MDIC, "MDIC read reg 0x%x\n", addr);
472 if (!(phy_regcap[addr] & PHY_R)) {
473 DBGOUT(MDIC, "MDIC read reg %x unhandled\n", addr);
474 val |= E1000_MDIC_ERROR;
475 } else
476 val = (val ^ data) | s->phy_reg[addr];
477 } else if (val & E1000_MDIC_OP_WRITE) {
478 DBGOUT(MDIC, "MDIC write reg 0x%x, value 0x%x\n", addr, data);
479 if (!(phy_regcap[addr] & PHY_W)) {
480 DBGOUT(MDIC, "MDIC write reg %x unhandled\n", addr);
481 val |= E1000_MDIC_ERROR;
482 } else {
483 if (addr < NPHYWRITEOPS && phyreg_writeops[addr]) {
484 phyreg_writeops[addr](s, index, data);
485 } else {
486 s->phy_reg[addr] = data;
490 s->mac_reg[MDIC] = val | E1000_MDIC_READY;
492 if (val & E1000_MDIC_INT_EN) {
493 set_ics(s, 0, E1000_ICR_MDAC);
497 static uint32_t
498 get_eecd(E1000State *s, int index)
500 uint32_t ret = E1000_EECD_PRES|E1000_EECD_GNT | s->eecd_state.old_eecd;
502 DBGOUT(EEPROM, "reading eeprom bit %d (reading %d)\n",
503 s->eecd_state.bitnum_out, s->eecd_state.reading);
504 if (!s->eecd_state.reading ||
505 ((s->eeprom_data[(s->eecd_state.bitnum_out >> 4) & 0x3f] >>
506 ((s->eecd_state.bitnum_out & 0xf) ^ 0xf))) & 1)
507 ret |= E1000_EECD_DO;
508 return ret;
511 static void
512 set_eecd(E1000State *s, int index, uint32_t val)
514 uint32_t oldval = s->eecd_state.old_eecd;
516 s->eecd_state.old_eecd = val & (E1000_EECD_SK | E1000_EECD_CS |
517 E1000_EECD_DI|E1000_EECD_FWE_MASK|E1000_EECD_REQ);
518 if (!(E1000_EECD_CS & val)) // CS inactive; nothing to do
519 return;
520 if (E1000_EECD_CS & (val ^ oldval)) { // CS rise edge; reset state
521 s->eecd_state.val_in = 0;
522 s->eecd_state.bitnum_in = 0;
523 s->eecd_state.bitnum_out = 0;
524 s->eecd_state.reading = 0;
526 if (!(E1000_EECD_SK & (val ^ oldval))) // no clock edge
527 return;
528 if (!(E1000_EECD_SK & val)) { // falling edge
529 s->eecd_state.bitnum_out++;
530 return;
532 s->eecd_state.val_in <<= 1;
533 if (val & E1000_EECD_DI)
534 s->eecd_state.val_in |= 1;
535 if (++s->eecd_state.bitnum_in == 9 && !s->eecd_state.reading) {
536 s->eecd_state.bitnum_out = ((s->eecd_state.val_in & 0x3f)<<4)-1;
537 s->eecd_state.reading = (((s->eecd_state.val_in >> 6) & 7) ==
538 EEPROM_READ_OPCODE_MICROWIRE);
540 DBGOUT(EEPROM, "eeprom bitnum in %d out %d, reading %d\n",
541 s->eecd_state.bitnum_in, s->eecd_state.bitnum_out,
542 s->eecd_state.reading);
545 static uint32_t
546 flash_eerd_read(E1000State *s, int x)
548 unsigned int index, r = s->mac_reg[EERD] & ~E1000_EEPROM_RW_REG_START;
550 if ((s->mac_reg[EERD] & E1000_EEPROM_RW_REG_START) == 0)
551 return (s->mac_reg[EERD]);
553 if ((index = r >> E1000_EEPROM_RW_ADDR_SHIFT) > EEPROM_CHECKSUM_REG)
554 return (E1000_EEPROM_RW_REG_DONE | r);
556 return ((s->eeprom_data[index] << E1000_EEPROM_RW_REG_DATA) |
557 E1000_EEPROM_RW_REG_DONE | r);
/*
 * Checksum data[css..end) and store the result big-endian at data[sloc].
 * end is cse + 1 when cse is non-zero and below n, otherwise n.
 * Nothing is written unless sloc < end - 1 (unsigned comparison).
 */
static void
putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse)
{
    uint32_t end = (cse && cse < n) ? cse + 1 : n;
    uint32_t sum;

    if (sloc >= end - 1) {
        return;
    }
    sum = net_checksum_add(end - css, data + css);
    stw_be_p(data + sloc, net_checksum_finish(sum));
}

573 static inline int
574 vlan_enabled(E1000State *s)
576 return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0);
579 static inline int
580 vlan_rx_filter_enabled(E1000State *s)
582 return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0);
585 static inline int
586 is_vlan_packet(E1000State *s, const uint8_t *buf)
588 return (be16_to_cpup((uint16_t *)(buf + 12)) ==
589 le16_to_cpu(s->mac_reg[VET]));
592 static inline int
593 is_vlan_txd(uint32_t txd_lower)
595 return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
598 /* FCS aka Ethernet CRC-32. We don't get it from backends and can't
599 * fill it in, just pad descriptor length by 4 bytes unless guest
600 * told us to strip it off the packet. */
601 static inline int
602 fcs_len(E1000State *s)
604 return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4;
607 static void
608 e1000_send_packet(E1000State *s, const uint8_t *buf, int size)
610 NetClientState *nc = qemu_get_queue(s->nic);
611 if (s->phy_reg[PHY_CTRL] & MII_CR_LOOPBACK) {
612 nc->info->receive(nc, buf, size);
613 } else {
614 qemu_send_packet(nc, buf, size);
618 static void
619 xmit_seg(E1000State *s)
621 uint16_t len, *sp;
622 unsigned int frames = s->tx.tso_frames, css, sofar, n;
623 struct e1000_tx *tp = &s->tx;
625 if (tp->tse && tp->cptse) {
626 css = tp->ipcss;
627 DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
628 frames, tp->size, css);
629 if (tp->ip) { // IPv4
630 stw_be_p(tp->data+css+2, tp->size - css);
631 stw_be_p(tp->data+css+4,
632 be16_to_cpup((uint16_t *)(tp->data+css+4))+frames);
633 } else // IPv6
634 stw_be_p(tp->data+css+4, tp->size - css);
635 css = tp->tucss;
636 len = tp->size - css;
637 DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len);
638 if (tp->tcp) {
639 sofar = frames * tp->mss;
640 stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
641 if (tp->paylen - sofar > tp->mss)
642 tp->data[css + 13] &= ~9; // PSH, FIN
643 } else // UDP
644 stw_be_p(tp->data+css+4, len);
645 if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
646 unsigned int phsum;
647 // add pseudo-header length before checksum calculation
648 sp = (uint16_t *)(tp->data + tp->tucso);
649 phsum = be16_to_cpup(sp) + len;
650 phsum = (phsum >> 16) + (phsum & 0xffff);
651 stw_be_p(sp, phsum);
653 tp->tso_frames++;
656 if (tp->sum_needed & E1000_TXD_POPTS_TXSM)
657 putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse);
658 if (tp->sum_needed & E1000_TXD_POPTS_IXSM)
659 putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse);
660 if (tp->vlan_needed) {
661 memmove(tp->vlan, tp->data, 4);
662 memmove(tp->data, tp->data + 4, 8);
663 memcpy(tp->data + 8, tp->vlan_header, 4);
664 e1000_send_packet(s, tp->vlan, tp->size + 4);
665 } else
666 e1000_send_packet(s, tp->data, tp->size);
667 s->mac_reg[TPT]++;
668 s->mac_reg[GPTC]++;
669 n = s->mac_reg[TOTL];
670 if ((s->mac_reg[TOTL] += s->tx.size) < n)
671 s->mac_reg[TOTH]++;
674 static void
675 process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
677 PCIDevice *d = PCI_DEVICE(s);
678 uint32_t txd_lower = le32_to_cpu(dp->lower.data);
679 uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
680 unsigned int split_size = txd_lower & 0xffff, bytes, sz, op;
681 unsigned int msh = 0xfffff;
682 uint64_t addr;
683 struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
684 struct e1000_tx *tp = &s->tx;
686 s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
687 if (dtype == E1000_TXD_CMD_DEXT) { // context descriptor
688 op = le32_to_cpu(xp->cmd_and_length);
689 tp->ipcss = xp->lower_setup.ip_fields.ipcss;
690 tp->ipcso = xp->lower_setup.ip_fields.ipcso;
691 tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
692 tp->tucss = xp->upper_setup.tcp_fields.tucss;
693 tp->tucso = xp->upper_setup.tcp_fields.tucso;
694 tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
695 tp->paylen = op & 0xfffff;
696 tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
697 tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
698 tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
699 tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
700 tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
701 tp->tso_frames = 0;
702 if (tp->tucso == 0) { // this is probably wrong
703 DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
704 tp->tucso = tp->tucss + (tp->tcp ? 16 : 6);
706 return;
707 } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
708 // data descriptor
709 if (tp->size == 0) {
710 tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
712 tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0;
713 } else {
714 // legacy descriptor
715 tp->cptse = 0;
718 if (vlan_enabled(s) && is_vlan_txd(txd_lower) &&
719 (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
720 tp->vlan_needed = 1;
721 stw_be_p(tp->vlan_header,
722 le16_to_cpu(s->mac_reg[VET]));
723 stw_be_p(tp->vlan_header + 2,
724 le16_to_cpu(dp->upper.fields.special));
727 addr = le64_to_cpu(dp->buffer_addr);
728 if (tp->tse && tp->cptse) {
729 msh = tp->hdr_len + tp->mss;
730 do {
731 bytes = split_size;
732 if (tp->size + bytes > msh)
733 bytes = msh - tp->size;
735 bytes = MIN(sizeof(tp->data) - tp->size, bytes);
736 pci_dma_read(d, addr, tp->data + tp->size, bytes);
737 sz = tp->size + bytes;
738 if (sz >= tp->hdr_len && tp->size < tp->hdr_len) {
739 memmove(tp->header, tp->data, tp->hdr_len);
741 tp->size = sz;
742 addr += bytes;
743 if (sz == msh) {
744 xmit_seg(s);
745 memmove(tp->data, tp->header, tp->hdr_len);
746 tp->size = tp->hdr_len;
748 split_size -= bytes;
749 } while (bytes && split_size);
750 } else if (!tp->tse && tp->cptse) {
751 // context descriptor TSE is not set, while data descriptor TSE is set
752 DBGOUT(TXERR, "TCP segmentation error\n");
753 } else {
754 split_size = MIN(sizeof(tp->data) - tp->size, split_size);
755 pci_dma_read(d, addr, tp->data + tp->size, split_size);
756 tp->size += split_size;
759 if (!(txd_lower & E1000_TXD_CMD_EOP))
760 return;
761 if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) {
762 xmit_seg(s);
764 tp->tso_frames = 0;
765 tp->sum_needed = 0;
766 tp->vlan_needed = 0;
767 tp->size = 0;
768 tp->cptse = 0;
771 static uint32_t
772 txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
774 PCIDevice *d = PCI_DEVICE(s);
775 uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
777 if (!(txd_lower & (E1000_TXD_CMD_RS|E1000_TXD_CMD_RPS)))
778 return 0;
779 txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
780 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
781 dp->upper.data = cpu_to_le32(txd_upper);
782 pci_dma_write(d, base + ((char *)&dp->upper - (char *)dp),
783 &dp->upper, sizeof(dp->upper));
784 return E1000_ICR_TXDW;
787 static uint64_t tx_desc_base(E1000State *s)
789 uint64_t bah = s->mac_reg[TDBAH];
790 uint64_t bal = s->mac_reg[TDBAL] & ~0xf;
792 return (bah << 32) + bal;
795 static void
796 start_xmit(E1000State *s)
798 PCIDevice *d = PCI_DEVICE(s);
799 dma_addr_t base;
800 struct e1000_tx_desc desc;
801 uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
803 if (!(s->mac_reg[TCTL] & E1000_TCTL_EN)) {
804 DBGOUT(TX, "tx disabled\n");
805 return;
808 while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
809 base = tx_desc_base(s) +
810 sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
811 pci_dma_read(d, base, &desc, sizeof(desc));
813 DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
814 (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
815 desc.upper.data);
817 process_tx_desc(s, &desc);
818 cause |= txdesc_writeback(s, base, &desc);
820 if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
821 s->mac_reg[TDH] = 0;
823 * the following could happen only if guest sw assigns
824 * bogus values to TDT/TDLEN.
825 * there's nothing too intelligent we could do about this.
827 if (s->mac_reg[TDH] == tdh_start) {
828 DBGOUT(TXERR, "TDH wraparound @%x, TDT %x, TDLEN %x\n",
829 tdh_start, s->mac_reg[TDT], s->mac_reg[TDLEN]);
830 break;
833 set_ics(s, 0, cause);
836 static int
837 receive_filter(E1000State *s, const uint8_t *buf, int size)
839 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
840 static const int mta_shift[] = {4, 3, 2, 0};
841 uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp;
843 if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) {
844 uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
845 uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) +
846 ((vid >> 5) & 0x7f));
847 if ((vfta & (1 << (vid & 0x1f))) == 0)
848 return 0;
851 if (rctl & E1000_RCTL_UPE) // promiscuous
852 return 1;
854 if ((buf[0] & 1) && (rctl & E1000_RCTL_MPE)) // promiscuous mcast
855 return 1;
857 if ((rctl & E1000_RCTL_BAM) && !memcmp(buf, bcast, sizeof bcast))
858 return 1;
860 for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) {
861 if (!(rp[1] & E1000_RAH_AV))
862 continue;
863 ra[0] = cpu_to_le32(rp[0]);
864 ra[1] = cpu_to_le32(rp[1]);
865 if (!memcmp(buf, (uint8_t *)ra, 6)) {
866 DBGOUT(RXFILTER,
867 "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n",
868 (int)(rp - s->mac_reg - RA)/2,
869 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
870 return 1;
873 DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n",
874 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
876 f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
877 f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
878 if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f)))
879 return 1;
880 DBGOUT(RXFILTER,
881 "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n",
882 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5],
883 (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5,
884 s->mac_reg[MTA + (f >> 5)]);
886 return 0;
889 static void
890 e1000_set_link_status(NetClientState *nc)
892 E1000State *s = qemu_get_nic_opaque(nc);
893 uint32_t old_status = s->mac_reg[STATUS];
895 if (nc->link_down) {
896 e1000_link_down(s);
897 } else {
898 if (have_autoneg(s) &&
899 !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
900 /* emulate auto-negotiation if supported */
901 timer_mod(s->autoneg_timer,
902 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
903 } else {
904 e1000_link_up(s);
908 if (s->mac_reg[STATUS] != old_status)
909 set_ics(s, 0, E1000_ICR_LSC);
912 static bool e1000_has_rxbufs(E1000State *s, size_t total_size)
914 int bufs;
915 /* Fast-path short packets */
916 if (total_size <= s->rxbuf_size) {
917 return s->mac_reg[RDH] != s->mac_reg[RDT];
919 if (s->mac_reg[RDH] < s->mac_reg[RDT]) {
920 bufs = s->mac_reg[RDT] - s->mac_reg[RDH];
921 } else if (s->mac_reg[RDH] > s->mac_reg[RDT]) {
922 bufs = s->mac_reg[RDLEN] / sizeof(struct e1000_rx_desc) +
923 s->mac_reg[RDT] - s->mac_reg[RDH];
924 } else {
925 return false;
927 return total_size <= bufs * s->rxbuf_size;
930 static int
931 e1000_can_receive(NetClientState *nc)
933 E1000State *s = qemu_get_nic_opaque(nc);
935 return (s->mac_reg[STATUS] & E1000_STATUS_LU) &&
936 (s->mac_reg[RCTL] & E1000_RCTL_EN) &&
937 (s->parent_obj.config[PCI_COMMAND] & PCI_COMMAND_MASTER) &&
938 e1000_has_rxbufs(s, 1);
941 static uint64_t rx_desc_base(E1000State *s)
943 uint64_t bah = s->mac_reg[RDBAH];
944 uint64_t bal = s->mac_reg[RDBAL] & ~0xf;
946 return (bah << 32) + bal;
949 static ssize_t
950 e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
952 E1000State *s = qemu_get_nic_opaque(nc);
953 PCIDevice *d = PCI_DEVICE(s);
954 struct e1000_rx_desc desc;
955 dma_addr_t base;
956 unsigned int n, rdt;
957 uint32_t rdh_start;
958 uint16_t vlan_special = 0;
959 uint8_t vlan_status = 0;
960 uint8_t min_buf[MIN_BUF_SIZE];
961 struct iovec min_iov;
962 uint8_t *filter_buf = iov->iov_base;
963 size_t size = iov_size(iov, iovcnt);
964 size_t iov_ofs = 0;
965 size_t desc_offset;
966 size_t desc_size;
967 size_t total_size;
969 if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) {
970 return -1;
973 if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) {
974 return -1;
977 /* Pad to minimum Ethernet frame length */
978 if (size < sizeof(min_buf)) {
979 iov_to_buf(iov, iovcnt, 0, min_buf, size);
980 memset(&min_buf[size], 0, sizeof(min_buf) - size);
981 min_iov.iov_base = filter_buf = min_buf;
982 min_iov.iov_len = size = sizeof(min_buf);
983 iovcnt = 1;
984 iov = &min_iov;
985 } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
986 /* This is very unlikely, but may happen. */
987 iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
988 filter_buf = min_buf;
991 /* Discard oversized packets if !LPE and !SBP. */
992 if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
993 (size > MAXIMUM_ETHERNET_VLAN_SIZE
994 && !(s->mac_reg[RCTL] & E1000_RCTL_LPE)))
995 && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) {
996 return size;
999 if (!receive_filter(s, filter_buf, size)) {
1000 return size;
1003 if (vlan_enabled(s) && is_vlan_packet(s, filter_buf)) {
1004 vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(filter_buf
1005 + 14)));
1006 iov_ofs = 4;
1007 if (filter_buf == iov->iov_base) {
1008 memmove(filter_buf + 4, filter_buf, 12);
1009 } else {
1010 iov_from_buf(iov, iovcnt, 4, filter_buf, 12);
1011 while (iov->iov_len <= iov_ofs) {
1012 iov_ofs -= iov->iov_len;
1013 iov++;
1016 vlan_status = E1000_RXD_STAT_VP;
1017 size -= 4;
1020 rdh_start = s->mac_reg[RDH];
1021 desc_offset = 0;
1022 total_size = size + fcs_len(s);
1023 if (!e1000_has_rxbufs(s, total_size)) {
1024 set_ics(s, 0, E1000_ICS_RXO);
1025 return -1;
1027 do {
1028 desc_size = total_size - desc_offset;
1029 if (desc_size > s->rxbuf_size) {
1030 desc_size = s->rxbuf_size;
1032 base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
1033 pci_dma_read(d, base, &desc, sizeof(desc));
1034 desc.special = vlan_special;
1035 desc.status |= (vlan_status | E1000_RXD_STAT_DD);
1036 if (desc.buffer_addr) {
1037 if (desc_offset < size) {
1038 size_t iov_copy;
1039 hwaddr ba = le64_to_cpu(desc.buffer_addr);
1040 size_t copy_size = size - desc_offset;
1041 if (copy_size > s->rxbuf_size) {
1042 copy_size = s->rxbuf_size;
1044 do {
1045 iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
1046 pci_dma_write(d, ba, iov->iov_base + iov_ofs, iov_copy);
1047 copy_size -= iov_copy;
1048 ba += iov_copy;
1049 iov_ofs += iov_copy;
1050 if (iov_ofs == iov->iov_len) {
1051 iov++;
1052 iov_ofs = 0;
1054 } while (copy_size);
1056 desc_offset += desc_size;
1057 desc.length = cpu_to_le16(desc_size);
1058 if (desc_offset >= total_size) {
1059 desc.status |= E1000_RXD_STAT_EOP | E1000_RXD_STAT_IXSM;
1060 } else {
1061 /* Guest zeroing out status is not a hardware requirement.
1062 Clear EOP in case guest didn't do it. */
1063 desc.status &= ~E1000_RXD_STAT_EOP;
1065 } else { // as per intel docs; skip descriptors with null buf addr
1066 DBGOUT(RX, "Null RX descriptor!!\n");
1068 pci_dma_write(d, base, &desc, sizeof(desc));
1070 if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
1071 s->mac_reg[RDH] = 0;
1072 /* see comment in start_xmit; same here */
1073 if (s->mac_reg[RDH] == rdh_start) {
1074 DBGOUT(RXERR, "RDH wraparound @%x, RDT %x, RDLEN %x\n",
1075 rdh_start, s->mac_reg[RDT], s->mac_reg[RDLEN]);
1076 set_ics(s, 0, E1000_ICS_RXO);
1077 return -1;
1079 } while (desc_offset < total_size);
1081 s->mac_reg[GPRC]++;
1082 s->mac_reg[TPR]++;
1083 /* TOR - Total Octets Received:
1084 * This register includes bytes received in a packet from the <Destination
1085 * Address> field through the <CRC> field, inclusively.
1087 n = s->mac_reg[TORL] + size + /* Always include FCS length. */ 4;
1088 if (n < s->mac_reg[TORL])
1089 s->mac_reg[TORH]++;
1090 s->mac_reg[TORL] = n;
1092 n = E1000_ICS_RXT0;
1093 if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
1094 rdt += s->mac_reg[RDLEN] / sizeof(desc);
1095 if (((rdt - s->mac_reg[RDH]) * sizeof(desc)) <= s->mac_reg[RDLEN] >>
1096 s->rxbuf_min_shift)
1097 n |= E1000_ICS_RXDMT0;
1099 set_ics(s, 0, n);
1101 return size;
1104 static ssize_t
1105 e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size)
1107 const struct iovec iov = {
1108 .iov_base = (uint8_t *)buf,
1109 .iov_len = size
1112 return e1000_receive_iov(nc, &iov, 1);
1115 static uint32_t
1116 mac_readreg(E1000State *s, int index)
1118 return s->mac_reg[index];
1121 static uint32_t
1122 mac_icr_read(E1000State *s, int index)
1124 uint32_t ret = s->mac_reg[ICR];
1126 DBGOUT(INTERRUPT, "ICR read: %x\n", ret);
1127 set_interrupt_cause(s, 0, 0);
1128 return ret;
1131 static uint32_t
1132 mac_read_clr4(E1000State *s, int index)
1134 uint32_t ret = s->mac_reg[index];
1136 s->mac_reg[index] = 0;
1137 return ret;
1140 static uint32_t
1141 mac_read_clr8(E1000State *s, int index)
1143 uint32_t ret = s->mac_reg[index];
1145 s->mac_reg[index] = 0;
1146 s->mac_reg[index-1] = 0;
1147 return ret;
1150 static void
1151 mac_writereg(E1000State *s, int index, uint32_t val)
1153 uint32_t macaddr[2];
1155 s->mac_reg[index] = val;
1157 if (index == RA + 1) {
1158 macaddr[0] = cpu_to_le32(s->mac_reg[RA]);
1159 macaddr[1] = cpu_to_le32(s->mac_reg[RA + 1]);
1160 qemu_format_nic_info_str(qemu_get_queue(s->nic), (uint8_t *)macaddr);
1164 static void
1165 set_rdt(E1000State *s, int index, uint32_t val)
1167 s->mac_reg[index] = val & 0xffff;
1168 if (e1000_has_rxbufs(s, 1)) {
1169 qemu_flush_queued_packets(qemu_get_queue(s->nic));
1173 static void
1174 set_16bit(E1000State *s, int index, uint32_t val)
1176 s->mac_reg[index] = val & 0xffff;
1179 static void
1180 set_dlen(E1000State *s, int index, uint32_t val)
1182 s->mac_reg[index] = val & 0xfff80;
1185 static void
1186 set_tctl(E1000State *s, int index, uint32_t val)
1188 s->mac_reg[index] = val;
1189 s->mac_reg[TDT] &= 0xffff;
1190 start_xmit(s);
1193 static void
1194 set_icr(E1000State *s, int index, uint32_t val)
1196 DBGOUT(INTERRUPT, "set_icr %x\n", val);
1197 set_interrupt_cause(s, 0, s->mac_reg[ICR] & ~val);
1200 static void
1201 set_imc(E1000State *s, int index, uint32_t val)
1203 s->mac_reg[IMS] &= ~val;
1204 set_ics(s, 0, 0);
1207 static void
1208 set_ims(E1000State *s, int index, uint32_t val)
1210 s->mac_reg[IMS] |= val;
1211 set_ics(s, 0, 0);
1214 #define getreg(x) [x] = mac_readreg
1215 static uint32_t (*macreg_readops[])(E1000State *, int) = {
1216 getreg(PBA), getreg(RCTL), getreg(TDH), getreg(TXDCTL),
1217 getreg(WUFC), getreg(TDT), getreg(CTRL), getreg(LEDCTL),
1218 getreg(MANC), getreg(MDIC), getreg(SWSM), getreg(STATUS),
1219 getreg(TORL), getreg(TOTL), getreg(IMS), getreg(TCTL),
1220 getreg(RDH), getreg(RDT), getreg(VET), getreg(ICS),
1221 getreg(TDBAL), getreg(TDBAH), getreg(RDBAH), getreg(RDBAL),
1222 getreg(TDLEN), getreg(RDLEN), getreg(RDTR), getreg(RADV),
1223 getreg(TADV), getreg(ITR),
1225 [TOTH] = mac_read_clr8, [TORH] = mac_read_clr8, [GPRC] = mac_read_clr4,
1226 [GPTC] = mac_read_clr4, [TPR] = mac_read_clr4, [TPT] = mac_read_clr4,
1227 [ICR] = mac_icr_read, [EECD] = get_eecd, [EERD] = flash_eerd_read,
1228 [CRCERRS ... MPC] = &mac_readreg,
1229 [RA ... RA+31] = &mac_readreg,
1230 [MTA ... MTA+127] = &mac_readreg,
1231 [VFTA ... VFTA+127] = &mac_readreg,
1233 enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
1235 #define putreg(x) [x] = mac_writereg
1236 static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
1237 putreg(PBA), putreg(EERD), putreg(SWSM), putreg(WUFC),
1238 putreg(TDBAL), putreg(TDBAH), putreg(TXDCTL), putreg(RDBAH),
1239 putreg(RDBAL), putreg(LEDCTL), putreg(VET),
1240 [TDLEN] = set_dlen, [RDLEN] = set_dlen, [TCTL] = set_tctl,
1241 [TDT] = set_tctl, [MDIC] = set_mdic, [ICS] = set_ics,
1242 [TDH] = set_16bit, [RDH] = set_16bit, [RDT] = set_rdt,
1243 [IMC] = set_imc, [IMS] = set_ims, [ICR] = set_icr,
1244 [EECD] = set_eecd, [RCTL] = set_rx_control, [CTRL] = set_ctrl,
1245 [RDTR] = set_16bit, [RADV] = set_16bit, [TADV] = set_16bit,
1246 [ITR] = set_16bit,
1247 [RA ... RA+31] = &mac_writereg,
1248 [MTA ... MTA+127] = &mac_writereg,
1249 [VFTA ... VFTA+127] = &mac_writereg,
1252 enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
1254 static void
1255 e1000_mmio_write(void *opaque, hwaddr addr, uint64_t val,
1256 unsigned size)
1258 E1000State *s = opaque;
1259 unsigned int index = (addr & 0x1ffff) >> 2;
1261 if (index < NWRITEOPS && macreg_writeops[index]) {
1262 macreg_writeops[index](s, index, val);
1263 } else if (index < NREADOPS && macreg_readops[index]) {
1264 DBGOUT(MMIO, "e1000_mmio_writel RO %x: 0x%04"PRIx64"\n", index<<2, val);
1265 } else {
1266 DBGOUT(UNKNOWN, "MMIO unknown write addr=0x%08x,val=0x%08"PRIx64"\n",
1267 index<<2, val);
1271 static uint64_t
1272 e1000_mmio_read(void *opaque, hwaddr addr, unsigned size)
1274 E1000State *s = opaque;
1275 unsigned int index = (addr & 0x1ffff) >> 2;
1277 if (index < NREADOPS && macreg_readops[index])
1279 return macreg_readops[index](s, index);
1281 DBGOUT(UNKNOWN, "MMIO unknown read addr=0x%08x\n", index<<2);
1282 return 0;
1285 static const MemoryRegionOps e1000_mmio_ops = {
1286 .read = e1000_mmio_read,
1287 .write = e1000_mmio_write,
1288 .endianness = DEVICE_LITTLE_ENDIAN,
1289 .impl = {
1290 .min_access_size = 4,
1291 .max_access_size = 4,
1295 static uint64_t e1000_io_read(void *opaque, hwaddr addr,
1296 unsigned size)
1298 E1000State *s = opaque;
1300 (void)s;
1301 return 0;
1304 static void e1000_io_write(void *opaque, hwaddr addr,
1305 uint64_t val, unsigned size)
1307 E1000State *s = opaque;
1309 (void)s;
1312 static const MemoryRegionOps e1000_io_ops = {
1313 .read = e1000_io_read,
1314 .write = e1000_io_write,
1315 .endianness = DEVICE_LITTLE_ENDIAN,
/*
 * VMState field test: true only when the incoming stream has version 1.
 * @opaque is unused.
 */
static bool is_version_1(void *opaque, int version_id)
{
    if (version_id != 1) {
        return false;
    }
    return true;
}

1323 static void e1000_pre_save(void *opaque)
1325 E1000State *s = opaque;
1326 NetClientState *nc = qemu_get_queue(s->nic);
1328 /* If the mitigation timer is active, emulate a timeout now. */
1329 if (s->mit_timer_on) {
1330 e1000_mit_timer(s);
1334 * If link is down and auto-negotiation is supported and ongoing,
1335 * complete auto-negotiation immediately. This allows us to look
1336 * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
1338 if (nc->link_down && have_autoneg(s)) {
1339 s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
1343 static int e1000_post_load(void *opaque, int version_id)
1345 E1000State *s = opaque;
1346 NetClientState *nc = qemu_get_queue(s->nic);
1348 if (!(s->compat_flags & E1000_FLAG_MIT)) {
1349 s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
1350 s->mac_reg[TADV] = 0;
1351 s->mit_irq_level = false;
1353 s->mit_ide = 0;
1354 s->mit_timer_on = false;
1356 /* nc.link_down can't be migrated, so infer link_down according
1357 * to link status bit in mac_reg[STATUS].
1358 * Alternatively, restart link negotiation if it was in progress. */
1359 nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0;
1361 if (have_autoneg(s) &&
1362 !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
1363 nc->link_down = false;
1364 timer_mod(s->autoneg_timer,
1365 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
1368 return 0;
1371 static bool e1000_mit_state_needed(void *opaque)
1373 E1000State *s = opaque;
1375 return s->compat_flags & E1000_FLAG_MIT;
1378 static const VMStateDescription vmstate_e1000_mit_state = {
1379 .name = "e1000/mit_state",
1380 .version_id = 1,
1381 .minimum_version_id = 1,
1382 .needed = e1000_mit_state_needed,
1383 .fields = (VMStateField[]) {
1384 VMSTATE_UINT32(mac_reg[RDTR], E1000State),
1385 VMSTATE_UINT32(mac_reg[RADV], E1000State),
1386 VMSTATE_UINT32(mac_reg[TADV], E1000State),
1387 VMSTATE_UINT32(mac_reg[ITR], E1000State),
1388 VMSTATE_BOOL(mit_irq_level, E1000State),
1389 VMSTATE_END_OF_LIST()
1393 static const VMStateDescription vmstate_e1000 = {
1394 .name = "e1000",
1395 .version_id = 2,
1396 .minimum_version_id = 1,
1397 .pre_save = e1000_pre_save,
1398 .post_load = e1000_post_load,
1399 .fields = (VMStateField[]) {
1400 VMSTATE_PCI_DEVICE(parent_obj, E1000State),
1401 VMSTATE_UNUSED_TEST(is_version_1, 4), /* was instance id */
1402 VMSTATE_UNUSED(4), /* Was mmio_base. */
1403 VMSTATE_UINT32(rxbuf_size, E1000State),
1404 VMSTATE_UINT32(rxbuf_min_shift, E1000State),
1405 VMSTATE_UINT32(eecd_state.val_in, E1000State),
1406 VMSTATE_UINT16(eecd_state.bitnum_in, E1000State),
1407 VMSTATE_UINT16(eecd_state.bitnum_out, E1000State),
1408 VMSTATE_UINT16(eecd_state.reading, E1000State),
1409 VMSTATE_UINT32(eecd_state.old_eecd, E1000State),
1410 VMSTATE_UINT8(tx.ipcss, E1000State),
1411 VMSTATE_UINT8(tx.ipcso, E1000State),
1412 VMSTATE_UINT16(tx.ipcse, E1000State),
1413 VMSTATE_UINT8(tx.tucss, E1000State),
1414 VMSTATE_UINT8(tx.tucso, E1000State),
1415 VMSTATE_UINT16(tx.tucse, E1000State),
1416 VMSTATE_UINT32(tx.paylen, E1000State),
1417 VMSTATE_UINT8(tx.hdr_len, E1000State),
1418 VMSTATE_UINT16(tx.mss, E1000State),
1419 VMSTATE_UINT16(tx.size, E1000State),
1420 VMSTATE_UINT16(tx.tso_frames, E1000State),
1421 VMSTATE_UINT8(tx.sum_needed, E1000State),
1422 VMSTATE_INT8(tx.ip, E1000State),
1423 VMSTATE_INT8(tx.tcp, E1000State),
1424 VMSTATE_BUFFER(tx.header, E1000State),
1425 VMSTATE_BUFFER(tx.data, E1000State),
1426 VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64),
1427 VMSTATE_UINT16_ARRAY(phy_reg, E1000State, 0x20),
1428 VMSTATE_UINT32(mac_reg[CTRL], E1000State),
1429 VMSTATE_UINT32(mac_reg[EECD], E1000State),
1430 VMSTATE_UINT32(mac_reg[EERD], E1000State),
1431 VMSTATE_UINT32(mac_reg[GPRC], E1000State),
1432 VMSTATE_UINT32(mac_reg[GPTC], E1000State),
1433 VMSTATE_UINT32(mac_reg[ICR], E1000State),
1434 VMSTATE_UINT32(mac_reg[ICS], E1000State),
1435 VMSTATE_UINT32(mac_reg[IMC], E1000State),
1436 VMSTATE_UINT32(mac_reg[IMS], E1000State),
1437 VMSTATE_UINT32(mac_reg[LEDCTL], E1000State),
1438 VMSTATE_UINT32(mac_reg[MANC], E1000State),
1439 VMSTATE_UINT32(mac_reg[MDIC], E1000State),
1440 VMSTATE_UINT32(mac_reg[MPC], E1000State),
1441 VMSTATE_UINT32(mac_reg[PBA], E1000State),
1442 VMSTATE_UINT32(mac_reg[RCTL], E1000State),
1443 VMSTATE_UINT32(mac_reg[RDBAH], E1000State),
1444 VMSTATE_UINT32(mac_reg[RDBAL], E1000State),
1445 VMSTATE_UINT32(mac_reg[RDH], E1000State),
1446 VMSTATE_UINT32(mac_reg[RDLEN], E1000State),
1447 VMSTATE_UINT32(mac_reg[RDT], E1000State),
1448 VMSTATE_UINT32(mac_reg[STATUS], E1000State),
1449 VMSTATE_UINT32(mac_reg[SWSM], E1000State),
1450 VMSTATE_UINT32(mac_reg[TCTL], E1000State),
1451 VMSTATE_UINT32(mac_reg[TDBAH], E1000State),
1452 VMSTATE_UINT32(mac_reg[TDBAL], E1000State),
1453 VMSTATE_UINT32(mac_reg[TDH], E1000State),
1454 VMSTATE_UINT32(mac_reg[TDLEN], E1000State),
1455 VMSTATE_UINT32(mac_reg[TDT], E1000State),
1456 VMSTATE_UINT32(mac_reg[TORH], E1000State),
1457 VMSTATE_UINT32(mac_reg[TORL], E1000State),
1458 VMSTATE_UINT32(mac_reg[TOTH], E1000State),
1459 VMSTATE_UINT32(mac_reg[TOTL], E1000State),
1460 VMSTATE_UINT32(mac_reg[TPR], E1000State),
1461 VMSTATE_UINT32(mac_reg[TPT], E1000State),
1462 VMSTATE_UINT32(mac_reg[TXDCTL], E1000State),
1463 VMSTATE_UINT32(mac_reg[WUFC], E1000State),
1464 VMSTATE_UINT32(mac_reg[VET], E1000State),
1465 VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
1466 VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
1467 VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
1468 VMSTATE_END_OF_LIST()
1470 .subsections = (const VMStateDescription*[]) {
1471 &vmstate_e1000_mit_state,
1472 NULL
/*
 * Initial EEPROM image (64 16-bit words).
 *
 * EEPROM contents are documented in Tables 5-2 and 5-3, pp. 98-102, of the
 * software developer's manual.
 * Note: the two DevId words (11 and 13) are zero here; a valid DevId is
 * inserted during pci_e1000_realize().
 */
static const uint16_t e1000_eeprom_template[64] = {
    /* words  0 ..  7 */
    0x0000, 0x0000, 0x0000, 0x0000,      0xffff, 0x0000,      0x0000, 0x0000,
    /* words  8 .. 15 */
    0x3000, 0x1000, 0x6403, 0 /*DevId*/, 0x8086, 0 /*DevId*/, 0x8086, 0x3040,
    /* words 16 .. 23 */
    0x0008, 0x2000, 0x7e14, 0x0048,      0x1000, 0x00d8,      0x0000, 0x2700,
    /* words 24 .. 31 */
    0x6cc9, 0x3150, 0x0722, 0x040b,      0x0984, 0x0000,      0xc000, 0x0706,
    /* words 32 .. 39 */
    0x1008, 0x0000, 0x0f04, 0x7fff,      0x4d01, 0xffff,      0xffff, 0xffff,
    /* words 40 .. 47 */
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    /* words 48 .. 55 */
    0x0100, 0x4000, 0x121c, 0xffff,      0xffff, 0xffff,      0xffff, 0xffff,
    /* words 56 .. 63 */
    0xffff, 0xffff, 0xffff, 0xffff,      0xffff, 0xffff,      0xffff, 0x0000,
};

1491 /* PCI interface */
1493 static void
1494 e1000_mmio_setup(E1000State *d)
1496 int i;
1497 const uint32_t excluded_regs[] = {
1498 E1000_MDIC, E1000_ICR, E1000_ICS, E1000_IMS,
1499 E1000_IMC, E1000_TCTL, E1000_TDT, PNPMMIO_SIZE
1502 memory_region_init_io(&d->mmio, OBJECT(d), &e1000_mmio_ops, d,
1503 "e1000-mmio", PNPMMIO_SIZE);
1504 memory_region_add_coalescing(&d->mmio, 0, excluded_regs[0]);
1505 for (i = 0; excluded_regs[i] != PNPMMIO_SIZE; i++)
1506 memory_region_add_coalescing(&d->mmio, excluded_regs[i] + 4,
1507 excluded_regs[i+1] - excluded_regs[i] - 4);
1508 memory_region_init_io(&d->io, OBJECT(d), &e1000_io_ops, d, "e1000-io", IOPORT_SIZE);
1511 static void
1512 pci_e1000_uninit(PCIDevice *dev)
1514 E1000State *d = E1000(dev);
1516 timer_del(d->autoneg_timer);
1517 timer_free(d->autoneg_timer);
1518 timer_del(d->mit_timer);
1519 timer_free(d->mit_timer);
1520 qemu_del_nic(d->nic);
1523 static NetClientInfo net_e1000_info = {
1524 .type = NET_CLIENT_OPTIONS_KIND_NIC,
1525 .size = sizeof(NICState),
1526 .can_receive = e1000_can_receive,
1527 .receive = e1000_receive,
1528 .receive_iov = e1000_receive_iov,
1529 .link_status_changed = e1000_set_link_status,
1532 static void e1000_write_config(PCIDevice *pci_dev, uint32_t address,
1533 uint32_t val, int len)
1535 E1000State *s = E1000(pci_dev);
1537 pci_default_write_config(pci_dev, address, val, len);
1539 if (range_covers_byte(address, len, PCI_COMMAND) &&
1540 (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
1541 qemu_flush_queued_packets(qemu_get_queue(s->nic));
1546 static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
1548 DeviceState *dev = DEVICE(pci_dev);
1549 E1000State *d = E1000(pci_dev);
1550 PCIDeviceClass *pdc = PCI_DEVICE_GET_CLASS(pci_dev);
1551 uint8_t *pci_conf;
1552 uint16_t checksum = 0;
1553 int i;
1554 uint8_t *macaddr;
1556 pci_dev->config_write = e1000_write_config;
1558 pci_conf = pci_dev->config;
1560 /* TODO: RST# value should be 0, PCI spec 6.2.4 */
1561 pci_conf[PCI_CACHE_LINE_SIZE] = 0x10;
1563 pci_conf[PCI_INTERRUPT_PIN] = 1; /* interrupt pin A */
1565 e1000_mmio_setup(d);
1567 pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
1569 pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io);
1571 memmove(d->eeprom_data, e1000_eeprom_template,
1572 sizeof e1000_eeprom_template);
1573 qemu_macaddr_default_if_unset(&d->conf.macaddr);
1574 macaddr = d->conf.macaddr.a;
1575 for (i = 0; i < 3; i++)
1576 d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
1577 d->eeprom_data[11] = d->eeprom_data[13] = pdc->device_id;
1578 for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
1579 checksum += d->eeprom_data[i];
1580 checksum = (uint16_t) EEPROM_SUM - checksum;
1581 d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum;
1583 d->nic = qemu_new_nic(&net_e1000_info, &d->conf,
1584 object_get_typename(OBJECT(d)), dev->id, d);
1586 qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr);
1588 d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
1589 d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
1592 static void qdev_e1000_reset(DeviceState *dev)
1594 E1000State *d = E1000(dev);
1595 e1000_reset(d);
1598 static Property e1000_properties[] = {
1599 DEFINE_NIC_PROPERTIES(E1000State, conf),
1600 DEFINE_PROP_BIT("autonegotiation", E1000State,
1601 compat_flags, E1000_FLAG_AUTONEG_BIT, true),
1602 DEFINE_PROP_BIT("mitigation", E1000State,
1603 compat_flags, E1000_FLAG_MIT_BIT, true),
1604 DEFINE_PROP_END_OF_LIST(),
/* Per-model parameters used as class data for a concrete e1000 type. */
typedef struct E1000Info {
    const char *name;       /* QOM type name of the model */
    uint16_t device_id;     /* copied to PCIDeviceClass.device_id */
    uint8_t revision;       /* copied to PCIDeviceClass.revision */
    uint16_t phy_id2;       /* copied to E1000BaseClass.phy_id2 */
} E1000Info;

1614 static void e1000_class_init(ObjectClass *klass, void *data)
1616 DeviceClass *dc = DEVICE_CLASS(klass);
1617 PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1618 E1000BaseClass *e = E1000_DEVICE_CLASS(klass);
1619 const E1000Info *info = data;
1621 k->realize = pci_e1000_realize;
1622 k->exit = pci_e1000_uninit;
1623 k->romfile = "efi-e1000.rom";
1624 k->vendor_id = PCI_VENDOR_ID_INTEL;
1625 k->device_id = info->device_id;
1626 k->revision = info->revision;
1627 e->phy_id2 = info->phy_id2;
1628 k->class_id = PCI_CLASS_NETWORK_ETHERNET;
1629 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
1630 dc->desc = "Intel Gigabit Ethernet";
1631 dc->reset = qdev_e1000_reset;
1632 dc->vmsd = &vmstate_e1000;
1633 dc->props = e1000_properties;
1636 static void e1000_instance_init(Object *obj)
1638 E1000State *n = E1000(obj);
1639 device_add_bootindex_property(obj, &n->conf.bootindex,
1640 "bootindex", "/ethernet-phy@0",
1641 DEVICE(n), NULL);
1644 static const TypeInfo e1000_base_info = {
1645 .name = TYPE_E1000_BASE,
1646 .parent = TYPE_PCI_DEVICE,
1647 .instance_size = sizeof(E1000State),
1648 .instance_init = e1000_instance_init,
1649 .class_size = sizeof(E1000BaseClass),
1650 .abstract = true,
1653 static const E1000Info e1000_devices[] = {
1655 .name = "e1000",
1656 .device_id = E1000_DEV_ID_82540EM,
1657 .revision = 0x03,
1658 .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT,
1661 .name = "e1000-82544gc",
1662 .device_id = E1000_DEV_ID_82544GC_COPPER,
1663 .revision = 0x03,
1664 .phy_id2 = E1000_PHY_ID2_82544x,
1667 .name = "e1000-82545em",
1668 .device_id = E1000_DEV_ID_82545EM_COPPER,
1669 .revision = 0x03,
1670 .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT,
1674 static void e1000_register_types(void)
1676 int i;
1678 type_register_static(&e1000_base_info);
1679 for (i = 0; i < ARRAY_SIZE(e1000_devices); i++) {
1680 const E1000Info *info = &e1000_devices[i];
1681 TypeInfo type_info = {};
1683 type_info.name = info->name;
1684 type_info.parent = TYPE_E1000_BASE;
1685 type_info.class_data = (void *)info;
1686 type_info.class_init = e1000_class_init;
1687 type_info.instance_init = e1000_instance_init;
1689 type_register(&type_info);
1693 type_init(e1000_register_types)